{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 200.0, "global_step": 30500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003278688524590164, "grad_norm": 107.12727355957031, "learning_rate": 2.1857923497267763e-08, "loss": 6.791, "step": 1 }, { "epoch": 0.006557377049180328, "grad_norm": 39.37515640258789, "learning_rate": 4.3715846994535526e-08, "loss": 6.7754, "step": 2 }, { "epoch": 0.009836065573770493, "grad_norm": 43.05866622924805, "learning_rate": 6.557377049180328e-08, "loss": 6.6367, "step": 3 }, { "epoch": 0.013114754098360656, "grad_norm": 38.684410095214844, "learning_rate": 8.743169398907105e-08, "loss": 6.707, "step": 4 }, { "epoch": 0.01639344262295082, "grad_norm": 38.7274169921875, "learning_rate": 1.0928961748633881e-07, "loss": 7.0684, "step": 5 }, { "epoch": 0.019672131147540985, "grad_norm": 44.165931701660156, "learning_rate": 1.3114754098360656e-07, "loss": 6.791, "step": 6 }, { "epoch": 0.022950819672131147, "grad_norm": 38.53837585449219, "learning_rate": 1.5300546448087432e-07, "loss": 6.7812, "step": 7 }, { "epoch": 0.02622950819672131, "grad_norm": 53.22453308105469, "learning_rate": 1.748633879781421e-07, "loss": 6.7598, "step": 8 }, { "epoch": 0.029508196721311476, "grad_norm": 35.678428649902344, "learning_rate": 1.9672131147540986e-07, "loss": 6.6191, "step": 9 }, { "epoch": 0.03278688524590164, "grad_norm": 45.10407638549805, "learning_rate": 2.1857923497267762e-07, "loss": 6.9395, "step": 10 }, { "epoch": 0.036065573770491806, "grad_norm": 33.5763053894043, "learning_rate": 2.404371584699454e-07, "loss": 6.709, "step": 11 }, { "epoch": 0.03934426229508197, "grad_norm": 40.943138122558594, "learning_rate": 2.622950819672131e-07, "loss": 6.5977, "step": 12 }, { "epoch": 0.04262295081967213, "grad_norm": 49.889522552490234, "learning_rate": 2.841530054644809e-07, "loss": 6.9023, "step": 13 }, { "epoch": 0.04590163934426229, "grad_norm": 33.823768615722656, "learning_rate": 3.0601092896174863e-07, "loss": 6.8867, "step": 14 }, { "epoch": 0.04918032786885246, "grad_norm": 65.17041015625, "learning_rate": 3.278688524590164e-07, "loss": 6.8984, "step": 15 }, { "epoch": 0.05245901639344262, "grad_norm": 48.78119659423828, "learning_rate": 3.497267759562842e-07, "loss": 6.8984, "step": 16 }, { "epoch": 0.05573770491803279, "grad_norm": 33.122615814208984, "learning_rate": 3.7158469945355194e-07, "loss": 6.9043, "step": 17 }, { "epoch": 0.05901639344262295, "grad_norm": 41.464237213134766, "learning_rate": 3.934426229508197e-07, "loss": 6.8457, "step": 18 }, { "epoch": 0.06229508196721312, "grad_norm": 34.63102340698242, "learning_rate": 4.1530054644808746e-07, "loss": 6.6895, "step": 19 }, { "epoch": 0.06557377049180328, "grad_norm": 37.85832977294922, "learning_rate": 4.3715846994535524e-07, "loss": 6.8574, "step": 20 }, { "epoch": 0.06885245901639345, "grad_norm": 34.53938293457031, "learning_rate": 4.59016393442623e-07, "loss": 6.7988, "step": 21 }, { "epoch": 0.07213114754098361, "grad_norm": 40.21219253540039, "learning_rate": 4.808743169398908e-07, "loss": 6.9395, "step": 22 }, { "epoch": 0.07540983606557378, "grad_norm": 34.840667724609375, "learning_rate": 5.027322404371585e-07, "loss": 6.8105, "step": 23 }, { "epoch": 0.07868852459016394, "grad_norm": 51.123416900634766, "learning_rate": 5.245901639344262e-07, "loss": 6.7383, "step": 24 }, { "epoch": 0.08196721311475409, "grad_norm": 31.573322296142578, "learning_rate": 5.46448087431694e-07, "loss": 6.7793, "step": 25 }, { "epoch": 0.08524590163934426, "grad_norm": 34.410343170166016, "learning_rate": 5.683060109289618e-07, "loss": 6.6328, "step": 26 }, { "epoch": 0.08852459016393442, "grad_norm": 37.17652893066406, "learning_rate": 5.901639344262295e-07, "loss": 6.5996, "step": 27 }, { "epoch": 0.09180327868852459, "grad_norm": 36.607112884521484, "learning_rate": 6.120218579234973e-07, "loss": 6.5801, "step": 28 }, { "epoch": 0.09508196721311475, "grad_norm": 34.92662048339844, "learning_rate": 6.338797814207651e-07, "loss": 6.543, "step": 29 }, { "epoch": 0.09836065573770492, "grad_norm": 33.33162307739258, "learning_rate": 6.557377049180328e-07, "loss": 6.6426, "step": 30 }, { "epoch": 0.10163934426229508, "grad_norm": 31.3615779876709, "learning_rate": 6.775956284153006e-07, "loss": 6.6738, "step": 31 }, { "epoch": 0.10491803278688525, "grad_norm": 36.32351303100586, "learning_rate": 6.994535519125684e-07, "loss": 6.5527, "step": 32 }, { "epoch": 0.10819672131147541, "grad_norm": 49.216548919677734, "learning_rate": 7.213114754098361e-07, "loss": 6.6035, "step": 33 }, { "epoch": 0.11147540983606558, "grad_norm": 30.870573043823242, "learning_rate": 7.431693989071039e-07, "loss": 6.6172, "step": 34 }, { "epoch": 0.11475409836065574, "grad_norm": 35.218265533447266, "learning_rate": 7.650273224043716e-07, "loss": 6.2832, "step": 35 }, { "epoch": 0.1180327868852459, "grad_norm": 32.573673248291016, "learning_rate": 7.868852459016395e-07, "loss": 6.5566, "step": 36 }, { "epoch": 0.12131147540983607, "grad_norm": 28.819101333618164, "learning_rate": 8.087431693989072e-07, "loss": 6.4668, "step": 37 }, { "epoch": 0.12459016393442623, "grad_norm": 36.37307357788086, "learning_rate": 8.306010928961749e-07, "loss": 6.4629, "step": 38 }, { "epoch": 0.12786885245901639, "grad_norm": 44.01520538330078, "learning_rate": 8.524590163934427e-07, "loss": 6.5215, "step": 39 }, { "epoch": 0.13114754098360656, "grad_norm": 37.58476257324219, "learning_rate": 8.743169398907105e-07, "loss": 6.6074, "step": 40 }, { "epoch": 0.13442622950819672, "grad_norm": 28.232091903686523, "learning_rate": 8.961748633879782e-07, "loss": 6.2793, "step": 41 }, { "epoch": 0.1377049180327869, "grad_norm": 31.33584213256836, "learning_rate": 9.18032786885246e-07, "loss": 6.041, "step": 42 }, { "epoch": 0.14098360655737704, "grad_norm": 31.67304039001465, "learning_rate": 9.398907103825138e-07, "loss": 6.2188, "step": 43 }, { "epoch": 0.14426229508196722, "grad_norm": 25.08854103088379, "learning_rate": 9.617486338797815e-07, "loss": 6.0723, "step": 44 }, { "epoch": 0.14754098360655737, "grad_norm": 31.22587776184082, "learning_rate": 9.836065573770493e-07, "loss": 6.0254, "step": 45 }, { "epoch": 0.15081967213114755, "grad_norm": 40.86006164550781, "learning_rate": 1.005464480874317e-06, "loss": 6.2051, "step": 46 }, { "epoch": 0.1540983606557377, "grad_norm": 22.220481872558594, "learning_rate": 1.0273224043715847e-06, "loss": 6.1406, "step": 47 }, { "epoch": 0.15737704918032788, "grad_norm": 80.24386596679688, "learning_rate": 1.0491803278688525e-06, "loss": 6.3984, "step": 48 }, { "epoch": 0.16065573770491803, "grad_norm": 24.569290161132812, "learning_rate": 1.0710382513661204e-06, "loss": 6.1484, "step": 49 }, { "epoch": 0.16393442622950818, "grad_norm": 41.60352325439453, "learning_rate": 1.092896174863388e-06, "loss": 6.498, "step": 50 }, { "epoch": 0.16721311475409836, "grad_norm": 28.932811737060547, "learning_rate": 1.1147540983606559e-06, "loss": 6.1211, "step": 51 }, { "epoch": 0.17049180327868851, "grad_norm": 30.018802642822266, "learning_rate": 1.1366120218579236e-06, "loss": 6.0801, "step": 52 }, { "epoch": 0.1737704918032787, "grad_norm": 31.629169464111328, "learning_rate": 1.1584699453551913e-06, "loss": 6.252, "step": 53 }, { "epoch": 0.17704918032786884, "grad_norm": 29.04078483581543, "learning_rate": 1.180327868852459e-06, "loss": 6.0879, "step": 54 }, { "epoch": 0.18032786885245902, "grad_norm": 25.79541778564453, "learning_rate": 1.2021857923497268e-06, "loss": 6.0469, "step": 55 }, { "epoch": 0.18360655737704917, "grad_norm": 27.386442184448242, "learning_rate": 1.2240437158469945e-06, "loss": 5.9395, "step": 56 }, { "epoch": 0.18688524590163935, "grad_norm": 21.353425979614258, "learning_rate": 1.2459016393442625e-06, "loss": 5.6855, "step": 57 }, { "epoch": 0.1901639344262295, "grad_norm": 18.77074432373047, "learning_rate": 1.2677595628415302e-06, "loss": 5.6777, "step": 58 }, { "epoch": 0.19344262295081968, "grad_norm": 18.23274803161621, "learning_rate": 1.2896174863387977e-06, "loss": 5.7617, "step": 59 }, { "epoch": 0.19672131147540983, "grad_norm": 21.128416061401367, "learning_rate": 1.3114754098360657e-06, "loss": 5.5449, "step": 60 }, { "epoch": 0.2, "grad_norm": 22.085844039916992, "learning_rate": 1.3333333333333334e-06, "loss": 5.7734, "step": 61 }, { "epoch": 0.20327868852459016, "grad_norm": 20.5605525970459, "learning_rate": 1.3551912568306011e-06, "loss": 5.7227, "step": 62 }, { "epoch": 0.20655737704918034, "grad_norm": 16.064468383789062, "learning_rate": 1.377049180327869e-06, "loss": 5.3203, "step": 63 }, { "epoch": 0.2098360655737705, "grad_norm": 18.00359344482422, "learning_rate": 1.3989071038251368e-06, "loss": 5.4648, "step": 64 }, { "epoch": 0.21311475409836064, "grad_norm": 16.466703414916992, "learning_rate": 1.4207650273224043e-06, "loss": 5.4434, "step": 65 }, { "epoch": 0.21639344262295082, "grad_norm": 19.324068069458008, "learning_rate": 1.4426229508196723e-06, "loss": 5.3047, "step": 66 }, { "epoch": 0.21967213114754097, "grad_norm": 15.71777057647705, "learning_rate": 1.46448087431694e-06, "loss": 5.418, "step": 67 }, { "epoch": 0.22295081967213115, "grad_norm": 17.3053035736084, "learning_rate": 1.4863387978142078e-06, "loss": 5.4883, "step": 68 }, { "epoch": 0.2262295081967213, "grad_norm": 19.766681671142578, "learning_rate": 1.5081967213114757e-06, "loss": 5.4551, "step": 69 }, { "epoch": 0.22950819672131148, "grad_norm": 18.133867263793945, "learning_rate": 1.5300546448087432e-06, "loss": 5.4473, "step": 70 }, { "epoch": 0.23278688524590163, "grad_norm": 19.12177085876465, "learning_rate": 1.551912568306011e-06, "loss": 5.293, "step": 71 }, { "epoch": 0.2360655737704918, "grad_norm": 17.061220169067383, "learning_rate": 1.573770491803279e-06, "loss": 5.5273, "step": 72 }, { "epoch": 0.23934426229508196, "grad_norm": 22.179052352905273, "learning_rate": 1.5956284153005466e-06, "loss": 5.4883, "step": 73 }, { "epoch": 0.24262295081967214, "grad_norm": 15.77593994140625, "learning_rate": 1.6174863387978144e-06, "loss": 5.4941, "step": 74 }, { "epoch": 0.2459016393442623, "grad_norm": 12.635430335998535, "learning_rate": 1.6393442622950819e-06, "loss": 5.3496, "step": 75 }, { "epoch": 0.24918032786885247, "grad_norm": 17.9576416015625, "learning_rate": 1.6612021857923498e-06, "loss": 5.2383, "step": 76 }, { "epoch": 0.25245901639344265, "grad_norm": 14.523795127868652, "learning_rate": 1.6830601092896176e-06, "loss": 4.9668, "step": 77 }, { "epoch": 0.25573770491803277, "grad_norm": 14.337334632873535, "learning_rate": 1.7049180327868853e-06, "loss": 5.3828, "step": 78 }, { "epoch": 0.25901639344262295, "grad_norm": 12.044458389282227, "learning_rate": 1.7267759562841532e-06, "loss": 5.1719, "step": 79 }, { "epoch": 0.26229508196721313, "grad_norm": 16.18140983581543, "learning_rate": 1.748633879781421e-06, "loss": 5.2949, "step": 80 }, { "epoch": 0.26557377049180325, "grad_norm": 43.2642822265625, "learning_rate": 1.7704918032786885e-06, "loss": 5.0195, "step": 81 }, { "epoch": 0.26885245901639343, "grad_norm": 12.876953125, "learning_rate": 1.7923497267759564e-06, "loss": 5.0156, "step": 82 }, { "epoch": 0.2721311475409836, "grad_norm": 12.590550422668457, "learning_rate": 1.8142076502732242e-06, "loss": 5.0137, "step": 83 }, { "epoch": 0.2754098360655738, "grad_norm": 14.633137702941895, "learning_rate": 1.836065573770492e-06, "loss": 5.2441, "step": 84 }, { "epoch": 0.2786885245901639, "grad_norm": 15.94819164276123, "learning_rate": 1.8579234972677599e-06, "loss": 5.0566, "step": 85 }, { "epoch": 0.2819672131147541, "grad_norm": 11.65058422088623, "learning_rate": 1.8797814207650276e-06, "loss": 4.917, "step": 86 }, { "epoch": 0.28524590163934427, "grad_norm": 14.6964111328125, "learning_rate": 1.9016393442622951e-06, "loss": 4.9844, "step": 87 }, { "epoch": 0.28852459016393445, "grad_norm": 14.43775749206543, "learning_rate": 1.923497267759563e-06, "loss": 4.9023, "step": 88 }, { "epoch": 0.29180327868852457, "grad_norm": 14.704740524291992, "learning_rate": 1.945355191256831e-06, "loss": 4.9688, "step": 89 }, { "epoch": 0.29508196721311475, "grad_norm": 15.031285285949707, "learning_rate": 1.9672131147540985e-06, "loss": 5.1641, "step": 90 }, { "epoch": 0.2983606557377049, "grad_norm": 15.47419261932373, "learning_rate": 1.9890710382513663e-06, "loss": 4.7119, "step": 91 }, { "epoch": 0.3016393442622951, "grad_norm": 16.273160934448242, "learning_rate": 2.010928961748634e-06, "loss": 4.7168, "step": 92 }, { "epoch": 0.30491803278688523, "grad_norm": 11.872836112976074, "learning_rate": 2.0327868852459017e-06, "loss": 4.8594, "step": 93 }, { "epoch": 0.3081967213114754, "grad_norm": 10.827252388000488, "learning_rate": 2.0546448087431695e-06, "loss": 4.8857, "step": 94 }, { "epoch": 0.3114754098360656, "grad_norm": 11.806995391845703, "learning_rate": 2.0765027322404376e-06, "loss": 4.7461, "step": 95 }, { "epoch": 0.31475409836065577, "grad_norm": 12.305721282958984, "learning_rate": 2.098360655737705e-06, "loss": 4.8184, "step": 96 }, { "epoch": 0.3180327868852459, "grad_norm": 11.313177108764648, "learning_rate": 2.1202185792349727e-06, "loss": 4.6621, "step": 97 }, { "epoch": 0.32131147540983607, "grad_norm": 15.540268898010254, "learning_rate": 2.142076502732241e-06, "loss": 4.6221, "step": 98 }, { "epoch": 0.32459016393442625, "grad_norm": 11.560362815856934, "learning_rate": 2.1639344262295085e-06, "loss": 4.5625, "step": 99 }, { "epoch": 0.32786885245901637, "grad_norm": 13.390657424926758, "learning_rate": 2.185792349726776e-06, "loss": 4.6797, "step": 100 }, { "epoch": 0.33114754098360655, "grad_norm": 11.008138656616211, "learning_rate": 2.207650273224044e-06, "loss": 4.5352, "step": 101 }, { "epoch": 0.3344262295081967, "grad_norm": 12.352751731872559, "learning_rate": 2.2295081967213117e-06, "loss": 4.5225, "step": 102 }, { "epoch": 0.3377049180327869, "grad_norm": 13.494210243225098, "learning_rate": 2.2513661202185795e-06, "loss": 4.5488, "step": 103 }, { "epoch": 0.34098360655737703, "grad_norm": 10.558372497558594, "learning_rate": 2.273224043715847e-06, "loss": 4.5156, "step": 104 }, { "epoch": 0.3442622950819672, "grad_norm": 10.773033142089844, "learning_rate": 2.295081967213115e-06, "loss": 4.7217, "step": 105 }, { "epoch": 0.3475409836065574, "grad_norm": 11.376216888427734, "learning_rate": 2.3169398907103827e-06, "loss": 4.6367, "step": 106 }, { "epoch": 0.35081967213114756, "grad_norm": 11.933897972106934, "learning_rate": 2.3387978142076504e-06, "loss": 4.7686, "step": 107 }, { "epoch": 0.3540983606557377, "grad_norm": 13.400888442993164, "learning_rate": 2.360655737704918e-06, "loss": 4.793, "step": 108 }, { "epoch": 0.35737704918032787, "grad_norm": 9.70680046081543, "learning_rate": 2.382513661202186e-06, "loss": 4.54, "step": 109 }, { "epoch": 0.36065573770491804, "grad_norm": 13.82170295715332, "learning_rate": 2.4043715846994536e-06, "loss": 4.2227, "step": 110 }, { "epoch": 0.3639344262295082, "grad_norm": 11.030941009521484, "learning_rate": 2.4262295081967218e-06, "loss": 4.498, "step": 111 }, { "epoch": 0.36721311475409835, "grad_norm": 12.952763557434082, "learning_rate": 2.448087431693989e-06, "loss": 4.291, "step": 112 }, { "epoch": 0.3704918032786885, "grad_norm": 23.691164016723633, "learning_rate": 2.469945355191257e-06, "loss": 4.3555, "step": 113 }, { "epoch": 0.3737704918032787, "grad_norm": 9.469575881958008, "learning_rate": 2.491803278688525e-06, "loss": 4.4375, "step": 114 }, { "epoch": 0.3770491803278688, "grad_norm": 9.776338577270508, "learning_rate": 2.5136612021857927e-06, "loss": 4.3955, "step": 115 }, { "epoch": 0.380327868852459, "grad_norm": 10.166370391845703, "learning_rate": 2.5355191256830604e-06, "loss": 4.1016, "step": 116 }, { "epoch": 0.3836065573770492, "grad_norm": 9.470163345336914, "learning_rate": 2.5573770491803277e-06, "loss": 4.4336, "step": 117 }, { "epoch": 0.38688524590163936, "grad_norm": 10.342138290405273, "learning_rate": 2.5792349726775955e-06, "loss": 4.416, "step": 118 }, { "epoch": 0.3901639344262295, "grad_norm": 9.317627906799316, "learning_rate": 2.6010928961748636e-06, "loss": 4.6436, "step": 119 }, { "epoch": 0.39344262295081966, "grad_norm": 10.099000930786133, "learning_rate": 2.6229508196721314e-06, "loss": 4.0742, "step": 120 }, { "epoch": 0.39672131147540984, "grad_norm": 12.326156616210938, "learning_rate": 2.644808743169399e-06, "loss": 4.333, "step": 121 }, { "epoch": 0.4, "grad_norm": 10.647047996520996, "learning_rate": 2.666666666666667e-06, "loss": 4.3896, "step": 122 }, { "epoch": 0.40327868852459015, "grad_norm": 10.167106628417969, "learning_rate": 2.6885245901639346e-06, "loss": 4.3955, "step": 123 }, { "epoch": 0.4065573770491803, "grad_norm": 9.0632963180542, "learning_rate": 2.7103825136612023e-06, "loss": 4.1914, "step": 124 }, { "epoch": 0.4098360655737705, "grad_norm": 12.698443412780762, "learning_rate": 2.7322404371584705e-06, "loss": 4.3242, "step": 125 }, { "epoch": 0.4131147540983607, "grad_norm": 9.297367095947266, "learning_rate": 2.754098360655738e-06, "loss": 4.1357, "step": 126 }, { "epoch": 0.4163934426229508, "grad_norm": 9.011900901794434, "learning_rate": 2.775956284153006e-06, "loss": 4.2314, "step": 127 }, { "epoch": 0.419672131147541, "grad_norm": 9.375920295715332, "learning_rate": 2.7978142076502737e-06, "loss": 4.1514, "step": 128 }, { "epoch": 0.42295081967213116, "grad_norm": 9.063838958740234, "learning_rate": 2.819672131147541e-06, "loss": 3.8975, "step": 129 }, { "epoch": 0.4262295081967213, "grad_norm": 8.948882102966309, "learning_rate": 2.8415300546448087e-06, "loss": 4.1436, "step": 130 }, { "epoch": 0.42950819672131146, "grad_norm": 11.557685852050781, "learning_rate": 2.8633879781420764e-06, "loss": 4.1973, "step": 131 }, { "epoch": 0.43278688524590164, "grad_norm": 9.393291473388672, "learning_rate": 2.8852459016393446e-06, "loss": 4.1826, "step": 132 }, { "epoch": 0.4360655737704918, "grad_norm": 10.744751930236816, "learning_rate": 2.9071038251366123e-06, "loss": 4.0518, "step": 133 }, { "epoch": 0.43934426229508194, "grad_norm": 13.094522476196289, "learning_rate": 2.92896174863388e-06, "loss": 4.1025, "step": 134 }, { "epoch": 0.4426229508196721, "grad_norm": 11.121308326721191, "learning_rate": 2.9508196721311478e-06, "loss": 3.9248, "step": 135 }, { "epoch": 0.4459016393442623, "grad_norm": 8.48041820526123, "learning_rate": 2.9726775956284155e-06, "loss": 4.0322, "step": 136 }, { "epoch": 0.4491803278688525, "grad_norm": 8.480998039245605, "learning_rate": 2.9945355191256832e-06, "loss": 3.6387, "step": 137 }, { "epoch": 0.4524590163934426, "grad_norm": 8.432883262634277, "learning_rate": 3.0163934426229514e-06, "loss": 4.0039, "step": 138 }, { "epoch": 0.4557377049180328, "grad_norm": 9.76298713684082, "learning_rate": 3.038251366120219e-06, "loss": 3.8584, "step": 139 }, { "epoch": 0.45901639344262296, "grad_norm": 9.772171020507812, "learning_rate": 3.0601092896174864e-06, "loss": 4.0537, "step": 140 }, { "epoch": 0.46229508196721314, "grad_norm": 9.448822021484375, "learning_rate": 3.081967213114754e-06, "loss": 3.7471, "step": 141 }, { "epoch": 0.46557377049180326, "grad_norm": 7.831855297088623, "learning_rate": 3.103825136612022e-06, "loss": 3.9375, "step": 142 }, { "epoch": 0.46885245901639344, "grad_norm": 11.503125190734863, "learning_rate": 3.1256830601092896e-06, "loss": 4.1387, "step": 143 }, { "epoch": 0.4721311475409836, "grad_norm": 8.486551284790039, "learning_rate": 3.147540983606558e-06, "loss": 4.1768, "step": 144 }, { "epoch": 0.47540983606557374, "grad_norm": 10.36031723022461, "learning_rate": 3.1693989071038255e-06, "loss": 3.9883, "step": 145 }, { "epoch": 0.4786885245901639, "grad_norm": 18.261810302734375, "learning_rate": 3.1912568306010933e-06, "loss": 3.9531, "step": 146 }, { "epoch": 0.4819672131147541, "grad_norm": 9.914095878601074, "learning_rate": 3.213114754098361e-06, "loss": 4.0273, "step": 147 }, { "epoch": 0.4852459016393443, "grad_norm": 8.614510536193848, "learning_rate": 3.2349726775956287e-06, "loss": 4.166, "step": 148 }, { "epoch": 0.4885245901639344, "grad_norm": 9.460521697998047, "learning_rate": 3.2568306010928965e-06, "loss": 4.0518, "step": 149 }, { "epoch": 0.4918032786885246, "grad_norm": 7.961089134216309, "learning_rate": 3.2786885245901638e-06, "loss": 3.8428, "step": 150 }, { "epoch": 0.49508196721311476, "grad_norm": 8.407172203063965, "learning_rate": 3.3005464480874324e-06, "loss": 3.8828, "step": 151 }, { "epoch": 0.49836065573770494, "grad_norm": 7.765562534332275, "learning_rate": 3.3224043715846997e-06, "loss": 3.7568, "step": 152 }, { "epoch": 0.5016393442622951, "grad_norm": 8.047646522521973, "learning_rate": 3.3442622950819674e-06, "loss": 3.7227, "step": 153 }, { "epoch": 0.5049180327868853, "grad_norm": 12.692337989807129, "learning_rate": 3.366120218579235e-06, "loss": 3.8291, "step": 154 }, { "epoch": 0.5081967213114754, "grad_norm": 7.891917705535889, "learning_rate": 3.387978142076503e-06, "loss": 3.8193, "step": 155 }, { "epoch": 0.5114754098360655, "grad_norm": 7.640501022338867, "learning_rate": 3.4098360655737706e-06, "loss": 3.8418, "step": 156 }, { "epoch": 0.5147540983606558, "grad_norm": 12.07009220123291, "learning_rate": 3.4316939890710388e-06, "loss": 3.6758, "step": 157 }, { "epoch": 0.5180327868852459, "grad_norm": 8.717643737792969, "learning_rate": 3.4535519125683065e-06, "loss": 3.8867, "step": 158 }, { "epoch": 0.521311475409836, "grad_norm": 8.530437469482422, "learning_rate": 3.4754098360655742e-06, "loss": 3.9521, "step": 159 }, { "epoch": 0.5245901639344263, "grad_norm": 7.405368328094482, "learning_rate": 3.497267759562842e-06, "loss": 3.7764, "step": 160 }, { "epoch": 0.5278688524590164, "grad_norm": 6.732872486114502, "learning_rate": 3.5191256830601097e-06, "loss": 3.9375, "step": 161 }, { "epoch": 0.5311475409836065, "grad_norm": 7.813531398773193, "learning_rate": 3.540983606557377e-06, "loss": 3.6543, "step": 162 }, { "epoch": 0.5344262295081967, "grad_norm": 8.299860954284668, "learning_rate": 3.5628415300546447e-06, "loss": 3.832, "step": 163 }, { "epoch": 0.5377049180327869, "grad_norm": 7.310373306274414, "learning_rate": 3.584699453551913e-06, "loss": 3.6074, "step": 164 }, { "epoch": 0.5409836065573771, "grad_norm": 9.410871505737305, "learning_rate": 3.6065573770491806e-06, "loss": 3.7705, "step": 165 }, { "epoch": 0.5442622950819672, "grad_norm": 9.892586708068848, "learning_rate": 3.6284153005464484e-06, "loss": 3.8789, "step": 166 }, { "epoch": 0.5475409836065573, "grad_norm": 8.145835876464844, "learning_rate": 3.650273224043716e-06, "loss": 3.5215, "step": 167 }, { "epoch": 0.5508196721311476, "grad_norm": 11.302416801452637, "learning_rate": 3.672131147540984e-06, "loss": 3.5488, "step": 168 }, { "epoch": 0.5540983606557377, "grad_norm": 8.711554527282715, "learning_rate": 3.6939890710382516e-06, "loss": 3.8174, "step": 169 }, { "epoch": 0.5573770491803278, "grad_norm": 13.121417045593262, "learning_rate": 3.7158469945355197e-06, "loss": 3.8525, "step": 170 }, { "epoch": 0.5606557377049181, "grad_norm": 9.338717460632324, "learning_rate": 3.7377049180327874e-06, "loss": 3.9033, "step": 171 }, { "epoch": 0.5639344262295082, "grad_norm": 7.382042407989502, "learning_rate": 3.759562841530055e-06, "loss": 3.6055, "step": 172 }, { "epoch": 0.5672131147540984, "grad_norm": 9.314645767211914, "learning_rate": 3.7814207650273225e-06, "loss": 3.584, "step": 173 }, { "epoch": 0.5704918032786885, "grad_norm": 7.7643961906433105, "learning_rate": 3.8032786885245902e-06, "loss": 3.6943, "step": 174 }, { "epoch": 0.5737704918032787, "grad_norm": 7.766274929046631, "learning_rate": 3.825136612021858e-06, "loss": 3.5869, "step": 175 }, { "epoch": 0.5770491803278689, "grad_norm": 8.145323753356934, "learning_rate": 3.846994535519126e-06, "loss": 3.6641, "step": 176 }, { "epoch": 0.580327868852459, "grad_norm": 12.117157936096191, "learning_rate": 3.868852459016394e-06, "loss": 3.6904, "step": 177 }, { "epoch": 0.5836065573770491, "grad_norm": 7.230101585388184, "learning_rate": 3.890710382513662e-06, "loss": 3.792, "step": 178 }, { "epoch": 0.5868852459016394, "grad_norm": 7.818004131317139, "learning_rate": 3.912568306010929e-06, "loss": 3.5391, "step": 179 }, { "epoch": 0.5901639344262295, "grad_norm": 10.013121604919434, "learning_rate": 3.934426229508197e-06, "loss": 3.501, "step": 180 }, { "epoch": 0.5934426229508196, "grad_norm": 7.364282131195068, "learning_rate": 3.956284153005464e-06, "loss": 3.4287, "step": 181 }, { "epoch": 0.5967213114754099, "grad_norm": 8.13361930847168, "learning_rate": 3.9781420765027325e-06, "loss": 3.5332, "step": 182 }, { "epoch": 0.6, "grad_norm": 10.99166488647461, "learning_rate": 4.000000000000001e-06, "loss": 3.6396, "step": 183 }, { "epoch": 0.6032786885245902, "grad_norm": 6.205257415771484, "learning_rate": 4.021857923497268e-06, "loss": 3.4961, "step": 184 }, { "epoch": 0.6065573770491803, "grad_norm": 8.9110107421875, "learning_rate": 4.043715846994536e-06, "loss": 3.6582, "step": 185 }, { "epoch": 0.6098360655737705, "grad_norm": 8.355937957763672, "learning_rate": 4.0655737704918034e-06, "loss": 3.6172, "step": 186 }, { "epoch": 0.6131147540983607, "grad_norm": 7.629919528961182, "learning_rate": 4.087431693989072e-06, "loss": 3.6113, "step": 187 }, { "epoch": 0.6163934426229508, "grad_norm": 8.521677017211914, "learning_rate": 4.109289617486339e-06, "loss": 3.6143, "step": 188 }, { "epoch": 0.6196721311475409, "grad_norm": 8.300387382507324, "learning_rate": 4.131147540983607e-06, "loss": 3.7988, "step": 189 }, { "epoch": 0.6229508196721312, "grad_norm": 7.292804718017578, "learning_rate": 4.153005464480875e-06, "loss": 3.583, "step": 190 }, { "epoch": 0.6262295081967213, "grad_norm": 9.001298904418945, "learning_rate": 4.1748633879781425e-06, "loss": 3.6113, "step": 191 }, { "epoch": 0.6295081967213115, "grad_norm": 9.8739595413208, "learning_rate": 4.19672131147541e-06, "loss": 3.4648, "step": 192 }, { "epoch": 0.6327868852459017, "grad_norm": 14.604150772094727, "learning_rate": 4.218579234972678e-06, "loss": 3.5166, "step": 193 }, { "epoch": 0.6360655737704918, "grad_norm": 9.129476547241211, "learning_rate": 4.240437158469945e-06, "loss": 3.6973, "step": 194 }, { "epoch": 0.639344262295082, "grad_norm": 8.652037620544434, "learning_rate": 4.2622950819672135e-06, "loss": 3.9111, "step": 195 }, { "epoch": 0.6426229508196721, "grad_norm": 7.4107818603515625, "learning_rate": 4.284153005464482e-06, "loss": 3.6885, "step": 196 }, { "epoch": 0.6459016393442623, "grad_norm": 8.924422264099121, "learning_rate": 4.306010928961749e-06, "loss": 3.6143, "step": 197 }, { "epoch": 0.6491803278688525, "grad_norm": 6.763689994812012, "learning_rate": 4.327868852459017e-06, "loss": 3.6729, "step": 198 }, { "epoch": 0.6524590163934426, "grad_norm": 7.485657691955566, "learning_rate": 4.349726775956284e-06, "loss": 3.4346, "step": 199 }, { "epoch": 0.6557377049180327, "grad_norm": 10.416451454162598, "learning_rate": 4.371584699453552e-06, "loss": 3.4209, "step": 200 }, { "epoch": 0.659016393442623, "grad_norm": 6.11414098739624, "learning_rate": 4.39344262295082e-06, "loss": 3.6006, "step": 201 }, { "epoch": 0.6622950819672131, "grad_norm": 6.935802459716797, "learning_rate": 4.415300546448088e-06, "loss": 3.5518, "step": 202 }, { "epoch": 0.6655737704918033, "grad_norm": 6.1799397468566895, "learning_rate": 4.437158469945355e-06, "loss": 3.438, "step": 203 }, { "epoch": 0.6688524590163935, "grad_norm": 9.38018798828125, "learning_rate": 4.4590163934426235e-06, "loss": 3.7695, "step": 204 }, { "epoch": 0.6721311475409836, "grad_norm": 9.983596801757812, "learning_rate": 4.480874316939891e-06, "loss": 3.6104, "step": 205 }, { "epoch": 0.6754098360655738, "grad_norm": 8.200432777404785, "learning_rate": 4.502732240437159e-06, "loss": 3.4648, "step": 206 }, { "epoch": 0.6786885245901639, "grad_norm": 7.308393955230713, "learning_rate": 4.524590163934426e-06, "loss": 3.7686, "step": 207 }, { "epoch": 0.6819672131147541, "grad_norm": 8.666681289672852, "learning_rate": 4.546448087431694e-06, "loss": 3.8135, "step": 208 }, { "epoch": 0.6852459016393443, "grad_norm": 39.242897033691406, "learning_rate": 4.5683060109289626e-06, "loss": 3.6973, "step": 209 }, { "epoch": 0.6885245901639344, "grad_norm": 6.763496398925781, "learning_rate": 4.59016393442623e-06, "loss": 3.5791, "step": 210 }, { "epoch": 0.6918032786885245, "grad_norm": 7.513566493988037, "learning_rate": 4.612021857923498e-06, "loss": 3.6387, "step": 211 }, { "epoch": 0.6950819672131148, "grad_norm": 9.143583297729492, "learning_rate": 4.633879781420765e-06, "loss": 3.5742, "step": 212 }, { "epoch": 0.6983606557377049, "grad_norm": 6.883993625640869, "learning_rate": 4.655737704918033e-06, "loss": 3.4746, "step": 213 }, { "epoch": 0.7016393442622951, "grad_norm": 6.728498935699463, "learning_rate": 4.677595628415301e-06, "loss": 3.7783, "step": 214 }, { "epoch": 0.7049180327868853, "grad_norm": 7.97144889831543, "learning_rate": 4.699453551912569e-06, "loss": 3.6572, "step": 215 }, { "epoch": 0.7081967213114754, "grad_norm": 6.470061779022217, "learning_rate": 4.721311475409836e-06, "loss": 3.499, "step": 216 }, { "epoch": 0.7114754098360656, "grad_norm": 7.31796407699585, "learning_rate": 4.7431693989071044e-06, "loss": 3.71, "step": 217 }, { "epoch": 0.7147540983606557, "grad_norm": 7.550661087036133, "learning_rate": 4.765027322404372e-06, "loss": 3.4551, "step": 218 }, { "epoch": 0.7180327868852459, "grad_norm": 7.475368976593018, "learning_rate": 4.78688524590164e-06, "loss": 3.6162, "step": 219 }, { "epoch": 0.7213114754098361, "grad_norm": 7.453362464904785, "learning_rate": 4.808743169398907e-06, "loss": 3.5488, "step": 220 }, { "epoch": 0.7245901639344262, "grad_norm": 7.835952281951904, "learning_rate": 4.830601092896175e-06, "loss": 3.5527, "step": 221 }, { "epoch": 0.7278688524590164, "grad_norm": 7.135741233825684, "learning_rate": 4.8524590163934435e-06, "loss": 3.4121, "step": 222 }, { "epoch": 0.7311475409836066, "grad_norm": 8.004170417785645, "learning_rate": 4.874316939890711e-06, "loss": 3.5908, "step": 223 }, { "epoch": 0.7344262295081967, "grad_norm": 7.196075916290283, "learning_rate": 4.896174863387978e-06, "loss": 3.6367, "step": 224 }, { "epoch": 0.7377049180327869, "grad_norm": 7.3263959884643555, "learning_rate": 4.918032786885246e-06, "loss": 3.7041, "step": 225 }, { "epoch": 0.740983606557377, "grad_norm": 8.18163776397705, "learning_rate": 4.939890710382514e-06, "loss": 3.752, "step": 226 }, { "epoch": 0.7442622950819672, "grad_norm": 8.318378448486328, "learning_rate": 4.961748633879782e-06, "loss": 3.5195, "step": 227 }, { "epoch": 0.7475409836065574, "grad_norm": 7.514578819274902, "learning_rate": 4.98360655737705e-06, "loss": 3.54, "step": 228 }, { "epoch": 0.7508196721311475, "grad_norm": 10.023043632507324, "learning_rate": 5.005464480874317e-06, "loss": 3.7266, "step": 229 }, { "epoch": 0.7540983606557377, "grad_norm": 6.372830867767334, "learning_rate": 5.027322404371585e-06, "loss": 3.2627, "step": 230 }, { "epoch": 0.7573770491803279, "grad_norm": 9.449186325073242, "learning_rate": 5.0491803278688535e-06, "loss": 3.4346, "step": 231 }, { "epoch": 0.760655737704918, "grad_norm": 8.713711738586426, "learning_rate": 5.071038251366121e-06, "loss": 3.5771, "step": 232 }, { "epoch": 0.7639344262295082, "grad_norm": 7.086660861968994, "learning_rate": 5.092896174863389e-06, "loss": 3.3604, "step": 233 }, { "epoch": 0.7672131147540984, "grad_norm": 9.122855186462402, "learning_rate": 5.1147540983606555e-06, "loss": 3.4902, "step": 234 }, { "epoch": 0.7704918032786885, "grad_norm": 7.898484706878662, "learning_rate": 5.1366120218579245e-06, "loss": 3.2871, "step": 235 }, { "epoch": 0.7737704918032787, "grad_norm": 7.65049934387207, "learning_rate": 5.158469945355191e-06, "loss": 3.5273, "step": 236 }, { "epoch": 0.7770491803278688, "grad_norm": 8.61430549621582, "learning_rate": 5.180327868852459e-06, "loss": 3.752, "step": 237 }, { "epoch": 0.780327868852459, "grad_norm": 9.246840476989746, "learning_rate": 5.202185792349727e-06, "loss": 3.4287, "step": 238 }, { "epoch": 0.7836065573770492, "grad_norm": 7.609210014343262, "learning_rate": 5.2240437158469946e-06, "loss": 3.3389, "step": 239 }, { "epoch": 0.7868852459016393, "grad_norm": 9.62894058227539, "learning_rate": 5.245901639344263e-06, "loss": 3.3906, "step": 240 }, { "epoch": 0.7901639344262295, "grad_norm": 7.9709086418151855, "learning_rate": 5.26775956284153e-06, "loss": 3.416, "step": 241 }, { "epoch": 0.7934426229508197, "grad_norm": 9.003321647644043, "learning_rate": 5.289617486338798e-06, "loss": 3.4863, "step": 242 }, { "epoch": 0.7967213114754098, "grad_norm": 8.25271224975586, "learning_rate": 5.3114754098360655e-06, "loss": 3.5654, "step": 243 }, { "epoch": 0.8, "grad_norm": 8.407796859741211, "learning_rate": 5.333333333333334e-06, "loss": 3.2764, "step": 244 }, { "epoch": 0.8032786885245902, "grad_norm": 9.78926944732666, "learning_rate": 5.355191256830602e-06, "loss": 3.6318, "step": 245 }, { "epoch": 0.8065573770491803, "grad_norm": 7.671077251434326, "learning_rate": 5.377049180327869e-06, "loss": 3.4531, "step": 246 }, { "epoch": 0.8098360655737705, "grad_norm": 11.25075626373291, "learning_rate": 5.398907103825137e-06, "loss": 3.4932, "step": 247 }, { "epoch": 0.8131147540983606, "grad_norm": 10.65971851348877, "learning_rate": 5.420765027322405e-06, "loss": 3.3242, "step": 248 }, { "epoch": 0.8163934426229508, "grad_norm": 8.171120643615723, "learning_rate": 5.442622950819673e-06, "loss": 3.292, "step": 249 }, { "epoch": 0.819672131147541, "grad_norm": 8.115395545959473, "learning_rate": 5.464480874316941e-06, "loss": 3.3838, "step": 250 }, { "epoch": 0.8229508196721311, "grad_norm": 9.406537055969238, "learning_rate": 5.486338797814208e-06, "loss": 3.3779, "step": 251 }, { "epoch": 0.8262295081967214, "grad_norm": 9.194366455078125, "learning_rate": 5.508196721311476e-06, "loss": 3.2959, "step": 252 }, { "epoch": 0.8295081967213115, "grad_norm": 8.282931327819824, "learning_rate": 5.530054644808744e-06, "loss": 3.4883, "step": 253 }, { "epoch": 0.8327868852459016, "grad_norm": 9.173993110656738, "learning_rate": 5.551912568306012e-06, "loss": 3.3984, "step": 254 }, { "epoch": 0.8360655737704918, "grad_norm": 8.763354301452637, "learning_rate": 5.573770491803278e-06, "loss": 3.7578, "step": 255 }, { "epoch": 0.839344262295082, "grad_norm": 28.468799591064453, "learning_rate": 5.595628415300547e-06, "loss": 3.5381, "step": 256 }, { "epoch": 0.8426229508196721, "grad_norm": 9.242548942565918, "learning_rate": 5.6174863387978155e-06, "loss": 3.4453, "step": 257 }, { "epoch": 0.8459016393442623, "grad_norm": 7.0126848220825195, "learning_rate": 5.639344262295082e-06, "loss": 3.4492, "step": 258 }, { "epoch": 0.8491803278688524, "grad_norm": 7.629612922668457, "learning_rate": 5.66120218579235e-06, "loss": 3.2871, "step": 259 }, { "epoch": 0.8524590163934426, "grad_norm": 9.50594425201416, "learning_rate": 5.683060109289617e-06, "loss": 3.4033, "step": 260 }, { "epoch": 0.8557377049180328, "grad_norm": 7.893380641937256, "learning_rate": 5.7049180327868855e-06, "loss": 3.5107, "step": 261 }, { "epoch": 0.8590163934426229, "grad_norm": 8.083130836486816, "learning_rate": 5.726775956284153e-06, "loss": 3.5645, "step": 262 }, { "epoch": 0.8622950819672132, "grad_norm": 11.09216022491455, "learning_rate": 5.748633879781421e-06, "loss": 3.5791, "step": 263 }, { "epoch": 0.8655737704918033, "grad_norm": 6.572359561920166, "learning_rate": 5.770491803278689e-06, "loss": 3.3281, "step": 264 }, { "epoch": 0.8688524590163934, "grad_norm": 7.271296977996826, "learning_rate": 5.7923497267759565e-06, "loss": 3.3564, "step": 265 }, { "epoch": 0.8721311475409836, "grad_norm": 6.553459644317627, "learning_rate": 5.814207650273225e-06, "loss": 3.3291, "step": 266 }, { "epoch": 0.8754098360655738, "grad_norm": 8.210236549377441, "learning_rate": 5.836065573770492e-06, "loss": 3.5303, "step": 267 }, { "epoch": 0.8786885245901639, "grad_norm": 29.75713348388672, "learning_rate": 5.85792349726776e-06, "loss": 3.4521, "step": 268 }, { "epoch": 0.8819672131147541, "grad_norm": 7.295584201812744, "learning_rate": 5.879781420765028e-06, "loss": 3.3633, "step": 269 }, { "epoch": 0.8852459016393442, "grad_norm": 8.188180923461914, "learning_rate": 5.9016393442622956e-06, "loss": 3.3672, "step": 270 }, { "epoch": 0.8885245901639345, "grad_norm": 9.042287826538086, "learning_rate": 5.923497267759564e-06, "loss": 3.3936, "step": 271 }, { "epoch": 0.8918032786885246, "grad_norm": 10.361300468444824, "learning_rate": 5.945355191256831e-06, "loss": 3.335, "step": 272 }, { "epoch": 0.8950819672131147, "grad_norm": 9.618983268737793, "learning_rate": 5.967213114754099e-06, "loss": 3.6064, "step": 273 }, { "epoch": 0.898360655737705, "grad_norm": 8.307942390441895, "learning_rate": 5.9890710382513665e-06, "loss": 3.2871, "step": 274 }, { "epoch": 0.9016393442622951, "grad_norm": 9.875873565673828, "learning_rate": 6.010928961748635e-06, "loss": 3.458, "step": 275 }, { "epoch": 0.9049180327868852, "grad_norm": 7.100111961364746, "learning_rate": 6.032786885245903e-06, "loss": 3.376, "step": 276 }, { "epoch": 0.9081967213114754, "grad_norm": 6.606748580932617, "learning_rate": 6.05464480874317e-06, "loss": 3.499, "step": 277 }, { "epoch": 0.9114754098360656, "grad_norm": 11.300165176391602, "learning_rate": 6.076502732240438e-06, "loss": 3.3506, "step": 278 }, { "epoch": 0.9147540983606557, "grad_norm": 8.899120330810547, "learning_rate": 6.098360655737705e-06, "loss": 3.5381, "step": 279 }, { "epoch": 0.9180327868852459, "grad_norm": 8.266671180725098, "learning_rate": 6.120218579234973e-06, "loss": 3.5889, "step": 280 }, { "epoch": 0.921311475409836, "grad_norm": 6.319106578826904, "learning_rate": 6.14207650273224e-06, "loss": 3.1992, "step": 281 }, { "epoch": 0.9245901639344263, "grad_norm": 8.241362571716309, "learning_rate": 6.163934426229508e-06, "loss": 3.4014, "step": 282 }, { "epoch": 0.9278688524590164, "grad_norm": 7.752572536468506, "learning_rate": 6.1857923497267765e-06, "loss": 3.3877, "step": 283 }, { "epoch": 0.9311475409836065, "grad_norm": 6.711929798126221, "learning_rate": 6.207650273224044e-06, "loss": 3.6299, "step": 284 }, { "epoch": 0.9344262295081968, "grad_norm": 8.619841575622559, "learning_rate": 6.229508196721312e-06, "loss": 3.5449, "step": 285 }, { "epoch": 0.9377049180327869, "grad_norm": 7.708560466766357, "learning_rate": 6.251366120218579e-06, "loss": 3.415, "step": 286 }, { "epoch": 0.940983606557377, "grad_norm": 6.34720516204834, "learning_rate": 6.2732240437158475e-06, "loss": 3.2998, "step": 287 }, { "epoch": 0.9442622950819672, "grad_norm": 6.200289726257324, "learning_rate": 6.295081967213116e-06, "loss": 3.3652, "step": 288 }, { "epoch": 0.9475409836065574, "grad_norm": 7.1104888916015625, "learning_rate": 6.316939890710383e-06, "loss": 3.2007, "step": 289 }, { "epoch": 0.9508196721311475, "grad_norm": 7.194540500640869, "learning_rate": 6.338797814207651e-06, "loss": 3.1699, "step": 290 }, { "epoch": 0.9540983606557377, "grad_norm": 7.613133907318115, "learning_rate": 6.360655737704918e-06, "loss": 3.5361, "step": 291 }, { "epoch": 0.9573770491803278, "grad_norm": 5.6760687828063965, "learning_rate": 6.3825136612021865e-06, "loss": 3.0557, "step": 292 }, { "epoch": 0.9606557377049181, "grad_norm": 6.683442115783691, "learning_rate": 6.404371584699454e-06, "loss": 3.293, "step": 293 }, { "epoch": 0.9639344262295082, "grad_norm": 9.961114883422852, "learning_rate": 6.426229508196722e-06, "loss": 3.3662, "step": 294 }, { "epoch": 0.9672131147540983, "grad_norm": 7.420706748962402, "learning_rate": 6.44808743169399e-06, "loss": 3.585, "step": 295 }, { "epoch": 0.9704918032786886, "grad_norm": 7.708073616027832, "learning_rate": 6.4699453551912575e-06, "loss": 3.4307, "step": 296 }, { "epoch": 0.9737704918032787, "grad_norm": 6.772336006164551, "learning_rate": 6.491803278688526e-06, "loss": 3.2588, "step": 297 }, { "epoch": 0.9770491803278688, "grad_norm": 6.622044086456299, "learning_rate": 6.513661202185793e-06, "loss": 3.2588, "step": 298 }, { "epoch": 0.980327868852459, "grad_norm": 8.353110313415527, "learning_rate": 6.535519125683061e-06, "loss": 3.2695, "step": 299 }, { "epoch": 0.9836065573770492, "grad_norm": 9.576027870178223, "learning_rate": 6.5573770491803276e-06, "loss": 3.5381, "step": 300 }, { "epoch": 0.9868852459016394, "grad_norm": 6.3359222412109375, "learning_rate": 6.5792349726775966e-06, "loss": 3.4297, "step": 301 }, { "epoch": 0.9901639344262295, "grad_norm": 7.942861080169678, "learning_rate": 6.601092896174865e-06, "loss": 3.5762, "step": 302 }, { "epoch": 0.9934426229508196, "grad_norm": 8.833518981933594, "learning_rate": 6.622950819672131e-06, "loss": 3.3564, "step": 303 }, { "epoch": 0.9967213114754099, "grad_norm": 7.458176136016846, "learning_rate": 6.644808743169399e-06, "loss": 3.3184, "step": 304 }, { "epoch": 1.0, "grad_norm": 9.097708702087402, "learning_rate": 6.666666666666667e-06, "loss": 3.2939, "step": 305 }, { "epoch": 1.0032786885245901, "grad_norm": 8.06629753112793, "learning_rate": 6.688524590163935e-06, "loss": 3.2949, "step": 306 }, { "epoch": 1.0065573770491802, "grad_norm": 7.366222381591797, "learning_rate": 6.710382513661202e-06, "loss": 3.251, "step": 307 }, { "epoch": 1.0098360655737706, "grad_norm": 5.475732803344727, "learning_rate": 6.73224043715847e-06, "loss": 3.3008, "step": 308 }, { "epoch": 1.0131147540983607, "grad_norm": 6.481733798980713, "learning_rate": 6.7540983606557384e-06, "loss": 3.2554, "step": 309 }, { "epoch": 1.0163934426229508, "grad_norm": 7.563251972198486, "learning_rate": 6.775956284153006e-06, "loss": 3.4189, "step": 310 }, { "epoch": 1.019672131147541, "grad_norm": 5.353977680206299, "learning_rate": 6.797814207650274e-06, "loss": 2.9932, "step": 311 }, { "epoch": 1.022950819672131, "grad_norm": 7.936785697937012, "learning_rate": 6.819672131147541e-06, "loss": 3.3877, "step": 312 }, { "epoch": 1.0262295081967212, "grad_norm": 7.27408504486084, "learning_rate": 6.841530054644809e-06, "loss": 3.3086, "step": 313 }, { "epoch": 1.0295081967213116, "grad_norm": 13.004531860351562, "learning_rate": 6.8633879781420775e-06, "loss": 3.5332, "step": 314 }, { "epoch": 1.0327868852459017, "grad_norm": 6.719598770141602, "learning_rate": 6.885245901639345e-06, "loss": 3.4277, "step": 315 }, { "epoch": 1.0360655737704918, "grad_norm": 6.563772201538086, "learning_rate": 6.907103825136613e-06, "loss": 3.3545, "step": 316 }, { "epoch": 1.039344262295082, "grad_norm": 7.342610836029053, "learning_rate": 6.92896174863388e-06, "loss": 3.3477, "step": 317 }, { "epoch": 1.042622950819672, "grad_norm": 7.246837615966797, "learning_rate": 6.9508196721311484e-06, "loss": 3.4814, "step": 318 }, { "epoch": 1.0459016393442624, "grad_norm": 11.285829544067383, "learning_rate": 6.972677595628416e-06, "loss": 3.1719, "step": 319 }, { "epoch": 1.0491803278688525, "grad_norm": 6.613443851470947, "learning_rate": 6.994535519125684e-06, "loss": 3.5439, "step": 320 }, { "epoch": 1.0524590163934426, "grad_norm": 6.840619087219238, "learning_rate": 7.016393442622952e-06, "loss": 3.3525, "step": 321 }, { "epoch": 1.0557377049180328, "grad_norm": 7.807780742645264, "learning_rate": 7.038251366120219e-06, "loss": 3.3125, "step": 322 }, { "epoch": 1.0590163934426229, "grad_norm": 9.67931079864502, "learning_rate": 7.0601092896174875e-06, "loss": 3.4971, "step": 323 }, { "epoch": 1.0622950819672132, "grad_norm": 7.217667579650879, "learning_rate": 7.081967213114754e-06, "loss": 3.3213, "step": 324 }, { "epoch": 1.0655737704918034, "grad_norm": 7.655477046966553, "learning_rate": 7.103825136612022e-06, "loss": 3.2012, "step": 325 }, { "epoch": 1.0688524590163935, "grad_norm": 7.859333038330078, "learning_rate": 7.1256830601092895e-06, "loss": 3.2275, "step": 326 }, { "epoch": 1.0721311475409836, "grad_norm": 6.968972682952881, "learning_rate": 7.147540983606558e-06, "loss": 3.0742, "step": 327 }, { "epoch": 1.0754098360655737, "grad_norm": 6.933469772338867, "learning_rate": 7.169398907103826e-06, "loss": 3.2754, "step": 328 }, { "epoch": 1.0786885245901638, "grad_norm": 7.4891581535339355, "learning_rate": 7.191256830601093e-06, "loss": 3.1748, "step": 329 }, { "epoch": 1.0819672131147542, "grad_norm": 9.045150756835938, "learning_rate": 7.213114754098361e-06, "loss": 3.2617, "step": 330 }, { "epoch": 1.0852459016393443, "grad_norm": 8.630818367004395, "learning_rate": 7.2349726775956286e-06, "loss": 3.0894, "step": 331 }, { "epoch": 1.0885245901639344, "grad_norm": 6.107805252075195, "learning_rate": 7.256830601092897e-06, "loss": 3.3174, "step": 332 }, { "epoch": 1.0918032786885246, "grad_norm": 10.461400985717773, "learning_rate": 7.278688524590165e-06, "loss": 3.3828, "step": 333 }, { "epoch": 1.0950819672131147, "grad_norm": 8.165550231933594, "learning_rate": 7.300546448087432e-06, "loss": 3.3213, "step": 334 }, { "epoch": 1.098360655737705, "grad_norm": 7.256746292114258, "learning_rate": 7.3224043715847e-06, "loss": 3.3281, "step": 335 }, { "epoch": 1.1016393442622952, "grad_norm": 8.9988374710083, "learning_rate": 7.344262295081968e-06, "loss": 3.4541, "step": 336 }, { "epoch": 1.1049180327868853, "grad_norm": 7.413319110870361, "learning_rate": 7.366120218579236e-06, "loss": 3.1973, "step": 337 }, { "epoch": 1.1081967213114754, "grad_norm": 6.412325382232666, "learning_rate": 7.387978142076503e-06, "loss": 3.0605, "step": 338 }, { "epoch": 1.1114754098360655, "grad_norm": 5.906131267547607, "learning_rate": 7.409836065573771e-06, "loss": 3.2534, "step": 339 }, { "epoch": 1.1147540983606556, "grad_norm": 9.132383346557617, "learning_rate": 7.4316939890710394e-06, "loss": 3.46, "step": 340 }, { "epoch": 1.118032786885246, "grad_norm": 8.18934154510498, "learning_rate": 7.453551912568307e-06, "loss": 3.3682, "step": 341 }, { "epoch": 1.1213114754098361, "grad_norm": 5.677730083465576, "learning_rate": 7.475409836065575e-06, "loss": 3.1201, "step": 342 }, { "epoch": 1.1245901639344262, "grad_norm": 5.328213214874268, "learning_rate": 7.497267759562842e-06, "loss": 3.1494, "step": 343 }, { "epoch": 1.1278688524590164, "grad_norm": 7.565134525299072, "learning_rate": 7.51912568306011e-06, "loss": 3.4619, "step": 344 }, { "epoch": 1.1311475409836065, "grad_norm": 8.86823844909668, "learning_rate": 7.540983606557377e-06, "loss": 3.4121, "step": 345 }, { "epoch": 1.1344262295081968, "grad_norm": 8.37868881225586, "learning_rate": 7.562841530054645e-06, "loss": 3.3701, "step": 346 }, { "epoch": 1.137704918032787, "grad_norm": 7.8658223152160645, "learning_rate": 7.584699453551914e-06, "loss": 3.2432, "step": 347 }, { "epoch": 1.140983606557377, "grad_norm": 6.036170959472656, "learning_rate": 7.6065573770491804e-06, "loss": 3.4053, "step": 348 }, { "epoch": 1.1442622950819672, "grad_norm": 9.704240798950195, "learning_rate": 7.628415300546449e-06, "loss": 3.3701, "step": 349 }, { "epoch": 1.1475409836065573, "grad_norm": 7.064136028289795, "learning_rate": 7.650273224043716e-06, "loss": 3.3135, "step": 350 }, { "epoch": 1.1508196721311474, "grad_norm": 7.8385796546936035, "learning_rate": 7.672131147540985e-06, "loss": 3.1787, "step": 351 }, { "epoch": 1.1540983606557378, "grad_norm": 9.22484302520752, "learning_rate": 7.693989071038252e-06, "loss": 3.1689, "step": 352 }, { "epoch": 1.157377049180328, "grad_norm": 8.596626281738281, "learning_rate": 7.71584699453552e-06, "loss": 3.2285, "step": 353 }, { "epoch": 1.160655737704918, "grad_norm": 7.071377277374268, "learning_rate": 7.737704918032789e-06, "loss": 3.1211, "step": 354 }, { "epoch": 1.1639344262295082, "grad_norm": 10.608611106872559, "learning_rate": 7.759562841530056e-06, "loss": 3.3418, "step": 355 }, { "epoch": 1.1672131147540983, "grad_norm": 7.783306121826172, "learning_rate": 7.781420765027323e-06, "loss": 3.252, "step": 356 }, { "epoch": 1.1704918032786886, "grad_norm": 7.013043403625488, "learning_rate": 7.80327868852459e-06, "loss": 3.2451, "step": 357 }, { "epoch": 1.1737704918032787, "grad_norm": 6.960917949676514, "learning_rate": 7.825136612021858e-06, "loss": 3.3335, "step": 358 }, { "epoch": 1.1770491803278689, "grad_norm": 9.2056884765625, "learning_rate": 7.846994535519127e-06, "loss": 3.2715, "step": 359 }, { "epoch": 1.180327868852459, "grad_norm": 8.35204029083252, "learning_rate": 7.868852459016394e-06, "loss": 3.3223, "step": 360 }, { "epoch": 1.1836065573770491, "grad_norm": 7.012753486633301, "learning_rate": 7.890710382513661e-06, "loss": 3.2031, "step": 361 }, { "epoch": 1.1868852459016392, "grad_norm": 8.867563247680664, "learning_rate": 7.912568306010929e-06, "loss": 3.3271, "step": 362 }, { "epoch": 1.1901639344262296, "grad_norm": 7.510512828826904, "learning_rate": 7.934426229508198e-06, "loss": 3.1172, "step": 363 }, { "epoch": 1.1934426229508197, "grad_norm": 8.075335502624512, "learning_rate": 7.956284153005465e-06, "loss": 3.2471, "step": 364 }, { "epoch": 1.1967213114754098, "grad_norm": 6.094025611877441, "learning_rate": 7.978142076502732e-06, "loss": 3.332, "step": 365 }, { "epoch": 1.2, "grad_norm": 8.405220031738281, "learning_rate": 8.000000000000001e-06, "loss": 3.1133, "step": 366 }, { "epoch": 1.20327868852459, "grad_norm": 7.3422441482543945, "learning_rate": 8.021857923497269e-06, "loss": 3.1826, "step": 367 }, { "epoch": 1.2065573770491804, "grad_norm": 7.883208751678467, "learning_rate": 8.043715846994536e-06, "loss": 3.293, "step": 368 }, { "epoch": 1.2098360655737705, "grad_norm": 7.131059646606445, "learning_rate": 8.065573770491803e-06, "loss": 3.293, "step": 369 }, { "epoch": 1.2131147540983607, "grad_norm": 8.132555961608887, "learning_rate": 8.087431693989072e-06, "loss": 3.1289, "step": 370 }, { "epoch": 1.2163934426229508, "grad_norm": 10.832856178283691, "learning_rate": 8.10928961748634e-06, "loss": 3.2725, "step": 371 }, { "epoch": 1.219672131147541, "grad_norm": 7.161411285400391, "learning_rate": 8.131147540983607e-06, "loss": 3.1719, "step": 372 }, { "epoch": 1.222950819672131, "grad_norm": 6.2912492752075195, "learning_rate": 8.153005464480876e-06, "loss": 3.335, "step": 373 }, { "epoch": 1.2262295081967214, "grad_norm": 6.172238826751709, "learning_rate": 8.174863387978143e-06, "loss": 3.124, "step": 374 }, { "epoch": 1.2295081967213115, "grad_norm": 8.51742935180664, "learning_rate": 8.19672131147541e-06, "loss": 3.1455, "step": 375 }, { "epoch": 1.2327868852459016, "grad_norm": 8.487931251525879, "learning_rate": 8.218579234972678e-06, "loss": 3.3174, "step": 376 }, { "epoch": 1.2360655737704918, "grad_norm": 8.450814247131348, "learning_rate": 8.240437158469947e-06, "loss": 3.2061, "step": 377 }, { "epoch": 1.2393442622950819, "grad_norm": 7.521083354949951, "learning_rate": 8.262295081967214e-06, "loss": 3.2832, "step": 378 }, { "epoch": 1.2426229508196722, "grad_norm": 7.510092258453369, "learning_rate": 8.284153005464481e-06, "loss": 3.1904, "step": 379 }, { "epoch": 1.2459016393442623, "grad_norm": 7.772053241729736, "learning_rate": 8.30601092896175e-06, "loss": 2.9766, "step": 380 }, { "epoch": 1.2491803278688525, "grad_norm": 10.449871063232422, "learning_rate": 8.327868852459016e-06, "loss": 2.9795, "step": 381 }, { "epoch": 1.2524590163934426, "grad_norm": 5.180129528045654, "learning_rate": 8.349726775956285e-06, "loss": 3.1553, "step": 382 }, { "epoch": 1.2557377049180327, "grad_norm": 8.635339736938477, "learning_rate": 8.371584699453552e-06, "loss": 2.9268, "step": 383 }, { "epoch": 1.2590163934426228, "grad_norm": 10.486495018005371, "learning_rate": 8.39344262295082e-06, "loss": 3.2324, "step": 384 }, { "epoch": 1.2622950819672132, "grad_norm": 5.866320610046387, "learning_rate": 8.415300546448089e-06, "loss": 3.1602, "step": 385 }, { "epoch": 1.2655737704918033, "grad_norm": 6.6386213302612305, "learning_rate": 8.437158469945356e-06, "loss": 3.1914, "step": 386 }, { "epoch": 1.2688524590163934, "grad_norm": 8.030076026916504, "learning_rate": 8.459016393442623e-06, "loss": 3.0371, "step": 387 }, { "epoch": 1.2721311475409836, "grad_norm": 7.1930999755859375, "learning_rate": 8.48087431693989e-06, "loss": 3.4258, "step": 388 }, { "epoch": 1.275409836065574, "grad_norm": 10.632017135620117, "learning_rate": 8.50273224043716e-06, "loss": 3.4062, "step": 389 }, { "epoch": 1.278688524590164, "grad_norm": 7.430197238922119, "learning_rate": 8.524590163934427e-06, "loss": 3.3281, "step": 390 }, { "epoch": 1.2819672131147541, "grad_norm": 8.186731338500977, "learning_rate": 8.546448087431694e-06, "loss": 3.2783, "step": 391 }, { "epoch": 1.2852459016393443, "grad_norm": 9.68680191040039, "learning_rate": 8.568306010928963e-06, "loss": 3.373, "step": 392 }, { "epoch": 1.2885245901639344, "grad_norm": 5.870998382568359, "learning_rate": 8.59016393442623e-06, "loss": 3.1875, "step": 393 }, { "epoch": 1.2918032786885245, "grad_norm": 14.752120018005371, "learning_rate": 8.612021857923498e-06, "loss": 3.3457, "step": 394 }, { "epoch": 1.2950819672131146, "grad_norm": 5.772409439086914, "learning_rate": 8.633879781420765e-06, "loss": 3.1973, "step": 395 }, { "epoch": 1.298360655737705, "grad_norm": 6.008660316467285, "learning_rate": 8.655737704918034e-06, "loss": 3.2227, "step": 396 }, { "epoch": 1.301639344262295, "grad_norm": 7.292191982269287, "learning_rate": 8.677595628415301e-06, "loss": 3.084, "step": 397 }, { "epoch": 1.3049180327868852, "grad_norm": 6.463682174682617, "learning_rate": 8.699453551912569e-06, "loss": 3.2158, "step": 398 }, { "epoch": 1.3081967213114754, "grad_norm": 8.021401405334473, "learning_rate": 8.721311475409838e-06, "loss": 3.0439, "step": 399 }, { "epoch": 1.3114754098360657, "grad_norm": 9.434222221374512, "learning_rate": 8.743169398907103e-06, "loss": 3.3018, "step": 400 }, { "epoch": 1.3147540983606558, "grad_norm": 9.166703224182129, "learning_rate": 8.765027322404372e-06, "loss": 3.1816, "step": 401 }, { "epoch": 1.318032786885246, "grad_norm": 7.37322473526001, "learning_rate": 8.78688524590164e-06, "loss": 3.2168, "step": 402 }, { "epoch": 1.321311475409836, "grad_norm": 7.557761192321777, "learning_rate": 8.808743169398907e-06, "loss": 3.2969, "step": 403 }, { "epoch": 1.3245901639344262, "grad_norm": 7.885385990142822, "learning_rate": 8.830601092896176e-06, "loss": 3.0771, "step": 404 }, { "epoch": 1.3278688524590163, "grad_norm": 6.812907695770264, "learning_rate": 8.852459016393443e-06, "loss": 3.0303, "step": 405 }, { "epoch": 1.3311475409836064, "grad_norm": 7.278564929962158, "learning_rate": 8.87431693989071e-06, "loss": 3.1699, "step": 406 }, { "epoch": 1.3344262295081968, "grad_norm": 8.254582405090332, "learning_rate": 8.896174863387978e-06, "loss": 3.1509, "step": 407 }, { "epoch": 1.337704918032787, "grad_norm": 7.769384384155273, "learning_rate": 8.918032786885247e-06, "loss": 2.9375, "step": 408 }, { "epoch": 1.340983606557377, "grad_norm": 10.014665603637695, "learning_rate": 8.939890710382514e-06, "loss": 3.125, "step": 409 }, { "epoch": 1.3442622950819672, "grad_norm": 10.952587127685547, "learning_rate": 8.961748633879782e-06, "loss": 3.168, "step": 410 }, { "epoch": 1.3475409836065575, "grad_norm": 8.671469688415527, "learning_rate": 8.98360655737705e-06, "loss": 3.2236, "step": 411 }, { "epoch": 1.3508196721311476, "grad_norm": 7.624638557434082, "learning_rate": 9.005464480874318e-06, "loss": 3.3398, "step": 412 }, { "epoch": 1.3540983606557377, "grad_norm": 8.919805526733398, "learning_rate": 9.027322404371585e-06, "loss": 3.1084, "step": 413 }, { "epoch": 1.3573770491803279, "grad_norm": 6.81168794631958, "learning_rate": 9.049180327868853e-06, "loss": 3.1816, "step": 414 }, { "epoch": 1.360655737704918, "grad_norm": 8.779436111450195, "learning_rate": 9.071038251366122e-06, "loss": 3.2793, "step": 415 }, { "epoch": 1.3639344262295081, "grad_norm": 7.018307685852051, "learning_rate": 9.092896174863389e-06, "loss": 3.2783, "step": 416 }, { "epoch": 1.3672131147540982, "grad_norm": 7.2693867683410645, "learning_rate": 9.114754098360656e-06, "loss": 3.3486, "step": 417 }, { "epoch": 1.3704918032786886, "grad_norm": 6.59342098236084, "learning_rate": 9.136612021857925e-06, "loss": 3.0693, "step": 418 }, { "epoch": 1.3737704918032787, "grad_norm": 10.807698249816895, "learning_rate": 9.158469945355192e-06, "loss": 3.1777, "step": 419 }, { "epoch": 1.3770491803278688, "grad_norm": 8.815043449401855, "learning_rate": 9.18032786885246e-06, "loss": 3.1211, "step": 420 }, { "epoch": 1.380327868852459, "grad_norm": 11.670936584472656, "learning_rate": 9.202185792349727e-06, "loss": 3.0713, "step": 421 }, { "epoch": 1.3836065573770493, "grad_norm": 7.277181148529053, "learning_rate": 9.224043715846996e-06, "loss": 3.127, "step": 422 }, { "epoch": 1.3868852459016394, "grad_norm": 12.879034996032715, "learning_rate": 9.245901639344263e-06, "loss": 3.1709, "step": 423 }, { "epoch": 1.3901639344262295, "grad_norm": 11.65292739868164, "learning_rate": 9.26775956284153e-06, "loss": 3.1592, "step": 424 }, { "epoch": 1.3934426229508197, "grad_norm": 8.583467483520508, "learning_rate": 9.2896174863388e-06, "loss": 3.042, "step": 425 }, { "epoch": 1.3967213114754098, "grad_norm": 7.3521342277526855, "learning_rate": 9.311475409836065e-06, "loss": 3.2129, "step": 426 }, { "epoch": 1.4, "grad_norm": 6.942303657531738, "learning_rate": 9.333333333333334e-06, "loss": 3.2578, "step": 427 }, { "epoch": 1.40327868852459, "grad_norm": 13.020303726196289, "learning_rate": 9.355191256830602e-06, "loss": 3.1865, "step": 428 }, { "epoch": 1.4065573770491804, "grad_norm": 6.4604926109313965, "learning_rate": 9.377049180327869e-06, "loss": 3.1855, "step": 429 }, { "epoch": 1.4098360655737705, "grad_norm": 6.410093307495117, "learning_rate": 9.398907103825138e-06, "loss": 3.1445, "step": 430 }, { "epoch": 1.4131147540983606, "grad_norm": 7.316162586212158, "learning_rate": 9.420765027322405e-06, "loss": 3.1816, "step": 431 }, { "epoch": 1.4163934426229507, "grad_norm": 6.467679977416992, "learning_rate": 9.442622950819673e-06, "loss": 3.2461, "step": 432 }, { "epoch": 1.419672131147541, "grad_norm": 7.589075565338135, "learning_rate": 9.46448087431694e-06, "loss": 3.1113, "step": 433 }, { "epoch": 1.4229508196721312, "grad_norm": 8.083184242248535, "learning_rate": 9.486338797814209e-06, "loss": 3.2002, "step": 434 }, { "epoch": 1.4262295081967213, "grad_norm": 8.817448616027832, "learning_rate": 9.508196721311476e-06, "loss": 3.0791, "step": 435 }, { "epoch": 1.4295081967213115, "grad_norm": 11.268832206726074, "learning_rate": 9.530054644808743e-06, "loss": 3.2158, "step": 436 }, { "epoch": 1.4327868852459016, "grad_norm": 7.328851222991943, "learning_rate": 9.551912568306013e-06, "loss": 3.2246, "step": 437 }, { "epoch": 1.4360655737704917, "grad_norm": 8.936716079711914, "learning_rate": 9.57377049180328e-06, "loss": 2.9111, "step": 438 }, { "epoch": 1.4393442622950818, "grad_norm": 7.039148807525635, "learning_rate": 9.595628415300547e-06, "loss": 3.0137, "step": 439 }, { "epoch": 1.4426229508196722, "grad_norm": 7.636387348175049, "learning_rate": 9.617486338797814e-06, "loss": 3.1553, "step": 440 }, { "epoch": 1.4459016393442623, "grad_norm": 8.412440299987793, "learning_rate": 9.639344262295083e-06, "loss": 3.0977, "step": 441 }, { "epoch": 1.4491803278688524, "grad_norm": 7.210337162017822, "learning_rate": 9.66120218579235e-06, "loss": 3.1279, "step": 442 }, { "epoch": 1.4524590163934425, "grad_norm": 9.598974227905273, "learning_rate": 9.683060109289618e-06, "loss": 3.3086, "step": 443 }, { "epoch": 1.455737704918033, "grad_norm": 7.735237121582031, "learning_rate": 9.704918032786887e-06, "loss": 3.1426, "step": 444 }, { "epoch": 1.459016393442623, "grad_norm": 8.490365028381348, "learning_rate": 9.726775956284153e-06, "loss": 3.1592, "step": 445 }, { "epoch": 1.4622950819672131, "grad_norm": 6.358397006988525, "learning_rate": 9.748633879781422e-06, "loss": 2.9062, "step": 446 }, { "epoch": 1.4655737704918033, "grad_norm": 8.213370323181152, "learning_rate": 9.770491803278689e-06, "loss": 3.1494, "step": 447 }, { "epoch": 1.4688524590163934, "grad_norm": 9.127837181091309, "learning_rate": 9.792349726775956e-06, "loss": 3.3789, "step": 448 }, { "epoch": 1.4721311475409835, "grad_norm": 6.8252763748168945, "learning_rate": 9.814207650273225e-06, "loss": 3.333, "step": 449 }, { "epoch": 1.4754098360655736, "grad_norm": 8.278053283691406, "learning_rate": 9.836065573770493e-06, "loss": 3.1895, "step": 450 }, { "epoch": 1.478688524590164, "grad_norm": 18.759681701660156, "learning_rate": 9.85792349726776e-06, "loss": 3.1152, "step": 451 }, { "epoch": 1.481967213114754, "grad_norm": 8.832106590270996, "learning_rate": 9.879781420765027e-06, "loss": 3.1865, "step": 452 }, { "epoch": 1.4852459016393442, "grad_norm": 7.454258918762207, "learning_rate": 9.901639344262296e-06, "loss": 3.0928, "step": 453 }, { "epoch": 1.4885245901639343, "grad_norm": 7.323270320892334, "learning_rate": 9.923497267759564e-06, "loss": 3.0918, "step": 454 }, { "epoch": 1.4918032786885247, "grad_norm": 7.288318157196045, "learning_rate": 9.945355191256831e-06, "loss": 2.896, "step": 455 }, { "epoch": 1.4950819672131148, "grad_norm": 7.476467132568359, "learning_rate": 9.9672131147541e-06, "loss": 3.0068, "step": 456 }, { "epoch": 1.498360655737705, "grad_norm": 8.226696014404297, "learning_rate": 9.989071038251367e-06, "loss": 3.1523, "step": 457 }, { "epoch": 1.501639344262295, "grad_norm": 5.282556533813477, "learning_rate": 1.0010928961748634e-05, "loss": 3.0869, "step": 458 }, { "epoch": 1.5049180327868852, "grad_norm": 7.260207176208496, "learning_rate": 1.0032786885245902e-05, "loss": 3.1924, "step": 459 }, { "epoch": 1.5081967213114753, "grad_norm": 9.529824256896973, "learning_rate": 1.005464480874317e-05, "loss": 2.9712, "step": 460 }, { "epoch": 1.5114754098360654, "grad_norm": 6.16287899017334, "learning_rate": 1.0076502732240438e-05, "loss": 3.2197, "step": 461 }, { "epoch": 1.5147540983606558, "grad_norm": 7.414689064025879, "learning_rate": 1.0098360655737707e-05, "loss": 3.0498, "step": 462 }, { "epoch": 1.518032786885246, "grad_norm": 9.589216232299805, "learning_rate": 1.0120218579234973e-05, "loss": 3.0352, "step": 463 }, { "epoch": 1.521311475409836, "grad_norm": 8.72669506072998, "learning_rate": 1.0142076502732242e-05, "loss": 3.0527, "step": 464 }, { "epoch": 1.5245901639344264, "grad_norm": 7.771150588989258, "learning_rate": 1.0163934426229509e-05, "loss": 3.0986, "step": 465 }, { "epoch": 1.5278688524590165, "grad_norm": 7.726975917816162, "learning_rate": 1.0185792349726778e-05, "loss": 3.2178, "step": 466 }, { "epoch": 1.5311475409836066, "grad_norm": 7.3010663986206055, "learning_rate": 1.0207650273224044e-05, "loss": 3.2373, "step": 467 }, { "epoch": 1.5344262295081967, "grad_norm": 9.289816856384277, "learning_rate": 1.0229508196721311e-05, "loss": 3.1943, "step": 468 }, { "epoch": 1.5377049180327869, "grad_norm": 8.855957984924316, "learning_rate": 1.025136612021858e-05, "loss": 3.1816, "step": 469 }, { "epoch": 1.540983606557377, "grad_norm": 7.273669719696045, "learning_rate": 1.0273224043715849e-05, "loss": 3.0215, "step": 470 }, { "epoch": 1.544262295081967, "grad_norm": 12.998229026794434, "learning_rate": 1.0295081967213116e-05, "loss": 3.1016, "step": 471 }, { "epoch": 1.5475409836065572, "grad_norm": 8.234139442443848, "learning_rate": 1.0316939890710382e-05, "loss": 3.0869, "step": 472 }, { "epoch": 1.5508196721311476, "grad_norm": 9.508413314819336, "learning_rate": 1.0338797814207651e-05, "loss": 2.9023, "step": 473 }, { "epoch": 1.5540983606557377, "grad_norm": 5.5015950202941895, "learning_rate": 1.0360655737704918e-05, "loss": 2.9053, "step": 474 }, { "epoch": 1.5573770491803278, "grad_norm": 9.956327438354492, "learning_rate": 1.0382513661202187e-05, "loss": 2.9023, "step": 475 }, { "epoch": 1.5606557377049182, "grad_norm": 7.4480485916137695, "learning_rate": 1.0404371584699455e-05, "loss": 2.9858, "step": 476 }, { "epoch": 1.5639344262295083, "grad_norm": 7.231725215911865, "learning_rate": 1.0426229508196722e-05, "loss": 3.1846, "step": 477 }, { "epoch": 1.5672131147540984, "grad_norm": 9.158205032348633, "learning_rate": 1.0448087431693989e-05, "loss": 3.1152, "step": 478 }, { "epoch": 1.5704918032786885, "grad_norm": 10.097272872924805, "learning_rate": 1.0469945355191258e-05, "loss": 3.1064, "step": 479 }, { "epoch": 1.5737704918032787, "grad_norm": 8.471349716186523, "learning_rate": 1.0491803278688525e-05, "loss": 3.2178, "step": 480 }, { "epoch": 1.5770491803278688, "grad_norm": 9.068158149719238, "learning_rate": 1.0513661202185794e-05, "loss": 2.8926, "step": 481 }, { "epoch": 1.580327868852459, "grad_norm": 8.110392570495605, "learning_rate": 1.053551912568306e-05, "loss": 2.8618, "step": 482 }, { "epoch": 1.583606557377049, "grad_norm": 6.56627082824707, "learning_rate": 1.0557377049180329e-05, "loss": 2.9883, "step": 483 }, { "epoch": 1.5868852459016394, "grad_norm": 10.498800277709961, "learning_rate": 1.0579234972677596e-05, "loss": 3.0371, "step": 484 }, { "epoch": 1.5901639344262295, "grad_norm": 8.715287208557129, "learning_rate": 1.0601092896174865e-05, "loss": 3.0332, "step": 485 }, { "epoch": 1.5934426229508196, "grad_norm": 7.810750484466553, "learning_rate": 1.0622950819672131e-05, "loss": 3.0879, "step": 486 }, { "epoch": 1.59672131147541, "grad_norm": 6.240459442138672, "learning_rate": 1.06448087431694e-05, "loss": 3.0361, "step": 487 }, { "epoch": 1.6, "grad_norm": 9.219979286193848, "learning_rate": 1.0666666666666667e-05, "loss": 3.0244, "step": 488 }, { "epoch": 1.6032786885245902, "grad_norm": 5.915600776672363, "learning_rate": 1.0688524590163936e-05, "loss": 2.9385, "step": 489 }, { "epoch": 1.6065573770491803, "grad_norm": 8.248745918273926, "learning_rate": 1.0710382513661204e-05, "loss": 2.9736, "step": 490 }, { "epoch": 1.6098360655737705, "grad_norm": 7.134410381317139, "learning_rate": 1.073224043715847e-05, "loss": 3.0557, "step": 491 }, { "epoch": 1.6131147540983606, "grad_norm": 7.792252540588379, "learning_rate": 1.0754098360655738e-05, "loss": 3.0391, "step": 492 }, { "epoch": 1.6163934426229507, "grad_norm": 13.239994049072266, "learning_rate": 1.0775956284153006e-05, "loss": 3.0615, "step": 493 }, { "epoch": 1.6196721311475408, "grad_norm": 4.773660659790039, "learning_rate": 1.0797814207650275e-05, "loss": 2.8706, "step": 494 }, { "epoch": 1.6229508196721312, "grad_norm": 8.649115562438965, "learning_rate": 1.0819672131147544e-05, "loss": 3.0889, "step": 495 }, { "epoch": 1.6262295081967213, "grad_norm": 11.081911087036133, "learning_rate": 1.084153005464481e-05, "loss": 3.1357, "step": 496 }, { "epoch": 1.6295081967213116, "grad_norm": 8.279897689819336, "learning_rate": 1.0863387978142076e-05, "loss": 2.9951, "step": 497 }, { "epoch": 1.6327868852459018, "grad_norm": 7.361734867095947, "learning_rate": 1.0885245901639345e-05, "loss": 3.0781, "step": 498 }, { "epoch": 1.6360655737704919, "grad_norm": 7.211650371551514, "learning_rate": 1.0907103825136613e-05, "loss": 3.0908, "step": 499 }, { "epoch": 1.639344262295082, "grad_norm": 9.744074821472168, "learning_rate": 1.0928961748633882e-05, "loss": 3.0337, "step": 500 }, { "epoch": 1.6426229508196721, "grad_norm": 12.223567008972168, "learning_rate": 1.0950819672131147e-05, "loss": 2.8159, "step": 501 }, { "epoch": 1.6459016393442623, "grad_norm": 10.419316291809082, "learning_rate": 1.0972677595628416e-05, "loss": 2.9863, "step": 502 }, { "epoch": 1.6491803278688524, "grad_norm": 8.777406692504883, "learning_rate": 1.0994535519125684e-05, "loss": 3.0791, "step": 503 }, { "epoch": 1.6524590163934425, "grad_norm": 6.492453098297119, "learning_rate": 1.1016393442622953e-05, "loss": 3.0371, "step": 504 }, { "epoch": 1.6557377049180326, "grad_norm": 6.75477409362793, "learning_rate": 1.1038251366120218e-05, "loss": 3.0547, "step": 505 }, { "epoch": 1.659016393442623, "grad_norm": 7.675685882568359, "learning_rate": 1.1060109289617487e-05, "loss": 2.938, "step": 506 }, { "epoch": 1.662295081967213, "grad_norm": 6.355978965759277, "learning_rate": 1.1081967213114755e-05, "loss": 3.3662, "step": 507 }, { "epoch": 1.6655737704918034, "grad_norm": 6.885853290557861, "learning_rate": 1.1103825136612024e-05, "loss": 2.876, "step": 508 }, { "epoch": 1.6688524590163936, "grad_norm": 11.049001693725586, "learning_rate": 1.1125683060109291e-05, "loss": 2.999, "step": 509 }, { "epoch": 1.6721311475409837, "grad_norm": 8.036545753479004, "learning_rate": 1.1147540983606557e-05, "loss": 3.1836, "step": 510 }, { "epoch": 1.6754098360655738, "grad_norm": 7.571477890014648, "learning_rate": 1.1169398907103826e-05, "loss": 2.8398, "step": 511 }, { "epoch": 1.678688524590164, "grad_norm": 7.599063873291016, "learning_rate": 1.1191256830601095e-05, "loss": 3.0239, "step": 512 }, { "epoch": 1.681967213114754, "grad_norm": 8.331276893615723, "learning_rate": 1.1213114754098362e-05, "loss": 3.085, "step": 513 }, { "epoch": 1.6852459016393442, "grad_norm": 9.476777076721191, "learning_rate": 1.1234972677595631e-05, "loss": 2.897, "step": 514 }, { "epoch": 1.6885245901639343, "grad_norm": 7.061502933502197, "learning_rate": 1.1256830601092897e-05, "loss": 3.167, "step": 515 }, { "epoch": 1.6918032786885244, "grad_norm": 6.935581207275391, "learning_rate": 1.1278688524590164e-05, "loss": 3.1191, "step": 516 }, { "epoch": 1.6950819672131148, "grad_norm": 8.577431678771973, "learning_rate": 1.1300546448087433e-05, "loss": 2.8706, "step": 517 }, { "epoch": 1.698360655737705, "grad_norm": 10.003890991210938, "learning_rate": 1.13224043715847e-05, "loss": 2.9902, "step": 518 }, { "epoch": 1.7016393442622952, "grad_norm": 7.193459987640381, "learning_rate": 1.134426229508197e-05, "loss": 3.1035, "step": 519 }, { "epoch": 1.7049180327868854, "grad_norm": 10.009678840637207, "learning_rate": 1.1366120218579235e-05, "loss": 3.1768, "step": 520 }, { "epoch": 1.7081967213114755, "grad_norm": 8.841766357421875, "learning_rate": 1.1387978142076504e-05, "loss": 3.168, "step": 521 }, { "epoch": 1.7114754098360656, "grad_norm": 9.256268501281738, "learning_rate": 1.1409836065573771e-05, "loss": 3.1016, "step": 522 }, { "epoch": 1.7147540983606557, "grad_norm": 9.372642517089844, "learning_rate": 1.143169398907104e-05, "loss": 3.1729, "step": 523 }, { "epoch": 1.7180327868852459, "grad_norm": 7.072874069213867, "learning_rate": 1.1453551912568306e-05, "loss": 2.9111, "step": 524 }, { "epoch": 1.721311475409836, "grad_norm": 8.845539093017578, "learning_rate": 1.1475409836065575e-05, "loss": 3.0273, "step": 525 }, { "epoch": 1.724590163934426, "grad_norm": 9.354480743408203, "learning_rate": 1.1497267759562842e-05, "loss": 2.9365, "step": 526 }, { "epoch": 1.7278688524590164, "grad_norm": 11.709383964538574, "learning_rate": 1.1519125683060111e-05, "loss": 3.0342, "step": 527 }, { "epoch": 1.7311475409836066, "grad_norm": 10.795539855957031, "learning_rate": 1.1540983606557378e-05, "loss": 3.0732, "step": 528 }, { "epoch": 1.7344262295081967, "grad_norm": 10.706090927124023, "learning_rate": 1.1562841530054646e-05, "loss": 3.0488, "step": 529 }, { "epoch": 1.737704918032787, "grad_norm": 9.368484497070312, "learning_rate": 1.1584699453551913e-05, "loss": 3.1465, "step": 530 }, { "epoch": 1.7409836065573772, "grad_norm": 8.194517135620117, "learning_rate": 1.1606557377049182e-05, "loss": 2.9326, "step": 531 }, { "epoch": 1.7442622950819673, "grad_norm": 9.181654930114746, "learning_rate": 1.162841530054645e-05, "loss": 3.041, "step": 532 }, { "epoch": 1.7475409836065574, "grad_norm": 8.885173797607422, "learning_rate": 1.1650273224043718e-05, "loss": 2.958, "step": 533 }, { "epoch": 1.7508196721311475, "grad_norm": 6.891660690307617, "learning_rate": 1.1672131147540984e-05, "loss": 3.0249, "step": 534 }, { "epoch": 1.7540983606557377, "grad_norm": 8.627812385559082, "learning_rate": 1.1693989071038251e-05, "loss": 2.9839, "step": 535 }, { "epoch": 1.7573770491803278, "grad_norm": 9.876832962036133, "learning_rate": 1.171584699453552e-05, "loss": 3.1143, "step": 536 }, { "epoch": 1.760655737704918, "grad_norm": 6.01844596862793, "learning_rate": 1.173770491803279e-05, "loss": 2.8623, "step": 537 }, { "epoch": 1.7639344262295082, "grad_norm": 7.731630802154541, "learning_rate": 1.1759562841530057e-05, "loss": 2.9912, "step": 538 }, { "epoch": 1.7672131147540984, "grad_norm": 7.613369941711426, "learning_rate": 1.1781420765027322e-05, "loss": 2.9609, "step": 539 }, { "epoch": 1.7704918032786885, "grad_norm": 6.943933963775635, "learning_rate": 1.1803278688524591e-05, "loss": 3.2109, "step": 540 }, { "epoch": 1.7737704918032788, "grad_norm": 7.462012767791748, "learning_rate": 1.1825136612021858e-05, "loss": 3.147, "step": 541 }, { "epoch": 1.777049180327869, "grad_norm": 8.614116668701172, "learning_rate": 1.1846994535519127e-05, "loss": 3.1562, "step": 542 }, { "epoch": 1.780327868852459, "grad_norm": 8.50653076171875, "learning_rate": 1.1868852459016393e-05, "loss": 2.916, "step": 543 }, { "epoch": 1.7836065573770492, "grad_norm": 8.910242080688477, "learning_rate": 1.1890710382513662e-05, "loss": 2.9141, "step": 544 }, { "epoch": 1.7868852459016393, "grad_norm": 12.94006633758545, "learning_rate": 1.191256830601093e-05, "loss": 3.0283, "step": 545 }, { "epoch": 1.7901639344262295, "grad_norm": 6.548953533172607, "learning_rate": 1.1934426229508198e-05, "loss": 2.9619, "step": 546 }, { "epoch": 1.7934426229508196, "grad_norm": 7.512703895568848, "learning_rate": 1.1956284153005466e-05, "loss": 3.0576, "step": 547 }, { "epoch": 1.7967213114754097, "grad_norm": 11.134489059448242, "learning_rate": 1.1978142076502733e-05, "loss": 2.8511, "step": 548 }, { "epoch": 1.8, "grad_norm": 8.355422973632812, "learning_rate": 1.2e-05, "loss": 2.8867, "step": 549 }, { "epoch": 1.8032786885245902, "grad_norm": 10.706781387329102, "learning_rate": 1.202185792349727e-05, "loss": 3.0088, "step": 550 }, { "epoch": 1.8065573770491803, "grad_norm": 12.192737579345703, "learning_rate": 1.2043715846994537e-05, "loss": 2.7666, "step": 551 }, { "epoch": 1.8098360655737706, "grad_norm": 8.282095909118652, "learning_rate": 1.2065573770491806e-05, "loss": 2.9561, "step": 552 }, { "epoch": 1.8131147540983608, "grad_norm": 8.472875595092773, "learning_rate": 1.2087431693989071e-05, "loss": 3.123, "step": 553 }, { "epoch": 1.8163934426229509, "grad_norm": 5.796938896179199, "learning_rate": 1.210928961748634e-05, "loss": 2.8516, "step": 554 }, { "epoch": 1.819672131147541, "grad_norm": 34.678680419921875, "learning_rate": 1.2131147540983608e-05, "loss": 2.7969, "step": 555 }, { "epoch": 1.8229508196721311, "grad_norm": 8.78286075592041, "learning_rate": 1.2153005464480877e-05, "loss": 3.0996, "step": 556 }, { "epoch": 1.8262295081967213, "grad_norm": 10.316128730773926, "learning_rate": 1.2174863387978144e-05, "loss": 2.9932, "step": 557 }, { "epoch": 1.8295081967213114, "grad_norm": 15.336982727050781, "learning_rate": 1.219672131147541e-05, "loss": 3.0356, "step": 558 }, { "epoch": 1.8327868852459015, "grad_norm": 10.696682929992676, "learning_rate": 1.2218579234972678e-05, "loss": 3.0371, "step": 559 }, { "epoch": 1.8360655737704918, "grad_norm": 12.45301342010498, "learning_rate": 1.2240437158469946e-05, "loss": 3.1064, "step": 560 }, { "epoch": 1.839344262295082, "grad_norm": 11.073538780212402, "learning_rate": 1.2262295081967215e-05, "loss": 2.9102, "step": 561 }, { "epoch": 1.842622950819672, "grad_norm": 9.035368919372559, "learning_rate": 1.228415300546448e-05, "loss": 2.9746, "step": 562 }, { "epoch": 1.8459016393442624, "grad_norm": 7.247426986694336, "learning_rate": 1.230601092896175e-05, "loss": 2.7539, "step": 563 }, { "epoch": 1.8491803278688526, "grad_norm": 8.459596633911133, "learning_rate": 1.2327868852459017e-05, "loss": 3.0586, "step": 564 }, { "epoch": 1.8524590163934427, "grad_norm": 7.890843868255615, "learning_rate": 1.2349726775956286e-05, "loss": 2.7549, "step": 565 }, { "epoch": 1.8557377049180328, "grad_norm": 7.545921802520752, "learning_rate": 1.2371584699453553e-05, "loss": 2.9775, "step": 566 }, { "epoch": 1.859016393442623, "grad_norm": 13.730195045471191, "learning_rate": 1.239344262295082e-05, "loss": 2.9492, "step": 567 }, { "epoch": 1.862295081967213, "grad_norm": 6.649563312530518, "learning_rate": 1.2415300546448088e-05, "loss": 2.8486, "step": 568 }, { "epoch": 1.8655737704918032, "grad_norm": 8.950404167175293, "learning_rate": 1.2437158469945357e-05, "loss": 3.0381, "step": 569 }, { "epoch": 1.8688524590163933, "grad_norm": 10.147465705871582, "learning_rate": 1.2459016393442624e-05, "loss": 2.9365, "step": 570 }, { "epoch": 1.8721311475409836, "grad_norm": 9.774699211120605, "learning_rate": 1.2480874316939893e-05, "loss": 2.8984, "step": 571 }, { "epoch": 1.8754098360655738, "grad_norm": 8.203218460083008, "learning_rate": 1.2502732240437159e-05, "loss": 2.9316, "step": 572 }, { "epoch": 1.8786885245901639, "grad_norm": 10.311019897460938, "learning_rate": 1.2524590163934428e-05, "loss": 3.0684, "step": 573 }, { "epoch": 1.8819672131147542, "grad_norm": 10.506092071533203, "learning_rate": 1.2546448087431695e-05, "loss": 3.1494, "step": 574 }, { "epoch": 1.8852459016393444, "grad_norm": 8.820181846618652, "learning_rate": 1.2568306010928964e-05, "loss": 2.9619, "step": 575 }, { "epoch": 1.8885245901639345, "grad_norm": 8.202141761779785, "learning_rate": 1.2590163934426231e-05, "loss": 3.1377, "step": 576 }, { "epoch": 1.8918032786885246, "grad_norm": 12.081815719604492, "learning_rate": 1.2612021857923497e-05, "loss": 3.0645, "step": 577 }, { "epoch": 1.8950819672131147, "grad_norm": 9.20134162902832, "learning_rate": 1.2633879781420766e-05, "loss": 2.8701, "step": 578 }, { "epoch": 1.8983606557377048, "grad_norm": 8.229741096496582, "learning_rate": 1.2655737704918035e-05, "loss": 2.7622, "step": 579 }, { "epoch": 1.901639344262295, "grad_norm": 9.243247985839844, "learning_rate": 1.2677595628415302e-05, "loss": 3.0146, "step": 580 }, { "epoch": 1.904918032786885, "grad_norm": 6.2619476318359375, "learning_rate": 1.2699453551912568e-05, "loss": 2.9014, "step": 581 }, { "epoch": 1.9081967213114754, "grad_norm": 9.404212951660156, "learning_rate": 1.2721311475409837e-05, "loss": 2.8491, "step": 582 }, { "epoch": 1.9114754098360656, "grad_norm": 9.562809944152832, "learning_rate": 1.2743169398907104e-05, "loss": 2.9619, "step": 583 }, { "epoch": 1.9147540983606557, "grad_norm": 7.288749694824219, "learning_rate": 1.2765027322404373e-05, "loss": 3.0127, "step": 584 }, { "epoch": 1.918032786885246, "grad_norm": 12.618924140930176, "learning_rate": 1.2786885245901642e-05, "loss": 2.9365, "step": 585 }, { "epoch": 1.9213114754098362, "grad_norm": 11.679717063903809, "learning_rate": 1.2808743169398908e-05, "loss": 2.8765, "step": 586 }, { "epoch": 1.9245901639344263, "grad_norm": 9.34235668182373, "learning_rate": 1.2830601092896175e-05, "loss": 3.0527, "step": 587 }, { "epoch": 1.9278688524590164, "grad_norm": 7.485538959503174, "learning_rate": 1.2852459016393444e-05, "loss": 3.0195, "step": 588 }, { "epoch": 1.9311475409836065, "grad_norm": 11.429547309875488, "learning_rate": 1.2874316939890711e-05, "loss": 2.9023, "step": 589 }, { "epoch": 1.9344262295081966, "grad_norm": 11.830915451049805, "learning_rate": 1.289617486338798e-05, "loss": 2.9087, "step": 590 }, { "epoch": 1.9377049180327868, "grad_norm": 8.940601348876953, "learning_rate": 1.2918032786885246e-05, "loss": 3.1436, "step": 591 }, { "epoch": 1.940983606557377, "grad_norm": 10.751770973205566, "learning_rate": 1.2939890710382515e-05, "loss": 2.9395, "step": 592 }, { "epoch": 1.9442622950819672, "grad_norm": 9.14109992980957, "learning_rate": 1.2961748633879782e-05, "loss": 2.8379, "step": 593 }, { "epoch": 1.9475409836065574, "grad_norm": 11.651500701904297, "learning_rate": 1.2983606557377051e-05, "loss": 2.9355, "step": 594 }, { "epoch": 1.9508196721311475, "grad_norm": 9.323256492614746, "learning_rate": 1.3005464480874317e-05, "loss": 2.916, "step": 595 }, { "epoch": 1.9540983606557378, "grad_norm": 8.094505310058594, "learning_rate": 1.3027322404371586e-05, "loss": 2.875, "step": 596 }, { "epoch": 1.957377049180328, "grad_norm": 12.412293434143066, "learning_rate": 1.3049180327868853e-05, "loss": 2.9248, "step": 597 }, { "epoch": 1.960655737704918, "grad_norm": 6.592497825622559, "learning_rate": 1.3071038251366122e-05, "loss": 2.9424, "step": 598 }, { "epoch": 1.9639344262295082, "grad_norm": 7.575547218322754, "learning_rate": 1.309289617486339e-05, "loss": 3.249, "step": 599 }, { "epoch": 1.9672131147540983, "grad_norm": 9.228963851928711, "learning_rate": 1.3114754098360655e-05, "loss": 2.9355, "step": 600 }, { "epoch": 1.9704918032786884, "grad_norm": 8.920585632324219, "learning_rate": 1.3136612021857924e-05, "loss": 2.8442, "step": 601 }, { "epoch": 1.9737704918032786, "grad_norm": 9.234753608703613, "learning_rate": 1.3158469945355193e-05, "loss": 2.7393, "step": 602 }, { "epoch": 1.9770491803278687, "grad_norm": 6.251789569854736, "learning_rate": 1.318032786885246e-05, "loss": 2.9004, "step": 603 }, { "epoch": 1.980327868852459, "grad_norm": 8.842354774475098, "learning_rate": 1.320218579234973e-05, "loss": 2.8218, "step": 604 }, { "epoch": 1.9836065573770492, "grad_norm": 8.381169319152832, "learning_rate": 1.3224043715846995e-05, "loss": 3.0166, "step": 605 }, { "epoch": 1.9868852459016395, "grad_norm": 11.622941970825195, "learning_rate": 1.3245901639344262e-05, "loss": 3.0371, "step": 606 }, { "epoch": 1.9901639344262296, "grad_norm": 14.927705764770508, "learning_rate": 1.3267759562841531e-05, "loss": 2.9531, "step": 607 }, { "epoch": 1.9934426229508198, "grad_norm": 8.419644355773926, "learning_rate": 1.3289617486338799e-05, "loss": 2.8735, "step": 608 }, { "epoch": 1.9967213114754099, "grad_norm": 8.18002700805664, "learning_rate": 1.3311475409836068e-05, "loss": 3.1074, "step": 609 }, { "epoch": 2.0, "grad_norm": 10.592801094055176, "learning_rate": 1.3333333333333333e-05, "loss": 2.9746, "step": 610 }, { "epoch": 2.00327868852459, "grad_norm": 12.578067779541016, "learning_rate": 1.3355191256830602e-05, "loss": 3.0029, "step": 611 }, { "epoch": 2.0065573770491802, "grad_norm": 9.11426067352295, "learning_rate": 1.337704918032787e-05, "loss": 2.7979, "step": 612 }, { "epoch": 2.0098360655737704, "grad_norm": 9.439793586730957, "learning_rate": 1.3398907103825139e-05, "loss": 2.8662, "step": 613 }, { "epoch": 2.0131147540983605, "grad_norm": 7.33017110824585, "learning_rate": 1.3420765027322404e-05, "loss": 2.9951, "step": 614 }, { "epoch": 2.0163934426229506, "grad_norm": 9.140331268310547, "learning_rate": 1.3442622950819673e-05, "loss": 2.8706, "step": 615 }, { "epoch": 2.019672131147541, "grad_norm": 10.5852632522583, "learning_rate": 1.346448087431694e-05, "loss": 2.8462, "step": 616 }, { "epoch": 2.0229508196721313, "grad_norm": 10.432097434997559, "learning_rate": 1.348633879781421e-05, "loss": 2.9478, "step": 617 }, { "epoch": 2.0262295081967214, "grad_norm": 10.92910099029541, "learning_rate": 1.3508196721311477e-05, "loss": 2.7866, "step": 618 }, { "epoch": 2.0295081967213116, "grad_norm": 8.120308876037598, "learning_rate": 1.3530054644808742e-05, "loss": 2.8311, "step": 619 }, { "epoch": 2.0327868852459017, "grad_norm": 9.891181945800781, "learning_rate": 1.3551912568306011e-05, "loss": 2.9219, "step": 620 }, { "epoch": 2.036065573770492, "grad_norm": 6.828457832336426, "learning_rate": 1.357377049180328e-05, "loss": 2.8652, "step": 621 }, { "epoch": 2.039344262295082, "grad_norm": 8.373826026916504, "learning_rate": 1.3595628415300548e-05, "loss": 3.0361, "step": 622 }, { "epoch": 2.042622950819672, "grad_norm": 9.538612365722656, "learning_rate": 1.3617486338797817e-05, "loss": 2.9688, "step": 623 }, { "epoch": 2.045901639344262, "grad_norm": 9.133373260498047, "learning_rate": 1.3639344262295082e-05, "loss": 2.8232, "step": 624 }, { "epoch": 2.0491803278688523, "grad_norm": 6.526598930358887, "learning_rate": 1.366120218579235e-05, "loss": 2.793, "step": 625 }, { "epoch": 2.0524590163934424, "grad_norm": 6.870913982391357, "learning_rate": 1.3683060109289619e-05, "loss": 2.8936, "step": 626 }, { "epoch": 2.055737704918033, "grad_norm": 10.34265422821045, "learning_rate": 1.3704918032786888e-05, "loss": 3.0005, "step": 627 }, { "epoch": 2.059016393442623, "grad_norm": 9.242664337158203, "learning_rate": 1.3726775956284155e-05, "loss": 2.9434, "step": 628 }, { "epoch": 2.0622950819672132, "grad_norm": 7.90790319442749, "learning_rate": 1.374863387978142e-05, "loss": 2.7969, "step": 629 }, { "epoch": 2.0655737704918034, "grad_norm": 8.831404685974121, "learning_rate": 1.377049180327869e-05, "loss": 2.7856, "step": 630 }, { "epoch": 2.0688524590163935, "grad_norm": 8.074416160583496, "learning_rate": 1.3792349726775957e-05, "loss": 2.7998, "step": 631 }, { "epoch": 2.0721311475409836, "grad_norm": 7.026223182678223, "learning_rate": 1.3814207650273226e-05, "loss": 2.915, "step": 632 }, { "epoch": 2.0754098360655737, "grad_norm": 10.131784439086914, "learning_rate": 1.3836065573770492e-05, "loss": 2.8589, "step": 633 }, { "epoch": 2.078688524590164, "grad_norm": 12.220357894897461, "learning_rate": 1.385792349726776e-05, "loss": 2.8643, "step": 634 }, { "epoch": 2.081967213114754, "grad_norm": 8.521344184875488, "learning_rate": 1.3879781420765028e-05, "loss": 2.8867, "step": 635 }, { "epoch": 2.085245901639344, "grad_norm": 9.801836013793945, "learning_rate": 1.3901639344262297e-05, "loss": 2.8848, "step": 636 }, { "epoch": 2.088524590163934, "grad_norm": 10.907489776611328, "learning_rate": 1.3923497267759564e-05, "loss": 2.918, "step": 637 }, { "epoch": 2.091803278688525, "grad_norm": 7.224272727966309, "learning_rate": 1.3945355191256832e-05, "loss": 2.9844, "step": 638 }, { "epoch": 2.095081967213115, "grad_norm": 11.709176063537598, "learning_rate": 1.3967213114754099e-05, "loss": 2.8486, "step": 639 }, { "epoch": 2.098360655737705, "grad_norm": 9.210360527038574, "learning_rate": 1.3989071038251368e-05, "loss": 2.9429, "step": 640 }, { "epoch": 2.101639344262295, "grad_norm": 8.488046646118164, "learning_rate": 1.4010928961748635e-05, "loss": 2.8682, "step": 641 }, { "epoch": 2.1049180327868853, "grad_norm": 9.414198875427246, "learning_rate": 1.4032786885245904e-05, "loss": 2.7842, "step": 642 }, { "epoch": 2.1081967213114754, "grad_norm": 13.909337043762207, "learning_rate": 1.405464480874317e-05, "loss": 2.6919, "step": 643 }, { "epoch": 2.1114754098360655, "grad_norm": 13.214593887329102, "learning_rate": 1.4076502732240439e-05, "loss": 2.9697, "step": 644 }, { "epoch": 2.1147540983606556, "grad_norm": 7.7530131340026855, "learning_rate": 1.4098360655737706e-05, "loss": 2.8643, "step": 645 }, { "epoch": 2.1180327868852458, "grad_norm": 6.9398322105407715, "learning_rate": 1.4120218579234975e-05, "loss": 2.9844, "step": 646 }, { "epoch": 2.121311475409836, "grad_norm": 9.363463401794434, "learning_rate": 1.4142076502732242e-05, "loss": 2.7637, "step": 647 }, { "epoch": 2.1245901639344265, "grad_norm": 8.3583345413208, "learning_rate": 1.4163934426229508e-05, "loss": 2.8809, "step": 648 }, { "epoch": 2.1278688524590166, "grad_norm": 7.736499309539795, "learning_rate": 1.4185792349726777e-05, "loss": 2.8203, "step": 649 }, { "epoch": 2.1311475409836067, "grad_norm": 8.790932655334473, "learning_rate": 1.4207650273224044e-05, "loss": 2.6782, "step": 650 }, { "epoch": 2.134426229508197, "grad_norm": 9.243220329284668, "learning_rate": 1.4229508196721313e-05, "loss": 3.0244, "step": 651 }, { "epoch": 2.137704918032787, "grad_norm": 7.804422378540039, "learning_rate": 1.4251366120218579e-05, "loss": 2.916, "step": 652 }, { "epoch": 2.140983606557377, "grad_norm": 6.331820011138916, "learning_rate": 1.4273224043715848e-05, "loss": 2.7798, "step": 653 }, { "epoch": 2.144262295081967, "grad_norm": 12.55066967010498, "learning_rate": 1.4295081967213115e-05, "loss": 2.5903, "step": 654 }, { "epoch": 2.1475409836065573, "grad_norm": 8.666050910949707, "learning_rate": 1.4316939890710384e-05, "loss": 2.7715, "step": 655 }, { "epoch": 2.1508196721311474, "grad_norm": 7.571608066558838, "learning_rate": 1.4338797814207652e-05, "loss": 2.7627, "step": 656 }, { "epoch": 2.1540983606557376, "grad_norm": 7.4407453536987305, "learning_rate": 1.4360655737704919e-05, "loss": 2.6499, "step": 657 }, { "epoch": 2.1573770491803277, "grad_norm": 5.808840274810791, "learning_rate": 1.4382513661202186e-05, "loss": 2.9634, "step": 658 }, { "epoch": 2.160655737704918, "grad_norm": 7.161530494689941, "learning_rate": 1.4404371584699455e-05, "loss": 2.9238, "step": 659 }, { "epoch": 2.1639344262295084, "grad_norm": 7.970677852630615, "learning_rate": 1.4426229508196722e-05, "loss": 2.9209, "step": 660 }, { "epoch": 2.1672131147540985, "grad_norm": 7.444771766662598, "learning_rate": 1.4448087431693991e-05, "loss": 2.8945, "step": 661 }, { "epoch": 2.1704918032786886, "grad_norm": 10.269222259521484, "learning_rate": 1.4469945355191257e-05, "loss": 2.7979, "step": 662 }, { "epoch": 2.1737704918032787, "grad_norm": 7.707312107086182, "learning_rate": 1.4491803278688526e-05, "loss": 2.9253, "step": 663 }, { "epoch": 2.177049180327869, "grad_norm": 9.85374641418457, "learning_rate": 1.4513661202185793e-05, "loss": 2.9434, "step": 664 }, { "epoch": 2.180327868852459, "grad_norm": 10.44987964630127, "learning_rate": 1.4535519125683062e-05, "loss": 2.7622, "step": 665 }, { "epoch": 2.183606557377049, "grad_norm": 9.46280288696289, "learning_rate": 1.455737704918033e-05, "loss": 2.8779, "step": 666 }, { "epoch": 2.1868852459016392, "grad_norm": 4.987039089202881, "learning_rate": 1.4579234972677595e-05, "loss": 2.7305, "step": 667 }, { "epoch": 2.1901639344262294, "grad_norm": 16.176502227783203, "learning_rate": 1.4601092896174864e-05, "loss": 2.8438, "step": 668 }, { "epoch": 2.1934426229508195, "grad_norm": 9.620067596435547, "learning_rate": 1.4622950819672133e-05, "loss": 2.7637, "step": 669 }, { "epoch": 2.19672131147541, "grad_norm": 8.859009742736816, "learning_rate": 1.46448087431694e-05, "loss": 2.8135, "step": 670 }, { "epoch": 2.2, "grad_norm": 8.628076553344727, "learning_rate": 1.4666666666666666e-05, "loss": 2.6851, "step": 671 }, { "epoch": 2.2032786885245903, "grad_norm": 9.537768363952637, "learning_rate": 1.4688524590163935e-05, "loss": 2.8623, "step": 672 }, { "epoch": 2.2065573770491804, "grad_norm": 8.154317855834961, "learning_rate": 1.4710382513661203e-05, "loss": 2.6865, "step": 673 }, { "epoch": 2.2098360655737705, "grad_norm": 8.968615531921387, "learning_rate": 1.4732240437158472e-05, "loss": 2.7754, "step": 674 }, { "epoch": 2.2131147540983607, "grad_norm": 11.579047203063965, "learning_rate": 1.4754098360655739e-05, "loss": 2.8301, "step": 675 }, { "epoch": 2.216393442622951, "grad_norm": 14.858353614807129, "learning_rate": 1.4775956284153006e-05, "loss": 2.7476, "step": 676 }, { "epoch": 2.219672131147541, "grad_norm": 6.79957914352417, "learning_rate": 1.4797814207650274e-05, "loss": 2.8213, "step": 677 }, { "epoch": 2.222950819672131, "grad_norm": 10.219999313354492, "learning_rate": 1.4819672131147543e-05, "loss": 2.9277, "step": 678 }, { "epoch": 2.226229508196721, "grad_norm": 10.137675285339355, "learning_rate": 1.484153005464481e-05, "loss": 2.9346, "step": 679 }, { "epoch": 2.2295081967213113, "grad_norm": 6.660505771636963, "learning_rate": 1.4863387978142079e-05, "loss": 2.6875, "step": 680 }, { "epoch": 2.2327868852459014, "grad_norm": 7.818025588989258, "learning_rate": 1.4885245901639344e-05, "loss": 2.8135, "step": 681 }, { "epoch": 2.236065573770492, "grad_norm": 8.326101303100586, "learning_rate": 1.4907103825136613e-05, "loss": 2.6211, "step": 682 }, { "epoch": 2.239344262295082, "grad_norm": 8.638043403625488, "learning_rate": 1.492896174863388e-05, "loss": 2.7705, "step": 683 }, { "epoch": 2.2426229508196722, "grad_norm": 6.520962238311768, "learning_rate": 1.495081967213115e-05, "loss": 2.6201, "step": 684 }, { "epoch": 2.2459016393442623, "grad_norm": 13.932276725769043, "learning_rate": 1.4972677595628417e-05, "loss": 2.9258, "step": 685 }, { "epoch": 2.2491803278688525, "grad_norm": 7.981418609619141, "learning_rate": 1.4994535519125684e-05, "loss": 2.8477, "step": 686 }, { "epoch": 2.2524590163934426, "grad_norm": 9.5775785446167, "learning_rate": 1.5016393442622952e-05, "loss": 2.9639, "step": 687 }, { "epoch": 2.2557377049180327, "grad_norm": 9.990405082702637, "learning_rate": 1.503825136612022e-05, "loss": 2.9541, "step": 688 }, { "epoch": 2.259016393442623, "grad_norm": 7.782906532287598, "learning_rate": 1.5060109289617488e-05, "loss": 2.8574, "step": 689 }, { "epoch": 2.262295081967213, "grad_norm": 13.485301971435547, "learning_rate": 1.5081967213114754e-05, "loss": 2.8525, "step": 690 }, { "epoch": 2.265573770491803, "grad_norm": 9.191924095153809, "learning_rate": 1.5103825136612023e-05, "loss": 2.835, "step": 691 }, { "epoch": 2.2688524590163937, "grad_norm": 9.732707977294922, "learning_rate": 1.512568306010929e-05, "loss": 2.6328, "step": 692 }, { "epoch": 2.2721311475409838, "grad_norm": 9.551609992980957, "learning_rate": 1.5147540983606559e-05, "loss": 2.8711, "step": 693 }, { "epoch": 2.275409836065574, "grad_norm": 7.456890106201172, "learning_rate": 1.5169398907103828e-05, "loss": 2.7627, "step": 694 }, { "epoch": 2.278688524590164, "grad_norm": 8.034096717834473, "learning_rate": 1.5191256830601094e-05, "loss": 2.6289, "step": 695 }, { "epoch": 2.281967213114754, "grad_norm": 8.816054344177246, "learning_rate": 1.5213114754098361e-05, "loss": 2.833, "step": 696 }, { "epoch": 2.2852459016393443, "grad_norm": 8.906683921813965, "learning_rate": 1.523497267759563e-05, "loss": 2.6733, "step": 697 }, { "epoch": 2.2885245901639344, "grad_norm": 8.340088844299316, "learning_rate": 1.5256830601092897e-05, "loss": 2.752, "step": 698 }, { "epoch": 2.2918032786885245, "grad_norm": 6.659605026245117, "learning_rate": 1.5278688524590165e-05, "loss": 2.7344, "step": 699 }, { "epoch": 2.2950819672131146, "grad_norm": 11.773367881774902, "learning_rate": 1.5300546448087432e-05, "loss": 2.9834, "step": 700 }, { "epoch": 2.2983606557377048, "grad_norm": 8.509276390075684, "learning_rate": 1.53224043715847e-05, "loss": 2.7217, "step": 701 }, { "epoch": 2.301639344262295, "grad_norm": 7.970211982727051, "learning_rate": 1.534426229508197e-05, "loss": 2.7666, "step": 702 }, { "epoch": 2.304918032786885, "grad_norm": 8.453312873840332, "learning_rate": 1.5366120218579237e-05, "loss": 2.6963, "step": 703 }, { "epoch": 2.3081967213114756, "grad_norm": 11.792177200317383, "learning_rate": 1.5387978142076504e-05, "loss": 2.6191, "step": 704 }, { "epoch": 2.3114754098360657, "grad_norm": 27.311664581298828, "learning_rate": 1.5409836065573772e-05, "loss": 2.6211, "step": 705 }, { "epoch": 2.314754098360656, "grad_norm": 10.260173797607422, "learning_rate": 1.543169398907104e-05, "loss": 2.9365, "step": 706 }, { "epoch": 2.318032786885246, "grad_norm": 10.283576011657715, "learning_rate": 1.5453551912568306e-05, "loss": 2.7412, "step": 707 }, { "epoch": 2.321311475409836, "grad_norm": 9.64803409576416, "learning_rate": 1.5475409836065577e-05, "loss": 2.7676, "step": 708 }, { "epoch": 2.324590163934426, "grad_norm": 22.90440559387207, "learning_rate": 1.549726775956284e-05, "loss": 2.8408, "step": 709 }, { "epoch": 2.3278688524590163, "grad_norm": 6.724837779998779, "learning_rate": 1.551912568306011e-05, "loss": 2.8296, "step": 710 }, { "epoch": 2.3311475409836064, "grad_norm": 10.349586486816406, "learning_rate": 1.554098360655738e-05, "loss": 2.7485, "step": 711 }, { "epoch": 2.3344262295081966, "grad_norm": 9.311531066894531, "learning_rate": 1.5562841530054646e-05, "loss": 2.6191, "step": 712 }, { "epoch": 2.337704918032787, "grad_norm": 8.226256370544434, "learning_rate": 1.5584699453551914e-05, "loss": 2.6372, "step": 713 }, { "epoch": 2.3409836065573773, "grad_norm": 13.512642860412598, "learning_rate": 1.560655737704918e-05, "loss": 2.7363, "step": 714 }, { "epoch": 2.3442622950819674, "grad_norm": 8.527244567871094, "learning_rate": 1.5628415300546448e-05, "loss": 2.8496, "step": 715 }, { "epoch": 2.3475409836065575, "grad_norm": 11.886306762695312, "learning_rate": 1.5650273224043716e-05, "loss": 2.8477, "step": 716 }, { "epoch": 2.3508196721311476, "grad_norm": 11.699198722839355, "learning_rate": 1.5672131147540986e-05, "loss": 2.9434, "step": 717 }, { "epoch": 2.3540983606557377, "grad_norm": 9.16463851928711, "learning_rate": 1.5693989071038254e-05, "loss": 2.895, "step": 718 }, { "epoch": 2.357377049180328, "grad_norm": 8.900703430175781, "learning_rate": 1.571584699453552e-05, "loss": 2.666, "step": 719 }, { "epoch": 2.360655737704918, "grad_norm": 7.757689476013184, "learning_rate": 1.5737704918032788e-05, "loss": 2.6396, "step": 720 }, { "epoch": 2.363934426229508, "grad_norm": 14.790778160095215, "learning_rate": 1.5759562841530055e-05, "loss": 2.792, "step": 721 }, { "epoch": 2.3672131147540982, "grad_norm": 7.769460678100586, "learning_rate": 1.5781420765027323e-05, "loss": 2.7671, "step": 722 }, { "epoch": 2.3704918032786884, "grad_norm": 8.544053077697754, "learning_rate": 1.580327868852459e-05, "loss": 2.8096, "step": 723 }, { "epoch": 2.3737704918032785, "grad_norm": 8.660293579101562, "learning_rate": 1.5825136612021857e-05, "loss": 2.8213, "step": 724 }, { "epoch": 2.3770491803278686, "grad_norm": 11.198596000671387, "learning_rate": 1.5846994535519128e-05, "loss": 2.7695, "step": 725 }, { "epoch": 2.380327868852459, "grad_norm": 9.35493278503418, "learning_rate": 1.5868852459016395e-05, "loss": 2.9111, "step": 726 }, { "epoch": 2.3836065573770493, "grad_norm": 8.911916732788086, "learning_rate": 1.5890710382513663e-05, "loss": 2.7183, "step": 727 }, { "epoch": 2.3868852459016394, "grad_norm": 8.407244682312012, "learning_rate": 1.591256830601093e-05, "loss": 2.6079, "step": 728 }, { "epoch": 2.3901639344262295, "grad_norm": 10.869140625, "learning_rate": 1.5934426229508197e-05, "loss": 2.7812, "step": 729 }, { "epoch": 2.3934426229508197, "grad_norm": 9.949702262878418, "learning_rate": 1.5956284153005465e-05, "loss": 2.6079, "step": 730 }, { "epoch": 2.39672131147541, "grad_norm": 8.05864429473877, "learning_rate": 1.5978142076502735e-05, "loss": 2.6084, "step": 731 }, { "epoch": 2.4, "grad_norm": 10.41093921661377, "learning_rate": 1.6000000000000003e-05, "loss": 2.957, "step": 732 }, { "epoch": 2.40327868852459, "grad_norm": 11.401305198669434, "learning_rate": 1.6021857923497267e-05, "loss": 2.8125, "step": 733 }, { "epoch": 2.40655737704918, "grad_norm": 9.666413307189941, "learning_rate": 1.6043715846994537e-05, "loss": 2.7725, "step": 734 }, { "epoch": 2.4098360655737707, "grad_norm": 6.593109130859375, "learning_rate": 1.6065573770491805e-05, "loss": 2.9106, "step": 735 }, { "epoch": 2.413114754098361, "grad_norm": 8.044692993164062, "learning_rate": 1.6087431693989072e-05, "loss": 2.855, "step": 736 }, { "epoch": 2.416393442622951, "grad_norm": 6.958666801452637, "learning_rate": 1.6109289617486343e-05, "loss": 2.7554, "step": 737 }, { "epoch": 2.419672131147541, "grad_norm": 10.04643726348877, "learning_rate": 1.6131147540983607e-05, "loss": 2.7837, "step": 738 }, { "epoch": 2.422950819672131, "grad_norm": 7.517574787139893, "learning_rate": 1.6153005464480874e-05, "loss": 2.832, "step": 739 }, { "epoch": 2.4262295081967213, "grad_norm": 8.448453903198242, "learning_rate": 1.6174863387978145e-05, "loss": 2.5186, "step": 740 }, { "epoch": 2.4295081967213115, "grad_norm": 7.578656196594238, "learning_rate": 1.6196721311475412e-05, "loss": 2.6543, "step": 741 }, { "epoch": 2.4327868852459016, "grad_norm": 7.055178165435791, "learning_rate": 1.621857923497268e-05, "loss": 2.7822, "step": 742 }, { "epoch": 2.4360655737704917, "grad_norm": 9.748170852661133, "learning_rate": 1.6240437158469946e-05, "loss": 2.8291, "step": 743 }, { "epoch": 2.439344262295082, "grad_norm": 8.351161003112793, "learning_rate": 1.6262295081967214e-05, "loss": 2.5669, "step": 744 }, { "epoch": 2.442622950819672, "grad_norm": 8.569477081298828, "learning_rate": 1.628415300546448e-05, "loss": 2.8428, "step": 745 }, { "epoch": 2.445901639344262, "grad_norm": 7.793284893035889, "learning_rate": 1.6306010928961752e-05, "loss": 2.8203, "step": 746 }, { "epoch": 2.4491803278688526, "grad_norm": 8.372857093811035, "learning_rate": 1.6327868852459016e-05, "loss": 2.7998, "step": 747 }, { "epoch": 2.4524590163934428, "grad_norm": 8.956133842468262, "learning_rate": 1.6349726775956286e-05, "loss": 2.9004, "step": 748 }, { "epoch": 2.455737704918033, "grad_norm": 8.408917427062988, "learning_rate": 1.6371584699453554e-05, "loss": 2.793, "step": 749 }, { "epoch": 2.459016393442623, "grad_norm": 8.389541625976562, "learning_rate": 1.639344262295082e-05, "loss": 2.8027, "step": 750 }, { "epoch": 2.462295081967213, "grad_norm": 10.774764060974121, "learning_rate": 1.641530054644809e-05, "loss": 2.6758, "step": 751 }, { "epoch": 2.4655737704918033, "grad_norm": 9.28886890411377, "learning_rate": 1.6437158469945356e-05, "loss": 2.7686, "step": 752 }, { "epoch": 2.4688524590163934, "grad_norm": 9.93682861328125, "learning_rate": 1.6459016393442623e-05, "loss": 2.6963, "step": 753 }, { "epoch": 2.4721311475409835, "grad_norm": 10.796219825744629, "learning_rate": 1.6480874316939894e-05, "loss": 2.5649, "step": 754 }, { "epoch": 2.4754098360655736, "grad_norm": 10.576736450195312, "learning_rate": 1.650273224043716e-05, "loss": 3.0269, "step": 755 }, { "epoch": 2.4786885245901638, "grad_norm": 8.215065956115723, "learning_rate": 1.6524590163934428e-05, "loss": 2.6616, "step": 756 }, { "epoch": 2.4819672131147543, "grad_norm": 7.00184965133667, "learning_rate": 1.6546448087431696e-05, "loss": 2.7969, "step": 757 }, { "epoch": 2.4852459016393444, "grad_norm": 8.432991027832031, "learning_rate": 1.6568306010928963e-05, "loss": 2.6758, "step": 758 }, { "epoch": 2.4885245901639346, "grad_norm": 10.626726150512695, "learning_rate": 1.659016393442623e-05, "loss": 2.8506, "step": 759 }, { "epoch": 2.4918032786885247, "grad_norm": 11.666621208190918, "learning_rate": 1.66120218579235e-05, "loss": 2.8613, "step": 760 }, { "epoch": 2.495081967213115, "grad_norm": 9.367171287536621, "learning_rate": 1.6633879781420765e-05, "loss": 2.7075, "step": 761 }, { "epoch": 2.498360655737705, "grad_norm": 7.7698564529418945, "learning_rate": 1.6655737704918032e-05, "loss": 2.668, "step": 762 }, { "epoch": 2.501639344262295, "grad_norm": 31.236648559570312, "learning_rate": 1.6677595628415303e-05, "loss": 2.8799, "step": 763 }, { "epoch": 2.504918032786885, "grad_norm": 10.960731506347656, "learning_rate": 1.669945355191257e-05, "loss": 2.8784, "step": 764 }, { "epoch": 2.5081967213114753, "grad_norm": 9.97085189819336, "learning_rate": 1.6721311475409837e-05, "loss": 2.7798, "step": 765 }, { "epoch": 2.5114754098360654, "grad_norm": 7.896324157714844, "learning_rate": 1.6743169398907105e-05, "loss": 2.6763, "step": 766 }, { "epoch": 2.5147540983606556, "grad_norm": 9.505319595336914, "learning_rate": 1.6765027322404372e-05, "loss": 2.7778, "step": 767 }, { "epoch": 2.5180327868852457, "grad_norm": 10.900887489318848, "learning_rate": 1.678688524590164e-05, "loss": 2.5977, "step": 768 }, { "epoch": 2.521311475409836, "grad_norm": 8.308507919311523, "learning_rate": 1.680874316939891e-05, "loss": 2.7803, "step": 769 }, { "epoch": 2.5245901639344264, "grad_norm": 7.002642631530762, "learning_rate": 1.6830601092896177e-05, "loss": 2.6309, "step": 770 }, { "epoch": 2.5278688524590165, "grad_norm": 9.453266143798828, "learning_rate": 1.6852459016393445e-05, "loss": 2.582, "step": 771 }, { "epoch": 2.5311475409836066, "grad_norm": 10.33962631225586, "learning_rate": 1.6874316939890712e-05, "loss": 2.8691, "step": 772 }, { "epoch": 2.5344262295081967, "grad_norm": 7.222407817840576, "learning_rate": 1.689617486338798e-05, "loss": 2.7314, "step": 773 }, { "epoch": 2.537704918032787, "grad_norm": 9.046460151672363, "learning_rate": 1.6918032786885247e-05, "loss": 2.6699, "step": 774 }, { "epoch": 2.540983606557377, "grad_norm": 8.476741790771484, "learning_rate": 1.6939890710382517e-05, "loss": 2.9062, "step": 775 }, { "epoch": 2.544262295081967, "grad_norm": 11.385180473327637, "learning_rate": 1.696174863387978e-05, "loss": 2.4858, "step": 776 }, { "epoch": 2.5475409836065572, "grad_norm": 8.54112720489502, "learning_rate": 1.6983606557377052e-05, "loss": 2.7881, "step": 777 }, { "epoch": 2.550819672131148, "grad_norm": 8.183646202087402, "learning_rate": 1.700546448087432e-05, "loss": 2.8057, "step": 778 }, { "epoch": 2.554098360655738, "grad_norm": 11.577500343322754, "learning_rate": 1.7027322404371587e-05, "loss": 2.6934, "step": 779 }, { "epoch": 2.557377049180328, "grad_norm": 7.221826553344727, "learning_rate": 1.7049180327868854e-05, "loss": 2.832, "step": 780 }, { "epoch": 2.560655737704918, "grad_norm": 7.161230087280273, "learning_rate": 1.707103825136612e-05, "loss": 2.7988, "step": 781 }, { "epoch": 2.5639344262295083, "grad_norm": 8.842094421386719, "learning_rate": 1.709289617486339e-05, "loss": 2.6343, "step": 782 }, { "epoch": 2.5672131147540984, "grad_norm": 7.682264804840088, "learning_rate": 1.711475409836066e-05, "loss": 2.8535, "step": 783 }, { "epoch": 2.5704918032786885, "grad_norm": 6.879706859588623, "learning_rate": 1.7136612021857926e-05, "loss": 2.7246, "step": 784 }, { "epoch": 2.5737704918032787, "grad_norm": 10.265910148620605, "learning_rate": 1.715846994535519e-05, "loss": 2.5825, "step": 785 }, { "epoch": 2.577049180327869, "grad_norm": 8.314364433288574, "learning_rate": 1.718032786885246e-05, "loss": 2.916, "step": 786 }, { "epoch": 2.580327868852459, "grad_norm": 10.12905216217041, "learning_rate": 1.720218579234973e-05, "loss": 2.605, "step": 787 }, { "epoch": 2.583606557377049, "grad_norm": 9.527913093566895, "learning_rate": 1.7224043715846996e-05, "loss": 2.9082, "step": 788 }, { "epoch": 2.586885245901639, "grad_norm": 12.781251907348633, "learning_rate": 1.7245901639344263e-05, "loss": 2.8379, "step": 789 }, { "epoch": 2.5901639344262293, "grad_norm": 9.129727363586426, "learning_rate": 1.726775956284153e-05, "loss": 2.9561, "step": 790 }, { "epoch": 2.5934426229508194, "grad_norm": 12.990401268005371, "learning_rate": 1.7289617486338798e-05, "loss": 2.8916, "step": 791 }, { "epoch": 2.59672131147541, "grad_norm": 10.858464241027832, "learning_rate": 1.731147540983607e-05, "loss": 2.8008, "step": 792 }, { "epoch": 2.6, "grad_norm": 10.542573928833008, "learning_rate": 1.7333333333333336e-05, "loss": 2.4873, "step": 793 }, { "epoch": 2.60327868852459, "grad_norm": 6.003284931182861, "learning_rate": 1.7355191256830603e-05, "loss": 2.6548, "step": 794 }, { "epoch": 2.6065573770491803, "grad_norm": 9.605287551879883, "learning_rate": 1.737704918032787e-05, "loss": 2.8242, "step": 795 }, { "epoch": 2.6098360655737705, "grad_norm": 10.770798683166504, "learning_rate": 1.7398907103825138e-05, "loss": 2.9229, "step": 796 }, { "epoch": 2.6131147540983606, "grad_norm": 19.55732536315918, "learning_rate": 1.7420765027322405e-05, "loss": 2.772, "step": 797 }, { "epoch": 2.6163934426229507, "grad_norm": 7.803356170654297, "learning_rate": 1.7442622950819676e-05, "loss": 2.6943, "step": 798 }, { "epoch": 2.619672131147541, "grad_norm": 8.736838340759277, "learning_rate": 1.746448087431694e-05, "loss": 2.7935, "step": 799 }, { "epoch": 2.6229508196721314, "grad_norm": 8.470075607299805, "learning_rate": 1.7486338797814207e-05, "loss": 2.749, "step": 800 }, { "epoch": 2.6262295081967215, "grad_norm": 7.939591407775879, "learning_rate": 1.7508196721311478e-05, "loss": 2.7275, "step": 801 }, { "epoch": 2.6295081967213116, "grad_norm": 7.82527494430542, "learning_rate": 1.7530054644808745e-05, "loss": 2.6826, "step": 802 }, { "epoch": 2.6327868852459018, "grad_norm": 6.74731969833374, "learning_rate": 1.7551912568306012e-05, "loss": 2.9102, "step": 803 }, { "epoch": 2.636065573770492, "grad_norm": 6.719389915466309, "learning_rate": 1.757377049180328e-05, "loss": 2.6758, "step": 804 }, { "epoch": 2.639344262295082, "grad_norm": 8.659001350402832, "learning_rate": 1.7595628415300547e-05, "loss": 2.708, "step": 805 }, { "epoch": 2.642622950819672, "grad_norm": 13.350650787353516, "learning_rate": 1.7617486338797814e-05, "loss": 2.5864, "step": 806 }, { "epoch": 2.6459016393442623, "grad_norm": 10.931968688964844, "learning_rate": 1.7639344262295085e-05, "loss": 2.625, "step": 807 }, { "epoch": 2.6491803278688524, "grad_norm": 8.375774383544922, "learning_rate": 1.7661202185792352e-05, "loss": 2.6499, "step": 808 }, { "epoch": 2.6524590163934425, "grad_norm": 6.123229026794434, "learning_rate": 1.768306010928962e-05, "loss": 2.6826, "step": 809 }, { "epoch": 2.6557377049180326, "grad_norm": 8.122771263122559, "learning_rate": 1.7704918032786887e-05, "loss": 2.5459, "step": 810 }, { "epoch": 2.6590163934426227, "grad_norm": 10.21648120880127, "learning_rate": 1.7726775956284154e-05, "loss": 2.6416, "step": 811 }, { "epoch": 2.662295081967213, "grad_norm": 7.8264946937561035, "learning_rate": 1.774863387978142e-05, "loss": 2.6816, "step": 812 }, { "epoch": 2.6655737704918034, "grad_norm": 8.922752380371094, "learning_rate": 1.7770491803278692e-05, "loss": 2.7349, "step": 813 }, { "epoch": 2.6688524590163936, "grad_norm": 9.361424446105957, "learning_rate": 1.7792349726775956e-05, "loss": 2.7871, "step": 814 }, { "epoch": 2.6721311475409837, "grad_norm": 7.679667949676514, "learning_rate": 1.7814207650273227e-05, "loss": 2.7456, "step": 815 }, { "epoch": 2.675409836065574, "grad_norm": 9.674117088317871, "learning_rate": 1.7836065573770494e-05, "loss": 2.6914, "step": 816 }, { "epoch": 2.678688524590164, "grad_norm": 11.991046905517578, "learning_rate": 1.785792349726776e-05, "loss": 2.7124, "step": 817 }, { "epoch": 2.681967213114754, "grad_norm": 10.152223587036133, "learning_rate": 1.787978142076503e-05, "loss": 2.7314, "step": 818 }, { "epoch": 2.685245901639344, "grad_norm": 9.937880516052246, "learning_rate": 1.7901639344262296e-05, "loss": 2.8203, "step": 819 }, { "epoch": 2.6885245901639343, "grad_norm": 6.642800807952881, "learning_rate": 1.7923497267759563e-05, "loss": 2.7686, "step": 820 }, { "epoch": 2.6918032786885244, "grad_norm": 8.606639862060547, "learning_rate": 1.7945355191256834e-05, "loss": 2.8311, "step": 821 }, { "epoch": 2.695081967213115, "grad_norm": 8.246641159057617, "learning_rate": 1.79672131147541e-05, "loss": 2.6846, "step": 822 }, { "epoch": 2.698360655737705, "grad_norm": 14.536860466003418, "learning_rate": 1.7989071038251365e-05, "loss": 2.6514, "step": 823 }, { "epoch": 2.7016393442622952, "grad_norm": 7.956295490264893, "learning_rate": 1.8010928961748636e-05, "loss": 2.5928, "step": 824 }, { "epoch": 2.7049180327868854, "grad_norm": 8.905553817749023, "learning_rate": 1.8032786885245903e-05, "loss": 2.8281, "step": 825 }, { "epoch": 2.7081967213114755, "grad_norm": 10.812027931213379, "learning_rate": 1.805464480874317e-05, "loss": 2.686, "step": 826 }, { "epoch": 2.7114754098360656, "grad_norm": 7.690039157867432, "learning_rate": 1.807650273224044e-05, "loss": 2.9121, "step": 827 }, { "epoch": 2.7147540983606557, "grad_norm": 7.706568717956543, "learning_rate": 1.8098360655737705e-05, "loss": 2.7207, "step": 828 }, { "epoch": 2.718032786885246, "grad_norm": 8.738513946533203, "learning_rate": 1.8120218579234972e-05, "loss": 2.5815, "step": 829 }, { "epoch": 2.721311475409836, "grad_norm": 12.856246948242188, "learning_rate": 1.8142076502732243e-05, "loss": 2.5654, "step": 830 }, { "epoch": 2.724590163934426, "grad_norm": 7.308026313781738, "learning_rate": 1.816393442622951e-05, "loss": 2.6201, "step": 831 }, { "epoch": 2.7278688524590162, "grad_norm": 8.97202205657959, "learning_rate": 1.8185792349726778e-05, "loss": 2.6768, "step": 832 }, { "epoch": 2.7311475409836063, "grad_norm": 8.592097282409668, "learning_rate": 1.8207650273224045e-05, "loss": 2.7329, "step": 833 }, { "epoch": 2.7344262295081965, "grad_norm": 8.988395690917969, "learning_rate": 1.8229508196721312e-05, "loss": 2.4902, "step": 834 }, { "epoch": 2.737704918032787, "grad_norm": 7.56791877746582, "learning_rate": 1.825136612021858e-05, "loss": 2.8115, "step": 835 }, { "epoch": 2.740983606557377, "grad_norm": 7.672260284423828, "learning_rate": 1.827322404371585e-05, "loss": 2.6777, "step": 836 }, { "epoch": 2.7442622950819673, "grad_norm": 11.010668754577637, "learning_rate": 1.8295081967213114e-05, "loss": 2.7285, "step": 837 }, { "epoch": 2.7475409836065574, "grad_norm": 7.444753170013428, "learning_rate": 1.8316939890710385e-05, "loss": 2.7656, "step": 838 }, { "epoch": 2.7508196721311475, "grad_norm": 10.716898918151855, "learning_rate": 1.8338797814207652e-05, "loss": 2.7534, "step": 839 }, { "epoch": 2.7540983606557377, "grad_norm": 7.2057976722717285, "learning_rate": 1.836065573770492e-05, "loss": 2.7148, "step": 840 }, { "epoch": 2.7573770491803278, "grad_norm": 11.322420120239258, "learning_rate": 1.8382513661202187e-05, "loss": 2.7795, "step": 841 }, { "epoch": 2.760655737704918, "grad_norm": 6.895336627960205, "learning_rate": 1.8404371584699454e-05, "loss": 2.5513, "step": 842 }, { "epoch": 2.7639344262295085, "grad_norm": 8.652134895324707, "learning_rate": 1.842622950819672e-05, "loss": 2.5137, "step": 843 }, { "epoch": 2.7672131147540986, "grad_norm": 9.330353736877441, "learning_rate": 1.8448087431693992e-05, "loss": 2.7285, "step": 844 }, { "epoch": 2.7704918032786887, "grad_norm": 8.63540267944336, "learning_rate": 1.846994535519126e-05, "loss": 2.6675, "step": 845 }, { "epoch": 2.773770491803279, "grad_norm": 7.995960712432861, "learning_rate": 1.8491803278688527e-05, "loss": 2.6582, "step": 846 }, { "epoch": 2.777049180327869, "grad_norm": 8.073319435119629, "learning_rate": 1.8513661202185794e-05, "loss": 2.7451, "step": 847 }, { "epoch": 2.780327868852459, "grad_norm": 9.42282772064209, "learning_rate": 1.853551912568306e-05, "loss": 2.7827, "step": 848 }, { "epoch": 2.783606557377049, "grad_norm": 6.896193027496338, "learning_rate": 1.855737704918033e-05, "loss": 2.4565, "step": 849 }, { "epoch": 2.7868852459016393, "grad_norm": 8.550512313842773, "learning_rate": 1.85792349726776e-05, "loss": 2.9106, "step": 850 }, { "epoch": 2.7901639344262295, "grad_norm": 8.658825874328613, "learning_rate": 1.8601092896174863e-05, "loss": 2.7188, "step": 851 }, { "epoch": 2.7934426229508196, "grad_norm": 7.076747417449951, "learning_rate": 1.862295081967213e-05, "loss": 2.7139, "step": 852 }, { "epoch": 2.7967213114754097, "grad_norm": 12.229289054870605, "learning_rate": 1.86448087431694e-05, "loss": 2.75, "step": 853 }, { "epoch": 2.8, "grad_norm": 7.412968158721924, "learning_rate": 1.866666666666667e-05, "loss": 2.7969, "step": 854 }, { "epoch": 2.80327868852459, "grad_norm": 8.882694244384766, "learning_rate": 1.8688524590163936e-05, "loss": 2.5269, "step": 855 }, { "epoch": 2.80655737704918, "grad_norm": 8.073248863220215, "learning_rate": 1.8710382513661203e-05, "loss": 2.7139, "step": 856 }, { "epoch": 2.8098360655737706, "grad_norm": 10.759687423706055, "learning_rate": 1.873224043715847e-05, "loss": 2.6807, "step": 857 }, { "epoch": 2.8131147540983608, "grad_norm": 8.367733001708984, "learning_rate": 1.8754098360655738e-05, "loss": 2.6777, "step": 858 }, { "epoch": 2.816393442622951, "grad_norm": 9.234270095825195, "learning_rate": 1.877595628415301e-05, "loss": 2.7754, "step": 859 }, { "epoch": 2.819672131147541, "grad_norm": 8.449861526489258, "learning_rate": 1.8797814207650276e-05, "loss": 2.6172, "step": 860 }, { "epoch": 2.822950819672131, "grad_norm": 7.904532432556152, "learning_rate": 1.8819672131147543e-05, "loss": 2.4717, "step": 861 }, { "epoch": 2.8262295081967213, "grad_norm": 7.0473456382751465, "learning_rate": 1.884153005464481e-05, "loss": 2.7471, "step": 862 }, { "epoch": 2.8295081967213114, "grad_norm": 9.994675636291504, "learning_rate": 1.8863387978142078e-05, "loss": 2.7612, "step": 863 }, { "epoch": 2.8327868852459015, "grad_norm": 7.583832263946533, "learning_rate": 1.8885245901639345e-05, "loss": 2.6172, "step": 864 }, { "epoch": 2.836065573770492, "grad_norm": 7.8538665771484375, "learning_rate": 1.8907103825136616e-05, "loss": 2.5801, "step": 865 }, { "epoch": 2.839344262295082, "grad_norm": 7.400686264038086, "learning_rate": 1.892896174863388e-05, "loss": 2.644, "step": 866 }, { "epoch": 2.8426229508196723, "grad_norm": 8.935163497924805, "learning_rate": 1.895081967213115e-05, "loss": 2.6504, "step": 867 }, { "epoch": 2.8459016393442624, "grad_norm": 10.832537651062012, "learning_rate": 1.8972677595628418e-05, "loss": 2.6567, "step": 868 }, { "epoch": 2.8491803278688526, "grad_norm": 8.754270553588867, "learning_rate": 1.8994535519125685e-05, "loss": 2.6128, "step": 869 }, { "epoch": 2.8524590163934427, "grad_norm": 8.40954303741455, "learning_rate": 1.9016393442622952e-05, "loss": 2.7959, "step": 870 }, { "epoch": 2.855737704918033, "grad_norm": 7.30268669128418, "learning_rate": 1.903825136612022e-05, "loss": 2.6758, "step": 871 }, { "epoch": 2.859016393442623, "grad_norm": 7.991669654846191, "learning_rate": 1.9060109289617487e-05, "loss": 2.7173, "step": 872 }, { "epoch": 2.862295081967213, "grad_norm": 9.572431564331055, "learning_rate": 1.9081967213114754e-05, "loss": 2.5825, "step": 873 }, { "epoch": 2.865573770491803, "grad_norm": 6.853316307067871, "learning_rate": 1.9103825136612025e-05, "loss": 2.5752, "step": 874 }, { "epoch": 2.8688524590163933, "grad_norm": 6.736260414123535, "learning_rate": 1.912568306010929e-05, "loss": 2.5513, "step": 875 }, { "epoch": 2.8721311475409834, "grad_norm": 7.564079761505127, "learning_rate": 1.914754098360656e-05, "loss": 2.5073, "step": 876 }, { "epoch": 2.8754098360655735, "grad_norm": 9.132017135620117, "learning_rate": 1.9169398907103827e-05, "loss": 2.6768, "step": 877 }, { "epoch": 2.8786885245901637, "grad_norm": 13.534090042114258, "learning_rate": 1.9191256830601094e-05, "loss": 2.6548, "step": 878 }, { "epoch": 2.8819672131147542, "grad_norm": 9.779181480407715, "learning_rate": 1.921311475409836e-05, "loss": 2.4585, "step": 879 }, { "epoch": 2.8852459016393444, "grad_norm": 8.805858612060547, "learning_rate": 1.923497267759563e-05, "loss": 2.6797, "step": 880 }, { "epoch": 2.8885245901639345, "grad_norm": 7.486392021179199, "learning_rate": 1.9256830601092896e-05, "loss": 2.5098, "step": 881 }, { "epoch": 2.8918032786885246, "grad_norm": 7.680242538452148, "learning_rate": 1.9278688524590167e-05, "loss": 2.6719, "step": 882 }, { "epoch": 2.8950819672131147, "grad_norm": 10.778884887695312, "learning_rate": 1.9300546448087434e-05, "loss": 2.6826, "step": 883 }, { "epoch": 2.898360655737705, "grad_norm": 9.804247856140137, "learning_rate": 1.93224043715847e-05, "loss": 2.4829, "step": 884 }, { "epoch": 2.901639344262295, "grad_norm": 9.84227466583252, "learning_rate": 1.934426229508197e-05, "loss": 2.6221, "step": 885 }, { "epoch": 2.904918032786885, "grad_norm": 13.599005699157715, "learning_rate": 1.9366120218579236e-05, "loss": 2.6831, "step": 886 }, { "epoch": 2.9081967213114757, "grad_norm": 14.571623802185059, "learning_rate": 1.9387978142076503e-05, "loss": 2.6104, "step": 887 }, { "epoch": 2.911475409836066, "grad_norm": 9.80305004119873, "learning_rate": 1.9409836065573774e-05, "loss": 2.6436, "step": 888 }, { "epoch": 2.914754098360656, "grad_norm": 8.376777648925781, "learning_rate": 1.9431693989071038e-05, "loss": 2.5957, "step": 889 }, { "epoch": 2.918032786885246, "grad_norm": 8.830951690673828, "learning_rate": 1.9453551912568305e-05, "loss": 2.6934, "step": 890 }, { "epoch": 2.921311475409836, "grad_norm": 7.117215156555176, "learning_rate": 1.9475409836065576e-05, "loss": 2.792, "step": 891 }, { "epoch": 2.9245901639344263, "grad_norm": 8.289185523986816, "learning_rate": 1.9497267759562843e-05, "loss": 2.4966, "step": 892 }, { "epoch": 2.9278688524590164, "grad_norm": 8.42607307434082, "learning_rate": 1.951912568306011e-05, "loss": 2.563, "step": 893 }, { "epoch": 2.9311475409836065, "grad_norm": 9.336840629577637, "learning_rate": 1.9540983606557378e-05, "loss": 2.4746, "step": 894 }, { "epoch": 2.9344262295081966, "grad_norm": 12.969695091247559, "learning_rate": 1.9562841530054645e-05, "loss": 2.7451, "step": 895 }, { "epoch": 2.9377049180327868, "grad_norm": 10.63592529296875, "learning_rate": 1.9584699453551913e-05, "loss": 2.4463, "step": 896 }, { "epoch": 2.940983606557377, "grad_norm": 11.565185546875, "learning_rate": 1.9606557377049183e-05, "loss": 2.6133, "step": 897 }, { "epoch": 2.944262295081967, "grad_norm": 9.6402587890625, "learning_rate": 1.962841530054645e-05, "loss": 2.7803, "step": 898 }, { "epoch": 2.947540983606557, "grad_norm": 9.897551536560059, "learning_rate": 1.9650273224043718e-05, "loss": 2.8398, "step": 899 }, { "epoch": 2.9508196721311473, "grad_norm": 11.588571548461914, "learning_rate": 1.9672131147540985e-05, "loss": 2.7959, "step": 900 }, { "epoch": 2.954098360655738, "grad_norm": 9.108914375305176, "learning_rate": 1.9693989071038253e-05, "loss": 2.6152, "step": 901 }, { "epoch": 2.957377049180328, "grad_norm": 7.927682876586914, "learning_rate": 1.971584699453552e-05, "loss": 2.6758, "step": 902 }, { "epoch": 2.960655737704918, "grad_norm": 16.150230407714844, "learning_rate": 1.973770491803279e-05, "loss": 2.707, "step": 903 }, { "epoch": 2.963934426229508, "grad_norm": 11.004813194274902, "learning_rate": 1.9759562841530054e-05, "loss": 2.7188, "step": 904 }, { "epoch": 2.9672131147540983, "grad_norm": 7.6752448081970215, "learning_rate": 1.9781420765027325e-05, "loss": 2.6787, "step": 905 }, { "epoch": 2.9704918032786884, "grad_norm": 8.396573066711426, "learning_rate": 1.9803278688524592e-05, "loss": 2.5928, "step": 906 }, { "epoch": 2.9737704918032786, "grad_norm": 9.46358585357666, "learning_rate": 1.982513661202186e-05, "loss": 2.4722, "step": 907 }, { "epoch": 2.9770491803278687, "grad_norm": 11.854802131652832, "learning_rate": 1.9846994535519127e-05, "loss": 2.5718, "step": 908 }, { "epoch": 2.9803278688524593, "grad_norm": 24.60706901550293, "learning_rate": 1.9868852459016394e-05, "loss": 2.543, "step": 909 }, { "epoch": 2.9836065573770494, "grad_norm": 9.278965950012207, "learning_rate": 1.9890710382513662e-05, "loss": 2.6133, "step": 910 }, { "epoch": 2.9868852459016395, "grad_norm": 7.506885051727295, "learning_rate": 1.9912568306010932e-05, "loss": 2.6479, "step": 911 }, { "epoch": 2.9901639344262296, "grad_norm": 11.967893600463867, "learning_rate": 1.99344262295082e-05, "loss": 2.6963, "step": 912 }, { "epoch": 2.9934426229508198, "grad_norm": 9.58698844909668, "learning_rate": 1.9956284153005464e-05, "loss": 2.751, "step": 913 }, { "epoch": 2.99672131147541, "grad_norm": 13.272806167602539, "learning_rate": 1.9978142076502734e-05, "loss": 2.6265, "step": 914 }, { "epoch": 3.0, "grad_norm": 9.485453605651855, "learning_rate": 2e-05, "loss": 2.5806, "step": 915 }, { "epoch": 3.00327868852459, "grad_norm": 8.895525932312012, "learning_rate": 1.9999999943619805e-05, "loss": 2.6904, "step": 916 }, { "epoch": 3.0065573770491802, "grad_norm": 11.00062084197998, "learning_rate": 1.9999999774479207e-05, "loss": 2.6606, "step": 917 }, { "epoch": 3.0098360655737704, "grad_norm": 8.478384017944336, "learning_rate": 1.9999999492578216e-05, "loss": 2.8159, "step": 918 }, { "epoch": 3.0131147540983605, "grad_norm": 10.20749282836914, "learning_rate": 1.9999999097916835e-05, "loss": 2.6704, "step": 919 }, { "epoch": 3.0163934426229506, "grad_norm": 7.856454849243164, "learning_rate": 1.9999998590495066e-05, "loss": 2.5664, "step": 920 }, { "epoch": 3.019672131147541, "grad_norm": 10.136770248413086, "learning_rate": 1.9999997970312918e-05, "loss": 2.5156, "step": 921 }, { "epoch": 3.0229508196721313, "grad_norm": 10.679862976074219, "learning_rate": 1.999999723737039e-05, "loss": 2.5859, "step": 922 }, { "epoch": 3.0262295081967214, "grad_norm": 8.857410430908203, "learning_rate": 1.9999996391667497e-05, "loss": 2.6143, "step": 923 }, { "epoch": 3.0295081967213116, "grad_norm": 11.089912414550781, "learning_rate": 1.999999543320425e-05, "loss": 2.4395, "step": 924 }, { "epoch": 3.0327868852459017, "grad_norm": 9.240669250488281, "learning_rate": 1.9999994361980657e-05, "loss": 2.4565, "step": 925 }, { "epoch": 3.036065573770492, "grad_norm": 10.590537071228027, "learning_rate": 1.999999317799673e-05, "loss": 2.79, "step": 926 }, { "epoch": 3.039344262295082, "grad_norm": 7.117664337158203, "learning_rate": 1.9999991881252482e-05, "loss": 2.4985, "step": 927 }, { "epoch": 3.042622950819672, "grad_norm": 8.991994857788086, "learning_rate": 1.9999990471747926e-05, "loss": 2.6562, "step": 928 }, { "epoch": 3.045901639344262, "grad_norm": 8.434478759765625, "learning_rate": 1.9999988949483082e-05, "loss": 2.3916, "step": 929 }, { "epoch": 3.0491803278688523, "grad_norm": 8.919354438781738, "learning_rate": 1.9999987314457966e-05, "loss": 2.6396, "step": 930 }, { "epoch": 3.0524590163934424, "grad_norm": 7.428524017333984, "learning_rate": 1.9999985566672594e-05, "loss": 2.5767, "step": 931 }, { "epoch": 3.055737704918033, "grad_norm": 8.805930137634277, "learning_rate": 1.9999983706126985e-05, "loss": 2.5762, "step": 932 }, { "epoch": 3.059016393442623, "grad_norm": 7.271883964538574, "learning_rate": 1.999998173282117e-05, "loss": 2.5244, "step": 933 }, { "epoch": 3.0622950819672132, "grad_norm": 8.887184143066406, "learning_rate": 1.9999979646755155e-05, "loss": 2.4453, "step": 934 }, { "epoch": 3.0655737704918034, "grad_norm": 7.305138111114502, "learning_rate": 1.9999977447928978e-05, "loss": 2.5796, "step": 935 }, { "epoch": 3.0688524590163935, "grad_norm": 10.527942657470703, "learning_rate": 1.9999975136342655e-05, "loss": 2.6045, "step": 936 }, { "epoch": 3.0721311475409836, "grad_norm": 10.81055736541748, "learning_rate": 1.9999972711996216e-05, "loss": 2.6836, "step": 937 }, { "epoch": 3.0754098360655737, "grad_norm": 9.615017890930176, "learning_rate": 1.999997017488969e-05, "loss": 2.5488, "step": 938 }, { "epoch": 3.078688524590164, "grad_norm": 9.555176734924316, "learning_rate": 1.9999967525023098e-05, "loss": 2.4067, "step": 939 }, { "epoch": 3.081967213114754, "grad_norm": 10.501823425292969, "learning_rate": 1.9999964762396476e-05, "loss": 2.5234, "step": 940 }, { "epoch": 3.085245901639344, "grad_norm": 8.979534149169922, "learning_rate": 1.9999961887009855e-05, "loss": 2.4263, "step": 941 }, { "epoch": 3.088524590163934, "grad_norm": 12.115559577941895, "learning_rate": 1.9999958898863266e-05, "loss": 2.5273, "step": 942 }, { "epoch": 3.091803278688525, "grad_norm": 11.81564712524414, "learning_rate": 1.9999955797956744e-05, "loss": 2.543, "step": 943 }, { "epoch": 3.095081967213115, "grad_norm": 11.86068058013916, "learning_rate": 1.9999952584290324e-05, "loss": 2.7173, "step": 944 }, { "epoch": 3.098360655737705, "grad_norm": 10.29223346710205, "learning_rate": 1.999994925786404e-05, "loss": 2.5171, "step": 945 }, { "epoch": 3.101639344262295, "grad_norm": 8.068531036376953, "learning_rate": 1.999994581867793e-05, "loss": 2.6494, "step": 946 }, { "epoch": 3.1049180327868853, "grad_norm": 7.268367290496826, "learning_rate": 1.9999942266732037e-05, "loss": 2.6807, "step": 947 }, { "epoch": 3.1081967213114754, "grad_norm": 9.479072570800781, "learning_rate": 1.9999938602026392e-05, "loss": 2.5928, "step": 948 }, { "epoch": 3.1114754098360655, "grad_norm": 9.300265312194824, "learning_rate": 1.9999934824561046e-05, "loss": 2.4883, "step": 949 }, { "epoch": 3.1147540983606556, "grad_norm": 8.146627426147461, "learning_rate": 1.9999930934336037e-05, "loss": 2.5239, "step": 950 }, { "epoch": 3.1180327868852458, "grad_norm": 14.126138687133789, "learning_rate": 1.9999926931351407e-05, "loss": 2.4556, "step": 951 }, { "epoch": 3.121311475409836, "grad_norm": 8.466760635375977, "learning_rate": 1.9999922815607203e-05, "loss": 2.5151, "step": 952 }, { "epoch": 3.1245901639344265, "grad_norm": 9.84329605102539, "learning_rate": 1.9999918587103476e-05, "loss": 2.4526, "step": 953 }, { "epoch": 3.1278688524590166, "grad_norm": 6.107912063598633, "learning_rate": 1.9999914245840267e-05, "loss": 2.6826, "step": 954 }, { "epoch": 3.1311475409836067, "grad_norm": 10.263236045837402, "learning_rate": 1.999990979181763e-05, "loss": 2.3608, "step": 955 }, { "epoch": 3.134426229508197, "grad_norm": 7.445752143859863, "learning_rate": 1.999990522503561e-05, "loss": 2.4858, "step": 956 }, { "epoch": 3.137704918032787, "grad_norm": 6.834010124206543, "learning_rate": 1.999990054549426e-05, "loss": 2.624, "step": 957 }, { "epoch": 3.140983606557377, "grad_norm": 6.470528602600098, "learning_rate": 1.9999895753193638e-05, "loss": 2.6162, "step": 958 }, { "epoch": 3.144262295081967, "grad_norm": 8.46196174621582, "learning_rate": 1.9999890848133793e-05, "loss": 2.5254, "step": 959 }, { "epoch": 3.1475409836065573, "grad_norm": 8.889535903930664, "learning_rate": 1.999988583031478e-05, "loss": 2.7363, "step": 960 }, { "epoch": 3.1508196721311474, "grad_norm": 12.471363067626953, "learning_rate": 1.999988069973666e-05, "loss": 2.5503, "step": 961 }, { "epoch": 3.1540983606557376, "grad_norm": 7.4106597900390625, "learning_rate": 1.9999875456399485e-05, "loss": 2.6699, "step": 962 }, { "epoch": 3.1573770491803277, "grad_norm": 12.486066818237305, "learning_rate": 1.9999870100303317e-05, "loss": 2.5356, "step": 963 }, { "epoch": 3.160655737704918, "grad_norm": 8.479612350463867, "learning_rate": 1.9999864631448215e-05, "loss": 2.5537, "step": 964 }, { "epoch": 3.1639344262295084, "grad_norm": 5.7135186195373535, "learning_rate": 1.9999859049834244e-05, "loss": 2.5723, "step": 965 }, { "epoch": 3.1672131147540985, "grad_norm": 9.543395042419434, "learning_rate": 1.999985335546147e-05, "loss": 2.458, "step": 966 }, { "epoch": 3.1704918032786886, "grad_norm": 9.243505477905273, "learning_rate": 1.9999847548329943e-05, "loss": 2.4229, "step": 967 }, { "epoch": 3.1737704918032787, "grad_norm": 8.582405090332031, "learning_rate": 1.999984162843974e-05, "loss": 2.5864, "step": 968 }, { "epoch": 3.177049180327869, "grad_norm": 9.936787605285645, "learning_rate": 1.9999835595790927e-05, "loss": 2.5464, "step": 969 }, { "epoch": 3.180327868852459, "grad_norm": 8.625935554504395, "learning_rate": 1.9999829450383573e-05, "loss": 2.522, "step": 970 }, { "epoch": 3.183606557377049, "grad_norm": 9.894515991210938, "learning_rate": 1.9999823192217743e-05, "loss": 2.5645, "step": 971 }, { "epoch": 3.1868852459016392, "grad_norm": 8.795000076293945, "learning_rate": 1.9999816821293508e-05, "loss": 2.3696, "step": 972 }, { "epoch": 3.1901639344262294, "grad_norm": 7.390370845794678, "learning_rate": 1.999981033761094e-05, "loss": 2.6738, "step": 973 }, { "epoch": 3.1934426229508195, "grad_norm": 10.03864860534668, "learning_rate": 1.9999803741170115e-05, "loss": 2.6328, "step": 974 }, { "epoch": 3.19672131147541, "grad_norm": 11.004067420959473, "learning_rate": 1.9999797031971106e-05, "loss": 2.7891, "step": 975 }, { "epoch": 3.2, "grad_norm": 10.63773250579834, "learning_rate": 1.999979021001399e-05, "loss": 2.4146, "step": 976 }, { "epoch": 3.2032786885245903, "grad_norm": 18.479623794555664, "learning_rate": 1.9999783275298838e-05, "loss": 2.6377, "step": 977 }, { "epoch": 3.2065573770491804, "grad_norm": 9.630121231079102, "learning_rate": 1.9999776227825736e-05, "loss": 2.5654, "step": 978 }, { "epoch": 3.2098360655737705, "grad_norm": 10.646713256835938, "learning_rate": 1.999976906759476e-05, "loss": 2.5625, "step": 979 }, { "epoch": 3.2131147540983607, "grad_norm": 6.547485828399658, "learning_rate": 1.9999761794605986e-05, "loss": 2.646, "step": 980 }, { "epoch": 3.216393442622951, "grad_norm": 8.353838920593262, "learning_rate": 1.9999754408859506e-05, "loss": 2.6025, "step": 981 }, { "epoch": 3.219672131147541, "grad_norm": 6.407874584197998, "learning_rate": 1.9999746910355396e-05, "loss": 2.7129, "step": 982 }, { "epoch": 3.222950819672131, "grad_norm": 10.026453971862793, "learning_rate": 1.999973929909374e-05, "loss": 2.479, "step": 983 }, { "epoch": 3.226229508196721, "grad_norm": 8.444430351257324, "learning_rate": 1.999973157507463e-05, "loss": 2.4136, "step": 984 }, { "epoch": 3.2295081967213113, "grad_norm": 8.352838516235352, "learning_rate": 1.9999723738298146e-05, "loss": 2.625, "step": 985 }, { "epoch": 3.2327868852459014, "grad_norm": 8.967344284057617, "learning_rate": 1.9999715788764384e-05, "loss": 2.4536, "step": 986 }, { "epoch": 3.236065573770492, "grad_norm": 8.589666366577148, "learning_rate": 1.9999707726473427e-05, "loss": 2.5566, "step": 987 }, { "epoch": 3.239344262295082, "grad_norm": 9.392852783203125, "learning_rate": 1.9999699551425365e-05, "loss": 2.606, "step": 988 }, { "epoch": 3.2426229508196722, "grad_norm": 9.048327445983887, "learning_rate": 1.99996912636203e-05, "loss": 2.5684, "step": 989 }, { "epoch": 3.2459016393442623, "grad_norm": 6.722256183624268, "learning_rate": 1.9999682863058314e-05, "loss": 2.5029, "step": 990 }, { "epoch": 3.2491803278688525, "grad_norm": 7.99813985824585, "learning_rate": 1.9999674349739507e-05, "loss": 2.3618, "step": 991 }, { "epoch": 3.2524590163934426, "grad_norm": 9.017924308776855, "learning_rate": 1.9999665723663976e-05, "loss": 2.6338, "step": 992 }, { "epoch": 3.2557377049180327, "grad_norm": 10.350461959838867, "learning_rate": 1.9999656984831815e-05, "loss": 2.6152, "step": 993 }, { "epoch": 3.259016393442623, "grad_norm": 11.45598030090332, "learning_rate": 1.999964813324313e-05, "loss": 2.5039, "step": 994 }, { "epoch": 3.262295081967213, "grad_norm": 8.116050720214844, "learning_rate": 1.999963916889801e-05, "loss": 2.3936, "step": 995 }, { "epoch": 3.265573770491803, "grad_norm": 8.33285903930664, "learning_rate": 1.9999630091796565e-05, "loss": 2.4097, "step": 996 }, { "epoch": 3.2688524590163937, "grad_norm": 7.075627326965332, "learning_rate": 1.999962090193889e-05, "loss": 2.6021, "step": 997 }, { "epoch": 3.2721311475409838, "grad_norm": 12.9270601272583, "learning_rate": 1.9999611599325095e-05, "loss": 2.5566, "step": 998 }, { "epoch": 3.275409836065574, "grad_norm": 8.043966293334961, "learning_rate": 1.999960218395528e-05, "loss": 2.5415, "step": 999 }, { "epoch": 3.278688524590164, "grad_norm": 11.196298599243164, "learning_rate": 1.9999592655829553e-05, "loss": 2.4707, "step": 1000 }, { "epoch": 3.281967213114754, "grad_norm": 12.312674522399902, "learning_rate": 1.9999583014948025e-05, "loss": 2.6968, "step": 1001 }, { "epoch": 3.2852459016393443, "grad_norm": 9.54806900024414, "learning_rate": 1.99995732613108e-05, "loss": 2.2861, "step": 1002 }, { "epoch": 3.2885245901639344, "grad_norm": 8.33041763305664, "learning_rate": 1.9999563394917988e-05, "loss": 2.6426, "step": 1003 }, { "epoch": 3.2918032786885245, "grad_norm": 7.63833475112915, "learning_rate": 1.9999553415769702e-05, "loss": 2.4487, "step": 1004 }, { "epoch": 3.2950819672131146, "grad_norm": 8.66500473022461, "learning_rate": 1.9999543323866058e-05, "loss": 2.5098, "step": 1005 }, { "epoch": 3.2983606557377048, "grad_norm": 8.31765079498291, "learning_rate": 1.9999533119207162e-05, "loss": 2.4185, "step": 1006 }, { "epoch": 3.301639344262295, "grad_norm": 8.071196556091309, "learning_rate": 1.9999522801793135e-05, "loss": 2.6299, "step": 1007 }, { "epoch": 3.304918032786885, "grad_norm": 12.039742469787598, "learning_rate": 1.999951237162409e-05, "loss": 2.4541, "step": 1008 }, { "epoch": 3.3081967213114756, "grad_norm": 9.734672546386719, "learning_rate": 1.999950182870015e-05, "loss": 2.458, "step": 1009 }, { "epoch": 3.3114754098360657, "grad_norm": 11.262027740478516, "learning_rate": 1.9999491173021427e-05, "loss": 2.4575, "step": 1010 }, { "epoch": 3.314754098360656, "grad_norm": 10.107178688049316, "learning_rate": 1.9999480404588044e-05, "loss": 2.4092, "step": 1011 }, { "epoch": 3.318032786885246, "grad_norm": 8.701667785644531, "learning_rate": 1.9999469523400122e-05, "loss": 2.4854, "step": 1012 }, { "epoch": 3.321311475409836, "grad_norm": 8.253076553344727, "learning_rate": 1.9999458529457787e-05, "loss": 2.5, "step": 1013 }, { "epoch": 3.324590163934426, "grad_norm": 9.387953758239746, "learning_rate": 1.999944742276116e-05, "loss": 2.3911, "step": 1014 }, { "epoch": 3.3278688524590163, "grad_norm": 8.177924156188965, "learning_rate": 1.9999436203310366e-05, "loss": 2.3848, "step": 1015 }, { "epoch": 3.3311475409836064, "grad_norm": 13.475935935974121, "learning_rate": 1.9999424871105528e-05, "loss": 2.708, "step": 1016 }, { "epoch": 3.3344262295081966, "grad_norm": 7.21196174621582, "learning_rate": 1.9999413426146785e-05, "loss": 2.5107, "step": 1017 }, { "epoch": 3.337704918032787, "grad_norm": 10.945043563842773, "learning_rate": 1.9999401868434254e-05, "loss": 2.6094, "step": 1018 }, { "epoch": 3.3409836065573773, "grad_norm": 8.765878677368164, "learning_rate": 1.999939019796807e-05, "loss": 2.4375, "step": 1019 }, { "epoch": 3.3442622950819674, "grad_norm": 8.606671333312988, "learning_rate": 1.9999378414748365e-05, "loss": 2.7705, "step": 1020 }, { "epoch": 3.3475409836065575, "grad_norm": 11.605402946472168, "learning_rate": 1.9999366518775273e-05, "loss": 2.5645, "step": 1021 }, { "epoch": 3.3508196721311476, "grad_norm": 10.742819786071777, "learning_rate": 1.9999354510048924e-05, "loss": 2.54, "step": 1022 }, { "epoch": 3.3540983606557377, "grad_norm": 9.042339324951172, "learning_rate": 1.999934238856946e-05, "loss": 2.6143, "step": 1023 }, { "epoch": 3.357377049180328, "grad_norm": 8.476964950561523, "learning_rate": 1.999933015433701e-05, "loss": 2.4707, "step": 1024 }, { "epoch": 3.360655737704918, "grad_norm": 8.318426132202148, "learning_rate": 1.999931780735172e-05, "loss": 2.4468, "step": 1025 }, { "epoch": 3.363934426229508, "grad_norm": 8.739582061767578, "learning_rate": 1.9999305347613723e-05, "loss": 2.541, "step": 1026 }, { "epoch": 3.3672131147540982, "grad_norm": 7.144016742706299, "learning_rate": 1.9999292775123162e-05, "loss": 2.5762, "step": 1027 }, { "epoch": 3.3704918032786884, "grad_norm": 7.552481174468994, "learning_rate": 1.999928008988018e-05, "loss": 2.8027, "step": 1028 }, { "epoch": 3.3737704918032785, "grad_norm": 12.724674224853516, "learning_rate": 1.9999267291884914e-05, "loss": 2.5459, "step": 1029 }, { "epoch": 3.3770491803278686, "grad_norm": 8.015802383422852, "learning_rate": 1.9999254381137515e-05, "loss": 2.5483, "step": 1030 }, { "epoch": 3.380327868852459, "grad_norm": 8.618480682373047, "learning_rate": 1.9999241357638126e-05, "loss": 2.7588, "step": 1031 }, { "epoch": 3.3836065573770493, "grad_norm": 10.828287124633789, "learning_rate": 1.9999228221386894e-05, "loss": 2.8389, "step": 1032 }, { "epoch": 3.3868852459016394, "grad_norm": 7.304479598999023, "learning_rate": 1.999921497238397e-05, "loss": 2.4102, "step": 1033 }, { "epoch": 3.3901639344262295, "grad_norm": 6.833317279815674, "learning_rate": 1.9999201610629497e-05, "loss": 2.522, "step": 1034 }, { "epoch": 3.3934426229508197, "grad_norm": 9.132148742675781, "learning_rate": 1.999918813612363e-05, "loss": 2.4224, "step": 1035 }, { "epoch": 3.39672131147541, "grad_norm": 37.22112274169922, "learning_rate": 1.999917454886652e-05, "loss": 2.5566, "step": 1036 }, { "epoch": 3.4, "grad_norm": 7.276547431945801, "learning_rate": 1.999916084885832e-05, "loss": 2.3467, "step": 1037 }, { "epoch": 3.40327868852459, "grad_norm": 7.194701194763184, "learning_rate": 1.9999147036099184e-05, "loss": 2.4985, "step": 1038 }, { "epoch": 3.40655737704918, "grad_norm": 13.528975486755371, "learning_rate": 1.9999133110589272e-05, "loss": 2.48, "step": 1039 }, { "epoch": 3.4098360655737707, "grad_norm": 11.268060684204102, "learning_rate": 1.9999119072328738e-05, "loss": 2.5273, "step": 1040 }, { "epoch": 3.413114754098361, "grad_norm": 10.659382820129395, "learning_rate": 1.9999104921317737e-05, "loss": 2.729, "step": 1041 }, { "epoch": 3.416393442622951, "grad_norm": 5.9398274421691895, "learning_rate": 1.9999090657556433e-05, "loss": 2.4585, "step": 1042 }, { "epoch": 3.419672131147541, "grad_norm": 8.587797164916992, "learning_rate": 1.9999076281044984e-05, "loss": 2.6084, "step": 1043 }, { "epoch": 3.422950819672131, "grad_norm": 7.14030647277832, "learning_rate": 1.9999061791783556e-05, "loss": 2.6528, "step": 1044 }, { "epoch": 3.4262295081967213, "grad_norm": 8.487658500671387, "learning_rate": 1.9999047189772305e-05, "loss": 2.5259, "step": 1045 }, { "epoch": 3.4295081967213115, "grad_norm": 8.024482727050781, "learning_rate": 1.9999032475011408e-05, "loss": 2.5596, "step": 1046 }, { "epoch": 3.4327868852459016, "grad_norm": 12.274941444396973, "learning_rate": 1.9999017647501017e-05, "loss": 2.5649, "step": 1047 }, { "epoch": 3.4360655737704917, "grad_norm": 10.351056098937988, "learning_rate": 1.999900270724131e-05, "loss": 2.436, "step": 1048 }, { "epoch": 3.439344262295082, "grad_norm": 7.7278947830200195, "learning_rate": 1.999898765423245e-05, "loss": 2.3975, "step": 1049 }, { "epoch": 3.442622950819672, "grad_norm": 9.229077339172363, "learning_rate": 1.9998972488474607e-05, "loss": 2.5156, "step": 1050 }, { "epoch": 3.445901639344262, "grad_norm": 8.97681999206543, "learning_rate": 1.9998957209967953e-05, "loss": 2.5454, "step": 1051 }, { "epoch": 3.4491803278688526, "grad_norm": 6.999489784240723, "learning_rate": 1.999894181871266e-05, "loss": 2.3906, "step": 1052 }, { "epoch": 3.4524590163934428, "grad_norm": 9.013455390930176, "learning_rate": 1.99989263147089e-05, "loss": 2.54, "step": 1053 }, { "epoch": 3.455737704918033, "grad_norm": 12.050437927246094, "learning_rate": 1.9998910697956853e-05, "loss": 2.5518, "step": 1054 }, { "epoch": 3.459016393442623, "grad_norm": 6.409142017364502, "learning_rate": 1.999889496845669e-05, "loss": 2.54, "step": 1055 }, { "epoch": 3.462295081967213, "grad_norm": 7.117398738861084, "learning_rate": 1.999887912620859e-05, "loss": 2.625, "step": 1056 }, { "epoch": 3.4655737704918033, "grad_norm": 8.64953327178955, "learning_rate": 1.999886317121273e-05, "loss": 2.4683, "step": 1057 }, { "epoch": 3.4688524590163934, "grad_norm": 7.707614421844482, "learning_rate": 1.9998847103469294e-05, "loss": 2.498, "step": 1058 }, { "epoch": 3.4721311475409835, "grad_norm": 8.171473503112793, "learning_rate": 1.999883092297846e-05, "loss": 2.2852, "step": 1059 }, { "epoch": 3.4754098360655736, "grad_norm": 9.552652359008789, "learning_rate": 1.999881462974041e-05, "loss": 2.4272, "step": 1060 }, { "epoch": 3.4786885245901638, "grad_norm": 7.0876054763793945, "learning_rate": 1.999879822375533e-05, "loss": 2.4258, "step": 1061 }, { "epoch": 3.4819672131147543, "grad_norm": 8.02631950378418, "learning_rate": 1.9998781705023405e-05, "loss": 2.3208, "step": 1062 }, { "epoch": 3.4852459016393444, "grad_norm": 8.258910179138184, "learning_rate": 1.9998765073544818e-05, "loss": 2.5952, "step": 1063 }, { "epoch": 3.4885245901639346, "grad_norm": 8.260116577148438, "learning_rate": 1.999874832931976e-05, "loss": 2.5264, "step": 1064 }, { "epoch": 3.4918032786885247, "grad_norm": 7.401209354400635, "learning_rate": 1.9998731472348418e-05, "loss": 2.4175, "step": 1065 }, { "epoch": 3.495081967213115, "grad_norm": 8.415180206298828, "learning_rate": 1.999871450263098e-05, "loss": 2.54, "step": 1066 }, { "epoch": 3.498360655737705, "grad_norm": 7.983108997344971, "learning_rate": 1.9998697420167645e-05, "loss": 2.5161, "step": 1067 }, { "epoch": 3.501639344262295, "grad_norm": 7.6998701095581055, "learning_rate": 1.99986802249586e-05, "loss": 2.5054, "step": 1068 }, { "epoch": 3.504918032786885, "grad_norm": 7.636318206787109, "learning_rate": 1.9998662917004033e-05, "loss": 2.5391, "step": 1069 }, { "epoch": 3.5081967213114753, "grad_norm": 8.81999683380127, "learning_rate": 1.999864549630415e-05, "loss": 2.4165, "step": 1070 }, { "epoch": 3.5114754098360654, "grad_norm": 9.513495445251465, "learning_rate": 1.9998627962859145e-05, "loss": 2.4106, "step": 1071 }, { "epoch": 3.5147540983606556, "grad_norm": 7.4322967529296875, "learning_rate": 1.9998610316669213e-05, "loss": 2.4165, "step": 1072 }, { "epoch": 3.5180327868852457, "grad_norm": 7.662577152252197, "learning_rate": 1.9998592557734553e-05, "loss": 2.3936, "step": 1073 }, { "epoch": 3.521311475409836, "grad_norm": 8.410754203796387, "learning_rate": 1.9998574686055366e-05, "loss": 2.5933, "step": 1074 }, { "epoch": 3.5245901639344264, "grad_norm": 8.758652687072754, "learning_rate": 1.9998556701631852e-05, "loss": 2.4985, "step": 1075 }, { "epoch": 3.5278688524590165, "grad_norm": 7.701985836029053, "learning_rate": 1.9998538604464218e-05, "loss": 2.5562, "step": 1076 }, { "epoch": 3.5311475409836066, "grad_norm": 17.00328826904297, "learning_rate": 1.9998520394552663e-05, "loss": 2.3574, "step": 1077 }, { "epoch": 3.5344262295081967, "grad_norm": 8.27288818359375, "learning_rate": 1.9998502071897397e-05, "loss": 2.5869, "step": 1078 }, { "epoch": 3.537704918032787, "grad_norm": 9.73587417602539, "learning_rate": 1.999848363649862e-05, "loss": 2.3765, "step": 1079 }, { "epoch": 3.540983606557377, "grad_norm": 26.910167694091797, "learning_rate": 1.999846508835655e-05, "loss": 2.4502, "step": 1080 }, { "epoch": 3.544262295081967, "grad_norm": 8.435151100158691, "learning_rate": 1.9998446427471386e-05, "loss": 2.4604, "step": 1081 }, { "epoch": 3.5475409836065572, "grad_norm": 10.184490203857422, "learning_rate": 1.9998427653843345e-05, "loss": 2.4937, "step": 1082 }, { "epoch": 3.550819672131148, "grad_norm": 8.945355415344238, "learning_rate": 1.9998408767472633e-05, "loss": 2.6147, "step": 1083 }, { "epoch": 3.554098360655738, "grad_norm": 6.778802394866943, "learning_rate": 1.9998389768359468e-05, "loss": 2.396, "step": 1084 }, { "epoch": 3.557377049180328, "grad_norm": 9.711347579956055, "learning_rate": 1.9998370656504066e-05, "loss": 2.4839, "step": 1085 }, { "epoch": 3.560655737704918, "grad_norm": 6.233287811279297, "learning_rate": 1.9998351431906637e-05, "loss": 2.397, "step": 1086 }, { "epoch": 3.5639344262295083, "grad_norm": 8.602604866027832, "learning_rate": 1.99983320945674e-05, "loss": 2.6426, "step": 1087 }, { "epoch": 3.5672131147540984, "grad_norm": 7.0858330726623535, "learning_rate": 1.9998312644486574e-05, "loss": 2.4673, "step": 1088 }, { "epoch": 3.5704918032786885, "grad_norm": 8.051176071166992, "learning_rate": 1.9998293081664376e-05, "loss": 2.4634, "step": 1089 }, { "epoch": 3.5737704918032787, "grad_norm": 46.63436508178711, "learning_rate": 1.9998273406101026e-05, "loss": 2.6079, "step": 1090 }, { "epoch": 3.577049180327869, "grad_norm": 7.245494365692139, "learning_rate": 1.999825361779675e-05, "loss": 2.4229, "step": 1091 }, { "epoch": 3.580327868852459, "grad_norm": 8.619274139404297, "learning_rate": 1.9998233716751766e-05, "loss": 2.5645, "step": 1092 }, { "epoch": 3.583606557377049, "grad_norm": 8.048002243041992, "learning_rate": 1.9998213702966307e-05, "loss": 2.5176, "step": 1093 }, { "epoch": 3.586885245901639, "grad_norm": 13.449807167053223, "learning_rate": 1.999819357644059e-05, "loss": 2.5034, "step": 1094 }, { "epoch": 3.5901639344262293, "grad_norm": 10.545843124389648, "learning_rate": 1.999817333717484e-05, "loss": 2.4229, "step": 1095 }, { "epoch": 3.5934426229508194, "grad_norm": 7.5046892166137695, "learning_rate": 1.99981529851693e-05, "loss": 2.623, "step": 1096 }, { "epoch": 3.59672131147541, "grad_norm": 9.909340858459473, "learning_rate": 1.9998132520424183e-05, "loss": 2.3257, "step": 1097 }, { "epoch": 3.6, "grad_norm": 15.407533645629883, "learning_rate": 1.9998111942939727e-05, "loss": 2.4067, "step": 1098 }, { "epoch": 3.60327868852459, "grad_norm": 7.203710079193115, "learning_rate": 1.9998091252716166e-05, "loss": 2.2534, "step": 1099 }, { "epoch": 3.6065573770491803, "grad_norm": 9.455061912536621, "learning_rate": 1.9998070449753728e-05, "loss": 2.4888, "step": 1100 }, { "epoch": 3.6098360655737705, "grad_norm": 12.68065071105957, "learning_rate": 1.999804953405265e-05, "loss": 2.6348, "step": 1101 }, { "epoch": 3.6131147540983606, "grad_norm": 8.782686233520508, "learning_rate": 1.999802850561317e-05, "loss": 2.5859, "step": 1102 }, { "epoch": 3.6163934426229507, "grad_norm": 13.915461540222168, "learning_rate": 1.9998007364435522e-05, "loss": 2.5537, "step": 1103 }, { "epoch": 3.619672131147541, "grad_norm": 13.791587829589844, "learning_rate": 1.9997986110519947e-05, "loss": 2.4688, "step": 1104 }, { "epoch": 3.6229508196721314, "grad_norm": 20.60467529296875, "learning_rate": 1.999796474386668e-05, "loss": 2.6475, "step": 1105 }, { "epoch": 3.6262295081967215, "grad_norm": 10.511011123657227, "learning_rate": 1.9997943264475973e-05, "loss": 2.5933, "step": 1106 }, { "epoch": 3.6295081967213116, "grad_norm": 12.208161354064941, "learning_rate": 1.9997921672348053e-05, "loss": 2.4634, "step": 1107 }, { "epoch": 3.6327868852459018, "grad_norm": 10.338377952575684, "learning_rate": 1.999789996748317e-05, "loss": 2.334, "step": 1108 }, { "epoch": 3.636065573770492, "grad_norm": 10.38150405883789, "learning_rate": 1.9997878149881576e-05, "loss": 2.624, "step": 1109 }, { "epoch": 3.639344262295082, "grad_norm": 10.902780532836914, "learning_rate": 1.9997856219543506e-05, "loss": 2.6865, "step": 1110 }, { "epoch": 3.642622950819672, "grad_norm": 8.980260848999023, "learning_rate": 1.9997834176469214e-05, "loss": 2.5137, "step": 1111 }, { "epoch": 3.6459016393442623, "grad_norm": 13.631241798400879, "learning_rate": 1.9997812020658947e-05, "loss": 2.5303, "step": 1112 }, { "epoch": 3.6491803278688524, "grad_norm": 19.392169952392578, "learning_rate": 1.999778975211295e-05, "loss": 2.3867, "step": 1113 }, { "epoch": 3.6524590163934425, "grad_norm": 8.234687805175781, "learning_rate": 1.9997767370831485e-05, "loss": 2.4087, "step": 1114 }, { "epoch": 3.6557377049180326, "grad_norm": 12.392560958862305, "learning_rate": 1.9997744876814792e-05, "loss": 2.5146, "step": 1115 }, { "epoch": 3.6590163934426227, "grad_norm": 9.365938186645508, "learning_rate": 1.9997722270063137e-05, "loss": 2.3887, "step": 1116 }, { "epoch": 3.662295081967213, "grad_norm": 14.384208679199219, "learning_rate": 1.9997699550576763e-05, "loss": 2.8018, "step": 1117 }, { "epoch": 3.6655737704918034, "grad_norm": 7.792603969573975, "learning_rate": 1.9997676718355935e-05, "loss": 2.5469, "step": 1118 }, { "epoch": 3.6688524590163936, "grad_norm": 12.322665214538574, "learning_rate": 1.9997653773400903e-05, "loss": 2.4434, "step": 1119 }, { "epoch": 3.6721311475409837, "grad_norm": 9.611536979675293, "learning_rate": 1.9997630715711932e-05, "loss": 2.5571, "step": 1120 }, { "epoch": 3.675409836065574, "grad_norm": 11.296429634094238, "learning_rate": 1.999760754528928e-05, "loss": 2.4644, "step": 1121 }, { "epoch": 3.678688524590164, "grad_norm": 9.094149589538574, "learning_rate": 1.9997584262133207e-05, "loss": 2.5459, "step": 1122 }, { "epoch": 3.681967213114754, "grad_norm": 10.407469749450684, "learning_rate": 1.9997560866243977e-05, "loss": 2.4038, "step": 1123 }, { "epoch": 3.685245901639344, "grad_norm": 7.562070369720459, "learning_rate": 1.999753735762185e-05, "loss": 2.5039, "step": 1124 }, { "epoch": 3.6885245901639343, "grad_norm": 10.490191459655762, "learning_rate": 1.99975137362671e-05, "loss": 2.4199, "step": 1125 }, { "epoch": 3.6918032786885244, "grad_norm": 13.462574005126953, "learning_rate": 1.9997490002179987e-05, "loss": 2.3379, "step": 1126 }, { "epoch": 3.695081967213115, "grad_norm": 9.644124984741211, "learning_rate": 1.9997466155360777e-05, "loss": 2.6709, "step": 1127 }, { "epoch": 3.698360655737705, "grad_norm": 11.05855941772461, "learning_rate": 1.9997442195809742e-05, "loss": 2.6172, "step": 1128 }, { "epoch": 3.7016393442622952, "grad_norm": 8.339716911315918, "learning_rate": 1.9997418123527153e-05, "loss": 2.3052, "step": 1129 }, { "epoch": 3.7049180327868854, "grad_norm": 8.703628540039062, "learning_rate": 1.999739393851328e-05, "loss": 2.5342, "step": 1130 }, { "epoch": 3.7081967213114755, "grad_norm": 6.976150989532471, "learning_rate": 1.9997369640768395e-05, "loss": 2.4214, "step": 1131 }, { "epoch": 3.7114754098360656, "grad_norm": 15.241157531738281, "learning_rate": 1.999734523029277e-05, "loss": 2.3462, "step": 1132 }, { "epoch": 3.7147540983606557, "grad_norm": 7.6587395668029785, "learning_rate": 1.9997320707086686e-05, "loss": 2.5376, "step": 1133 }, { "epoch": 3.718032786885246, "grad_norm": 9.125812530517578, "learning_rate": 1.9997296071150417e-05, "loss": 2.4897, "step": 1134 }, { "epoch": 3.721311475409836, "grad_norm": 6.397696495056152, "learning_rate": 1.9997271322484237e-05, "loss": 2.4473, "step": 1135 }, { "epoch": 3.724590163934426, "grad_norm": 7.435883522033691, "learning_rate": 1.999724646108843e-05, "loss": 2.3574, "step": 1136 }, { "epoch": 3.7278688524590162, "grad_norm": 8.100706100463867, "learning_rate": 1.9997221486963276e-05, "loss": 2.4106, "step": 1137 }, { "epoch": 3.7311475409836063, "grad_norm": 16.62838363647461, "learning_rate": 1.9997196400109055e-05, "loss": 2.355, "step": 1138 }, { "epoch": 3.7344262295081965, "grad_norm": 11.141331672668457, "learning_rate": 1.9997171200526048e-05, "loss": 2.4097, "step": 1139 }, { "epoch": 3.737704918032787, "grad_norm": 9.208991050720215, "learning_rate": 1.9997145888214542e-05, "loss": 2.4668, "step": 1140 }, { "epoch": 3.740983606557377, "grad_norm": 8.903801918029785, "learning_rate": 1.999712046317482e-05, "loss": 2.4341, "step": 1141 }, { "epoch": 3.7442622950819673, "grad_norm": 7.412717342376709, "learning_rate": 1.9997094925407173e-05, "loss": 2.5171, "step": 1142 }, { "epoch": 3.7475409836065574, "grad_norm": 12.357730865478516, "learning_rate": 1.9997069274911886e-05, "loss": 2.5889, "step": 1143 }, { "epoch": 3.7508196721311475, "grad_norm": 16.167156219482422, "learning_rate": 1.999704351168925e-05, "loss": 2.6328, "step": 1144 }, { "epoch": 3.7540983606557377, "grad_norm": 12.978521347045898, "learning_rate": 1.9997017635739554e-05, "loss": 2.5303, "step": 1145 }, { "epoch": 3.7573770491803278, "grad_norm": 14.716732025146484, "learning_rate": 1.9996991647063085e-05, "loss": 2.6895, "step": 1146 }, { "epoch": 3.760655737704918, "grad_norm": 9.328570365905762, "learning_rate": 1.9996965545660145e-05, "loss": 2.3926, "step": 1147 }, { "epoch": 3.7639344262295085, "grad_norm": 7.146029472351074, "learning_rate": 1.999693933153102e-05, "loss": 2.397, "step": 1148 }, { "epoch": 3.7672131147540986, "grad_norm": 9.458820343017578, "learning_rate": 1.9996913004676015e-05, "loss": 2.4438, "step": 1149 }, { "epoch": 3.7704918032786887, "grad_norm": 12.254371643066406, "learning_rate": 1.9996886565095422e-05, "loss": 2.4971, "step": 1150 }, { "epoch": 3.773770491803279, "grad_norm": 9.237293243408203, "learning_rate": 1.9996860012789536e-05, "loss": 2.6196, "step": 1151 }, { "epoch": 3.777049180327869, "grad_norm": 7.544373512268066, "learning_rate": 1.9996833347758658e-05, "loss": 2.5298, "step": 1152 }, { "epoch": 3.780327868852459, "grad_norm": 7.315064430236816, "learning_rate": 1.9996806570003095e-05, "loss": 2.5493, "step": 1153 }, { "epoch": 3.783606557377049, "grad_norm": 8.56457805633545, "learning_rate": 1.9996779679523143e-05, "loss": 2.5391, "step": 1154 }, { "epoch": 3.7868852459016393, "grad_norm": 7.406822204589844, "learning_rate": 1.99967526763191e-05, "loss": 2.5151, "step": 1155 }, { "epoch": 3.7901639344262295, "grad_norm": 10.355732917785645, "learning_rate": 1.999672556039128e-05, "loss": 2.3218, "step": 1156 }, { "epoch": 3.7934426229508196, "grad_norm": 6.795393943786621, "learning_rate": 1.999669833173999e-05, "loss": 2.2749, "step": 1157 }, { "epoch": 3.7967213114754097, "grad_norm": 7.5008015632629395, "learning_rate": 1.9996670990365524e-05, "loss": 2.4277, "step": 1158 }, { "epoch": 3.8, "grad_norm": 8.136270523071289, "learning_rate": 1.9996643536268202e-05, "loss": 2.4434, "step": 1159 }, { "epoch": 3.80327868852459, "grad_norm": 8.368075370788574, "learning_rate": 1.9996615969448333e-05, "loss": 2.4668, "step": 1160 }, { "epoch": 3.80655737704918, "grad_norm": 6.0503339767456055, "learning_rate": 1.9996588289906223e-05, "loss": 2.314, "step": 1161 }, { "epoch": 3.8098360655737706, "grad_norm": 8.184891700744629, "learning_rate": 1.9996560497642185e-05, "loss": 2.3086, "step": 1162 }, { "epoch": 3.8131147540983608, "grad_norm": 8.195368766784668, "learning_rate": 1.9996532592656534e-05, "loss": 2.4707, "step": 1163 }, { "epoch": 3.816393442622951, "grad_norm": 10.186288833618164, "learning_rate": 1.9996504574949588e-05, "loss": 2.5811, "step": 1164 }, { "epoch": 3.819672131147541, "grad_norm": 7.433164596557617, "learning_rate": 1.9996476444521656e-05, "loss": 2.4033, "step": 1165 }, { "epoch": 3.822950819672131, "grad_norm": 7.608038425445557, "learning_rate": 1.999644820137306e-05, "loss": 2.332, "step": 1166 }, { "epoch": 3.8262295081967213, "grad_norm": 7.975740909576416, "learning_rate": 1.9996419845504113e-05, "loss": 2.3594, "step": 1167 }, { "epoch": 3.8295081967213114, "grad_norm": 7.50789737701416, "learning_rate": 1.999639137691514e-05, "loss": 2.4072, "step": 1168 }, { "epoch": 3.8327868852459015, "grad_norm": 6.985324382781982, "learning_rate": 1.999636279560646e-05, "loss": 2.3853, "step": 1169 }, { "epoch": 3.836065573770492, "grad_norm": 9.697884559631348, "learning_rate": 1.99963341015784e-05, "loss": 2.4883, "step": 1170 }, { "epoch": 3.839344262295082, "grad_norm": 7.200610637664795, "learning_rate": 1.9996305294831275e-05, "loss": 2.4707, "step": 1171 }, { "epoch": 3.8426229508196723, "grad_norm": 8.145841598510742, "learning_rate": 1.9996276375365417e-05, "loss": 2.5469, "step": 1172 }, { "epoch": 3.8459016393442624, "grad_norm": 7.77617883682251, "learning_rate": 1.9996247343181147e-05, "loss": 2.5, "step": 1173 }, { "epoch": 3.8491803278688526, "grad_norm": 7.07886266708374, "learning_rate": 1.9996218198278798e-05, "loss": 2.4448, "step": 1174 }, { "epoch": 3.8524590163934427, "grad_norm": 8.599520683288574, "learning_rate": 1.999618894065869e-05, "loss": 2.564, "step": 1175 }, { "epoch": 3.855737704918033, "grad_norm": 7.856714725494385, "learning_rate": 1.9996159570321162e-05, "loss": 2.4585, "step": 1176 }, { "epoch": 3.859016393442623, "grad_norm": 9.131505012512207, "learning_rate": 1.9996130087266544e-05, "loss": 2.4834, "step": 1177 }, { "epoch": 3.862295081967213, "grad_norm": 12.291702270507812, "learning_rate": 1.9996100491495164e-05, "loss": 2.4424, "step": 1178 }, { "epoch": 3.865573770491803, "grad_norm": 7.869556903839111, "learning_rate": 1.9996070783007354e-05, "loss": 2.4663, "step": 1179 }, { "epoch": 3.8688524590163933, "grad_norm": 10.647014617919922, "learning_rate": 1.9996040961803454e-05, "loss": 2.416, "step": 1180 }, { "epoch": 3.8721311475409834, "grad_norm": 9.316129684448242, "learning_rate": 1.9996011027883803e-05, "loss": 2.417, "step": 1181 }, { "epoch": 3.8754098360655735, "grad_norm": 8.0274019241333, "learning_rate": 1.999598098124873e-05, "loss": 2.4062, "step": 1182 }, { "epoch": 3.8786885245901637, "grad_norm": 11.731205940246582, "learning_rate": 1.999595082189858e-05, "loss": 2.5312, "step": 1183 }, { "epoch": 3.8819672131147542, "grad_norm": 8.19453239440918, "learning_rate": 1.999592054983369e-05, "loss": 2.6147, "step": 1184 }, { "epoch": 3.8852459016393444, "grad_norm": 7.780589580535889, "learning_rate": 1.9995890165054404e-05, "loss": 2.3779, "step": 1185 }, { "epoch": 3.8885245901639345, "grad_norm": 9.532950401306152, "learning_rate": 1.9995859667561063e-05, "loss": 2.5059, "step": 1186 }, { "epoch": 3.8918032786885246, "grad_norm": 5.131052494049072, "learning_rate": 1.9995829057354012e-05, "loss": 2.4517, "step": 1187 }, { "epoch": 3.8950819672131147, "grad_norm": 8.857417106628418, "learning_rate": 1.9995798334433595e-05, "loss": 2.3752, "step": 1188 }, { "epoch": 3.898360655737705, "grad_norm": 5.925896644592285, "learning_rate": 1.9995767498800158e-05, "loss": 2.6318, "step": 1189 }, { "epoch": 3.901639344262295, "grad_norm": 8.34481430053711, "learning_rate": 1.999573655045405e-05, "loss": 2.3643, "step": 1190 }, { "epoch": 3.904918032786885, "grad_norm": 9.561638832092285, "learning_rate": 1.999570548939562e-05, "loss": 2.2612, "step": 1191 }, { "epoch": 3.9081967213114757, "grad_norm": 8.168082237243652, "learning_rate": 1.9995674315625216e-05, "loss": 2.3423, "step": 1192 }, { "epoch": 3.911475409836066, "grad_norm": 8.058500289916992, "learning_rate": 1.999564302914319e-05, "loss": 2.5312, "step": 1193 }, { "epoch": 3.914754098360656, "grad_norm": 26.882665634155273, "learning_rate": 1.99956116299499e-05, "loss": 2.4922, "step": 1194 }, { "epoch": 3.918032786885246, "grad_norm": 9.903360366821289, "learning_rate": 1.9995580118045694e-05, "loss": 2.29, "step": 1195 }, { "epoch": 3.921311475409836, "grad_norm": 11.410151481628418, "learning_rate": 1.999554849343093e-05, "loss": 2.5781, "step": 1196 }, { "epoch": 3.9245901639344263, "grad_norm": 12.448854446411133, "learning_rate": 1.9995516756105965e-05, "loss": 2.4668, "step": 1197 }, { "epoch": 3.9278688524590164, "grad_norm": 9.391043663024902, "learning_rate": 1.9995484906071152e-05, "loss": 2.3672, "step": 1198 }, { "epoch": 3.9311475409836065, "grad_norm": 9.655935287475586, "learning_rate": 1.9995452943326855e-05, "loss": 2.5825, "step": 1199 }, { "epoch": 3.9344262295081966, "grad_norm": 13.180578231811523, "learning_rate": 1.9995420867873437e-05, "loss": 2.5264, "step": 1200 }, { "epoch": 3.9377049180327868, "grad_norm": 8.280306816101074, "learning_rate": 1.999538867971125e-05, "loss": 2.5195, "step": 1201 }, { "epoch": 3.940983606557377, "grad_norm": 9.276575088500977, "learning_rate": 1.9995356378840667e-05, "loss": 2.4702, "step": 1202 }, { "epoch": 3.944262295081967, "grad_norm": 10.467595100402832, "learning_rate": 1.999532396526205e-05, "loss": 2.356, "step": 1203 }, { "epoch": 3.947540983606557, "grad_norm": 8.06556224822998, "learning_rate": 1.999529143897576e-05, "loss": 2.4141, "step": 1204 }, { "epoch": 3.9508196721311473, "grad_norm": 8.059613227844238, "learning_rate": 1.9995258799982168e-05, "loss": 2.4771, "step": 1205 }, { "epoch": 3.954098360655738, "grad_norm": 12.374889373779297, "learning_rate": 1.999522604828164e-05, "loss": 2.5503, "step": 1206 }, { "epoch": 3.957377049180328, "grad_norm": 9.705270767211914, "learning_rate": 1.9995193183874545e-05, "loss": 2.5649, "step": 1207 }, { "epoch": 3.960655737704918, "grad_norm": 6.501126766204834, "learning_rate": 1.9995160206761256e-05, "loss": 2.4629, "step": 1208 }, { "epoch": 3.963934426229508, "grad_norm": 9.53034782409668, "learning_rate": 1.9995127116942143e-05, "loss": 2.4839, "step": 1209 }, { "epoch": 3.9672131147540983, "grad_norm": 15.754103660583496, "learning_rate": 1.9995093914417574e-05, "loss": 2.4897, "step": 1210 }, { "epoch": 3.9704918032786884, "grad_norm": 30.69171714782715, "learning_rate": 1.9995060599187937e-05, "loss": 2.439, "step": 1211 }, { "epoch": 3.9737704918032786, "grad_norm": 7.652256965637207, "learning_rate": 1.9995027171253597e-05, "loss": 2.418, "step": 1212 }, { "epoch": 3.9770491803278687, "grad_norm": 7.82116174697876, "learning_rate": 1.9994993630614933e-05, "loss": 2.4409, "step": 1213 }, { "epoch": 3.9803278688524593, "grad_norm": 9.03420352935791, "learning_rate": 1.9994959977272322e-05, "loss": 2.5439, "step": 1214 }, { "epoch": 3.9836065573770494, "grad_norm": 9.0132417678833, "learning_rate": 1.9994926211226146e-05, "loss": 2.3789, "step": 1215 }, { "epoch": 3.9868852459016395, "grad_norm": 8.262798309326172, "learning_rate": 1.999489233247679e-05, "loss": 2.5278, "step": 1216 }, { "epoch": 3.9901639344262296, "grad_norm": 7.400125980377197, "learning_rate": 1.9994858341024622e-05, "loss": 2.5088, "step": 1217 }, { "epoch": 3.9934426229508198, "grad_norm": 8.21820068359375, "learning_rate": 1.999482423687004e-05, "loss": 2.6035, "step": 1218 }, { "epoch": 3.99672131147541, "grad_norm": 10.349150657653809, "learning_rate": 1.999479002001342e-05, "loss": 2.3228, "step": 1219 }, { "epoch": 4.0, "grad_norm": 8.012970924377441, "learning_rate": 1.9994755690455154e-05, "loss": 2.4629, "step": 1220 }, { "epoch": 4.00327868852459, "grad_norm": 6.783666610717773, "learning_rate": 1.999472124819562e-05, "loss": 2.2925, "step": 1221 }, { "epoch": 4.00655737704918, "grad_norm": 9.79698371887207, "learning_rate": 1.9994686693235215e-05, "loss": 2.3672, "step": 1222 }, { "epoch": 4.00983606557377, "grad_norm": 11.497304916381836, "learning_rate": 1.9994652025574326e-05, "loss": 2.3765, "step": 1223 }, { "epoch": 4.0131147540983605, "grad_norm": 9.051271438598633, "learning_rate": 1.9994617245213344e-05, "loss": 2.521, "step": 1224 }, { "epoch": 4.016393442622951, "grad_norm": 9.07767105102539, "learning_rate": 1.9994582352152658e-05, "loss": 2.3809, "step": 1225 }, { "epoch": 4.019672131147541, "grad_norm": 10.365415573120117, "learning_rate": 1.999454734639267e-05, "loss": 2.4419, "step": 1226 }, { "epoch": 4.022950819672131, "grad_norm": 9.505521774291992, "learning_rate": 1.9994512227933763e-05, "loss": 2.4824, "step": 1227 }, { "epoch": 4.026229508196721, "grad_norm": 8.799982070922852, "learning_rate": 1.9994476996776342e-05, "loss": 2.3994, "step": 1228 }, { "epoch": 4.029508196721311, "grad_norm": 26.96458625793457, "learning_rate": 1.99944416529208e-05, "loss": 2.3364, "step": 1229 }, { "epoch": 4.032786885245901, "grad_norm": 7.701173305511475, "learning_rate": 1.999440619636754e-05, "loss": 2.4463, "step": 1230 }, { "epoch": 4.036065573770492, "grad_norm": 8.00834846496582, "learning_rate": 1.9994370627116954e-05, "loss": 2.3906, "step": 1231 }, { "epoch": 4.039344262295082, "grad_norm": 11.912819862365723, "learning_rate": 1.9994334945169448e-05, "loss": 2.3901, "step": 1232 }, { "epoch": 4.0426229508196725, "grad_norm": 8.086908340454102, "learning_rate": 1.9994299150525425e-05, "loss": 2.3789, "step": 1233 }, { "epoch": 4.045901639344263, "grad_norm": 7.912247657775879, "learning_rate": 1.999426324318529e-05, "loss": 2.3926, "step": 1234 }, { "epoch": 4.049180327868853, "grad_norm": 8.235618591308594, "learning_rate": 1.9994227223149444e-05, "loss": 2.5586, "step": 1235 }, { "epoch": 4.052459016393443, "grad_norm": 8.790522575378418, "learning_rate": 1.999419109041829e-05, "loss": 2.3481, "step": 1236 }, { "epoch": 4.055737704918033, "grad_norm": 8.46109676361084, "learning_rate": 1.9994154844992248e-05, "loss": 2.3906, "step": 1237 }, { "epoch": 4.059016393442623, "grad_norm": 10.11383056640625, "learning_rate": 1.9994118486871714e-05, "loss": 2.3672, "step": 1238 }, { "epoch": 4.062295081967213, "grad_norm": 9.413474082946777, "learning_rate": 1.9994082016057105e-05, "loss": 2.5684, "step": 1239 }, { "epoch": 4.065573770491803, "grad_norm": 12.732465744018555, "learning_rate": 1.9994045432548828e-05, "loss": 2.46, "step": 1240 }, { "epoch": 4.0688524590163935, "grad_norm": 9.13823127746582, "learning_rate": 1.99940087363473e-05, "loss": 2.3677, "step": 1241 }, { "epoch": 4.072131147540984, "grad_norm": 10.329314231872559, "learning_rate": 1.9993971927452928e-05, "loss": 2.437, "step": 1242 }, { "epoch": 4.075409836065574, "grad_norm": 8.57168960571289, "learning_rate": 1.9993935005866138e-05, "loss": 2.4082, "step": 1243 }, { "epoch": 4.078688524590164, "grad_norm": 10.766560554504395, "learning_rate": 1.9993897971587333e-05, "loss": 2.3462, "step": 1244 }, { "epoch": 4.081967213114754, "grad_norm": 8.659952163696289, "learning_rate": 1.999386082461694e-05, "loss": 2.3428, "step": 1245 }, { "epoch": 4.085245901639344, "grad_norm": 8.546940803527832, "learning_rate": 1.9993823564955375e-05, "loss": 2.4321, "step": 1246 }, { "epoch": 4.088524590163934, "grad_norm": 6.723453044891357, "learning_rate": 1.999378619260306e-05, "loss": 2.3276, "step": 1247 }, { "epoch": 4.091803278688524, "grad_norm": 9.047709465026855, "learning_rate": 1.9993748707560413e-05, "loss": 2.3037, "step": 1248 }, { "epoch": 4.0950819672131145, "grad_norm": 10.210500717163086, "learning_rate": 1.999371110982786e-05, "loss": 2.3574, "step": 1249 }, { "epoch": 4.098360655737705, "grad_norm": 12.697441101074219, "learning_rate": 1.999367339940582e-05, "loss": 2.3496, "step": 1250 }, { "epoch": 4.101639344262295, "grad_norm": 10.301984786987305, "learning_rate": 1.9993635576294726e-05, "loss": 2.3271, "step": 1251 }, { "epoch": 4.104918032786885, "grad_norm": 11.044748306274414, "learning_rate": 1.9993597640494998e-05, "loss": 2.2612, "step": 1252 }, { "epoch": 4.108196721311476, "grad_norm": 11.483135223388672, "learning_rate": 1.9993559592007067e-05, "loss": 2.4512, "step": 1253 }, { "epoch": 4.111475409836066, "grad_norm": 11.380659103393555, "learning_rate": 1.9993521430831357e-05, "loss": 2.5415, "step": 1254 }, { "epoch": 4.114754098360656, "grad_norm": 10.04318904876709, "learning_rate": 1.9993483156968305e-05, "loss": 2.355, "step": 1255 }, { "epoch": 4.118032786885246, "grad_norm": 8.149246215820312, "learning_rate": 1.999344477041834e-05, "loss": 2.4458, "step": 1256 }, { "epoch": 4.121311475409836, "grad_norm": 8.3594388961792, "learning_rate": 1.9993406271181898e-05, "loss": 2.25, "step": 1257 }, { "epoch": 4.1245901639344265, "grad_norm": 7.578390121459961, "learning_rate": 1.9993367659259404e-05, "loss": 2.3799, "step": 1258 }, { "epoch": 4.127868852459017, "grad_norm": 17.484107971191406, "learning_rate": 1.9993328934651303e-05, "loss": 2.2935, "step": 1259 }, { "epoch": 4.131147540983607, "grad_norm": 11.083518981933594, "learning_rate": 1.9993290097358024e-05, "loss": 2.4541, "step": 1260 }, { "epoch": 4.134426229508197, "grad_norm": 7.251643180847168, "learning_rate": 1.9993251147380012e-05, "loss": 2.4482, "step": 1261 }, { "epoch": 4.137704918032787, "grad_norm": 8.927546501159668, "learning_rate": 1.99932120847177e-05, "loss": 2.4722, "step": 1262 }, { "epoch": 4.140983606557377, "grad_norm": 11.113164901733398, "learning_rate": 1.9993172909371533e-05, "loss": 2.3989, "step": 1263 }, { "epoch": 4.144262295081967, "grad_norm": 7.636909484863281, "learning_rate": 1.999313362134195e-05, "loss": 2.4463, "step": 1264 }, { "epoch": 4.147540983606557, "grad_norm": 7.058381080627441, "learning_rate": 1.99930942206294e-05, "loss": 2.3052, "step": 1265 }, { "epoch": 4.150819672131147, "grad_norm": 6.089980125427246, "learning_rate": 1.9993054707234317e-05, "loss": 2.3359, "step": 1266 }, { "epoch": 4.154098360655738, "grad_norm": 7.302461624145508, "learning_rate": 1.9993015081157155e-05, "loss": 2.4653, "step": 1267 }, { "epoch": 4.157377049180328, "grad_norm": 12.26522159576416, "learning_rate": 1.999297534239836e-05, "loss": 2.2515, "step": 1268 }, { "epoch": 4.160655737704918, "grad_norm": 11.144664764404297, "learning_rate": 1.999293549095837e-05, "loss": 2.2112, "step": 1269 }, { "epoch": 4.163934426229508, "grad_norm": 8.022135734558105, "learning_rate": 1.9992895526837647e-05, "loss": 2.2856, "step": 1270 }, { "epoch": 4.167213114754098, "grad_norm": 9.923857688903809, "learning_rate": 1.9992855450036638e-05, "loss": 2.2905, "step": 1271 }, { "epoch": 4.170491803278688, "grad_norm": 7.326773166656494, "learning_rate": 1.9992815260555792e-05, "loss": 2.4062, "step": 1272 }, { "epoch": 4.173770491803278, "grad_norm": 6.982698917388916, "learning_rate": 1.9992774958395565e-05, "loss": 2.5537, "step": 1273 }, { "epoch": 4.177049180327868, "grad_norm": 8.69603443145752, "learning_rate": 1.9992734543556413e-05, "loss": 2.3691, "step": 1274 }, { "epoch": 4.180327868852459, "grad_norm": 9.209389686584473, "learning_rate": 1.9992694016038785e-05, "loss": 2.4136, "step": 1275 }, { "epoch": 4.18360655737705, "grad_norm": 7.987884521484375, "learning_rate": 1.9992653375843143e-05, "loss": 2.0801, "step": 1276 }, { "epoch": 4.18688524590164, "grad_norm": 7.78518533706665, "learning_rate": 1.9992612622969946e-05, "loss": 2.5454, "step": 1277 }, { "epoch": 4.19016393442623, "grad_norm": 8.160201072692871, "learning_rate": 1.9992571757419653e-05, "loss": 2.3364, "step": 1278 }, { "epoch": 4.19344262295082, "grad_norm": 7.156608581542969, "learning_rate": 1.999253077919272e-05, "loss": 2.2866, "step": 1279 }, { "epoch": 4.19672131147541, "grad_norm": 9.011981964111328, "learning_rate": 1.9992489688289614e-05, "loss": 2.2544, "step": 1280 }, { "epoch": 4.2, "grad_norm": 14.778875350952148, "learning_rate": 1.99924484847108e-05, "loss": 2.4482, "step": 1281 }, { "epoch": 4.20327868852459, "grad_norm": 8.17931079864502, "learning_rate": 1.9992407168456735e-05, "loss": 2.332, "step": 1282 }, { "epoch": 4.20655737704918, "grad_norm": 11.447026252746582, "learning_rate": 1.999236573952789e-05, "loss": 2.5254, "step": 1283 }, { "epoch": 4.2098360655737705, "grad_norm": 10.602884292602539, "learning_rate": 1.9992324197924736e-05, "loss": 2.394, "step": 1284 }, { "epoch": 4.213114754098361, "grad_norm": 11.10487174987793, "learning_rate": 1.9992282543647737e-05, "loss": 2.2939, "step": 1285 }, { "epoch": 4.216393442622951, "grad_norm": 10.086874008178711, "learning_rate": 1.999224077669736e-05, "loss": 2.2427, "step": 1286 }, { "epoch": 4.219672131147541, "grad_norm": 7.340033054351807, "learning_rate": 1.999219889707408e-05, "loss": 2.3032, "step": 1287 }, { "epoch": 4.222950819672131, "grad_norm": 9.554770469665527, "learning_rate": 1.999215690477837e-05, "loss": 2.5, "step": 1288 }, { "epoch": 4.226229508196721, "grad_norm": 6.221395492553711, "learning_rate": 1.99921147998107e-05, "loss": 2.3335, "step": 1289 }, { "epoch": 4.229508196721311, "grad_norm": 8.47671890258789, "learning_rate": 1.9992072582171546e-05, "loss": 2.4434, "step": 1290 }, { "epoch": 4.232786885245901, "grad_norm": 10.2838134765625, "learning_rate": 1.9992030251861384e-05, "loss": 2.374, "step": 1291 }, { "epoch": 4.2360655737704915, "grad_norm": 9.429166793823242, "learning_rate": 1.999198780888069e-05, "loss": 2.4272, "step": 1292 }, { "epoch": 4.239344262295082, "grad_norm": 11.456494331359863, "learning_rate": 1.9991945253229953e-05, "loss": 2.4585, "step": 1293 }, { "epoch": 4.242622950819672, "grad_norm": 9.173245429992676, "learning_rate": 1.9991902584909636e-05, "loss": 2.2085, "step": 1294 }, { "epoch": 4.245901639344262, "grad_norm": 7.914997577667236, "learning_rate": 1.999185980392023e-05, "loss": 2.332, "step": 1295 }, { "epoch": 4.249180327868853, "grad_norm": 7.1806793212890625, "learning_rate": 1.999181691026222e-05, "loss": 2.5532, "step": 1296 }, { "epoch": 4.252459016393443, "grad_norm": 8.028154373168945, "learning_rate": 1.999177390393608e-05, "loss": 2.498, "step": 1297 }, { "epoch": 4.255737704918033, "grad_norm": 18.189422607421875, "learning_rate": 1.9991730784942304e-05, "loss": 2.2236, "step": 1298 }, { "epoch": 4.259016393442623, "grad_norm": 8.869684219360352, "learning_rate": 1.999168755328137e-05, "loss": 2.3018, "step": 1299 }, { "epoch": 4.262295081967213, "grad_norm": 9.2525053024292, "learning_rate": 1.9991644208953776e-05, "loss": 2.3691, "step": 1300 }, { "epoch": 4.2655737704918035, "grad_norm": 8.678208351135254, "learning_rate": 1.999160075196e-05, "loss": 2.229, "step": 1301 }, { "epoch": 4.268852459016394, "grad_norm": 7.419463634490967, "learning_rate": 1.9991557182300538e-05, "loss": 2.2905, "step": 1302 }, { "epoch": 4.272131147540984, "grad_norm": 8.448445320129395, "learning_rate": 1.9991513499975883e-05, "loss": 2.4932, "step": 1303 }, { "epoch": 4.275409836065574, "grad_norm": 8.667378425598145, "learning_rate": 1.9991469704986523e-05, "loss": 2.3516, "step": 1304 }, { "epoch": 4.278688524590164, "grad_norm": 8.88077449798584, "learning_rate": 1.9991425797332952e-05, "loss": 1.9443, "step": 1305 }, { "epoch": 4.281967213114754, "grad_norm": 8.904132843017578, "learning_rate": 1.9991381777015667e-05, "loss": 2.4775, "step": 1306 }, { "epoch": 4.285245901639344, "grad_norm": 9.807610511779785, "learning_rate": 1.9991337644035166e-05, "loss": 2.354, "step": 1307 }, { "epoch": 4.288524590163934, "grad_norm": 7.800211429595947, "learning_rate": 1.9991293398391945e-05, "loss": 2.4102, "step": 1308 }, { "epoch": 4.2918032786885245, "grad_norm": 9.08542251586914, "learning_rate": 1.99912490400865e-05, "loss": 2.2676, "step": 1309 }, { "epoch": 4.295081967213115, "grad_norm": 9.975735664367676, "learning_rate": 1.9991204569119337e-05, "loss": 2.499, "step": 1310 }, { "epoch": 4.298360655737705, "grad_norm": 12.140556335449219, "learning_rate": 1.9991159985490952e-05, "loss": 2.4424, "step": 1311 }, { "epoch": 4.301639344262295, "grad_norm": 7.582167625427246, "learning_rate": 1.999111528920185e-05, "loss": 2.3486, "step": 1312 }, { "epoch": 4.304918032786885, "grad_norm": 6.4798808097839355, "learning_rate": 1.9991070480252533e-05, "loss": 2.1785, "step": 1313 }, { "epoch": 4.308196721311475, "grad_norm": 6.357883453369141, "learning_rate": 1.999102555864351e-05, "loss": 2.2876, "step": 1314 }, { "epoch": 4.311475409836065, "grad_norm": 6.38567590713501, "learning_rate": 1.9990980524375286e-05, "loss": 2.377, "step": 1315 }, { "epoch": 4.314754098360655, "grad_norm": 6.859234809875488, "learning_rate": 1.9990935377448372e-05, "loss": 2.2725, "step": 1316 }, { "epoch": 4.3180327868852455, "grad_norm": 7.533396244049072, "learning_rate": 1.9990890117863267e-05, "loss": 2.4033, "step": 1317 }, { "epoch": 4.321311475409836, "grad_norm": 7.803981304168701, "learning_rate": 1.9990844745620493e-05, "loss": 2.4844, "step": 1318 }, { "epoch": 4.324590163934427, "grad_norm": 11.516681671142578, "learning_rate": 1.9990799260720555e-05, "loss": 2.3594, "step": 1319 }, { "epoch": 4.327868852459017, "grad_norm": 13.669341087341309, "learning_rate": 1.9990753663163968e-05, "loss": 2.4717, "step": 1320 }, { "epoch": 4.331147540983607, "grad_norm": 11.37423324584961, "learning_rate": 1.9990707952951243e-05, "loss": 2.2632, "step": 1321 }, { "epoch": 4.334426229508197, "grad_norm": 7.9196553230285645, "learning_rate": 1.9990662130082903e-05, "loss": 2.3896, "step": 1322 }, { "epoch": 4.337704918032787, "grad_norm": 7.518824100494385, "learning_rate": 1.9990616194559455e-05, "loss": 2.251, "step": 1323 }, { "epoch": 4.340983606557377, "grad_norm": 10.750685691833496, "learning_rate": 1.9990570146381424e-05, "loss": 2.3804, "step": 1324 }, { "epoch": 4.344262295081967, "grad_norm": 9.521597862243652, "learning_rate": 1.9990523985549327e-05, "loss": 2.2026, "step": 1325 }, { "epoch": 4.3475409836065575, "grad_norm": 7.824872016906738, "learning_rate": 1.9990477712063687e-05, "loss": 2.5669, "step": 1326 }, { "epoch": 4.350819672131148, "grad_norm": 13.887478828430176, "learning_rate": 1.999043132592502e-05, "loss": 2.3071, "step": 1327 }, { "epoch": 4.354098360655738, "grad_norm": 7.8038153648376465, "learning_rate": 1.999038482713385e-05, "loss": 2.3843, "step": 1328 }, { "epoch": 4.357377049180328, "grad_norm": 8.697489738464355, "learning_rate": 1.999033821569071e-05, "loss": 2.5, "step": 1329 }, { "epoch": 4.360655737704918, "grad_norm": 8.630002975463867, "learning_rate": 1.9990291491596116e-05, "loss": 2.5283, "step": 1330 }, { "epoch": 4.363934426229508, "grad_norm": 7.922541618347168, "learning_rate": 1.9990244654850598e-05, "loss": 2.5029, "step": 1331 }, { "epoch": 4.367213114754098, "grad_norm": 6.813348293304443, "learning_rate": 1.9990197705454682e-05, "loss": 2.373, "step": 1332 }, { "epoch": 4.370491803278688, "grad_norm": 10.454228401184082, "learning_rate": 1.9990150643408904e-05, "loss": 2.1831, "step": 1333 }, { "epoch": 4.3737704918032785, "grad_norm": 13.209990501403809, "learning_rate": 1.9990103468713788e-05, "loss": 2.2632, "step": 1334 }, { "epoch": 4.377049180327869, "grad_norm": 6.7609686851501465, "learning_rate": 1.999005618136987e-05, "loss": 2.3833, "step": 1335 }, { "epoch": 4.380327868852459, "grad_norm": 18.02646827697754, "learning_rate": 1.9990008781377677e-05, "loss": 2.4678, "step": 1336 }, { "epoch": 4.383606557377049, "grad_norm": 9.582710266113281, "learning_rate": 1.9989961268737754e-05, "loss": 2.499, "step": 1337 }, { "epoch": 4.386885245901639, "grad_norm": 8.781536102294922, "learning_rate": 1.9989913643450627e-05, "loss": 2.2241, "step": 1338 }, { "epoch": 4.390163934426229, "grad_norm": 10.857329368591309, "learning_rate": 1.9989865905516836e-05, "loss": 2.3848, "step": 1339 }, { "epoch": 4.39344262295082, "grad_norm": 8.140976905822754, "learning_rate": 1.9989818054936923e-05, "loss": 2.1396, "step": 1340 }, { "epoch": 4.39672131147541, "grad_norm": 7.074567794799805, "learning_rate": 1.9989770091711423e-05, "loss": 2.3091, "step": 1341 }, { "epoch": 4.4, "grad_norm": 8.033045768737793, "learning_rate": 1.998972201584088e-05, "loss": 2.2686, "step": 1342 }, { "epoch": 4.4032786885245905, "grad_norm": 10.790445327758789, "learning_rate": 1.9989673827325834e-05, "loss": 2.2949, "step": 1343 }, { "epoch": 4.406557377049181, "grad_norm": 8.636160850524902, "learning_rate": 1.998962552616683e-05, "loss": 2.3252, "step": 1344 }, { "epoch": 4.409836065573771, "grad_norm": 9.624173164367676, "learning_rate": 1.9989577112364405e-05, "loss": 2.2559, "step": 1345 }, { "epoch": 4.413114754098361, "grad_norm": 8.146405220031738, "learning_rate": 1.998952858591912e-05, "loss": 2.3442, "step": 1346 }, { "epoch": 4.416393442622951, "grad_norm": 10.223978042602539, "learning_rate": 1.998947994683151e-05, "loss": 2.2578, "step": 1347 }, { "epoch": 4.419672131147541, "grad_norm": 11.80959701538086, "learning_rate": 1.9989431195102127e-05, "loss": 2.2124, "step": 1348 }, { "epoch": 4.422950819672131, "grad_norm": 9.19405746459961, "learning_rate": 1.998938233073152e-05, "loss": 2.3867, "step": 1349 }, { "epoch": 4.426229508196721, "grad_norm": 8.727531433105469, "learning_rate": 1.9989333353720243e-05, "loss": 2.2065, "step": 1350 }, { "epoch": 4.4295081967213115, "grad_norm": 8.301379203796387, "learning_rate": 1.9989284264068845e-05, "loss": 2.4087, "step": 1351 }, { "epoch": 4.432786885245902, "grad_norm": 8.163114547729492, "learning_rate": 1.9989235061777878e-05, "loss": 2.2632, "step": 1352 }, { "epoch": 4.436065573770492, "grad_norm": 10.444515228271484, "learning_rate": 1.9989185746847903e-05, "loss": 2.2695, "step": 1353 }, { "epoch": 4.439344262295082, "grad_norm": 8.769352912902832, "learning_rate": 1.9989136319279474e-05, "loss": 2.2817, "step": 1354 }, { "epoch": 4.442622950819672, "grad_norm": 9.16578197479248, "learning_rate": 1.9989086779073142e-05, "loss": 2.2603, "step": 1355 }, { "epoch": 4.445901639344262, "grad_norm": 7.620275497436523, "learning_rate": 1.9989037126229474e-05, "loss": 2.3394, "step": 1356 }, { "epoch": 4.449180327868852, "grad_norm": 8.804341316223145, "learning_rate": 1.9988987360749027e-05, "loss": 2.269, "step": 1357 }, { "epoch": 4.452459016393442, "grad_norm": 7.574732303619385, "learning_rate": 1.998893748263236e-05, "loss": 2.458, "step": 1358 }, { "epoch": 4.4557377049180324, "grad_norm": 7.560117244720459, "learning_rate": 1.9988887491880037e-05, "loss": 2.2988, "step": 1359 }, { "epoch": 4.459016393442623, "grad_norm": 8.800701141357422, "learning_rate": 1.9988837388492622e-05, "loss": 2.3794, "step": 1360 }, { "epoch": 4.462295081967213, "grad_norm": 8.843282699584961, "learning_rate": 1.9988787172470682e-05, "loss": 2.2837, "step": 1361 }, { "epoch": 4.465573770491803, "grad_norm": 6.815879821777344, "learning_rate": 1.9988736843814777e-05, "loss": 2.2695, "step": 1362 }, { "epoch": 4.468852459016394, "grad_norm": 9.710630416870117, "learning_rate": 1.9988686402525478e-05, "loss": 2.271, "step": 1363 }, { "epoch": 4.472131147540984, "grad_norm": 7.8310227394104, "learning_rate": 1.9988635848603356e-05, "loss": 2.2998, "step": 1364 }, { "epoch": 4.475409836065574, "grad_norm": 7.848716735839844, "learning_rate": 1.998858518204898e-05, "loss": 2.314, "step": 1365 }, { "epoch": 4.478688524590164, "grad_norm": 8.3385591506958, "learning_rate": 1.998853440286292e-05, "loss": 2.2627, "step": 1366 }, { "epoch": 4.481967213114754, "grad_norm": 8.65140151977539, "learning_rate": 1.998848351104575e-05, "loss": 2.4932, "step": 1367 }, { "epoch": 4.4852459016393444, "grad_norm": 13.115348815917969, "learning_rate": 1.998843250659804e-05, "loss": 2.3569, "step": 1368 }, { "epoch": 4.488524590163935, "grad_norm": 8.959574699401855, "learning_rate": 1.998838138952037e-05, "loss": 2.0493, "step": 1369 }, { "epoch": 4.491803278688525, "grad_norm": 10.984434127807617, "learning_rate": 1.9988330159813313e-05, "loss": 2.3423, "step": 1370 }, { "epoch": 4.495081967213115, "grad_norm": 10.901837348937988, "learning_rate": 1.998827881747745e-05, "loss": 2.4502, "step": 1371 }, { "epoch": 4.498360655737705, "grad_norm": 7.557483196258545, "learning_rate": 1.998822736251336e-05, "loss": 2.2627, "step": 1372 }, { "epoch": 4.501639344262295, "grad_norm": 7.176002502441406, "learning_rate": 1.9988175794921618e-05, "loss": 2.5513, "step": 1373 }, { "epoch": 4.504918032786885, "grad_norm": 7.615395545959473, "learning_rate": 1.998812411470281e-05, "loss": 2.3003, "step": 1374 }, { "epoch": 4.508196721311475, "grad_norm": 11.295527458190918, "learning_rate": 1.998807232185752e-05, "loss": 2.2974, "step": 1375 }, { "epoch": 4.511475409836065, "grad_norm": 8.2101411819458, "learning_rate": 1.9988020416386327e-05, "loss": 2.3486, "step": 1376 }, { "epoch": 4.5147540983606556, "grad_norm": 8.161543846130371, "learning_rate": 1.9987968398289818e-05, "loss": 2.2539, "step": 1377 }, { "epoch": 4.518032786885246, "grad_norm": 7.049025535583496, "learning_rate": 1.998791626756858e-05, "loss": 2.4619, "step": 1378 }, { "epoch": 4.521311475409836, "grad_norm": 7.304931640625, "learning_rate": 1.9987864024223205e-05, "loss": 2.3071, "step": 1379 }, { "epoch": 4.524590163934426, "grad_norm": 9.173112869262695, "learning_rate": 1.9987811668254276e-05, "loss": 2.2378, "step": 1380 }, { "epoch": 4.527868852459016, "grad_norm": 11.782327651977539, "learning_rate": 1.9987759199662386e-05, "loss": 2.3887, "step": 1381 }, { "epoch": 4.531147540983606, "grad_norm": 12.809149742126465, "learning_rate": 1.9987706618448125e-05, "loss": 2.2471, "step": 1382 }, { "epoch": 4.534426229508197, "grad_norm": 7.3819966316223145, "learning_rate": 1.9987653924612088e-05, "loss": 2.2412, "step": 1383 }, { "epoch": 4.537704918032787, "grad_norm": 8.070784568786621, "learning_rate": 1.998760111815487e-05, "loss": 2.438, "step": 1384 }, { "epoch": 4.540983606557377, "grad_norm": 9.689772605895996, "learning_rate": 1.9987548199077062e-05, "loss": 2.4126, "step": 1385 }, { "epoch": 4.5442622950819676, "grad_norm": 8.912281036376953, "learning_rate": 1.9987495167379265e-05, "loss": 2.3574, "step": 1386 }, { "epoch": 4.547540983606558, "grad_norm": 9.495426177978516, "learning_rate": 1.9987442023062077e-05, "loss": 2.1919, "step": 1387 }, { "epoch": 4.550819672131148, "grad_norm": 12.27763843536377, "learning_rate": 1.9987388766126096e-05, "loss": 2.4082, "step": 1388 }, { "epoch": 4.554098360655738, "grad_norm": 7.919150352478027, "learning_rate": 1.9987335396571922e-05, "loss": 2.5366, "step": 1389 }, { "epoch": 4.557377049180328, "grad_norm": 9.006173133850098, "learning_rate": 1.9987281914400153e-05, "loss": 2.4395, "step": 1390 }, { "epoch": 4.560655737704918, "grad_norm": 8.00467586517334, "learning_rate": 1.99872283196114e-05, "loss": 2.3613, "step": 1391 }, { "epoch": 4.563934426229508, "grad_norm": 9.067948341369629, "learning_rate": 1.9987174612206262e-05, "loss": 2.4023, "step": 1392 }, { "epoch": 4.567213114754098, "grad_norm": 8.458083152770996, "learning_rate": 1.998712079218535e-05, "loss": 2.3574, "step": 1393 }, { "epoch": 4.5704918032786885, "grad_norm": 7.4238176345825195, "learning_rate": 1.9987066859549266e-05, "loss": 2.3916, "step": 1394 }, { "epoch": 4.573770491803279, "grad_norm": 8.348522186279297, "learning_rate": 1.9987012814298617e-05, "loss": 2.3486, "step": 1395 }, { "epoch": 4.577049180327869, "grad_norm": 8.868863105773926, "learning_rate": 1.9986958656434016e-05, "loss": 2.2231, "step": 1396 }, { "epoch": 4.580327868852459, "grad_norm": 15.475521087646484, "learning_rate": 1.998690438595607e-05, "loss": 2.2866, "step": 1397 }, { "epoch": 4.583606557377049, "grad_norm": 7.3383002281188965, "learning_rate": 1.9986850002865394e-05, "loss": 2.3184, "step": 1398 }, { "epoch": 4.586885245901639, "grad_norm": 7.329399108886719, "learning_rate": 1.9986795507162603e-05, "loss": 2.3071, "step": 1399 }, { "epoch": 4.590163934426229, "grad_norm": 7.681207180023193, "learning_rate": 1.9986740898848306e-05, "loss": 2.2144, "step": 1400 }, { "epoch": 4.593442622950819, "grad_norm": 7.430589199066162, "learning_rate": 1.9986686177923124e-05, "loss": 2.3833, "step": 1401 }, { "epoch": 4.5967213114754095, "grad_norm": 7.837014198303223, "learning_rate": 1.998663134438767e-05, "loss": 2.3613, "step": 1402 }, { "epoch": 4.6, "grad_norm": 6.814788818359375, "learning_rate": 1.9986576398242566e-05, "loss": 2.2646, "step": 1403 }, { "epoch": 4.60327868852459, "grad_norm": 10.246014595031738, "learning_rate": 1.9986521339488427e-05, "loss": 2.3662, "step": 1404 }, { "epoch": 4.60655737704918, "grad_norm": 6.776881694793701, "learning_rate": 1.998646616812588e-05, "loss": 2.5195, "step": 1405 }, { "epoch": 4.60983606557377, "grad_norm": 10.07409381866455, "learning_rate": 1.998641088415554e-05, "loss": 2.1514, "step": 1406 }, { "epoch": 4.613114754098361, "grad_norm": 12.955272674560547, "learning_rate": 1.998635548757804e-05, "loss": 2.3188, "step": 1407 }, { "epoch": 4.616393442622951, "grad_norm": 8.030448913574219, "learning_rate": 1.998629997839399e-05, "loss": 2.2129, "step": 1408 }, { "epoch": 4.619672131147541, "grad_norm": 8.398825645446777, "learning_rate": 1.998624435660403e-05, "loss": 2.3281, "step": 1409 }, { "epoch": 4.622950819672131, "grad_norm": 8.136346817016602, "learning_rate": 1.9986188622208782e-05, "loss": 2.2949, "step": 1410 }, { "epoch": 4.6262295081967215, "grad_norm": 7.373635768890381, "learning_rate": 1.9986132775208872e-05, "loss": 2.2173, "step": 1411 }, { "epoch": 4.629508196721312, "grad_norm": 9.786977767944336, "learning_rate": 1.9986076815604934e-05, "loss": 2.4316, "step": 1412 }, { "epoch": 4.632786885245902, "grad_norm": 7.390148639678955, "learning_rate": 1.9986020743397595e-05, "loss": 2.2852, "step": 1413 }, { "epoch": 4.636065573770492, "grad_norm": 7.080379962921143, "learning_rate": 1.998596455858749e-05, "loss": 2.1968, "step": 1414 }, { "epoch": 4.639344262295082, "grad_norm": 6.412715435028076, "learning_rate": 1.9985908261175253e-05, "loss": 2.438, "step": 1415 }, { "epoch": 4.642622950819672, "grad_norm": 7.366628646850586, "learning_rate": 1.998585185116152e-05, "loss": 2.2373, "step": 1416 }, { "epoch": 4.645901639344262, "grad_norm": 7.691655158996582, "learning_rate": 1.998579532854692e-05, "loss": 2.3936, "step": 1417 }, { "epoch": 4.649180327868852, "grad_norm": 6.390206336975098, "learning_rate": 1.9985738693332095e-05, "loss": 2.4575, "step": 1418 }, { "epoch": 4.6524590163934425, "grad_norm": 7.274484634399414, "learning_rate": 1.9985681945517687e-05, "loss": 2.3906, "step": 1419 }, { "epoch": 4.655737704918033, "grad_norm": 15.284529685974121, "learning_rate": 1.998562508510433e-05, "loss": 2.439, "step": 1420 }, { "epoch": 4.659016393442623, "grad_norm": 7.436450958251953, "learning_rate": 1.9985568112092667e-05, "loss": 2.3589, "step": 1421 }, { "epoch": 4.662295081967213, "grad_norm": 31.060091018676758, "learning_rate": 1.9985511026483343e-05, "loss": 2.4087, "step": 1422 }, { "epoch": 4.665573770491803, "grad_norm": 8.542333602905273, "learning_rate": 1.9985453828277e-05, "loss": 2.4458, "step": 1423 }, { "epoch": 4.668852459016393, "grad_norm": 8.806581497192383, "learning_rate": 1.9985396517474283e-05, "loss": 2.2729, "step": 1424 }, { "epoch": 4.672131147540983, "grad_norm": 7.180908679962158, "learning_rate": 1.9985339094075836e-05, "loss": 2.1638, "step": 1425 }, { "epoch": 4.675409836065574, "grad_norm": 10.384662628173828, "learning_rate": 1.9985281558082312e-05, "loss": 2.2705, "step": 1426 }, { "epoch": 4.678688524590164, "grad_norm": 8.211709976196289, "learning_rate": 1.9985223909494352e-05, "loss": 2.376, "step": 1427 }, { "epoch": 4.6819672131147545, "grad_norm": 6.525832176208496, "learning_rate": 1.9985166148312616e-05, "loss": 2.4404, "step": 1428 }, { "epoch": 4.685245901639345, "grad_norm": 7.950165748596191, "learning_rate": 1.9985108274537745e-05, "loss": 2.4038, "step": 1429 }, { "epoch": 4.688524590163935, "grad_norm": 7.936550140380859, "learning_rate": 1.9985050288170396e-05, "loss": 2.2158, "step": 1430 }, { "epoch": 4.691803278688525, "grad_norm": 8.364604949951172, "learning_rate": 1.9984992189211227e-05, "loss": 2.2524, "step": 1431 }, { "epoch": 4.695081967213115, "grad_norm": 7.43726110458374, "learning_rate": 1.9984933977660884e-05, "loss": 2.4087, "step": 1432 }, { "epoch": 4.698360655737705, "grad_norm": 11.914563179016113, "learning_rate": 1.9984875653520035e-05, "loss": 2.3589, "step": 1433 }, { "epoch": 4.701639344262295, "grad_norm": 7.180919647216797, "learning_rate": 1.9984817216789327e-05, "loss": 2.3638, "step": 1434 }, { "epoch": 4.704918032786885, "grad_norm": 7.561214923858643, "learning_rate": 1.9984758667469424e-05, "loss": 2.373, "step": 1435 }, { "epoch": 4.7081967213114755, "grad_norm": 8.03368854522705, "learning_rate": 1.9984700005560987e-05, "loss": 2.4541, "step": 1436 }, { "epoch": 4.711475409836066, "grad_norm": 8.162429809570312, "learning_rate": 1.9984641231064673e-05, "loss": 2.2573, "step": 1437 }, { "epoch": 4.714754098360656, "grad_norm": 9.085460662841797, "learning_rate": 1.9984582343981153e-05, "loss": 2.3257, "step": 1438 }, { "epoch": 4.718032786885246, "grad_norm": 8.309141159057617, "learning_rate": 1.998452334431108e-05, "loss": 2.4385, "step": 1439 }, { "epoch": 4.721311475409836, "grad_norm": 7.733190059661865, "learning_rate": 1.9984464232055128e-05, "loss": 2.3784, "step": 1440 }, { "epoch": 4.724590163934426, "grad_norm": 7.66124153137207, "learning_rate": 1.998440500721396e-05, "loss": 2.2935, "step": 1441 }, { "epoch": 4.727868852459016, "grad_norm": 9.72463607788086, "learning_rate": 1.9984345669788244e-05, "loss": 2.4526, "step": 1442 }, { "epoch": 4.731147540983606, "grad_norm": 7.808207988739014, "learning_rate": 1.998428621977865e-05, "loss": 2.0674, "step": 1443 }, { "epoch": 4.7344262295081965, "grad_norm": 8.949491500854492, "learning_rate": 1.9984226657185845e-05, "loss": 2.3179, "step": 1444 }, { "epoch": 4.737704918032787, "grad_norm": 9.026613235473633, "learning_rate": 1.9984166982010508e-05, "loss": 2.4043, "step": 1445 }, { "epoch": 4.740983606557377, "grad_norm": 9.163273811340332, "learning_rate": 1.9984107194253305e-05, "loss": 2.3638, "step": 1446 }, { "epoch": 4.744262295081967, "grad_norm": 6.7496442794799805, "learning_rate": 1.9984047293914912e-05, "loss": 2.272, "step": 1447 }, { "epoch": 4.747540983606557, "grad_norm": 8.551309585571289, "learning_rate": 1.9983987280996006e-05, "loss": 2.3359, "step": 1448 }, { "epoch": 4.750819672131147, "grad_norm": 8.013650894165039, "learning_rate": 1.9983927155497262e-05, "loss": 2.2266, "step": 1449 }, { "epoch": 4.754098360655737, "grad_norm": 7.148861885070801, "learning_rate": 1.998386691741936e-05, "loss": 2.2529, "step": 1450 }, { "epoch": 4.757377049180328, "grad_norm": 7.59727668762207, "learning_rate": 1.9983806566762975e-05, "loss": 2.2036, "step": 1451 }, { "epoch": 4.760655737704918, "grad_norm": 31.275827407836914, "learning_rate": 1.9983746103528794e-05, "loss": 2.3418, "step": 1452 }, { "epoch": 4.7639344262295085, "grad_norm": 8.260653495788574, "learning_rate": 1.9983685527717493e-05, "loss": 2.3076, "step": 1453 }, { "epoch": 4.767213114754099, "grad_norm": 8.440363883972168, "learning_rate": 1.9983624839329757e-05, "loss": 2.7207, "step": 1454 }, { "epoch": 4.770491803278689, "grad_norm": 7.246259689331055, "learning_rate": 1.9983564038366274e-05, "loss": 2.2959, "step": 1455 }, { "epoch": 4.773770491803279, "grad_norm": 10.31042194366455, "learning_rate": 1.998350312482772e-05, "loss": 2.4072, "step": 1456 }, { "epoch": 4.777049180327869, "grad_norm": 9.639517784118652, "learning_rate": 1.9983442098714792e-05, "loss": 2.2178, "step": 1457 }, { "epoch": 4.780327868852459, "grad_norm": 6.186031341552734, "learning_rate": 1.9983380960028174e-05, "loss": 2.5098, "step": 1458 }, { "epoch": 4.783606557377049, "grad_norm": 8.785659790039062, "learning_rate": 1.9983319708768555e-05, "loss": 2.3857, "step": 1459 }, { "epoch": 4.786885245901639, "grad_norm": 10.682461738586426, "learning_rate": 1.9983258344936628e-05, "loss": 2.3164, "step": 1460 }, { "epoch": 4.7901639344262295, "grad_norm": 8.240211486816406, "learning_rate": 1.998319686853308e-05, "loss": 2.2715, "step": 1461 }, { "epoch": 4.79344262295082, "grad_norm": 11.612015724182129, "learning_rate": 1.998313527955861e-05, "loss": 2.4697, "step": 1462 }, { "epoch": 4.79672131147541, "grad_norm": 9.253621101379395, "learning_rate": 1.998307357801391e-05, "loss": 2.2852, "step": 1463 }, { "epoch": 4.8, "grad_norm": 7.8999738693237305, "learning_rate": 1.9983011763899674e-05, "loss": 2.4551, "step": 1464 }, { "epoch": 4.80327868852459, "grad_norm": 9.64350414276123, "learning_rate": 1.99829498372166e-05, "loss": 2.313, "step": 1465 }, { "epoch": 4.80655737704918, "grad_norm": 5.6994757652282715, "learning_rate": 1.9982887797965388e-05, "loss": 2.4492, "step": 1466 }, { "epoch": 4.80983606557377, "grad_norm": 14.06264877319336, "learning_rate": 1.9982825646146734e-05, "loss": 2.2476, "step": 1467 }, { "epoch": 4.81311475409836, "grad_norm": 6.881730079650879, "learning_rate": 1.9982763381761344e-05, "loss": 2.334, "step": 1468 }, { "epoch": 4.81639344262295, "grad_norm": 15.638511657714844, "learning_rate": 1.9982701004809918e-05, "loss": 2.0894, "step": 1469 }, { "epoch": 4.8196721311475414, "grad_norm": 6.975289821624756, "learning_rate": 1.998263851529316e-05, "loss": 2.2432, "step": 1470 }, { "epoch": 4.822950819672132, "grad_norm": 8.193575859069824, "learning_rate": 1.9982575913211773e-05, "loss": 2.3281, "step": 1471 }, { "epoch": 4.826229508196722, "grad_norm": 7.435802459716797, "learning_rate": 1.998251319856646e-05, "loss": 2.2847, "step": 1472 }, { "epoch": 4.829508196721312, "grad_norm": 8.786240577697754, "learning_rate": 1.998245037135793e-05, "loss": 2.2339, "step": 1473 }, { "epoch": 4.832786885245902, "grad_norm": 8.233465194702148, "learning_rate": 1.9982387431586897e-05, "loss": 2.2559, "step": 1474 }, { "epoch": 4.836065573770492, "grad_norm": 7.0231475830078125, "learning_rate": 1.9982324379254068e-05, "loss": 2.3975, "step": 1475 }, { "epoch": 4.839344262295082, "grad_norm": 8.464741706848145, "learning_rate": 1.998226121436015e-05, "loss": 2.2578, "step": 1476 }, { "epoch": 4.842622950819672, "grad_norm": 8.211801528930664, "learning_rate": 1.998219793690586e-05, "loss": 2.5146, "step": 1477 }, { "epoch": 4.845901639344262, "grad_norm": 8.236821174621582, "learning_rate": 1.9982134546891904e-05, "loss": 2.3496, "step": 1478 }, { "epoch": 4.849180327868853, "grad_norm": 9.920329093933105, "learning_rate": 1.9982071044319007e-05, "loss": 2.4502, "step": 1479 }, { "epoch": 4.852459016393443, "grad_norm": 11.404678344726562, "learning_rate": 1.9982007429187876e-05, "loss": 2.3989, "step": 1480 }, { "epoch": 4.855737704918033, "grad_norm": 7.178466796875, "learning_rate": 1.9981943701499236e-05, "loss": 2.2773, "step": 1481 }, { "epoch": 4.859016393442623, "grad_norm": 6.59832239151001, "learning_rate": 1.9981879861253802e-05, "loss": 2.2217, "step": 1482 }, { "epoch": 4.862295081967213, "grad_norm": 8.11084270477295, "learning_rate": 1.9981815908452296e-05, "loss": 2.1943, "step": 1483 }, { "epoch": 4.865573770491803, "grad_norm": 8.164994239807129, "learning_rate": 1.9981751843095435e-05, "loss": 2.1763, "step": 1484 }, { "epoch": 4.868852459016393, "grad_norm": 9.756218910217285, "learning_rate": 1.9981687665183945e-05, "loss": 2.3652, "step": 1485 }, { "epoch": 4.872131147540983, "grad_norm": 6.745073318481445, "learning_rate": 1.9981623374718545e-05, "loss": 2.3936, "step": 1486 }, { "epoch": 4.8754098360655735, "grad_norm": 9.289725303649902, "learning_rate": 1.9981558971699965e-05, "loss": 2.3813, "step": 1487 }, { "epoch": 4.878688524590164, "grad_norm": 9.686363220214844, "learning_rate": 1.9981494456128928e-05, "loss": 2.396, "step": 1488 }, { "epoch": 4.881967213114754, "grad_norm": 8.180270195007324, "learning_rate": 1.9981429828006162e-05, "loss": 2.3472, "step": 1489 }, { "epoch": 4.885245901639344, "grad_norm": 9.265848159790039, "learning_rate": 1.99813650873324e-05, "loss": 2.4351, "step": 1490 }, { "epoch": 4.888524590163934, "grad_norm": 8.362054824829102, "learning_rate": 1.9981300234108368e-05, "loss": 2.3394, "step": 1491 }, { "epoch": 4.891803278688524, "grad_norm": 6.974201679229736, "learning_rate": 1.9981235268334796e-05, "loss": 2.231, "step": 1492 }, { "epoch": 4.895081967213114, "grad_norm": 8.866076469421387, "learning_rate": 1.998117019001242e-05, "loss": 2.1929, "step": 1493 }, { "epoch": 4.898360655737705, "grad_norm": 7.233956336975098, "learning_rate": 1.998110499914197e-05, "loss": 2.1865, "step": 1494 }, { "epoch": 4.901639344262295, "grad_norm": 7.293722629547119, "learning_rate": 1.9981039695724186e-05, "loss": 2.2583, "step": 1495 }, { "epoch": 4.9049180327868855, "grad_norm": 8.194658279418945, "learning_rate": 1.9980974279759803e-05, "loss": 2.2495, "step": 1496 }, { "epoch": 4.908196721311476, "grad_norm": 11.401469230651855, "learning_rate": 1.9980908751249556e-05, "loss": 2.4146, "step": 1497 }, { "epoch": 4.911475409836066, "grad_norm": 7.947628974914551, "learning_rate": 1.9980843110194183e-05, "loss": 2.3076, "step": 1498 }, { "epoch": 4.914754098360656, "grad_norm": 10.911042213439941, "learning_rate": 1.9980777356594427e-05, "loss": 2.1157, "step": 1499 }, { "epoch": 4.918032786885246, "grad_norm": 11.631654739379883, "learning_rate": 1.998071149045103e-05, "loss": 2.4102, "step": 1500 }, { "epoch": 4.921311475409836, "grad_norm": 9.260208129882812, "learning_rate": 1.9980645511764733e-05, "loss": 2.3521, "step": 1501 }, { "epoch": 4.924590163934426, "grad_norm": 8.914488792419434, "learning_rate": 1.9980579420536282e-05, "loss": 2.396, "step": 1502 }, { "epoch": 4.927868852459016, "grad_norm": 9.85061264038086, "learning_rate": 1.9980513216766423e-05, "loss": 2.3179, "step": 1503 }, { "epoch": 4.9311475409836065, "grad_norm": 8.877732276916504, "learning_rate": 1.9980446900455893e-05, "loss": 2.1616, "step": 1504 }, { "epoch": 4.934426229508197, "grad_norm": 7.858672618865967, "learning_rate": 1.9980380471605453e-05, "loss": 2.2891, "step": 1505 }, { "epoch": 4.937704918032787, "grad_norm": 6.5707292556762695, "learning_rate": 1.9980313930215843e-05, "loss": 2.3252, "step": 1506 }, { "epoch": 4.940983606557377, "grad_norm": 8.017571449279785, "learning_rate": 1.998024727628782e-05, "loss": 2.2529, "step": 1507 }, { "epoch": 4.944262295081967, "grad_norm": 15.17930793762207, "learning_rate": 1.9980180509822132e-05, "loss": 2.1782, "step": 1508 }, { "epoch": 4.947540983606557, "grad_norm": 8.849798202514648, "learning_rate": 1.998011363081953e-05, "loss": 2.167, "step": 1509 }, { "epoch": 4.950819672131147, "grad_norm": 10.19586181640625, "learning_rate": 1.998004663928077e-05, "loss": 2.4404, "step": 1510 }, { "epoch": 4.954098360655737, "grad_norm": 9.179126739501953, "learning_rate": 1.9979979535206605e-05, "loss": 2.4116, "step": 1511 }, { "epoch": 4.9573770491803275, "grad_norm": 13.973380088806152, "learning_rate": 1.9979912318597797e-05, "loss": 2.3521, "step": 1512 }, { "epoch": 4.9606557377049185, "grad_norm": 10.007936477661133, "learning_rate": 1.9979844989455105e-05, "loss": 2.2534, "step": 1513 }, { "epoch": 4.963934426229509, "grad_norm": 9.276679992675781, "learning_rate": 1.997977754777928e-05, "loss": 2.1431, "step": 1514 }, { "epoch": 4.967213114754099, "grad_norm": 9.994882583618164, "learning_rate": 1.9979709993571086e-05, "loss": 2.1753, "step": 1515 }, { "epoch": 4.970491803278689, "grad_norm": 12.655614852905273, "learning_rate": 1.9979642326831287e-05, "loss": 2.2983, "step": 1516 }, { "epoch": 4.973770491803279, "grad_norm": 14.948689460754395, "learning_rate": 1.9979574547560644e-05, "loss": 2.1089, "step": 1517 }, { "epoch": 4.977049180327869, "grad_norm": 9.689878463745117, "learning_rate": 1.997950665575992e-05, "loss": 2.4014, "step": 1518 }, { "epoch": 4.980327868852459, "grad_norm": 7.628085136413574, "learning_rate": 1.9979438651429886e-05, "loss": 2.2676, "step": 1519 }, { "epoch": 4.983606557377049, "grad_norm": 8.706101417541504, "learning_rate": 1.9979370534571304e-05, "loss": 2.3154, "step": 1520 }, { "epoch": 4.9868852459016395, "grad_norm": 8.004510879516602, "learning_rate": 1.9979302305184943e-05, "loss": 2.1533, "step": 1521 }, { "epoch": 4.99016393442623, "grad_norm": 6.789425373077393, "learning_rate": 1.997923396327157e-05, "loss": 2.249, "step": 1522 }, { "epoch": 4.99344262295082, "grad_norm": 9.101860046386719, "learning_rate": 1.9979165508831964e-05, "loss": 2.2627, "step": 1523 }, { "epoch": 4.99672131147541, "grad_norm": 11.099681854248047, "learning_rate": 1.9979096941866887e-05, "loss": 2.2549, "step": 1524 }, { "epoch": 5.0, "grad_norm": 15.715274810791016, "learning_rate": 1.997902826237712e-05, "loss": 2.2314, "step": 1525 }, { "epoch": 5.00327868852459, "grad_norm": 7.986493110656738, "learning_rate": 1.997895947036343e-05, "loss": 2.2295, "step": 1526 }, { "epoch": 5.00655737704918, "grad_norm": 8.786258697509766, "learning_rate": 1.9978890565826596e-05, "loss": 2.1733, "step": 1527 }, { "epoch": 5.00983606557377, "grad_norm": 7.511916637420654, "learning_rate": 1.9978821548767398e-05, "loss": 2.3169, "step": 1528 }, { "epoch": 5.0131147540983605, "grad_norm": 11.900248527526855, "learning_rate": 1.997875241918661e-05, "loss": 2.228, "step": 1529 }, { "epoch": 5.016393442622951, "grad_norm": 9.723897933959961, "learning_rate": 1.9978683177085013e-05, "loss": 2.1353, "step": 1530 }, { "epoch": 5.019672131147541, "grad_norm": 11.902832984924316, "learning_rate": 1.9978613822463392e-05, "loss": 2.2617, "step": 1531 }, { "epoch": 5.022950819672131, "grad_norm": 8.817872047424316, "learning_rate": 1.997854435532252e-05, "loss": 2.2891, "step": 1532 }, { "epoch": 5.026229508196721, "grad_norm": 13.005709648132324, "learning_rate": 1.997847477566319e-05, "loss": 1.9136, "step": 1533 }, { "epoch": 5.029508196721311, "grad_norm": 7.313089847564697, "learning_rate": 1.9978405083486175e-05, "loss": 2.292, "step": 1534 }, { "epoch": 5.032786885245901, "grad_norm": 8.144798278808594, "learning_rate": 1.9978335278792272e-05, "loss": 2.1479, "step": 1535 }, { "epoch": 5.036065573770492, "grad_norm": 9.976720809936523, "learning_rate": 1.9978265361582264e-05, "loss": 2.3296, "step": 1536 }, { "epoch": 5.039344262295082, "grad_norm": 6.427967071533203, "learning_rate": 1.997819533185694e-05, "loss": 2.376, "step": 1537 }, { "epoch": 5.0426229508196725, "grad_norm": 8.206558227539062, "learning_rate": 1.9978125189617086e-05, "loss": 2.229, "step": 1538 }, { "epoch": 5.045901639344263, "grad_norm": 8.117238998413086, "learning_rate": 1.9978054934863496e-05, "loss": 2.3906, "step": 1539 }, { "epoch": 5.049180327868853, "grad_norm": 8.827094078063965, "learning_rate": 1.9977984567596965e-05, "loss": 2.1006, "step": 1540 }, { "epoch": 5.052459016393443, "grad_norm": 6.719709873199463, "learning_rate": 1.9977914087818284e-05, "loss": 2.3574, "step": 1541 }, { "epoch": 5.055737704918033, "grad_norm": 11.94161605834961, "learning_rate": 1.9977843495528245e-05, "loss": 2.3389, "step": 1542 }, { "epoch": 5.059016393442623, "grad_norm": 11.543363571166992, "learning_rate": 1.9977772790727646e-05, "loss": 2.2783, "step": 1543 }, { "epoch": 5.062295081967213, "grad_norm": 10.08199405670166, "learning_rate": 1.9977701973417286e-05, "loss": 2.1572, "step": 1544 }, { "epoch": 5.065573770491803, "grad_norm": 10.379772186279297, "learning_rate": 1.997763104359796e-05, "loss": 2.1851, "step": 1545 }, { "epoch": 5.0688524590163935, "grad_norm": 7.086282730102539, "learning_rate": 1.9977560001270472e-05, "loss": 2.1763, "step": 1546 }, { "epoch": 5.072131147540984, "grad_norm": 8.310592651367188, "learning_rate": 1.9977488846435616e-05, "loss": 2.0977, "step": 1547 }, { "epoch": 5.075409836065574, "grad_norm": 9.908846855163574, "learning_rate": 1.9977417579094206e-05, "loss": 2.1885, "step": 1548 }, { "epoch": 5.078688524590164, "grad_norm": 8.847105026245117, "learning_rate": 1.9977346199247037e-05, "loss": 2.2534, "step": 1549 }, { "epoch": 5.081967213114754, "grad_norm": 8.683076858520508, "learning_rate": 1.9977274706894912e-05, "loss": 2.311, "step": 1550 }, { "epoch": 5.085245901639344, "grad_norm": 8.377821922302246, "learning_rate": 1.9977203102038645e-05, "loss": 2.2607, "step": 1551 }, { "epoch": 5.088524590163934, "grad_norm": 7.588249206542969, "learning_rate": 1.9977131384679038e-05, "loss": 2.1729, "step": 1552 }, { "epoch": 5.091803278688524, "grad_norm": 6.5741753578186035, "learning_rate": 1.99770595548169e-05, "loss": 2.311, "step": 1553 }, { "epoch": 5.0950819672131145, "grad_norm": 7.309812068939209, "learning_rate": 1.9976987612453044e-05, "loss": 2.3594, "step": 1554 }, { "epoch": 5.098360655737705, "grad_norm": 10.961227416992188, "learning_rate": 1.9976915557588277e-05, "loss": 2.3159, "step": 1555 }, { "epoch": 5.101639344262295, "grad_norm": 7.925613880157471, "learning_rate": 1.9976843390223416e-05, "loss": 2.2563, "step": 1556 }, { "epoch": 5.104918032786885, "grad_norm": 8.854681015014648, "learning_rate": 1.997677111035927e-05, "loss": 2.145, "step": 1557 }, { "epoch": 5.108196721311476, "grad_norm": 7.563417434692383, "learning_rate": 1.9976698717996662e-05, "loss": 2.2417, "step": 1558 }, { "epoch": 5.111475409836066, "grad_norm": 8.602195739746094, "learning_rate": 1.9976626213136396e-05, "loss": 2.2207, "step": 1559 }, { "epoch": 5.114754098360656, "grad_norm": 7.6425251960754395, "learning_rate": 1.9976553595779298e-05, "loss": 2.2031, "step": 1560 }, { "epoch": 5.118032786885246, "grad_norm": 9.380359649658203, "learning_rate": 1.997648086592619e-05, "loss": 2.0845, "step": 1561 }, { "epoch": 5.121311475409836, "grad_norm": 7.270132541656494, "learning_rate": 1.997640802357788e-05, "loss": 2.3008, "step": 1562 }, { "epoch": 5.1245901639344265, "grad_norm": 8.725974082946777, "learning_rate": 1.9976335068735202e-05, "loss": 2.1802, "step": 1563 }, { "epoch": 5.127868852459017, "grad_norm": 8.222843170166016, "learning_rate": 1.9976262001398973e-05, "loss": 2.2671, "step": 1564 }, { "epoch": 5.131147540983607, "grad_norm": 7.581567287445068, "learning_rate": 1.9976188821570013e-05, "loss": 2.1826, "step": 1565 }, { "epoch": 5.134426229508197, "grad_norm": 8.926217079162598, "learning_rate": 1.9976115529249155e-05, "loss": 2.1519, "step": 1566 }, { "epoch": 5.137704918032787, "grad_norm": 7.832777500152588, "learning_rate": 1.9976042124437222e-05, "loss": 2.2476, "step": 1567 }, { "epoch": 5.140983606557377, "grad_norm": 10.035630226135254, "learning_rate": 1.9975968607135038e-05, "loss": 2.1646, "step": 1568 }, { "epoch": 5.144262295081967, "grad_norm": 6.808441162109375, "learning_rate": 1.9975894977343437e-05, "loss": 2.3906, "step": 1569 }, { "epoch": 5.147540983606557, "grad_norm": 7.076091766357422, "learning_rate": 1.997582123506325e-05, "loss": 2.2656, "step": 1570 }, { "epoch": 5.150819672131147, "grad_norm": 9.308235168457031, "learning_rate": 1.9975747380295307e-05, "loss": 2.2075, "step": 1571 }, { "epoch": 5.154098360655738, "grad_norm": 7.561956405639648, "learning_rate": 1.9975673413040437e-05, "loss": 2.2568, "step": 1572 }, { "epoch": 5.157377049180328, "grad_norm": 10.773516654968262, "learning_rate": 1.997559933329948e-05, "loss": 2.4023, "step": 1573 }, { "epoch": 5.160655737704918, "grad_norm": 8.186235427856445, "learning_rate": 1.9975525141073263e-05, "loss": 2.083, "step": 1574 }, { "epoch": 5.163934426229508, "grad_norm": 7.714053630828857, "learning_rate": 1.9975450836362635e-05, "loss": 2.272, "step": 1575 }, { "epoch": 5.167213114754098, "grad_norm": 7.110463619232178, "learning_rate": 1.9975376419168423e-05, "loss": 2.1895, "step": 1576 }, { "epoch": 5.170491803278688, "grad_norm": 8.600821495056152, "learning_rate": 1.997530188949147e-05, "loss": 2.0503, "step": 1577 }, { "epoch": 5.173770491803278, "grad_norm": 7.8853044509887695, "learning_rate": 1.997522724733262e-05, "loss": 2.2207, "step": 1578 }, { "epoch": 5.177049180327868, "grad_norm": 14.151033401489258, "learning_rate": 1.9975152492692706e-05, "loss": 2.1953, "step": 1579 }, { "epoch": 5.180327868852459, "grad_norm": 8.599089622497559, "learning_rate": 1.997507762557258e-05, "loss": 2.229, "step": 1580 }, { "epoch": 5.18360655737705, "grad_norm": 7.752981662750244, "learning_rate": 1.9975002645973082e-05, "loss": 2.3428, "step": 1581 }, { "epoch": 5.18688524590164, "grad_norm": 8.226442337036133, "learning_rate": 1.9974927553895056e-05, "loss": 2.2578, "step": 1582 }, { "epoch": 5.19016393442623, "grad_norm": 10.11301040649414, "learning_rate": 1.997485234933935e-05, "loss": 2.1348, "step": 1583 }, { "epoch": 5.19344262295082, "grad_norm": 9.07986068725586, "learning_rate": 1.9974777032306817e-05, "loss": 2.2104, "step": 1584 }, { "epoch": 5.19672131147541, "grad_norm": 8.915624618530273, "learning_rate": 1.9974701602798298e-05, "loss": 2.3789, "step": 1585 }, { "epoch": 5.2, "grad_norm": 9.924843788146973, "learning_rate": 1.997462606081465e-05, "loss": 2.2534, "step": 1586 }, { "epoch": 5.20327868852459, "grad_norm": 8.275459289550781, "learning_rate": 1.9974550406356718e-05, "loss": 2.2354, "step": 1587 }, { "epoch": 5.20655737704918, "grad_norm": 7.557249546051025, "learning_rate": 1.997447463942536e-05, "loss": 2.0303, "step": 1588 }, { "epoch": 5.2098360655737705, "grad_norm": 8.49648666381836, "learning_rate": 1.997439876002143e-05, "loss": 2.1333, "step": 1589 }, { "epoch": 5.213114754098361, "grad_norm": 10.370760917663574, "learning_rate": 1.9974322768145787e-05, "loss": 2.1875, "step": 1590 }, { "epoch": 5.216393442622951, "grad_norm": 7.185684680938721, "learning_rate": 1.997424666379928e-05, "loss": 2.2817, "step": 1591 }, { "epoch": 5.219672131147541, "grad_norm": 9.491006851196289, "learning_rate": 1.9974170446982773e-05, "loss": 2.2002, "step": 1592 }, { "epoch": 5.222950819672131, "grad_norm": 9.802785873413086, "learning_rate": 1.9974094117697125e-05, "loss": 1.9756, "step": 1593 }, { "epoch": 5.226229508196721, "grad_norm": 10.929823875427246, "learning_rate": 1.997401767594319e-05, "loss": 2.0864, "step": 1594 }, { "epoch": 5.229508196721311, "grad_norm": 6.627323627471924, "learning_rate": 1.997394112172184e-05, "loss": 2.2026, "step": 1595 }, { "epoch": 5.232786885245901, "grad_norm": 8.76904010772705, "learning_rate": 1.9973864455033933e-05, "loss": 2.1455, "step": 1596 }, { "epoch": 5.2360655737704915, "grad_norm": 9.064985275268555, "learning_rate": 1.9973787675880334e-05, "loss": 2.5098, "step": 1597 }, { "epoch": 5.239344262295082, "grad_norm": 10.462045669555664, "learning_rate": 1.997371078426191e-05, "loss": 2.3291, "step": 1598 }, { "epoch": 5.242622950819672, "grad_norm": 6.407308101654053, "learning_rate": 1.997363378017952e-05, "loss": 2.3853, "step": 1599 }, { "epoch": 5.245901639344262, "grad_norm": 6.751407146453857, "learning_rate": 1.997355666363405e-05, "loss": 2.2412, "step": 1600 }, { "epoch": 5.249180327868853, "grad_norm": 10.70000171661377, "learning_rate": 1.997347943462635e-05, "loss": 1.9883, "step": 1601 }, { "epoch": 5.252459016393443, "grad_norm": 8.793156623840332, "learning_rate": 1.9973402093157303e-05, "loss": 2.29, "step": 1602 }, { "epoch": 5.255737704918033, "grad_norm": 10.6327486038208, "learning_rate": 1.997332463922778e-05, "loss": 2.0239, "step": 1603 }, { "epoch": 5.259016393442623, "grad_norm": 9.58663272857666, "learning_rate": 1.9973247072838646e-05, "loss": 2.1499, "step": 1604 }, { "epoch": 5.262295081967213, "grad_norm": 8.786312103271484, "learning_rate": 1.9973169393990784e-05, "loss": 2.1582, "step": 1605 }, { "epoch": 5.2655737704918035, "grad_norm": 7.230452537536621, "learning_rate": 1.997309160268507e-05, "loss": 2.3037, "step": 1606 }, { "epoch": 5.268852459016394, "grad_norm": 8.836136817932129, "learning_rate": 1.9973013698922378e-05, "loss": 2.0845, "step": 1607 }, { "epoch": 5.272131147540984, "grad_norm": 8.41998291015625, "learning_rate": 1.9972935682703584e-05, "loss": 2.3965, "step": 1608 }, { "epoch": 5.275409836065574, "grad_norm": 7.065313339233398, "learning_rate": 1.9972857554029576e-05, "loss": 2.1992, "step": 1609 }, { "epoch": 5.278688524590164, "grad_norm": 8.005906105041504, "learning_rate": 1.9972779312901227e-05, "loss": 2.3359, "step": 1610 }, { "epoch": 5.281967213114754, "grad_norm": 7.743003845214844, "learning_rate": 1.9972700959319422e-05, "loss": 2.0886, "step": 1611 }, { "epoch": 5.285245901639344, "grad_norm": 7.282946586608887, "learning_rate": 1.9972622493285045e-05, "loss": 2.085, "step": 1612 }, { "epoch": 5.288524590163934, "grad_norm": 7.685099124908447, "learning_rate": 1.997254391479898e-05, "loss": 2.2437, "step": 1613 }, { "epoch": 5.2918032786885245, "grad_norm": 7.889583110809326, "learning_rate": 1.9972465223862114e-05, "loss": 2.2217, "step": 1614 }, { "epoch": 5.295081967213115, "grad_norm": 7.384847164154053, "learning_rate": 1.9972386420475334e-05, "loss": 1.9668, "step": 1615 }, { "epoch": 5.298360655737705, "grad_norm": 9.779648780822754, "learning_rate": 1.997230750463953e-05, "loss": 2.2812, "step": 1616 }, { "epoch": 5.301639344262295, "grad_norm": 9.862998008728027, "learning_rate": 1.997222847635559e-05, "loss": 2.2871, "step": 1617 }, { "epoch": 5.304918032786885, "grad_norm": 9.545348167419434, "learning_rate": 1.9972149335624404e-05, "loss": 2.1714, "step": 1618 }, { "epoch": 5.308196721311475, "grad_norm": 10.586275100708008, "learning_rate": 1.9972070082446863e-05, "loss": 2.1626, "step": 1619 }, { "epoch": 5.311475409836065, "grad_norm": 10.911495208740234, "learning_rate": 1.997199071682387e-05, "loss": 2.2886, "step": 1620 }, { "epoch": 5.314754098360655, "grad_norm": 9.304929733276367, "learning_rate": 1.997191123875631e-05, "loss": 2.2305, "step": 1621 }, { "epoch": 5.3180327868852455, "grad_norm": 9.16247844696045, "learning_rate": 1.997183164824508e-05, "loss": 2.2295, "step": 1622 }, { "epoch": 5.321311475409836, "grad_norm": 21.405466079711914, "learning_rate": 1.9971751945291083e-05, "loss": 2.3335, "step": 1623 }, { "epoch": 5.324590163934427, "grad_norm": 9.13647747039795, "learning_rate": 1.9971672129895214e-05, "loss": 2.2129, "step": 1624 }, { "epoch": 5.327868852459017, "grad_norm": 17.8540096282959, "learning_rate": 1.9971592202058373e-05, "loss": 2.2319, "step": 1625 }, { "epoch": 5.331147540983607, "grad_norm": 8.430570602416992, "learning_rate": 1.9971512161781463e-05, "loss": 2.313, "step": 1626 }, { "epoch": 5.334426229508197, "grad_norm": 13.841293334960938, "learning_rate": 1.9971432009065384e-05, "loss": 2.2275, "step": 1627 }, { "epoch": 5.337704918032787, "grad_norm": 11.388249397277832, "learning_rate": 1.9971351743911046e-05, "loss": 2.417, "step": 1628 }, { "epoch": 5.340983606557377, "grad_norm": 8.155027389526367, "learning_rate": 1.9971271366319348e-05, "loss": 2.2871, "step": 1629 }, { "epoch": 5.344262295081967, "grad_norm": 9.103852272033691, "learning_rate": 1.9971190876291195e-05, "loss": 2.4277, "step": 1630 }, { "epoch": 5.3475409836065575, "grad_norm": 18.018831253051758, "learning_rate": 1.99711102738275e-05, "loss": 2.3252, "step": 1631 }, { "epoch": 5.350819672131148, "grad_norm": 7.307884693145752, "learning_rate": 1.9971029558929166e-05, "loss": 2.1631, "step": 1632 }, { "epoch": 5.354098360655738, "grad_norm": 10.438246726989746, "learning_rate": 1.9970948731597112e-05, "loss": 2.2607, "step": 1633 }, { "epoch": 5.357377049180328, "grad_norm": 8.40207576751709, "learning_rate": 1.997086779183224e-05, "loss": 2.1479, "step": 1634 }, { "epoch": 5.360655737704918, "grad_norm": 9.077658653259277, "learning_rate": 1.9970786739635465e-05, "loss": 2.3408, "step": 1635 }, { "epoch": 5.363934426229508, "grad_norm": 29.278385162353516, "learning_rate": 1.9970705575007705e-05, "loss": 2.334, "step": 1636 }, { "epoch": 5.367213114754098, "grad_norm": 10.387931823730469, "learning_rate": 1.997062429794987e-05, "loss": 2.2344, "step": 1637 }, { "epoch": 5.370491803278688, "grad_norm": 9.315001487731934, "learning_rate": 1.9970542908462883e-05, "loss": 2.0552, "step": 1638 }, { "epoch": 5.3737704918032785, "grad_norm": 8.673306465148926, "learning_rate": 1.9970461406547657e-05, "loss": 2.2222, "step": 1639 }, { "epoch": 5.377049180327869, "grad_norm": 7.705928325653076, "learning_rate": 1.997037979220511e-05, "loss": 2.4004, "step": 1640 }, { "epoch": 5.380327868852459, "grad_norm": 13.616782188415527, "learning_rate": 1.9970298065436167e-05, "loss": 2.1411, "step": 1641 }, { "epoch": 5.383606557377049, "grad_norm": 8.128501892089844, "learning_rate": 1.9970216226241742e-05, "loss": 2.416, "step": 1642 }, { "epoch": 5.386885245901639, "grad_norm": 12.860286712646484, "learning_rate": 1.997013427462277e-05, "loss": 2.3579, "step": 1643 }, { "epoch": 5.390163934426229, "grad_norm": 12.972006797790527, "learning_rate": 1.9970052210580162e-05, "loss": 2.0835, "step": 1644 }, { "epoch": 5.39344262295082, "grad_norm": 8.77358341217041, "learning_rate": 1.9969970034114853e-05, "loss": 2.1479, "step": 1645 }, { "epoch": 5.39672131147541, "grad_norm": 10.940195083618164, "learning_rate": 1.9969887745227764e-05, "loss": 2.3643, "step": 1646 }, { "epoch": 5.4, "grad_norm": 10.756182670593262, "learning_rate": 1.9969805343919822e-05, "loss": 2.1895, "step": 1647 }, { "epoch": 5.4032786885245905, "grad_norm": 8.897026062011719, "learning_rate": 1.9969722830191964e-05, "loss": 2.2695, "step": 1648 }, { "epoch": 5.406557377049181, "grad_norm": 13.711145401000977, "learning_rate": 1.996964020404511e-05, "loss": 2.3237, "step": 1649 }, { "epoch": 5.409836065573771, "grad_norm": 6.55612325668335, "learning_rate": 1.99695574654802e-05, "loss": 2.1123, "step": 1650 }, { "epoch": 5.413114754098361, "grad_norm": 10.302178382873535, "learning_rate": 1.996947461449816e-05, "loss": 2.2441, "step": 1651 }, { "epoch": 5.416393442622951, "grad_norm": 10.32493782043457, "learning_rate": 1.9969391651099933e-05, "loss": 2.4702, "step": 1652 }, { "epoch": 5.419672131147541, "grad_norm": 23.160476684570312, "learning_rate": 1.9969308575286445e-05, "loss": 2.4668, "step": 1653 }, { "epoch": 5.422950819672131, "grad_norm": 8.262652397155762, "learning_rate": 1.996922538705864e-05, "loss": 2.2749, "step": 1654 }, { "epoch": 5.426229508196721, "grad_norm": 8.628045082092285, "learning_rate": 1.9969142086417452e-05, "loss": 2.1958, "step": 1655 }, { "epoch": 5.4295081967213115, "grad_norm": 8.489818572998047, "learning_rate": 1.9969058673363824e-05, "loss": 2.3643, "step": 1656 }, { "epoch": 5.432786885245902, "grad_norm": 11.182476997375488, "learning_rate": 1.996897514789869e-05, "loss": 2.2422, "step": 1657 }, { "epoch": 5.436065573770492, "grad_norm": 9.722021102905273, "learning_rate": 1.9968891510023e-05, "loss": 2.1001, "step": 1658 }, { "epoch": 5.439344262295082, "grad_norm": 10.572216033935547, "learning_rate": 1.9968807759737695e-05, "loss": 2.2544, "step": 1659 }, { "epoch": 5.442622950819672, "grad_norm": 16.142730712890625, "learning_rate": 1.9968723897043714e-05, "loss": 2.1748, "step": 1660 }, { "epoch": 5.445901639344262, "grad_norm": 6.76345157623291, "learning_rate": 1.9968639921942007e-05, "loss": 2.2646, "step": 1661 }, { "epoch": 5.449180327868852, "grad_norm": 13.4464693069458, "learning_rate": 1.996855583443352e-05, "loss": 2.2393, "step": 1662 }, { "epoch": 5.452459016393442, "grad_norm": 10.846250534057617, "learning_rate": 1.99684716345192e-05, "loss": 2.21, "step": 1663 }, { "epoch": 5.4557377049180324, "grad_norm": 9.558996200561523, "learning_rate": 1.9968387322199998e-05, "loss": 2.0112, "step": 1664 }, { "epoch": 5.459016393442623, "grad_norm": 7.416174411773682, "learning_rate": 1.9968302897476865e-05, "loss": 2.2979, "step": 1665 }, { "epoch": 5.462295081967213, "grad_norm": 8.773212432861328, "learning_rate": 1.9968218360350752e-05, "loss": 2.1489, "step": 1666 }, { "epoch": 5.465573770491803, "grad_norm": 11.177427291870117, "learning_rate": 1.9968133710822616e-05, "loss": 2.1597, "step": 1667 }, { "epoch": 5.468852459016394, "grad_norm": 8.333635330200195, "learning_rate": 1.9968048948893406e-05, "loss": 2.1504, "step": 1668 }, { "epoch": 5.472131147540984, "grad_norm": 10.27275276184082, "learning_rate": 1.9967964074564077e-05, "loss": 2.2021, "step": 1669 }, { "epoch": 5.475409836065574, "grad_norm": 8.350446701049805, "learning_rate": 1.9967879087835594e-05, "loss": 2.2954, "step": 1670 }, { "epoch": 5.478688524590164, "grad_norm": 8.18280029296875, "learning_rate": 1.9967793988708908e-05, "loss": 2.2563, "step": 1671 }, { "epoch": 5.481967213114754, "grad_norm": 10.903337478637695, "learning_rate": 1.996770877718498e-05, "loss": 2.2515, "step": 1672 }, { "epoch": 5.4852459016393444, "grad_norm": 9.249473571777344, "learning_rate": 1.9967623453264773e-05, "loss": 2.2163, "step": 1673 }, { "epoch": 5.488524590163935, "grad_norm": 11.188718795776367, "learning_rate": 1.9967538016949247e-05, "loss": 2.0708, "step": 1674 }, { "epoch": 5.491803278688525, "grad_norm": 16.521568298339844, "learning_rate": 1.9967452468239366e-05, "loss": 2.2334, "step": 1675 }, { "epoch": 5.495081967213115, "grad_norm": 10.360943794250488, "learning_rate": 1.9967366807136094e-05, "loss": 2.2153, "step": 1676 }, { "epoch": 5.498360655737705, "grad_norm": 8.645186424255371, "learning_rate": 1.99672810336404e-05, "loss": 2.1929, "step": 1677 }, { "epoch": 5.501639344262295, "grad_norm": 8.558978080749512, "learning_rate": 1.996719514775325e-05, "loss": 2.3389, "step": 1678 }, { "epoch": 5.504918032786885, "grad_norm": 11.260418891906738, "learning_rate": 1.9967109149475608e-05, "loss": 2.1885, "step": 1679 }, { "epoch": 5.508196721311475, "grad_norm": 8.086681365966797, "learning_rate": 1.9967023038808448e-05, "loss": 2.1802, "step": 1680 }, { "epoch": 5.511475409836065, "grad_norm": 9.164934158325195, "learning_rate": 1.996693681575274e-05, "loss": 2.2476, "step": 1681 }, { "epoch": 5.5147540983606556, "grad_norm": 12.52342414855957, "learning_rate": 1.996685048030946e-05, "loss": 2.1377, "step": 1682 }, { "epoch": 5.518032786885246, "grad_norm": 8.65980052947998, "learning_rate": 1.996676403247957e-05, "loss": 2.2266, "step": 1683 }, { "epoch": 5.521311475409836, "grad_norm": 8.700172424316406, "learning_rate": 1.9966677472264064e-05, "loss": 2.1875, "step": 1684 }, { "epoch": 5.524590163934426, "grad_norm": 10.896636009216309, "learning_rate": 1.99665907996639e-05, "loss": 2.2012, "step": 1685 }, { "epoch": 5.527868852459016, "grad_norm": 8.844015121459961, "learning_rate": 1.996650401468006e-05, "loss": 2.2183, "step": 1686 }, { "epoch": 5.531147540983606, "grad_norm": 7.892405986785889, "learning_rate": 1.9966417117313527e-05, "loss": 2.2197, "step": 1687 }, { "epoch": 5.534426229508197, "grad_norm": 8.769259452819824, "learning_rate": 1.996633010756528e-05, "loss": 2.0103, "step": 1688 }, { "epoch": 5.537704918032787, "grad_norm": 7.0891642570495605, "learning_rate": 1.9966242985436298e-05, "loss": 2.2939, "step": 1689 }, { "epoch": 5.540983606557377, "grad_norm": 8.564269065856934, "learning_rate": 1.996615575092756e-05, "loss": 2.1978, "step": 1690 }, { "epoch": 5.5442622950819676, "grad_norm": 8.245220184326172, "learning_rate": 1.996606840404006e-05, "loss": 2.4717, "step": 1691 }, { "epoch": 5.547540983606558, "grad_norm": 12.031129837036133, "learning_rate": 1.9965980944774773e-05, "loss": 2.3364, "step": 1692 }, { "epoch": 5.550819672131148, "grad_norm": 9.232559204101562, "learning_rate": 1.9965893373132686e-05, "loss": 2.2041, "step": 1693 }, { "epoch": 5.554098360655738, "grad_norm": 9.553603172302246, "learning_rate": 1.9965805689114796e-05, "loss": 1.9849, "step": 1694 }, { "epoch": 5.557377049180328, "grad_norm": 10.281964302062988, "learning_rate": 1.996571789272208e-05, "loss": 2.0435, "step": 1695 }, { "epoch": 5.560655737704918, "grad_norm": 7.917313575744629, "learning_rate": 1.9965629983955535e-05, "loss": 2.2778, "step": 1696 }, { "epoch": 5.563934426229508, "grad_norm": 9.684171676635742, "learning_rate": 1.9965541962816154e-05, "loss": 2.0757, "step": 1697 }, { "epoch": 5.567213114754098, "grad_norm": 8.004581451416016, "learning_rate": 1.996545382930492e-05, "loss": 2.165, "step": 1698 }, { "epoch": 5.5704918032786885, "grad_norm": 7.896296501159668, "learning_rate": 1.9965365583422834e-05, "loss": 2.2485, "step": 1699 }, { "epoch": 5.573770491803279, "grad_norm": 6.686058044433594, "learning_rate": 1.996527722517089e-05, "loss": 2.269, "step": 1700 }, { "epoch": 5.577049180327869, "grad_norm": 10.759909629821777, "learning_rate": 1.9965188754550086e-05, "loss": 2.2871, "step": 1701 }, { "epoch": 5.580327868852459, "grad_norm": 10.096692085266113, "learning_rate": 1.9965100171561414e-05, "loss": 2.2319, "step": 1702 }, { "epoch": 5.583606557377049, "grad_norm": 11.114628791809082, "learning_rate": 1.9965011476205876e-05, "loss": 2.2485, "step": 1703 }, { "epoch": 5.586885245901639, "grad_norm": 11.68272876739502, "learning_rate": 1.996492266848448e-05, "loss": 2.252, "step": 1704 }, { "epoch": 5.590163934426229, "grad_norm": 8.923806190490723, "learning_rate": 1.9964833748398213e-05, "loss": 2.2178, "step": 1705 }, { "epoch": 5.593442622950819, "grad_norm": 8.27995491027832, "learning_rate": 1.996474471594809e-05, "loss": 2.0645, "step": 1706 }, { "epoch": 5.5967213114754095, "grad_norm": 9.054986953735352, "learning_rate": 1.9964655571135105e-05, "loss": 2.1128, "step": 1707 }, { "epoch": 5.6, "grad_norm": 9.339746475219727, "learning_rate": 1.9964566313960265e-05, "loss": 2.3721, "step": 1708 }, { "epoch": 5.60327868852459, "grad_norm": 12.626460075378418, "learning_rate": 1.9964476944424585e-05, "loss": 2.1821, "step": 1709 }, { "epoch": 5.60655737704918, "grad_norm": 7.956984996795654, "learning_rate": 1.9964387462529066e-05, "loss": 2.25, "step": 1710 }, { "epoch": 5.60983606557377, "grad_norm": 8.211455345153809, "learning_rate": 1.9964297868274717e-05, "loss": 2.1709, "step": 1711 }, { "epoch": 5.613114754098361, "grad_norm": 8.122361183166504, "learning_rate": 1.996420816166255e-05, "loss": 2.1538, "step": 1712 }, { "epoch": 5.616393442622951, "grad_norm": 9.856185913085938, "learning_rate": 1.9964118342693576e-05, "loss": 2.1875, "step": 1713 }, { "epoch": 5.619672131147541, "grad_norm": 10.976922035217285, "learning_rate": 1.9964028411368805e-05, "loss": 2.1338, "step": 1714 }, { "epoch": 5.622950819672131, "grad_norm": 10.290626525878906, "learning_rate": 1.9963938367689255e-05, "loss": 2.2778, "step": 1715 }, { "epoch": 5.6262295081967215, "grad_norm": 7.359794616699219, "learning_rate": 1.996384821165594e-05, "loss": 2.2212, "step": 1716 }, { "epoch": 5.629508196721312, "grad_norm": 32.10971450805664, "learning_rate": 1.996375794326988e-05, "loss": 2.1306, "step": 1717 }, { "epoch": 5.632786885245902, "grad_norm": 11.226677894592285, "learning_rate": 1.9963667562532083e-05, "loss": 2.0537, "step": 1718 }, { "epoch": 5.636065573770492, "grad_norm": 17.040607452392578, "learning_rate": 1.996357706944358e-05, "loss": 2.1445, "step": 1719 }, { "epoch": 5.639344262295082, "grad_norm": 7.1702070236206055, "learning_rate": 1.9963486464005385e-05, "loss": 2.1094, "step": 1720 }, { "epoch": 5.642622950819672, "grad_norm": 8.24137020111084, "learning_rate": 1.996339574621852e-05, "loss": 2.1899, "step": 1721 }, { "epoch": 5.645901639344262, "grad_norm": 9.911822319030762, "learning_rate": 1.9963304916084008e-05, "loss": 2.2007, "step": 1722 }, { "epoch": 5.649180327868852, "grad_norm": 12.970846176147461, "learning_rate": 1.9963213973602876e-05, "loss": 2.1582, "step": 1723 }, { "epoch": 5.6524590163934425, "grad_norm": 8.423989295959473, "learning_rate": 1.9963122918776142e-05, "loss": 2.2256, "step": 1724 }, { "epoch": 5.655737704918033, "grad_norm": 9.586701393127441, "learning_rate": 1.9963031751604843e-05, "loss": 2.3076, "step": 1725 }, { "epoch": 5.659016393442623, "grad_norm": 10.905159950256348, "learning_rate": 1.9962940472090003e-05, "loss": 2.332, "step": 1726 }, { "epoch": 5.662295081967213, "grad_norm": 8.905807495117188, "learning_rate": 1.9962849080232643e-05, "loss": 2.3613, "step": 1727 }, { "epoch": 5.665573770491803, "grad_norm": 7.821110725402832, "learning_rate": 1.996275757603381e-05, "loss": 2.4312, "step": 1728 }, { "epoch": 5.668852459016393, "grad_norm": 7.2217116355896, "learning_rate": 1.996266595949452e-05, "loss": 2.3638, "step": 1729 }, { "epoch": 5.672131147540983, "grad_norm": 9.773286819458008, "learning_rate": 1.9962574230615816e-05, "loss": 2.3423, "step": 1730 }, { "epoch": 5.675409836065574, "grad_norm": 8.814380645751953, "learning_rate": 1.996248238939873e-05, "loss": 2.0928, "step": 1731 }, { "epoch": 5.678688524590164, "grad_norm": 9.540377616882324, "learning_rate": 1.9962390435844296e-05, "loss": 2.3682, "step": 1732 }, { "epoch": 5.6819672131147545, "grad_norm": 10.270794868469238, "learning_rate": 1.996229836995355e-05, "loss": 2.1772, "step": 1733 }, { "epoch": 5.685245901639345, "grad_norm": 8.249228477478027, "learning_rate": 1.996220619172753e-05, "loss": 2.2139, "step": 1734 }, { "epoch": 5.688524590163935, "grad_norm": 10.476709365844727, "learning_rate": 1.9962113901167282e-05, "loss": 2.209, "step": 1735 }, { "epoch": 5.691803278688525, "grad_norm": 9.809861183166504, "learning_rate": 1.9962021498273837e-05, "loss": 2.1626, "step": 1736 }, { "epoch": 5.695081967213115, "grad_norm": 12.842060089111328, "learning_rate": 1.9961928983048244e-05, "loss": 2.1138, "step": 1737 }, { "epoch": 5.698360655737705, "grad_norm": 15.840921401977539, "learning_rate": 1.9961836355491543e-05, "loss": 2.3394, "step": 1738 }, { "epoch": 5.701639344262295, "grad_norm": 10.703783988952637, "learning_rate": 1.996174361560478e-05, "loss": 2.0605, "step": 1739 }, { "epoch": 5.704918032786885, "grad_norm": 19.116832733154297, "learning_rate": 1.9961650763389e-05, "loss": 2.1313, "step": 1740 }, { "epoch": 5.7081967213114755, "grad_norm": 12.179343223571777, "learning_rate": 1.996155779884525e-05, "loss": 2.0015, "step": 1741 }, { "epoch": 5.711475409836066, "grad_norm": 9.190407752990723, "learning_rate": 1.996146472197458e-05, "loss": 2.1323, "step": 1742 }, { "epoch": 5.714754098360656, "grad_norm": 11.215591430664062, "learning_rate": 1.9961371532778038e-05, "loss": 2.3613, "step": 1743 }, { "epoch": 5.718032786885246, "grad_norm": 35.214195251464844, "learning_rate": 1.9961278231256672e-05, "loss": 2.1421, "step": 1744 }, { "epoch": 5.721311475409836, "grad_norm": 9.722872734069824, "learning_rate": 1.996118481741154e-05, "loss": 2.2163, "step": 1745 }, { "epoch": 5.724590163934426, "grad_norm": 13.476407051086426, "learning_rate": 1.996109129124369e-05, "loss": 2.1001, "step": 1746 }, { "epoch": 5.727868852459016, "grad_norm": 13.275736808776855, "learning_rate": 1.996099765275418e-05, "loss": 2.3628, "step": 1747 }, { "epoch": 5.731147540983606, "grad_norm": 13.250733375549316, "learning_rate": 1.9960903901944066e-05, "loss": 2.062, "step": 1748 }, { "epoch": 5.7344262295081965, "grad_norm": 10.251801490783691, "learning_rate": 1.99608100388144e-05, "loss": 2.1753, "step": 1749 }, { "epoch": 5.737704918032787, "grad_norm": 8.183125495910645, "learning_rate": 1.9960716063366244e-05, "loss": 2.3809, "step": 1750 }, { "epoch": 5.740983606557377, "grad_norm": 11.402488708496094, "learning_rate": 1.996062197560066e-05, "loss": 2.3047, "step": 1751 }, { "epoch": 5.744262295081967, "grad_norm": 12.4561128616333, "learning_rate": 1.9960527775518708e-05, "loss": 2.0771, "step": 1752 }, { "epoch": 5.747540983606557, "grad_norm": 12.609081268310547, "learning_rate": 1.9960433463121447e-05, "loss": 2.2349, "step": 1753 }, { "epoch": 5.750819672131147, "grad_norm": 11.299022674560547, "learning_rate": 1.996033903840994e-05, "loss": 2.0718, "step": 1754 }, { "epoch": 5.754098360655737, "grad_norm": 9.95595645904541, "learning_rate": 1.996024450138526e-05, "loss": 2.2031, "step": 1755 }, { "epoch": 5.757377049180328, "grad_norm": 9.684558868408203, "learning_rate": 1.9960149852048463e-05, "loss": 2.4658, "step": 1756 }, { "epoch": 5.760655737704918, "grad_norm": 12.440092086791992, "learning_rate": 1.996005509040062e-05, "loss": 2.2256, "step": 1757 }, { "epoch": 5.7639344262295085, "grad_norm": 7.9703898429870605, "learning_rate": 1.9959960216442803e-05, "loss": 2.1665, "step": 1758 }, { "epoch": 5.767213114754099, "grad_norm": 15.445773124694824, "learning_rate": 1.9959865230176077e-05, "loss": 2.3042, "step": 1759 }, { "epoch": 5.770491803278689, "grad_norm": 9.901790618896484, "learning_rate": 1.9959770131601516e-05, "loss": 2.0669, "step": 1760 }, { "epoch": 5.773770491803279, "grad_norm": 8.81729793548584, "learning_rate": 1.9959674920720192e-05, "loss": 2.3242, "step": 1761 }, { "epoch": 5.777049180327869, "grad_norm": 9.612850189208984, "learning_rate": 1.9959579597533176e-05, "loss": 2.1504, "step": 1762 }, { "epoch": 5.780327868852459, "grad_norm": 13.390079498291016, "learning_rate": 1.9959484162041544e-05, "loss": 2.0483, "step": 1763 }, { "epoch": 5.783606557377049, "grad_norm": 8.042240142822266, "learning_rate": 1.9959388614246373e-05, "loss": 2.1875, "step": 1764 }, { "epoch": 5.786885245901639, "grad_norm": 11.215533256530762, "learning_rate": 1.995929295414874e-05, "loss": 2.0205, "step": 1765 }, { "epoch": 5.7901639344262295, "grad_norm": 10.559536933898926, "learning_rate": 1.9959197181749725e-05, "loss": 2.3135, "step": 1766 }, { "epoch": 5.79344262295082, "grad_norm": 6.25512170791626, "learning_rate": 1.995910129705041e-05, "loss": 2.2852, "step": 1767 }, { "epoch": 5.79672131147541, "grad_norm": 9.57976245880127, "learning_rate": 1.9959005300051867e-05, "loss": 2.334, "step": 1768 }, { "epoch": 5.8, "grad_norm": 15.716439247131348, "learning_rate": 1.995890919075519e-05, "loss": 2.269, "step": 1769 }, { "epoch": 5.80327868852459, "grad_norm": 8.494184494018555, "learning_rate": 1.995881296916145e-05, "loss": 2.2139, "step": 1770 }, { "epoch": 5.80655737704918, "grad_norm": 8.643269538879395, "learning_rate": 1.9958716635271745e-05, "loss": 2.3423, "step": 1771 }, { "epoch": 5.80983606557377, "grad_norm": 7.75386905670166, "learning_rate": 1.9958620189087153e-05, "loss": 2.1724, "step": 1772 }, { "epoch": 5.81311475409836, "grad_norm": 8.51791763305664, "learning_rate": 1.9958523630608767e-05, "loss": 2.147, "step": 1773 }, { "epoch": 5.81639344262295, "grad_norm": 9.915811538696289, "learning_rate": 1.9958426959837668e-05, "loss": 2.249, "step": 1774 }, { "epoch": 5.8196721311475414, "grad_norm": 9.226865768432617, "learning_rate": 1.9958330176774953e-05, "loss": 2.2261, "step": 1775 }, { "epoch": 5.822950819672132, "grad_norm": 8.280596733093262, "learning_rate": 1.9958233281421715e-05, "loss": 2.2393, "step": 1776 }, { "epoch": 5.826229508196722, "grad_norm": 14.41901683807373, "learning_rate": 1.995813627377904e-05, "loss": 2.1602, "step": 1777 }, { "epoch": 5.829508196721312, "grad_norm": 20.742292404174805, "learning_rate": 1.9958039153848025e-05, "loss": 2.2314, "step": 1778 }, { "epoch": 5.832786885245902, "grad_norm": 7.871368408203125, "learning_rate": 1.9957941921629763e-05, "loss": 2.1885, "step": 1779 }, { "epoch": 5.836065573770492, "grad_norm": 8.002140998840332, "learning_rate": 1.995784457712535e-05, "loss": 2.0391, "step": 1780 }, { "epoch": 5.839344262295082, "grad_norm": 6.867682933807373, "learning_rate": 1.995774712033589e-05, "loss": 2.2588, "step": 1781 }, { "epoch": 5.842622950819672, "grad_norm": 10.287399291992188, "learning_rate": 1.995764955126248e-05, "loss": 2.3613, "step": 1782 }, { "epoch": 5.845901639344262, "grad_norm": 8.996435165405273, "learning_rate": 1.9957551869906214e-05, "loss": 2.2437, "step": 1783 }, { "epoch": 5.849180327868853, "grad_norm": 10.079620361328125, "learning_rate": 1.9957454076268196e-05, "loss": 2.1841, "step": 1784 }, { "epoch": 5.852459016393443, "grad_norm": 7.576639652252197, "learning_rate": 1.9957356170349533e-05, "loss": 2.415, "step": 1785 }, { "epoch": 5.855737704918033, "grad_norm": 7.169600963592529, "learning_rate": 1.9957258152151327e-05, "loss": 2.0239, "step": 1786 }, { "epoch": 5.859016393442623, "grad_norm": 8.235438346862793, "learning_rate": 1.995716002167468e-05, "loss": 2.3286, "step": 1787 }, { "epoch": 5.862295081967213, "grad_norm": 7.916664123535156, "learning_rate": 1.9957061778920703e-05, "loss": 2.2271, "step": 1788 }, { "epoch": 5.865573770491803, "grad_norm": 7.0798659324646, "learning_rate": 1.9956963423890497e-05, "loss": 2.1475, "step": 1789 }, { "epoch": 5.868852459016393, "grad_norm": 20.339094161987305, "learning_rate": 1.995686495658518e-05, "loss": 2.333, "step": 1790 }, { "epoch": 5.872131147540983, "grad_norm": 13.580154418945312, "learning_rate": 1.995676637700586e-05, "loss": 2.0562, "step": 1791 }, { "epoch": 5.8754098360655735, "grad_norm": 8.690961837768555, "learning_rate": 1.9956667685153643e-05, "loss": 2.1514, "step": 1792 }, { "epoch": 5.878688524590164, "grad_norm": 11.184842109680176, "learning_rate": 1.9956568881029645e-05, "loss": 2.2363, "step": 1793 }, { "epoch": 5.881967213114754, "grad_norm": 7.691061973571777, "learning_rate": 1.995646996463498e-05, "loss": 2.2803, "step": 1794 }, { "epoch": 5.885245901639344, "grad_norm": 8.549989700317383, "learning_rate": 1.995637093597077e-05, "loss": 2.2759, "step": 1795 }, { "epoch": 5.888524590163934, "grad_norm": 6.868963241577148, "learning_rate": 1.995627179503812e-05, "loss": 2.2207, "step": 1796 }, { "epoch": 5.891803278688524, "grad_norm": 7.533074855804443, "learning_rate": 1.9956172541838154e-05, "loss": 2.2671, "step": 1797 }, { "epoch": 5.895081967213114, "grad_norm": 8.900843620300293, "learning_rate": 1.9956073176371992e-05, "loss": 2.2964, "step": 1798 }, { "epoch": 5.898360655737705, "grad_norm": 9.968503952026367, "learning_rate": 1.9955973698640753e-05, "loss": 2.2739, "step": 1799 }, { "epoch": 5.901639344262295, "grad_norm": 10.435636520385742, "learning_rate": 1.9955874108645557e-05, "loss": 2.313, "step": 1800 }, { "epoch": 5.9049180327868855, "grad_norm": 9.472744941711426, "learning_rate": 1.9955774406387535e-05, "loss": 2.0122, "step": 1801 }, { "epoch": 5.908196721311476, "grad_norm": 10.326810836791992, "learning_rate": 1.99556745918678e-05, "loss": 2.3223, "step": 1802 }, { "epoch": 5.911475409836066, "grad_norm": 6.46143913269043, "learning_rate": 1.9955574665087487e-05, "loss": 2.2451, "step": 1803 }, { "epoch": 5.914754098360656, "grad_norm": 8.80515193939209, "learning_rate": 1.9955474626047714e-05, "loss": 2.0474, "step": 1804 }, { "epoch": 5.918032786885246, "grad_norm": 9.15241813659668, "learning_rate": 1.9955374474749615e-05, "loss": 2.3169, "step": 1805 }, { "epoch": 5.921311475409836, "grad_norm": 9.763246536254883, "learning_rate": 1.9955274211194317e-05, "loss": 2.2822, "step": 1806 }, { "epoch": 5.924590163934426, "grad_norm": 8.111355781555176, "learning_rate": 1.9955173835382952e-05, "loss": 2.1201, "step": 1807 }, { "epoch": 5.927868852459016, "grad_norm": 15.310430526733398, "learning_rate": 1.9955073347316652e-05, "loss": 2.1631, "step": 1808 }, { "epoch": 5.9311475409836065, "grad_norm": 9.83159351348877, "learning_rate": 1.9954972746996545e-05, "loss": 2.2373, "step": 1809 }, { "epoch": 5.934426229508197, "grad_norm": 7.417263507843018, "learning_rate": 1.9954872034423776e-05, "loss": 2.1865, "step": 1810 }, { "epoch": 5.937704918032787, "grad_norm": 9.07601547241211, "learning_rate": 1.9954771209599473e-05, "loss": 2.3228, "step": 1811 }, { "epoch": 5.940983606557377, "grad_norm": 7.206950664520264, "learning_rate": 1.995467027252477e-05, "loss": 2.1572, "step": 1812 }, { "epoch": 5.944262295081967, "grad_norm": 8.827249526977539, "learning_rate": 1.9954569223200814e-05, "loss": 2.3989, "step": 1813 }, { "epoch": 5.947540983606557, "grad_norm": 8.41893482208252, "learning_rate": 1.9954468061628738e-05, "loss": 2.1196, "step": 1814 }, { "epoch": 5.950819672131147, "grad_norm": 9.526124954223633, "learning_rate": 1.9954366787809685e-05, "loss": 2.2842, "step": 1815 }, { "epoch": 5.954098360655737, "grad_norm": 9.995370864868164, "learning_rate": 1.9954265401744797e-05, "loss": 2.2036, "step": 1816 }, { "epoch": 5.9573770491803275, "grad_norm": 8.068039894104004, "learning_rate": 1.9954163903435213e-05, "loss": 2.2446, "step": 1817 }, { "epoch": 5.9606557377049185, "grad_norm": 8.115623474121094, "learning_rate": 1.9954062292882088e-05, "loss": 2.1523, "step": 1818 }, { "epoch": 5.963934426229509, "grad_norm": 6.316954135894775, "learning_rate": 1.9953960570086553e-05, "loss": 2.2866, "step": 1819 }, { "epoch": 5.967213114754099, "grad_norm": 7.696208477020264, "learning_rate": 1.9953858735049768e-05, "loss": 2.1592, "step": 1820 }, { "epoch": 5.970491803278689, "grad_norm": 7.248624801635742, "learning_rate": 1.9953756787772874e-05, "loss": 2.2427, "step": 1821 }, { "epoch": 5.973770491803279, "grad_norm": 6.832733631134033, "learning_rate": 1.995365472825703e-05, "loss": 2.3369, "step": 1822 }, { "epoch": 5.977049180327869, "grad_norm": 7.697269916534424, "learning_rate": 1.995355255650337e-05, "loss": 2.2451, "step": 1823 }, { "epoch": 5.980327868852459, "grad_norm": 10.632890701293945, "learning_rate": 1.9953450272513062e-05, "loss": 2.2007, "step": 1824 }, { "epoch": 5.983606557377049, "grad_norm": 5.421412944793701, "learning_rate": 1.9953347876287252e-05, "loss": 2.2241, "step": 1825 }, { "epoch": 5.9868852459016395, "grad_norm": 7.700380325317383, "learning_rate": 1.9953245367827094e-05, "loss": 2.0386, "step": 1826 }, { "epoch": 5.99016393442623, "grad_norm": 8.467804908752441, "learning_rate": 1.9953142747133748e-05, "loss": 2.3389, "step": 1827 }, { "epoch": 5.99344262295082, "grad_norm": 6.7457709312438965, "learning_rate": 1.9953040014208367e-05, "loss": 2.2295, "step": 1828 }, { "epoch": 5.99672131147541, "grad_norm": 8.715658187866211, "learning_rate": 1.995293716905211e-05, "loss": 2.083, "step": 1829 }, { "epoch": 6.0, "grad_norm": 5.9249701499938965, "learning_rate": 1.995283421166614e-05, "loss": 2.1763, "step": 1830 }, { "epoch": 6.00327868852459, "grad_norm": 6.902915000915527, "learning_rate": 1.9952731142051617e-05, "loss": 2.1626, "step": 1831 }, { "epoch": 6.00655737704918, "grad_norm": 9.057726860046387, "learning_rate": 1.99526279602097e-05, "loss": 2.124, "step": 1832 }, { "epoch": 6.00983606557377, "grad_norm": 7.486324787139893, "learning_rate": 1.9952524666141558e-05, "loss": 2.1128, "step": 1833 }, { "epoch": 6.0131147540983605, "grad_norm": 11.869269371032715, "learning_rate": 1.995242125984835e-05, "loss": 2.0054, "step": 1834 }, { "epoch": 6.016393442622951, "grad_norm": 8.213662147521973, "learning_rate": 1.9952317741331244e-05, "loss": 2.1543, "step": 1835 }, { "epoch": 6.019672131147541, "grad_norm": 7.076062202453613, "learning_rate": 1.9952214110591407e-05, "loss": 2.4771, "step": 1836 }, { "epoch": 6.022950819672131, "grad_norm": 8.457642555236816, "learning_rate": 1.9952110367630008e-05, "loss": 2.1553, "step": 1837 }, { "epoch": 6.026229508196721, "grad_norm": 10.849021911621094, "learning_rate": 1.995200651244822e-05, "loss": 2.0991, "step": 1838 }, { "epoch": 6.029508196721311, "grad_norm": 8.567112922668457, "learning_rate": 1.995190254504721e-05, "loss": 2.126, "step": 1839 }, { "epoch": 6.032786885245901, "grad_norm": 7.778982639312744, "learning_rate": 1.995179846542815e-05, "loss": 2.1672, "step": 1840 }, { "epoch": 6.036065573770492, "grad_norm": 7.31590461730957, "learning_rate": 1.9951694273592216e-05, "loss": 2.0439, "step": 1841 }, { "epoch": 6.039344262295082, "grad_norm": 9.387816429138184, "learning_rate": 1.995158996954058e-05, "loss": 2.1094, "step": 1842 }, { "epoch": 6.0426229508196725, "grad_norm": 20.289663314819336, "learning_rate": 1.9951485553274422e-05, "loss": 1.9795, "step": 1843 }, { "epoch": 6.045901639344263, "grad_norm": 8.073261260986328, "learning_rate": 1.9951381024794916e-05, "loss": 2.2573, "step": 1844 }, { "epoch": 6.049180327868853, "grad_norm": 8.226812362670898, "learning_rate": 1.9951276384103245e-05, "loss": 2.0933, "step": 1845 }, { "epoch": 6.052459016393443, "grad_norm": 7.311246871948242, "learning_rate": 1.9951171631200584e-05, "loss": 2.1406, "step": 1846 }, { "epoch": 6.055737704918033, "grad_norm": 11.909429550170898, "learning_rate": 1.9951066766088117e-05, "loss": 1.9668, "step": 1847 }, { "epoch": 6.059016393442623, "grad_norm": 6.796880722045898, "learning_rate": 1.9950961788767024e-05, "loss": 1.9868, "step": 1848 }, { "epoch": 6.062295081967213, "grad_norm": 8.941761016845703, "learning_rate": 1.995085669923849e-05, "loss": 2.1172, "step": 1849 }, { "epoch": 6.065573770491803, "grad_norm": 11.887747764587402, "learning_rate": 1.9950751497503705e-05, "loss": 2.1191, "step": 1850 }, { "epoch": 6.0688524590163935, "grad_norm": 10.763343811035156, "learning_rate": 1.9950646183563848e-05, "loss": 2.0549, "step": 1851 }, { "epoch": 6.072131147540984, "grad_norm": 6.8512187004089355, "learning_rate": 1.9950540757420108e-05, "loss": 2.3193, "step": 1852 }, { "epoch": 6.075409836065574, "grad_norm": 7.491742134094238, "learning_rate": 1.9950435219073674e-05, "loss": 2.1313, "step": 1853 }, { "epoch": 6.078688524590164, "grad_norm": 6.703156471252441, "learning_rate": 1.995032956852574e-05, "loss": 2.3652, "step": 1854 }, { "epoch": 6.081967213114754, "grad_norm": 7.336785316467285, "learning_rate": 1.995022380577749e-05, "loss": 2.0806, "step": 1855 }, { "epoch": 6.085245901639344, "grad_norm": 11.150304794311523, "learning_rate": 1.9950117930830127e-05, "loss": 2.1899, "step": 1856 }, { "epoch": 6.088524590163934, "grad_norm": 7.84497594833374, "learning_rate": 1.9950011943684835e-05, "loss": 2.2349, "step": 1857 }, { "epoch": 6.091803278688524, "grad_norm": 11.478938102722168, "learning_rate": 1.9949905844342815e-05, "loss": 2.1035, "step": 1858 }, { "epoch": 6.0950819672131145, "grad_norm": 7.204645156860352, "learning_rate": 1.994979963280526e-05, "loss": 2.2246, "step": 1859 }, { "epoch": 6.098360655737705, "grad_norm": 10.494635581970215, "learning_rate": 1.9949693309073368e-05, "loss": 1.8843, "step": 1860 }, { "epoch": 6.101639344262295, "grad_norm": 10.48495101928711, "learning_rate": 1.994958687314834e-05, "loss": 2.1689, "step": 1861 }, { "epoch": 6.104918032786885, "grad_norm": 9.776907920837402, "learning_rate": 1.9949480325031375e-05, "loss": 1.9531, "step": 1862 }, { "epoch": 6.108196721311476, "grad_norm": 6.377320289611816, "learning_rate": 1.9949373664723676e-05, "loss": 1.9951, "step": 1863 }, { "epoch": 6.111475409836066, "grad_norm": 6.653491497039795, "learning_rate": 1.994926689222644e-05, "loss": 2.0103, "step": 1864 }, { "epoch": 6.114754098360656, "grad_norm": 8.879034042358398, "learning_rate": 1.994916000754088e-05, "loss": 2.2197, "step": 1865 }, { "epoch": 6.118032786885246, "grad_norm": 8.941155433654785, "learning_rate": 1.9949053010668194e-05, "loss": 2.1445, "step": 1866 }, { "epoch": 6.121311475409836, "grad_norm": 7.8695969581604, "learning_rate": 1.994894590160959e-05, "loss": 1.9727, "step": 1867 }, { "epoch": 6.1245901639344265, "grad_norm": 7.5513458251953125, "learning_rate": 1.9948838680366277e-05, "loss": 2.2012, "step": 1868 }, { "epoch": 6.127868852459017, "grad_norm": 9.352849006652832, "learning_rate": 1.9948731346939466e-05, "loss": 2.0596, "step": 1869 }, { "epoch": 6.131147540983607, "grad_norm": 10.540132522583008, "learning_rate": 1.994862390133036e-05, "loss": 2.1699, "step": 1870 }, { "epoch": 6.134426229508197, "grad_norm": 11.321372985839844, "learning_rate": 1.994851634354018e-05, "loss": 2.1626, "step": 1871 }, { "epoch": 6.137704918032787, "grad_norm": 6.862692832946777, "learning_rate": 1.9948408673570133e-05, "loss": 2.1138, "step": 1872 }, { "epoch": 6.140983606557377, "grad_norm": 8.318596839904785, "learning_rate": 1.9948300891421438e-05, "loss": 2.2275, "step": 1873 }, { "epoch": 6.144262295081967, "grad_norm": 6.7623467445373535, "learning_rate": 1.9948192997095305e-05, "loss": 2.1523, "step": 1874 }, { "epoch": 6.147540983606557, "grad_norm": 6.668220520019531, "learning_rate": 1.994808499059295e-05, "loss": 2.1284, "step": 1875 }, { "epoch": 6.150819672131147, "grad_norm": 11.48351764678955, "learning_rate": 1.994797687191559e-05, "loss": 2.1162, "step": 1876 }, { "epoch": 6.154098360655738, "grad_norm": 9.185522079467773, "learning_rate": 1.9947868641064453e-05, "loss": 2.1416, "step": 1877 }, { "epoch": 6.157377049180328, "grad_norm": 9.468822479248047, "learning_rate": 1.9947760298040753e-05, "loss": 2.0688, "step": 1878 }, { "epoch": 6.160655737704918, "grad_norm": 6.803839683532715, "learning_rate": 1.9947651842845716e-05, "loss": 2.1455, "step": 1879 }, { "epoch": 6.163934426229508, "grad_norm": 7.649156093597412, "learning_rate": 1.9947543275480555e-05, "loss": 1.9502, "step": 1880 }, { "epoch": 6.167213114754098, "grad_norm": 11.883367538452148, "learning_rate": 1.99474345959465e-05, "loss": 1.9429, "step": 1881 }, { "epoch": 6.170491803278688, "grad_norm": 9.25278377532959, "learning_rate": 1.9947325804244783e-05, "loss": 2.2754, "step": 1882 }, { "epoch": 6.173770491803278, "grad_norm": 8.91667652130127, "learning_rate": 1.9947216900376624e-05, "loss": 2.1284, "step": 1883 }, { "epoch": 6.177049180327868, "grad_norm": 9.084482192993164, "learning_rate": 1.994710788434325e-05, "loss": 2.189, "step": 1884 }, { "epoch": 6.180327868852459, "grad_norm": 10.994677543640137, "learning_rate": 1.9946998756145894e-05, "loss": 2.2158, "step": 1885 }, { "epoch": 6.18360655737705, "grad_norm": 10.100496292114258, "learning_rate": 1.9946889515785782e-05, "loss": 2.3159, "step": 1886 }, { "epoch": 6.18688524590164, "grad_norm": 10.109360694885254, "learning_rate": 1.994678016326415e-05, "loss": 2.1172, "step": 1887 }, { "epoch": 6.19016393442623, "grad_norm": 8.541740417480469, "learning_rate": 1.994667069858223e-05, "loss": 2.1211, "step": 1888 }, { "epoch": 6.19344262295082, "grad_norm": 12.425455093383789, "learning_rate": 1.9946561121741253e-05, "loss": 2.1294, "step": 1889 }, { "epoch": 6.19672131147541, "grad_norm": 14.929104804992676, "learning_rate": 1.994645143274246e-05, "loss": 2.2212, "step": 1890 }, { "epoch": 6.2, "grad_norm": 8.939020156860352, "learning_rate": 1.9946341631587086e-05, "loss": 2.2368, "step": 1891 }, { "epoch": 6.20327868852459, "grad_norm": 7.79070520401001, "learning_rate": 1.994623171827637e-05, "loss": 2.2529, "step": 1892 }, { "epoch": 6.20655737704918, "grad_norm": 6.194929122924805, "learning_rate": 1.9946121692811547e-05, "loss": 2.231, "step": 1893 }, { "epoch": 6.2098360655737705, "grad_norm": 7.996180057525635, "learning_rate": 1.994601155519386e-05, "loss": 2.2163, "step": 1894 }, { "epoch": 6.213114754098361, "grad_norm": 9.179889678955078, "learning_rate": 1.9945901305424554e-05, "loss": 2.2065, "step": 1895 }, { "epoch": 6.216393442622951, "grad_norm": 7.476059436798096, "learning_rate": 1.9945790943504868e-05, "loss": 2.145, "step": 1896 }, { "epoch": 6.219672131147541, "grad_norm": 8.017434120178223, "learning_rate": 1.9945680469436047e-05, "loss": 2.2563, "step": 1897 }, { "epoch": 6.222950819672131, "grad_norm": 8.644536972045898, "learning_rate": 1.994556988321934e-05, "loss": 2.123, "step": 1898 }, { "epoch": 6.226229508196721, "grad_norm": 8.009377479553223, "learning_rate": 1.9945459184855995e-05, "loss": 2.2422, "step": 1899 }, { "epoch": 6.229508196721311, "grad_norm": 9.635599136352539, "learning_rate": 1.994534837434725e-05, "loss": 2.1333, "step": 1900 }, { "epoch": 6.232786885245901, "grad_norm": 9.88387393951416, "learning_rate": 1.9945237451694364e-05, "loss": 2.1431, "step": 1901 }, { "epoch": 6.2360655737704915, "grad_norm": 8.213668823242188, "learning_rate": 1.9945126416898586e-05, "loss": 2.0132, "step": 1902 }, { "epoch": 6.239344262295082, "grad_norm": 8.926654815673828, "learning_rate": 1.9945015269961168e-05, "loss": 2.0063, "step": 1903 }, { "epoch": 6.242622950819672, "grad_norm": 10.03172492980957, "learning_rate": 1.994490401088336e-05, "loss": 2.0576, "step": 1904 }, { "epoch": 6.245901639344262, "grad_norm": 8.167764663696289, "learning_rate": 1.994479263966642e-05, "loss": 2.2461, "step": 1905 }, { "epoch": 6.249180327868853, "grad_norm": 7.498723983764648, "learning_rate": 1.9944681156311606e-05, "loss": 2.0957, "step": 1906 }, { "epoch": 6.252459016393443, "grad_norm": 8.726080894470215, "learning_rate": 1.994456956082017e-05, "loss": 2.1396, "step": 1907 }, { "epoch": 6.255737704918033, "grad_norm": 10.373954772949219, "learning_rate": 1.994445785319337e-05, "loss": 2.1953, "step": 1908 }, { "epoch": 6.259016393442623, "grad_norm": 7.079370975494385, "learning_rate": 1.9944346033432472e-05, "loss": 2.1548, "step": 1909 }, { "epoch": 6.262295081967213, "grad_norm": 8.659900665283203, "learning_rate": 1.994423410153873e-05, "loss": 2.3384, "step": 1910 }, { "epoch": 6.2655737704918035, "grad_norm": 7.252270221710205, "learning_rate": 1.9944122057513413e-05, "loss": 2.1655, "step": 1911 }, { "epoch": 6.268852459016394, "grad_norm": 10.125774383544922, "learning_rate": 1.9944009901357777e-05, "loss": 2.0518, "step": 1912 }, { "epoch": 6.272131147540984, "grad_norm": 11.476261138916016, "learning_rate": 1.994389763307309e-05, "loss": 2.1675, "step": 1913 }, { "epoch": 6.275409836065574, "grad_norm": 9.893810272216797, "learning_rate": 1.994378525266062e-05, "loss": 2.1245, "step": 1914 }, { "epoch": 6.278688524590164, "grad_norm": 7.604547023773193, "learning_rate": 1.9943672760121634e-05, "loss": 2.1455, "step": 1915 }, { "epoch": 6.281967213114754, "grad_norm": 8.835915565490723, "learning_rate": 1.9943560155457394e-05, "loss": 2.0659, "step": 1916 }, { "epoch": 6.285245901639344, "grad_norm": 7.78879451751709, "learning_rate": 1.994344743866918e-05, "loss": 2.0679, "step": 1917 }, { "epoch": 6.288524590163934, "grad_norm": 11.053923606872559, "learning_rate": 1.9943334609758255e-05, "loss": 2.0688, "step": 1918 }, { "epoch": 6.2918032786885245, "grad_norm": 9.878127098083496, "learning_rate": 1.994322166872589e-05, "loss": 2.2002, "step": 1919 }, { "epoch": 6.295081967213115, "grad_norm": 6.707089424133301, "learning_rate": 1.994310861557337e-05, "loss": 2.2173, "step": 1920 }, { "epoch": 6.298360655737705, "grad_norm": 10.379472732543945, "learning_rate": 1.9942995450301958e-05, "loss": 2.3789, "step": 1921 }, { "epoch": 6.301639344262295, "grad_norm": 11.268558502197266, "learning_rate": 1.9942882172912932e-05, "loss": 2.1768, "step": 1922 }, { "epoch": 6.304918032786885, "grad_norm": 7.793530464172363, "learning_rate": 1.9942768783407573e-05, "loss": 2.1274, "step": 1923 }, { "epoch": 6.308196721311475, "grad_norm": 9.312516212463379, "learning_rate": 1.9942655281787158e-05, "loss": 2.0913, "step": 1924 }, { "epoch": 6.311475409836065, "grad_norm": 6.3459296226501465, "learning_rate": 1.9942541668052968e-05, "loss": 2.1553, "step": 1925 }, { "epoch": 6.314754098360655, "grad_norm": 12.395493507385254, "learning_rate": 1.9942427942206282e-05, "loss": 2.1978, "step": 1926 }, { "epoch": 6.3180327868852455, "grad_norm": 6.502713680267334, "learning_rate": 1.9942314104248382e-05, "loss": 2.2632, "step": 1927 }, { "epoch": 6.321311475409836, "grad_norm": 7.008403778076172, "learning_rate": 1.994220015418056e-05, "loss": 2.1631, "step": 1928 }, { "epoch": 6.324590163934427, "grad_norm": 6.683540344238281, "learning_rate": 1.9942086092004084e-05, "loss": 2.3789, "step": 1929 }, { "epoch": 6.327868852459017, "grad_norm": 7.96644926071167, "learning_rate": 1.9941971917720256e-05, "loss": 2.2407, "step": 1930 }, { "epoch": 6.331147540983607, "grad_norm": 9.422170639038086, "learning_rate": 1.9941857631330358e-05, "loss": 2.063, "step": 1931 }, { "epoch": 6.334426229508197, "grad_norm": 8.07923412322998, "learning_rate": 1.9941743232835676e-05, "loss": 2.0996, "step": 1932 }, { "epoch": 6.337704918032787, "grad_norm": 7.783099174499512, "learning_rate": 1.9941628722237505e-05, "loss": 2.2822, "step": 1933 }, { "epoch": 6.340983606557377, "grad_norm": 7.213054656982422, "learning_rate": 1.994151409953713e-05, "loss": 2.0225, "step": 1934 }, { "epoch": 6.344262295081967, "grad_norm": 8.881487846374512, "learning_rate": 1.994139936473585e-05, "loss": 2.3242, "step": 1935 }, { "epoch": 6.3475409836065575, "grad_norm": 10.747904777526855, "learning_rate": 1.9941284517834952e-05, "loss": 2.1641, "step": 1936 }, { "epoch": 6.350819672131148, "grad_norm": 10.156397819519043, "learning_rate": 1.9941169558835737e-05, "loss": 2.1577, "step": 1937 }, { "epoch": 6.354098360655738, "grad_norm": 5.889184951782227, "learning_rate": 1.99410544877395e-05, "loss": 2.0991, "step": 1938 }, { "epoch": 6.357377049180328, "grad_norm": 6.90334415435791, "learning_rate": 1.9940939304547536e-05, "loss": 2.1616, "step": 1939 }, { "epoch": 6.360655737704918, "grad_norm": 7.876635551452637, "learning_rate": 1.994082400926115e-05, "loss": 2.1831, "step": 1940 }, { "epoch": 6.363934426229508, "grad_norm": 9.602057456970215, "learning_rate": 1.9940708601881628e-05, "loss": 2.1543, "step": 1941 }, { "epoch": 6.367213114754098, "grad_norm": 7.3673481941223145, "learning_rate": 1.994059308241029e-05, "loss": 2.1025, "step": 1942 }, { "epoch": 6.370491803278688, "grad_norm": 10.938836097717285, "learning_rate": 1.9940477450848425e-05, "loss": 2.0713, "step": 1943 }, { "epoch": 6.3737704918032785, "grad_norm": 10.637357711791992, "learning_rate": 1.994036170719734e-05, "loss": 2.0664, "step": 1944 }, { "epoch": 6.377049180327869, "grad_norm": 8.152972221374512, "learning_rate": 1.994024585145834e-05, "loss": 2.1206, "step": 1945 }, { "epoch": 6.380327868852459, "grad_norm": 13.89376449584961, "learning_rate": 1.9940129883632738e-05, "loss": 2.1714, "step": 1946 }, { "epoch": 6.383606557377049, "grad_norm": 10.68784236907959, "learning_rate": 1.9940013803721836e-05, "loss": 2.1875, "step": 1947 }, { "epoch": 6.386885245901639, "grad_norm": 7.447419166564941, "learning_rate": 1.993989761172694e-05, "loss": 2.0566, "step": 1948 }, { "epoch": 6.390163934426229, "grad_norm": 7.127077102661133, "learning_rate": 1.9939781307649366e-05, "loss": 2.0815, "step": 1949 }, { "epoch": 6.39344262295082, "grad_norm": 10.628864288330078, "learning_rate": 1.9939664891490423e-05, "loss": 2.2495, "step": 1950 }, { "epoch": 6.39672131147541, "grad_norm": 8.847882270812988, "learning_rate": 1.9939548363251424e-05, "loss": 1.9653, "step": 1951 }, { "epoch": 6.4, "grad_norm": 10.821041107177734, "learning_rate": 1.9939431722933678e-05, "loss": 2.189, "step": 1952 }, { "epoch": 6.4032786885245905, "grad_norm": 9.411775588989258, "learning_rate": 1.9939314970538512e-05, "loss": 2.1519, "step": 1953 }, { "epoch": 6.406557377049181, "grad_norm": 9.03133487701416, "learning_rate": 1.993919810606723e-05, "loss": 2.103, "step": 1954 }, { "epoch": 6.409836065573771, "grad_norm": 9.268997192382812, "learning_rate": 1.9939081129521155e-05, "loss": 1.9277, "step": 1955 }, { "epoch": 6.413114754098361, "grad_norm": 9.407624244689941, "learning_rate": 1.993896404090161e-05, "loss": 2.0391, "step": 1956 }, { "epoch": 6.416393442622951, "grad_norm": 10.568206787109375, "learning_rate": 1.993884684020991e-05, "loss": 2.3335, "step": 1957 }, { "epoch": 6.419672131147541, "grad_norm": 9.821986198425293, "learning_rate": 1.9938729527447374e-05, "loss": 2.0825, "step": 1958 }, { "epoch": 6.422950819672131, "grad_norm": 8.3070650100708, "learning_rate": 1.9938612102615336e-05, "loss": 2.2256, "step": 1959 }, { "epoch": 6.426229508196721, "grad_norm": 8.0685396194458, "learning_rate": 1.993849456571511e-05, "loss": 2.0581, "step": 1960 }, { "epoch": 6.4295081967213115, "grad_norm": 7.9021196365356445, "learning_rate": 1.9938376916748024e-05, "loss": 2.0273, "step": 1961 }, { "epoch": 6.432786885245902, "grad_norm": 9.908732414245605, "learning_rate": 1.9938259155715403e-05, "loss": 2.1865, "step": 1962 }, { "epoch": 6.436065573770492, "grad_norm": 12.204852104187012, "learning_rate": 1.993814128261858e-05, "loss": 2.1216, "step": 1963 }, { "epoch": 6.439344262295082, "grad_norm": 8.745471954345703, "learning_rate": 1.993802329745888e-05, "loss": 2.0278, "step": 1964 }, { "epoch": 6.442622950819672, "grad_norm": 10.346932411193848, "learning_rate": 1.993790520023763e-05, "loss": 2.23, "step": 1965 }, { "epoch": 6.445901639344262, "grad_norm": 6.679001808166504, "learning_rate": 1.9937786990956174e-05, "loss": 2.1382, "step": 1966 }, { "epoch": 6.449180327868852, "grad_norm": 8.444979667663574, "learning_rate": 1.993766866961583e-05, "loss": 2.0718, "step": 1967 }, { "epoch": 6.452459016393442, "grad_norm": 8.41856861114502, "learning_rate": 1.9937550236217943e-05, "loss": 2.1182, "step": 1968 }, { "epoch": 6.4557377049180324, "grad_norm": 9.241497039794922, "learning_rate": 1.9937431690763844e-05, "loss": 2.1714, "step": 1969 }, { "epoch": 6.459016393442623, "grad_norm": 10.6510009765625, "learning_rate": 1.993731303325487e-05, "loss": 1.95, "step": 1970 }, { "epoch": 6.462295081967213, "grad_norm": 8.340679168701172, "learning_rate": 1.993719426369236e-05, "loss": 1.9678, "step": 1971 }, { "epoch": 6.465573770491803, "grad_norm": 9.44299602508545, "learning_rate": 1.9937075382077654e-05, "loss": 2.2363, "step": 1972 }, { "epoch": 6.468852459016394, "grad_norm": 8.645605087280273, "learning_rate": 1.9936956388412086e-05, "loss": 2.0947, "step": 1973 }, { "epoch": 6.472131147540984, "grad_norm": 11.675431251525879, "learning_rate": 1.993683728269701e-05, "loss": 2.1084, "step": 1974 }, { "epoch": 6.475409836065574, "grad_norm": 11.971819877624512, "learning_rate": 1.9936718064933757e-05, "loss": 2.0649, "step": 1975 }, { "epoch": 6.478688524590164, "grad_norm": 15.542035102844238, "learning_rate": 1.9936598735123675e-05, "loss": 2.0674, "step": 1976 }, { "epoch": 6.481967213114754, "grad_norm": 11.200737953186035, "learning_rate": 1.9936479293268113e-05, "loss": 2.1382, "step": 1977 }, { "epoch": 6.4852459016393444, "grad_norm": 10.876280784606934, "learning_rate": 1.9936359739368418e-05, "loss": 2.0811, "step": 1978 }, { "epoch": 6.488524590163935, "grad_norm": 8.708824157714844, "learning_rate": 1.9936240073425932e-05, "loss": 1.9526, "step": 1979 }, { "epoch": 6.491803278688525, "grad_norm": 11.803284645080566, "learning_rate": 1.993612029544201e-05, "loss": 2.1104, "step": 1980 }, { "epoch": 6.495081967213115, "grad_norm": 16.12431526184082, "learning_rate": 1.9936000405418e-05, "loss": 2.0376, "step": 1981 }, { "epoch": 6.498360655737705, "grad_norm": 8.514876365661621, "learning_rate": 1.9935880403355255e-05, "loss": 2.1025, "step": 1982 }, { "epoch": 6.501639344262295, "grad_norm": 12.267180442810059, "learning_rate": 1.9935760289255125e-05, "loss": 2.1606, "step": 1983 }, { "epoch": 6.504918032786885, "grad_norm": 8.32889175415039, "learning_rate": 1.993564006311897e-05, "loss": 2.2861, "step": 1984 }, { "epoch": 6.508196721311475, "grad_norm": 9.351122856140137, "learning_rate": 1.993551972494814e-05, "loss": 2.1748, "step": 1985 }, { "epoch": 6.511475409836065, "grad_norm": 8.823625564575195, "learning_rate": 1.9935399274744e-05, "loss": 2.3389, "step": 1986 }, { "epoch": 6.5147540983606556, "grad_norm": 8.488384246826172, "learning_rate": 1.99352787125079e-05, "loss": 2.2017, "step": 1987 }, { "epoch": 6.518032786885246, "grad_norm": 11.6527099609375, "learning_rate": 1.9935158038241203e-05, "loss": 2.3345, "step": 1988 }, { "epoch": 6.521311475409836, "grad_norm": 8.129007339477539, "learning_rate": 1.9935037251945267e-05, "loss": 2.0625, "step": 1989 }, { "epoch": 6.524590163934426, "grad_norm": 12.530169486999512, "learning_rate": 1.9934916353621458e-05, "loss": 2.2026, "step": 1990 }, { "epoch": 6.527868852459016, "grad_norm": 11.82460880279541, "learning_rate": 1.9934795343271138e-05, "loss": 1.9985, "step": 1991 }, { "epoch": 6.531147540983606, "grad_norm": 11.18681812286377, "learning_rate": 1.993467422089567e-05, "loss": 2.2661, "step": 1992 }, { "epoch": 6.534426229508197, "grad_norm": 11.472406387329102, "learning_rate": 1.993455298649642e-05, "loss": 1.9019, "step": 1993 }, { "epoch": 6.537704918032787, "grad_norm": 9.629566192626953, "learning_rate": 1.993443164007476e-05, "loss": 2.2271, "step": 1994 }, { "epoch": 6.540983606557377, "grad_norm": 7.969003200531006, "learning_rate": 1.9934310181632047e-05, "loss": 2.2432, "step": 1995 }, { "epoch": 6.5442622950819676, "grad_norm": 8.654975891113281, "learning_rate": 1.9934188611169664e-05, "loss": 2.0479, "step": 1996 }, { "epoch": 6.547540983606558, "grad_norm": 10.15146255493164, "learning_rate": 1.993406692868897e-05, "loss": 2.0891, "step": 1997 }, { "epoch": 6.550819672131148, "grad_norm": 8.328570365905762, "learning_rate": 1.9933945134191346e-05, "loss": 2.2109, "step": 1998 }, { "epoch": 6.554098360655738, "grad_norm": 10.728532791137695, "learning_rate": 1.9933823227678162e-05, "loss": 2.0913, "step": 1999 }, { "epoch": 6.557377049180328, "grad_norm": 9.10097885131836, "learning_rate": 1.993370120915079e-05, "loss": 2.2104, "step": 2000 }, { "epoch": 6.560655737704918, "grad_norm": 9.744760513305664, "learning_rate": 1.993357907861061e-05, "loss": 2.1733, "step": 2001 }, { "epoch": 6.563934426229508, "grad_norm": 8.839049339294434, "learning_rate": 1.9933456836058996e-05, "loss": 2.2693, "step": 2002 }, { "epoch": 6.567213114754098, "grad_norm": 9.056255340576172, "learning_rate": 1.993333448149733e-05, "loss": 2.1069, "step": 2003 }, { "epoch": 6.5704918032786885, "grad_norm": 10.815664291381836, "learning_rate": 1.993321201492699e-05, "loss": 2.1069, "step": 2004 }, { "epoch": 6.573770491803279, "grad_norm": 9.4467191696167, "learning_rate": 1.9933089436349355e-05, "loss": 2.2393, "step": 2005 }, { "epoch": 6.577049180327869, "grad_norm": 10.666362762451172, "learning_rate": 1.993296674576581e-05, "loss": 2.043, "step": 2006 }, { "epoch": 6.580327868852459, "grad_norm": 8.47905445098877, "learning_rate": 1.9932843943177737e-05, "loss": 2.1929, "step": 2007 }, { "epoch": 6.583606557377049, "grad_norm": 8.056649208068848, "learning_rate": 1.9932721028586522e-05, "loss": 2.0752, "step": 2008 }, { "epoch": 6.586885245901639, "grad_norm": 9.66953182220459, "learning_rate": 1.9932598001993547e-05, "loss": 2.0801, "step": 2009 }, { "epoch": 6.590163934426229, "grad_norm": 12.361433982849121, "learning_rate": 1.9932474863400204e-05, "loss": 2.0164, "step": 2010 }, { "epoch": 6.593442622950819, "grad_norm": 13.151530265808105, "learning_rate": 1.9932351612807878e-05, "loss": 2.0776, "step": 2011 }, { "epoch": 6.5967213114754095, "grad_norm": 12.8294038772583, "learning_rate": 1.9932228250217963e-05, "loss": 2.1987, "step": 2012 }, { "epoch": 6.6, "grad_norm": 8.328173637390137, "learning_rate": 1.9932104775631847e-05, "loss": 2.0811, "step": 2013 }, { "epoch": 6.60327868852459, "grad_norm": 6.942007064819336, "learning_rate": 1.9931981189050923e-05, "loss": 2.0254, "step": 2014 }, { "epoch": 6.60655737704918, "grad_norm": 11.555302619934082, "learning_rate": 1.9931857490476583e-05, "loss": 2.1553, "step": 2015 }, { "epoch": 6.60983606557377, "grad_norm": 10.828457832336426, "learning_rate": 1.9931733679910224e-05, "loss": 1.9565, "step": 2016 }, { "epoch": 6.613114754098361, "grad_norm": 15.421426773071289, "learning_rate": 1.9931609757353245e-05, "loss": 2.0063, "step": 2017 }, { "epoch": 6.616393442622951, "grad_norm": 9.979962348937988, "learning_rate": 1.9931485722807034e-05, "loss": 2.0684, "step": 2018 }, { "epoch": 6.619672131147541, "grad_norm": 10.201489448547363, "learning_rate": 1.9931361576272998e-05, "loss": 1.9995, "step": 2019 }, { "epoch": 6.622950819672131, "grad_norm": 8.526914596557617, "learning_rate": 1.9931237317752532e-05, "loss": 1.9136, "step": 2020 }, { "epoch": 6.6262295081967215, "grad_norm": 8.820357322692871, "learning_rate": 1.993111294724704e-05, "loss": 2.1738, "step": 2021 }, { "epoch": 6.629508196721312, "grad_norm": 10.04029655456543, "learning_rate": 1.9930988464757925e-05, "loss": 1.9971, "step": 2022 }, { "epoch": 6.632786885245902, "grad_norm": 15.65739631652832, "learning_rate": 1.9930863870286588e-05, "loss": 1.9429, "step": 2023 }, { "epoch": 6.636065573770492, "grad_norm": 11.614222526550293, "learning_rate": 1.993073916383444e-05, "loss": 2.0195, "step": 2024 }, { "epoch": 6.639344262295082, "grad_norm": 6.979326248168945, "learning_rate": 1.993061434540288e-05, "loss": 2.062, "step": 2025 }, { "epoch": 6.642622950819672, "grad_norm": 7.6354570388793945, "learning_rate": 1.9930489414993315e-05, "loss": 2.0615, "step": 2026 }, { "epoch": 6.645901639344262, "grad_norm": 9.723102569580078, "learning_rate": 1.9930364372607157e-05, "loss": 2.167, "step": 2027 }, { "epoch": 6.649180327868852, "grad_norm": 11.116286277770996, "learning_rate": 1.993023921824582e-05, "loss": 2.0493, "step": 2028 }, { "epoch": 6.6524590163934425, "grad_norm": 6.076024532318115, "learning_rate": 1.9930113951910706e-05, "loss": 2.0879, "step": 2029 }, { "epoch": 6.655737704918033, "grad_norm": 10.655635833740234, "learning_rate": 1.992998857360324e-05, "loss": 2.0718, "step": 2030 }, { "epoch": 6.659016393442623, "grad_norm": 9.748366355895996, "learning_rate": 1.9929863083324822e-05, "loss": 2.4502, "step": 2031 }, { "epoch": 6.662295081967213, "grad_norm": 9.168255805969238, "learning_rate": 1.9929737481076873e-05, "loss": 2.3359, "step": 2032 }, { "epoch": 6.665573770491803, "grad_norm": 12.547882080078125, "learning_rate": 1.992961176686081e-05, "loss": 2.186, "step": 2033 }, { "epoch": 6.668852459016393, "grad_norm": 9.728793144226074, "learning_rate": 1.9929485940678052e-05, "loss": 2.1011, "step": 2034 }, { "epoch": 6.672131147540983, "grad_norm": 7.55112361907959, "learning_rate": 1.9929360002530016e-05, "loss": 2.1665, "step": 2035 }, { "epoch": 6.675409836065574, "grad_norm": 8.223713874816895, "learning_rate": 1.9929233952418123e-05, "loss": 1.9863, "step": 2036 }, { "epoch": 6.678688524590164, "grad_norm": 14.65629768371582, "learning_rate": 1.9929107790343788e-05, "loss": 2.0308, "step": 2037 }, { "epoch": 6.6819672131147545, "grad_norm": 8.470552444458008, "learning_rate": 1.9928981516308445e-05, "loss": 2.0059, "step": 2038 }, { "epoch": 6.685245901639345, "grad_norm": 6.029008388519287, "learning_rate": 1.9928855130313507e-05, "loss": 2.3232, "step": 2039 }, { "epoch": 6.688524590163935, "grad_norm": 8.399635314941406, "learning_rate": 1.9928728632360407e-05, "loss": 2.3696, "step": 2040 }, { "epoch": 6.691803278688525, "grad_norm": 8.500980377197266, "learning_rate": 1.9928602022450567e-05, "loss": 1.9209, "step": 2041 }, { "epoch": 6.695081967213115, "grad_norm": 8.385444641113281, "learning_rate": 1.9928475300585415e-05, "loss": 2.1362, "step": 2042 }, { "epoch": 6.698360655737705, "grad_norm": 11.541020393371582, "learning_rate": 1.9928348466766384e-05, "loss": 2.2817, "step": 2043 }, { "epoch": 6.701639344262295, "grad_norm": 10.697196006774902, "learning_rate": 1.9928221520994903e-05, "loss": 2.0225, "step": 2044 }, { "epoch": 6.704918032786885, "grad_norm": 6.810125827789307, "learning_rate": 1.9928094463272395e-05, "loss": 1.9858, "step": 2045 }, { "epoch": 6.7081967213114755, "grad_norm": 8.139636993408203, "learning_rate": 1.9927967293600304e-05, "loss": 2.0962, "step": 2046 }, { "epoch": 6.711475409836066, "grad_norm": 39.275535583496094, "learning_rate": 1.9927840011980057e-05, "loss": 2.2349, "step": 2047 }, { "epoch": 6.714754098360656, "grad_norm": 7.1435112953186035, "learning_rate": 1.992771261841309e-05, "loss": 2.1064, "step": 2048 }, { "epoch": 6.718032786885246, "grad_norm": 12.063252449035645, "learning_rate": 1.9927585112900846e-05, "loss": 2.123, "step": 2049 }, { "epoch": 6.721311475409836, "grad_norm": 6.839615821838379, "learning_rate": 1.9927457495444757e-05, "loss": 2.2363, "step": 2050 }, { "epoch": 6.724590163934426, "grad_norm": 7.326773643493652, "learning_rate": 1.992732976604626e-05, "loss": 2.2632, "step": 2051 }, { "epoch": 6.727868852459016, "grad_norm": 10.008984565734863, "learning_rate": 1.9927201924706798e-05, "loss": 2.0283, "step": 2052 }, { "epoch": 6.731147540983606, "grad_norm": 9.400469779968262, "learning_rate": 1.9927073971427813e-05, "loss": 2.0156, "step": 2053 }, { "epoch": 6.7344262295081965, "grad_norm": 9.186841011047363, "learning_rate": 1.9926945906210752e-05, "loss": 2.1089, "step": 2054 }, { "epoch": 6.737704918032787, "grad_norm": 9.247535705566406, "learning_rate": 1.992681772905705e-05, "loss": 2.2568, "step": 2055 }, { "epoch": 6.740983606557377, "grad_norm": 12.184240341186523, "learning_rate": 1.9926689439968157e-05, "loss": 2.1079, "step": 2056 }, { "epoch": 6.744262295081967, "grad_norm": 10.185497283935547, "learning_rate": 1.992656103894552e-05, "loss": 2.1411, "step": 2057 }, { "epoch": 6.747540983606557, "grad_norm": 8.747899055480957, "learning_rate": 1.9926432525990584e-05, "loss": 2.1738, "step": 2058 }, { "epoch": 6.750819672131147, "grad_norm": 10.782968521118164, "learning_rate": 1.99263039011048e-05, "loss": 1.9185, "step": 2059 }, { "epoch": 6.754098360655737, "grad_norm": 9.67247200012207, "learning_rate": 1.9926175164289623e-05, "loss": 2.1411, "step": 2060 }, { "epoch": 6.757377049180328, "grad_norm": 11.232000350952148, "learning_rate": 1.99260463155465e-05, "loss": 1.9077, "step": 2061 }, { "epoch": 6.760655737704918, "grad_norm": 9.568819046020508, "learning_rate": 1.9925917354876883e-05, "loss": 2.0601, "step": 2062 }, { "epoch": 6.7639344262295085, "grad_norm": 9.499794960021973, "learning_rate": 1.9925788282282226e-05, "loss": 2.1807, "step": 2063 }, { "epoch": 6.767213114754099, "grad_norm": 9.688780784606934, "learning_rate": 1.9925659097763986e-05, "loss": 2.3062, "step": 2064 }, { "epoch": 6.770491803278689, "grad_norm": 8.140621185302734, "learning_rate": 1.992552980132362e-05, "loss": 2.1328, "step": 2065 }, { "epoch": 6.773770491803279, "grad_norm": 8.873625755310059, "learning_rate": 1.992540039296259e-05, "loss": 2.1548, "step": 2066 }, { "epoch": 6.777049180327869, "grad_norm": 7.4055585861206055, "learning_rate": 1.9925270872682347e-05, "loss": 2.168, "step": 2067 }, { "epoch": 6.780327868852459, "grad_norm": 9.417954444885254, "learning_rate": 1.9925141240484355e-05, "loss": 2.0894, "step": 2068 }, { "epoch": 6.783606557377049, "grad_norm": 7.289278507232666, "learning_rate": 1.9925011496370075e-05, "loss": 2.062, "step": 2069 }, { "epoch": 6.786885245901639, "grad_norm": 7.087806224822998, "learning_rate": 1.992488164034097e-05, "loss": 2.1069, "step": 2070 }, { "epoch": 6.7901639344262295, "grad_norm": 8.499780654907227, "learning_rate": 1.9924751672398506e-05, "loss": 2.187, "step": 2071 }, { "epoch": 6.79344262295082, "grad_norm": 8.442893028259277, "learning_rate": 1.9924621592544154e-05, "loss": 2.0913, "step": 2072 }, { "epoch": 6.79672131147541, "grad_norm": 7.47904109954834, "learning_rate": 1.9924491400779366e-05, "loss": 2.1172, "step": 2073 }, { "epoch": 6.8, "grad_norm": 8.93885612487793, "learning_rate": 1.9924361097105624e-05, "loss": 1.9795, "step": 2074 }, { "epoch": 6.80327868852459, "grad_norm": 8.129201889038086, "learning_rate": 1.992423068152439e-05, "loss": 2.2471, "step": 2075 }, { "epoch": 6.80655737704918, "grad_norm": 12.495665550231934, "learning_rate": 1.9924100154037137e-05, "loss": 2.2222, "step": 2076 }, { "epoch": 6.80983606557377, "grad_norm": 8.255494117736816, "learning_rate": 1.9923969514645333e-05, "loss": 2.0288, "step": 2077 }, { "epoch": 6.81311475409836, "grad_norm": 12.016514778137207, "learning_rate": 1.9923838763350457e-05, "loss": 2.2646, "step": 2078 }, { "epoch": 6.81639344262295, "grad_norm": 10.845169067382812, "learning_rate": 1.9923707900153984e-05, "loss": 2.167, "step": 2079 }, { "epoch": 6.8196721311475414, "grad_norm": 13.8118257522583, "learning_rate": 1.9923576925057385e-05, "loss": 2.2236, "step": 2080 }, { "epoch": 6.822950819672132, "grad_norm": 8.450613021850586, "learning_rate": 1.9923445838062136e-05, "loss": 2.1133, "step": 2081 }, { "epoch": 6.826229508196722, "grad_norm": 8.504615783691406, "learning_rate": 1.9923314639169718e-05, "loss": 1.978, "step": 2082 }, { "epoch": 6.829508196721312, "grad_norm": 11.480603218078613, "learning_rate": 1.992318332838161e-05, "loss": 2.0278, "step": 2083 }, { "epoch": 6.832786885245902, "grad_norm": 10.174057006835938, "learning_rate": 1.9923051905699288e-05, "loss": 2.1201, "step": 2084 }, { "epoch": 6.836065573770492, "grad_norm": 6.981424808502197, "learning_rate": 1.9922920371124243e-05, "loss": 2.0776, "step": 2085 }, { "epoch": 6.839344262295082, "grad_norm": 6.5622076988220215, "learning_rate": 1.9922788724657956e-05, "loss": 2.1279, "step": 2086 }, { "epoch": 6.842622950819672, "grad_norm": 10.610015869140625, "learning_rate": 1.9922656966301903e-05, "loss": 1.8306, "step": 2087 }, { "epoch": 6.845901639344262, "grad_norm": 8.332664489746094, "learning_rate": 1.9922525096057578e-05, "loss": 2.042, "step": 2088 }, { "epoch": 6.849180327868853, "grad_norm": 6.701235771179199, "learning_rate": 1.992239311392647e-05, "loss": 2.3398, "step": 2089 }, { "epoch": 6.852459016393443, "grad_norm": 7.798120021820068, "learning_rate": 1.9922261019910056e-05, "loss": 2.1553, "step": 2090 }, { "epoch": 6.855737704918033, "grad_norm": 10.3976411819458, "learning_rate": 1.9922128814009837e-05, "loss": 2.0981, "step": 2091 }, { "epoch": 6.859016393442623, "grad_norm": 6.7393479347229, "learning_rate": 1.99219964962273e-05, "loss": 2.0571, "step": 2092 }, { "epoch": 6.862295081967213, "grad_norm": 6.426358699798584, "learning_rate": 1.9921864066563933e-05, "loss": 2.1294, "step": 2093 }, { "epoch": 6.865573770491803, "grad_norm": 7.288980007171631, "learning_rate": 1.9921731525021234e-05, "loss": 2.0337, "step": 2094 }, { "epoch": 6.868852459016393, "grad_norm": 8.970771789550781, "learning_rate": 1.9921598871600694e-05, "loss": 2.1772, "step": 2095 }, { "epoch": 6.872131147540983, "grad_norm": 8.01812744140625, "learning_rate": 1.9921466106303817e-05, "loss": 2.1099, "step": 2096 }, { "epoch": 6.8754098360655735, "grad_norm": 9.807394981384277, "learning_rate": 1.9921333229132087e-05, "loss": 2.1426, "step": 2097 }, { "epoch": 6.878688524590164, "grad_norm": 14.02174186706543, "learning_rate": 1.992120024008701e-05, "loss": 2.0488, "step": 2098 }, { "epoch": 6.881967213114754, "grad_norm": 10.658517837524414, "learning_rate": 1.992106713917009e-05, "loss": 2.063, "step": 2099 }, { "epoch": 6.885245901639344, "grad_norm": 6.006734371185303, "learning_rate": 1.992093392638282e-05, "loss": 2.2397, "step": 2100 }, { "epoch": 6.888524590163934, "grad_norm": 9.759272575378418, "learning_rate": 1.99208006017267e-05, "loss": 2.187, "step": 2101 }, { "epoch": 6.891803278688524, "grad_norm": 9.912515640258789, "learning_rate": 1.9920667165203243e-05, "loss": 2.2075, "step": 2102 }, { "epoch": 6.895081967213114, "grad_norm": 13.34266185760498, "learning_rate": 1.992053361681395e-05, "loss": 2.1396, "step": 2103 }, { "epoch": 6.898360655737705, "grad_norm": 7.302621364593506, "learning_rate": 1.9920399956560322e-05, "loss": 2.1187, "step": 2104 }, { "epoch": 6.901639344262295, "grad_norm": 11.172077178955078, "learning_rate": 1.9920266184443874e-05, "loss": 2.1699, "step": 2105 }, { "epoch": 6.9049180327868855, "grad_norm": 8.61091423034668, "learning_rate": 1.9920132300466107e-05, "loss": 2.2344, "step": 2106 }, { "epoch": 6.908196721311476, "grad_norm": 8.390424728393555, "learning_rate": 1.9919998304628535e-05, "loss": 2.0967, "step": 2107 }, { "epoch": 6.911475409836066, "grad_norm": 9.408942222595215, "learning_rate": 1.9919864196932667e-05, "loss": 2.1191, "step": 2108 }, { "epoch": 6.914754098360656, "grad_norm": 13.746171951293945, "learning_rate": 1.991972997738002e-05, "loss": 2.0791, "step": 2109 }, { "epoch": 6.918032786885246, "grad_norm": 11.086090087890625, "learning_rate": 1.9919595645972097e-05, "loss": 2.2109, "step": 2110 }, { "epoch": 6.921311475409836, "grad_norm": 7.7390522956848145, "learning_rate": 1.9919461202710422e-05, "loss": 2.1431, "step": 2111 }, { "epoch": 6.924590163934426, "grad_norm": 9.663055419921875, "learning_rate": 1.9919326647596508e-05, "loss": 1.8818, "step": 2112 }, { "epoch": 6.927868852459016, "grad_norm": 10.828108787536621, "learning_rate": 1.9919191980631875e-05, "loss": 2.2334, "step": 2113 }, { "epoch": 6.9311475409836065, "grad_norm": 10.171906471252441, "learning_rate": 1.9919057201818037e-05, "loss": 2.0305, "step": 2114 }, { "epoch": 6.934426229508197, "grad_norm": 12.384102821350098, "learning_rate": 1.9918922311156517e-05, "loss": 2.0884, "step": 2115 }, { "epoch": 6.937704918032787, "grad_norm": 11.454440116882324, "learning_rate": 1.9918787308648836e-05, "loss": 1.9883, "step": 2116 }, { "epoch": 6.940983606557377, "grad_norm": 7.322190284729004, "learning_rate": 1.9918652194296512e-05, "loss": 2.2036, "step": 2117 }, { "epoch": 6.944262295081967, "grad_norm": 7.516680717468262, "learning_rate": 1.9918516968101074e-05, "loss": 2.0156, "step": 2118 }, { "epoch": 6.947540983606557, "grad_norm": 13.019115447998047, "learning_rate": 1.9918381630064042e-05, "loss": 2.1445, "step": 2119 }, { "epoch": 6.950819672131147, "grad_norm": 16.660375595092773, "learning_rate": 1.9918246180186948e-05, "loss": 2.3145, "step": 2120 }, { "epoch": 6.954098360655737, "grad_norm": 8.60330867767334, "learning_rate": 1.9918110618471314e-05, "loss": 2.188, "step": 2121 }, { "epoch": 6.9573770491803275, "grad_norm": 6.687853813171387, "learning_rate": 1.991797494491867e-05, "loss": 2.2466, "step": 2122 }, { "epoch": 6.9606557377049185, "grad_norm": 7.7221150398254395, "learning_rate": 1.991783915953055e-05, "loss": 2.0601, "step": 2123 }, { "epoch": 6.963934426229509, "grad_norm": 7.55028772354126, "learning_rate": 1.9917703262308478e-05, "loss": 2.1069, "step": 2124 }, { "epoch": 6.967213114754099, "grad_norm": 7.130031108856201, "learning_rate": 1.9917567253253988e-05, "loss": 2.0156, "step": 2125 }, { "epoch": 6.970491803278689, "grad_norm": 9.693096160888672, "learning_rate": 1.991743113236862e-05, "loss": 2.0129, "step": 2126 }, { "epoch": 6.973770491803279, "grad_norm": 10.406140327453613, "learning_rate": 1.99172948996539e-05, "loss": 2.3726, "step": 2127 }, { "epoch": 6.977049180327869, "grad_norm": 8.774336814880371, "learning_rate": 1.9917158555111375e-05, "loss": 2.1489, "step": 2128 }, { "epoch": 6.980327868852459, "grad_norm": 8.42363166809082, "learning_rate": 1.991702209874257e-05, "loss": 1.9785, "step": 2129 }, { "epoch": 6.983606557377049, "grad_norm": 7.202642917633057, "learning_rate": 1.9916885530549032e-05, "loss": 2.0654, "step": 2130 }, { "epoch": 6.9868852459016395, "grad_norm": 8.105684280395508, "learning_rate": 1.9916748850532298e-05, "loss": 2.0327, "step": 2131 }, { "epoch": 6.99016393442623, "grad_norm": 9.789905548095703, "learning_rate": 1.991661205869391e-05, "loss": 2.1143, "step": 2132 }, { "epoch": 6.99344262295082, "grad_norm": 8.80323600769043, "learning_rate": 1.991647515503541e-05, "loss": 2.2241, "step": 2133 }, { "epoch": 6.99672131147541, "grad_norm": 8.992297172546387, "learning_rate": 1.9916338139558343e-05, "loss": 2.1172, "step": 2134 }, { "epoch": 7.0, "grad_norm": 6.700535774230957, "learning_rate": 1.9916201012264255e-05, "loss": 2.0669, "step": 2135 }, { "epoch": 7.00327868852459, "grad_norm": 8.14384937286377, "learning_rate": 1.9916063773154686e-05, "loss": 2.0825, "step": 2136 }, { "epoch": 7.00655737704918, "grad_norm": 9.97191047668457, "learning_rate": 1.991592642223119e-05, "loss": 2.1113, "step": 2137 }, { "epoch": 7.00983606557377, "grad_norm": 7.4541544914245605, "learning_rate": 1.991578895949531e-05, "loss": 2.186, "step": 2138 }, { "epoch": 7.0131147540983605, "grad_norm": 7.538042068481445, "learning_rate": 1.9915651384948606e-05, "loss": 2.0845, "step": 2139 }, { "epoch": 7.016393442622951, "grad_norm": 10.43117618560791, "learning_rate": 1.991551369859262e-05, "loss": 1.9546, "step": 2140 }, { "epoch": 7.019672131147541, "grad_norm": 8.539770126342773, "learning_rate": 1.991537590042891e-05, "loss": 2.0796, "step": 2141 }, { "epoch": 7.022950819672131, "grad_norm": 10.263898849487305, "learning_rate": 1.9915237990459024e-05, "loss": 2.1021, "step": 2142 }, { "epoch": 7.026229508196721, "grad_norm": 6.827020645141602, "learning_rate": 1.9915099968684523e-05, "loss": 2.0791, "step": 2143 }, { "epoch": 7.029508196721311, "grad_norm": 7.8725175857543945, "learning_rate": 1.991496183510696e-05, "loss": 1.9536, "step": 2144 }, { "epoch": 7.032786885245901, "grad_norm": 8.306406021118164, "learning_rate": 1.991482358972789e-05, "loss": 1.8921, "step": 2145 }, { "epoch": 7.036065573770492, "grad_norm": 5.408417224884033, "learning_rate": 1.9914685232548877e-05, "loss": 2.1135, "step": 2146 }, { "epoch": 7.039344262295082, "grad_norm": 8.607769966125488, "learning_rate": 1.991454676357148e-05, "loss": 1.9272, "step": 2147 }, { "epoch": 7.0426229508196725, "grad_norm": 7.899447917938232, "learning_rate": 1.991440818279726e-05, "loss": 1.835, "step": 2148 }, { "epoch": 7.045901639344263, "grad_norm": 9.239822387695312, "learning_rate": 1.991426949022778e-05, "loss": 2.0352, "step": 2149 }, { "epoch": 7.049180327868853, "grad_norm": 8.427528381347656, "learning_rate": 1.9914130685864602e-05, "loss": 2.084, "step": 2150 }, { "epoch": 7.052459016393443, "grad_norm": 9.602498054504395, "learning_rate": 1.991399176970929e-05, "loss": 1.9893, "step": 2151 }, { "epoch": 7.055737704918033, "grad_norm": 7.272158145904541, "learning_rate": 1.9913852741763416e-05, "loss": 2.0601, "step": 2152 }, { "epoch": 7.059016393442623, "grad_norm": 8.44846248626709, "learning_rate": 1.9913713602028546e-05, "loss": 1.9722, "step": 2153 }, { "epoch": 7.062295081967213, "grad_norm": 6.981979846954346, "learning_rate": 1.9913574350506243e-05, "loss": 2.0654, "step": 2154 }, { "epoch": 7.065573770491803, "grad_norm": 9.568303108215332, "learning_rate": 1.9913434987198087e-05, "loss": 2.0664, "step": 2155 }, { "epoch": 7.0688524590163935, "grad_norm": 11.49937915802002, "learning_rate": 1.9913295512105638e-05, "loss": 1.9946, "step": 2156 }, { "epoch": 7.072131147540984, "grad_norm": 7.119683742523193, "learning_rate": 1.991315592523048e-05, "loss": 2.0051, "step": 2157 }, { "epoch": 7.075409836065574, "grad_norm": 9.205483436584473, "learning_rate": 1.9913016226574182e-05, "loss": 2.0322, "step": 2158 }, { "epoch": 7.078688524590164, "grad_norm": 9.080696105957031, "learning_rate": 1.9912876416138317e-05, "loss": 2.0293, "step": 2159 }, { "epoch": 7.081967213114754, "grad_norm": 7.644835948944092, "learning_rate": 1.9912736493924463e-05, "loss": 2.0, "step": 2160 }, { "epoch": 7.085245901639344, "grad_norm": 9.25672721862793, "learning_rate": 1.9912596459934197e-05, "loss": 2.0532, "step": 2161 }, { "epoch": 7.088524590163934, "grad_norm": 9.758240699768066, "learning_rate": 1.9912456314169103e-05, "loss": 2.0938, "step": 2162 }, { "epoch": 7.091803278688524, "grad_norm": 7.018928050994873, "learning_rate": 1.9912316056630756e-05, "loss": 2.0332, "step": 2163 }, { "epoch": 7.0950819672131145, "grad_norm": 8.876880645751953, "learning_rate": 1.991217568732074e-05, "loss": 2.0344, "step": 2164 }, { "epoch": 7.098360655737705, "grad_norm": 7.901134490966797, "learning_rate": 1.9912035206240638e-05, "loss": 1.9229, "step": 2165 }, { "epoch": 7.101639344262295, "grad_norm": 10.582725524902344, "learning_rate": 1.991189461339203e-05, "loss": 2.0596, "step": 2166 }, { "epoch": 7.104918032786885, "grad_norm": 7.81044864654541, "learning_rate": 1.9911753908776505e-05, "loss": 2.0571, "step": 2167 }, { "epoch": 7.108196721311476, "grad_norm": 9.056600570678711, "learning_rate": 1.991161309239565e-05, "loss": 1.833, "step": 2168 }, { "epoch": 7.111475409836066, "grad_norm": 9.05362606048584, "learning_rate": 1.9911472164251053e-05, "loss": 2.1924, "step": 2169 }, { "epoch": 7.114754098360656, "grad_norm": 7.1621413230896, "learning_rate": 1.99113311243443e-05, "loss": 2.1572, "step": 2170 }, { "epoch": 7.118032786885246, "grad_norm": 14.145328521728516, "learning_rate": 1.9911189972676987e-05, "loss": 1.9443, "step": 2171 }, { "epoch": 7.121311475409836, "grad_norm": 9.060120582580566, "learning_rate": 1.9911048709250696e-05, "loss": 2.1016, "step": 2172 }, { "epoch": 7.1245901639344265, "grad_norm": 9.194727897644043, "learning_rate": 1.9910907334067028e-05, "loss": 2.0215, "step": 2173 }, { "epoch": 7.127868852459017, "grad_norm": 8.853386878967285, "learning_rate": 1.9910765847127578e-05, "loss": 1.9082, "step": 2174 }, { "epoch": 7.131147540983607, "grad_norm": 8.130133628845215, "learning_rate": 1.9910624248433938e-05, "loss": 1.9819, "step": 2175 }, { "epoch": 7.134426229508197, "grad_norm": 11.325582504272461, "learning_rate": 1.9910482537987704e-05, "loss": 1.9548, "step": 2176 }, { "epoch": 7.137704918032787, "grad_norm": 10.839491844177246, "learning_rate": 1.991034071579047e-05, "loss": 2.0747, "step": 2177 }, { "epoch": 7.140983606557377, "grad_norm": 7.6594157218933105, "learning_rate": 1.9910198781843847e-05, "loss": 2.1089, "step": 2178 }, { "epoch": 7.144262295081967, "grad_norm": 10.156678199768066, "learning_rate": 1.9910056736149427e-05, "loss": 2.1196, "step": 2179 }, { "epoch": 7.147540983606557, "grad_norm": 8.08143138885498, "learning_rate": 1.9909914578708816e-05, "loss": 2.0049, "step": 2180 }, { "epoch": 7.150819672131147, "grad_norm": 8.448464393615723, "learning_rate": 1.990977230952361e-05, "loss": 2.0059, "step": 2181 }, { "epoch": 7.154098360655738, "grad_norm": 7.835197448730469, "learning_rate": 1.990962992859542e-05, "loss": 2.1233, "step": 2182 }, { "epoch": 7.157377049180328, "grad_norm": 8.664180755615234, "learning_rate": 1.990948743592585e-05, "loss": 1.9404, "step": 2183 }, { "epoch": 7.160655737704918, "grad_norm": 10.204388618469238, "learning_rate": 1.9909344831516503e-05, "loss": 2.0391, "step": 2184 }, { "epoch": 7.163934426229508, "grad_norm": 8.731762886047363, "learning_rate": 1.9909202115368992e-05, "loss": 2.0312, "step": 2185 }, { "epoch": 7.167213114754098, "grad_norm": 8.774275779724121, "learning_rate": 1.990905928748492e-05, "loss": 2.0342, "step": 2186 }, { "epoch": 7.170491803278688, "grad_norm": 7.799920558929443, "learning_rate": 1.9908916347865907e-05, "loss": 2.0698, "step": 2187 }, { "epoch": 7.173770491803278, "grad_norm": 10.068140029907227, "learning_rate": 1.9908773296513557e-05, "loss": 1.8716, "step": 2188 }, { "epoch": 7.177049180327868, "grad_norm": 6.637802600860596, "learning_rate": 1.9908630133429488e-05, "loss": 2.1023, "step": 2189 }, { "epoch": 7.180327868852459, "grad_norm": 9.914735794067383, "learning_rate": 1.990848685861531e-05, "loss": 2.0046, "step": 2190 }, { "epoch": 7.18360655737705, "grad_norm": 9.205065727233887, "learning_rate": 1.990834347207264e-05, "loss": 1.9707, "step": 2191 }, { "epoch": 7.18688524590164, "grad_norm": 9.01873779296875, "learning_rate": 1.9908199973803094e-05, "loss": 2.1152, "step": 2192 }, { "epoch": 7.19016393442623, "grad_norm": 18.999286651611328, "learning_rate": 1.9908056363808294e-05, "loss": 1.8511, "step": 2193 }, { "epoch": 7.19344262295082, "grad_norm": 9.369831085205078, "learning_rate": 1.9907912642089855e-05, "loss": 2.0854, "step": 2194 }, { "epoch": 7.19672131147541, "grad_norm": 8.313058853149414, "learning_rate": 1.99077688086494e-05, "loss": 2.0273, "step": 2195 }, { "epoch": 7.2, "grad_norm": 11.988727569580078, "learning_rate": 1.990762486348855e-05, "loss": 2.1152, "step": 2196 }, { "epoch": 7.20327868852459, "grad_norm": 8.235483169555664, "learning_rate": 1.9907480806608927e-05, "loss": 2.1196, "step": 2197 }, { "epoch": 7.20655737704918, "grad_norm": 7.183086395263672, "learning_rate": 1.9907336638012162e-05, "loss": 2.0, "step": 2198 }, { "epoch": 7.2098360655737705, "grad_norm": 9.286660194396973, "learning_rate": 1.990719235769987e-05, "loss": 2.1475, "step": 2199 }, { "epoch": 7.213114754098361, "grad_norm": 10.76718807220459, "learning_rate": 1.9907047965673684e-05, "loss": 2.0801, "step": 2200 }, { "epoch": 7.216393442622951, "grad_norm": 9.71985149383545, "learning_rate": 1.990690346193523e-05, "loss": 1.9326, "step": 2201 }, { "epoch": 7.219672131147541, "grad_norm": 7.498706817626953, "learning_rate": 1.9906758846486146e-05, "loss": 2.0166, "step": 2202 }, { "epoch": 7.222950819672131, "grad_norm": 11.801671028137207, "learning_rate": 1.990661411932805e-05, "loss": 1.9443, "step": 2203 }, { "epoch": 7.226229508196721, "grad_norm": 8.56965160369873, "learning_rate": 1.990646928046258e-05, "loss": 2.1045, "step": 2204 }, { "epoch": 7.229508196721311, "grad_norm": 6.586101531982422, "learning_rate": 1.9906324329891366e-05, "loss": 2.1079, "step": 2205 }, { "epoch": 7.232786885245901, "grad_norm": 10.880240440368652, "learning_rate": 1.9906179267616047e-05, "loss": 1.9688, "step": 2206 }, { "epoch": 7.2360655737704915, "grad_norm": 10.820351600646973, "learning_rate": 1.990603409363826e-05, "loss": 2.1187, "step": 2207 }, { "epoch": 7.239344262295082, "grad_norm": 7.806088447570801, "learning_rate": 1.990588880795964e-05, "loss": 2.0396, "step": 2208 }, { "epoch": 7.242622950819672, "grad_norm": 7.870795249938965, "learning_rate": 1.990574341058182e-05, "loss": 2.2017, "step": 2209 }, { "epoch": 7.245901639344262, "grad_norm": 9.075604438781738, "learning_rate": 1.9905597901506442e-05, "loss": 2.1133, "step": 2210 }, { "epoch": 7.249180327868853, "grad_norm": 9.030492782592773, "learning_rate": 1.9905452280735155e-05, "loss": 2.1035, "step": 2211 }, { "epoch": 7.252459016393443, "grad_norm": 8.530692100524902, "learning_rate": 1.9905306548269587e-05, "loss": 2.0381, "step": 2212 }, { "epoch": 7.255737704918033, "grad_norm": 7.107717514038086, "learning_rate": 1.9905160704111392e-05, "loss": 2.0742, "step": 2213 }, { "epoch": 7.259016393442623, "grad_norm": 11.362473487854004, "learning_rate": 1.9905014748262212e-05, "loss": 2.1973, "step": 2214 }, { "epoch": 7.262295081967213, "grad_norm": 9.760663986206055, "learning_rate": 1.9904868680723692e-05, "loss": 2.1465, "step": 2215 }, { "epoch": 7.2655737704918035, "grad_norm": 9.318731307983398, "learning_rate": 1.9904722501497477e-05, "loss": 2.2695, "step": 2216 }, { "epoch": 7.268852459016394, "grad_norm": 15.796124458312988, "learning_rate": 1.9904576210585222e-05, "loss": 2.1807, "step": 2217 }, { "epoch": 7.272131147540984, "grad_norm": 8.76150894165039, "learning_rate": 1.990442980798857e-05, "loss": 2.1025, "step": 2218 }, { "epoch": 7.275409836065574, "grad_norm": 8.40908145904541, "learning_rate": 1.990428329370917e-05, "loss": 1.8994, "step": 2219 }, { "epoch": 7.278688524590164, "grad_norm": 11.212888717651367, "learning_rate": 1.9904136667748683e-05, "loss": 1.9414, "step": 2220 }, { "epoch": 7.281967213114754, "grad_norm": 10.22061824798584, "learning_rate": 1.9903989930108757e-05, "loss": 1.8906, "step": 2221 }, { "epoch": 7.285245901639344, "grad_norm": 9.810503005981445, "learning_rate": 1.9903843080791044e-05, "loss": 2.2749, "step": 2222 }, { "epoch": 7.288524590163934, "grad_norm": 7.862120151519775, "learning_rate": 1.9903696119797204e-05, "loss": 2.1401, "step": 2223 }, { "epoch": 7.2918032786885245, "grad_norm": 11.422638893127441, "learning_rate": 1.9903549047128894e-05, "loss": 2.0903, "step": 2224 }, { "epoch": 7.295081967213115, "grad_norm": 9.236800193786621, "learning_rate": 1.9903401862787773e-05, "loss": 2.0601, "step": 2225 }, { "epoch": 7.298360655737705, "grad_norm": 10.583460807800293, "learning_rate": 1.9903254566775495e-05, "loss": 2.106, "step": 2226 }, { "epoch": 7.301639344262295, "grad_norm": 6.897363662719727, "learning_rate": 1.9903107159093728e-05, "loss": 2.0508, "step": 2227 }, { "epoch": 7.304918032786885, "grad_norm": 33.33744430541992, "learning_rate": 1.9902959639744127e-05, "loss": 2.0947, "step": 2228 }, { "epoch": 7.308196721311475, "grad_norm": 10.787033081054688, "learning_rate": 1.9902812008728364e-05, "loss": 1.9893, "step": 2229 }, { "epoch": 7.311475409836065, "grad_norm": 10.674025535583496, "learning_rate": 1.99026642660481e-05, "loss": 2.1304, "step": 2230 }, { "epoch": 7.314754098360655, "grad_norm": 11.389634132385254, "learning_rate": 1.9902516411704994e-05, "loss": 2.0376, "step": 2231 }, { "epoch": 7.3180327868852455, "grad_norm": 35.09420394897461, "learning_rate": 1.9902368445700727e-05, "loss": 2.1499, "step": 2232 }, { "epoch": 7.321311475409836, "grad_norm": 11.384184837341309, "learning_rate": 1.9902220368036956e-05, "loss": 2.2012, "step": 2233 }, { "epoch": 7.324590163934427, "grad_norm": 7.655187606811523, "learning_rate": 1.9902072178715353e-05, "loss": 2.1138, "step": 2234 }, { "epoch": 7.327868852459017, "grad_norm": 10.812405586242676, "learning_rate": 1.9901923877737593e-05, "loss": 2.085, "step": 2235 }, { "epoch": 7.331147540983607, "grad_norm": 11.405535697937012, "learning_rate": 1.9901775465105346e-05, "loss": 2.0547, "step": 2236 }, { "epoch": 7.334426229508197, "grad_norm": 10.784693717956543, "learning_rate": 1.990162694082028e-05, "loss": 2.1724, "step": 2237 }, { "epoch": 7.337704918032787, "grad_norm": 10.273119926452637, "learning_rate": 1.9901478304884084e-05, "loss": 1.896, "step": 2238 }, { "epoch": 7.340983606557377, "grad_norm": 13.223830223083496, "learning_rate": 1.990132955729842e-05, "loss": 1.9165, "step": 2239 }, { "epoch": 7.344262295081967, "grad_norm": 9.507148742675781, "learning_rate": 1.9901180698064972e-05, "loss": 2.0493, "step": 2240 }, { "epoch": 7.3475409836065575, "grad_norm": 9.298182487487793, "learning_rate": 1.9901031727185415e-05, "loss": 2.0469, "step": 2241 }, { "epoch": 7.350819672131148, "grad_norm": 9.381261825561523, "learning_rate": 1.9900882644661433e-05, "loss": 1.9023, "step": 2242 }, { "epoch": 7.354098360655738, "grad_norm": 8.476492881774902, "learning_rate": 1.99007334504947e-05, "loss": 2.1477, "step": 2243 }, { "epoch": 7.357377049180328, "grad_norm": 8.93335247039795, "learning_rate": 1.990058414468691e-05, "loss": 2.1338, "step": 2244 }, { "epoch": 7.360655737704918, "grad_norm": 15.41599178314209, "learning_rate": 1.990043472723974e-05, "loss": 2.0967, "step": 2245 }, { "epoch": 7.363934426229508, "grad_norm": 9.338556289672852, "learning_rate": 1.990028519815487e-05, "loss": 2.1465, "step": 2246 }, { "epoch": 7.367213114754098, "grad_norm": 11.743231773376465, "learning_rate": 1.9900135557433994e-05, "loss": 1.9351, "step": 2247 }, { "epoch": 7.370491803278688, "grad_norm": 11.813755989074707, "learning_rate": 1.989998580507879e-05, "loss": 2.0117, "step": 2248 }, { "epoch": 7.3737704918032785, "grad_norm": 8.614374160766602, "learning_rate": 1.989983594109096e-05, "loss": 2.0728, "step": 2249 }, { "epoch": 7.377049180327869, "grad_norm": 6.096471309661865, "learning_rate": 1.9899685965472183e-05, "loss": 2.2197, "step": 2250 }, { "epoch": 7.380327868852459, "grad_norm": 10.615204811096191, "learning_rate": 1.9899535878224153e-05, "loss": 1.9893, "step": 2251 }, { "epoch": 7.383606557377049, "grad_norm": 8.146445274353027, "learning_rate": 1.9899385679348562e-05, "loss": 1.9961, "step": 2252 }, { "epoch": 7.386885245901639, "grad_norm": 8.326016426086426, "learning_rate": 1.9899235368847107e-05, "loss": 1.9932, "step": 2253 }, { "epoch": 7.390163934426229, "grad_norm": 8.494912147521973, "learning_rate": 1.989908494672148e-05, "loss": 2.0254, "step": 2254 }, { "epoch": 7.39344262295082, "grad_norm": 15.39877700805664, "learning_rate": 1.989893441297338e-05, "loss": 2.0601, "step": 2255 }, { "epoch": 7.39672131147541, "grad_norm": 9.550536155700684, "learning_rate": 1.9898783767604503e-05, "loss": 1.9409, "step": 2256 }, { "epoch": 7.4, "grad_norm": 9.613469123840332, "learning_rate": 1.989863301061654e-05, "loss": 2.2104, "step": 2257 }, { "epoch": 7.4032786885245905, "grad_norm": 9.370243072509766, "learning_rate": 1.9898482142011203e-05, "loss": 1.8945, "step": 2258 }, { "epoch": 7.406557377049181, "grad_norm": 8.169482231140137, "learning_rate": 1.9898331161790188e-05, "loss": 2.0562, "step": 2259 }, { "epoch": 7.409836065573771, "grad_norm": 10.492158889770508, "learning_rate": 1.9898180069955195e-05, "loss": 1.9429, "step": 2260 }, { "epoch": 7.413114754098361, "grad_norm": 7.802425861358643, "learning_rate": 1.9898028866507934e-05, "loss": 2.1196, "step": 2261 }, { "epoch": 7.416393442622951, "grad_norm": 9.129581451416016, "learning_rate": 1.9897877551450102e-05, "loss": 2.2295, "step": 2262 }, { "epoch": 7.419672131147541, "grad_norm": 11.888111114501953, "learning_rate": 1.9897726124783412e-05, "loss": 1.9653, "step": 2263 }, { "epoch": 7.422950819672131, "grad_norm": 7.87205171585083, "learning_rate": 1.989757458650957e-05, "loss": 2.1567, "step": 2264 }, { "epoch": 7.426229508196721, "grad_norm": 9.036527633666992, "learning_rate": 1.989742293663028e-05, "loss": 1.9507, "step": 2265 }, { "epoch": 7.4295081967213115, "grad_norm": 6.640478610992432, "learning_rate": 1.9897271175147258e-05, "loss": 2.0571, "step": 2266 }, { "epoch": 7.432786885245902, "grad_norm": 17.91398811340332, "learning_rate": 1.989711930206221e-05, "loss": 1.7959, "step": 2267 }, { "epoch": 7.436065573770492, "grad_norm": 10.049760818481445, "learning_rate": 1.9896967317376858e-05, "loss": 2.1279, "step": 2268 }, { "epoch": 7.439344262295082, "grad_norm": 11.196359634399414, "learning_rate": 1.9896815221092902e-05, "loss": 2.0928, "step": 2269 }, { "epoch": 7.442622950819672, "grad_norm": 7.841104984283447, "learning_rate": 1.9896663013212065e-05, "loss": 2.0508, "step": 2270 }, { "epoch": 7.445901639344262, "grad_norm": 8.686910629272461, "learning_rate": 1.9896510693736066e-05, "loss": 2.0312, "step": 2271 }, { "epoch": 7.449180327868852, "grad_norm": 10.193824768066406, "learning_rate": 1.9896358262666618e-05, "loss": 2.1021, "step": 2272 }, { "epoch": 7.452459016393442, "grad_norm": 9.160529136657715, "learning_rate": 1.989620572000544e-05, "loss": 2.0298, "step": 2273 }, { "epoch": 7.4557377049180324, "grad_norm": 9.88660717010498, "learning_rate": 1.9896053065754255e-05, "loss": 2.0474, "step": 2274 }, { "epoch": 7.459016393442623, "grad_norm": 9.284172058105469, "learning_rate": 1.989590029991478e-05, "loss": 2.0015, "step": 2275 }, { "epoch": 7.462295081967213, "grad_norm": 10.983124732971191, "learning_rate": 1.9895747422488743e-05, "loss": 1.98, "step": 2276 }, { "epoch": 7.465573770491803, "grad_norm": 7.208837985992432, "learning_rate": 1.9895594433477862e-05, "loss": 2.0283, "step": 2277 }, { "epoch": 7.468852459016394, "grad_norm": 6.9756083488464355, "learning_rate": 1.989544133288387e-05, "loss": 2.0747, "step": 2278 }, { "epoch": 7.472131147540984, "grad_norm": 12.078001022338867, "learning_rate": 1.989528812070848e-05, "loss": 2.1831, "step": 2279 }, { "epoch": 7.475409836065574, "grad_norm": 11.636483192443848, "learning_rate": 1.9895134796953434e-05, "loss": 1.9062, "step": 2280 }, { "epoch": 7.478688524590164, "grad_norm": 9.353297233581543, "learning_rate": 1.9894981361620452e-05, "loss": 1.8252, "step": 2281 }, { "epoch": 7.481967213114754, "grad_norm": 7.179344177246094, "learning_rate": 1.989482781471127e-05, "loss": 2.1489, "step": 2282 }, { "epoch": 7.4852459016393444, "grad_norm": 7.663068771362305, "learning_rate": 1.989467415622761e-05, "loss": 1.9131, "step": 2283 }, { "epoch": 7.488524590163935, "grad_norm": 7.676731586456299, "learning_rate": 1.9894520386171217e-05, "loss": 2.0747, "step": 2284 }, { "epoch": 7.491803278688525, "grad_norm": 9.867507934570312, "learning_rate": 1.989436650454382e-05, "loss": 1.9873, "step": 2285 }, { "epoch": 7.495081967213115, "grad_norm": 13.696599006652832, "learning_rate": 1.989421251134715e-05, "loss": 2.0532, "step": 2286 }, { "epoch": 7.498360655737705, "grad_norm": 7.671548843383789, "learning_rate": 1.9894058406582945e-05, "loss": 1.9961, "step": 2287 }, { "epoch": 7.501639344262295, "grad_norm": 31.605173110961914, "learning_rate": 1.9893904190252945e-05, "loss": 1.8665, "step": 2288 }, { "epoch": 7.504918032786885, "grad_norm": 8.236170768737793, "learning_rate": 1.989374986235889e-05, "loss": 2.2041, "step": 2289 }, { "epoch": 7.508196721311475, "grad_norm": 12.109850883483887, "learning_rate": 1.989359542290252e-05, "loss": 2.0815, "step": 2290 }, { "epoch": 7.511475409836065, "grad_norm": 10.398293495178223, "learning_rate": 1.989344087188557e-05, "loss": 1.9907, "step": 2291 }, { "epoch": 7.5147540983606556, "grad_norm": 8.61717414855957, "learning_rate": 1.9893286209309793e-05, "loss": 2.0449, "step": 2292 }, { "epoch": 7.518032786885246, "grad_norm": 11.732513427734375, "learning_rate": 1.989313143517692e-05, "loss": 2.0898, "step": 2293 }, { "epoch": 7.521311475409836, "grad_norm": 9.171683311462402, "learning_rate": 1.989297654948871e-05, "loss": 1.9302, "step": 2294 }, { "epoch": 7.524590163934426, "grad_norm": 7.868172645568848, "learning_rate": 1.9892821552246902e-05, "loss": 1.9292, "step": 2295 }, { "epoch": 7.527868852459016, "grad_norm": 8.638437271118164, "learning_rate": 1.9892666443453244e-05, "loss": 2.0625, "step": 2296 }, { "epoch": 7.531147540983606, "grad_norm": 8.85921573638916, "learning_rate": 1.989251122310949e-05, "loss": 2.1401, "step": 2297 }, { "epoch": 7.534426229508197, "grad_norm": 7.682435989379883, "learning_rate": 1.989235589121738e-05, "loss": 2.0767, "step": 2298 }, { "epoch": 7.537704918032787, "grad_norm": 10.143163681030273, "learning_rate": 1.9892200447778674e-05, "loss": 2.2061, "step": 2299 }, { "epoch": 7.540983606557377, "grad_norm": 12.164468765258789, "learning_rate": 1.9892044892795124e-05, "loss": 1.9355, "step": 2300 }, { "epoch": 7.5442622950819676, "grad_norm": 7.943674564361572, "learning_rate": 1.9891889226268482e-05, "loss": 1.9099, "step": 2301 }, { "epoch": 7.547540983606558, "grad_norm": 7.771194934844971, "learning_rate": 1.9891733448200506e-05, "loss": 2.1567, "step": 2302 }, { "epoch": 7.550819672131148, "grad_norm": 10.384893417358398, "learning_rate": 1.9891577558592948e-05, "loss": 2.0654, "step": 2303 }, { "epoch": 7.554098360655738, "grad_norm": 9.036417007446289, "learning_rate": 1.989142155744757e-05, "loss": 1.9683, "step": 2304 }, { "epoch": 7.557377049180328, "grad_norm": 10.798004150390625, "learning_rate": 1.989126544476613e-05, "loss": 2.0039, "step": 2305 }, { "epoch": 7.560655737704918, "grad_norm": 9.42746353149414, "learning_rate": 1.9891109220550383e-05, "loss": 2.0542, "step": 2306 }, { "epoch": 7.563934426229508, "grad_norm": 10.469093322753906, "learning_rate": 1.98909528848021e-05, "loss": 2.0132, "step": 2307 }, { "epoch": 7.567213114754098, "grad_norm": 10.020151138305664, "learning_rate": 1.989079643752304e-05, "loss": 2.0078, "step": 2308 }, { "epoch": 7.5704918032786885, "grad_norm": 9.045001029968262, "learning_rate": 1.9890639878714963e-05, "loss": 2.0513, "step": 2309 }, { "epoch": 7.573770491803279, "grad_norm": 11.236289024353027, "learning_rate": 1.9890483208379638e-05, "loss": 2.1709, "step": 2310 }, { "epoch": 7.577049180327869, "grad_norm": 14.851513862609863, "learning_rate": 1.9890326426518832e-05, "loss": 1.9604, "step": 2311 }, { "epoch": 7.580327868852459, "grad_norm": 13.454671859741211, "learning_rate": 1.9890169533134314e-05, "loss": 2.0059, "step": 2312 }, { "epoch": 7.583606557377049, "grad_norm": 10.151684761047363, "learning_rate": 1.9890012528227853e-05, "loss": 2.0649, "step": 2313 }, { "epoch": 7.586885245901639, "grad_norm": 9.791379928588867, "learning_rate": 1.988985541180121e-05, "loss": 2.0405, "step": 2314 }, { "epoch": 7.590163934426229, "grad_norm": 8.636167526245117, "learning_rate": 1.988969818385617e-05, "loss": 2.0054, "step": 2315 }, { "epoch": 7.593442622950819, "grad_norm": 14.524273872375488, "learning_rate": 1.98895408443945e-05, "loss": 2.0229, "step": 2316 }, { "epoch": 7.5967213114754095, "grad_norm": 10.09352970123291, "learning_rate": 1.988938339341797e-05, "loss": 1.9277, "step": 2317 }, { "epoch": 7.6, "grad_norm": 8.505220413208008, "learning_rate": 1.9889225830928365e-05, "loss": 2.1782, "step": 2318 }, { "epoch": 7.60327868852459, "grad_norm": 9.050237655639648, "learning_rate": 1.9889068156927454e-05, "loss": 1.8945, "step": 2319 }, { "epoch": 7.60655737704918, "grad_norm": 13.10051441192627, "learning_rate": 1.988891037141702e-05, "loss": 2.2583, "step": 2320 }, { "epoch": 7.60983606557377, "grad_norm": 9.33932876586914, "learning_rate": 1.988875247439884e-05, "loss": 1.9951, "step": 2321 }, { "epoch": 7.613114754098361, "grad_norm": 15.48393726348877, "learning_rate": 1.9888594465874692e-05, "loss": 2.0654, "step": 2322 }, { "epoch": 7.616393442622951, "grad_norm": 17.25335693359375, "learning_rate": 1.9888436345846357e-05, "loss": 2.1304, "step": 2323 }, { "epoch": 7.619672131147541, "grad_norm": 10.868805885314941, "learning_rate": 1.9888278114315628e-05, "loss": 2.0913, "step": 2324 }, { "epoch": 7.622950819672131, "grad_norm": 8.478281021118164, "learning_rate": 1.9888119771284277e-05, "loss": 2.0327, "step": 2325 }, { "epoch": 7.6262295081967215, "grad_norm": 8.825060844421387, "learning_rate": 1.9887961316754093e-05, "loss": 2.0376, "step": 2326 }, { "epoch": 7.629508196721312, "grad_norm": 27.0634765625, "learning_rate": 1.9887802750726868e-05, "loss": 2.0669, "step": 2327 }, { "epoch": 7.632786885245902, "grad_norm": 10.227781295776367, "learning_rate": 1.9887644073204385e-05, "loss": 1.8779, "step": 2328 }, { "epoch": 7.636065573770492, "grad_norm": 9.361814498901367, "learning_rate": 1.9887485284188432e-05, "loss": 2.0796, "step": 2329 }, { "epoch": 7.639344262295082, "grad_norm": 9.89785099029541, "learning_rate": 1.9887326383680805e-05, "loss": 2.0654, "step": 2330 }, { "epoch": 7.642622950819672, "grad_norm": 14.673310279846191, "learning_rate": 1.9887167371683293e-05, "loss": 1.8262, "step": 2331 }, { "epoch": 7.645901639344262, "grad_norm": 7.923158645629883, "learning_rate": 1.988700824819769e-05, "loss": 1.8989, "step": 2332 }, { "epoch": 7.649180327868852, "grad_norm": 11.670390129089355, "learning_rate": 1.9886849013225787e-05, "loss": 2.1475, "step": 2333 }, { "epoch": 7.6524590163934425, "grad_norm": 10.45964241027832, "learning_rate": 1.988668966676938e-05, "loss": 2.0234, "step": 2334 }, { "epoch": 7.655737704918033, "grad_norm": 8.66825008392334, "learning_rate": 1.988653020883027e-05, "loss": 2.2031, "step": 2335 }, { "epoch": 7.659016393442623, "grad_norm": 7.79220724105835, "learning_rate": 1.9886370639410252e-05, "loss": 2.1367, "step": 2336 }, { "epoch": 7.662295081967213, "grad_norm": 10.988043785095215, "learning_rate": 1.9886210958511126e-05, "loss": 1.9146, "step": 2337 }, { "epoch": 7.665573770491803, "grad_norm": 8.21385669708252, "learning_rate": 1.988605116613469e-05, "loss": 1.9937, "step": 2338 }, { "epoch": 7.668852459016393, "grad_norm": 7.907109260559082, "learning_rate": 1.988589126228275e-05, "loss": 2.0728, "step": 2339 }, { "epoch": 7.672131147540983, "grad_norm": 9.665154457092285, "learning_rate": 1.9885731246957108e-05, "loss": 2.0986, "step": 2340 }, { "epoch": 7.675409836065574, "grad_norm": 7.639835357666016, "learning_rate": 1.9885571120159568e-05, "loss": 2.0869, "step": 2341 }, { "epoch": 7.678688524590164, "grad_norm": 12.800773620605469, "learning_rate": 1.9885410881891933e-05, "loss": 2.1245, "step": 2342 }, { "epoch": 7.6819672131147545, "grad_norm": 9.090739250183105, "learning_rate": 1.9885250532156012e-05, "loss": 2.1211, "step": 2343 }, { "epoch": 7.685245901639345, "grad_norm": 13.500652313232422, "learning_rate": 1.9885090070953615e-05, "loss": 2.0361, "step": 2344 }, { "epoch": 7.688524590163935, "grad_norm": 11.357068061828613, "learning_rate": 1.9884929498286548e-05, "loss": 1.7959, "step": 2345 }, { "epoch": 7.691803278688525, "grad_norm": 8.234344482421875, "learning_rate": 1.9884768814156626e-05, "loss": 2.0083, "step": 2346 }, { "epoch": 7.695081967213115, "grad_norm": 10.727822303771973, "learning_rate": 1.9884608018565656e-05, "loss": 2.2183, "step": 2347 }, { "epoch": 7.698360655737705, "grad_norm": 6.7083821296691895, "learning_rate": 1.9884447111515453e-05, "loss": 2.0466, "step": 2348 }, { "epoch": 7.701639344262295, "grad_norm": 7.522562503814697, "learning_rate": 1.9884286093007833e-05, "loss": 1.9238, "step": 2349 }, { "epoch": 7.704918032786885, "grad_norm": 8.09029769897461, "learning_rate": 1.9884124963044606e-05, "loss": 2.0308, "step": 2350 }, { "epoch": 7.7081967213114755, "grad_norm": 17.23654556274414, "learning_rate": 1.98839637216276e-05, "loss": 2.0747, "step": 2351 }, { "epoch": 7.711475409836066, "grad_norm": 7.65261173248291, "learning_rate": 1.988380236875862e-05, "loss": 1.8647, "step": 2352 }, { "epoch": 7.714754098360656, "grad_norm": 12.851713180541992, "learning_rate": 1.988364090443949e-05, "loss": 1.9917, "step": 2353 }, { "epoch": 7.718032786885246, "grad_norm": 13.322216987609863, "learning_rate": 1.988347932867204e-05, "loss": 2.0405, "step": 2354 }, { "epoch": 7.721311475409836, "grad_norm": 8.594393730163574, "learning_rate": 1.988331764145808e-05, "loss": 2.1455, "step": 2355 }, { "epoch": 7.724590163934426, "grad_norm": 9.544543266296387, "learning_rate": 1.988315584279944e-05, "loss": 2.0132, "step": 2356 }, { "epoch": 7.727868852459016, "grad_norm": 9.011089324951172, "learning_rate": 1.988299393269794e-05, "loss": 2.1499, "step": 2357 }, { "epoch": 7.731147540983606, "grad_norm": 8.064428329467773, "learning_rate": 1.988283191115541e-05, "loss": 2.0923, "step": 2358 }, { "epoch": 7.7344262295081965, "grad_norm": 7.8765058517456055, "learning_rate": 1.9882669778173672e-05, "loss": 2.0786, "step": 2359 }, { "epoch": 7.737704918032787, "grad_norm": 8.80392074584961, "learning_rate": 1.9882507533754553e-05, "loss": 1.9404, "step": 2360 }, { "epoch": 7.740983606557377, "grad_norm": 10.333259582519531, "learning_rate": 1.9882345177899895e-05, "loss": 2.0303, "step": 2361 }, { "epoch": 7.744262295081967, "grad_norm": 10.647377014160156, "learning_rate": 1.9882182710611513e-05, "loss": 2.2104, "step": 2362 }, { "epoch": 7.747540983606557, "grad_norm": 15.693790435791016, "learning_rate": 1.9882020131891248e-05, "loss": 2.1187, "step": 2363 }, { "epoch": 7.750819672131147, "grad_norm": 7.485401153564453, "learning_rate": 1.9881857441740932e-05, "loss": 2.3032, "step": 2364 }, { "epoch": 7.754098360655737, "grad_norm": 9.08000373840332, "learning_rate": 1.9881694640162402e-05, "loss": 1.7817, "step": 2365 }, { "epoch": 7.757377049180328, "grad_norm": 9.100510597229004, "learning_rate": 1.9881531727157484e-05, "loss": 2.002, "step": 2366 }, { "epoch": 7.760655737704918, "grad_norm": 11.680412292480469, "learning_rate": 1.988136870272803e-05, "loss": 2.0398, "step": 2367 }, { "epoch": 7.7639344262295085, "grad_norm": 9.539140701293945, "learning_rate": 1.9881205566875864e-05, "loss": 2.001, "step": 2368 }, { "epoch": 7.767213114754099, "grad_norm": 7.397787570953369, "learning_rate": 1.988104231960283e-05, "loss": 1.873, "step": 2369 }, { "epoch": 7.770491803278689, "grad_norm": 8.176032066345215, "learning_rate": 1.9880878960910772e-05, "loss": 2.1084, "step": 2370 }, { "epoch": 7.773770491803279, "grad_norm": 7.6719512939453125, "learning_rate": 1.988071549080153e-05, "loss": 2.0537, "step": 2371 }, { "epoch": 7.777049180327869, "grad_norm": 11.091221809387207, "learning_rate": 1.988055190927695e-05, "loss": 2.0713, "step": 2372 }, { "epoch": 7.780327868852459, "grad_norm": 17.21943473815918, "learning_rate": 1.9880388216338873e-05, "loss": 2.0908, "step": 2373 }, { "epoch": 7.783606557377049, "grad_norm": 7.519406795501709, "learning_rate": 1.9880224411989143e-05, "loss": 2.2144, "step": 2374 }, { "epoch": 7.786885245901639, "grad_norm": 7.227258205413818, "learning_rate": 1.9880060496229614e-05, "loss": 2.2227, "step": 2375 }, { "epoch": 7.7901639344262295, "grad_norm": 7.77577543258667, "learning_rate": 1.9879896469062125e-05, "loss": 1.9944, "step": 2376 }, { "epoch": 7.79344262295082, "grad_norm": 13.212519645690918, "learning_rate": 1.9879732330488535e-05, "loss": 2.1353, "step": 2377 }, { "epoch": 7.79672131147541, "grad_norm": 7.6723456382751465, "learning_rate": 1.987956808051069e-05, "loss": 1.9585, "step": 2378 }, { "epoch": 7.8, "grad_norm": 7.84119176864624, "learning_rate": 1.987940371913044e-05, "loss": 1.9419, "step": 2379 }, { "epoch": 7.80327868852459, "grad_norm": 29.09961700439453, "learning_rate": 1.9879239246349647e-05, "loss": 2.0513, "step": 2380 }, { "epoch": 7.80655737704918, "grad_norm": 9.855175971984863, "learning_rate": 1.987907466217015e-05, "loss": 2.2764, "step": 2381 }, { "epoch": 7.80983606557377, "grad_norm": 7.288943767547607, "learning_rate": 1.9878909966593825e-05, "loss": 2.0874, "step": 2382 }, { "epoch": 7.81311475409836, "grad_norm": 7.598097324371338, "learning_rate": 1.9878745159622515e-05, "loss": 2.1328, "step": 2383 }, { "epoch": 7.81639344262295, "grad_norm": 7.966827869415283, "learning_rate": 1.987858024125808e-05, "loss": 2.1836, "step": 2384 }, { "epoch": 7.8196721311475414, "grad_norm": 7.582022666931152, "learning_rate": 1.9878415211502382e-05, "loss": 2.022, "step": 2385 }, { "epoch": 7.822950819672132, "grad_norm": 8.27962875366211, "learning_rate": 1.987825007035728e-05, "loss": 2.0176, "step": 2386 }, { "epoch": 7.826229508196722, "grad_norm": 8.886266708374023, "learning_rate": 1.987808481782464e-05, "loss": 2.2163, "step": 2387 }, { "epoch": 7.829508196721312, "grad_norm": 9.834465026855469, "learning_rate": 1.9877919453906325e-05, "loss": 2.1182, "step": 2388 }, { "epoch": 7.832786885245902, "grad_norm": 11.691359519958496, "learning_rate": 1.9877753978604194e-05, "loss": 1.9858, "step": 2389 }, { "epoch": 7.836065573770492, "grad_norm": 9.482034683227539, "learning_rate": 1.987758839192012e-05, "loss": 2.1343, "step": 2390 }, { "epoch": 7.839344262295082, "grad_norm": 9.792427062988281, "learning_rate": 1.9877422693855967e-05, "loss": 1.9321, "step": 2391 }, { "epoch": 7.842622950819672, "grad_norm": 12.464720726013184, "learning_rate": 1.98772568844136e-05, "loss": 2.1123, "step": 2392 }, { "epoch": 7.845901639344262, "grad_norm": 12.862398147583008, "learning_rate": 1.9877090963594892e-05, "loss": 2.1733, "step": 2393 }, { "epoch": 7.849180327868853, "grad_norm": 10.999598503112793, "learning_rate": 1.9876924931401717e-05, "loss": 1.9678, "step": 2394 }, { "epoch": 7.852459016393443, "grad_norm": 11.824982643127441, "learning_rate": 1.987675878783594e-05, "loss": 1.917, "step": 2395 }, { "epoch": 7.855737704918033, "grad_norm": 12.791261672973633, "learning_rate": 1.9876592532899442e-05, "loss": 2.2109, "step": 2396 }, { "epoch": 7.859016393442623, "grad_norm": 8.364901542663574, "learning_rate": 1.987642616659409e-05, "loss": 1.9585, "step": 2397 }, { "epoch": 7.862295081967213, "grad_norm": 8.829551696777344, "learning_rate": 1.9876259688921765e-05, "loss": 2.0371, "step": 2398 }, { "epoch": 7.865573770491803, "grad_norm": 9.255219459533691, "learning_rate": 1.9876093099884346e-05, "loss": 1.9644, "step": 2399 }, { "epoch": 7.868852459016393, "grad_norm": 10.48562240600586, "learning_rate": 1.9875926399483708e-05, "loss": 2.146, "step": 2400 }, { "epoch": 7.872131147540983, "grad_norm": 12.092361450195312, "learning_rate": 1.987575958772173e-05, "loss": 2.1099, "step": 2401 }, { "epoch": 7.8754098360655735, "grad_norm": 9.254863739013672, "learning_rate": 1.9875592664600294e-05, "loss": 2.0454, "step": 2402 }, { "epoch": 7.878688524590164, "grad_norm": 9.922590255737305, "learning_rate": 1.9875425630121285e-05, "loss": 2.0669, "step": 2403 }, { "epoch": 7.881967213114754, "grad_norm": 7.369717597961426, "learning_rate": 1.9875258484286582e-05, "loss": 2.313, "step": 2404 }, { "epoch": 7.885245901639344, "grad_norm": 7.119960308074951, "learning_rate": 1.9875091227098076e-05, "loss": 1.9219, "step": 2405 }, { "epoch": 7.888524590163934, "grad_norm": 9.132503509521484, "learning_rate": 1.9874923858557645e-05, "loss": 1.8921, "step": 2406 }, { "epoch": 7.891803278688524, "grad_norm": 14.17480182647705, "learning_rate": 1.987475637866718e-05, "loss": 2.0571, "step": 2407 }, { "epoch": 7.895081967213114, "grad_norm": 9.514469146728516, "learning_rate": 1.9874588787428572e-05, "loss": 1.8967, "step": 2408 }, { "epoch": 7.898360655737705, "grad_norm": 7.655304908752441, "learning_rate": 1.9874421084843707e-05, "loss": 1.9019, "step": 2409 }, { "epoch": 7.901639344262295, "grad_norm": 17.463218688964844, "learning_rate": 1.9874253270914478e-05, "loss": 1.9561, "step": 2410 }, { "epoch": 7.9049180327868855, "grad_norm": 12.459489822387695, "learning_rate": 1.9874085345642774e-05, "loss": 2.0098, "step": 2411 }, { "epoch": 7.908196721311476, "grad_norm": 12.401256561279297, "learning_rate": 1.9873917309030494e-05, "loss": 2.1851, "step": 2412 }, { "epoch": 7.911475409836066, "grad_norm": 11.78803825378418, "learning_rate": 1.987374916107953e-05, "loss": 2.1538, "step": 2413 }, { "epoch": 7.914754098360656, "grad_norm": 8.372515678405762, "learning_rate": 1.9873580901791775e-05, "loss": 1.8994, "step": 2414 }, { "epoch": 7.918032786885246, "grad_norm": 8.196864128112793, "learning_rate": 1.9873412531169135e-05, "loss": 1.9375, "step": 2415 }, { "epoch": 7.921311475409836, "grad_norm": 11.382407188415527, "learning_rate": 1.98732440492135e-05, "loss": 2.0562, "step": 2416 }, { "epoch": 7.924590163934426, "grad_norm": 12.833914756774902, "learning_rate": 1.9873075455926773e-05, "loss": 2.1963, "step": 2417 }, { "epoch": 7.927868852459016, "grad_norm": 8.473085403442383, "learning_rate": 1.9872906751310852e-05, "loss": 2.0137, "step": 2418 }, { "epoch": 7.9311475409836065, "grad_norm": 8.735611915588379, "learning_rate": 1.9872737935367647e-05, "loss": 2.1123, "step": 2419 }, { "epoch": 7.934426229508197, "grad_norm": 9.541522979736328, "learning_rate": 1.9872569008099053e-05, "loss": 2.0669, "step": 2420 }, { "epoch": 7.937704918032787, "grad_norm": 7.278489589691162, "learning_rate": 1.987239996950698e-05, "loss": 2.1772, "step": 2421 }, { "epoch": 7.940983606557377, "grad_norm": 10.923750877380371, "learning_rate": 1.9872230819593333e-05, "loss": 2.165, "step": 2422 }, { "epoch": 7.944262295081967, "grad_norm": 9.415023803710938, "learning_rate": 1.9872061558360015e-05, "loss": 1.9854, "step": 2423 }, { "epoch": 7.947540983606557, "grad_norm": 11.419829368591309, "learning_rate": 1.9871892185808945e-05, "loss": 2.042, "step": 2424 }, { "epoch": 7.950819672131147, "grad_norm": 8.202561378479004, "learning_rate": 1.9871722701942026e-05, "loss": 2.1338, "step": 2425 }, { "epoch": 7.954098360655737, "grad_norm": 14.230269432067871, "learning_rate": 1.9871553106761167e-05, "loss": 2.1514, "step": 2426 }, { "epoch": 7.9573770491803275, "grad_norm": 7.777876377105713, "learning_rate": 1.987138340026828e-05, "loss": 1.9014, "step": 2427 }, { "epoch": 7.9606557377049185, "grad_norm": 10.831121444702148, "learning_rate": 1.9871213582465282e-05, "loss": 2.2119, "step": 2428 }, { "epoch": 7.963934426229509, "grad_norm": 8.62537956237793, "learning_rate": 1.987104365335409e-05, "loss": 2.0122, "step": 2429 }, { "epoch": 7.967213114754099, "grad_norm": 8.720458984375, "learning_rate": 1.9870873612936618e-05, "loss": 2.1357, "step": 2430 }, { "epoch": 7.970491803278689, "grad_norm": 8.410870552062988, "learning_rate": 1.9870703461214784e-05, "loss": 2.0698, "step": 2431 }, { "epoch": 7.973770491803279, "grad_norm": 9.80127239227295, "learning_rate": 1.9870533198190503e-05, "loss": 1.9966, "step": 2432 }, { "epoch": 7.977049180327869, "grad_norm": 6.546308994293213, "learning_rate": 1.9870362823865696e-05, "loss": 2.1294, "step": 2433 }, { "epoch": 7.980327868852459, "grad_norm": 9.814133644104004, "learning_rate": 1.987019233824229e-05, "loss": 2.1235, "step": 2434 }, { "epoch": 7.983606557377049, "grad_norm": 32.75967025756836, "learning_rate": 1.9870021741322197e-05, "loss": 1.9717, "step": 2435 }, { "epoch": 7.9868852459016395, "grad_norm": 11.26630973815918, "learning_rate": 1.9869851033107354e-05, "loss": 1.9429, "step": 2436 }, { "epoch": 7.99016393442623, "grad_norm": 8.692780494689941, "learning_rate": 1.9869680213599672e-05, "loss": 1.9756, "step": 2437 }, { "epoch": 7.99344262295082, "grad_norm": 10.286293983459473, "learning_rate": 1.9869509282801087e-05, "loss": 2.0015, "step": 2438 }, { "epoch": 7.99672131147541, "grad_norm": 11.183858871459961, "learning_rate": 1.9869338240713523e-05, "loss": 1.8843, "step": 2439 }, { "epoch": 8.0, "grad_norm": 9.221745491027832, "learning_rate": 1.9869167087338908e-05, "loss": 1.918, "step": 2440 }, { "epoch": 8.00327868852459, "grad_norm": 8.316161155700684, "learning_rate": 1.9868995822679173e-05, "loss": 1.8638, "step": 2441 }, { "epoch": 8.00655737704918, "grad_norm": 9.047435760498047, "learning_rate": 1.9868824446736246e-05, "loss": 2.0303, "step": 2442 }, { "epoch": 8.00983606557377, "grad_norm": 7.563805103302002, "learning_rate": 1.986865295951207e-05, "loss": 1.8862, "step": 2443 }, { "epoch": 8.01311475409836, "grad_norm": 7.7042951583862305, "learning_rate": 1.9868481361008565e-05, "loss": 1.9497, "step": 2444 }, { "epoch": 8.01639344262295, "grad_norm": 7.576498031616211, "learning_rate": 1.9868309651227674e-05, "loss": 1.9116, "step": 2445 }, { "epoch": 8.01967213114754, "grad_norm": 9.683428764343262, "learning_rate": 1.986813783017133e-05, "loss": 1.8784, "step": 2446 }, { "epoch": 8.02295081967213, "grad_norm": 11.92547607421875, "learning_rate": 1.986796589784147e-05, "loss": 1.9844, "step": 2447 }, { "epoch": 8.026229508196721, "grad_norm": 9.496994972229004, "learning_rate": 1.986779385424004e-05, "loss": 1.8721, "step": 2448 }, { "epoch": 8.029508196721311, "grad_norm": 10.234097480773926, "learning_rate": 1.986762169936897e-05, "loss": 1.8945, "step": 2449 }, { "epoch": 8.032786885245901, "grad_norm": 8.953205108642578, "learning_rate": 1.9867449433230206e-05, "loss": 1.8696, "step": 2450 }, { "epoch": 8.036065573770491, "grad_norm": 7.733658313751221, "learning_rate": 1.986727705582569e-05, "loss": 1.9246, "step": 2451 }, { "epoch": 8.039344262295081, "grad_norm": 14.793386459350586, "learning_rate": 1.9867104567157367e-05, "loss": 2.1699, "step": 2452 }, { "epoch": 8.042622950819672, "grad_norm": 12.840620994567871, "learning_rate": 1.9866931967227183e-05, "loss": 1.9121, "step": 2453 }, { "epoch": 8.045901639344262, "grad_norm": 7.220935344696045, "learning_rate": 1.9866759256037076e-05, "loss": 2.1851, "step": 2454 }, { "epoch": 8.049180327868852, "grad_norm": 8.651944160461426, "learning_rate": 1.9866586433589002e-05, "loss": 1.9219, "step": 2455 }, { "epoch": 8.052459016393442, "grad_norm": 9.37271785736084, "learning_rate": 1.986641349988491e-05, "loss": 2.0425, "step": 2456 }, { "epoch": 8.055737704918032, "grad_norm": 8.528764724731445, "learning_rate": 1.9866240454926745e-05, "loss": 1.8511, "step": 2457 }, { "epoch": 8.059016393442622, "grad_norm": 9.887785911560059, "learning_rate": 1.986606729871646e-05, "loss": 1.9282, "step": 2458 }, { "epoch": 8.062295081967212, "grad_norm": 9.596588134765625, "learning_rate": 1.986589403125601e-05, "loss": 1.9077, "step": 2459 }, { "epoch": 8.065573770491802, "grad_norm": 8.39920425415039, "learning_rate": 1.9865720652547345e-05, "loss": 1.873, "step": 2460 }, { "epoch": 8.068852459016393, "grad_norm": 9.664958000183105, "learning_rate": 1.9865547162592423e-05, "loss": 1.9473, "step": 2461 }, { "epoch": 8.072131147540984, "grad_norm": 10.443802833557129, "learning_rate": 1.9865373561393197e-05, "loss": 1.9849, "step": 2462 }, { "epoch": 8.075409836065575, "grad_norm": 12.679463386535645, "learning_rate": 1.986519984895163e-05, "loss": 2.2026, "step": 2463 }, { "epoch": 8.078688524590165, "grad_norm": 12.192951202392578, "learning_rate": 1.9865026025269674e-05, "loss": 1.8047, "step": 2464 }, { "epoch": 8.081967213114755, "grad_norm": 8.562423706054688, "learning_rate": 1.9864852090349297e-05, "loss": 1.6411, "step": 2465 }, { "epoch": 8.085245901639345, "grad_norm": 7.655757904052734, "learning_rate": 1.9864678044192453e-05, "loss": 1.9543, "step": 2466 }, { "epoch": 8.088524590163935, "grad_norm": 7.759767532348633, "learning_rate": 1.9864503886801108e-05, "loss": 2.0107, "step": 2467 }, { "epoch": 8.091803278688525, "grad_norm": 7.0739827156066895, "learning_rate": 1.9864329618177223e-05, "loss": 1.9497, "step": 2468 }, { "epoch": 8.095081967213115, "grad_norm": 8.139227867126465, "learning_rate": 1.9864155238322768e-05, "loss": 1.8494, "step": 2469 }, { "epoch": 8.098360655737705, "grad_norm": 9.71851634979248, "learning_rate": 1.9863980747239707e-05, "loss": 2.0015, "step": 2470 }, { "epoch": 8.101639344262296, "grad_norm": 9.957058906555176, "learning_rate": 1.9863806144930005e-05, "loss": 1.9741, "step": 2471 }, { "epoch": 8.104918032786886, "grad_norm": 11.597049713134766, "learning_rate": 1.9863631431395634e-05, "loss": 1.8706, "step": 2472 }, { "epoch": 8.108196721311476, "grad_norm": 6.49490213394165, "learning_rate": 1.9863456606638563e-05, "loss": 1.9868, "step": 2473 }, { "epoch": 8.111475409836066, "grad_norm": 12.73015308380127, "learning_rate": 1.986328167066076e-05, "loss": 2.0835, "step": 2474 }, { "epoch": 8.114754098360656, "grad_norm": 9.65857219696045, "learning_rate": 1.9863106623464204e-05, "loss": 1.9736, "step": 2475 }, { "epoch": 8.118032786885246, "grad_norm": 8.646177291870117, "learning_rate": 1.9862931465050867e-05, "loss": 1.9727, "step": 2476 }, { "epoch": 8.121311475409836, "grad_norm": 10.139416694641113, "learning_rate": 1.986275619542272e-05, "loss": 1.9004, "step": 2477 }, { "epoch": 8.124590163934426, "grad_norm": 8.608704566955566, "learning_rate": 1.9862580814581743e-05, "loss": 1.8833, "step": 2478 }, { "epoch": 8.127868852459017, "grad_norm": 8.543478965759277, "learning_rate": 1.9862405322529918e-05, "loss": 1.9819, "step": 2479 }, { "epoch": 8.131147540983607, "grad_norm": 8.212593078613281, "learning_rate": 1.9862229719269212e-05, "loss": 2.0225, "step": 2480 }, { "epoch": 8.134426229508197, "grad_norm": 7.928706645965576, "learning_rate": 1.986205400480161e-05, "loss": 2.021, "step": 2481 }, { "epoch": 8.137704918032787, "grad_norm": 6.986930847167969, "learning_rate": 1.98618781791291e-05, "loss": 2.2153, "step": 2482 }, { "epoch": 8.140983606557377, "grad_norm": 11.244826316833496, "learning_rate": 1.986170224225366e-05, "loss": 1.907, "step": 2483 }, { "epoch": 8.144262295081967, "grad_norm": 10.159725189208984, "learning_rate": 1.9861526194177276e-05, "loss": 1.8804, "step": 2484 }, { "epoch": 8.147540983606557, "grad_norm": 9.946998596191406, "learning_rate": 1.9861350034901924e-05, "loss": 1.9131, "step": 2485 }, { "epoch": 8.150819672131147, "grad_norm": 8.203071594238281, "learning_rate": 1.98611737644296e-05, "loss": 2.1323, "step": 2486 }, { "epoch": 8.154098360655738, "grad_norm": 8.202122688293457, "learning_rate": 1.986099738276229e-05, "loss": 1.9722, "step": 2487 }, { "epoch": 8.157377049180328, "grad_norm": 8.094611167907715, "learning_rate": 1.9860820889901982e-05, "loss": 1.7642, "step": 2488 }, { "epoch": 8.160655737704918, "grad_norm": 10.585988998413086, "learning_rate": 1.9860644285850663e-05, "loss": 2.1128, "step": 2489 }, { "epoch": 8.163934426229508, "grad_norm": 8.083356857299805, "learning_rate": 1.986046757061033e-05, "loss": 1.8911, "step": 2490 }, { "epoch": 8.167213114754098, "grad_norm": 7.8323259353637695, "learning_rate": 1.986029074418297e-05, "loss": 2.167, "step": 2491 }, { "epoch": 8.170491803278688, "grad_norm": 9.164520263671875, "learning_rate": 1.986011380657058e-05, "loss": 1.918, "step": 2492 }, { "epoch": 8.173770491803278, "grad_norm": 7.913779258728027, "learning_rate": 1.9859936757775158e-05, "loss": 2.0376, "step": 2493 }, { "epoch": 8.177049180327868, "grad_norm": 10.751538276672363, "learning_rate": 1.9859759597798693e-05, "loss": 1.8506, "step": 2494 }, { "epoch": 8.180327868852459, "grad_norm": 9.521934509277344, "learning_rate": 1.9859582326643192e-05, "loss": 1.8604, "step": 2495 }, { "epoch": 8.183606557377049, "grad_norm": 6.678683280944824, "learning_rate": 1.9859404944310645e-05, "loss": 2.0078, "step": 2496 }, { "epoch": 8.186885245901639, "grad_norm": 21.45467758178711, "learning_rate": 1.9859227450803056e-05, "loss": 1.8555, "step": 2497 }, { "epoch": 8.190163934426229, "grad_norm": 7.190951824188232, "learning_rate": 1.985904984612243e-05, "loss": 2.1812, "step": 2498 }, { "epoch": 8.193442622950819, "grad_norm": 6.879979610443115, "learning_rate": 1.9858872130270764e-05, "loss": 1.9421, "step": 2499 }, { "epoch": 8.19672131147541, "grad_norm": 13.282389640808105, "learning_rate": 1.985869430325006e-05, "loss": 1.9839, "step": 2500 }, { "epoch": 8.2, "grad_norm": 7.892559051513672, "learning_rate": 1.9858516365062334e-05, "loss": 1.855, "step": 2501 }, { "epoch": 8.20327868852459, "grad_norm": 8.707128524780273, "learning_rate": 1.9858338315709586e-05, "loss": 1.9272, "step": 2502 }, { "epoch": 8.20655737704918, "grad_norm": 7.443018436431885, "learning_rate": 1.9858160155193817e-05, "loss": 2.0544, "step": 2503 }, { "epoch": 8.20983606557377, "grad_norm": 9.651239395141602, "learning_rate": 1.9857981883517045e-05, "loss": 1.9248, "step": 2504 }, { "epoch": 8.21311475409836, "grad_norm": 11.097803115844727, "learning_rate": 1.985780350068128e-05, "loss": 2.0659, "step": 2505 }, { "epoch": 8.216393442622952, "grad_norm": 7.811547756195068, "learning_rate": 1.9857625006688527e-05, "loss": 1.9141, "step": 2506 }, { "epoch": 8.219672131147542, "grad_norm": 8.471809387207031, "learning_rate": 1.9857446401540807e-05, "loss": 2.1387, "step": 2507 }, { "epoch": 8.222950819672132, "grad_norm": 11.285188674926758, "learning_rate": 1.9857267685240127e-05, "loss": 2.082, "step": 2508 }, { "epoch": 8.226229508196722, "grad_norm": 11.362229347229004, "learning_rate": 1.9857088857788504e-05, "loss": 1.9131, "step": 2509 }, { "epoch": 8.229508196721312, "grad_norm": 7.693389892578125, "learning_rate": 1.9856909919187958e-05, "loss": 1.9209, "step": 2510 }, { "epoch": 8.232786885245902, "grad_norm": 15.415765762329102, "learning_rate": 1.98567308694405e-05, "loss": 2.1172, "step": 2511 }, { "epoch": 8.236065573770492, "grad_norm": 103.38041687011719, "learning_rate": 1.9856551708548158e-05, "loss": 1.8516, "step": 2512 }, { "epoch": 8.239344262295083, "grad_norm": 9.479395866394043, "learning_rate": 1.9856372436512946e-05, "loss": 2.0249, "step": 2513 }, { "epoch": 8.242622950819673, "grad_norm": 27.054460525512695, "learning_rate": 1.9856193053336884e-05, "loss": 2.2632, "step": 2514 }, { "epoch": 8.245901639344263, "grad_norm": 13.54438591003418, "learning_rate": 1.9856013559022e-05, "loss": 2.1694, "step": 2515 }, { "epoch": 8.249180327868853, "grad_norm": 14.931588172912598, "learning_rate": 1.9855833953570313e-05, "loss": 2.0625, "step": 2516 }, { "epoch": 8.252459016393443, "grad_norm": 15.364200592041016, "learning_rate": 1.985565423698385e-05, "loss": 1.9556, "step": 2517 }, { "epoch": 8.255737704918033, "grad_norm": 9.206067085266113, "learning_rate": 1.9855474409264645e-05, "loss": 2.1299, "step": 2518 }, { "epoch": 8.259016393442623, "grad_norm": 10.340709686279297, "learning_rate": 1.9855294470414712e-05, "loss": 2.0908, "step": 2519 }, { "epoch": 8.262295081967213, "grad_norm": 8.076340675354004, "learning_rate": 1.9855114420436087e-05, "loss": 2.208, "step": 2520 }, { "epoch": 8.265573770491804, "grad_norm": 12.809579849243164, "learning_rate": 1.9854934259330804e-05, "loss": 2.063, "step": 2521 }, { "epoch": 8.268852459016394, "grad_norm": 18.120426177978516, "learning_rate": 1.985475398710089e-05, "loss": 2.1167, "step": 2522 }, { "epoch": 8.272131147540984, "grad_norm": 13.406224250793457, "learning_rate": 1.985457360374838e-05, "loss": 2.0938, "step": 2523 }, { "epoch": 8.275409836065574, "grad_norm": 10.251782417297363, "learning_rate": 1.9854393109275302e-05, "loss": 1.9316, "step": 2524 }, { "epoch": 8.278688524590164, "grad_norm": 19.55133819580078, "learning_rate": 1.9854212503683697e-05, "loss": 2.1113, "step": 2525 }, { "epoch": 8.281967213114754, "grad_norm": 11.577215194702148, "learning_rate": 1.98540317869756e-05, "loss": 2.1113, "step": 2526 }, { "epoch": 8.285245901639344, "grad_norm": 13.672295570373535, "learning_rate": 1.985385095915305e-05, "loss": 2.0, "step": 2527 }, { "epoch": 8.288524590163934, "grad_norm": 9.801811218261719, "learning_rate": 1.9853670020218084e-05, "loss": 2.0615, "step": 2528 }, { "epoch": 8.291803278688525, "grad_norm": 9.111653327941895, "learning_rate": 1.9853488970172747e-05, "loss": 2.0034, "step": 2529 }, { "epoch": 8.295081967213115, "grad_norm": 8.430712699890137, "learning_rate": 1.9853307809019072e-05, "loss": 2.1108, "step": 2530 }, { "epoch": 8.298360655737705, "grad_norm": 9.636516571044922, "learning_rate": 1.985312653675911e-05, "loss": 2.1152, "step": 2531 }, { "epoch": 8.301639344262295, "grad_norm": 12.89643669128418, "learning_rate": 1.98529451533949e-05, "loss": 2.1616, "step": 2532 }, { "epoch": 8.304918032786885, "grad_norm": 9.590496063232422, "learning_rate": 1.9852763658928488e-05, "loss": 2.0439, "step": 2533 }, { "epoch": 8.308196721311475, "grad_norm": 9.227338790893555, "learning_rate": 1.985258205336192e-05, "loss": 1.9038, "step": 2534 }, { "epoch": 8.311475409836065, "grad_norm": 8.45909309387207, "learning_rate": 1.985240033669725e-05, "loss": 2.0044, "step": 2535 }, { "epoch": 8.314754098360655, "grad_norm": 12.14145278930664, "learning_rate": 1.985221850893652e-05, "loss": 2.1641, "step": 2536 }, { "epoch": 8.318032786885245, "grad_norm": 8.971905708312988, "learning_rate": 1.985203657008178e-05, "loss": 2.1035, "step": 2537 }, { "epoch": 8.321311475409836, "grad_norm": 7.899770259857178, "learning_rate": 1.985185452013509e-05, "loss": 1.9883, "step": 2538 }, { "epoch": 8.324590163934426, "grad_norm": 10.840682029724121, "learning_rate": 1.985167235909849e-05, "loss": 2.1479, "step": 2539 }, { "epoch": 8.327868852459016, "grad_norm": 15.507417678833008, "learning_rate": 1.9851490086974045e-05, "loss": 2.1709, "step": 2540 }, { "epoch": 8.331147540983606, "grad_norm": 9.498331069946289, "learning_rate": 1.9851307703763806e-05, "loss": 2.1553, "step": 2541 }, { "epoch": 8.334426229508196, "grad_norm": 14.822694778442383, "learning_rate": 1.985112520946983e-05, "loss": 2.0762, "step": 2542 }, { "epoch": 8.337704918032786, "grad_norm": 16.110891342163086, "learning_rate": 1.9850942604094176e-05, "loss": 2.1338, "step": 2543 }, { "epoch": 8.340983606557376, "grad_norm": 9.431931495666504, "learning_rate": 1.9850759887638898e-05, "loss": 2.0566, "step": 2544 }, { "epoch": 8.344262295081966, "grad_norm": 12.074247360229492, "learning_rate": 1.985057706010606e-05, "loss": 1.9595, "step": 2545 }, { "epoch": 8.347540983606557, "grad_norm": 11.839923858642578, "learning_rate": 1.9850394121497727e-05, "loss": 1.9995, "step": 2546 }, { "epoch": 8.350819672131147, "grad_norm": 7.939126968383789, "learning_rate": 1.9850211071815958e-05, "loss": 1.9995, "step": 2547 }, { "epoch": 8.354098360655737, "grad_norm": 17.145099639892578, "learning_rate": 1.9850027911062816e-05, "loss": 2.042, "step": 2548 }, { "epoch": 8.357377049180329, "grad_norm": 13.825026512145996, "learning_rate": 1.984984463924037e-05, "loss": 2.0386, "step": 2549 }, { "epoch": 8.360655737704919, "grad_norm": 7.303048133850098, "learning_rate": 1.9849661256350683e-05, "loss": 1.9146, "step": 2550 }, { "epoch": 8.363934426229509, "grad_norm": 9.031376838684082, "learning_rate": 1.9849477762395823e-05, "loss": 1.9697, "step": 2551 }, { "epoch": 8.3672131147541, "grad_norm": 10.611978530883789, "learning_rate": 1.9849294157377865e-05, "loss": 2.0508, "step": 2552 }, { "epoch": 8.37049180327869, "grad_norm": 9.823750495910645, "learning_rate": 1.984911044129887e-05, "loss": 2.0322, "step": 2553 }, { "epoch": 8.37377049180328, "grad_norm": 11.914849281311035, "learning_rate": 1.9848926614160913e-05, "loss": 2.0083, "step": 2554 }, { "epoch": 8.37704918032787, "grad_norm": 16.12028694152832, "learning_rate": 1.984874267596607e-05, "loss": 1.9766, "step": 2555 }, { "epoch": 8.38032786885246, "grad_norm": 7.927615165710449, "learning_rate": 1.9848558626716415e-05, "loss": 2.0854, "step": 2556 }, { "epoch": 8.38360655737705, "grad_norm": 8.336257934570312, "learning_rate": 1.984837446641402e-05, "loss": 2.0029, "step": 2557 }, { "epoch": 8.38688524590164, "grad_norm": 7.558200836181641, "learning_rate": 1.9848190195060964e-05, "loss": 2.0752, "step": 2558 }, { "epoch": 8.39016393442623, "grad_norm": 10.135740280151367, "learning_rate": 1.9848005812659324e-05, "loss": 1.9448, "step": 2559 }, { "epoch": 8.39344262295082, "grad_norm": 8.584718704223633, "learning_rate": 1.9847821319211177e-05, "loss": 1.8569, "step": 2560 }, { "epoch": 8.39672131147541, "grad_norm": 11.5983247756958, "learning_rate": 1.9847636714718606e-05, "loss": 2.1294, "step": 2561 }, { "epoch": 8.4, "grad_norm": 11.722624778747559, "learning_rate": 1.9847451999183692e-05, "loss": 2.063, "step": 2562 }, { "epoch": 8.40327868852459, "grad_norm": 9.847159385681152, "learning_rate": 1.9847267172608518e-05, "loss": 1.9902, "step": 2563 }, { "epoch": 8.40655737704918, "grad_norm": 27.138301849365234, "learning_rate": 1.9847082234995172e-05, "loss": 2.019, "step": 2564 }, { "epoch": 8.40983606557377, "grad_norm": 16.22531509399414, "learning_rate": 1.9846897186345734e-05, "loss": 2.1318, "step": 2565 }, { "epoch": 8.41311475409836, "grad_norm": 11.755867958068848, "learning_rate": 1.984671202666229e-05, "loss": 2.1138, "step": 2566 }, { "epoch": 8.416393442622951, "grad_norm": 12.801701545715332, "learning_rate": 1.984652675594693e-05, "loss": 2.0146, "step": 2567 }, { "epoch": 8.419672131147541, "grad_norm": 9.015965461730957, "learning_rate": 1.9846341374201743e-05, "loss": 2.0264, "step": 2568 }, { "epoch": 8.422950819672131, "grad_norm": 177.72097778320312, "learning_rate": 1.984615588142882e-05, "loss": 1.9556, "step": 2569 }, { "epoch": 8.426229508196721, "grad_norm": 10.012619018554688, "learning_rate": 1.984597027763025e-05, "loss": 2.0811, "step": 2570 }, { "epoch": 8.429508196721311, "grad_norm": 15.004117012023926, "learning_rate": 1.984578456280813e-05, "loss": 2.2246, "step": 2571 }, { "epoch": 8.432786885245902, "grad_norm": 13.226746559143066, "learning_rate": 1.9845598736964553e-05, "loss": 2.0435, "step": 2572 }, { "epoch": 8.436065573770492, "grad_norm": 11.858372688293457, "learning_rate": 1.984541280010161e-05, "loss": 2.1377, "step": 2573 }, { "epoch": 8.439344262295082, "grad_norm": 12.570825576782227, "learning_rate": 1.9845226752221404e-05, "loss": 2.1641, "step": 2574 }, { "epoch": 8.442622950819672, "grad_norm": 21.491928100585938, "learning_rate": 1.9845040593326027e-05, "loss": 2.4077, "step": 2575 }, { "epoch": 8.445901639344262, "grad_norm": 14.763435363769531, "learning_rate": 1.9844854323417584e-05, "loss": 2.1431, "step": 2576 }, { "epoch": 8.449180327868852, "grad_norm": 16.928194046020508, "learning_rate": 1.984466794249817e-05, "loss": 2.3306, "step": 2577 }, { "epoch": 8.452459016393442, "grad_norm": 12.645976066589355, "learning_rate": 1.9844481450569894e-05, "loss": 1.9902, "step": 2578 }, { "epoch": 8.455737704918032, "grad_norm": 17.521469116210938, "learning_rate": 1.9844294847634848e-05, "loss": 2.2632, "step": 2579 }, { "epoch": 8.459016393442623, "grad_norm": 27.066526412963867, "learning_rate": 1.9844108133695146e-05, "loss": 2.0811, "step": 2580 }, { "epoch": 8.462295081967213, "grad_norm": 14.074593544006348, "learning_rate": 1.9843921308752887e-05, "loss": 2.2607, "step": 2581 }, { "epoch": 8.465573770491803, "grad_norm": 20.30356788635254, "learning_rate": 1.984373437281018e-05, "loss": 2.0229, "step": 2582 }, { "epoch": 8.468852459016393, "grad_norm": 16.068330764770508, "learning_rate": 1.9843547325869136e-05, "loss": 2.1851, "step": 2583 }, { "epoch": 8.472131147540983, "grad_norm": 31.928470611572266, "learning_rate": 1.984336016793186e-05, "loss": 2.2744, "step": 2584 }, { "epoch": 8.475409836065573, "grad_norm": 15.64861011505127, "learning_rate": 1.9843172899000462e-05, "loss": 1.9385, "step": 2585 }, { "epoch": 8.478688524590163, "grad_norm": 51.09635925292969, "learning_rate": 1.9842985519077052e-05, "loss": 1.9556, "step": 2586 }, { "epoch": 8.481967213114753, "grad_norm": 17.4949951171875, "learning_rate": 1.984279802816375e-05, "loss": 2.0273, "step": 2587 }, { "epoch": 8.485245901639344, "grad_norm": 18.949941635131836, "learning_rate": 1.984261042626267e-05, "loss": 2.2407, "step": 2588 }, { "epoch": 8.488524590163934, "grad_norm": 11.245784759521484, "learning_rate": 1.984242271337592e-05, "loss": 1.9246, "step": 2589 }, { "epoch": 8.491803278688524, "grad_norm": 28.461423873901367, "learning_rate": 1.984223488950562e-05, "loss": 2.1016, "step": 2590 }, { "epoch": 8.495081967213114, "grad_norm": 17.400287628173828, "learning_rate": 1.984204695465389e-05, "loss": 2.1494, "step": 2591 }, { "epoch": 8.498360655737706, "grad_norm": 13.84738540649414, "learning_rate": 1.9841858908822848e-05, "loss": 1.999, "step": 2592 }, { "epoch": 8.501639344262294, "grad_norm": 30.72934341430664, "learning_rate": 1.984167075201461e-05, "loss": 2.2324, "step": 2593 }, { "epoch": 8.504918032786886, "grad_norm": 14.038978576660156, "learning_rate": 1.9841482484231304e-05, "loss": 2.1016, "step": 2594 }, { "epoch": 8.508196721311476, "grad_norm": 17.998594284057617, "learning_rate": 1.984129410547505e-05, "loss": 1.9475, "step": 2595 }, { "epoch": 8.511475409836066, "grad_norm": 20.394102096557617, "learning_rate": 1.9841105615747974e-05, "loss": 1.9087, "step": 2596 }, { "epoch": 8.514754098360656, "grad_norm": 19.4716739654541, "learning_rate": 1.9840917015052197e-05, "loss": 2.2007, "step": 2597 }, { "epoch": 8.518032786885247, "grad_norm": 37.5899772644043, "learning_rate": 1.984072830338985e-05, "loss": 1.9819, "step": 2598 }, { "epoch": 8.521311475409837, "grad_norm": 14.39220142364502, "learning_rate": 1.984053948076306e-05, "loss": 1.9575, "step": 2599 }, { "epoch": 8.524590163934427, "grad_norm": 12.347497940063477, "learning_rate": 1.9840350547173954e-05, "loss": 1.8752, "step": 2600 }, { "epoch": 8.527868852459017, "grad_norm": 15.058660507202148, "learning_rate": 1.9840161502624665e-05, "loss": 2.123, "step": 2601 }, { "epoch": 8.531147540983607, "grad_norm": 17.060665130615234, "learning_rate": 1.9839972347117327e-05, "loss": 2.0337, "step": 2602 }, { "epoch": 8.534426229508197, "grad_norm": 25.7028751373291, "learning_rate": 1.9839783080654067e-05, "loss": 1.9092, "step": 2603 }, { "epoch": 8.537704918032787, "grad_norm": 14.227996826171875, "learning_rate": 1.9839593703237022e-05, "loss": 2.1641, "step": 2604 }, { "epoch": 8.540983606557377, "grad_norm": 12.311197280883789, "learning_rate": 1.9839404214868328e-05, "loss": 2.2832, "step": 2605 }, { "epoch": 8.544262295081968, "grad_norm": 14.264897346496582, "learning_rate": 1.983921461555012e-05, "loss": 2.2114, "step": 2606 }, { "epoch": 8.547540983606558, "grad_norm": 14.571435928344727, "learning_rate": 1.9839024905284538e-05, "loss": 2.1523, "step": 2607 }, { "epoch": 8.550819672131148, "grad_norm": 10.800597190856934, "learning_rate": 1.983883508407372e-05, "loss": 1.9839, "step": 2608 }, { "epoch": 8.554098360655738, "grad_norm": 17.965055465698242, "learning_rate": 1.9838645151919808e-05, "loss": 2.0273, "step": 2609 }, { "epoch": 8.557377049180328, "grad_norm": 18.45224380493164, "learning_rate": 1.983845510882494e-05, "loss": 2.1138, "step": 2610 }, { "epoch": 8.560655737704918, "grad_norm": 16.292724609375, "learning_rate": 1.9838264954791263e-05, "loss": 2.2695, "step": 2611 }, { "epoch": 8.563934426229508, "grad_norm": 14.987106323242188, "learning_rate": 1.9838074689820916e-05, "loss": 1.8301, "step": 2612 }, { "epoch": 8.567213114754098, "grad_norm": 14.711390495300293, "learning_rate": 1.9837884313916053e-05, "loss": 2.043, "step": 2613 }, { "epoch": 8.570491803278689, "grad_norm": 19.840551376342773, "learning_rate": 1.9837693827078812e-05, "loss": 2.0439, "step": 2614 }, { "epoch": 8.573770491803279, "grad_norm": 16.342653274536133, "learning_rate": 1.9837503229311347e-05, "loss": 1.8901, "step": 2615 }, { "epoch": 8.577049180327869, "grad_norm": 6.886907577514648, "learning_rate": 1.9837312520615798e-05, "loss": 1.9902, "step": 2616 }, { "epoch": 8.580327868852459, "grad_norm": 13.297571182250977, "learning_rate": 1.983712170099433e-05, "loss": 2.0713, "step": 2617 }, { "epoch": 8.583606557377049, "grad_norm": 13.778678894042969, "learning_rate": 1.9836930770449082e-05, "loss": 2.1685, "step": 2618 }, { "epoch": 8.58688524590164, "grad_norm": 12.0949068069458, "learning_rate": 1.9836739728982215e-05, "loss": 1.9507, "step": 2619 }, { "epoch": 8.59016393442623, "grad_norm": 12.203176498413086, "learning_rate": 1.9836548576595876e-05, "loss": 2.0742, "step": 2620 }, { "epoch": 8.59344262295082, "grad_norm": 10.915531158447266, "learning_rate": 1.983635731329223e-05, "loss": 1.9731, "step": 2621 }, { "epoch": 8.59672131147541, "grad_norm": 12.6407470703125, "learning_rate": 1.9836165939073423e-05, "loss": 1.8662, "step": 2622 }, { "epoch": 8.6, "grad_norm": 12.04987621307373, "learning_rate": 1.9835974453941623e-05, "loss": 1.8896, "step": 2623 }, { "epoch": 8.60327868852459, "grad_norm": 11.632305145263672, "learning_rate": 1.983578285789898e-05, "loss": 2.001, "step": 2624 }, { "epoch": 8.60655737704918, "grad_norm": 10.579940795898438, "learning_rate": 1.983559115094766e-05, "loss": 1.9785, "step": 2625 }, { "epoch": 8.60983606557377, "grad_norm": 9.788959503173828, "learning_rate": 1.9835399333089822e-05, "loss": 2.146, "step": 2626 }, { "epoch": 8.61311475409836, "grad_norm": 12.426809310913086, "learning_rate": 1.983520740432763e-05, "loss": 2.02, "step": 2627 }, { "epoch": 8.61639344262295, "grad_norm": 11.11230182647705, "learning_rate": 1.983501536466325e-05, "loss": 2.0884, "step": 2628 }, { "epoch": 8.61967213114754, "grad_norm": 8.037267684936523, "learning_rate": 1.9834823214098844e-05, "loss": 2.0132, "step": 2629 }, { "epoch": 8.62295081967213, "grad_norm": 6.581663608551025, "learning_rate": 1.9834630952636584e-05, "loss": 2.0854, "step": 2630 }, { "epoch": 8.62622950819672, "grad_norm": 10.058073997497559, "learning_rate": 1.983443858027863e-05, "loss": 1.959, "step": 2631 }, { "epoch": 8.62950819672131, "grad_norm": 7.047595024108887, "learning_rate": 1.983424609702716e-05, "loss": 2.0474, "step": 2632 }, { "epoch": 8.6327868852459, "grad_norm": 12.389864921569824, "learning_rate": 1.9834053502884337e-05, "loss": 2.0146, "step": 2633 }, { "epoch": 8.636065573770491, "grad_norm": 10.548664093017578, "learning_rate": 1.983386079785234e-05, "loss": 1.9844, "step": 2634 }, { "epoch": 8.639344262295083, "grad_norm": 15.491829872131348, "learning_rate": 1.9833667981933335e-05, "loss": 1.9551, "step": 2635 }, { "epoch": 8.642622950819671, "grad_norm": 8.160717964172363, "learning_rate": 1.98334750551295e-05, "loss": 2.0171, "step": 2636 }, { "epoch": 8.645901639344263, "grad_norm": 9.434775352478027, "learning_rate": 1.983328201744301e-05, "loss": 2.1123, "step": 2637 }, { "epoch": 8.649180327868853, "grad_norm": 9.711187362670898, "learning_rate": 1.9833088868876042e-05, "loss": 2.1777, "step": 2638 }, { "epoch": 8.652459016393443, "grad_norm": 10.575687408447266, "learning_rate": 1.983289560943077e-05, "loss": 2.0347, "step": 2639 }, { "epoch": 8.655737704918034, "grad_norm": 13.580098152160645, "learning_rate": 1.9832702239109377e-05, "loss": 2.1162, "step": 2640 }, { "epoch": 8.659016393442624, "grad_norm": 10.331022262573242, "learning_rate": 1.9832508757914045e-05, "loss": 1.8489, "step": 2641 }, { "epoch": 8.662295081967214, "grad_norm": 9.248592376708984, "learning_rate": 1.983231516584695e-05, "loss": 1.8892, "step": 2642 }, { "epoch": 8.665573770491804, "grad_norm": 7.053696632385254, "learning_rate": 1.9832121462910282e-05, "loss": 1.8345, "step": 2643 }, { "epoch": 8.668852459016394, "grad_norm": 7.5762128829956055, "learning_rate": 1.983192764910622e-05, "loss": 1.897, "step": 2644 }, { "epoch": 8.672131147540984, "grad_norm": 13.332133293151855, "learning_rate": 1.983173372443695e-05, "loss": 1.8809, "step": 2645 }, { "epoch": 8.675409836065574, "grad_norm": 12.991759300231934, "learning_rate": 1.983153968890466e-05, "loss": 2.0503, "step": 2646 }, { "epoch": 8.678688524590164, "grad_norm": 7.591914653778076, "learning_rate": 1.9831345542511542e-05, "loss": 2.0156, "step": 2647 }, { "epoch": 8.681967213114755, "grad_norm": 12.175230026245117, "learning_rate": 1.983115128525978e-05, "loss": 2.0571, "step": 2648 }, { "epoch": 8.685245901639345, "grad_norm": 13.217798233032227, "learning_rate": 1.983095691715156e-05, "loss": 1.8047, "step": 2649 }, { "epoch": 8.688524590163935, "grad_norm": 9.71271800994873, "learning_rate": 1.9830762438189083e-05, "loss": 1.9937, "step": 2650 }, { "epoch": 8.691803278688525, "grad_norm": 7.168107032775879, "learning_rate": 1.9830567848374538e-05, "loss": 1.9658, "step": 2651 }, { "epoch": 8.695081967213115, "grad_norm": 9.198151588439941, "learning_rate": 1.9830373147710117e-05, "loss": 2.1123, "step": 2652 }, { "epoch": 8.698360655737705, "grad_norm": 7.850871562957764, "learning_rate": 1.983017833619802e-05, "loss": 2.0513, "step": 2653 }, { "epoch": 8.701639344262295, "grad_norm": 8.577938079833984, "learning_rate": 1.9829983413840442e-05, "loss": 2.0435, "step": 2654 }, { "epoch": 8.704918032786885, "grad_norm": 9.697039604187012, "learning_rate": 1.9829788380639576e-05, "loss": 2.0146, "step": 2655 }, { "epoch": 8.708196721311475, "grad_norm": 19.765466690063477, "learning_rate": 1.9829593236597632e-05, "loss": 2.105, "step": 2656 }, { "epoch": 8.711475409836066, "grad_norm": 13.642529487609863, "learning_rate": 1.98293979817168e-05, "loss": 1.8198, "step": 2657 }, { "epoch": 8.714754098360656, "grad_norm": 11.298113822937012, "learning_rate": 1.9829202615999285e-05, "loss": 1.8799, "step": 2658 }, { "epoch": 8.718032786885246, "grad_norm": 12.141453742980957, "learning_rate": 1.9829007139447294e-05, "loss": 2.188, "step": 2659 }, { "epoch": 8.721311475409836, "grad_norm": 12.519293785095215, "learning_rate": 1.9828811552063026e-05, "loss": 2.063, "step": 2660 }, { "epoch": 8.724590163934426, "grad_norm": 7.866223335266113, "learning_rate": 1.982861585384869e-05, "loss": 2.0068, "step": 2661 }, { "epoch": 8.727868852459016, "grad_norm": 11.25975513458252, "learning_rate": 1.982842004480649e-05, "loss": 1.8389, "step": 2662 }, { "epoch": 8.731147540983606, "grad_norm": 8.564680099487305, "learning_rate": 1.9828224124938634e-05, "loss": 2.145, "step": 2663 }, { "epoch": 8.734426229508196, "grad_norm": 10.197593688964844, "learning_rate": 1.982802809424733e-05, "loss": 1.9673, "step": 2664 }, { "epoch": 8.737704918032787, "grad_norm": 14.556341171264648, "learning_rate": 1.9827831952734797e-05, "loss": 2.0732, "step": 2665 }, { "epoch": 8.740983606557377, "grad_norm": 9.163792610168457, "learning_rate": 1.9827635700403235e-05, "loss": 2.1387, "step": 2666 }, { "epoch": 8.744262295081967, "grad_norm": 20.349353790283203, "learning_rate": 1.9827439337254865e-05, "loss": 1.7964, "step": 2667 }, { "epoch": 8.747540983606557, "grad_norm": 8.362723350524902, "learning_rate": 1.9827242863291898e-05, "loss": 2.0317, "step": 2668 }, { "epoch": 8.750819672131147, "grad_norm": 16.11823081970215, "learning_rate": 1.982704627851655e-05, "loss": 2.1558, "step": 2669 }, { "epoch": 8.754098360655737, "grad_norm": 10.341531753540039, "learning_rate": 1.9826849582931038e-05, "loss": 2.0322, "step": 2670 }, { "epoch": 8.757377049180327, "grad_norm": 9.017899513244629, "learning_rate": 1.982665277653758e-05, "loss": 2.0215, "step": 2671 }, { "epoch": 8.760655737704917, "grad_norm": 9.675415992736816, "learning_rate": 1.9826455859338392e-05, "loss": 1.9561, "step": 2672 }, { "epoch": 8.763934426229508, "grad_norm": 11.703560829162598, "learning_rate": 1.9826258831335697e-05, "loss": 2.0132, "step": 2673 }, { "epoch": 8.767213114754098, "grad_norm": 9.58186149597168, "learning_rate": 1.982606169253172e-05, "loss": 2.1777, "step": 2674 }, { "epoch": 8.770491803278688, "grad_norm": 9.014318466186523, "learning_rate": 1.982586444292868e-05, "loss": 1.9785, "step": 2675 }, { "epoch": 8.773770491803278, "grad_norm": 9.597293853759766, "learning_rate": 1.98256670825288e-05, "loss": 2.0742, "step": 2676 }, { "epoch": 8.777049180327868, "grad_norm": 10.858675956726074, "learning_rate": 1.982546961133431e-05, "loss": 2.1206, "step": 2677 }, { "epoch": 8.780327868852458, "grad_norm": 10.833518028259277, "learning_rate": 1.9825272029347437e-05, "loss": 2.0103, "step": 2678 }, { "epoch": 8.783606557377048, "grad_norm": 12.273036003112793, "learning_rate": 1.98250743365704e-05, "loss": 2.0923, "step": 2679 }, { "epoch": 8.78688524590164, "grad_norm": 9.871461868286133, "learning_rate": 1.9824876533005438e-05, "loss": 2.2236, "step": 2680 }, { "epoch": 8.790163934426229, "grad_norm": 11.18997859954834, "learning_rate": 1.9824678618654775e-05, "loss": 2.0073, "step": 2681 }, { "epoch": 8.79344262295082, "grad_norm": 9.358022689819336, "learning_rate": 1.9824480593520646e-05, "loss": 2.2612, "step": 2682 }, { "epoch": 8.79672131147541, "grad_norm": 16.62308692932129, "learning_rate": 1.9824282457605287e-05, "loss": 2.0117, "step": 2683 }, { "epoch": 8.8, "grad_norm": 10.187373161315918, "learning_rate": 1.9824084210910924e-05, "loss": 2.0894, "step": 2684 }, { "epoch": 8.80327868852459, "grad_norm": 8.997200965881348, "learning_rate": 1.98238858534398e-05, "loss": 1.9658, "step": 2685 }, { "epoch": 8.806557377049181, "grad_norm": 14.966231346130371, "learning_rate": 1.9823687385194147e-05, "loss": 2.0454, "step": 2686 }, { "epoch": 8.809836065573771, "grad_norm": 8.772174835205078, "learning_rate": 1.9823488806176206e-05, "loss": 1.8911, "step": 2687 }, { "epoch": 8.813114754098361, "grad_norm": 8.670890808105469, "learning_rate": 1.9823290116388215e-05, "loss": 2.2009, "step": 2688 }, { "epoch": 8.816393442622951, "grad_norm": 7.777942180633545, "learning_rate": 1.9823091315832415e-05, "loss": 2.0488, "step": 2689 }, { "epoch": 8.819672131147541, "grad_norm": 8.017129898071289, "learning_rate": 1.9822892404511044e-05, "loss": 1.9287, "step": 2690 }, { "epoch": 8.822950819672132, "grad_norm": 9.709564208984375, "learning_rate": 1.982269338242635e-05, "loss": 2.0879, "step": 2691 }, { "epoch": 8.826229508196722, "grad_norm": 8.657846450805664, "learning_rate": 1.9822494249580578e-05, "loss": 1.8774, "step": 2692 }, { "epoch": 8.829508196721312, "grad_norm": 7.3709821701049805, "learning_rate": 1.9822295005975964e-05, "loss": 2.0127, "step": 2693 }, { "epoch": 8.832786885245902, "grad_norm": 10.754831314086914, "learning_rate": 1.9822095651614766e-05, "loss": 1.8203, "step": 2694 }, { "epoch": 8.836065573770492, "grad_norm": 11.56656551361084, "learning_rate": 1.9821896186499226e-05, "loss": 1.8687, "step": 2695 }, { "epoch": 8.839344262295082, "grad_norm": 9.407668113708496, "learning_rate": 1.9821696610631594e-05, "loss": 2.0527, "step": 2696 }, { "epoch": 8.842622950819672, "grad_norm": 15.812549591064453, "learning_rate": 1.982149692401412e-05, "loss": 1.9805, "step": 2697 }, { "epoch": 8.845901639344262, "grad_norm": 8.656549453735352, "learning_rate": 1.9821297126649055e-05, "loss": 2.0391, "step": 2698 }, { "epoch": 8.849180327868853, "grad_norm": 9.00876522064209, "learning_rate": 1.9821097218538655e-05, "loss": 2.0918, "step": 2699 }, { "epoch": 8.852459016393443, "grad_norm": 9.139668464660645, "learning_rate": 1.9820897199685175e-05, "loss": 1.9443, "step": 2700 }, { "epoch": 8.855737704918033, "grad_norm": 9.716845512390137, "learning_rate": 1.9820697070090865e-05, "loss": 2.054, "step": 2701 }, { "epoch": 8.859016393442623, "grad_norm": 9.075309753417969, "learning_rate": 1.9820496829757985e-05, "loss": 1.9644, "step": 2702 }, { "epoch": 8.862295081967213, "grad_norm": 9.174186706542969, "learning_rate": 1.982029647868879e-05, "loss": 1.7839, "step": 2703 }, { "epoch": 8.865573770491803, "grad_norm": 8.762017250061035, "learning_rate": 1.9820096016885547e-05, "loss": 1.9746, "step": 2704 }, { "epoch": 8.868852459016393, "grad_norm": 22.39229393005371, "learning_rate": 1.981989544435051e-05, "loss": 1.874, "step": 2705 }, { "epoch": 8.872131147540983, "grad_norm": 9.339024543762207, "learning_rate": 1.9819694761085937e-05, "loss": 2.1646, "step": 2706 }, { "epoch": 8.875409836065574, "grad_norm": 8.285297393798828, "learning_rate": 1.9819493967094097e-05, "loss": 2.0918, "step": 2707 }, { "epoch": 8.878688524590164, "grad_norm": 10.69788646697998, "learning_rate": 1.9819293062377257e-05, "loss": 2.0151, "step": 2708 }, { "epoch": 8.881967213114754, "grad_norm": 9.374571800231934, "learning_rate": 1.9819092046937676e-05, "loss": 1.8433, "step": 2709 }, { "epoch": 8.885245901639344, "grad_norm": 7.721468448638916, "learning_rate": 1.981889092077762e-05, "loss": 2.001, "step": 2710 }, { "epoch": 8.888524590163934, "grad_norm": 10.987555503845215, "learning_rate": 1.9818689683899362e-05, "loss": 2.0483, "step": 2711 }, { "epoch": 8.891803278688524, "grad_norm": 10.322527885437012, "learning_rate": 1.981848833630517e-05, "loss": 1.9385, "step": 2712 }, { "epoch": 8.895081967213114, "grad_norm": 9.779792785644531, "learning_rate": 1.9818286877997315e-05, "loss": 1.7312, "step": 2713 }, { "epoch": 8.898360655737704, "grad_norm": 7.533933162689209, "learning_rate": 1.9818085308978064e-05, "loss": 2.0518, "step": 2714 }, { "epoch": 8.901639344262295, "grad_norm": 12.308555603027344, "learning_rate": 1.9817883629249693e-05, "loss": 1.9126, "step": 2715 }, { "epoch": 8.904918032786885, "grad_norm": 8.448444366455078, "learning_rate": 1.9817681838814478e-05, "loss": 2.0239, "step": 2716 }, { "epoch": 8.908196721311475, "grad_norm": 8.740413665771484, "learning_rate": 1.9817479937674692e-05, "loss": 1.8193, "step": 2717 }, { "epoch": 8.911475409836065, "grad_norm": 13.604113578796387, "learning_rate": 1.981727792583261e-05, "loss": 2.0908, "step": 2718 }, { "epoch": 8.914754098360655, "grad_norm": 11.382268905639648, "learning_rate": 1.9817075803290514e-05, "loss": 1.916, "step": 2719 }, { "epoch": 8.918032786885245, "grad_norm": 7.754290580749512, "learning_rate": 1.981687357005068e-05, "loss": 1.8101, "step": 2720 }, { "epoch": 8.921311475409835, "grad_norm": 9.99045181274414, "learning_rate": 1.9816671226115388e-05, "loss": 1.9839, "step": 2721 }, { "epoch": 8.924590163934425, "grad_norm": 14.659907341003418, "learning_rate": 1.9816468771486924e-05, "loss": 1.9463, "step": 2722 }, { "epoch": 8.927868852459017, "grad_norm": 6.895102500915527, "learning_rate": 1.9816266206167568e-05, "loss": 1.9917, "step": 2723 }, { "epoch": 8.931147540983606, "grad_norm": 9.081798553466797, "learning_rate": 1.9816063530159603e-05, "loss": 1.8828, "step": 2724 }, { "epoch": 8.934426229508198, "grad_norm": 10.692219734191895, "learning_rate": 1.9815860743465312e-05, "loss": 1.9268, "step": 2725 }, { "epoch": 8.937704918032788, "grad_norm": 15.525472640991211, "learning_rate": 1.981565784608699e-05, "loss": 2.063, "step": 2726 }, { "epoch": 8.940983606557378, "grad_norm": 9.132908821105957, "learning_rate": 1.9815454838026918e-05, "loss": 2.1094, "step": 2727 }, { "epoch": 8.944262295081968, "grad_norm": 14.807031631469727, "learning_rate": 1.9815251719287388e-05, "loss": 2.04, "step": 2728 }, { "epoch": 8.947540983606558, "grad_norm": 8.721817016601562, "learning_rate": 1.981504848987069e-05, "loss": 1.978, "step": 2729 }, { "epoch": 8.950819672131148, "grad_norm": 7.830840587615967, "learning_rate": 1.9814845149779117e-05, "loss": 1.8545, "step": 2730 }, { "epoch": 8.954098360655738, "grad_norm": 9.933076858520508, "learning_rate": 1.9814641699014957e-05, "loss": 2.1313, "step": 2731 }, { "epoch": 8.957377049180328, "grad_norm": 10.480230331420898, "learning_rate": 1.9814438137580507e-05, "loss": 2.0415, "step": 2732 }, { "epoch": 8.960655737704919, "grad_norm": 11.561656951904297, "learning_rate": 1.9814234465478063e-05, "loss": 1.7717, "step": 2733 }, { "epoch": 8.963934426229509, "grad_norm": 9.89234733581543, "learning_rate": 1.9814030682709923e-05, "loss": 1.9224, "step": 2734 }, { "epoch": 8.967213114754099, "grad_norm": 6.22161865234375, "learning_rate": 1.981382678927838e-05, "loss": 2.0518, "step": 2735 }, { "epoch": 8.970491803278689, "grad_norm": 8.48601245880127, "learning_rate": 1.981362278518574e-05, "loss": 2.2231, "step": 2736 }, { "epoch": 8.973770491803279, "grad_norm": 10.934041023254395, "learning_rate": 1.9813418670434298e-05, "loss": 1.8652, "step": 2737 }, { "epoch": 8.97704918032787, "grad_norm": 19.30779266357422, "learning_rate": 1.9813214445026357e-05, "loss": 1.9146, "step": 2738 }, { "epoch": 8.98032786885246, "grad_norm": 6.988764762878418, "learning_rate": 1.9813010108964218e-05, "loss": 2.0137, "step": 2739 }, { "epoch": 8.98360655737705, "grad_norm": 9.023285865783691, "learning_rate": 1.981280566225019e-05, "loss": 2.0085, "step": 2740 }, { "epoch": 8.98688524590164, "grad_norm": 9.15458869934082, "learning_rate": 1.9812601104886572e-05, "loss": 1.6311, "step": 2741 }, { "epoch": 8.99016393442623, "grad_norm": 9.389535903930664, "learning_rate": 1.9812396436875677e-05, "loss": 2.145, "step": 2742 }, { "epoch": 8.99344262295082, "grad_norm": 9.015975952148438, "learning_rate": 1.9812191658219808e-05, "loss": 1.9966, "step": 2743 }, { "epoch": 8.99672131147541, "grad_norm": 7.791294574737549, "learning_rate": 1.9811986768921278e-05, "loss": 2.0327, "step": 2744 }, { "epoch": 9.0, "grad_norm": 12.89783000946045, "learning_rate": 1.9811781768982392e-05, "loss": 1.8662, "step": 2745 }, { "epoch": 9.00327868852459, "grad_norm": 9.808492660522461, "learning_rate": 1.9811576658405465e-05, "loss": 1.8408, "step": 2746 }, { "epoch": 9.00655737704918, "grad_norm": 9.002872467041016, "learning_rate": 1.9811371437192815e-05, "loss": 1.9302, "step": 2747 }, { "epoch": 9.00983606557377, "grad_norm": 10.78365707397461, "learning_rate": 1.9811166105346746e-05, "loss": 1.981, "step": 2748 }, { "epoch": 9.01311475409836, "grad_norm": 10.820619583129883, "learning_rate": 1.9810960662869578e-05, "loss": 1.8413, "step": 2749 }, { "epoch": 9.01639344262295, "grad_norm": 7.686802864074707, "learning_rate": 1.981075510976363e-05, "loss": 2.0557, "step": 2750 }, { "epoch": 9.01967213114754, "grad_norm": 7.461746692657471, "learning_rate": 1.9810549446031216e-05, "loss": 1.865, "step": 2751 }, { "epoch": 9.02295081967213, "grad_norm": 11.28357219696045, "learning_rate": 1.9810343671674657e-05, "loss": 1.8242, "step": 2752 }, { "epoch": 9.026229508196721, "grad_norm": 8.852932929992676, "learning_rate": 1.9810137786696273e-05, "loss": 1.6738, "step": 2753 }, { "epoch": 9.029508196721311, "grad_norm": 7.925939083099365, "learning_rate": 1.9809931791098384e-05, "loss": 2.0693, "step": 2754 }, { "epoch": 9.032786885245901, "grad_norm": 12.214740753173828, "learning_rate": 1.9809725684883315e-05, "loss": 1.7261, "step": 2755 }, { "epoch": 9.036065573770491, "grad_norm": 9.899089813232422, "learning_rate": 1.9809519468053394e-05, "loss": 1.8618, "step": 2756 }, { "epoch": 9.039344262295081, "grad_norm": 9.877321243286133, "learning_rate": 1.9809313140610938e-05, "loss": 1.7598, "step": 2757 }, { "epoch": 9.042622950819672, "grad_norm": 10.57270336151123, "learning_rate": 1.9809106702558277e-05, "loss": 1.8589, "step": 2758 }, { "epoch": 9.045901639344262, "grad_norm": 9.555529594421387, "learning_rate": 1.9808900153897737e-05, "loss": 2.0127, "step": 2759 }, { "epoch": 9.049180327868852, "grad_norm": 10.927156448364258, "learning_rate": 1.980869349463165e-05, "loss": 2.1821, "step": 2760 }, { "epoch": 9.052459016393442, "grad_norm": 8.738113403320312, "learning_rate": 1.980848672476235e-05, "loss": 1.9458, "step": 2761 }, { "epoch": 9.055737704918032, "grad_norm": 29.258440017700195, "learning_rate": 1.9808279844292156e-05, "loss": 1.8721, "step": 2762 }, { "epoch": 9.059016393442622, "grad_norm": 8.10629940032959, "learning_rate": 1.9808072853223414e-05, "loss": 2.0024, "step": 2763 }, { "epoch": 9.062295081967212, "grad_norm": 8.670138359069824, "learning_rate": 1.980786575155845e-05, "loss": 1.8525, "step": 2764 }, { "epoch": 9.065573770491802, "grad_norm": 9.514972686767578, "learning_rate": 1.9807658539299605e-05, "loss": 1.896, "step": 2765 }, { "epoch": 9.068852459016393, "grad_norm": 8.451119422912598, "learning_rate": 1.9807451216449213e-05, "loss": 1.7812, "step": 2766 }, { "epoch": 9.072131147540984, "grad_norm": 7.841479301452637, "learning_rate": 1.980724378300961e-05, "loss": 1.9033, "step": 2767 }, { "epoch": 9.075409836065575, "grad_norm": 9.087882995605469, "learning_rate": 1.9807036238983137e-05, "loss": 1.9741, "step": 2768 }, { "epoch": 9.078688524590165, "grad_norm": 9.658917427062988, "learning_rate": 1.980682858437213e-05, "loss": 1.9609, "step": 2769 }, { "epoch": 9.081967213114755, "grad_norm": 8.648221015930176, "learning_rate": 1.980662081917894e-05, "loss": 1.9775, "step": 2770 }, { "epoch": 9.085245901639345, "grad_norm": 9.325286865234375, "learning_rate": 1.98064129434059e-05, "loss": 1.9072, "step": 2771 }, { "epoch": 9.088524590163935, "grad_norm": 9.133612632751465, "learning_rate": 1.980620495705536e-05, "loss": 1.8198, "step": 2772 }, { "epoch": 9.091803278688525, "grad_norm": 9.609594345092773, "learning_rate": 1.9805996860129658e-05, "loss": 1.9883, "step": 2773 }, { "epoch": 9.095081967213115, "grad_norm": 9.565534591674805, "learning_rate": 1.980578865263115e-05, "loss": 1.8672, "step": 2774 }, { "epoch": 9.098360655737705, "grad_norm": 8.39312744140625, "learning_rate": 1.9805580334562182e-05, "loss": 2.084, "step": 2775 }, { "epoch": 9.101639344262296, "grad_norm": 7.088196754455566, "learning_rate": 1.9805371905925097e-05, "loss": 2.0276, "step": 2776 }, { "epoch": 9.104918032786886, "grad_norm": 11.486749649047852, "learning_rate": 1.9805163366722247e-05, "loss": 2.0757, "step": 2777 }, { "epoch": 9.108196721311476, "grad_norm": 7.934074878692627, "learning_rate": 1.980495471695599e-05, "loss": 1.9231, "step": 2778 }, { "epoch": 9.111475409836066, "grad_norm": 16.815006256103516, "learning_rate": 1.9804745956628674e-05, "loss": 1.9663, "step": 2779 }, { "epoch": 9.114754098360656, "grad_norm": 13.937284469604492, "learning_rate": 1.980453708574265e-05, "loss": 2.0474, "step": 2780 }, { "epoch": 9.118032786885246, "grad_norm": 8.211224555969238, "learning_rate": 1.9804328104300275e-05, "loss": 1.729, "step": 2781 }, { "epoch": 9.121311475409836, "grad_norm": 8.337850570678711, "learning_rate": 1.9804119012303907e-05, "loss": 2.0095, "step": 2782 }, { "epoch": 9.124590163934426, "grad_norm": 10.58659553527832, "learning_rate": 1.9803909809755905e-05, "loss": 1.7681, "step": 2783 }, { "epoch": 9.127868852459017, "grad_norm": 8.719640731811523, "learning_rate": 1.9803700496658627e-05, "loss": 2.0176, "step": 2784 }, { "epoch": 9.131147540983607, "grad_norm": 8.992053031921387, "learning_rate": 1.9803491073014433e-05, "loss": 1.8604, "step": 2785 }, { "epoch": 9.134426229508197, "grad_norm": 10.294207572937012, "learning_rate": 1.9803281538825683e-05, "loss": 1.8408, "step": 2786 }, { "epoch": 9.137704918032787, "grad_norm": 9.366596221923828, "learning_rate": 1.980307189409474e-05, "loss": 1.8643, "step": 2787 }, { "epoch": 9.140983606557377, "grad_norm": 9.902997016906738, "learning_rate": 1.980286213882397e-05, "loss": 1.8999, "step": 2788 }, { "epoch": 9.144262295081967, "grad_norm": 9.708131790161133, "learning_rate": 1.9802652273015735e-05, "loss": 1.8672, "step": 2789 }, { "epoch": 9.147540983606557, "grad_norm": 7.32394552230835, "learning_rate": 1.9802442296672402e-05, "loss": 2.0833, "step": 2790 }, { "epoch": 9.150819672131147, "grad_norm": 10.758452415466309, "learning_rate": 1.9802232209796345e-05, "loss": 1.9097, "step": 2791 }, { "epoch": 9.154098360655738, "grad_norm": 9.41072940826416, "learning_rate": 1.9802022012389925e-05, "loss": 1.8447, "step": 2792 }, { "epoch": 9.157377049180328, "grad_norm": 8.89735221862793, "learning_rate": 1.9801811704455517e-05, "loss": 2.1055, "step": 2793 }, { "epoch": 9.160655737704918, "grad_norm": 11.52293872833252, "learning_rate": 1.980160128599549e-05, "loss": 1.7485, "step": 2794 }, { "epoch": 9.163934426229508, "grad_norm": 11.70916748046875, "learning_rate": 1.9801390757012216e-05, "loss": 1.9092, "step": 2795 }, { "epoch": 9.167213114754098, "grad_norm": 8.925753593444824, "learning_rate": 1.9801180117508076e-05, "loss": 1.9116, "step": 2796 }, { "epoch": 9.170491803278688, "grad_norm": 8.991741180419922, "learning_rate": 1.9800969367485435e-05, "loss": 1.8374, "step": 2797 }, { "epoch": 9.173770491803278, "grad_norm": 13.746184349060059, "learning_rate": 1.980075850694667e-05, "loss": 1.812, "step": 2798 }, { "epoch": 9.177049180327868, "grad_norm": 8.263548851013184, "learning_rate": 1.9800547535894168e-05, "loss": 1.8828, "step": 2799 }, { "epoch": 9.180327868852459, "grad_norm": 7.742788791656494, "learning_rate": 1.98003364543303e-05, "loss": 1.9346, "step": 2800 }, { "epoch": 9.183606557377049, "grad_norm": 9.313488006591797, "learning_rate": 1.9800125262257452e-05, "loss": 1.8774, "step": 2801 }, { "epoch": 9.186885245901639, "grad_norm": 8.42726993560791, "learning_rate": 1.9799913959678e-05, "loss": 2.0254, "step": 2802 }, { "epoch": 9.190163934426229, "grad_norm": 20.985666275024414, "learning_rate": 1.979970254659433e-05, "loss": 1.8813, "step": 2803 }, { "epoch": 9.193442622950819, "grad_norm": 7.449527740478516, "learning_rate": 1.979949102300882e-05, "loss": 1.9004, "step": 2804 }, { "epoch": 9.19672131147541, "grad_norm": 25.555774688720703, "learning_rate": 1.9799279388923866e-05, "loss": 1.8979, "step": 2805 }, { "epoch": 9.2, "grad_norm": 7.9828338623046875, "learning_rate": 1.9799067644341844e-05, "loss": 1.9219, "step": 2806 }, { "epoch": 9.20327868852459, "grad_norm": 9.887417793273926, "learning_rate": 1.979885578926515e-05, "loss": 1.9336, "step": 2807 }, { "epoch": 9.20655737704918, "grad_norm": 9.238239288330078, "learning_rate": 1.9798643823696164e-05, "loss": 1.9346, "step": 2808 }, { "epoch": 9.20983606557377, "grad_norm": 9.941357612609863, "learning_rate": 1.9798431747637285e-05, "loss": 1.7908, "step": 2809 }, { "epoch": 9.21311475409836, "grad_norm": 8.88890552520752, "learning_rate": 1.9798219561090896e-05, "loss": 1.9355, "step": 2810 }, { "epoch": 9.216393442622952, "grad_norm": 9.31170654296875, "learning_rate": 1.9798007264059396e-05, "loss": 1.856, "step": 2811 }, { "epoch": 9.219672131147542, "grad_norm": 8.898874282836914, "learning_rate": 1.9797794856545177e-05, "loss": 2.0293, "step": 2812 }, { "epoch": 9.222950819672132, "grad_norm": 7.918620586395264, "learning_rate": 1.9797582338550635e-05, "loss": 1.9697, "step": 2813 }, { "epoch": 9.226229508196722, "grad_norm": 10.530033111572266, "learning_rate": 1.979736971007816e-05, "loss": 2.0239, "step": 2814 }, { "epoch": 9.229508196721312, "grad_norm": 6.800505638122559, "learning_rate": 1.979715697113016e-05, "loss": 1.835, "step": 2815 }, { "epoch": 9.232786885245902, "grad_norm": 10.414892196655273, "learning_rate": 1.9796944121709026e-05, "loss": 1.8696, "step": 2816 }, { "epoch": 9.236065573770492, "grad_norm": 8.866390228271484, "learning_rate": 1.9796731161817165e-05, "loss": 1.8105, "step": 2817 }, { "epoch": 9.239344262295083, "grad_norm": 7.678443908691406, "learning_rate": 1.9796518091456968e-05, "loss": 1.9761, "step": 2818 }, { "epoch": 9.242622950819673, "grad_norm": 7.540291786193848, "learning_rate": 1.9796304910630847e-05, "loss": 2.0542, "step": 2819 }, { "epoch": 9.245901639344263, "grad_norm": 8.693949699401855, "learning_rate": 1.9796091619341205e-05, "loss": 2.0078, "step": 2820 }, { "epoch": 9.249180327868853, "grad_norm": 6.115839958190918, "learning_rate": 1.979587821759044e-05, "loss": 1.9619, "step": 2821 }, { "epoch": 9.252459016393443, "grad_norm": 6.903177261352539, "learning_rate": 1.9795664705380963e-05, "loss": 1.854, "step": 2822 }, { "epoch": 9.255737704918033, "grad_norm": 8.751993179321289, "learning_rate": 1.9795451082715186e-05, "loss": 1.761, "step": 2823 }, { "epoch": 9.259016393442623, "grad_norm": 6.400798797607422, "learning_rate": 1.979523734959551e-05, "loss": 2.0757, "step": 2824 }, { "epoch": 9.262295081967213, "grad_norm": 10.089033126831055, "learning_rate": 1.979502350602435e-05, "loss": 1.939, "step": 2825 }, { "epoch": 9.265573770491804, "grad_norm": 7.146869659423828, "learning_rate": 1.979480955200411e-05, "loss": 1.9226, "step": 2826 }, { "epoch": 9.268852459016394, "grad_norm": 8.50538158416748, "learning_rate": 1.9794595487537218e-05, "loss": 1.8701, "step": 2827 }, { "epoch": 9.272131147540984, "grad_norm": 8.3353271484375, "learning_rate": 1.979438131262607e-05, "loss": 1.9893, "step": 2828 }, { "epoch": 9.275409836065574, "grad_norm": 6.960373401641846, "learning_rate": 1.9794167027273096e-05, "loss": 1.9048, "step": 2829 }, { "epoch": 9.278688524590164, "grad_norm": 9.492671012878418, "learning_rate": 1.97939526314807e-05, "loss": 1.7856, "step": 2830 }, { "epoch": 9.281967213114754, "grad_norm": 10.464335441589355, "learning_rate": 1.9793738125251305e-05, "loss": 1.9883, "step": 2831 }, { "epoch": 9.285245901639344, "grad_norm": 7.225924968719482, "learning_rate": 1.9793523508587332e-05, "loss": 1.8687, "step": 2832 }, { "epoch": 9.288524590163934, "grad_norm": 6.906606674194336, "learning_rate": 1.97933087814912e-05, "loss": 1.9712, "step": 2833 }, { "epoch": 9.291803278688525, "grad_norm": 8.269371032714844, "learning_rate": 1.9793093943965324e-05, "loss": 1.8862, "step": 2834 }, { "epoch": 9.295081967213115, "grad_norm": 9.121938705444336, "learning_rate": 1.9792878996012136e-05, "loss": 2.0688, "step": 2835 }, { "epoch": 9.298360655737705, "grad_norm": 7.386578559875488, "learning_rate": 1.979266393763405e-05, "loss": 1.9575, "step": 2836 }, { "epoch": 9.301639344262295, "grad_norm": 7.677590370178223, "learning_rate": 1.9792448768833502e-05, "loss": 1.8179, "step": 2837 }, { "epoch": 9.304918032786885, "grad_norm": 13.07077693939209, "learning_rate": 1.9792233489612908e-05, "loss": 1.9966, "step": 2838 }, { "epoch": 9.308196721311475, "grad_norm": 6.839227676391602, "learning_rate": 1.9792018099974705e-05, "loss": 1.7671, "step": 2839 }, { "epoch": 9.311475409836065, "grad_norm": 8.917329788208008, "learning_rate": 1.9791802599921315e-05, "loss": 1.937, "step": 2840 }, { "epoch": 9.314754098360655, "grad_norm": 9.701395034790039, "learning_rate": 1.9791586989455168e-05, "loss": 1.8774, "step": 2841 }, { "epoch": 9.318032786885245, "grad_norm": 6.574543476104736, "learning_rate": 1.97913712685787e-05, "loss": 1.7822, "step": 2842 }, { "epoch": 9.321311475409836, "grad_norm": 8.963311195373535, "learning_rate": 1.9791155437294337e-05, "loss": 1.7163, "step": 2843 }, { "epoch": 9.324590163934426, "grad_norm": 7.9752044677734375, "learning_rate": 1.9790939495604518e-05, "loss": 1.835, "step": 2844 }, { "epoch": 9.327868852459016, "grad_norm": 8.710990905761719, "learning_rate": 1.9790723443511675e-05, "loss": 2.0396, "step": 2845 }, { "epoch": 9.331147540983606, "grad_norm": 8.808270454406738, "learning_rate": 1.9790507281018246e-05, "loss": 1.8184, "step": 2846 }, { "epoch": 9.334426229508196, "grad_norm": 8.9406099319458, "learning_rate": 1.979029100812667e-05, "loss": 1.8252, "step": 2847 }, { "epoch": 9.337704918032786, "grad_norm": 10.234537124633789, "learning_rate": 1.979007462483938e-05, "loss": 1.9375, "step": 2848 }, { "epoch": 9.340983606557376, "grad_norm": 8.664788246154785, "learning_rate": 1.978985813115882e-05, "loss": 1.8296, "step": 2849 }, { "epoch": 9.344262295081966, "grad_norm": 7.439891338348389, "learning_rate": 1.978964152708743e-05, "loss": 1.9463, "step": 2850 }, { "epoch": 9.347540983606557, "grad_norm": 7.146973133087158, "learning_rate": 1.9789424812627658e-05, "loss": 1.8613, "step": 2851 }, { "epoch": 9.350819672131147, "grad_norm": 7.466193675994873, "learning_rate": 1.978920798778194e-05, "loss": 1.7905, "step": 2852 }, { "epoch": 9.354098360655737, "grad_norm": 8.38935375213623, "learning_rate": 1.9788991052552724e-05, "loss": 1.939, "step": 2853 }, { "epoch": 9.357377049180329, "grad_norm": 9.298920631408691, "learning_rate": 1.9788774006942453e-05, "loss": 1.9443, "step": 2854 }, { "epoch": 9.360655737704919, "grad_norm": 9.818501472473145, "learning_rate": 1.978855685095358e-05, "loss": 1.9404, "step": 2855 }, { "epoch": 9.363934426229509, "grad_norm": 7.147253036499023, "learning_rate": 1.978833958458855e-05, "loss": 1.9932, "step": 2856 }, { "epoch": 9.3672131147541, "grad_norm": 6.742755889892578, "learning_rate": 1.9788122207849815e-05, "loss": 1.9146, "step": 2857 }, { "epoch": 9.37049180327869, "grad_norm": 7.478897571563721, "learning_rate": 1.9787904720739825e-05, "loss": 1.9077, "step": 2858 }, { "epoch": 9.37377049180328, "grad_norm": 8.203721046447754, "learning_rate": 1.9787687123261032e-05, "loss": 1.7803, "step": 2859 }, { "epoch": 9.37704918032787, "grad_norm": 6.565639495849609, "learning_rate": 1.978746941541589e-05, "loss": 2.0122, "step": 2860 }, { "epoch": 9.38032786885246, "grad_norm": 8.202149391174316, "learning_rate": 1.9787251597206853e-05, "loss": 2.0112, "step": 2861 }, { "epoch": 9.38360655737705, "grad_norm": 9.22158145904541, "learning_rate": 1.9787033668636376e-05, "loss": 1.9736, "step": 2862 }, { "epoch": 9.38688524590164, "grad_norm": 9.744245529174805, "learning_rate": 1.9786815629706925e-05, "loss": 1.9858, "step": 2863 }, { "epoch": 9.39016393442623, "grad_norm": 9.365893363952637, "learning_rate": 1.9786597480420946e-05, "loss": 1.8652, "step": 2864 }, { "epoch": 9.39344262295082, "grad_norm": 8.498387336730957, "learning_rate": 1.978637922078091e-05, "loss": 1.8374, "step": 2865 }, { "epoch": 9.39672131147541, "grad_norm": 6.660123825073242, "learning_rate": 1.978616085078927e-05, "loss": 1.9375, "step": 2866 }, { "epoch": 9.4, "grad_norm": 8.166836738586426, "learning_rate": 1.978594237044849e-05, "loss": 1.9165, "step": 2867 }, { "epoch": 9.40327868852459, "grad_norm": 8.077591896057129, "learning_rate": 1.9785723779761037e-05, "loss": 2.0386, "step": 2868 }, { "epoch": 9.40655737704918, "grad_norm": 7.919041633605957, "learning_rate": 1.9785505078729374e-05, "loss": 1.9644, "step": 2869 }, { "epoch": 9.40983606557377, "grad_norm": 8.098868370056152, "learning_rate": 1.9785286267355962e-05, "loss": 1.8486, "step": 2870 }, { "epoch": 9.41311475409836, "grad_norm": 8.34315299987793, "learning_rate": 1.978506734564328e-05, "loss": 1.98, "step": 2871 }, { "epoch": 9.416393442622951, "grad_norm": 8.507657051086426, "learning_rate": 1.9784848313593788e-05, "loss": 1.9829, "step": 2872 }, { "epoch": 9.419672131147541, "grad_norm": 11.6654634475708, "learning_rate": 1.9784629171209956e-05, "loss": 1.8262, "step": 2873 }, { "epoch": 9.422950819672131, "grad_norm": 7.484895706176758, "learning_rate": 1.9784409918494255e-05, "loss": 1.875, "step": 2874 }, { "epoch": 9.426229508196721, "grad_norm": 14.3984375, "learning_rate": 1.9784190555449162e-05, "loss": 1.9575, "step": 2875 }, { "epoch": 9.429508196721311, "grad_norm": 6.687252521514893, "learning_rate": 1.9783971082077147e-05, "loss": 1.9573, "step": 2876 }, { "epoch": 9.432786885245902, "grad_norm": 6.991376876831055, "learning_rate": 1.9783751498380686e-05, "loss": 2.0864, "step": 2877 }, { "epoch": 9.436065573770492, "grad_norm": 7.842502117156982, "learning_rate": 1.9783531804362252e-05, "loss": 1.9888, "step": 2878 }, { "epoch": 9.439344262295082, "grad_norm": 7.695973873138428, "learning_rate": 1.9783312000024327e-05, "loss": 1.907, "step": 2879 }, { "epoch": 9.442622950819672, "grad_norm": 8.950321197509766, "learning_rate": 1.9783092085369383e-05, "loss": 1.7944, "step": 2880 }, { "epoch": 9.445901639344262, "grad_norm": 8.950531959533691, "learning_rate": 1.978287206039991e-05, "loss": 1.9824, "step": 2881 }, { "epoch": 9.449180327868852, "grad_norm": 6.649013519287109, "learning_rate": 1.978265192511838e-05, "loss": 1.917, "step": 2882 }, { "epoch": 9.452459016393442, "grad_norm": 11.803238868713379, "learning_rate": 1.9782431679527276e-05, "loss": 1.7512, "step": 2883 }, { "epoch": 9.455737704918032, "grad_norm": 7.0812859535217285, "learning_rate": 1.9782211323629085e-05, "loss": 2.0864, "step": 2884 }, { "epoch": 9.459016393442623, "grad_norm": 17.002365112304688, "learning_rate": 1.9781990857426293e-05, "loss": 1.8672, "step": 2885 }, { "epoch": 9.462295081967213, "grad_norm": 8.042252540588379, "learning_rate": 1.978177028092138e-05, "loss": 1.9771, "step": 2886 }, { "epoch": 9.465573770491803, "grad_norm": 8.821525573730469, "learning_rate": 1.978154959411684e-05, "loss": 1.6707, "step": 2887 }, { "epoch": 9.468852459016393, "grad_norm": 7.083796501159668, "learning_rate": 1.9781328797015153e-05, "loss": 1.8848, "step": 2888 }, { "epoch": 9.472131147540983, "grad_norm": 8.25986385345459, "learning_rate": 1.9781107889618818e-05, "loss": 1.8911, "step": 2889 }, { "epoch": 9.475409836065573, "grad_norm": 8.548906326293945, "learning_rate": 1.978088687193032e-05, "loss": 1.8354, "step": 2890 }, { "epoch": 9.478688524590163, "grad_norm": 14.76005744934082, "learning_rate": 1.978066574395215e-05, "loss": 1.8921, "step": 2891 }, { "epoch": 9.481967213114753, "grad_norm": 8.735086441040039, "learning_rate": 1.978044450568681e-05, "loss": 1.9604, "step": 2892 }, { "epoch": 9.485245901639344, "grad_norm": 9.090287208557129, "learning_rate": 1.9780223157136786e-05, "loss": 1.8838, "step": 2893 }, { "epoch": 9.488524590163934, "grad_norm": 8.392118453979492, "learning_rate": 1.978000169830458e-05, "loss": 2.0366, "step": 2894 }, { "epoch": 9.491803278688524, "grad_norm": 7.294113636016846, "learning_rate": 1.9779780129192682e-05, "loss": 2.1133, "step": 2895 }, { "epoch": 9.495081967213114, "grad_norm": 8.169194221496582, "learning_rate": 1.9779558449803597e-05, "loss": 1.7634, "step": 2896 }, { "epoch": 9.498360655737706, "grad_norm": 7.540098190307617, "learning_rate": 1.9779336660139824e-05, "loss": 1.9868, "step": 2897 }, { "epoch": 9.501639344262294, "grad_norm": 9.914783477783203, "learning_rate": 1.977911476020386e-05, "loss": 1.9363, "step": 2898 }, { "epoch": 9.504918032786886, "grad_norm": 9.958855628967285, "learning_rate": 1.9778892749998207e-05, "loss": 1.8994, "step": 2899 }, { "epoch": 9.508196721311476, "grad_norm": 9.430990219116211, "learning_rate": 1.9778670629525375e-05, "loss": 1.9526, "step": 2900 }, { "epoch": 9.511475409836066, "grad_norm": 10.512418746948242, "learning_rate": 1.9778448398787863e-05, "loss": 1.876, "step": 2901 }, { "epoch": 9.514754098360656, "grad_norm": 8.22861099243164, "learning_rate": 1.9778226057788177e-05, "loss": 1.8066, "step": 2902 }, { "epoch": 9.518032786885247, "grad_norm": 6.663854598999023, "learning_rate": 1.9778003606528825e-05, "loss": 2.27, "step": 2903 }, { "epoch": 9.521311475409837, "grad_norm": 8.949197769165039, "learning_rate": 1.9777781045012317e-05, "loss": 1.9614, "step": 2904 }, { "epoch": 9.524590163934427, "grad_norm": 9.002121925354004, "learning_rate": 1.9777558373241162e-05, "loss": 1.8062, "step": 2905 }, { "epoch": 9.527868852459017, "grad_norm": 6.693758487701416, "learning_rate": 1.977733559121787e-05, "loss": 1.741, "step": 2906 }, { "epoch": 9.531147540983607, "grad_norm": 8.66898250579834, "learning_rate": 1.977711269894495e-05, "loss": 1.9922, "step": 2907 }, { "epoch": 9.534426229508197, "grad_norm": 8.533438682556152, "learning_rate": 1.9776889696424923e-05, "loss": 1.7686, "step": 2908 }, { "epoch": 9.537704918032787, "grad_norm": 16.081865310668945, "learning_rate": 1.97766665836603e-05, "loss": 2.146, "step": 2909 }, { "epoch": 9.540983606557377, "grad_norm": 9.19676399230957, "learning_rate": 1.9776443360653595e-05, "loss": 1.8164, "step": 2910 }, { "epoch": 9.544262295081968, "grad_norm": 8.258177757263184, "learning_rate": 1.9776220027407323e-05, "loss": 2.1182, "step": 2911 }, { "epoch": 9.547540983606558, "grad_norm": 8.40811538696289, "learning_rate": 1.977599658392401e-05, "loss": 2.0601, "step": 2912 }, { "epoch": 9.550819672131148, "grad_norm": 10.068113327026367, "learning_rate": 1.9775773030206168e-05, "loss": 1.8672, "step": 2913 }, { "epoch": 9.554098360655738, "grad_norm": 8.073887825012207, "learning_rate": 1.977554936625632e-05, "loss": 1.8696, "step": 2914 }, { "epoch": 9.557377049180328, "grad_norm": 7.63682746887207, "learning_rate": 1.977532559207699e-05, "loss": 1.8037, "step": 2915 }, { "epoch": 9.560655737704918, "grad_norm": 7.166914463043213, "learning_rate": 1.97751017076707e-05, "loss": 2.0479, "step": 2916 }, { "epoch": 9.563934426229508, "grad_norm": 10.44144344329834, "learning_rate": 1.977487771303998e-05, "loss": 1.8135, "step": 2917 }, { "epoch": 9.567213114754098, "grad_norm": 9.57508373260498, "learning_rate": 1.9774653608187343e-05, "loss": 1.876, "step": 2918 }, { "epoch": 9.570491803278689, "grad_norm": 8.03658390045166, "learning_rate": 1.9774429393115327e-05, "loss": 1.8589, "step": 2919 }, { "epoch": 9.573770491803279, "grad_norm": 6.743293285369873, "learning_rate": 1.9774205067826453e-05, "loss": 1.9717, "step": 2920 }, { "epoch": 9.577049180327869, "grad_norm": 8.526448249816895, "learning_rate": 1.9773980632323257e-05, "loss": 1.9453, "step": 2921 }, { "epoch": 9.580327868852459, "grad_norm": 12.219703674316406, "learning_rate": 1.977375608660827e-05, "loss": 1.8979, "step": 2922 }, { "epoch": 9.583606557377049, "grad_norm": 7.323392391204834, "learning_rate": 1.9773531430684017e-05, "loss": 2.186, "step": 2923 }, { "epoch": 9.58688524590164, "grad_norm": 9.747973442077637, "learning_rate": 1.9773306664553033e-05, "loss": 2.0708, "step": 2924 }, { "epoch": 9.59016393442623, "grad_norm": 10.076577186584473, "learning_rate": 1.977308178821786e-05, "loss": 1.8018, "step": 2925 }, { "epoch": 9.59344262295082, "grad_norm": 9.095359802246094, "learning_rate": 1.9772856801681026e-05, "loss": 2.1157, "step": 2926 }, { "epoch": 9.59672131147541, "grad_norm": 8.739762306213379, "learning_rate": 1.977263170494507e-05, "loss": 1.7939, "step": 2927 }, { "epoch": 9.6, "grad_norm": 6.914953708648682, "learning_rate": 1.977240649801253e-05, "loss": 1.9424, "step": 2928 }, { "epoch": 9.60327868852459, "grad_norm": 10.039566993713379, "learning_rate": 1.9772181180885947e-05, "loss": 1.8071, "step": 2929 }, { "epoch": 9.60655737704918, "grad_norm": 7.6404500007629395, "learning_rate": 1.9771955753567866e-05, "loss": 1.9292, "step": 2930 }, { "epoch": 9.60983606557377, "grad_norm": 6.515293598175049, "learning_rate": 1.9771730216060816e-05, "loss": 1.9717, "step": 2931 }, { "epoch": 9.61311475409836, "grad_norm": 9.885704040527344, "learning_rate": 1.9771504568367347e-05, "loss": 1.7725, "step": 2932 }, { "epoch": 9.61639344262295, "grad_norm": 6.148561000823975, "learning_rate": 1.977127881049001e-05, "loss": 1.9189, "step": 2933 }, { "epoch": 9.61967213114754, "grad_norm": 33.90431213378906, "learning_rate": 1.977105294243134e-05, "loss": 1.8135, "step": 2934 }, { "epoch": 9.62295081967213, "grad_norm": 8.811233520507812, "learning_rate": 1.9770826964193892e-05, "loss": 1.9097, "step": 2935 }, { "epoch": 9.62622950819672, "grad_norm": 7.48635196685791, "learning_rate": 1.977060087578021e-05, "loss": 1.8979, "step": 2936 }, { "epoch": 9.62950819672131, "grad_norm": 11.464388847351074, "learning_rate": 1.9770374677192845e-05, "loss": 1.8423, "step": 2937 }, { "epoch": 9.6327868852459, "grad_norm": 8.315145492553711, "learning_rate": 1.9770148368434345e-05, "loss": 1.8896, "step": 2938 }, { "epoch": 9.636065573770491, "grad_norm": 8.168401718139648, "learning_rate": 1.9769921949507264e-05, "loss": 1.8965, "step": 2939 }, { "epoch": 9.639344262295083, "grad_norm": 24.18089485168457, "learning_rate": 1.9769695420414156e-05, "loss": 1.793, "step": 2940 }, { "epoch": 9.642622950819671, "grad_norm": 8.871896743774414, "learning_rate": 1.9769468781157576e-05, "loss": 1.9097, "step": 2941 }, { "epoch": 9.645901639344263, "grad_norm": 9.087635040283203, "learning_rate": 1.976924203174007e-05, "loss": 1.9995, "step": 2942 }, { "epoch": 9.649180327868853, "grad_norm": 8.407702445983887, "learning_rate": 1.976901517216421e-05, "loss": 1.9019, "step": 2943 }, { "epoch": 9.652459016393443, "grad_norm": 6.9575886726379395, "learning_rate": 1.9768788202432546e-05, "loss": 1.8149, "step": 2944 }, { "epoch": 9.655737704918034, "grad_norm": 7.675687789916992, "learning_rate": 1.9768561122547635e-05, "loss": 2.0469, "step": 2945 }, { "epoch": 9.659016393442624, "grad_norm": 6.087562084197998, "learning_rate": 1.976833393251204e-05, "loss": 2.2246, "step": 2946 }, { "epoch": 9.662295081967214, "grad_norm": 7.3006391525268555, "learning_rate": 1.976810663232833e-05, "loss": 1.9221, "step": 2947 }, { "epoch": 9.665573770491804, "grad_norm": 9.312244415283203, "learning_rate": 1.976787922199906e-05, "loss": 1.9136, "step": 2948 }, { "epoch": 9.668852459016394, "grad_norm": 7.59458589553833, "learning_rate": 1.976765170152679e-05, "loss": 2.0205, "step": 2949 }, { "epoch": 9.672131147540984, "grad_norm": 8.4465913772583, "learning_rate": 1.9767424070914095e-05, "loss": 1.7549, "step": 2950 }, { "epoch": 9.675409836065574, "grad_norm": 7.745428085327148, "learning_rate": 1.9767196330163538e-05, "loss": 1.9512, "step": 2951 }, { "epoch": 9.678688524590164, "grad_norm": 8.132085800170898, "learning_rate": 1.9766968479277684e-05, "loss": 1.8638, "step": 2952 }, { "epoch": 9.681967213114755, "grad_norm": 7.139786720275879, "learning_rate": 1.976674051825911e-05, "loss": 1.8167, "step": 2953 }, { "epoch": 9.685245901639345, "grad_norm": 39.30353927612305, "learning_rate": 1.9766512447110377e-05, "loss": 1.8208, "step": 2954 }, { "epoch": 9.688524590163935, "grad_norm": 7.136844158172607, "learning_rate": 1.9766284265834064e-05, "loss": 1.9187, "step": 2955 }, { "epoch": 9.691803278688525, "grad_norm": 9.756486892700195, "learning_rate": 1.976605597443274e-05, "loss": 1.6724, "step": 2956 }, { "epoch": 9.695081967213115, "grad_norm": 9.802692413330078, "learning_rate": 1.9765827572908983e-05, "loss": 1.8867, "step": 2957 }, { "epoch": 9.698360655737705, "grad_norm": 17.979616165161133, "learning_rate": 1.9765599061265364e-05, "loss": 1.9419, "step": 2958 }, { "epoch": 9.701639344262295, "grad_norm": 16.8748722076416, "learning_rate": 1.9765370439504465e-05, "loss": 1.9487, "step": 2959 }, { "epoch": 9.704918032786885, "grad_norm": 14.24438190460205, "learning_rate": 1.9765141707628858e-05, "loss": 1.8271, "step": 2960 }, { "epoch": 9.708196721311475, "grad_norm": 9.479181289672852, "learning_rate": 1.9764912865641127e-05, "loss": 2.0532, "step": 2961 }, { "epoch": 9.711475409836066, "grad_norm": 14.467376708984375, "learning_rate": 1.976468391354385e-05, "loss": 1.8828, "step": 2962 }, { "epoch": 9.714754098360656, "grad_norm": 8.951837539672852, "learning_rate": 1.9764454851339606e-05, "loss": 1.8945, "step": 2963 }, { "epoch": 9.718032786885246, "grad_norm": 11.43278694152832, "learning_rate": 1.9764225679030987e-05, "loss": 1.9502, "step": 2964 }, { "epoch": 9.721311475409836, "grad_norm": 13.66273021697998, "learning_rate": 1.9763996396620567e-05, "loss": 1.9585, "step": 2965 }, { "epoch": 9.724590163934426, "grad_norm": 7.600408554077148, "learning_rate": 1.9763767004110937e-05, "loss": 2.2031, "step": 2966 }, { "epoch": 9.727868852459016, "grad_norm": 8.434077262878418, "learning_rate": 1.976353750150468e-05, "loss": 1.9067, "step": 2967 }, { "epoch": 9.731147540983606, "grad_norm": 9.957784652709961, "learning_rate": 1.9763307888804387e-05, "loss": 1.9219, "step": 2968 }, { "epoch": 9.734426229508196, "grad_norm": 7.059080600738525, "learning_rate": 1.976307816601265e-05, "loss": 1.8223, "step": 2969 }, { "epoch": 9.737704918032787, "grad_norm": 9.20995044708252, "learning_rate": 1.9762848333132056e-05, "loss": 1.7925, "step": 2970 }, { "epoch": 9.740983606557377, "grad_norm": 25.602602005004883, "learning_rate": 1.976261839016519e-05, "loss": 1.9653, "step": 2971 }, { "epoch": 9.744262295081967, "grad_norm": 9.632059097290039, "learning_rate": 1.9762388337114654e-05, "loss": 1.6587, "step": 2972 }, { "epoch": 9.747540983606557, "grad_norm": 9.437577247619629, "learning_rate": 1.9762158173983038e-05, "loss": 2.0996, "step": 2973 }, { "epoch": 9.750819672131147, "grad_norm": 10.441469192504883, "learning_rate": 1.976192790077294e-05, "loss": 1.7266, "step": 2974 }, { "epoch": 9.754098360655737, "grad_norm": 9.748167991638184, "learning_rate": 1.9761697517486955e-05, "loss": 1.7227, "step": 2975 }, { "epoch": 9.757377049180327, "grad_norm": 13.145956993103027, "learning_rate": 1.976146702412768e-05, "loss": 2.1138, "step": 2976 }, { "epoch": 9.760655737704917, "grad_norm": 8.729939460754395, "learning_rate": 1.976123642069772e-05, "loss": 1.8335, "step": 2977 }, { "epoch": 9.763934426229508, "grad_norm": 8.949777603149414, "learning_rate": 1.9761005707199662e-05, "loss": 1.8794, "step": 2978 }, { "epoch": 9.767213114754098, "grad_norm": 7.298949241638184, "learning_rate": 1.976077488363612e-05, "loss": 1.9143, "step": 2979 }, { "epoch": 9.770491803278688, "grad_norm": 11.483954429626465, "learning_rate": 1.9760543950009693e-05, "loss": 1.9263, "step": 2980 }, { "epoch": 9.773770491803278, "grad_norm": 6.75706148147583, "learning_rate": 1.9760312906322984e-05, "loss": 1.9106, "step": 2981 }, { "epoch": 9.777049180327868, "grad_norm": 12.492101669311523, "learning_rate": 1.9760081752578598e-05, "loss": 1.9268, "step": 2982 }, { "epoch": 9.780327868852458, "grad_norm": 7.369908332824707, "learning_rate": 1.9759850488779143e-05, "loss": 1.7676, "step": 2983 }, { "epoch": 9.783606557377048, "grad_norm": 11.350968360900879, "learning_rate": 1.9759619114927228e-05, "loss": 1.791, "step": 2984 }, { "epoch": 9.78688524590164, "grad_norm": 7.225944995880127, "learning_rate": 1.9759387631025455e-05, "loss": 1.9131, "step": 2985 }, { "epoch": 9.790163934426229, "grad_norm": 15.917712211608887, "learning_rate": 1.9759156037076443e-05, "loss": 1.8228, "step": 2986 }, { "epoch": 9.79344262295082, "grad_norm": 7.641489028930664, "learning_rate": 1.97589243330828e-05, "loss": 2.0688, "step": 2987 }, { "epoch": 9.79672131147541, "grad_norm": 6.346954345703125, "learning_rate": 1.9758692519047136e-05, "loss": 2.0547, "step": 2988 }, { "epoch": 9.8, "grad_norm": 7.196883678436279, "learning_rate": 1.9758460594972068e-05, "loss": 1.998, "step": 2989 }, { "epoch": 9.80327868852459, "grad_norm": 13.015891075134277, "learning_rate": 1.9758228560860213e-05, "loss": 1.9248, "step": 2990 }, { "epoch": 9.806557377049181, "grad_norm": 6.984917163848877, "learning_rate": 1.975799641671418e-05, "loss": 1.7837, "step": 2991 }, { "epoch": 9.809836065573771, "grad_norm": 7.145966529846191, "learning_rate": 1.9757764162536597e-05, "loss": 1.8115, "step": 2992 }, { "epoch": 9.813114754098361, "grad_norm": 6.843877792358398, "learning_rate": 1.9757531798330074e-05, "loss": 1.8794, "step": 2993 }, { "epoch": 9.816393442622951, "grad_norm": 7.630852222442627, "learning_rate": 1.9757299324097237e-05, "loss": 1.9497, "step": 2994 }, { "epoch": 9.819672131147541, "grad_norm": 8.207930564880371, "learning_rate": 1.9757066739840705e-05, "loss": 1.8286, "step": 2995 }, { "epoch": 9.822950819672132, "grad_norm": 9.979930877685547, "learning_rate": 1.97568340455631e-05, "loss": 1.8301, "step": 2996 }, { "epoch": 9.826229508196722, "grad_norm": 7.806500434875488, "learning_rate": 1.9756601241267045e-05, "loss": 2.0532, "step": 2997 }, { "epoch": 9.829508196721312, "grad_norm": 10.488140106201172, "learning_rate": 1.975636832695517e-05, "loss": 1.9585, "step": 2998 }, { "epoch": 9.832786885245902, "grad_norm": 6.874948501586914, "learning_rate": 1.9756135302630094e-05, "loss": 1.9373, "step": 2999 }, { "epoch": 9.836065573770492, "grad_norm": 9.277961730957031, "learning_rate": 1.9755902168294453e-05, "loss": 1.8418, "step": 3000 }, { "epoch": 9.839344262295082, "grad_norm": 9.85040283203125, "learning_rate": 1.9755668923950864e-05, "loss": 1.8467, "step": 3001 }, { "epoch": 9.842622950819672, "grad_norm": 9.875801086425781, "learning_rate": 1.9755435569601972e-05, "loss": 1.8506, "step": 3002 }, { "epoch": 9.845901639344262, "grad_norm": 7.520758152008057, "learning_rate": 1.97552021052504e-05, "loss": 1.7693, "step": 3003 }, { "epoch": 9.849180327868853, "grad_norm": 8.273428916931152, "learning_rate": 1.9754968530898776e-05, "loss": 1.8079, "step": 3004 }, { "epoch": 9.852459016393443, "grad_norm": 7.365856647491455, "learning_rate": 1.9754734846549744e-05, "loss": 1.8926, "step": 3005 }, { "epoch": 9.855737704918033, "grad_norm": 9.904956817626953, "learning_rate": 1.9754501052205935e-05, "loss": 1.8804, "step": 3006 }, { "epoch": 9.859016393442623, "grad_norm": 10.499674797058105, "learning_rate": 1.9754267147869982e-05, "loss": 1.9219, "step": 3007 }, { "epoch": 9.862295081967213, "grad_norm": 7.068496227264404, "learning_rate": 1.9754033133544527e-05, "loss": 1.7432, "step": 3008 }, { "epoch": 9.865573770491803, "grad_norm": 7.155332088470459, "learning_rate": 1.9753799009232203e-05, "loss": 1.9946, "step": 3009 }, { "epoch": 9.868852459016393, "grad_norm": 9.7294282913208, "learning_rate": 1.975356477493566e-05, "loss": 1.8132, "step": 3010 }, { "epoch": 9.872131147540983, "grad_norm": 8.715046882629395, "learning_rate": 1.975333043065753e-05, "loss": 1.7622, "step": 3011 }, { "epoch": 9.875409836065574, "grad_norm": 7.604315280914307, "learning_rate": 1.9753095976400457e-05, "loss": 2.0078, "step": 3012 }, { "epoch": 9.878688524590164, "grad_norm": 8.171728134155273, "learning_rate": 1.9752861412167084e-05, "loss": 1.9648, "step": 3013 }, { "epoch": 9.881967213114754, "grad_norm": 7.832663059234619, "learning_rate": 1.975262673796006e-05, "loss": 1.6455, "step": 3014 }, { "epoch": 9.885245901639344, "grad_norm": 9.099590301513672, "learning_rate": 1.9752391953782032e-05, "loss": 1.8606, "step": 3015 }, { "epoch": 9.888524590163934, "grad_norm": 7.8333210945129395, "learning_rate": 1.9752157059635645e-05, "loss": 1.8435, "step": 3016 }, { "epoch": 9.891803278688524, "grad_norm": 4.9662909507751465, "learning_rate": 1.9751922055523545e-05, "loss": 1.8154, "step": 3017 }, { "epoch": 9.895081967213114, "grad_norm": 6.507094860076904, "learning_rate": 1.9751686941448384e-05, "loss": 1.7703, "step": 3018 }, { "epoch": 9.898360655737704, "grad_norm": 10.377716064453125, "learning_rate": 1.9751451717412815e-05, "loss": 1.9546, "step": 3019 }, { "epoch": 9.901639344262295, "grad_norm": 8.812386512756348, "learning_rate": 1.9751216383419488e-05, "loss": 1.7798, "step": 3020 }, { "epoch": 9.904918032786885, "grad_norm": 7.50983190536499, "learning_rate": 1.975098093947106e-05, "loss": 1.9736, "step": 3021 }, { "epoch": 9.908196721311475, "grad_norm": 13.508729934692383, "learning_rate": 1.975074538557018e-05, "loss": 1.9194, "step": 3022 }, { "epoch": 9.911475409836065, "grad_norm": 8.6107816696167, "learning_rate": 1.975050972171951e-05, "loss": 1.874, "step": 3023 }, { "epoch": 9.914754098360655, "grad_norm": 7.751058101654053, "learning_rate": 1.9750273947921702e-05, "loss": 1.8262, "step": 3024 }, { "epoch": 9.918032786885245, "grad_norm": 7.265013217926025, "learning_rate": 1.975003806417942e-05, "loss": 1.9395, "step": 3025 }, { "epoch": 9.921311475409835, "grad_norm": 7.126663684844971, "learning_rate": 1.9749802070495324e-05, "loss": 1.8892, "step": 3026 }, { "epoch": 9.924590163934425, "grad_norm": 5.994967937469482, "learning_rate": 1.9749565966872065e-05, "loss": 2.0276, "step": 3027 }, { "epoch": 9.927868852459017, "grad_norm": 10.978878021240234, "learning_rate": 1.974932975331232e-05, "loss": 2.0264, "step": 3028 }, { "epoch": 9.931147540983606, "grad_norm": 8.800613403320312, "learning_rate": 1.974909342981874e-05, "loss": 1.8887, "step": 3029 }, { "epoch": 9.934426229508198, "grad_norm": 7.4890899658203125, "learning_rate": 1.9748856996394e-05, "loss": 1.9712, "step": 3030 }, { "epoch": 9.937704918032788, "grad_norm": 9.602069854736328, "learning_rate": 1.9748620453040756e-05, "loss": 1.9644, "step": 3031 }, { "epoch": 9.940983606557378, "grad_norm": 8.7616548538208, "learning_rate": 1.9748383799761684e-05, "loss": 1.897, "step": 3032 }, { "epoch": 9.944262295081968, "grad_norm": 8.129314422607422, "learning_rate": 1.9748147036559446e-05, "loss": 1.7886, "step": 3033 }, { "epoch": 9.947540983606558, "grad_norm": 8.84748363494873, "learning_rate": 1.9747910163436717e-05, "loss": 1.8882, "step": 3034 }, { "epoch": 9.950819672131148, "grad_norm": 8.124191284179688, "learning_rate": 1.9747673180396164e-05, "loss": 1.9951, "step": 3035 }, { "epoch": 9.954098360655738, "grad_norm": 8.463117599487305, "learning_rate": 1.974743608744046e-05, "loss": 1.9395, "step": 3036 }, { "epoch": 9.957377049180328, "grad_norm": 6.600022792816162, "learning_rate": 1.9747198884572283e-05, "loss": 2.0117, "step": 3037 }, { "epoch": 9.960655737704919, "grad_norm": 8.613930702209473, "learning_rate": 1.9746961571794303e-05, "loss": 1.9233, "step": 3038 }, { "epoch": 9.963934426229509, "grad_norm": 10.589235305786133, "learning_rate": 1.9746724149109194e-05, "loss": 1.8613, "step": 3039 }, { "epoch": 9.967213114754099, "grad_norm": 10.104825019836426, "learning_rate": 1.974648661651964e-05, "loss": 1.9883, "step": 3040 }, { "epoch": 9.970491803278689, "grad_norm": 10.14546012878418, "learning_rate": 1.974624897402831e-05, "loss": 1.8022, "step": 3041 }, { "epoch": 9.973770491803279, "grad_norm": 10.223020553588867, "learning_rate": 1.9746011221637897e-05, "loss": 1.9399, "step": 3042 }, { "epoch": 9.97704918032787, "grad_norm": 7.368712425231934, "learning_rate": 1.974577335935107e-05, "loss": 1.9033, "step": 3043 }, { "epoch": 9.98032786885246, "grad_norm": 8.679165840148926, "learning_rate": 1.9745535387170515e-05, "loss": 1.835, "step": 3044 }, { "epoch": 9.98360655737705, "grad_norm": 6.562261581420898, "learning_rate": 1.9745297305098917e-05, "loss": 1.9595, "step": 3045 }, { "epoch": 9.98688524590164, "grad_norm": 8.194323539733887, "learning_rate": 1.974505911313896e-05, "loss": 1.9751, "step": 3046 }, { "epoch": 9.99016393442623, "grad_norm": 7.133737564086914, "learning_rate": 1.9744820811293323e-05, "loss": 2.0386, "step": 3047 }, { "epoch": 9.99344262295082, "grad_norm": 11.289751052856445, "learning_rate": 1.9744582399564708e-05, "loss": 1.9233, "step": 3048 }, { "epoch": 9.99672131147541, "grad_norm": 10.297388076782227, "learning_rate": 1.974434387795579e-05, "loss": 1.8833, "step": 3049 }, { "epoch": 10.0, "grad_norm": 6.4888105392456055, "learning_rate": 1.9744105246469264e-05, "loss": 1.7153, "step": 3050 }, { "epoch": 10.00327868852459, "grad_norm": 5.756816387176514, "learning_rate": 1.974386650510782e-05, "loss": 1.7678, "step": 3051 }, { "epoch": 10.00655737704918, "grad_norm": 7.710630893707275, "learning_rate": 1.9743627653874147e-05, "loss": 1.8208, "step": 3052 }, { "epoch": 10.00983606557377, "grad_norm": 9.055428504943848, "learning_rate": 1.9743388692770943e-05, "loss": 1.8262, "step": 3053 }, { "epoch": 10.01311475409836, "grad_norm": 7.188420295715332, "learning_rate": 1.9743149621800905e-05, "loss": 2.1157, "step": 3054 }, { "epoch": 10.01639344262295, "grad_norm": 8.252676963806152, "learning_rate": 1.9742910440966722e-05, "loss": 1.6768, "step": 3055 }, { "epoch": 10.01967213114754, "grad_norm": 8.381914138793945, "learning_rate": 1.9742671150271093e-05, "loss": 1.6648, "step": 3056 }, { "epoch": 10.02295081967213, "grad_norm": 6.3974151611328125, "learning_rate": 1.9742431749716716e-05, "loss": 1.9238, "step": 3057 }, { "epoch": 10.026229508196721, "grad_norm": 9.471941947937012, "learning_rate": 1.9742192239306296e-05, "loss": 1.8472, "step": 3058 }, { "epoch": 10.029508196721311, "grad_norm": 15.054407119750977, "learning_rate": 1.9741952619042524e-05, "loss": 1.9229, "step": 3059 }, { "epoch": 10.032786885245901, "grad_norm": 10.149459838867188, "learning_rate": 1.974171288892811e-05, "loss": 1.9377, "step": 3060 }, { "epoch": 10.036065573770491, "grad_norm": 11.00224781036377, "learning_rate": 1.974147304896575e-05, "loss": 1.8188, "step": 3061 }, { "epoch": 10.039344262295081, "grad_norm": 7.348984241485596, "learning_rate": 1.974123309915816e-05, "loss": 1.835, "step": 3062 }, { "epoch": 10.042622950819672, "grad_norm": 9.583609580993652, "learning_rate": 1.9740993039508033e-05, "loss": 1.811, "step": 3063 }, { "epoch": 10.045901639344262, "grad_norm": 12.869410514831543, "learning_rate": 1.9740752870018082e-05, "loss": 1.8481, "step": 3064 }, { "epoch": 10.049180327868852, "grad_norm": 12.262420654296875, "learning_rate": 1.9740512590691017e-05, "loss": 1.8701, "step": 3065 }, { "epoch": 10.052459016393442, "grad_norm": 13.152379989624023, "learning_rate": 1.9740272201529543e-05, "loss": 1.7485, "step": 3066 }, { "epoch": 10.055737704918032, "grad_norm": 8.851727485656738, "learning_rate": 1.9740031702536373e-05, "loss": 1.814, "step": 3067 }, { "epoch": 10.059016393442622, "grad_norm": 7.743747234344482, "learning_rate": 1.9739791093714217e-05, "loss": 1.959, "step": 3068 }, { "epoch": 10.062295081967212, "grad_norm": 7.507944107055664, "learning_rate": 1.9739550375065796e-05, "loss": 1.7705, "step": 3069 }, { "epoch": 10.065573770491802, "grad_norm": 9.258041381835938, "learning_rate": 1.9739309546593814e-05, "loss": 1.7817, "step": 3070 }, { "epoch": 10.068852459016393, "grad_norm": 9.290193557739258, "learning_rate": 1.9739068608300985e-05, "loss": 1.6548, "step": 3071 }, { "epoch": 10.072131147540984, "grad_norm": 10.724689483642578, "learning_rate": 1.9738827560190036e-05, "loss": 1.8755, "step": 3072 }, { "epoch": 10.075409836065575, "grad_norm": 14.574670791625977, "learning_rate": 1.973858640226368e-05, "loss": 1.7019, "step": 3073 }, { "epoch": 10.078688524590165, "grad_norm": 10.402708053588867, "learning_rate": 1.9738345134524638e-05, "loss": 1.7988, "step": 3074 }, { "epoch": 10.081967213114755, "grad_norm": 7.714383125305176, "learning_rate": 1.973810375697563e-05, "loss": 1.7646, "step": 3075 }, { "epoch": 10.085245901639345, "grad_norm": 7.179898262023926, "learning_rate": 1.9737862269619375e-05, "loss": 1.8398, "step": 3076 }, { "epoch": 10.088524590163935, "grad_norm": 7.230700969696045, "learning_rate": 1.97376206724586e-05, "loss": 1.8203, "step": 3077 }, { "epoch": 10.091803278688525, "grad_norm": 11.872016906738281, "learning_rate": 1.9737378965496026e-05, "loss": 1.7964, "step": 3078 }, { "epoch": 10.095081967213115, "grad_norm": 9.4483003616333, "learning_rate": 1.973713714873438e-05, "loss": 2.0, "step": 3079 }, { "epoch": 10.098360655737705, "grad_norm": 9.729874610900879, "learning_rate": 1.9736895222176387e-05, "loss": 1.939, "step": 3080 }, { "epoch": 10.101639344262296, "grad_norm": 10.272254943847656, "learning_rate": 1.973665318582478e-05, "loss": 1.7485, "step": 3081 }, { "epoch": 10.104918032786886, "grad_norm": 7.850559234619141, "learning_rate": 1.973641103968228e-05, "loss": 1.9355, "step": 3082 }, { "epoch": 10.108196721311476, "grad_norm": 9.803003311157227, "learning_rate": 1.9736168783751622e-05, "loss": 1.8687, "step": 3083 }, { "epoch": 10.111475409836066, "grad_norm": 7.7135138511657715, "learning_rate": 1.9735926418035543e-05, "loss": 1.824, "step": 3084 }, { "epoch": 10.114754098360656, "grad_norm": 6.898592948913574, "learning_rate": 1.9735683942536768e-05, "loss": 1.6296, "step": 3085 }, { "epoch": 10.118032786885246, "grad_norm": 8.462286949157715, "learning_rate": 1.9735441357258036e-05, "loss": 1.9058, "step": 3086 }, { "epoch": 10.121311475409836, "grad_norm": 11.593672752380371, "learning_rate": 1.973519866220208e-05, "loss": 1.7012, "step": 3087 }, { "epoch": 10.124590163934426, "grad_norm": 8.576722145080566, "learning_rate": 1.9734955857371635e-05, "loss": 1.7539, "step": 3088 }, { "epoch": 10.127868852459017, "grad_norm": 7.437689781188965, "learning_rate": 1.973471294276944e-05, "loss": 1.5842, "step": 3089 }, { "epoch": 10.131147540983607, "grad_norm": 7.863327980041504, "learning_rate": 1.973446991839824e-05, "loss": 1.7964, "step": 3090 }, { "epoch": 10.134426229508197, "grad_norm": 6.562012195587158, "learning_rate": 1.9734226784260765e-05, "loss": 1.7798, "step": 3091 }, { "epoch": 10.137704918032787, "grad_norm": 8.235414505004883, "learning_rate": 1.9733983540359764e-05, "loss": 1.7729, "step": 3092 }, { "epoch": 10.140983606557377, "grad_norm": 11.803351402282715, "learning_rate": 1.9733740186697978e-05, "loss": 1.8755, "step": 3093 }, { "epoch": 10.144262295081967, "grad_norm": 8.286786079406738, "learning_rate": 1.973349672327815e-05, "loss": 1.9058, "step": 3094 }, { "epoch": 10.147540983606557, "grad_norm": 7.438994884490967, "learning_rate": 1.9733253150103028e-05, "loss": 1.8198, "step": 3095 }, { "epoch": 10.150819672131147, "grad_norm": 6.5992817878723145, "learning_rate": 1.9733009467175354e-05, "loss": 1.8735, "step": 3096 }, { "epoch": 10.154098360655738, "grad_norm": 7.656087398529053, "learning_rate": 1.9732765674497882e-05, "loss": 1.7993, "step": 3097 }, { "epoch": 10.157377049180328, "grad_norm": 6.9191789627075195, "learning_rate": 1.9732521772073352e-05, "loss": 1.9756, "step": 3098 }, { "epoch": 10.160655737704918, "grad_norm": 7.169549942016602, "learning_rate": 1.9732277759904527e-05, "loss": 2.0269, "step": 3099 }, { "epoch": 10.163934426229508, "grad_norm": 7.876904487609863, "learning_rate": 1.9732033637994147e-05, "loss": 1.8423, "step": 3100 }, { "epoch": 10.167213114754098, "grad_norm": 10.948310852050781, "learning_rate": 1.9731789406344967e-05, "loss": 1.8276, "step": 3101 }, { "epoch": 10.170491803278688, "grad_norm": 8.028573989868164, "learning_rate": 1.973154506495975e-05, "loss": 1.8335, "step": 3102 }, { "epoch": 10.173770491803278, "grad_norm": 7.674066066741943, "learning_rate": 1.9731300613841235e-05, "loss": 1.8315, "step": 3103 }, { "epoch": 10.177049180327868, "grad_norm": 10.94853687286377, "learning_rate": 1.973105605299219e-05, "loss": 1.7983, "step": 3104 }, { "epoch": 10.180327868852459, "grad_norm": 8.292555809020996, "learning_rate": 1.9730811382415377e-05, "loss": 1.7505, "step": 3105 }, { "epoch": 10.183606557377049, "grad_norm": 11.856802940368652, "learning_rate": 1.973056660211354e-05, "loss": 1.8447, "step": 3106 }, { "epoch": 10.186885245901639, "grad_norm": 10.74837589263916, "learning_rate": 1.9730321712089452e-05, "loss": 1.9272, "step": 3107 }, { "epoch": 10.190163934426229, "grad_norm": 7.5798540115356445, "learning_rate": 1.9730076712345866e-05, "loss": 1.9814, "step": 3108 }, { "epoch": 10.193442622950819, "grad_norm": 8.221776962280273, "learning_rate": 1.9729831602885553e-05, "loss": 1.7529, "step": 3109 }, { "epoch": 10.19672131147541, "grad_norm": 8.053369522094727, "learning_rate": 1.972958638371127e-05, "loss": 1.7495, "step": 3110 }, { "epoch": 10.2, "grad_norm": 6.569486618041992, "learning_rate": 1.9729341054825783e-05, "loss": 1.8438, "step": 3111 }, { "epoch": 10.20327868852459, "grad_norm": 8.722201347351074, "learning_rate": 1.972909561623186e-05, "loss": 1.7603, "step": 3112 }, { "epoch": 10.20655737704918, "grad_norm": 12.173925399780273, "learning_rate": 1.972885006793227e-05, "loss": 2.0293, "step": 3113 }, { "epoch": 10.20983606557377, "grad_norm": 8.344196319580078, "learning_rate": 1.9728604409929778e-05, "loss": 1.77, "step": 3114 }, { "epoch": 10.21311475409836, "grad_norm": 8.335097312927246, "learning_rate": 1.9728358642227158e-05, "loss": 1.7534, "step": 3115 }, { "epoch": 10.216393442622952, "grad_norm": 14.155872344970703, "learning_rate": 1.9728112764827178e-05, "loss": 1.8979, "step": 3116 }, { "epoch": 10.219672131147542, "grad_norm": 7.856560230255127, "learning_rate": 1.9727866777732613e-05, "loss": 1.8062, "step": 3117 }, { "epoch": 10.222950819672132, "grad_norm": 9.47176456451416, "learning_rate": 1.9727620680946234e-05, "loss": 1.6904, "step": 3118 }, { "epoch": 10.226229508196722, "grad_norm": 9.831292152404785, "learning_rate": 1.972737447447082e-05, "loss": 1.7642, "step": 3119 }, { "epoch": 10.229508196721312, "grad_norm": 7.638766765594482, "learning_rate": 1.9727128158309143e-05, "loss": 1.9868, "step": 3120 }, { "epoch": 10.232786885245902, "grad_norm": 8.866606712341309, "learning_rate": 1.9726881732463983e-05, "loss": 1.8184, "step": 3121 }, { "epoch": 10.236065573770492, "grad_norm": 7.430112361907959, "learning_rate": 1.972663519693812e-05, "loss": 1.8052, "step": 3122 }, { "epoch": 10.239344262295083, "grad_norm": 19.974102020263672, "learning_rate": 1.972638855173433e-05, "loss": 1.8374, "step": 3123 }, { "epoch": 10.242622950819673, "grad_norm": 7.761139392852783, "learning_rate": 1.9726141796855393e-05, "loss": 1.7825, "step": 3124 }, { "epoch": 10.245901639344263, "grad_norm": 7.9704484939575195, "learning_rate": 1.9725894932304097e-05, "loss": 1.7815, "step": 3125 }, { "epoch": 10.249180327868853, "grad_norm": 6.945657253265381, "learning_rate": 1.9725647958083224e-05, "loss": 1.8042, "step": 3126 }, { "epoch": 10.252459016393443, "grad_norm": 8.911750793457031, "learning_rate": 1.972540087419556e-05, "loss": 1.7034, "step": 3127 }, { "epoch": 10.255737704918033, "grad_norm": 9.384328842163086, "learning_rate": 1.9725153680643886e-05, "loss": 1.7173, "step": 3128 }, { "epoch": 10.259016393442623, "grad_norm": 9.301621437072754, "learning_rate": 1.9724906377430994e-05, "loss": 1.8135, "step": 3129 }, { "epoch": 10.262295081967213, "grad_norm": 11.133675575256348, "learning_rate": 1.972465896455967e-05, "loss": 1.6543, "step": 3130 }, { "epoch": 10.265573770491804, "grad_norm": 10.43723201751709, "learning_rate": 1.9724411442032706e-05, "loss": 1.772, "step": 3131 }, { "epoch": 10.268852459016394, "grad_norm": 32.952606201171875, "learning_rate": 1.9724163809852892e-05, "loss": 1.8733, "step": 3132 }, { "epoch": 10.272131147540984, "grad_norm": 9.376716613769531, "learning_rate": 1.972391606802302e-05, "loss": 1.6294, "step": 3133 }, { "epoch": 10.275409836065574, "grad_norm": 8.37209701538086, "learning_rate": 1.9723668216545885e-05, "loss": 1.78, "step": 3134 }, { "epoch": 10.278688524590164, "grad_norm": 9.751157760620117, "learning_rate": 1.9723420255424282e-05, "loss": 1.7405, "step": 3135 }, { "epoch": 10.281967213114754, "grad_norm": 9.368562698364258, "learning_rate": 1.9723172184661004e-05, "loss": 1.8325, "step": 3136 }, { "epoch": 10.285245901639344, "grad_norm": 9.226259231567383, "learning_rate": 1.972292400425885e-05, "loss": 2.0283, "step": 3137 }, { "epoch": 10.288524590163934, "grad_norm": 9.261013984680176, "learning_rate": 1.972267571422062e-05, "loss": 1.8599, "step": 3138 }, { "epoch": 10.291803278688525, "grad_norm": 10.764643669128418, "learning_rate": 1.972242731454911e-05, "loss": 1.7153, "step": 3139 }, { "epoch": 10.295081967213115, "grad_norm": 11.049966812133789, "learning_rate": 1.9722178805247127e-05, "loss": 1.7212, "step": 3140 }, { "epoch": 10.298360655737705, "grad_norm": 10.281566619873047, "learning_rate": 1.9721930186317464e-05, "loss": 2.0496, "step": 3141 }, { "epoch": 10.301639344262295, "grad_norm": 7.3857197761535645, "learning_rate": 1.9721681457762933e-05, "loss": 1.8096, "step": 3142 }, { "epoch": 10.304918032786885, "grad_norm": 9.051318168640137, "learning_rate": 1.9721432619586337e-05, "loss": 1.9346, "step": 3143 }, { "epoch": 10.308196721311475, "grad_norm": 8.786654472351074, "learning_rate": 1.9721183671790477e-05, "loss": 1.7935, "step": 3144 }, { "epoch": 10.311475409836065, "grad_norm": 11.551379203796387, "learning_rate": 1.9720934614378166e-05, "loss": 1.9382, "step": 3145 }, { "epoch": 10.314754098360655, "grad_norm": 8.994683265686035, "learning_rate": 1.972068544735221e-05, "loss": 1.8145, "step": 3146 }, { "epoch": 10.318032786885245, "grad_norm": 8.188711166381836, "learning_rate": 1.9720436170715418e-05, "loss": 1.8638, "step": 3147 }, { "epoch": 10.321311475409836, "grad_norm": 7.910181522369385, "learning_rate": 1.9720186784470602e-05, "loss": 1.7266, "step": 3148 }, { "epoch": 10.324590163934426, "grad_norm": 8.474749565124512, "learning_rate": 1.971993728862057e-05, "loss": 2.0679, "step": 3149 }, { "epoch": 10.327868852459016, "grad_norm": 7.017641544342041, "learning_rate": 1.9719687683168143e-05, "loss": 1.7715, "step": 3150 }, { "epoch": 10.331147540983606, "grad_norm": 8.333001136779785, "learning_rate": 1.9719437968116132e-05, "loss": 1.7781, "step": 3151 }, { "epoch": 10.334426229508196, "grad_norm": 10.126306533813477, "learning_rate": 1.9719188143467347e-05, "loss": 1.9526, "step": 3152 }, { "epoch": 10.337704918032786, "grad_norm": 9.568036079406738, "learning_rate": 1.9718938209224615e-05, "loss": 1.8833, "step": 3153 }, { "epoch": 10.340983606557376, "grad_norm": 8.150015830993652, "learning_rate": 1.971868816539075e-05, "loss": 1.8455, "step": 3154 }, { "epoch": 10.344262295081966, "grad_norm": 12.006462097167969, "learning_rate": 1.9718438011968567e-05, "loss": 1.7461, "step": 3155 }, { "epoch": 10.347540983606557, "grad_norm": 7.967652797698975, "learning_rate": 1.9718187748960896e-05, "loss": 1.8635, "step": 3156 }, { "epoch": 10.350819672131147, "grad_norm": 11.647507667541504, "learning_rate": 1.971793737637055e-05, "loss": 1.7842, "step": 3157 }, { "epoch": 10.354098360655737, "grad_norm": 12.575963973999023, "learning_rate": 1.9717686894200354e-05, "loss": 1.8022, "step": 3158 }, { "epoch": 10.357377049180329, "grad_norm": 9.415284156799316, "learning_rate": 1.9717436302453138e-05, "loss": 1.9336, "step": 3159 }, { "epoch": 10.360655737704919, "grad_norm": 10.401578903198242, "learning_rate": 1.9717185601131724e-05, "loss": 1.8533, "step": 3160 }, { "epoch": 10.363934426229509, "grad_norm": 10.809840202331543, "learning_rate": 1.9716934790238938e-05, "loss": 1.7993, "step": 3161 }, { "epoch": 10.3672131147541, "grad_norm": 6.4812140464782715, "learning_rate": 1.971668386977761e-05, "loss": 2.0183, "step": 3162 }, { "epoch": 10.37049180327869, "grad_norm": 8.901023864746094, "learning_rate": 1.971643283975057e-05, "loss": 1.9829, "step": 3163 }, { "epoch": 10.37377049180328, "grad_norm": 13.763797760009766, "learning_rate": 1.9716181700160644e-05, "loss": 1.9429, "step": 3164 }, { "epoch": 10.37704918032787, "grad_norm": 8.817293167114258, "learning_rate": 1.9715930451010666e-05, "loss": 1.9316, "step": 3165 }, { "epoch": 10.38032786885246, "grad_norm": 7.372616291046143, "learning_rate": 1.9715679092303475e-05, "loss": 2.0225, "step": 3166 }, { "epoch": 10.38360655737705, "grad_norm": 9.147624969482422, "learning_rate": 1.9715427624041896e-05, "loss": 1.8804, "step": 3167 }, { "epoch": 10.38688524590164, "grad_norm": 11.021023750305176, "learning_rate": 1.9715176046228773e-05, "loss": 1.7537, "step": 3168 }, { "epoch": 10.39016393442623, "grad_norm": 7.659778594970703, "learning_rate": 1.9714924358866937e-05, "loss": 1.8013, "step": 3169 }, { "epoch": 10.39344262295082, "grad_norm": 9.26504898071289, "learning_rate": 1.9714672561959224e-05, "loss": 1.8147, "step": 3170 }, { "epoch": 10.39672131147541, "grad_norm": 8.94663143157959, "learning_rate": 1.971442065550848e-05, "loss": 1.8799, "step": 3171 }, { "epoch": 10.4, "grad_norm": 8.875288009643555, "learning_rate": 1.9714168639517543e-05, "loss": 1.7397, "step": 3172 }, { "epoch": 10.40327868852459, "grad_norm": 9.364707946777344, "learning_rate": 1.9713916513989257e-05, "loss": 1.9912, "step": 3173 }, { "epoch": 10.40655737704918, "grad_norm": 15.973316192626953, "learning_rate": 1.9713664278926458e-05, "loss": 1.698, "step": 3174 }, { "epoch": 10.40983606557377, "grad_norm": 7.015569686889648, "learning_rate": 1.9713411934331995e-05, "loss": 1.9438, "step": 3175 }, { "epoch": 10.41311475409836, "grad_norm": 9.245319366455078, "learning_rate": 1.9713159480208718e-05, "loss": 1.8257, "step": 3176 }, { "epoch": 10.416393442622951, "grad_norm": 7.96970796585083, "learning_rate": 1.9712906916559463e-05, "loss": 1.7155, "step": 3177 }, { "epoch": 10.419672131147541, "grad_norm": 8.510266304016113, "learning_rate": 1.9712654243387087e-05, "loss": 1.7339, "step": 3178 }, { "epoch": 10.422950819672131, "grad_norm": 10.4220609664917, "learning_rate": 1.9712401460694438e-05, "loss": 1.7534, "step": 3179 }, { "epoch": 10.426229508196721, "grad_norm": 8.19952392578125, "learning_rate": 1.971214856848436e-05, "loss": 2.0791, "step": 3180 }, { "epoch": 10.429508196721311, "grad_norm": 8.092437744140625, "learning_rate": 1.9711895566759706e-05, "loss": 1.7197, "step": 3181 }, { "epoch": 10.432786885245902, "grad_norm": 11.729877471923828, "learning_rate": 1.9711642455523336e-05, "loss": 1.7517, "step": 3182 }, { "epoch": 10.436065573770492, "grad_norm": 12.330493927001953, "learning_rate": 1.97113892347781e-05, "loss": 1.8525, "step": 3183 }, { "epoch": 10.439344262295082, "grad_norm": 9.411478042602539, "learning_rate": 1.971113590452685e-05, "loss": 1.7988, "step": 3184 }, { "epoch": 10.442622950819672, "grad_norm": 9.364720344543457, "learning_rate": 1.971088246477245e-05, "loss": 1.7546, "step": 3185 }, { "epoch": 10.445901639344262, "grad_norm": 9.825109481811523, "learning_rate": 1.971062891551775e-05, "loss": 1.8342, "step": 3186 }, { "epoch": 10.449180327868852, "grad_norm": 9.390619277954102, "learning_rate": 1.9710375256765616e-05, "loss": 2.0098, "step": 3187 }, { "epoch": 10.452459016393442, "grad_norm": 9.561701774597168, "learning_rate": 1.97101214885189e-05, "loss": 1.9307, "step": 3188 }, { "epoch": 10.455737704918032, "grad_norm": 14.815324783325195, "learning_rate": 1.9709867610780475e-05, "loss": 1.7764, "step": 3189 }, { "epoch": 10.459016393442623, "grad_norm": 12.157458305358887, "learning_rate": 1.970961362355319e-05, "loss": 1.7698, "step": 3190 }, { "epoch": 10.462295081967213, "grad_norm": 10.35057258605957, "learning_rate": 1.9709359526839917e-05, "loss": 1.855, "step": 3191 }, { "epoch": 10.465573770491803, "grad_norm": 11.271316528320312, "learning_rate": 1.9709105320643524e-05, "loss": 1.7737, "step": 3192 }, { "epoch": 10.468852459016393, "grad_norm": 9.490958213806152, "learning_rate": 1.970885100496687e-05, "loss": 1.9917, "step": 3193 }, { "epoch": 10.472131147540983, "grad_norm": 7.228057384490967, "learning_rate": 1.9708596579812828e-05, "loss": 1.7744, "step": 3194 }, { "epoch": 10.475409836065573, "grad_norm": 9.257810592651367, "learning_rate": 1.970834204518426e-05, "loss": 1.8103, "step": 3195 }, { "epoch": 10.478688524590163, "grad_norm": 11.105865478515625, "learning_rate": 1.970808740108405e-05, "loss": 1.7915, "step": 3196 }, { "epoch": 10.481967213114753, "grad_norm": 15.0164213180542, "learning_rate": 1.9707832647515057e-05, "loss": 1.8667, "step": 3197 }, { "epoch": 10.485245901639344, "grad_norm": 8.319000244140625, "learning_rate": 1.9707577784480155e-05, "loss": 1.8037, "step": 3198 }, { "epoch": 10.488524590163934, "grad_norm": 7.823225498199463, "learning_rate": 1.9707322811982223e-05, "loss": 1.9341, "step": 3199 }, { "epoch": 10.491803278688524, "grad_norm": 10.337018013000488, "learning_rate": 1.970706773002413e-05, "loss": 1.8557, "step": 3200 }, { "epoch": 10.495081967213114, "grad_norm": 8.10497760772705, "learning_rate": 1.9706812538608758e-05, "loss": 1.8745, "step": 3201 }, { "epoch": 10.498360655737706, "grad_norm": 8.133485794067383, "learning_rate": 1.9706557237738985e-05, "loss": 1.7505, "step": 3202 }, { "epoch": 10.501639344262294, "grad_norm": 7.639120101928711, "learning_rate": 1.970630182741768e-05, "loss": 1.8608, "step": 3203 }, { "epoch": 10.504918032786886, "grad_norm": 8.126115798950195, "learning_rate": 1.9706046307647737e-05, "loss": 1.8823, "step": 3204 }, { "epoch": 10.508196721311476, "grad_norm": 7.537222862243652, "learning_rate": 1.9705790678432025e-05, "loss": 1.8237, "step": 3205 }, { "epoch": 10.511475409836066, "grad_norm": 9.095829010009766, "learning_rate": 1.9705534939773435e-05, "loss": 1.6714, "step": 3206 }, { "epoch": 10.514754098360656, "grad_norm": 12.472210884094238, "learning_rate": 1.9705279091674842e-05, "loss": 2.2139, "step": 3207 }, { "epoch": 10.518032786885247, "grad_norm": 14.2373685836792, "learning_rate": 1.9705023134139144e-05, "loss": 1.8389, "step": 3208 }, { "epoch": 10.521311475409837, "grad_norm": 7.531386375427246, "learning_rate": 1.9704767067169212e-05, "loss": 1.7031, "step": 3209 }, { "epoch": 10.524590163934427, "grad_norm": 8.329995155334473, "learning_rate": 1.9704510890767947e-05, "loss": 1.7485, "step": 3210 }, { "epoch": 10.527868852459017, "grad_norm": 9.165369033813477, "learning_rate": 1.9704254604938227e-05, "loss": 1.6753, "step": 3211 }, { "epoch": 10.531147540983607, "grad_norm": 10.321379661560059, "learning_rate": 1.970399820968295e-05, "loss": 1.707, "step": 3212 }, { "epoch": 10.534426229508197, "grad_norm": 8.100146293640137, "learning_rate": 1.9703741705004998e-05, "loss": 1.9199, "step": 3213 }, { "epoch": 10.537704918032787, "grad_norm": 7.8397345542907715, "learning_rate": 1.9703485090907277e-05, "loss": 1.7314, "step": 3214 }, { "epoch": 10.540983606557377, "grad_norm": 8.71284294128418, "learning_rate": 1.9703228367392665e-05, "loss": 1.8945, "step": 3215 }, { "epoch": 10.544262295081968, "grad_norm": 13.287342071533203, "learning_rate": 1.970297153446407e-05, "loss": 1.8848, "step": 3216 }, { "epoch": 10.547540983606558, "grad_norm": 8.874889373779297, "learning_rate": 1.9702714592124377e-05, "loss": 1.717, "step": 3217 }, { "epoch": 10.550819672131148, "grad_norm": 8.494499206542969, "learning_rate": 1.9702457540376492e-05, "loss": 1.7471, "step": 3218 }, { "epoch": 10.554098360655738, "grad_norm": 8.360159873962402, "learning_rate": 1.970220037922331e-05, "loss": 2.02, "step": 3219 }, { "epoch": 10.557377049180328, "grad_norm": 9.608602523803711, "learning_rate": 1.970194310866773e-05, "loss": 1.9829, "step": 3220 }, { "epoch": 10.560655737704918, "grad_norm": 8.046516418457031, "learning_rate": 1.9701685728712653e-05, "loss": 1.8169, "step": 3221 }, { "epoch": 10.563934426229508, "grad_norm": 11.120530128479004, "learning_rate": 1.9701428239360988e-05, "loss": 1.8311, "step": 3222 }, { "epoch": 10.567213114754098, "grad_norm": 8.193016052246094, "learning_rate": 1.9701170640615624e-05, "loss": 1.7632, "step": 3223 }, { "epoch": 10.570491803278689, "grad_norm": 8.979788780212402, "learning_rate": 1.9700912932479482e-05, "loss": 1.8068, "step": 3224 }, { "epoch": 10.573770491803279, "grad_norm": 8.599310874938965, "learning_rate": 1.9700655114955455e-05, "loss": 1.792, "step": 3225 }, { "epoch": 10.577049180327869, "grad_norm": 8.410070419311523, "learning_rate": 1.9700397188046458e-05, "loss": 1.7319, "step": 3226 }, { "epoch": 10.580327868852459, "grad_norm": 9.063151359558105, "learning_rate": 1.9700139151755397e-05, "loss": 1.8735, "step": 3227 }, { "epoch": 10.583606557377049, "grad_norm": 9.500715255737305, "learning_rate": 1.969988100608518e-05, "loss": 1.9248, "step": 3228 }, { "epoch": 10.58688524590164, "grad_norm": 8.156603813171387, "learning_rate": 1.969962275103872e-05, "loss": 1.9819, "step": 3229 }, { "epoch": 10.59016393442623, "grad_norm": 9.843350410461426, "learning_rate": 1.969936438661893e-05, "loss": 1.6333, "step": 3230 }, { "epoch": 10.59344262295082, "grad_norm": 6.775565147399902, "learning_rate": 1.969910591282872e-05, "loss": 1.9688, "step": 3231 }, { "epoch": 10.59672131147541, "grad_norm": 9.867042541503906, "learning_rate": 1.9698847329671004e-05, "loss": 1.8809, "step": 3232 }, { "epoch": 10.6, "grad_norm": 16.851594924926758, "learning_rate": 1.9698588637148705e-05, "loss": 1.7083, "step": 3233 }, { "epoch": 10.60327868852459, "grad_norm": 8.059934616088867, "learning_rate": 1.9698329835264732e-05, "loss": 1.8252, "step": 3234 }, { "epoch": 10.60655737704918, "grad_norm": 11.541181564331055, "learning_rate": 1.969807092402201e-05, "loss": 1.9141, "step": 3235 }, { "epoch": 10.60983606557377, "grad_norm": 9.995124816894531, "learning_rate": 1.969781190342345e-05, "loss": 1.7271, "step": 3236 }, { "epoch": 10.61311475409836, "grad_norm": 17.054481506347656, "learning_rate": 1.969755277347198e-05, "loss": 1.8965, "step": 3237 }, { "epoch": 10.61639344262295, "grad_norm": 7.429007530212402, "learning_rate": 1.969729353417052e-05, "loss": 1.8643, "step": 3238 }, { "epoch": 10.61967213114754, "grad_norm": 7.164456844329834, "learning_rate": 1.9697034185521992e-05, "loss": 1.7993, "step": 3239 }, { "epoch": 10.62295081967213, "grad_norm": 7.36306619644165, "learning_rate": 1.969677472752932e-05, "loss": 2.0562, "step": 3240 }, { "epoch": 10.62622950819672, "grad_norm": 12.70007610321045, "learning_rate": 1.969651516019543e-05, "loss": 1.8413, "step": 3241 }, { "epoch": 10.62950819672131, "grad_norm": 8.613593101501465, "learning_rate": 1.9696255483523252e-05, "loss": 2.1743, "step": 3242 }, { "epoch": 10.6327868852459, "grad_norm": 9.392148971557617, "learning_rate": 1.969599569751571e-05, "loss": 1.8687, "step": 3243 }, { "epoch": 10.636065573770491, "grad_norm": 8.22262191772461, "learning_rate": 1.9695735802175737e-05, "loss": 1.8843, "step": 3244 }, { "epoch": 10.639344262295083, "grad_norm": 10.407612800598145, "learning_rate": 1.9695475797506263e-05, "loss": 1.7202, "step": 3245 }, { "epoch": 10.642622950819671, "grad_norm": 7.887686729431152, "learning_rate": 1.969521568351022e-05, "loss": 2.0181, "step": 3246 }, { "epoch": 10.645901639344263, "grad_norm": 7.928227424621582, "learning_rate": 1.9694955460190534e-05, "loss": 1.9062, "step": 3247 }, { "epoch": 10.649180327868853, "grad_norm": 10.359393119812012, "learning_rate": 1.9694695127550147e-05, "loss": 1.8628, "step": 3248 }, { "epoch": 10.652459016393443, "grad_norm": 10.845928192138672, "learning_rate": 1.9694434685591993e-05, "loss": 1.8721, "step": 3249 }, { "epoch": 10.655737704918034, "grad_norm": 10.27662467956543, "learning_rate": 1.969417413431901e-05, "loss": 1.7549, "step": 3250 }, { "epoch": 10.659016393442624, "grad_norm": 7.440516948699951, "learning_rate": 1.9693913473734133e-05, "loss": 1.832, "step": 3251 }, { "epoch": 10.662295081967214, "grad_norm": 7.706398010253906, "learning_rate": 1.96936527038403e-05, "loss": 1.7456, "step": 3252 }, { "epoch": 10.665573770491804, "grad_norm": 10.009751319885254, "learning_rate": 1.9693391824640455e-05, "loss": 1.8823, "step": 3253 }, { "epoch": 10.668852459016394, "grad_norm": 9.27823257446289, "learning_rate": 1.969313083613754e-05, "loss": 1.8345, "step": 3254 }, { "epoch": 10.672131147540984, "grad_norm": 12.098302841186523, "learning_rate": 1.9692869738334498e-05, "loss": 1.7314, "step": 3255 }, { "epoch": 10.675409836065574, "grad_norm": 8.484342575073242, "learning_rate": 1.969260853123427e-05, "loss": 1.8281, "step": 3256 }, { "epoch": 10.678688524590164, "grad_norm": 9.857392311096191, "learning_rate": 1.96923472148398e-05, "loss": 1.7065, "step": 3257 }, { "epoch": 10.681967213114755, "grad_norm": 10.88862419128418, "learning_rate": 1.9692085789154044e-05, "loss": 1.8042, "step": 3258 }, { "epoch": 10.685245901639345, "grad_norm": 11.094364166259766, "learning_rate": 1.9691824254179936e-05, "loss": 1.9707, "step": 3259 }, { "epoch": 10.688524590163935, "grad_norm": 10.068081855773926, "learning_rate": 1.9691562609920435e-05, "loss": 1.8101, "step": 3260 }, { "epoch": 10.691803278688525, "grad_norm": 7.725575923919678, "learning_rate": 1.969130085637849e-05, "loss": 1.7839, "step": 3261 }, { "epoch": 10.695081967213115, "grad_norm": 13.37908935546875, "learning_rate": 1.9691038993557056e-05, "loss": 1.918, "step": 3262 }, { "epoch": 10.698360655737705, "grad_norm": 11.010886192321777, "learning_rate": 1.9690777021459077e-05, "loss": 1.7266, "step": 3263 }, { "epoch": 10.701639344262295, "grad_norm": 9.093156814575195, "learning_rate": 1.9690514940087508e-05, "loss": 1.6887, "step": 3264 }, { "epoch": 10.704918032786885, "grad_norm": 8.72419548034668, "learning_rate": 1.969025274944531e-05, "loss": 2.0781, "step": 3265 }, { "epoch": 10.708196721311475, "grad_norm": 10.787879943847656, "learning_rate": 1.9689990449535437e-05, "loss": 1.7568, "step": 3266 }, { "epoch": 10.711475409836066, "grad_norm": 8.89811897277832, "learning_rate": 1.9689728040360848e-05, "loss": 1.9229, "step": 3267 }, { "epoch": 10.714754098360656, "grad_norm": 14.235910415649414, "learning_rate": 1.96894655219245e-05, "loss": 1.9604, "step": 3268 }, { "epoch": 10.718032786885246, "grad_norm": 8.292569160461426, "learning_rate": 1.9689202894229352e-05, "loss": 1.7542, "step": 3269 }, { "epoch": 10.721311475409836, "grad_norm": 8.263043403625488, "learning_rate": 1.9688940157278372e-05, "loss": 1.8157, "step": 3270 }, { "epoch": 10.724590163934426, "grad_norm": 9.462933540344238, "learning_rate": 1.968867731107451e-05, "loss": 1.8032, "step": 3271 }, { "epoch": 10.727868852459016, "grad_norm": 15.401066780090332, "learning_rate": 1.9688414355620743e-05, "loss": 1.7095, "step": 3272 }, { "epoch": 10.731147540983606, "grad_norm": 6.107648849487305, "learning_rate": 1.968815129092003e-05, "loss": 1.9988, "step": 3273 }, { "epoch": 10.734426229508196, "grad_norm": 8.273368835449219, "learning_rate": 1.9687888116975337e-05, "loss": 1.7703, "step": 3274 }, { "epoch": 10.737704918032787, "grad_norm": 10.147671699523926, "learning_rate": 1.9687624833789635e-05, "loss": 1.9028, "step": 3275 }, { "epoch": 10.740983606557377, "grad_norm": 14.397002220153809, "learning_rate": 1.9687361441365888e-05, "loss": 1.7974, "step": 3276 }, { "epoch": 10.744262295081967, "grad_norm": 9.13316822052002, "learning_rate": 1.9687097939707068e-05, "loss": 1.9795, "step": 3277 }, { "epoch": 10.747540983606557, "grad_norm": 8.198638916015625, "learning_rate": 1.9686834328816146e-05, "loss": 1.6885, "step": 3278 }, { "epoch": 10.750819672131147, "grad_norm": 19.145334243774414, "learning_rate": 1.9686570608696097e-05, "loss": 1.8018, "step": 3279 }, { "epoch": 10.754098360655737, "grad_norm": 11.615135192871094, "learning_rate": 1.9686306779349897e-05, "loss": 1.7197, "step": 3280 }, { "epoch": 10.757377049180327, "grad_norm": 7.673832416534424, "learning_rate": 1.968604284078051e-05, "loss": 1.8911, "step": 3281 }, { "epoch": 10.760655737704917, "grad_norm": 13.856942176818848, "learning_rate": 1.968577879299092e-05, "loss": 1.9443, "step": 3282 }, { "epoch": 10.763934426229508, "grad_norm": 6.585768222808838, "learning_rate": 1.9685514635984105e-05, "loss": 1.876, "step": 3283 }, { "epoch": 10.767213114754098, "grad_norm": 8.397908210754395, "learning_rate": 1.9685250369763044e-05, "loss": 1.6531, "step": 3284 }, { "epoch": 10.770491803278688, "grad_norm": 9.549530029296875, "learning_rate": 1.968498599433071e-05, "loss": 1.6929, "step": 3285 }, { "epoch": 10.773770491803278, "grad_norm": 10.5293550491333, "learning_rate": 1.9684721509690094e-05, "loss": 1.7444, "step": 3286 }, { "epoch": 10.777049180327868, "grad_norm": 8.957834243774414, "learning_rate": 1.9684456915844173e-05, "loss": 1.7534, "step": 3287 }, { "epoch": 10.780327868852458, "grad_norm": 12.539636611938477, "learning_rate": 1.968419221279593e-05, "loss": 1.6108, "step": 3288 }, { "epoch": 10.783606557377048, "grad_norm": 8.509504318237305, "learning_rate": 1.968392740054835e-05, "loss": 1.7354, "step": 3289 }, { "epoch": 10.78688524590164, "grad_norm": 9.047447204589844, "learning_rate": 1.968366247910442e-05, "loss": 1.9297, "step": 3290 }, { "epoch": 10.790163934426229, "grad_norm": 11.28689193725586, "learning_rate": 1.9683397448467124e-05, "loss": 1.8159, "step": 3291 }, { "epoch": 10.79344262295082, "grad_norm": 9.284500122070312, "learning_rate": 1.9683132308639455e-05, "loss": 1.8984, "step": 3292 }, { "epoch": 10.79672131147541, "grad_norm": 13.616273880004883, "learning_rate": 1.9682867059624405e-05, "loss": 1.6885, "step": 3293 }, { "epoch": 10.8, "grad_norm": 12.712392807006836, "learning_rate": 1.9682601701424958e-05, "loss": 1.7478, "step": 3294 }, { "epoch": 10.80327868852459, "grad_norm": 19.091224670410156, "learning_rate": 1.9682336234044112e-05, "loss": 1.7847, "step": 3295 }, { "epoch": 10.806557377049181, "grad_norm": 7.839855194091797, "learning_rate": 1.9682070657484857e-05, "loss": 1.9507, "step": 3296 }, { "epoch": 10.809836065573771, "grad_norm": 9.904446601867676, "learning_rate": 1.9681804971750186e-05, "loss": 2.0952, "step": 3297 }, { "epoch": 10.813114754098361, "grad_norm": 10.258584976196289, "learning_rate": 1.96815391768431e-05, "loss": 2.043, "step": 3298 }, { "epoch": 10.816393442622951, "grad_norm": 10.806129455566406, "learning_rate": 1.968127327276659e-05, "loss": 1.8059, "step": 3299 }, { "epoch": 10.819672131147541, "grad_norm": 9.704085350036621, "learning_rate": 1.9681007259523664e-05, "loss": 1.8911, "step": 3300 }, { "epoch": 10.822950819672132, "grad_norm": 8.695749282836914, "learning_rate": 1.9680741137117312e-05, "loss": 1.7402, "step": 3301 }, { "epoch": 10.826229508196722, "grad_norm": 9.011630058288574, "learning_rate": 1.9680474905550538e-05, "loss": 1.8599, "step": 3302 }, { "epoch": 10.829508196721312, "grad_norm": 18.258508682250977, "learning_rate": 1.9680208564826344e-05, "loss": 1.9214, "step": 3303 }, { "epoch": 10.832786885245902, "grad_norm": 8.001184463500977, "learning_rate": 1.9679942114947734e-05, "loss": 1.8408, "step": 3304 }, { "epoch": 10.836065573770492, "grad_norm": 9.490826606750488, "learning_rate": 1.9679675555917714e-05, "loss": 1.8428, "step": 3305 }, { "epoch": 10.839344262295082, "grad_norm": 12.455510139465332, "learning_rate": 1.9679408887739282e-05, "loss": 1.8252, "step": 3306 }, { "epoch": 10.842622950819672, "grad_norm": 10.898642539978027, "learning_rate": 1.9679142110415455e-05, "loss": 1.998, "step": 3307 }, { "epoch": 10.845901639344262, "grad_norm": 74.94686889648438, "learning_rate": 1.9678875223949237e-05, "loss": 1.7803, "step": 3308 }, { "epoch": 10.849180327868853, "grad_norm": 8.935818672180176, "learning_rate": 1.967860822834364e-05, "loss": 1.8208, "step": 3309 }, { "epoch": 10.852459016393443, "grad_norm": 7.214135646820068, "learning_rate": 1.9678341123601666e-05, "loss": 1.8616, "step": 3310 }, { "epoch": 10.855737704918033, "grad_norm": 9.23388957977295, "learning_rate": 1.9678073909726335e-05, "loss": 1.708, "step": 3311 }, { "epoch": 10.859016393442623, "grad_norm": 14.396476745605469, "learning_rate": 1.9677806586720664e-05, "loss": 1.7454, "step": 3312 }, { "epoch": 10.862295081967213, "grad_norm": 7.066288471221924, "learning_rate": 1.9677539154587656e-05, "loss": 1.8188, "step": 3313 }, { "epoch": 10.865573770491803, "grad_norm": 16.66608238220215, "learning_rate": 1.967727161333033e-05, "loss": 1.6694, "step": 3314 }, { "epoch": 10.868852459016393, "grad_norm": 7.615236759185791, "learning_rate": 1.9677003962951706e-05, "loss": 1.8892, "step": 3315 }, { "epoch": 10.872131147540983, "grad_norm": 11.498525619506836, "learning_rate": 1.96767362034548e-05, "loss": 1.8242, "step": 3316 }, { "epoch": 10.875409836065574, "grad_norm": 14.381536483764648, "learning_rate": 1.9676468334842637e-05, "loss": 1.7871, "step": 3317 }, { "epoch": 10.878688524590164, "grad_norm": 12.406416893005371, "learning_rate": 1.9676200357118228e-05, "loss": 1.8496, "step": 3318 }, { "epoch": 10.881967213114754, "grad_norm": 9.197516441345215, "learning_rate": 1.96759322702846e-05, "loss": 1.9136, "step": 3319 }, { "epoch": 10.885245901639344, "grad_norm": 10.705495834350586, "learning_rate": 1.9675664074344777e-05, "loss": 1.9673, "step": 3320 }, { "epoch": 10.888524590163934, "grad_norm": 12.033994674682617, "learning_rate": 1.9675395769301778e-05, "loss": 2.0132, "step": 3321 }, { "epoch": 10.891803278688524, "grad_norm": 10.807482719421387, "learning_rate": 1.9675127355158632e-05, "loss": 1.7749, "step": 3322 }, { "epoch": 10.895081967213114, "grad_norm": 10.410526275634766, "learning_rate": 1.9674858831918368e-05, "loss": 1.9321, "step": 3323 }, { "epoch": 10.898360655737704, "grad_norm": 12.7272367477417, "learning_rate": 1.967459019958401e-05, "loss": 2.0493, "step": 3324 }, { "epoch": 10.901639344262295, "grad_norm": 12.419179916381836, "learning_rate": 1.967432145815859e-05, "loss": 1.8296, "step": 3325 }, { "epoch": 10.904918032786885, "grad_norm": 8.374032020568848, "learning_rate": 1.9674052607645137e-05, "loss": 1.7686, "step": 3326 }, { "epoch": 10.908196721311475, "grad_norm": 11.636983871459961, "learning_rate": 1.967378364804668e-05, "loss": 1.8364, "step": 3327 }, { "epoch": 10.911475409836065, "grad_norm": 10.73983383178711, "learning_rate": 1.9673514579366256e-05, "loss": 1.9365, "step": 3328 }, { "epoch": 10.914754098360655, "grad_norm": 9.893266677856445, "learning_rate": 1.9673245401606896e-05, "loss": 1.8613, "step": 3329 }, { "epoch": 10.918032786885245, "grad_norm": 9.52707290649414, "learning_rate": 1.9672976114771637e-05, "loss": 1.8572, "step": 3330 }, { "epoch": 10.921311475409835, "grad_norm": 11.8298921585083, "learning_rate": 1.9672706718863512e-05, "loss": 1.7979, "step": 3331 }, { "epoch": 10.924590163934425, "grad_norm": 10.20106029510498, "learning_rate": 1.9672437213885566e-05, "loss": 1.9214, "step": 3332 }, { "epoch": 10.927868852459017, "grad_norm": 9.923003196716309, "learning_rate": 1.9672167599840833e-05, "loss": 1.7617, "step": 3333 }, { "epoch": 10.931147540983606, "grad_norm": 8.27734661102295, "learning_rate": 1.967189787673235e-05, "loss": 2.0078, "step": 3334 }, { "epoch": 10.934426229508198, "grad_norm": 20.219524383544922, "learning_rate": 1.9671628044563165e-05, "loss": 1.7454, "step": 3335 }, { "epoch": 10.937704918032788, "grad_norm": 13.19632339477539, "learning_rate": 1.967135810333632e-05, "loss": 1.7515, "step": 3336 }, { "epoch": 10.940983606557378, "grad_norm": 9.345998764038086, "learning_rate": 1.9671088053054853e-05, "loss": 1.728, "step": 3337 }, { "epoch": 10.944262295081968, "grad_norm": 11.85040283203125, "learning_rate": 1.9670817893721815e-05, "loss": 1.9475, "step": 3338 }, { "epoch": 10.947540983606558, "grad_norm": 24.492494583129883, "learning_rate": 1.967054762534025e-05, "loss": 1.8542, "step": 3339 }, { "epoch": 10.950819672131148, "grad_norm": 9.187849998474121, "learning_rate": 1.9670277247913205e-05, "loss": 1.8027, "step": 3340 }, { "epoch": 10.954098360655738, "grad_norm": 13.39743423461914, "learning_rate": 1.967000676144373e-05, "loss": 1.8887, "step": 3341 }, { "epoch": 10.957377049180328, "grad_norm": 11.829488754272461, "learning_rate": 1.9669736165934873e-05, "loss": 1.6775, "step": 3342 }, { "epoch": 10.960655737704919, "grad_norm": 9.313850402832031, "learning_rate": 1.9669465461389688e-05, "loss": 1.75, "step": 3343 }, { "epoch": 10.963934426229509, "grad_norm": 7.582925796508789, "learning_rate": 1.9669194647811227e-05, "loss": 1.7905, "step": 3344 }, { "epoch": 10.967213114754099, "grad_norm": 17.05571174621582, "learning_rate": 1.966892372520254e-05, "loss": 1.7554, "step": 3345 }, { "epoch": 10.970491803278689, "grad_norm": 16.08258628845215, "learning_rate": 1.9668652693566687e-05, "loss": 1.9795, "step": 3346 }, { "epoch": 10.973770491803279, "grad_norm": 8.889665603637695, "learning_rate": 1.966838155290672e-05, "loss": 1.7703, "step": 3347 }, { "epoch": 10.97704918032787, "grad_norm": 8.392342567443848, "learning_rate": 1.9668110303225703e-05, "loss": 1.7969, "step": 3348 }, { "epoch": 10.98032786885246, "grad_norm": 7.9355878829956055, "learning_rate": 1.9667838944526686e-05, "loss": 1.8677, "step": 3349 }, { "epoch": 10.98360655737705, "grad_norm": 12.054856300354004, "learning_rate": 1.9667567476812733e-05, "loss": 1.9102, "step": 3350 }, { "epoch": 10.98688524590164, "grad_norm": 10.390793800354004, "learning_rate": 1.966729590008691e-05, "loss": 1.9111, "step": 3351 }, { "epoch": 10.99016393442623, "grad_norm": 19.131271362304688, "learning_rate": 1.9667024214352267e-05, "loss": 1.8975, "step": 3352 }, { "epoch": 10.99344262295082, "grad_norm": 13.801365852355957, "learning_rate": 1.966675241961188e-05, "loss": 1.6328, "step": 3353 }, { "epoch": 10.99672131147541, "grad_norm": 7.910378932952881, "learning_rate": 1.9666480515868805e-05, "loss": 1.8535, "step": 3354 }, { "epoch": 11.0, "grad_norm": 7.742691993713379, "learning_rate": 1.9666208503126115e-05, "loss": 1.8477, "step": 3355 }, { "epoch": 11.00327868852459, "grad_norm": 12.574678421020508, "learning_rate": 1.966593638138687e-05, "loss": 1.7188, "step": 3356 }, { "epoch": 11.00655737704918, "grad_norm": 8.493948936462402, "learning_rate": 1.9665664150654146e-05, "loss": 1.689, "step": 3357 }, { "epoch": 11.00983606557377, "grad_norm": 9.845747947692871, "learning_rate": 1.9665391810931006e-05, "loss": 1.8071, "step": 3358 }, { "epoch": 11.01311475409836, "grad_norm": 12.548916816711426, "learning_rate": 1.9665119362220526e-05, "loss": 1.8936, "step": 3359 }, { "epoch": 11.01639344262295, "grad_norm": 7.144313812255859, "learning_rate": 1.9664846804525775e-05, "loss": 1.7388, "step": 3360 }, { "epoch": 11.01967213114754, "grad_norm": 11.157736778259277, "learning_rate": 1.9664574137849825e-05, "loss": 1.824, "step": 3361 }, { "epoch": 11.02295081967213, "grad_norm": 12.89576530456543, "learning_rate": 1.9664301362195757e-05, "loss": 1.741, "step": 3362 }, { "epoch": 11.026229508196721, "grad_norm": 10.101936340332031, "learning_rate": 1.9664028477566642e-05, "loss": 1.6108, "step": 3363 }, { "epoch": 11.029508196721311, "grad_norm": 9.216773986816406, "learning_rate": 1.9663755483965556e-05, "loss": 1.821, "step": 3364 }, { "epoch": 11.032786885245901, "grad_norm": 12.422908782958984, "learning_rate": 1.966348238139558e-05, "loss": 1.9219, "step": 3365 }, { "epoch": 11.036065573770491, "grad_norm": 9.789422035217285, "learning_rate": 1.9663209169859792e-05, "loss": 1.7627, "step": 3366 }, { "epoch": 11.039344262295081, "grad_norm": 13.418237686157227, "learning_rate": 1.9662935849361275e-05, "loss": 1.6035, "step": 3367 }, { "epoch": 11.042622950819672, "grad_norm": 12.96240520477295, "learning_rate": 1.9662662419903106e-05, "loss": 1.7153, "step": 3368 }, { "epoch": 11.045901639344262, "grad_norm": 10.524531364440918, "learning_rate": 1.9662388881488374e-05, "loss": 1.6221, "step": 3369 }, { "epoch": 11.049180327868852, "grad_norm": 9.201083183288574, "learning_rate": 1.966211523412016e-05, "loss": 1.449, "step": 3370 }, { "epoch": 11.052459016393442, "grad_norm": 11.02775764465332, "learning_rate": 1.9661841477801552e-05, "loss": 2.0928, "step": 3371 }, { "epoch": 11.055737704918032, "grad_norm": 10.596912384033203, "learning_rate": 1.9661567612535638e-05, "loss": 1.72, "step": 3372 }, { "epoch": 11.059016393442622, "grad_norm": 19.518306732177734, "learning_rate": 1.96612936383255e-05, "loss": 1.8218, "step": 3373 }, { "epoch": 11.062295081967212, "grad_norm": 9.89681339263916, "learning_rate": 1.9661019555174232e-05, "loss": 1.8511, "step": 3374 }, { "epoch": 11.065573770491802, "grad_norm": 8.74593734741211, "learning_rate": 1.9660745363084924e-05, "loss": 1.7231, "step": 3375 }, { "epoch": 11.068852459016393, "grad_norm": 17.317951202392578, "learning_rate": 1.9660471062060664e-05, "loss": 1.8403, "step": 3376 }, { "epoch": 11.072131147540984, "grad_norm": 8.371026992797852, "learning_rate": 1.966019665210455e-05, "loss": 1.731, "step": 3377 }, { "epoch": 11.075409836065575, "grad_norm": 13.44865894317627, "learning_rate": 1.9659922133219676e-05, "loss": 1.6331, "step": 3378 }, { "epoch": 11.078688524590165, "grad_norm": 186.4241485595703, "learning_rate": 1.965964750540914e-05, "loss": 1.8149, "step": 3379 }, { "epoch": 11.081967213114755, "grad_norm": 10.315218925476074, "learning_rate": 1.965937276867603e-05, "loss": 1.7681, "step": 3380 }, { "epoch": 11.085245901639345, "grad_norm": 14.438949584960938, "learning_rate": 1.965909792302345e-05, "loss": 1.6887, "step": 3381 }, { "epoch": 11.088524590163935, "grad_norm": 16.66720962524414, "learning_rate": 1.9658822968454496e-05, "loss": 1.6941, "step": 3382 }, { "epoch": 11.091803278688525, "grad_norm": 9.4827880859375, "learning_rate": 1.965854790497227e-05, "loss": 1.8682, "step": 3383 }, { "epoch": 11.095081967213115, "grad_norm": 15.118977546691895, "learning_rate": 1.9658272732579878e-05, "loss": 1.698, "step": 3384 }, { "epoch": 11.098360655737705, "grad_norm": 9.829144477844238, "learning_rate": 1.9657997451280417e-05, "loss": 1.9248, "step": 3385 }, { "epoch": 11.101639344262296, "grad_norm": 14.338228225708008, "learning_rate": 1.9657722061076995e-05, "loss": 1.8032, "step": 3386 }, { "epoch": 11.104918032786886, "grad_norm": 12.119636535644531, "learning_rate": 1.965744656197271e-05, "loss": 1.7349, "step": 3387 }, { "epoch": 11.108196721311476, "grad_norm": 13.943153381347656, "learning_rate": 1.9657170953970677e-05, "loss": 1.853, "step": 3388 }, { "epoch": 11.111475409836066, "grad_norm": 18.3310489654541, "learning_rate": 1.9656895237074e-05, "loss": 1.8433, "step": 3389 }, { "epoch": 11.114754098360656, "grad_norm": 26.433969497680664, "learning_rate": 1.965661941128579e-05, "loss": 1.9614, "step": 3390 }, { "epoch": 11.118032786885246, "grad_norm": 12.390100479125977, "learning_rate": 1.9656343476609154e-05, "loss": 1.7842, "step": 3391 }, { "epoch": 11.121311475409836, "grad_norm": 16.437213897705078, "learning_rate": 1.9656067433047206e-05, "loss": 1.7788, "step": 3392 }, { "epoch": 11.124590163934426, "grad_norm": 10.050504684448242, "learning_rate": 1.965579128060306e-05, "loss": 1.7266, "step": 3393 }, { "epoch": 11.127868852459017, "grad_norm": 11.362885475158691, "learning_rate": 1.9655515019279825e-05, "loss": 2.0396, "step": 3394 }, { "epoch": 11.131147540983607, "grad_norm": 11.031777381896973, "learning_rate": 1.9655238649080617e-05, "loss": 1.8662, "step": 3395 }, { "epoch": 11.134426229508197, "grad_norm": 11.994868278503418, "learning_rate": 1.965496217000856e-05, "loss": 1.9316, "step": 3396 }, { "epoch": 11.137704918032787, "grad_norm": 12.275070190429688, "learning_rate": 1.9654685582066763e-05, "loss": 1.9443, "step": 3397 }, { "epoch": 11.140983606557377, "grad_norm": 10.789589881896973, "learning_rate": 1.9654408885258346e-05, "loss": 1.7515, "step": 3398 }, { "epoch": 11.144262295081967, "grad_norm": 68.41630554199219, "learning_rate": 1.9654132079586433e-05, "loss": 1.7244, "step": 3399 }, { "epoch": 11.147540983606557, "grad_norm": 11.86856460571289, "learning_rate": 1.965385516505414e-05, "loss": 1.6177, "step": 3400 }, { "epoch": 11.150819672131147, "grad_norm": 25.305030822753906, "learning_rate": 1.9653578141664598e-05, "loss": 1.8826, "step": 3401 }, { "epoch": 11.154098360655738, "grad_norm": 16.14450454711914, "learning_rate": 1.965330100942092e-05, "loss": 1.8438, "step": 3402 }, { "epoch": 11.157377049180328, "grad_norm": 10.661773681640625, "learning_rate": 1.965302376832624e-05, "loss": 1.9023, "step": 3403 }, { "epoch": 11.160655737704918, "grad_norm": 22.677261352539062, "learning_rate": 1.9652746418383676e-05, "loss": 1.8242, "step": 3404 }, { "epoch": 11.163934426229508, "grad_norm": 10.854621887207031, "learning_rate": 1.9652468959596366e-05, "loss": 1.895, "step": 3405 }, { "epoch": 11.167213114754098, "grad_norm": 14.837238311767578, "learning_rate": 1.9652191391967427e-05, "loss": 1.782, "step": 3406 }, { "epoch": 11.170491803278688, "grad_norm": 25.081029891967773, "learning_rate": 1.9651913715499996e-05, "loss": 1.8623, "step": 3407 }, { "epoch": 11.173770491803278, "grad_norm": 19.760900497436523, "learning_rate": 1.9651635930197203e-05, "loss": 1.6458, "step": 3408 }, { "epoch": 11.177049180327868, "grad_norm": 10.327495574951172, "learning_rate": 1.965135803606218e-05, "loss": 1.8652, "step": 3409 }, { "epoch": 11.180327868852459, "grad_norm": 11.00338363647461, "learning_rate": 1.9651080033098057e-05, "loss": 1.7253, "step": 3410 }, { "epoch": 11.183606557377049, "grad_norm": 19.716867446899414, "learning_rate": 1.9650801921307977e-05, "loss": 1.8416, "step": 3411 }, { "epoch": 11.186885245901639, "grad_norm": 12.028133392333984, "learning_rate": 1.9650523700695067e-05, "loss": 1.9204, "step": 3412 }, { "epoch": 11.190163934426229, "grad_norm": 13.172389030456543, "learning_rate": 1.965024537126247e-05, "loss": 1.5645, "step": 3413 }, { "epoch": 11.193442622950819, "grad_norm": 9.797115325927734, "learning_rate": 1.9649966933013324e-05, "loss": 1.7798, "step": 3414 }, { "epoch": 11.19672131147541, "grad_norm": 7.786625862121582, "learning_rate": 1.9649688385950765e-05, "loss": 1.7979, "step": 3415 }, { "epoch": 11.2, "grad_norm": 10.11483097076416, "learning_rate": 1.9649409730077934e-05, "loss": 1.708, "step": 3416 }, { "epoch": 11.20327868852459, "grad_norm": 18.389476776123047, "learning_rate": 1.964913096539798e-05, "loss": 1.8701, "step": 3417 }, { "epoch": 11.20655737704918, "grad_norm": 11.001218795776367, "learning_rate": 1.9648852091914042e-05, "loss": 1.8242, "step": 3418 }, { "epoch": 11.20983606557377, "grad_norm": 12.465447425842285, "learning_rate": 1.964857310962926e-05, "loss": 1.916, "step": 3419 }, { "epoch": 11.21311475409836, "grad_norm": 20.400146484375, "learning_rate": 1.964829401854679e-05, "loss": 2.0952, "step": 3420 }, { "epoch": 11.216393442622952, "grad_norm": 10.720946311950684, "learning_rate": 1.964801481866977e-05, "loss": 1.9092, "step": 3421 }, { "epoch": 11.219672131147542, "grad_norm": 13.87693977355957, "learning_rate": 1.964773551000135e-05, "loss": 1.7437, "step": 3422 }, { "epoch": 11.222950819672132, "grad_norm": 8.537506103515625, "learning_rate": 1.9647456092544683e-05, "loss": 1.8218, "step": 3423 }, { "epoch": 11.226229508196722, "grad_norm": 18.878934860229492, "learning_rate": 1.9647176566302913e-05, "loss": 1.9009, "step": 3424 }, { "epoch": 11.229508196721312, "grad_norm": 24.76968002319336, "learning_rate": 1.9646896931279206e-05, "loss": 1.854, "step": 3425 }, { "epoch": 11.232786885245902, "grad_norm": 11.580809593200684, "learning_rate": 1.9646617187476698e-05, "loss": 1.7239, "step": 3426 }, { "epoch": 11.236065573770492, "grad_norm": 11.504096984863281, "learning_rate": 1.9646337334898555e-05, "loss": 1.8213, "step": 3427 }, { "epoch": 11.239344262295083, "grad_norm": 9.972928047180176, "learning_rate": 1.9646057373547927e-05, "loss": 1.873, "step": 3428 }, { "epoch": 11.242622950819673, "grad_norm": 9.664129257202148, "learning_rate": 1.9645777303427972e-05, "loss": 1.8496, "step": 3429 }, { "epoch": 11.245901639344263, "grad_norm": 10.633790969848633, "learning_rate": 1.964549712454185e-05, "loss": 1.8423, "step": 3430 }, { "epoch": 11.249180327868853, "grad_norm": 12.449658393859863, "learning_rate": 1.9645216836892723e-05, "loss": 1.6836, "step": 3431 }, { "epoch": 11.252459016393443, "grad_norm": 9.955086708068848, "learning_rate": 1.9644936440483744e-05, "loss": 1.7566, "step": 3432 }, { "epoch": 11.255737704918033, "grad_norm": 9.265835762023926, "learning_rate": 1.964465593531808e-05, "loss": 1.9072, "step": 3433 }, { "epoch": 11.259016393442623, "grad_norm": 10.68213176727295, "learning_rate": 1.964437532139889e-05, "loss": 1.8999, "step": 3434 }, { "epoch": 11.262295081967213, "grad_norm": 8.099446296691895, "learning_rate": 1.964409459872934e-05, "loss": 2.0039, "step": 3435 }, { "epoch": 11.265573770491804, "grad_norm": 8.947677612304688, "learning_rate": 1.9643813767312597e-05, "loss": 1.4768, "step": 3436 }, { "epoch": 11.268852459016394, "grad_norm": 9.839004516601562, "learning_rate": 1.964353282715183e-05, "loss": 1.9531, "step": 3437 }, { "epoch": 11.272131147540984, "grad_norm": 10.928576469421387, "learning_rate": 1.9643251778250197e-05, "loss": 1.8164, "step": 3438 }, { "epoch": 11.275409836065574, "grad_norm": 10.58713436126709, "learning_rate": 1.9642970620610882e-05, "loss": 1.5569, "step": 3439 }, { "epoch": 11.278688524590164, "grad_norm": 11.603851318359375, "learning_rate": 1.964268935423704e-05, "loss": 1.6079, "step": 3440 }, { "epoch": 11.281967213114754, "grad_norm": 9.767876625061035, "learning_rate": 1.9642407979131855e-05, "loss": 1.7432, "step": 3441 }, { "epoch": 11.285245901639344, "grad_norm": 9.86620044708252, "learning_rate": 1.964212649529849e-05, "loss": 1.7207, "step": 3442 }, { "epoch": 11.288524590163934, "grad_norm": 10.455906867980957, "learning_rate": 1.9641844902740125e-05, "loss": 1.939, "step": 3443 }, { "epoch": 11.291803278688525, "grad_norm": 12.742776870727539, "learning_rate": 1.9641563201459933e-05, "loss": 1.769, "step": 3444 }, { "epoch": 11.295081967213115, "grad_norm": 9.428654670715332, "learning_rate": 1.9641281391461097e-05, "loss": 1.7332, "step": 3445 }, { "epoch": 11.298360655737705, "grad_norm": 15.130969047546387, "learning_rate": 1.9640999472746782e-05, "loss": 1.8367, "step": 3446 }, { "epoch": 11.301639344262295, "grad_norm": 10.603655815124512, "learning_rate": 1.9640717445320175e-05, "loss": 1.8425, "step": 3447 }, { "epoch": 11.304918032786885, "grad_norm": 9.928567886352539, "learning_rate": 1.9640435309184456e-05, "loss": 1.7803, "step": 3448 }, { "epoch": 11.308196721311475, "grad_norm": 12.106389999389648, "learning_rate": 1.9640153064342805e-05, "loss": 1.9399, "step": 3449 }, { "epoch": 11.311475409836065, "grad_norm": 11.124483108520508, "learning_rate": 1.9639870710798407e-05, "loss": 1.7207, "step": 3450 }, { "epoch": 11.314754098360655, "grad_norm": 11.320304870605469, "learning_rate": 1.963958824855444e-05, "loss": 1.7905, "step": 3451 }, { "epoch": 11.318032786885245, "grad_norm": 11.102259635925293, "learning_rate": 1.9639305677614097e-05, "loss": 1.8503, "step": 3452 }, { "epoch": 11.321311475409836, "grad_norm": 11.601052284240723, "learning_rate": 1.963902299798056e-05, "loss": 1.6082, "step": 3453 }, { "epoch": 11.324590163934426, "grad_norm": 17.73801612854004, "learning_rate": 1.9638740209657014e-05, "loss": 1.7056, "step": 3454 }, { "epoch": 11.327868852459016, "grad_norm": 16.631587982177734, "learning_rate": 1.963845731264665e-05, "loss": 1.7773, "step": 3455 }, { "epoch": 11.331147540983606, "grad_norm": 17.645519256591797, "learning_rate": 1.963817430695266e-05, "loss": 1.7151, "step": 3456 }, { "epoch": 11.334426229508196, "grad_norm": 13.688426971435547, "learning_rate": 1.9637891192578232e-05, "loss": 1.7625, "step": 3457 }, { "epoch": 11.337704918032786, "grad_norm": 14.642358779907227, "learning_rate": 1.963760796952656e-05, "loss": 1.918, "step": 3458 }, { "epoch": 11.340983606557376, "grad_norm": 10.056818962097168, "learning_rate": 1.963732463780084e-05, "loss": 1.7803, "step": 3459 }, { "epoch": 11.344262295081966, "grad_norm": 11.708414077758789, "learning_rate": 1.963704119740426e-05, "loss": 1.6213, "step": 3460 }, { "epoch": 11.347540983606557, "grad_norm": 9.06623363494873, "learning_rate": 1.9636757648340025e-05, "loss": 1.7314, "step": 3461 }, { "epoch": 11.350819672131147, "grad_norm": 11.930469512939453, "learning_rate": 1.9636473990611327e-05, "loss": 1.5588, "step": 3462 }, { "epoch": 11.354098360655737, "grad_norm": 15.384211540222168, "learning_rate": 1.9636190224221364e-05, "loss": 1.7703, "step": 3463 }, { "epoch": 11.357377049180329, "grad_norm": 9.136963844299316, "learning_rate": 1.9635906349173336e-05, "loss": 1.6536, "step": 3464 }, { "epoch": 11.360655737704919, "grad_norm": 9.788703918457031, "learning_rate": 1.9635622365470447e-05, "loss": 1.7725, "step": 3465 }, { "epoch": 11.363934426229509, "grad_norm": 12.233348846435547, "learning_rate": 1.9635338273115896e-05, "loss": 1.7354, "step": 3466 }, { "epoch": 11.3672131147541, "grad_norm": 8.519268989562988, "learning_rate": 1.963505407211289e-05, "loss": 1.9912, "step": 3467 }, { "epoch": 11.37049180327869, "grad_norm": 14.06479549407959, "learning_rate": 1.9634769762464628e-05, "loss": 1.8472, "step": 3468 }, { "epoch": 11.37377049180328, "grad_norm": 9.958650588989258, "learning_rate": 1.9634485344174324e-05, "loss": 1.8406, "step": 3469 }, { "epoch": 11.37704918032787, "grad_norm": 18.042558670043945, "learning_rate": 1.9634200817245176e-05, "loss": 1.6699, "step": 3470 }, { "epoch": 11.38032786885246, "grad_norm": 8.736756324768066, "learning_rate": 1.9633916181680397e-05, "loss": 1.8062, "step": 3471 }, { "epoch": 11.38360655737705, "grad_norm": 8.669109344482422, "learning_rate": 1.96336314374832e-05, "loss": 1.833, "step": 3472 }, { "epoch": 11.38688524590164, "grad_norm": 11.187738418579102, "learning_rate": 1.9633346584656787e-05, "loss": 1.896, "step": 3473 }, { "epoch": 11.39016393442623, "grad_norm": 9.831543922424316, "learning_rate": 1.963306162320438e-05, "loss": 1.8027, "step": 3474 }, { "epoch": 11.39344262295082, "grad_norm": 7.438764572143555, "learning_rate": 1.9632776553129185e-05, "loss": 2.0244, "step": 3475 }, { "epoch": 11.39672131147541, "grad_norm": 10.704166412353516, "learning_rate": 1.963249137443442e-05, "loss": 1.9253, "step": 3476 }, { "epoch": 11.4, "grad_norm": 8.081360816955566, "learning_rate": 1.9632206087123296e-05, "loss": 1.7769, "step": 3477 }, { "epoch": 11.40327868852459, "grad_norm": 8.269913673400879, "learning_rate": 1.9631920691199036e-05, "loss": 1.8489, "step": 3478 }, { "epoch": 11.40655737704918, "grad_norm": 8.861991882324219, "learning_rate": 1.9631635186664858e-05, "loss": 1.8323, "step": 3479 }, { "epoch": 11.40983606557377, "grad_norm": 10.127409934997559, "learning_rate": 1.9631349573523976e-05, "loss": 1.7507, "step": 3480 }, { "epoch": 11.41311475409836, "grad_norm": 9.302291870117188, "learning_rate": 1.963106385177961e-05, "loss": 1.8286, "step": 3481 }, { "epoch": 11.416393442622951, "grad_norm": 6.3533935546875, "learning_rate": 1.963077802143499e-05, "loss": 1.8667, "step": 3482 }, { "epoch": 11.419672131147541, "grad_norm": 15.02750301361084, "learning_rate": 1.9630492082493334e-05, "loss": 1.915, "step": 3483 }, { "epoch": 11.422950819672131, "grad_norm": 8.034000396728516, "learning_rate": 1.9630206034957867e-05, "loss": 1.8564, "step": 3484 }, { "epoch": 11.426229508196721, "grad_norm": 11.091194152832031, "learning_rate": 1.9629919878831813e-05, "loss": 1.6084, "step": 3485 }, { "epoch": 11.429508196721311, "grad_norm": 11.784246444702148, "learning_rate": 1.96296336141184e-05, "loss": 1.9165, "step": 3486 }, { "epoch": 11.432786885245902, "grad_norm": 13.219964027404785, "learning_rate": 1.9629347240820853e-05, "loss": 1.9424, "step": 3487 }, { "epoch": 11.436065573770492, "grad_norm": 8.189664840698242, "learning_rate": 1.9629060758942407e-05, "loss": 1.7886, "step": 3488 }, { "epoch": 11.439344262295082, "grad_norm": 9.717477798461914, "learning_rate": 1.9628774168486288e-05, "loss": 1.5999, "step": 3489 }, { "epoch": 11.442622950819672, "grad_norm": 9.972548484802246, "learning_rate": 1.9628487469455727e-05, "loss": 1.7048, "step": 3490 }, { "epoch": 11.445901639344262, "grad_norm": 5.957789897918701, "learning_rate": 1.9628200661853964e-05, "loss": 1.8147, "step": 3491 }, { "epoch": 11.449180327868852, "grad_norm": 8.24885082244873, "learning_rate": 1.9627913745684223e-05, "loss": 1.8513, "step": 3492 }, { "epoch": 11.452459016393442, "grad_norm": 6.9171881675720215, "learning_rate": 1.9627626720949748e-05, "loss": 1.9419, "step": 3493 }, { "epoch": 11.455737704918032, "grad_norm": 7.3613386154174805, "learning_rate": 1.9627339587653767e-05, "loss": 1.9355, "step": 3494 }, { "epoch": 11.459016393442623, "grad_norm": 9.617562294006348, "learning_rate": 1.9627052345799523e-05, "loss": 1.8062, "step": 3495 }, { "epoch": 11.462295081967213, "grad_norm": 8.095664024353027, "learning_rate": 1.9626764995390254e-05, "loss": 1.7637, "step": 3496 }, { "epoch": 11.465573770491803, "grad_norm": 6.9506611824035645, "learning_rate": 1.9626477536429204e-05, "loss": 1.8433, "step": 3497 }, { "epoch": 11.468852459016393, "grad_norm": 8.860635757446289, "learning_rate": 1.9626189968919608e-05, "loss": 1.7769, "step": 3498 }, { "epoch": 11.472131147540983, "grad_norm": 7.123080253601074, "learning_rate": 1.9625902292864715e-05, "loss": 1.7318, "step": 3499 }, { "epoch": 11.475409836065573, "grad_norm": 8.856499671936035, "learning_rate": 1.962561450826776e-05, "loss": 2.0098, "step": 3500 }, { "epoch": 11.478688524590163, "grad_norm": 14.221467018127441, "learning_rate": 1.9625326615131994e-05, "loss": 1.7454, "step": 3501 }, { "epoch": 11.481967213114753, "grad_norm": 8.587095260620117, "learning_rate": 1.9625038613460664e-05, "loss": 1.9468, "step": 3502 }, { "epoch": 11.485245901639344, "grad_norm": 7.525737762451172, "learning_rate": 1.9624750503257018e-05, "loss": 1.8083, "step": 3503 }, { "epoch": 11.488524590163934, "grad_norm": 16.77242088317871, "learning_rate": 1.96244622845243e-05, "loss": 1.5498, "step": 3504 }, { "epoch": 11.491803278688524, "grad_norm": 11.172452926635742, "learning_rate": 1.9624173957265765e-05, "loss": 1.5723, "step": 3505 }, { "epoch": 11.495081967213114, "grad_norm": 10.60984992980957, "learning_rate": 1.962388552148466e-05, "loss": 1.8433, "step": 3506 }, { "epoch": 11.498360655737706, "grad_norm": 13.32454776763916, "learning_rate": 1.962359697718424e-05, "loss": 1.8142, "step": 3507 }, { "epoch": 11.501639344262294, "grad_norm": 6.937154769897461, "learning_rate": 1.9623308324367758e-05, "loss": 1.8794, "step": 3508 }, { "epoch": 11.504918032786886, "grad_norm": 18.594161987304688, "learning_rate": 1.962301956303847e-05, "loss": 1.7354, "step": 3509 }, { "epoch": 11.508196721311476, "grad_norm": 9.283535957336426, "learning_rate": 1.962273069319963e-05, "loss": 1.8877, "step": 3510 }, { "epoch": 11.511475409836066, "grad_norm": 11.642733573913574, "learning_rate": 1.9622441714854495e-05, "loss": 1.9189, "step": 3511 }, { "epoch": 11.514754098360656, "grad_norm": 10.602508544921875, "learning_rate": 1.962215262800633e-05, "loss": 1.8477, "step": 3512 }, { "epoch": 11.518032786885247, "grad_norm": 8.717964172363281, "learning_rate": 1.9621863432658383e-05, "loss": 1.8203, "step": 3513 }, { "epoch": 11.521311475409837, "grad_norm": 7.247991561889648, "learning_rate": 1.9621574128813925e-05, "loss": 1.9663, "step": 3514 }, { "epoch": 11.524590163934427, "grad_norm": 10.288626670837402, "learning_rate": 1.9621284716476216e-05, "loss": 1.6208, "step": 3515 }, { "epoch": 11.527868852459017, "grad_norm": 9.535011291503906, "learning_rate": 1.9620995195648514e-05, "loss": 1.7649, "step": 3516 }, { "epoch": 11.531147540983607, "grad_norm": 8.325292587280273, "learning_rate": 1.962070556633409e-05, "loss": 1.7334, "step": 3517 }, { "epoch": 11.534426229508197, "grad_norm": 8.809945106506348, "learning_rate": 1.9620415828536208e-05, "loss": 1.6609, "step": 3518 }, { "epoch": 11.537704918032787, "grad_norm": 8.121406555175781, "learning_rate": 1.9620125982258136e-05, "loss": 1.8091, "step": 3519 }, { "epoch": 11.540983606557377, "grad_norm": 9.475238800048828, "learning_rate": 1.961983602750314e-05, "loss": 1.7627, "step": 3520 }, { "epoch": 11.544262295081968, "grad_norm": 8.872714042663574, "learning_rate": 1.9619545964274488e-05, "loss": 1.5601, "step": 3521 }, { "epoch": 11.547540983606558, "grad_norm": 8.980941772460938, "learning_rate": 1.9619255792575458e-05, "loss": 1.6963, "step": 3522 }, { "epoch": 11.550819672131148, "grad_norm": 8.187828063964844, "learning_rate": 1.9618965512409316e-05, "loss": 1.7422, "step": 3523 }, { "epoch": 11.554098360655738, "grad_norm": 8.955649375915527, "learning_rate": 1.9618675123779338e-05, "loss": 1.8982, "step": 3524 }, { "epoch": 11.557377049180328, "grad_norm": 8.559666633605957, "learning_rate": 1.9618384626688793e-05, "loss": 1.7246, "step": 3525 }, { "epoch": 11.560655737704918, "grad_norm": 7.822498798370361, "learning_rate": 1.9618094021140965e-05, "loss": 1.7891, "step": 3526 }, { "epoch": 11.563934426229508, "grad_norm": 7.051308631896973, "learning_rate": 1.9617803307139122e-05, "loss": 1.814, "step": 3527 }, { "epoch": 11.567213114754098, "grad_norm": 8.239081382751465, "learning_rate": 1.961751248468655e-05, "loss": 1.7524, "step": 3528 }, { "epoch": 11.570491803278689, "grad_norm": 7.943124771118164, "learning_rate": 1.9617221553786522e-05, "loss": 1.7983, "step": 3529 }, { "epoch": 11.573770491803279, "grad_norm": 8.78684139251709, "learning_rate": 1.9616930514442324e-05, "loss": 1.895, "step": 3530 }, { "epoch": 11.577049180327869, "grad_norm": 7.937125205993652, "learning_rate": 1.9616639366657237e-05, "loss": 1.6819, "step": 3531 }, { "epoch": 11.580327868852459, "grad_norm": 9.98132610321045, "learning_rate": 1.961634811043454e-05, "loss": 2.0562, "step": 3532 }, { "epoch": 11.583606557377049, "grad_norm": 8.293245315551758, "learning_rate": 1.961605674577752e-05, "loss": 1.7988, "step": 3533 }, { "epoch": 11.58688524590164, "grad_norm": 8.874798774719238, "learning_rate": 1.961576527268946e-05, "loss": 1.7349, "step": 3534 }, { "epoch": 11.59016393442623, "grad_norm": 12.527420043945312, "learning_rate": 1.9615473691173652e-05, "loss": 1.7854, "step": 3535 }, { "epoch": 11.59344262295082, "grad_norm": 9.056078910827637, "learning_rate": 1.961518200123338e-05, "loss": 1.7891, "step": 3536 }, { "epoch": 11.59672131147541, "grad_norm": 11.580677032470703, "learning_rate": 1.9614890202871933e-05, "loss": 1.6333, "step": 3537 }, { "epoch": 11.6, "grad_norm": 12.983285903930664, "learning_rate": 1.9614598296092603e-05, "loss": 1.8706, "step": 3538 }, { "epoch": 11.60327868852459, "grad_norm": 9.089096069335938, "learning_rate": 1.961430628089868e-05, "loss": 1.9048, "step": 3539 }, { "epoch": 11.60655737704918, "grad_norm": 8.208296775817871, "learning_rate": 1.9614014157293456e-05, "loss": 1.9834, "step": 3540 }, { "epoch": 11.60983606557377, "grad_norm": 7.475717544555664, "learning_rate": 1.9613721925280224e-05, "loss": 1.6272, "step": 3541 }, { "epoch": 11.61311475409836, "grad_norm": 10.594016075134277, "learning_rate": 1.9613429584862284e-05, "loss": 1.8169, "step": 3542 }, { "epoch": 11.61639344262295, "grad_norm": 8.649900436401367, "learning_rate": 1.9613137136042932e-05, "loss": 1.8145, "step": 3543 }, { "epoch": 11.61967213114754, "grad_norm": 7.196110725402832, "learning_rate": 1.9612844578825463e-05, "loss": 1.8748, "step": 3544 }, { "epoch": 11.62295081967213, "grad_norm": 13.539405822753906, "learning_rate": 1.9612551913213175e-05, "loss": 1.5449, "step": 3545 }, { "epoch": 11.62622950819672, "grad_norm": 12.971583366394043, "learning_rate": 1.961225913920937e-05, "loss": 1.6504, "step": 3546 }, { "epoch": 11.62950819672131, "grad_norm": 8.360922813415527, "learning_rate": 1.961196625681735e-05, "loss": 1.9077, "step": 3547 }, { "epoch": 11.6327868852459, "grad_norm": 9.215338706970215, "learning_rate": 1.9611673266040414e-05, "loss": 1.8723, "step": 3548 }, { "epoch": 11.636065573770491, "grad_norm": 9.638300895690918, "learning_rate": 1.961138016688187e-05, "loss": 1.708, "step": 3549 }, { "epoch": 11.639344262295083, "grad_norm": 9.799729347229004, "learning_rate": 1.961108695934502e-05, "loss": 1.8806, "step": 3550 }, { "epoch": 11.642622950819671, "grad_norm": 9.236055374145508, "learning_rate": 1.9610793643433175e-05, "loss": 1.6833, "step": 3551 }, { "epoch": 11.645901639344263, "grad_norm": 11.657554626464844, "learning_rate": 1.9610500219149637e-05, "loss": 1.7217, "step": 3552 }, { "epoch": 11.649180327868853, "grad_norm": 7.892841815948486, "learning_rate": 1.9610206686497717e-05, "loss": 1.6833, "step": 3553 }, { "epoch": 11.652459016393443, "grad_norm": 7.792559623718262, "learning_rate": 1.9609913045480725e-05, "loss": 1.9004, "step": 3554 }, { "epoch": 11.655737704918034, "grad_norm": 9.024582862854004, "learning_rate": 1.960961929610197e-05, "loss": 1.8301, "step": 3555 }, { "epoch": 11.659016393442624, "grad_norm": 10.568233489990234, "learning_rate": 1.9609325438364765e-05, "loss": 1.7026, "step": 3556 }, { "epoch": 11.662295081967214, "grad_norm": 8.158537864685059, "learning_rate": 1.9609031472272425e-05, "loss": 1.6721, "step": 3557 }, { "epoch": 11.665573770491804, "grad_norm": 8.24008560180664, "learning_rate": 1.9608737397828267e-05, "loss": 1.75, "step": 3558 }, { "epoch": 11.668852459016394, "grad_norm": 13.714336395263672, "learning_rate": 1.96084432150356e-05, "loss": 1.8613, "step": 3559 }, { "epoch": 11.672131147540984, "grad_norm": 8.549934387207031, "learning_rate": 1.9608148923897752e-05, "loss": 1.9258, "step": 3560 }, { "epoch": 11.675409836065574, "grad_norm": 9.736776351928711, "learning_rate": 1.960785452441803e-05, "loss": 1.7954, "step": 3561 }, { "epoch": 11.678688524590164, "grad_norm": 10.230415344238281, "learning_rate": 1.9607560016599758e-05, "loss": 1.8291, "step": 3562 }, { "epoch": 11.681967213114755, "grad_norm": 12.990575790405273, "learning_rate": 1.960726540044626e-05, "loss": 1.7275, "step": 3563 }, { "epoch": 11.685245901639345, "grad_norm": 7.823148250579834, "learning_rate": 1.9606970675960856e-05, "loss": 1.8105, "step": 3564 }, { "epoch": 11.688524590163935, "grad_norm": 7.72829008102417, "learning_rate": 1.9606675843146867e-05, "loss": 1.8599, "step": 3565 }, { "epoch": 11.691803278688525, "grad_norm": 7.03842830657959, "learning_rate": 1.960638090200762e-05, "loss": 1.7554, "step": 3566 }, { "epoch": 11.695081967213115, "grad_norm": 10.415154457092285, "learning_rate": 1.9606085852546438e-05, "loss": 1.7476, "step": 3567 }, { "epoch": 11.698360655737705, "grad_norm": 8.301414489746094, "learning_rate": 1.960579069476665e-05, "loss": 1.6213, "step": 3568 }, { "epoch": 11.701639344262295, "grad_norm": 18.719629287719727, "learning_rate": 1.9605495428671588e-05, "loss": 1.7749, "step": 3569 }, { "epoch": 11.704918032786885, "grad_norm": 73.6620101928711, "learning_rate": 1.9605200054264576e-05, "loss": 1.6545, "step": 3570 }, { "epoch": 11.708196721311475, "grad_norm": 10.296510696411133, "learning_rate": 1.960490457154895e-05, "loss": 1.7993, "step": 3571 }, { "epoch": 11.711475409836066, "grad_norm": 7.5581183433532715, "learning_rate": 1.9604608980528034e-05, "loss": 1.5786, "step": 3572 }, { "epoch": 11.714754098360656, "grad_norm": 11.98913288116455, "learning_rate": 1.9604313281205164e-05, "loss": 1.6553, "step": 3573 }, { "epoch": 11.718032786885246, "grad_norm": 10.04036808013916, "learning_rate": 1.960401747358368e-05, "loss": 1.7227, "step": 3574 }, { "epoch": 11.721311475409836, "grad_norm": 12.201398849487305, "learning_rate": 1.9603721557666913e-05, "loss": 1.6855, "step": 3575 }, { "epoch": 11.724590163934426, "grad_norm": 9.522883415222168, "learning_rate": 1.9603425533458197e-05, "loss": 1.9824, "step": 3576 }, { "epoch": 11.727868852459016, "grad_norm": 8.071319580078125, "learning_rate": 1.9603129400960875e-05, "loss": 1.7668, "step": 3577 }, { "epoch": 11.731147540983606, "grad_norm": 8.369147300720215, "learning_rate": 1.9602833160178286e-05, "loss": 1.8394, "step": 3578 }, { "epoch": 11.734426229508196, "grad_norm": 11.432537078857422, "learning_rate": 1.9602536811113766e-05, "loss": 1.8896, "step": 3579 }, { "epoch": 11.737704918032787, "grad_norm": 9.344724655151367, "learning_rate": 1.9602240353770662e-05, "loss": 1.6846, "step": 3580 }, { "epoch": 11.740983606557377, "grad_norm": 9.258996963500977, "learning_rate": 1.960194378815231e-05, "loss": 1.7568, "step": 3581 }, { "epoch": 11.744262295081967, "grad_norm": 12.04018783569336, "learning_rate": 1.9601647114262062e-05, "loss": 1.7998, "step": 3582 }, { "epoch": 11.747540983606557, "grad_norm": 9.434274673461914, "learning_rate": 1.9601350332103257e-05, "loss": 1.8081, "step": 3583 }, { "epoch": 11.750819672131147, "grad_norm": 10.160274505615234, "learning_rate": 1.9601053441679244e-05, "loss": 1.7881, "step": 3584 }, { "epoch": 11.754098360655737, "grad_norm": 6.641427516937256, "learning_rate": 1.9600756442993373e-05, "loss": 1.9038, "step": 3585 }, { "epoch": 11.757377049180327, "grad_norm": 18.761917114257812, "learning_rate": 1.960045933604899e-05, "loss": 1.9419, "step": 3586 }, { "epoch": 11.760655737704917, "grad_norm": 11.409210205078125, "learning_rate": 1.9600162120849445e-05, "loss": 1.6375, "step": 3587 }, { "epoch": 11.763934426229508, "grad_norm": 7.171947002410889, "learning_rate": 1.959986479739809e-05, "loss": 1.7671, "step": 3588 }, { "epoch": 11.767213114754098, "grad_norm": 8.620349884033203, "learning_rate": 1.9599567365698283e-05, "loss": 1.6641, "step": 3589 }, { "epoch": 11.770491803278688, "grad_norm": 8.71249771118164, "learning_rate": 1.9599269825753368e-05, "loss": 1.6995, "step": 3590 }, { "epoch": 11.773770491803278, "grad_norm": 8.897780418395996, "learning_rate": 1.9598972177566705e-05, "loss": 1.6118, "step": 3591 }, { "epoch": 11.777049180327868, "grad_norm": 12.132909774780273, "learning_rate": 1.9598674421141656e-05, "loss": 1.9072, "step": 3592 }, { "epoch": 11.780327868852458, "grad_norm": 6.226644515991211, "learning_rate": 1.9598376556481567e-05, "loss": 1.9253, "step": 3593 }, { "epoch": 11.783606557377048, "grad_norm": 9.221323013305664, "learning_rate": 1.95980785835898e-05, "loss": 1.7625, "step": 3594 }, { "epoch": 11.78688524590164, "grad_norm": 6.865513801574707, "learning_rate": 1.9597780502469725e-05, "loss": 1.7107, "step": 3595 }, { "epoch": 11.790163934426229, "grad_norm": 8.352940559387207, "learning_rate": 1.9597482313124693e-05, "loss": 1.7227, "step": 3596 }, { "epoch": 11.79344262295082, "grad_norm": 9.934743881225586, "learning_rate": 1.9597184015558066e-05, "loss": 1.7856, "step": 3597 }, { "epoch": 11.79672131147541, "grad_norm": 7.712265968322754, "learning_rate": 1.9596885609773212e-05, "loss": 1.7485, "step": 3598 }, { "epoch": 11.8, "grad_norm": 10.523466110229492, "learning_rate": 1.9596587095773496e-05, "loss": 1.8582, "step": 3599 }, { "epoch": 11.80327868852459, "grad_norm": 10.750805854797363, "learning_rate": 1.959628847356228e-05, "loss": 1.7256, "step": 3600 }, { "epoch": 11.806557377049181, "grad_norm": 9.086967468261719, "learning_rate": 1.9595989743142937e-05, "loss": 1.6892, "step": 3601 }, { "epoch": 11.809836065573771, "grad_norm": 11.369824409484863, "learning_rate": 1.9595690904518833e-05, "loss": 1.7437, "step": 3602 }, { "epoch": 11.813114754098361, "grad_norm": 10.440841674804688, "learning_rate": 1.9595391957693334e-05, "loss": 1.7319, "step": 3603 }, { "epoch": 11.816393442622951, "grad_norm": 6.418968200683594, "learning_rate": 1.9595092902669815e-05, "loss": 1.6909, "step": 3604 }, { "epoch": 11.819672131147541, "grad_norm": 9.445077896118164, "learning_rate": 1.9594793739451647e-05, "loss": 1.8848, "step": 3605 }, { "epoch": 11.822950819672132, "grad_norm": 9.52175521850586, "learning_rate": 1.95944944680422e-05, "loss": 1.8679, "step": 3606 }, { "epoch": 11.826229508196722, "grad_norm": 8.746968269348145, "learning_rate": 1.959419508844486e-05, "loss": 1.8271, "step": 3607 }, { "epoch": 11.829508196721312, "grad_norm": 8.325602531433105, "learning_rate": 1.9593895600662988e-05, "loss": 1.4658, "step": 3608 }, { "epoch": 11.832786885245902, "grad_norm": 9.785837173461914, "learning_rate": 1.959359600469997e-05, "loss": 1.6528, "step": 3609 }, { "epoch": 11.836065573770492, "grad_norm": 6.7347307205200195, "learning_rate": 1.9593296300559182e-05, "loss": 1.7549, "step": 3610 }, { "epoch": 11.839344262295082, "grad_norm": 9.237115859985352, "learning_rate": 1.9592996488244007e-05, "loss": 1.7708, "step": 3611 }, { "epoch": 11.842622950819672, "grad_norm": 11.026774406433105, "learning_rate": 1.9592696567757818e-05, "loss": 1.9722, "step": 3612 }, { "epoch": 11.845901639344262, "grad_norm": 11.1004056930542, "learning_rate": 1.9592396539104004e-05, "loss": 1.6367, "step": 3613 }, { "epoch": 11.849180327868853, "grad_norm": 7.002098083496094, "learning_rate": 1.9592096402285943e-05, "loss": 1.873, "step": 3614 }, { "epoch": 11.852459016393443, "grad_norm": 8.671931266784668, "learning_rate": 1.9591796157307022e-05, "loss": 1.7837, "step": 3615 }, { "epoch": 11.855737704918033, "grad_norm": 8.355453491210938, "learning_rate": 1.959149580417063e-05, "loss": 1.6062, "step": 3616 }, { "epoch": 11.859016393442623, "grad_norm": 8.367876052856445, "learning_rate": 1.9591195342880146e-05, "loss": 1.7979, "step": 3617 }, { "epoch": 11.862295081967213, "grad_norm": 8.252217292785645, "learning_rate": 1.9590894773438967e-05, "loss": 1.7749, "step": 3618 }, { "epoch": 11.865573770491803, "grad_norm": 7.885560989379883, "learning_rate": 1.9590594095850474e-05, "loss": 1.6401, "step": 3619 }, { "epoch": 11.868852459016393, "grad_norm": 8.024502754211426, "learning_rate": 1.959029331011806e-05, "loss": 1.8506, "step": 3620 }, { "epoch": 11.872131147540983, "grad_norm": 19.78169822692871, "learning_rate": 1.9589992416245118e-05, "loss": 1.8628, "step": 3621 }, { "epoch": 11.875409836065574, "grad_norm": 6.223826885223389, "learning_rate": 1.958969141423504e-05, "loss": 1.8611, "step": 3622 }, { "epoch": 11.878688524590164, "grad_norm": 7.1850810050964355, "learning_rate": 1.9589390304091223e-05, "loss": 1.9077, "step": 3623 }, { "epoch": 11.881967213114754, "grad_norm": 9.78791332244873, "learning_rate": 1.958908908581706e-05, "loss": 1.7019, "step": 3624 }, { "epoch": 11.885245901639344, "grad_norm": 6.959858417510986, "learning_rate": 1.9588787759415946e-05, "loss": 1.8372, "step": 3625 }, { "epoch": 11.888524590163934, "grad_norm": 7.115423202514648, "learning_rate": 1.958848632489128e-05, "loss": 1.8779, "step": 3626 }, { "epoch": 11.891803278688524, "grad_norm": 19.947433471679688, "learning_rate": 1.958818478224646e-05, "loss": 1.814, "step": 3627 }, { "epoch": 11.895081967213114, "grad_norm": 12.01190185546875, "learning_rate": 1.958788313148489e-05, "loss": 1.7429, "step": 3628 }, { "epoch": 11.898360655737704, "grad_norm": 10.453105926513672, "learning_rate": 1.9587581372609966e-05, "loss": 1.688, "step": 3629 }, { "epoch": 11.901639344262295, "grad_norm": 8.618165969848633, "learning_rate": 1.9587279505625094e-05, "loss": 1.915, "step": 3630 }, { "epoch": 11.904918032786885, "grad_norm": 21.591341018676758, "learning_rate": 1.9586977530533677e-05, "loss": 1.856, "step": 3631 }, { "epoch": 11.908196721311475, "grad_norm": 10.633044242858887, "learning_rate": 1.9586675447339124e-05, "loss": 1.7788, "step": 3632 }, { "epoch": 11.911475409836065, "grad_norm": 7.566139221191406, "learning_rate": 1.9586373256044835e-05, "loss": 1.6384, "step": 3633 }, { "epoch": 11.914754098360655, "grad_norm": 8.411309242248535, "learning_rate": 1.9586070956654223e-05, "loss": 1.8218, "step": 3634 }, { "epoch": 11.918032786885245, "grad_norm": 8.989068984985352, "learning_rate": 1.9585768549170688e-05, "loss": 1.6021, "step": 3635 }, { "epoch": 11.921311475409835, "grad_norm": 8.62917709350586, "learning_rate": 1.958546603359765e-05, "loss": 1.6152, "step": 3636 }, { "epoch": 11.924590163934425, "grad_norm": 6.841665267944336, "learning_rate": 1.9585163409938514e-05, "loss": 1.8025, "step": 3637 }, { "epoch": 11.927868852459017, "grad_norm": 7.586787223815918, "learning_rate": 1.95848606781967e-05, "loss": 1.8789, "step": 3638 }, { "epoch": 11.931147540983606, "grad_norm": 8.421981811523438, "learning_rate": 1.9584557838375608e-05, "loss": 1.675, "step": 3639 }, { "epoch": 11.934426229508198, "grad_norm": 7.0873870849609375, "learning_rate": 1.9584254890478665e-05, "loss": 1.7205, "step": 3640 }, { "epoch": 11.937704918032788, "grad_norm": 6.342845916748047, "learning_rate": 1.9583951834509284e-05, "loss": 1.9668, "step": 3641 }, { "epoch": 11.940983606557378, "grad_norm": 8.199467658996582, "learning_rate": 1.958364867047088e-05, "loss": 1.9346, "step": 3642 }, { "epoch": 11.944262295081968, "grad_norm": 7.1890482902526855, "learning_rate": 1.9583345398366872e-05, "loss": 1.8545, "step": 3643 }, { "epoch": 11.947540983606558, "grad_norm": 9.157346725463867, "learning_rate": 1.9583042018200682e-05, "loss": 2.0396, "step": 3644 }, { "epoch": 11.950819672131148, "grad_norm": 7.334303855895996, "learning_rate": 1.958273852997573e-05, "loss": 1.8906, "step": 3645 }, { "epoch": 11.954098360655738, "grad_norm": 7.6100006103515625, "learning_rate": 1.9582434933695436e-05, "loss": 1.7407, "step": 3646 }, { "epoch": 11.957377049180328, "grad_norm": 6.276791095733643, "learning_rate": 1.9582131229363225e-05, "loss": 1.6833, "step": 3647 }, { "epoch": 11.960655737704919, "grad_norm": 21.13701820373535, "learning_rate": 1.9581827416982522e-05, "loss": 1.8281, "step": 3648 }, { "epoch": 11.963934426229509, "grad_norm": 9.06950569152832, "learning_rate": 1.958152349655675e-05, "loss": 1.6995, "step": 3649 }, { "epoch": 11.967213114754099, "grad_norm": 26.04060173034668, "learning_rate": 1.958121946808934e-05, "loss": 1.8804, "step": 3650 }, { "epoch": 11.970491803278689, "grad_norm": 7.205924987792969, "learning_rate": 1.9580915331583717e-05, "loss": 1.6636, "step": 3651 }, { "epoch": 11.973770491803279, "grad_norm": 7.664764404296875, "learning_rate": 1.9580611087043315e-05, "loss": 1.7417, "step": 3652 }, { "epoch": 11.97704918032787, "grad_norm": 7.271993637084961, "learning_rate": 1.958030673447156e-05, "loss": 1.8589, "step": 3653 }, { "epoch": 11.98032786885246, "grad_norm": 7.369102478027344, "learning_rate": 1.9580002273871886e-05, "loss": 1.7681, "step": 3654 }, { "epoch": 11.98360655737705, "grad_norm": 13.902050971984863, "learning_rate": 1.9579697705247726e-05, "loss": 1.8721, "step": 3655 }, { "epoch": 11.98688524590164, "grad_norm": 9.28794002532959, "learning_rate": 1.9579393028602512e-05, "loss": 1.8662, "step": 3656 }, { "epoch": 11.99016393442623, "grad_norm": 13.471257209777832, "learning_rate": 1.9579088243939686e-05, "loss": 1.9434, "step": 3657 }, { "epoch": 11.99344262295082, "grad_norm": 9.150496482849121, "learning_rate": 1.9578783351262676e-05, "loss": 1.6746, "step": 3658 }, { "epoch": 11.99672131147541, "grad_norm": 7.807236194610596, "learning_rate": 1.9578478350574925e-05, "loss": 1.9482, "step": 3659 }, { "epoch": 12.0, "grad_norm": 10.065169334411621, "learning_rate": 1.957817324187987e-05, "loss": 1.7759, "step": 3660 }, { "epoch": 12.00327868852459, "grad_norm": 10.252614974975586, "learning_rate": 1.9577868025180958e-05, "loss": 1.6316, "step": 3661 }, { "epoch": 12.00655737704918, "grad_norm": 7.48585844039917, "learning_rate": 1.9577562700481624e-05, "loss": 1.8201, "step": 3662 }, { "epoch": 12.00983606557377, "grad_norm": 9.379265785217285, "learning_rate": 1.9577257267785308e-05, "loss": 1.7842, "step": 3663 }, { "epoch": 12.01311475409836, "grad_norm": 11.637460708618164, "learning_rate": 1.9576951727095462e-05, "loss": 1.6104, "step": 3664 }, { "epoch": 12.01639344262295, "grad_norm": 8.776354789733887, "learning_rate": 1.9576646078415526e-05, "loss": 1.6987, "step": 3665 }, { "epoch": 12.01967213114754, "grad_norm": 10.721081733703613, "learning_rate": 1.957634032174895e-05, "loss": 1.7026, "step": 3666 }, { "epoch": 12.02295081967213, "grad_norm": 7.057308197021484, "learning_rate": 1.957603445709918e-05, "loss": 1.6621, "step": 3667 }, { "epoch": 12.026229508196721, "grad_norm": 8.598077774047852, "learning_rate": 1.9575728484469665e-05, "loss": 1.7666, "step": 3668 }, { "epoch": 12.029508196721311, "grad_norm": 10.16799545288086, "learning_rate": 1.957542240386385e-05, "loss": 1.6494, "step": 3669 }, { "epoch": 12.032786885245901, "grad_norm": 7.682375431060791, "learning_rate": 1.95751162152852e-05, "loss": 1.7056, "step": 3670 }, { "epoch": 12.036065573770491, "grad_norm": 8.327125549316406, "learning_rate": 1.9574809918737155e-05, "loss": 1.7139, "step": 3671 }, { "epoch": 12.039344262295081, "grad_norm": 8.823237419128418, "learning_rate": 1.957450351422317e-05, "loss": 1.4873, "step": 3672 }, { "epoch": 12.042622950819672, "grad_norm": 6.985208034515381, "learning_rate": 1.9574197001746705e-05, "loss": 1.9502, "step": 3673 }, { "epoch": 12.045901639344262, "grad_norm": 10.021523475646973, "learning_rate": 1.9573890381311216e-05, "loss": 1.7495, "step": 3674 }, { "epoch": 12.049180327868852, "grad_norm": 7.9295430183410645, "learning_rate": 1.9573583652920157e-05, "loss": 1.6543, "step": 3675 }, { "epoch": 12.052459016393442, "grad_norm": 7.220077991485596, "learning_rate": 1.957327681657699e-05, "loss": 1.7795, "step": 3676 }, { "epoch": 12.055737704918032, "grad_norm": 8.383488655090332, "learning_rate": 1.9572969872285174e-05, "loss": 1.6042, "step": 3677 }, { "epoch": 12.059016393442622, "grad_norm": 8.167667388916016, "learning_rate": 1.9572662820048164e-05, "loss": 1.7632, "step": 3678 }, { "epoch": 12.062295081967212, "grad_norm": 8.267586708068848, "learning_rate": 1.957235565986943e-05, "loss": 1.7852, "step": 3679 }, { "epoch": 12.065573770491802, "grad_norm": 7.389055252075195, "learning_rate": 1.9572048391752436e-05, "loss": 1.5127, "step": 3680 }, { "epoch": 12.068852459016393, "grad_norm": 11.655980110168457, "learning_rate": 1.957174101570064e-05, "loss": 1.7903, "step": 3681 }, { "epoch": 12.072131147540984, "grad_norm": 6.627402305603027, "learning_rate": 1.9571433531717513e-05, "loss": 1.644, "step": 3682 }, { "epoch": 12.075409836065575, "grad_norm": 9.55419635772705, "learning_rate": 1.9571125939806523e-05, "loss": 1.6306, "step": 3683 }, { "epoch": 12.078688524590165, "grad_norm": 7.365169048309326, "learning_rate": 1.9570818239971134e-05, "loss": 1.5603, "step": 3684 }, { "epoch": 12.081967213114755, "grad_norm": 8.060035705566406, "learning_rate": 1.957051043221482e-05, "loss": 1.4543, "step": 3685 }, { "epoch": 12.085245901639345, "grad_norm": 8.013128280639648, "learning_rate": 1.957020251654105e-05, "loss": 1.6631, "step": 3686 }, { "epoch": 12.088524590163935, "grad_norm": 6.032121658325195, "learning_rate": 1.9569894492953298e-05, "loss": 1.6838, "step": 3687 }, { "epoch": 12.091803278688525, "grad_norm": 8.88003921508789, "learning_rate": 1.9569586361455032e-05, "loss": 1.9937, "step": 3688 }, { "epoch": 12.095081967213115, "grad_norm": 8.162574768066406, "learning_rate": 1.9569278122049732e-05, "loss": 1.6931, "step": 3689 }, { "epoch": 12.098360655737705, "grad_norm": 8.225072860717773, "learning_rate": 1.9568969774740868e-05, "loss": 1.5464, "step": 3690 }, { "epoch": 12.101639344262296, "grad_norm": 7.10259485244751, "learning_rate": 1.9568661319531922e-05, "loss": 1.6899, "step": 3691 }, { "epoch": 12.104918032786886, "grad_norm": 9.201571464538574, "learning_rate": 1.9568352756426374e-05, "loss": 1.6628, "step": 3692 }, { "epoch": 12.108196721311476, "grad_norm": 8.85312557220459, "learning_rate": 1.95680440854277e-05, "loss": 1.8389, "step": 3693 }, { "epoch": 12.111475409836066, "grad_norm": 7.9459991455078125, "learning_rate": 1.956773530653938e-05, "loss": 1.8862, "step": 3694 }, { "epoch": 12.114754098360656, "grad_norm": 12.033971786499023, "learning_rate": 1.9567426419764893e-05, "loss": 1.6619, "step": 3695 }, { "epoch": 12.118032786885246, "grad_norm": 8.937156677246094, "learning_rate": 1.9567117425107728e-05, "loss": 1.7642, "step": 3696 }, { "epoch": 12.121311475409836, "grad_norm": 7.710388660430908, "learning_rate": 1.9566808322571365e-05, "loss": 1.7676, "step": 3697 }, { "epoch": 12.124590163934426, "grad_norm": 9.724797248840332, "learning_rate": 1.9566499112159292e-05, "loss": 1.6545, "step": 3698 }, { "epoch": 12.127868852459017, "grad_norm": 6.825412273406982, "learning_rate": 1.9566189793874998e-05, "loss": 1.8169, "step": 3699 }, { "epoch": 12.131147540983607, "grad_norm": 12.68416976928711, "learning_rate": 1.9565880367721963e-05, "loss": 1.6016, "step": 3700 }, { "epoch": 12.134426229508197, "grad_norm": 8.33867073059082, "learning_rate": 1.9565570833703684e-05, "loss": 1.5872, "step": 3701 }, { "epoch": 12.137704918032787, "grad_norm": 8.48957347869873, "learning_rate": 1.956526119182365e-05, "loss": 1.749, "step": 3702 }, { "epoch": 12.140983606557377, "grad_norm": 9.333846092224121, "learning_rate": 1.9564951442085346e-05, "loss": 1.8223, "step": 3703 }, { "epoch": 12.144262295081967, "grad_norm": 7.1655378341674805, "learning_rate": 1.956464158449227e-05, "loss": 1.7458, "step": 3704 }, { "epoch": 12.147540983606557, "grad_norm": 8.860608100891113, "learning_rate": 1.9564331619047917e-05, "loss": 1.8486, "step": 3705 }, { "epoch": 12.150819672131147, "grad_norm": 9.86210823059082, "learning_rate": 1.956402154575578e-05, "loss": 1.7065, "step": 3706 }, { "epoch": 12.154098360655738, "grad_norm": 7.913991451263428, "learning_rate": 1.9563711364619356e-05, "loss": 1.7612, "step": 3707 }, { "epoch": 12.157377049180328, "grad_norm": 6.092958450317383, "learning_rate": 1.9563401075642142e-05, "loss": 1.813, "step": 3708 }, { "epoch": 12.160655737704918, "grad_norm": 8.886865615844727, "learning_rate": 1.956309067882764e-05, "loss": 1.7378, "step": 3709 }, { "epoch": 12.163934426229508, "grad_norm": 14.636565208435059, "learning_rate": 1.9562780174179346e-05, "loss": 1.8042, "step": 3710 }, { "epoch": 12.167213114754098, "grad_norm": 9.679365158081055, "learning_rate": 1.9562469561700757e-05, "loss": 1.8276, "step": 3711 }, { "epoch": 12.170491803278688, "grad_norm": 8.286332130432129, "learning_rate": 1.956215884139539e-05, "loss": 1.75, "step": 3712 }, { "epoch": 12.173770491803278, "grad_norm": 7.866945266723633, "learning_rate": 1.9561848013266736e-05, "loss": 1.7397, "step": 3713 }, { "epoch": 12.177049180327868, "grad_norm": 9.797919273376465, "learning_rate": 1.9561537077318306e-05, "loss": 1.6494, "step": 3714 }, { "epoch": 12.180327868852459, "grad_norm": 9.047992706298828, "learning_rate": 1.95612260335536e-05, "loss": 1.4641, "step": 3715 }, { "epoch": 12.183606557377049, "grad_norm": 22.947351455688477, "learning_rate": 1.9560914881976135e-05, "loss": 1.8096, "step": 3716 }, { "epoch": 12.186885245901639, "grad_norm": 10.077921867370605, "learning_rate": 1.9560603622589408e-05, "loss": 1.7847, "step": 3717 }, { "epoch": 12.190163934426229, "grad_norm": 8.153295516967773, "learning_rate": 1.956029225539694e-05, "loss": 1.7727, "step": 3718 }, { "epoch": 12.193442622950819, "grad_norm": 9.556252479553223, "learning_rate": 1.955998078040223e-05, "loss": 1.5449, "step": 3719 }, { "epoch": 12.19672131147541, "grad_norm": 8.015146255493164, "learning_rate": 1.9559669197608802e-05, "loss": 1.7456, "step": 3720 }, { "epoch": 12.2, "grad_norm": 8.612029075622559, "learning_rate": 1.9559357507020163e-05, "loss": 1.7988, "step": 3721 }, { "epoch": 12.20327868852459, "grad_norm": 9.942826271057129, "learning_rate": 1.955904570863983e-05, "loss": 1.7861, "step": 3722 }, { "epoch": 12.20655737704918, "grad_norm": 7.880398273468018, "learning_rate": 1.9558733802471313e-05, "loss": 1.5369, "step": 3723 }, { "epoch": 12.20983606557377, "grad_norm": 10.097931861877441, "learning_rate": 1.955842178851814e-05, "loss": 1.7578, "step": 3724 }, { "epoch": 12.21311475409836, "grad_norm": 7.127575874328613, "learning_rate": 1.9558109666783817e-05, "loss": 1.7139, "step": 3725 }, { "epoch": 12.216393442622952, "grad_norm": 6.573888778686523, "learning_rate": 1.955779743727187e-05, "loss": 1.7783, "step": 3726 }, { "epoch": 12.219672131147542, "grad_norm": 8.004319190979004, "learning_rate": 1.9557485099985825e-05, "loss": 1.8167, "step": 3727 }, { "epoch": 12.222950819672132, "grad_norm": 11.107699394226074, "learning_rate": 1.95571726549292e-05, "loss": 1.6094, "step": 3728 }, { "epoch": 12.226229508196722, "grad_norm": 9.989681243896484, "learning_rate": 1.955686010210551e-05, "loss": 1.7261, "step": 3729 }, { "epoch": 12.229508196721312, "grad_norm": 7.444446086883545, "learning_rate": 1.9556547441518285e-05, "loss": 1.8931, "step": 3730 }, { "epoch": 12.232786885245902, "grad_norm": 7.275251388549805, "learning_rate": 1.9556234673171053e-05, "loss": 1.6438, "step": 3731 }, { "epoch": 12.236065573770492, "grad_norm": 11.2361478805542, "learning_rate": 1.955592179706734e-05, "loss": 1.8394, "step": 3732 }, { "epoch": 12.239344262295083, "grad_norm": 7.86389684677124, "learning_rate": 1.9555608813210672e-05, "loss": 1.6326, "step": 3733 }, { "epoch": 12.242622950819673, "grad_norm": 7.608558654785156, "learning_rate": 1.955529572160458e-05, "loss": 1.8745, "step": 3734 }, { "epoch": 12.245901639344263, "grad_norm": 7.067723751068115, "learning_rate": 1.9554982522252596e-05, "loss": 1.6738, "step": 3735 }, { "epoch": 12.249180327868853, "grad_norm": 9.652447700500488, "learning_rate": 1.9554669215158247e-05, "loss": 1.6382, "step": 3736 }, { "epoch": 12.252459016393443, "grad_norm": 7.842999458312988, "learning_rate": 1.9554355800325068e-05, "loss": 1.7729, "step": 3737 }, { "epoch": 12.255737704918033, "grad_norm": 13.11279582977295, "learning_rate": 1.9554042277756592e-05, "loss": 1.7485, "step": 3738 }, { "epoch": 12.259016393442623, "grad_norm": 10.80677604675293, "learning_rate": 1.9553728647456358e-05, "loss": 1.8521, "step": 3739 }, { "epoch": 12.262295081967213, "grad_norm": 7.418334484100342, "learning_rate": 1.9553414909427898e-05, "loss": 1.7144, "step": 3740 }, { "epoch": 12.265573770491804, "grad_norm": 6.8581366539001465, "learning_rate": 1.9553101063674753e-05, "loss": 1.7014, "step": 3741 }, { "epoch": 12.268852459016394, "grad_norm": 9.344039916992188, "learning_rate": 1.955278711020046e-05, "loss": 1.6953, "step": 3742 }, { "epoch": 12.272131147540984, "grad_norm": 6.820585250854492, "learning_rate": 1.9552473049008563e-05, "loss": 1.6909, "step": 3743 }, { "epoch": 12.275409836065574, "grad_norm": 9.406440734863281, "learning_rate": 1.9552158880102595e-05, "loss": 1.7515, "step": 3744 }, { "epoch": 12.278688524590164, "grad_norm": 8.037901878356934, "learning_rate": 1.955184460348611e-05, "loss": 1.9277, "step": 3745 }, { "epoch": 12.281967213114754, "grad_norm": 8.935830116271973, "learning_rate": 1.9551530219162643e-05, "loss": 1.7256, "step": 3746 }, { "epoch": 12.285245901639344, "grad_norm": 12.529179573059082, "learning_rate": 1.9551215727135743e-05, "loss": 1.626, "step": 3747 }, { "epoch": 12.288524590163934, "grad_norm": 6.780552387237549, "learning_rate": 1.9550901127408953e-05, "loss": 1.6165, "step": 3748 }, { "epoch": 12.291803278688525, "grad_norm": 5.755768775939941, "learning_rate": 1.955058641998582e-05, "loss": 1.9395, "step": 3749 }, { "epoch": 12.295081967213115, "grad_norm": 7.373047351837158, "learning_rate": 1.95502716048699e-05, "loss": 1.7422, "step": 3750 }, { "epoch": 12.298360655737705, "grad_norm": 9.735115051269531, "learning_rate": 1.9549956682064733e-05, "loss": 1.7007, "step": 3751 }, { "epoch": 12.301639344262295, "grad_norm": 6.406325817108154, "learning_rate": 1.954964165157388e-05, "loss": 1.7939, "step": 3752 }, { "epoch": 12.304918032786885, "grad_norm": 8.61125659942627, "learning_rate": 1.9549326513400883e-05, "loss": 1.7754, "step": 3753 }, { "epoch": 12.308196721311475, "grad_norm": 7.227832317352295, "learning_rate": 1.9549011267549304e-05, "loss": 1.8784, "step": 3754 }, { "epoch": 12.311475409836065, "grad_norm": 7.443068981170654, "learning_rate": 1.9548695914022694e-05, "loss": 1.7324, "step": 3755 }, { "epoch": 12.314754098360655, "grad_norm": 6.69816255569458, "learning_rate": 1.954838045282461e-05, "loss": 1.8633, "step": 3756 }, { "epoch": 12.318032786885245, "grad_norm": 6.7160773277282715, "learning_rate": 1.9548064883958608e-05, "loss": 1.5571, "step": 3757 }, { "epoch": 12.321311475409836, "grad_norm": 6.589250564575195, "learning_rate": 1.9547749207428245e-05, "loss": 1.7935, "step": 3758 }, { "epoch": 12.324590163934426, "grad_norm": 10.79044246673584, "learning_rate": 1.9547433423237087e-05, "loss": 1.8267, "step": 3759 }, { "epoch": 12.327868852459016, "grad_norm": 8.367557525634766, "learning_rate": 1.9547117531388685e-05, "loss": 1.7354, "step": 3760 }, { "epoch": 12.331147540983606, "grad_norm": 7.502979278564453, "learning_rate": 1.954680153188661e-05, "loss": 1.9053, "step": 3761 }, { "epoch": 12.334426229508196, "grad_norm": 8.069281578063965, "learning_rate": 1.954648542473442e-05, "loss": 1.7549, "step": 3762 }, { "epoch": 12.337704918032786, "grad_norm": 7.77177095413208, "learning_rate": 1.954616920993568e-05, "loss": 1.6477, "step": 3763 }, { "epoch": 12.340983606557376, "grad_norm": 9.242547988891602, "learning_rate": 1.954585288749396e-05, "loss": 1.6211, "step": 3764 }, { "epoch": 12.344262295081966, "grad_norm": 6.475553035736084, "learning_rate": 1.954553645741282e-05, "loss": 1.8267, "step": 3765 }, { "epoch": 12.347540983606557, "grad_norm": 7.05175256729126, "learning_rate": 1.9545219919695834e-05, "loss": 1.7656, "step": 3766 }, { "epoch": 12.350819672131147, "grad_norm": 7.611342430114746, "learning_rate": 1.9544903274346567e-05, "loss": 1.7319, "step": 3767 }, { "epoch": 12.354098360655737, "grad_norm": 6.439746379852295, "learning_rate": 1.9544586521368594e-05, "loss": 1.6454, "step": 3768 }, { "epoch": 12.357377049180329, "grad_norm": 8.77396011352539, "learning_rate": 1.9544269660765483e-05, "loss": 1.7246, "step": 3769 }, { "epoch": 12.360655737704919, "grad_norm": 8.215704917907715, "learning_rate": 1.954395269254081e-05, "loss": 1.6426, "step": 3770 }, { "epoch": 12.363934426229509, "grad_norm": 6.359377861022949, "learning_rate": 1.9543635616698142e-05, "loss": 1.7305, "step": 3771 }, { "epoch": 12.3672131147541, "grad_norm": 7.253904342651367, "learning_rate": 1.954331843324106e-05, "loss": 1.5989, "step": 3772 }, { "epoch": 12.37049180327869, "grad_norm": 8.912155151367188, "learning_rate": 1.9543001142173144e-05, "loss": 1.6243, "step": 3773 }, { "epoch": 12.37377049180328, "grad_norm": 12.384425163269043, "learning_rate": 1.9542683743497964e-05, "loss": 1.6091, "step": 3774 }, { "epoch": 12.37704918032787, "grad_norm": 15.634674072265625, "learning_rate": 1.9542366237219107e-05, "loss": 1.6245, "step": 3775 }, { "epoch": 12.38032786885246, "grad_norm": 7.261328220367432, "learning_rate": 1.954204862334015e-05, "loss": 2.0308, "step": 3776 }, { "epoch": 12.38360655737705, "grad_norm": 9.242940902709961, "learning_rate": 1.9541730901864668e-05, "loss": 1.7046, "step": 3777 }, { "epoch": 12.38688524590164, "grad_norm": 7.596853733062744, "learning_rate": 1.9541413072796252e-05, "loss": 1.8179, "step": 3778 }, { "epoch": 12.39016393442623, "grad_norm": 8.199097633361816, "learning_rate": 1.9541095136138484e-05, "loss": 1.7205, "step": 3779 }, { "epoch": 12.39344262295082, "grad_norm": 10.566335678100586, "learning_rate": 1.9540777091894948e-05, "loss": 1.6201, "step": 3780 }, { "epoch": 12.39672131147541, "grad_norm": 11.80070972442627, "learning_rate": 1.9540458940069226e-05, "loss": 1.4941, "step": 3781 }, { "epoch": 12.4, "grad_norm": 10.282422065734863, "learning_rate": 1.9540140680664915e-05, "loss": 1.7721, "step": 3782 }, { "epoch": 12.40327868852459, "grad_norm": 9.019837379455566, "learning_rate": 1.9539822313685597e-05, "loss": 1.6377, "step": 3783 }, { "epoch": 12.40655737704918, "grad_norm": 8.7335786819458, "learning_rate": 1.9539503839134863e-05, "loss": 1.8535, "step": 3784 }, { "epoch": 12.40983606557377, "grad_norm": 8.836071014404297, "learning_rate": 1.9539185257016305e-05, "loss": 1.5491, "step": 3785 }, { "epoch": 12.41311475409836, "grad_norm": 8.213043212890625, "learning_rate": 1.9538866567333514e-05, "loss": 1.7124, "step": 3786 }, { "epoch": 12.416393442622951, "grad_norm": 8.838295936584473, "learning_rate": 1.9538547770090085e-05, "loss": 1.4817, "step": 3787 }, { "epoch": 12.419672131147541, "grad_norm": 8.641596794128418, "learning_rate": 1.9538228865289613e-05, "loss": 1.7, "step": 3788 }, { "epoch": 12.422950819672131, "grad_norm": 7.906771659851074, "learning_rate": 1.9537909852935692e-05, "loss": 1.8369, "step": 3789 }, { "epoch": 12.426229508196721, "grad_norm": 9.559908866882324, "learning_rate": 1.9537590733031925e-05, "loss": 1.561, "step": 3790 }, { "epoch": 12.429508196721311, "grad_norm": 8.05343246459961, "learning_rate": 1.9537271505581902e-05, "loss": 1.8511, "step": 3791 }, { "epoch": 12.432786885245902, "grad_norm": 8.375425338745117, "learning_rate": 1.9536952170589225e-05, "loss": 1.6836, "step": 3792 }, { "epoch": 12.436065573770492, "grad_norm": 10.131205558776855, "learning_rate": 1.95366327280575e-05, "loss": 1.6538, "step": 3793 }, { "epoch": 12.439344262295082, "grad_norm": 8.759239196777344, "learning_rate": 1.9536313177990323e-05, "loss": 1.561, "step": 3794 }, { "epoch": 12.442622950819672, "grad_norm": 6.165786266326904, "learning_rate": 1.9535993520391298e-05, "loss": 1.6846, "step": 3795 }, { "epoch": 12.445901639344262, "grad_norm": 6.912637710571289, "learning_rate": 1.9535673755264035e-05, "loss": 1.5474, "step": 3796 }, { "epoch": 12.449180327868852, "grad_norm": 8.492154121398926, "learning_rate": 1.953535388261213e-05, "loss": 1.666, "step": 3797 }, { "epoch": 12.452459016393442, "grad_norm": 8.540773391723633, "learning_rate": 1.95350339024392e-05, "loss": 1.802, "step": 3798 }, { "epoch": 12.455737704918032, "grad_norm": 9.651715278625488, "learning_rate": 1.9534713814748852e-05, "loss": 1.7949, "step": 3799 }, { "epoch": 12.459016393442623, "grad_norm": 8.197101593017578, "learning_rate": 1.9534393619544687e-05, "loss": 1.8872, "step": 3800 }, { "epoch": 12.462295081967213, "grad_norm": 7.172845363616943, "learning_rate": 1.9534073316830322e-05, "loss": 1.7698, "step": 3801 }, { "epoch": 12.465573770491803, "grad_norm": 7.5680460929870605, "learning_rate": 1.953375290660937e-05, "loss": 1.7046, "step": 3802 }, { "epoch": 12.468852459016393, "grad_norm": 8.392759323120117, "learning_rate": 1.9533432388885436e-05, "loss": 1.6643, "step": 3803 }, { "epoch": 12.472131147540983, "grad_norm": 9.281576156616211, "learning_rate": 1.9533111763662145e-05, "loss": 1.8467, "step": 3804 }, { "epoch": 12.475409836065573, "grad_norm": 7.809993743896484, "learning_rate": 1.9532791030943102e-05, "loss": 1.8206, "step": 3805 }, { "epoch": 12.478688524590163, "grad_norm": 15.781888961791992, "learning_rate": 1.9532470190731932e-05, "loss": 1.5132, "step": 3806 }, { "epoch": 12.481967213114753, "grad_norm": 7.978335380554199, "learning_rate": 1.9532149243032248e-05, "loss": 1.7754, "step": 3807 }, { "epoch": 12.485245901639344, "grad_norm": 8.904522895812988, "learning_rate": 1.9531828187847674e-05, "loss": 1.6299, "step": 3808 }, { "epoch": 12.488524590163934, "grad_norm": 10.885786056518555, "learning_rate": 1.953150702518182e-05, "loss": 1.6953, "step": 3809 }, { "epoch": 12.491803278688524, "grad_norm": 7.012712478637695, "learning_rate": 1.9531185755038318e-05, "loss": 1.6819, "step": 3810 }, { "epoch": 12.495081967213114, "grad_norm": 8.776663780212402, "learning_rate": 1.9530864377420786e-05, "loss": 1.6599, "step": 3811 }, { "epoch": 12.498360655737706, "grad_norm": 8.940269470214844, "learning_rate": 1.953054289233285e-05, "loss": 1.7329, "step": 3812 }, { "epoch": 12.501639344262294, "grad_norm": 7.876223564147949, "learning_rate": 1.953022129977813e-05, "loss": 1.7856, "step": 3813 }, { "epoch": 12.504918032786886, "grad_norm": 11.410469055175781, "learning_rate": 1.9529899599760257e-05, "loss": 1.6775, "step": 3814 }, { "epoch": 12.508196721311476, "grad_norm": 8.975622177124023, "learning_rate": 1.9529577792282863e-05, "loss": 1.6583, "step": 3815 }, { "epoch": 12.511475409836066, "grad_norm": 6.591473579406738, "learning_rate": 1.9529255877349564e-05, "loss": 1.9009, "step": 3816 }, { "epoch": 12.514754098360656, "grad_norm": 8.01889419555664, "learning_rate": 1.9528933854963997e-05, "loss": 1.7139, "step": 3817 }, { "epoch": 12.518032786885247, "grad_norm": 11.105778694152832, "learning_rate": 1.9528611725129797e-05, "loss": 1.8181, "step": 3818 }, { "epoch": 12.521311475409837, "grad_norm": 8.456811904907227, "learning_rate": 1.9528289487850592e-05, "loss": 1.5461, "step": 3819 }, { "epoch": 12.524590163934427, "grad_norm": 9.681485176086426, "learning_rate": 1.9527967143130014e-05, "loss": 1.7593, "step": 3820 }, { "epoch": 12.527868852459017, "grad_norm": 8.11294937133789, "learning_rate": 1.9527644690971704e-05, "loss": 1.7134, "step": 3821 }, { "epoch": 12.531147540983607, "grad_norm": 10.046004295349121, "learning_rate": 1.9527322131379288e-05, "loss": 1.7964, "step": 3822 }, { "epoch": 12.534426229508197, "grad_norm": 7.33745813369751, "learning_rate": 1.9526999464356413e-05, "loss": 1.6614, "step": 3823 }, { "epoch": 12.537704918032787, "grad_norm": 8.686037063598633, "learning_rate": 1.9526676689906714e-05, "loss": 1.687, "step": 3824 }, { "epoch": 12.540983606557377, "grad_norm": 7.066766262054443, "learning_rate": 1.9526353808033827e-05, "loss": 1.498, "step": 3825 }, { "epoch": 12.544262295081968, "grad_norm": 8.025383949279785, "learning_rate": 1.9526030818741397e-05, "loss": 1.8774, "step": 3826 }, { "epoch": 12.547540983606558, "grad_norm": 7.386172294616699, "learning_rate": 1.9525707722033064e-05, "loss": 1.6287, "step": 3827 }, { "epoch": 12.550819672131148, "grad_norm": 7.2489237785339355, "learning_rate": 1.9525384517912475e-05, "loss": 1.7646, "step": 3828 }, { "epoch": 12.554098360655738, "grad_norm": 9.7201509475708, "learning_rate": 1.952506120638327e-05, "loss": 1.5715, "step": 3829 }, { "epoch": 12.557377049180328, "grad_norm": 8.831141471862793, "learning_rate": 1.9524737787449096e-05, "loss": 1.5576, "step": 3830 }, { "epoch": 12.560655737704918, "grad_norm": 6.854958534240723, "learning_rate": 1.9524414261113604e-05, "loss": 1.8296, "step": 3831 }, { "epoch": 12.563934426229508, "grad_norm": 6.950232982635498, "learning_rate": 1.9524090627380432e-05, "loss": 1.6011, "step": 3832 }, { "epoch": 12.567213114754098, "grad_norm": 6.753100872039795, "learning_rate": 1.952376688625324e-05, "loss": 1.7036, "step": 3833 }, { "epoch": 12.570491803278689, "grad_norm": 5.585076332092285, "learning_rate": 1.952344303773567e-05, "loss": 1.8066, "step": 3834 }, { "epoch": 12.573770491803279, "grad_norm": 8.638566970825195, "learning_rate": 1.952311908183138e-05, "loss": 1.7139, "step": 3835 }, { "epoch": 12.577049180327869, "grad_norm": 10.3825101852417, "learning_rate": 1.9522795018544026e-05, "loss": 1.6536, "step": 3836 }, { "epoch": 12.580327868852459, "grad_norm": 10.92320728302002, "learning_rate": 1.9522470847877252e-05, "loss": 1.7021, "step": 3837 }, { "epoch": 12.583606557377049, "grad_norm": 7.071676731109619, "learning_rate": 1.9522146569834717e-05, "loss": 1.8506, "step": 3838 }, { "epoch": 12.58688524590164, "grad_norm": 8.00593376159668, "learning_rate": 1.952182218442008e-05, "loss": 1.7451, "step": 3839 }, { "epoch": 12.59016393442623, "grad_norm": 6.556617259979248, "learning_rate": 1.9521497691637e-05, "loss": 1.5696, "step": 3840 }, { "epoch": 12.59344262295082, "grad_norm": 7.697526454925537, "learning_rate": 1.952117309148913e-05, "loss": 1.6523, "step": 3841 }, { "epoch": 12.59672131147541, "grad_norm": 8.131362915039062, "learning_rate": 1.9520848383980136e-05, "loss": 1.7534, "step": 3842 }, { "epoch": 12.6, "grad_norm": 6.78043270111084, "learning_rate": 1.952052356911368e-05, "loss": 1.9155, "step": 3843 }, { "epoch": 12.60327868852459, "grad_norm": 8.43139362335205, "learning_rate": 1.9520198646893415e-05, "loss": 1.7017, "step": 3844 }, { "epoch": 12.60655737704918, "grad_norm": 8.06932258605957, "learning_rate": 1.9519873617323015e-05, "loss": 1.708, "step": 3845 }, { "epoch": 12.60983606557377, "grad_norm": 11.054096221923828, "learning_rate": 1.9519548480406144e-05, "loss": 1.73, "step": 3846 }, { "epoch": 12.61311475409836, "grad_norm": 8.80752944946289, "learning_rate": 1.9519223236146464e-05, "loss": 1.7546, "step": 3847 }, { "epoch": 12.61639344262295, "grad_norm": 8.700400352478027, "learning_rate": 1.9518897884547646e-05, "loss": 1.8003, "step": 3848 }, { "epoch": 12.61967213114754, "grad_norm": 8.017192840576172, "learning_rate": 1.9518572425613354e-05, "loss": 1.855, "step": 3849 }, { "epoch": 12.62295081967213, "grad_norm": 8.195836067199707, "learning_rate": 1.9518246859347263e-05, "loss": 1.6689, "step": 3850 }, { "epoch": 12.62622950819672, "grad_norm": 8.16276741027832, "learning_rate": 1.9517921185753044e-05, "loss": 1.8486, "step": 3851 }, { "epoch": 12.62950819672131, "grad_norm": 8.37197494506836, "learning_rate": 1.9517595404834363e-05, "loss": 1.5234, "step": 3852 }, { "epoch": 12.6327868852459, "grad_norm": 9.728370666503906, "learning_rate": 1.9517269516594904e-05, "loss": 1.668, "step": 3853 }, { "epoch": 12.636065573770491, "grad_norm": 16.022815704345703, "learning_rate": 1.9516943521038332e-05, "loss": 1.7334, "step": 3854 }, { "epoch": 12.639344262295083, "grad_norm": 10.351297378540039, "learning_rate": 1.9516617418168327e-05, "loss": 1.7358, "step": 3855 }, { "epoch": 12.642622950819671, "grad_norm": 12.47158145904541, "learning_rate": 1.9516291207988564e-05, "loss": 1.6477, "step": 3856 }, { "epoch": 12.645901639344263, "grad_norm": 8.351393699645996, "learning_rate": 1.9515964890502728e-05, "loss": 1.7554, "step": 3857 }, { "epoch": 12.649180327868853, "grad_norm": 7.927154541015625, "learning_rate": 1.9515638465714487e-05, "loss": 1.667, "step": 3858 }, { "epoch": 12.652459016393443, "grad_norm": 8.873129844665527, "learning_rate": 1.9515311933627534e-05, "loss": 1.7029, "step": 3859 }, { "epoch": 12.655737704918034, "grad_norm": 6.472550392150879, "learning_rate": 1.9514985294245545e-05, "loss": 1.6846, "step": 3860 }, { "epoch": 12.659016393442624, "grad_norm": 7.6146392822265625, "learning_rate": 1.95146585475722e-05, "loss": 1.7412, "step": 3861 }, { "epoch": 12.662295081967214, "grad_norm": 6.912752628326416, "learning_rate": 1.9514331693611193e-05, "loss": 1.6863, "step": 3862 }, { "epoch": 12.665573770491804, "grad_norm": 9.60619068145752, "learning_rate": 1.9514004732366198e-05, "loss": 1.5903, "step": 3863 }, { "epoch": 12.668852459016394, "grad_norm": 12.42648696899414, "learning_rate": 1.951367766384091e-05, "loss": 1.5742, "step": 3864 }, { "epoch": 12.672131147540984, "grad_norm": 8.3316011428833, "learning_rate": 1.9513350488039017e-05, "loss": 1.8057, "step": 3865 }, { "epoch": 12.675409836065574, "grad_norm": 9.225946426391602, "learning_rate": 1.9513023204964203e-05, "loss": 1.8147, "step": 3866 }, { "epoch": 12.678688524590164, "grad_norm": 7.441381454467773, "learning_rate": 1.951269581462016e-05, "loss": 1.5908, "step": 3867 }, { "epoch": 12.681967213114755, "grad_norm": 7.336119651794434, "learning_rate": 1.951236831701058e-05, "loss": 1.498, "step": 3868 }, { "epoch": 12.685245901639345, "grad_norm": 5.647051811218262, "learning_rate": 1.9512040712139164e-05, "loss": 1.8628, "step": 3869 }, { "epoch": 12.688524590163935, "grad_norm": 11.05293083190918, "learning_rate": 1.9511713000009592e-05, "loss": 1.8237, "step": 3870 }, { "epoch": 12.691803278688525, "grad_norm": 9.440353393554688, "learning_rate": 1.9511385180625568e-05, "loss": 1.7339, "step": 3871 }, { "epoch": 12.695081967213115, "grad_norm": 9.180976867675781, "learning_rate": 1.9511057253990787e-05, "loss": 1.5371, "step": 3872 }, { "epoch": 12.698360655737705, "grad_norm": 7.709895610809326, "learning_rate": 1.9510729220108947e-05, "loss": 1.6975, "step": 3873 }, { "epoch": 12.701639344262295, "grad_norm": 9.238068580627441, "learning_rate": 1.9510401078983747e-05, "loss": 1.7607, "step": 3874 }, { "epoch": 12.704918032786885, "grad_norm": 9.590826988220215, "learning_rate": 1.9510072830618887e-05, "loss": 1.6252, "step": 3875 }, { "epoch": 12.708196721311475, "grad_norm": 7.036394119262695, "learning_rate": 1.9509744475018067e-05, "loss": 1.8386, "step": 3876 }, { "epoch": 12.711475409836066, "grad_norm": 11.410401344299316, "learning_rate": 1.950941601218499e-05, "loss": 1.644, "step": 3877 }, { "epoch": 12.714754098360656, "grad_norm": 6.657715320587158, "learning_rate": 1.950908744212336e-05, "loss": 1.708, "step": 3878 }, { "epoch": 12.718032786885246, "grad_norm": 7.741693496704102, "learning_rate": 1.950875876483688e-05, "loss": 1.6653, "step": 3879 }, { "epoch": 12.721311475409836, "grad_norm": 7.162639617919922, "learning_rate": 1.9508429980329262e-05, "loss": 1.6462, "step": 3880 }, { "epoch": 12.724590163934426, "grad_norm": 8.070831298828125, "learning_rate": 1.9508101088604207e-05, "loss": 1.8872, "step": 3881 }, { "epoch": 12.727868852459016, "grad_norm": 7.35647439956665, "learning_rate": 1.950777208966543e-05, "loss": 1.6968, "step": 3882 }, { "epoch": 12.731147540983606, "grad_norm": 8.499799728393555, "learning_rate": 1.9507442983516634e-05, "loss": 1.5854, "step": 3883 }, { "epoch": 12.734426229508196, "grad_norm": 27.01288604736328, "learning_rate": 1.9507113770161533e-05, "loss": 1.6086, "step": 3884 }, { "epoch": 12.737704918032787, "grad_norm": 8.486231803894043, "learning_rate": 1.950678444960384e-05, "loss": 1.6299, "step": 3885 }, { "epoch": 12.740983606557377, "grad_norm": 6.280803680419922, "learning_rate": 1.9506455021847267e-05, "loss": 1.7793, "step": 3886 }, { "epoch": 12.744262295081967, "grad_norm": 8.803375244140625, "learning_rate": 1.9506125486895532e-05, "loss": 1.6785, "step": 3887 }, { "epoch": 12.747540983606557, "grad_norm": 9.573941230773926, "learning_rate": 1.9505795844752343e-05, "loss": 1.7197, "step": 3888 }, { "epoch": 12.750819672131147, "grad_norm": 6.5108842849731445, "learning_rate": 1.9505466095421428e-05, "loss": 1.6353, "step": 3889 }, { "epoch": 12.754098360655737, "grad_norm": 8.8269681930542, "learning_rate": 1.9505136238906496e-05, "loss": 1.7207, "step": 3890 }, { "epoch": 12.757377049180327, "grad_norm": 6.685344219207764, "learning_rate": 1.9504806275211274e-05, "loss": 1.7568, "step": 3891 }, { "epoch": 12.760655737704917, "grad_norm": 8.395445823669434, "learning_rate": 1.9504476204339475e-05, "loss": 1.7358, "step": 3892 }, { "epoch": 12.763934426229508, "grad_norm": 22.951969146728516, "learning_rate": 1.9504146026294824e-05, "loss": 1.7361, "step": 3893 }, { "epoch": 12.767213114754098, "grad_norm": 6.6346540451049805, "learning_rate": 1.9503815741081047e-05, "loss": 1.7451, "step": 3894 }, { "epoch": 12.770491803278688, "grad_norm": 10.084416389465332, "learning_rate": 1.9503485348701865e-05, "loss": 1.7051, "step": 3895 }, { "epoch": 12.773770491803278, "grad_norm": 7.781970500946045, "learning_rate": 1.9503154849161006e-05, "loss": 1.8291, "step": 3896 }, { "epoch": 12.777049180327868, "grad_norm": 9.921029090881348, "learning_rate": 1.9502824242462193e-05, "loss": 1.6885, "step": 3897 }, { "epoch": 12.780327868852458, "grad_norm": 7.326314449310303, "learning_rate": 1.9502493528609155e-05, "loss": 1.585, "step": 3898 }, { "epoch": 12.783606557377048, "grad_norm": 9.252840995788574, "learning_rate": 1.950216270760563e-05, "loss": 1.7029, "step": 3899 }, { "epoch": 12.78688524590164, "grad_norm": 7.285743713378906, "learning_rate": 1.9501831779455332e-05, "loss": 1.5439, "step": 3900 }, { "epoch": 12.790163934426229, "grad_norm": 7.189335346221924, "learning_rate": 1.9501500744162005e-05, "loss": 1.6748, "step": 3901 }, { "epoch": 12.79344262295082, "grad_norm": 6.552606105804443, "learning_rate": 1.9501169601729377e-05, "loss": 1.9343, "step": 3902 }, { "epoch": 12.79672131147541, "grad_norm": 9.628034591674805, "learning_rate": 1.9500838352161184e-05, "loss": 1.6516, "step": 3903 }, { "epoch": 12.8, "grad_norm": 6.563745498657227, "learning_rate": 1.950050699546116e-05, "loss": 1.7871, "step": 3904 }, { "epoch": 12.80327868852459, "grad_norm": 8.757065773010254, "learning_rate": 1.950017553163304e-05, "loss": 1.3669, "step": 3905 }, { "epoch": 12.806557377049181, "grad_norm": 7.923970699310303, "learning_rate": 1.9499843960680568e-05, "loss": 1.7793, "step": 3906 }, { "epoch": 12.809836065573771, "grad_norm": 6.542008876800537, "learning_rate": 1.9499512282607472e-05, "loss": 1.8218, "step": 3907 }, { "epoch": 12.813114754098361, "grad_norm": 11.706644058227539, "learning_rate": 1.9499180497417503e-05, "loss": 1.6416, "step": 3908 }, { "epoch": 12.816393442622951, "grad_norm": 8.678603172302246, "learning_rate": 1.9498848605114393e-05, "loss": 1.6162, "step": 3909 }, { "epoch": 12.819672131147541, "grad_norm": 9.569181442260742, "learning_rate": 1.949851660570189e-05, "loss": 1.5972, "step": 3910 }, { "epoch": 12.822950819672132, "grad_norm": 11.106756210327148, "learning_rate": 1.9498184499183738e-05, "loss": 1.6907, "step": 3911 }, { "epoch": 12.826229508196722, "grad_norm": 8.607733726501465, "learning_rate": 1.9497852285563677e-05, "loss": 1.5974, "step": 3912 }, { "epoch": 12.829508196721312, "grad_norm": 9.461520195007324, "learning_rate": 1.949751996484546e-05, "loss": 1.6282, "step": 3913 }, { "epoch": 12.832786885245902, "grad_norm": 7.467751979827881, "learning_rate": 1.949718753703283e-05, "loss": 1.7839, "step": 3914 }, { "epoch": 12.836065573770492, "grad_norm": 7.664709091186523, "learning_rate": 1.9496855002129532e-05, "loss": 1.6299, "step": 3915 }, { "epoch": 12.839344262295082, "grad_norm": 6.745265007019043, "learning_rate": 1.9496522360139323e-05, "loss": 1.8745, "step": 3916 }, { "epoch": 12.842622950819672, "grad_norm": 9.524698257446289, "learning_rate": 1.949618961106595e-05, "loss": 1.7441, "step": 3917 }, { "epoch": 12.845901639344262, "grad_norm": 9.110187530517578, "learning_rate": 1.949585675491316e-05, "loss": 1.625, "step": 3918 }, { "epoch": 12.849180327868853, "grad_norm": 8.537224769592285, "learning_rate": 1.949552379168472e-05, "loss": 1.5603, "step": 3919 }, { "epoch": 12.852459016393443, "grad_norm": 8.255026817321777, "learning_rate": 1.9495190721384377e-05, "loss": 1.8945, "step": 3920 }, { "epoch": 12.855737704918033, "grad_norm": 7.918971061706543, "learning_rate": 1.9494857544015883e-05, "loss": 1.7876, "step": 3921 }, { "epoch": 12.859016393442623, "grad_norm": 10.145623207092285, "learning_rate": 1.9494524259582994e-05, "loss": 1.6763, "step": 3922 }, { "epoch": 12.862295081967213, "grad_norm": 8.68545150756836, "learning_rate": 1.9494190868089475e-05, "loss": 1.7241, "step": 3923 }, { "epoch": 12.865573770491803, "grad_norm": 5.409553050994873, "learning_rate": 1.9493857369539084e-05, "loss": 1.751, "step": 3924 }, { "epoch": 12.868852459016393, "grad_norm": 8.40825366973877, "learning_rate": 1.949352376393558e-05, "loss": 1.8169, "step": 3925 }, { "epoch": 12.872131147540983, "grad_norm": 9.086198806762695, "learning_rate": 1.9493190051282723e-05, "loss": 1.7476, "step": 3926 }, { "epoch": 12.875409836065574, "grad_norm": 7.928402423858643, "learning_rate": 1.9492856231584277e-05, "loss": 1.7412, "step": 3927 }, { "epoch": 12.878688524590164, "grad_norm": 7.257669925689697, "learning_rate": 1.949252230484401e-05, "loss": 1.644, "step": 3928 }, { "epoch": 12.881967213114754, "grad_norm": 7.802363395690918, "learning_rate": 1.9492188271065685e-05, "loss": 1.7373, "step": 3929 }, { "epoch": 12.885245901639344, "grad_norm": 7.8457441329956055, "learning_rate": 1.9491854130253064e-05, "loss": 1.9019, "step": 3930 }, { "epoch": 12.888524590163934, "grad_norm": 11.413705825805664, "learning_rate": 1.949151988240992e-05, "loss": 1.9282, "step": 3931 }, { "epoch": 12.891803278688524, "grad_norm": 7.309439659118652, "learning_rate": 1.949118552754002e-05, "loss": 1.7197, "step": 3932 }, { "epoch": 12.895081967213114, "grad_norm": 8.28775691986084, "learning_rate": 1.949085106564714e-05, "loss": 1.7349, "step": 3933 }, { "epoch": 12.898360655737704, "grad_norm": 8.833959579467773, "learning_rate": 1.9490516496735038e-05, "loss": 1.8013, "step": 3934 }, { "epoch": 12.901639344262295, "grad_norm": 8.792633056640625, "learning_rate": 1.94901818208075e-05, "loss": 1.8911, "step": 3935 }, { "epoch": 12.904918032786885, "grad_norm": 7.159260272979736, "learning_rate": 1.948984703786829e-05, "loss": 1.8662, "step": 3936 }, { "epoch": 12.908196721311475, "grad_norm": 8.997212409973145, "learning_rate": 1.9489512147921195e-05, "loss": 1.7317, "step": 3937 }, { "epoch": 12.911475409836065, "grad_norm": 7.034785270690918, "learning_rate": 1.948917715096998e-05, "loss": 1.8984, "step": 3938 }, { "epoch": 12.914754098360655, "grad_norm": 7.199057579040527, "learning_rate": 1.9488842047018423e-05, "loss": 1.6121, "step": 3939 }, { "epoch": 12.918032786885245, "grad_norm": 80.73486328125, "learning_rate": 1.9488506836070313e-05, "loss": 1.6677, "step": 3940 }, { "epoch": 12.921311475409835, "grad_norm": 7.195659637451172, "learning_rate": 1.948817151812942e-05, "loss": 1.5701, "step": 3941 }, { "epoch": 12.924590163934425, "grad_norm": 11.181449890136719, "learning_rate": 1.9487836093199525e-05, "loss": 1.7451, "step": 3942 }, { "epoch": 12.927868852459017, "grad_norm": 7.8685173988342285, "learning_rate": 1.9487500561284417e-05, "loss": 1.8276, "step": 3943 }, { "epoch": 12.931147540983606, "grad_norm": 24.589941024780273, "learning_rate": 1.9487164922387875e-05, "loss": 1.8403, "step": 3944 }, { "epoch": 12.934426229508198, "grad_norm": 9.141525268554688, "learning_rate": 1.948682917651368e-05, "loss": 1.6909, "step": 3945 }, { "epoch": 12.937704918032788, "grad_norm": 29.617420196533203, "learning_rate": 1.9486493323665626e-05, "loss": 1.7344, "step": 3946 }, { "epoch": 12.940983606557378, "grad_norm": 9.826517105102539, "learning_rate": 1.9486157363847494e-05, "loss": 1.8091, "step": 3947 }, { "epoch": 12.944262295081968, "grad_norm": 10.539958953857422, "learning_rate": 1.9485821297063077e-05, "loss": 1.7085, "step": 3948 }, { "epoch": 12.947540983606558, "grad_norm": 7.84461784362793, "learning_rate": 1.948548512331616e-05, "loss": 1.7207, "step": 3949 }, { "epoch": 12.950819672131148, "grad_norm": 7.280426502227783, "learning_rate": 1.948514884261054e-05, "loss": 1.8455, "step": 3950 }, { "epoch": 12.954098360655738, "grad_norm": 7.275326251983643, "learning_rate": 1.948481245495e-05, "loss": 1.7173, "step": 3951 }, { "epoch": 12.957377049180328, "grad_norm": 7.248188018798828, "learning_rate": 1.9484475960338344e-05, "loss": 1.7241, "step": 3952 }, { "epoch": 12.960655737704919, "grad_norm": 19.262163162231445, "learning_rate": 1.9484139358779354e-05, "loss": 1.6165, "step": 3953 }, { "epoch": 12.963934426229509, "grad_norm": 8.239972114562988, "learning_rate": 1.9483802650276833e-05, "loss": 1.6406, "step": 3954 }, { "epoch": 12.967213114754099, "grad_norm": 9.645125389099121, "learning_rate": 1.9483465834834577e-05, "loss": 1.6665, "step": 3955 }, { "epoch": 12.970491803278689, "grad_norm": 8.55092716217041, "learning_rate": 1.9483128912456383e-05, "loss": 1.8333, "step": 3956 }, { "epoch": 12.973770491803279, "grad_norm": 8.103182792663574, "learning_rate": 1.9482791883146053e-05, "loss": 1.6519, "step": 3957 }, { "epoch": 12.97704918032787, "grad_norm": 7.98376989364624, "learning_rate": 1.948245474690738e-05, "loss": 1.707, "step": 3958 }, { "epoch": 12.98032786885246, "grad_norm": 19.878236770629883, "learning_rate": 1.9482117503744176e-05, "loss": 1.7261, "step": 3959 }, { "epoch": 12.98360655737705, "grad_norm": 10.094123840332031, "learning_rate": 1.9481780153660236e-05, "loss": 1.7104, "step": 3960 }, { "epoch": 12.98688524590164, "grad_norm": 10.656906127929688, "learning_rate": 1.9481442696659365e-05, "loss": 1.6724, "step": 3961 }, { "epoch": 12.99016393442623, "grad_norm": 11.122467994689941, "learning_rate": 1.948110513274537e-05, "loss": 1.5964, "step": 3962 }, { "epoch": 12.99344262295082, "grad_norm": 8.11228084564209, "learning_rate": 1.9480767461922053e-05, "loss": 1.8711, "step": 3963 }, { "epoch": 12.99672131147541, "grad_norm": 8.626066207885742, "learning_rate": 1.948042968419323e-05, "loss": 1.8584, "step": 3964 }, { "epoch": 13.0, "grad_norm": 9.312124252319336, "learning_rate": 1.9480091799562706e-05, "loss": 1.5664, "step": 3965 }, { "epoch": 13.00327868852459, "grad_norm": 12.448803901672363, "learning_rate": 1.947975380803429e-05, "loss": 1.6396, "step": 3966 }, { "epoch": 13.00655737704918, "grad_norm": 11.831323623657227, "learning_rate": 1.9479415709611792e-05, "loss": 1.511, "step": 3967 }, { "epoch": 13.00983606557377, "grad_norm": 6.855266094207764, "learning_rate": 1.9479077504299024e-05, "loss": 1.769, "step": 3968 }, { "epoch": 13.01311475409836, "grad_norm": 9.42711353302002, "learning_rate": 1.9478739192099802e-05, "loss": 1.7173, "step": 3969 }, { "epoch": 13.01639344262295, "grad_norm": 7.811249732971191, "learning_rate": 1.947840077301794e-05, "loss": 1.6267, "step": 3970 }, { "epoch": 13.01967213114754, "grad_norm": 8.105851173400879, "learning_rate": 1.9478062247057256e-05, "loss": 1.7656, "step": 3971 }, { "epoch": 13.02295081967213, "grad_norm": 8.16749095916748, "learning_rate": 1.9477723614221565e-05, "loss": 1.6599, "step": 3972 }, { "epoch": 13.026229508196721, "grad_norm": 11.562163352966309, "learning_rate": 1.9477384874514688e-05, "loss": 1.5171, "step": 3973 }, { "epoch": 13.029508196721311, "grad_norm": 7.363995552062988, "learning_rate": 1.947704602794044e-05, "loss": 1.6929, "step": 3974 }, { "epoch": 13.032786885245901, "grad_norm": 7.008986473083496, "learning_rate": 1.9476707074502645e-05, "loss": 1.8066, "step": 3975 }, { "epoch": 13.036065573770491, "grad_norm": 12.043866157531738, "learning_rate": 1.9476368014205123e-05, "loss": 1.6284, "step": 3976 }, { "epoch": 13.039344262295081, "grad_norm": 8.823991775512695, "learning_rate": 1.9476028847051702e-05, "loss": 1.6177, "step": 3977 }, { "epoch": 13.042622950819672, "grad_norm": 7.554912090301514, "learning_rate": 1.9475689573046195e-05, "loss": 1.7542, "step": 3978 }, { "epoch": 13.045901639344262, "grad_norm": 6.268368244171143, "learning_rate": 1.9475350192192443e-05, "loss": 1.77, "step": 3979 }, { "epoch": 13.049180327868852, "grad_norm": 212.0306396484375, "learning_rate": 1.9475010704494266e-05, "loss": 1.5881, "step": 3980 }, { "epoch": 13.052459016393442, "grad_norm": 10.441014289855957, "learning_rate": 1.947467110995549e-05, "loss": 1.6956, "step": 3981 }, { "epoch": 13.055737704918032, "grad_norm": 14.07640266418457, "learning_rate": 1.9474331408579944e-05, "loss": 1.7461, "step": 3982 }, { "epoch": 13.059016393442622, "grad_norm": 7.923159599304199, "learning_rate": 1.9473991600371463e-05, "loss": 1.7607, "step": 3983 }, { "epoch": 13.062295081967212, "grad_norm": 9.292080879211426, "learning_rate": 1.947365168533388e-05, "loss": 1.7068, "step": 3984 }, { "epoch": 13.065573770491802, "grad_norm": 8.289281845092773, "learning_rate": 1.9473311663471016e-05, "loss": 1.8367, "step": 3985 }, { "epoch": 13.068852459016393, "grad_norm": 10.492749214172363, "learning_rate": 1.9472971534786718e-05, "loss": 1.7217, "step": 3986 }, { "epoch": 13.072131147540984, "grad_norm": 16.236888885498047, "learning_rate": 1.947263129928482e-05, "loss": 1.7935, "step": 3987 }, { "epoch": 13.075409836065575, "grad_norm": 8.804736137390137, "learning_rate": 1.9472290956969146e-05, "loss": 1.688, "step": 3988 }, { "epoch": 13.078688524590165, "grad_norm": 14.174954414367676, "learning_rate": 1.947195050784355e-05, "loss": 1.8408, "step": 3989 }, { "epoch": 13.081967213114755, "grad_norm": 23.45444107055664, "learning_rate": 1.9471609951911862e-05, "loss": 1.7607, "step": 3990 }, { "epoch": 13.085245901639345, "grad_norm": 9.040553092956543, "learning_rate": 1.9471269289177922e-05, "loss": 1.6509, "step": 3991 }, { "epoch": 13.088524590163935, "grad_norm": 14.224903106689453, "learning_rate": 1.9470928519645572e-05, "loss": 1.8945, "step": 3992 }, { "epoch": 13.091803278688525, "grad_norm": 9.434395790100098, "learning_rate": 1.947058764331866e-05, "loss": 1.8164, "step": 3993 }, { "epoch": 13.095081967213115, "grad_norm": 8.915661811828613, "learning_rate": 1.947024666020102e-05, "loss": 1.783, "step": 3994 }, { "epoch": 13.098360655737705, "grad_norm": 11.711149215698242, "learning_rate": 1.946990557029651e-05, "loss": 1.8159, "step": 3995 }, { "epoch": 13.101639344262296, "grad_norm": 10.675622940063477, "learning_rate": 1.9469564373608963e-05, "loss": 1.7378, "step": 3996 }, { "epoch": 13.104918032786886, "grad_norm": 10.232847213745117, "learning_rate": 1.9469223070142232e-05, "loss": 1.7061, "step": 3997 }, { "epoch": 13.108196721311476, "grad_norm": 10.446562767028809, "learning_rate": 1.9468881659900164e-05, "loss": 1.7861, "step": 3998 }, { "epoch": 13.111475409836066, "grad_norm": 9.07907485961914, "learning_rate": 1.9468540142886616e-05, "loss": 1.5269, "step": 3999 }, { "epoch": 13.114754098360656, "grad_norm": 8.504253387451172, "learning_rate": 1.9468198519105428e-05, "loss": 1.6853, "step": 4000 }, { "epoch": 13.118032786885246, "grad_norm": 7.764340400695801, "learning_rate": 1.946785678856046e-05, "loss": 1.6948, "step": 4001 }, { "epoch": 13.121311475409836, "grad_norm": 7.777823448181152, "learning_rate": 1.946751495125556e-05, "loss": 1.6777, "step": 4002 }, { "epoch": 13.124590163934426, "grad_norm": 25.03780174255371, "learning_rate": 1.9467173007194588e-05, "loss": 1.7773, "step": 4003 }, { "epoch": 13.127868852459017, "grad_norm": 10.022920608520508, "learning_rate": 1.9466830956381394e-05, "loss": 1.5161, "step": 4004 }, { "epoch": 13.131147540983607, "grad_norm": 10.390702247619629, "learning_rate": 1.9466488798819837e-05, "loss": 1.7617, "step": 4005 }, { "epoch": 13.134426229508197, "grad_norm": 7.608402729034424, "learning_rate": 1.9466146534513777e-05, "loss": 1.8569, "step": 4006 }, { "epoch": 13.137704918032787, "grad_norm": 10.03055191040039, "learning_rate": 1.9465804163467076e-05, "loss": 1.6538, "step": 4007 }, { "epoch": 13.140983606557377, "grad_norm": 11.682971954345703, "learning_rate": 1.946546168568359e-05, "loss": 1.585, "step": 4008 }, { "epoch": 13.144262295081967, "grad_norm": 9.836063385009766, "learning_rate": 1.946511910116718e-05, "loss": 1.6902, "step": 4009 }, { "epoch": 13.147540983606557, "grad_norm": 10.620916366577148, "learning_rate": 1.946477640992171e-05, "loss": 1.7029, "step": 4010 }, { "epoch": 13.150819672131147, "grad_norm": 8.351707458496094, "learning_rate": 1.946443361195105e-05, "loss": 1.8562, "step": 4011 }, { "epoch": 13.154098360655738, "grad_norm": 9.757826805114746, "learning_rate": 1.9464090707259055e-05, "loss": 1.5869, "step": 4012 }, { "epoch": 13.157377049180328, "grad_norm": 10.365967750549316, "learning_rate": 1.94637476958496e-05, "loss": 1.708, "step": 4013 }, { "epoch": 13.160655737704918, "grad_norm": 10.86540699005127, "learning_rate": 1.9463404577726548e-05, "loss": 1.7949, "step": 4014 }, { "epoch": 13.163934426229508, "grad_norm": 8.792683601379395, "learning_rate": 1.9463061352893774e-05, "loss": 1.6963, "step": 4015 }, { "epoch": 13.167213114754098, "grad_norm": 7.8173747062683105, "learning_rate": 1.946271802135514e-05, "loss": 1.7588, "step": 4016 }, { "epoch": 13.170491803278688, "grad_norm": 7.198257923126221, "learning_rate": 1.9462374583114524e-05, "loss": 1.7437, "step": 4017 }, { "epoch": 13.173770491803278, "grad_norm": 8.718932151794434, "learning_rate": 1.946203103817579e-05, "loss": 1.6729, "step": 4018 }, { "epoch": 13.177049180327868, "grad_norm": 7.596401691436768, "learning_rate": 1.9461687386542826e-05, "loss": 1.5425, "step": 4019 }, { "epoch": 13.180327868852459, "grad_norm": 10.153214454650879, "learning_rate": 1.94613436282195e-05, "loss": 1.5403, "step": 4020 }, { "epoch": 13.183606557377049, "grad_norm": 11.312973976135254, "learning_rate": 1.9460999763209682e-05, "loss": 1.5811, "step": 4021 }, { "epoch": 13.186885245901639, "grad_norm": 7.251645088195801, "learning_rate": 1.9460655791517254e-05, "loss": 1.7314, "step": 4022 }, { "epoch": 13.190163934426229, "grad_norm": 8.092597961425781, "learning_rate": 1.94603117131461e-05, "loss": 1.6426, "step": 4023 }, { "epoch": 13.193442622950819, "grad_norm": 8.294105529785156, "learning_rate": 1.945996752810009e-05, "loss": 1.7407, "step": 4024 }, { "epoch": 13.19672131147541, "grad_norm": 8.140750885009766, "learning_rate": 1.9459623236383113e-05, "loss": 1.4358, "step": 4025 }, { "epoch": 13.2, "grad_norm": 9.134434700012207, "learning_rate": 1.9459278837999048e-05, "loss": 1.8032, "step": 4026 }, { "epoch": 13.20327868852459, "grad_norm": 7.190487384796143, "learning_rate": 1.945893433295178e-05, "loss": 1.5879, "step": 4027 }, { "epoch": 13.20655737704918, "grad_norm": 7.509505271911621, "learning_rate": 1.945858972124519e-05, "loss": 1.6572, "step": 4028 }, { "epoch": 13.20983606557377, "grad_norm": 8.878710746765137, "learning_rate": 1.9458245002883167e-05, "loss": 1.7537, "step": 4029 }, { "epoch": 13.21311475409836, "grad_norm": 8.31187629699707, "learning_rate": 1.94579001778696e-05, "loss": 1.5361, "step": 4030 }, { "epoch": 13.216393442622952, "grad_norm": 8.251019477844238, "learning_rate": 1.9457555246208373e-05, "loss": 1.6731, "step": 4031 }, { "epoch": 13.219672131147542, "grad_norm": 9.320127487182617, "learning_rate": 1.9457210207903378e-05, "loss": 1.6553, "step": 4032 }, { "epoch": 13.222950819672132, "grad_norm": 8.726923942565918, "learning_rate": 1.9456865062958503e-05, "loss": 1.563, "step": 4033 }, { "epoch": 13.226229508196722, "grad_norm": 9.16215991973877, "learning_rate": 1.9456519811377642e-05, "loss": 1.7205, "step": 4034 }, { "epoch": 13.229508196721312, "grad_norm": 8.42280387878418, "learning_rate": 1.945617445316469e-05, "loss": 1.6455, "step": 4035 }, { "epoch": 13.232786885245902, "grad_norm": 7.696001052856445, "learning_rate": 1.945582898832354e-05, "loss": 1.7437, "step": 4036 }, { "epoch": 13.236065573770492, "grad_norm": 7.306945323944092, "learning_rate": 1.9455483416858084e-05, "loss": 1.6875, "step": 4037 }, { "epoch": 13.239344262295083, "grad_norm": 6.30658483505249, "learning_rate": 1.945513773877222e-05, "loss": 1.6531, "step": 4038 }, { "epoch": 13.242622950819673, "grad_norm": 7.985534191131592, "learning_rate": 1.9454791954069852e-05, "loss": 1.5686, "step": 4039 }, { "epoch": 13.245901639344263, "grad_norm": 7.940131187438965, "learning_rate": 1.945444606275487e-05, "loss": 1.585, "step": 4040 }, { "epoch": 13.249180327868853, "grad_norm": 7.397742748260498, "learning_rate": 1.9454100064831177e-05, "loss": 1.6206, "step": 4041 }, { "epoch": 13.252459016393443, "grad_norm": 9.066386222839355, "learning_rate": 1.945375396030268e-05, "loss": 1.8259, "step": 4042 }, { "epoch": 13.255737704918033, "grad_norm": 7.39181661605835, "learning_rate": 1.9453407749173278e-05, "loss": 1.4897, "step": 4043 }, { "epoch": 13.259016393442623, "grad_norm": 6.96976375579834, "learning_rate": 1.9453061431446872e-05, "loss": 1.6677, "step": 4044 }, { "epoch": 13.262295081967213, "grad_norm": 12.935885429382324, "learning_rate": 1.945271500712737e-05, "loss": 1.5923, "step": 4045 }, { "epoch": 13.265573770491804, "grad_norm": 13.668791770935059, "learning_rate": 1.945236847621868e-05, "loss": 1.575, "step": 4046 }, { "epoch": 13.268852459016394, "grad_norm": 7.561299800872803, "learning_rate": 1.9452021838724703e-05, "loss": 1.5828, "step": 4047 }, { "epoch": 13.272131147540984, "grad_norm": 8.143725395202637, "learning_rate": 1.9451675094649356e-05, "loss": 1.5291, "step": 4048 }, { "epoch": 13.275409836065574, "grad_norm": 10.466972351074219, "learning_rate": 1.9451328243996546e-05, "loss": 1.4741, "step": 4049 }, { "epoch": 13.278688524590164, "grad_norm": 8.360208511352539, "learning_rate": 1.945098128677018e-05, "loss": 1.5181, "step": 4050 }, { "epoch": 13.281967213114754, "grad_norm": 8.326815605163574, "learning_rate": 1.9450634222974176e-05, "loss": 1.5222, "step": 4051 }, { "epoch": 13.285245901639344, "grad_norm": 8.189471244812012, "learning_rate": 1.945028705261244e-05, "loss": 1.8452, "step": 4052 }, { "epoch": 13.288524590163934, "grad_norm": 8.629169464111328, "learning_rate": 1.94499397756889e-05, "loss": 1.5957, "step": 4053 }, { "epoch": 13.291803278688525, "grad_norm": 16.37921142578125, "learning_rate": 1.9449592392207455e-05, "loss": 1.8098, "step": 4054 }, { "epoch": 13.295081967213115, "grad_norm": 8.250439643859863, "learning_rate": 1.9449244902172035e-05, "loss": 1.574, "step": 4055 }, { "epoch": 13.298360655737705, "grad_norm": 10.98304271697998, "learning_rate": 1.9448897305586554e-05, "loss": 1.5239, "step": 4056 }, { "epoch": 13.301639344262295, "grad_norm": 8.93533706665039, "learning_rate": 1.944854960245493e-05, "loss": 1.9014, "step": 4057 }, { "epoch": 13.304918032786885, "grad_norm": 7.071650981903076, "learning_rate": 1.9448201792781084e-05, "loss": 1.6528, "step": 4058 }, { "epoch": 13.308196721311475, "grad_norm": 8.014886856079102, "learning_rate": 1.9447853876568944e-05, "loss": 1.6328, "step": 4059 }, { "epoch": 13.311475409836065, "grad_norm": 9.838071823120117, "learning_rate": 1.9447505853822425e-05, "loss": 1.5449, "step": 4060 }, { "epoch": 13.314754098360655, "grad_norm": 7.512777805328369, "learning_rate": 1.9447157724545452e-05, "loss": 1.7402, "step": 4061 }, { "epoch": 13.318032786885245, "grad_norm": 6.561666965484619, "learning_rate": 1.9446809488741957e-05, "loss": 1.7456, "step": 4062 }, { "epoch": 13.321311475409836, "grad_norm": 9.675694465637207, "learning_rate": 1.9446461146415863e-05, "loss": 1.5935, "step": 4063 }, { "epoch": 13.324590163934426, "grad_norm": 8.16368579864502, "learning_rate": 1.9446112697571097e-05, "loss": 1.7437, "step": 4064 }, { "epoch": 13.327868852459016, "grad_norm": 7.386894702911377, "learning_rate": 1.9445764142211588e-05, "loss": 1.5308, "step": 4065 }, { "epoch": 13.331147540983606, "grad_norm": 8.738834381103516, "learning_rate": 1.9445415480341267e-05, "loss": 1.6418, "step": 4066 }, { "epoch": 13.334426229508196, "grad_norm": 8.362041473388672, "learning_rate": 1.9445066711964065e-05, "loss": 1.8076, "step": 4067 }, { "epoch": 13.337704918032786, "grad_norm": 7.568975448608398, "learning_rate": 1.944471783708392e-05, "loss": 1.7051, "step": 4068 }, { "epoch": 13.340983606557376, "grad_norm": 15.164999961853027, "learning_rate": 1.9444368855704754e-05, "loss": 1.5225, "step": 4069 }, { "epoch": 13.344262295081966, "grad_norm": 6.455002784729004, "learning_rate": 1.9444019767830516e-05, "loss": 1.6594, "step": 4070 }, { "epoch": 13.347540983606557, "grad_norm": 7.632217884063721, "learning_rate": 1.9443670573465133e-05, "loss": 1.7292, "step": 4071 }, { "epoch": 13.350819672131147, "grad_norm": 8.859711647033691, "learning_rate": 1.9443321272612544e-05, "loss": 1.595, "step": 4072 }, { "epoch": 13.354098360655737, "grad_norm": 6.650392055511475, "learning_rate": 1.9442971865276693e-05, "loss": 1.7085, "step": 4073 }, { "epoch": 13.357377049180329, "grad_norm": 6.330399513244629, "learning_rate": 1.9442622351461515e-05, "loss": 1.8267, "step": 4074 }, { "epoch": 13.360655737704919, "grad_norm": 8.453141212463379, "learning_rate": 1.9442272731170948e-05, "loss": 1.5168, "step": 4075 }, { "epoch": 13.363934426229509, "grad_norm": 9.506659507751465, "learning_rate": 1.944192300440894e-05, "loss": 1.6719, "step": 4076 }, { "epoch": 13.3672131147541, "grad_norm": 7.692640781402588, "learning_rate": 1.9441573171179433e-05, "loss": 1.6821, "step": 4077 }, { "epoch": 13.37049180327869, "grad_norm": 7.620084285736084, "learning_rate": 1.9441223231486375e-05, "loss": 1.769, "step": 4078 }, { "epoch": 13.37377049180328, "grad_norm": 7.5394463539123535, "learning_rate": 1.9440873185333707e-05, "loss": 1.4382, "step": 4079 }, { "epoch": 13.37704918032787, "grad_norm": 8.568036079406738, "learning_rate": 1.9440523032725376e-05, "loss": 1.5977, "step": 4080 }, { "epoch": 13.38032786885246, "grad_norm": 6.1578755378723145, "learning_rate": 1.9440172773665336e-05, "loss": 1.5269, "step": 4081 }, { "epoch": 13.38360655737705, "grad_norm": 7.145308017730713, "learning_rate": 1.943982240815753e-05, "loss": 1.5432, "step": 4082 }, { "epoch": 13.38688524590164, "grad_norm": 9.235299110412598, "learning_rate": 1.943947193620591e-05, "loss": 1.4104, "step": 4083 }, { "epoch": 13.39016393442623, "grad_norm": 8.268637657165527, "learning_rate": 1.943912135781443e-05, "loss": 1.6809, "step": 4084 }, { "epoch": 13.39344262295082, "grad_norm": 10.051078796386719, "learning_rate": 1.9438770672987045e-05, "loss": 1.5178, "step": 4085 }, { "epoch": 13.39672131147541, "grad_norm": 6.734185218811035, "learning_rate": 1.9438419881727703e-05, "loss": 1.752, "step": 4086 }, { "epoch": 13.4, "grad_norm": 11.991896629333496, "learning_rate": 1.9438068984040366e-05, "loss": 1.6045, "step": 4087 }, { "epoch": 13.40327868852459, "grad_norm": 7.183279037475586, "learning_rate": 1.9437717979928986e-05, "loss": 1.5518, "step": 4088 }, { "epoch": 13.40655737704918, "grad_norm": 9.412825584411621, "learning_rate": 1.9437366869397526e-05, "loss": 1.6753, "step": 4089 }, { "epoch": 13.40983606557377, "grad_norm": 9.282710075378418, "learning_rate": 1.943701565244994e-05, "loss": 1.7354, "step": 4090 }, { "epoch": 13.41311475409836, "grad_norm": 6.961466312408447, "learning_rate": 1.943666432909019e-05, "loss": 1.7524, "step": 4091 }, { "epoch": 13.416393442622951, "grad_norm": 9.173263549804688, "learning_rate": 1.943631289932224e-05, "loss": 1.8086, "step": 4092 }, { "epoch": 13.419672131147541, "grad_norm": 10.410613059997559, "learning_rate": 1.943596136315005e-05, "loss": 1.6147, "step": 4093 }, { "epoch": 13.422950819672131, "grad_norm": 7.851088523864746, "learning_rate": 1.9435609720577585e-05, "loss": 1.7153, "step": 4094 }, { "epoch": 13.426229508196721, "grad_norm": 7.088029861450195, "learning_rate": 1.943525797160881e-05, "loss": 1.875, "step": 4095 }, { "epoch": 13.429508196721311, "grad_norm": 8.048229217529297, "learning_rate": 1.943490611624769e-05, "loss": 1.5925, "step": 4096 }, { "epoch": 13.432786885245902, "grad_norm": 6.923166275024414, "learning_rate": 1.9434554154498196e-05, "loss": 1.5286, "step": 4097 }, { "epoch": 13.436065573770492, "grad_norm": 9.35735034942627, "learning_rate": 1.9434202086364292e-05, "loss": 1.6738, "step": 4098 }, { "epoch": 13.439344262295082, "grad_norm": 6.722282409667969, "learning_rate": 1.9433849911849952e-05, "loss": 1.7073, "step": 4099 }, { "epoch": 13.442622950819672, "grad_norm": 6.814615249633789, "learning_rate": 1.9433497630959143e-05, "loss": 1.5259, "step": 4100 }, { "epoch": 13.445901639344262, "grad_norm": 6.3102922439575195, "learning_rate": 1.9433145243695843e-05, "loss": 1.812, "step": 4101 }, { "epoch": 13.449180327868852, "grad_norm": 7.059499263763428, "learning_rate": 1.943279275006402e-05, "loss": 1.5688, "step": 4102 }, { "epoch": 13.452459016393442, "grad_norm": 8.39233112335205, "learning_rate": 1.943244015006765e-05, "loss": 1.6326, "step": 4103 }, { "epoch": 13.455737704918032, "grad_norm": 7.81504487991333, "learning_rate": 1.9432087443710716e-05, "loss": 1.6248, "step": 4104 }, { "epoch": 13.459016393442623, "grad_norm": 8.130852699279785, "learning_rate": 1.9431734630997183e-05, "loss": 1.6445, "step": 4105 }, { "epoch": 13.462295081967213, "grad_norm": 9.337672233581543, "learning_rate": 1.9431381711931036e-05, "loss": 1.7129, "step": 4106 }, { "epoch": 13.465573770491803, "grad_norm": 7.412915229797363, "learning_rate": 1.9431028686516254e-05, "loss": 1.6023, "step": 4107 }, { "epoch": 13.468852459016393, "grad_norm": 8.523846626281738, "learning_rate": 1.943067555475682e-05, "loss": 1.5503, "step": 4108 }, { "epoch": 13.472131147540983, "grad_norm": 8.257597923278809, "learning_rate": 1.9430322316656714e-05, "loss": 1.45, "step": 4109 }, { "epoch": 13.475409836065573, "grad_norm": 8.699477195739746, "learning_rate": 1.942996897221992e-05, "loss": 1.6309, "step": 4110 }, { "epoch": 13.478688524590163, "grad_norm": 11.129571914672852, "learning_rate": 1.942961552145042e-05, "loss": 1.6196, "step": 4111 }, { "epoch": 13.481967213114753, "grad_norm": 6.6089043617248535, "learning_rate": 1.9429261964352198e-05, "loss": 1.78, "step": 4112 }, { "epoch": 13.485245901639344, "grad_norm": 8.213396072387695, "learning_rate": 1.9428908300929246e-05, "loss": 1.5298, "step": 4113 }, { "epoch": 13.488524590163934, "grad_norm": 8.134876251220703, "learning_rate": 1.9428554531185545e-05, "loss": 1.7754, "step": 4114 }, { "epoch": 13.491803278688524, "grad_norm": 8.838604927062988, "learning_rate": 1.9428200655125095e-05, "loss": 1.7783, "step": 4115 }, { "epoch": 13.495081967213114, "grad_norm": 7.508490562438965, "learning_rate": 1.9427846672751873e-05, "loss": 1.6182, "step": 4116 }, { "epoch": 13.498360655737706, "grad_norm": 7.782855987548828, "learning_rate": 1.9427492584069883e-05, "loss": 1.7349, "step": 4117 }, { "epoch": 13.501639344262294, "grad_norm": 8.851091384887695, "learning_rate": 1.942713838908311e-05, "loss": 1.6155, "step": 4118 }, { "epoch": 13.504918032786886, "grad_norm": 6.976253509521484, "learning_rate": 1.9426784087795546e-05, "loss": 1.5195, "step": 4119 }, { "epoch": 13.508196721311476, "grad_norm": 8.947127342224121, "learning_rate": 1.9426429680211198e-05, "loss": 1.5649, "step": 4120 }, { "epoch": 13.511475409836066, "grad_norm": 7.321377277374268, "learning_rate": 1.9426075166334047e-05, "loss": 1.6831, "step": 4121 }, { "epoch": 13.514754098360656, "grad_norm": 7.6973748207092285, "learning_rate": 1.9425720546168102e-05, "loss": 1.7925, "step": 4122 }, { "epoch": 13.518032786885247, "grad_norm": 7.35550594329834, "learning_rate": 1.9425365819717354e-05, "loss": 1.7585, "step": 4123 }, { "epoch": 13.521311475409837, "grad_norm": 6.8556108474731445, "learning_rate": 1.942501098698581e-05, "loss": 1.6685, "step": 4124 }, { "epoch": 13.524590163934427, "grad_norm": 13.660849571228027, "learning_rate": 1.9424656047977468e-05, "loss": 1.4099, "step": 4125 }, { "epoch": 13.527868852459017, "grad_norm": 8.378252029418945, "learning_rate": 1.9424301002696325e-05, "loss": 1.6084, "step": 4126 }, { "epoch": 13.531147540983607, "grad_norm": 13.995795249938965, "learning_rate": 1.9423945851146395e-05, "loss": 1.5808, "step": 4127 }, { "epoch": 13.534426229508197, "grad_norm": 43.63557815551758, "learning_rate": 1.9423590593331676e-05, "loss": 1.6367, "step": 4128 }, { "epoch": 13.537704918032787, "grad_norm": 7.294759273529053, "learning_rate": 1.9423235229256172e-05, "loss": 1.6968, "step": 4129 }, { "epoch": 13.540983606557377, "grad_norm": 8.667647361755371, "learning_rate": 1.9422879758923895e-05, "loss": 1.7566, "step": 4130 }, { "epoch": 13.544262295081968, "grad_norm": 8.330094337463379, "learning_rate": 1.942252418233885e-05, "loss": 1.7573, "step": 4131 }, { "epoch": 13.547540983606558, "grad_norm": 9.541070938110352, "learning_rate": 1.942216849950505e-05, "loss": 1.5962, "step": 4132 }, { "epoch": 13.550819672131148, "grad_norm": 9.678520202636719, "learning_rate": 1.9421812710426503e-05, "loss": 1.604, "step": 4133 }, { "epoch": 13.554098360655738, "grad_norm": 8.700708389282227, "learning_rate": 1.9421456815107222e-05, "loss": 1.7786, "step": 4134 }, { "epoch": 13.557377049180328, "grad_norm": 15.892285346984863, "learning_rate": 1.9421100813551218e-05, "loss": 1.5862, "step": 4135 }, { "epoch": 13.560655737704918, "grad_norm": 8.7747163772583, "learning_rate": 1.9420744705762508e-05, "loss": 1.5798, "step": 4136 }, { "epoch": 13.563934426229508, "grad_norm": 8.887940406799316, "learning_rate": 1.9420388491745105e-05, "loss": 1.8433, "step": 4137 }, { "epoch": 13.567213114754098, "grad_norm": 6.4436845779418945, "learning_rate": 1.9420032171503028e-05, "loss": 1.6003, "step": 4138 }, { "epoch": 13.570491803278689, "grad_norm": 8.147934913635254, "learning_rate": 1.9419675745040295e-05, "loss": 1.6306, "step": 4139 }, { "epoch": 13.573770491803279, "grad_norm": 10.720159530639648, "learning_rate": 1.941931921236092e-05, "loss": 1.7139, "step": 4140 }, { "epoch": 13.577049180327869, "grad_norm": 8.424756050109863, "learning_rate": 1.941896257346893e-05, "loss": 1.9263, "step": 4141 }, { "epoch": 13.580327868852459, "grad_norm": 12.430261611938477, "learning_rate": 1.9418605828368344e-05, "loss": 1.5637, "step": 4142 }, { "epoch": 13.583606557377049, "grad_norm": 106.34735107421875, "learning_rate": 1.9418248977063182e-05, "loss": 1.5884, "step": 4143 }, { "epoch": 13.58688524590164, "grad_norm": 8.422475814819336, "learning_rate": 1.9417892019557473e-05, "loss": 1.646, "step": 4144 }, { "epoch": 13.59016393442623, "grad_norm": 10.0205078125, "learning_rate": 1.941753495585524e-05, "loss": 1.7041, "step": 4145 }, { "epoch": 13.59344262295082, "grad_norm": 10.915502548217773, "learning_rate": 1.9417177785960504e-05, "loss": 1.8862, "step": 4146 }, { "epoch": 13.59672131147541, "grad_norm": 8.161961555480957, "learning_rate": 1.94168205098773e-05, "loss": 1.3789, "step": 4147 }, { "epoch": 13.6, "grad_norm": 10.886244773864746, "learning_rate": 1.9416463127609655e-05, "loss": 1.7676, "step": 4148 }, { "epoch": 13.60327868852459, "grad_norm": 9.490571975708008, "learning_rate": 1.9416105639161598e-05, "loss": 1.7156, "step": 4149 }, { "epoch": 13.60655737704918, "grad_norm": 7.393622398376465, "learning_rate": 1.9415748044537157e-05, "loss": 1.6436, "step": 4150 }, { "epoch": 13.60983606557377, "grad_norm": 11.516347885131836, "learning_rate": 1.941539034374037e-05, "loss": 1.4395, "step": 4151 }, { "epoch": 13.61311475409836, "grad_norm": 10.853521347045898, "learning_rate": 1.9415032536775262e-05, "loss": 1.7573, "step": 4152 }, { "epoch": 13.61639344262295, "grad_norm": 9.613081932067871, "learning_rate": 1.941467462364588e-05, "loss": 1.7559, "step": 4153 }, { "epoch": 13.61967213114754, "grad_norm": 8.057561874389648, "learning_rate": 1.9414316604356248e-05, "loss": 1.7881, "step": 4154 }, { "epoch": 13.62295081967213, "grad_norm": 15.611857414245605, "learning_rate": 1.9413958478910408e-05, "loss": 1.5918, "step": 4155 }, { "epoch": 13.62622950819672, "grad_norm": 8.428732872009277, "learning_rate": 1.9413600247312397e-05, "loss": 1.562, "step": 4156 }, { "epoch": 13.62950819672131, "grad_norm": 7.947480201721191, "learning_rate": 1.941324190956626e-05, "loss": 1.7173, "step": 4157 }, { "epoch": 13.6327868852459, "grad_norm": 11.651315689086914, "learning_rate": 1.941288346567603e-05, "loss": 1.7651, "step": 4158 }, { "epoch": 13.636065573770491, "grad_norm": 8.843647956848145, "learning_rate": 1.9412524915645753e-05, "loss": 1.5537, "step": 4159 }, { "epoch": 13.639344262295083, "grad_norm": 7.8270158767700195, "learning_rate": 1.941216625947947e-05, "loss": 1.6914, "step": 4160 }, { "epoch": 13.642622950819671, "grad_norm": 8.98420238494873, "learning_rate": 1.9411807497181227e-05, "loss": 1.6145, "step": 4161 }, { "epoch": 13.645901639344263, "grad_norm": 9.911687850952148, "learning_rate": 1.9411448628755067e-05, "loss": 1.6914, "step": 4162 }, { "epoch": 13.649180327868853, "grad_norm": 11.876730918884277, "learning_rate": 1.941108965420504e-05, "loss": 1.6279, "step": 4163 }, { "epoch": 13.652459016393443, "grad_norm": 8.048325538635254, "learning_rate": 1.9410730573535193e-05, "loss": 1.6514, "step": 4164 }, { "epoch": 13.655737704918034, "grad_norm": 10.966631889343262, "learning_rate": 1.9410371386749572e-05, "loss": 1.6274, "step": 4165 }, { "epoch": 13.659016393442624, "grad_norm": 10.372278213500977, "learning_rate": 1.941001209385223e-05, "loss": 1.7576, "step": 4166 }, { "epoch": 13.662295081967214, "grad_norm": 8.455317497253418, "learning_rate": 1.9409652694847222e-05, "loss": 1.6021, "step": 4167 }, { "epoch": 13.665573770491804, "grad_norm": 8.769526481628418, "learning_rate": 1.940929318973859e-05, "loss": 1.7891, "step": 4168 }, { "epoch": 13.668852459016394, "grad_norm": 12.38381290435791, "learning_rate": 1.9408933578530395e-05, "loss": 1.7012, "step": 4169 }, { "epoch": 13.672131147540984, "grad_norm": 7.853065490722656, "learning_rate": 1.9408573861226694e-05, "loss": 1.6787, "step": 4170 }, { "epoch": 13.675409836065574, "grad_norm": 8.093713760375977, "learning_rate": 1.9408214037831537e-05, "loss": 1.8394, "step": 4171 }, { "epoch": 13.678688524590164, "grad_norm": 15.846765518188477, "learning_rate": 1.9407854108348987e-05, "loss": 1.7954, "step": 4172 }, { "epoch": 13.681967213114755, "grad_norm": 7.493491172790527, "learning_rate": 1.94074940727831e-05, "loss": 1.8096, "step": 4173 }, { "epoch": 13.685245901639345, "grad_norm": 8.17172622680664, "learning_rate": 1.9407133931137935e-05, "loss": 1.8081, "step": 4174 }, { "epoch": 13.688524590163935, "grad_norm": 5.895535469055176, "learning_rate": 1.9406773683417554e-05, "loss": 1.6045, "step": 4175 }, { "epoch": 13.691803278688525, "grad_norm": 8.088768005371094, "learning_rate": 1.940641332962602e-05, "loss": 1.7422, "step": 4176 }, { "epoch": 13.695081967213115, "grad_norm": 14.506843566894531, "learning_rate": 1.9406052869767395e-05, "loss": 1.7329, "step": 4177 }, { "epoch": 13.698360655737705, "grad_norm": 7.483107089996338, "learning_rate": 1.9405692303845742e-05, "loss": 1.729, "step": 4178 }, { "epoch": 13.701639344262295, "grad_norm": 8.794164657592773, "learning_rate": 1.9405331631865132e-05, "loss": 1.7227, "step": 4179 }, { "epoch": 13.704918032786885, "grad_norm": 8.21982479095459, "learning_rate": 1.9404970853829627e-05, "loss": 1.6638, "step": 4180 }, { "epoch": 13.708196721311475, "grad_norm": 9.86031723022461, "learning_rate": 1.9404609969743297e-05, "loss": 1.7495, "step": 4181 }, { "epoch": 13.711475409836066, "grad_norm": 19.794891357421875, "learning_rate": 1.9404248979610213e-05, "loss": 1.5613, "step": 4182 }, { "epoch": 13.714754098360656, "grad_norm": 9.076648712158203, "learning_rate": 1.9403887883434442e-05, "loss": 1.8408, "step": 4183 }, { "epoch": 13.718032786885246, "grad_norm": 6.782302379608154, "learning_rate": 1.9403526681220057e-05, "loss": 1.896, "step": 4184 }, { "epoch": 13.721311475409836, "grad_norm": 10.345964431762695, "learning_rate": 1.9403165372971133e-05, "loss": 1.4788, "step": 4185 }, { "epoch": 13.724590163934426, "grad_norm": 7.931593418121338, "learning_rate": 1.9402803958691742e-05, "loss": 1.8079, "step": 4186 }, { "epoch": 13.727868852459016, "grad_norm": 12.827163696289062, "learning_rate": 1.940244243838596e-05, "loss": 1.7578, "step": 4187 }, { "epoch": 13.731147540983606, "grad_norm": 26.761497497558594, "learning_rate": 1.9402080812057863e-05, "loss": 1.9221, "step": 4188 }, { "epoch": 13.734426229508196, "grad_norm": 11.638143539428711, "learning_rate": 1.940171907971153e-05, "loss": 1.8037, "step": 4189 }, { "epoch": 13.737704918032787, "grad_norm": 11.107481002807617, "learning_rate": 1.940135724135104e-05, "loss": 1.8145, "step": 4190 }, { "epoch": 13.740983606557377, "grad_norm": 8.804840087890625, "learning_rate": 1.940099529698047e-05, "loss": 1.6948, "step": 4191 }, { "epoch": 13.744262295081967, "grad_norm": 8.131617546081543, "learning_rate": 1.9400633246603904e-05, "loss": 1.6763, "step": 4192 }, { "epoch": 13.747540983606557, "grad_norm": 9.7262601852417, "learning_rate": 1.9400271090225423e-05, "loss": 1.6824, "step": 4193 }, { "epoch": 13.750819672131147, "grad_norm": 7.520802974700928, "learning_rate": 1.9399908827849114e-05, "loss": 1.8691, "step": 4194 }, { "epoch": 13.754098360655737, "grad_norm": 6.952887535095215, "learning_rate": 1.939954645947906e-05, "loss": 1.7607, "step": 4195 }, { "epoch": 13.757377049180327, "grad_norm": 6.128761291503906, "learning_rate": 1.9399183985119344e-05, "loss": 1.8896, "step": 4196 }, { "epoch": 13.760655737704917, "grad_norm": 9.90595531463623, "learning_rate": 1.939882140477406e-05, "loss": 1.7566, "step": 4197 }, { "epoch": 13.763934426229508, "grad_norm": 7.447091579437256, "learning_rate": 1.9398458718447287e-05, "loss": 1.7803, "step": 4198 }, { "epoch": 13.767213114754098, "grad_norm": 7.585498332977295, "learning_rate": 1.939809592614312e-05, "loss": 1.5889, "step": 4199 }, { "epoch": 13.770491803278688, "grad_norm": 7.637945652008057, "learning_rate": 1.9397733027865652e-05, "loss": 1.7925, "step": 4200 }, { "epoch": 13.773770491803278, "grad_norm": 13.279134750366211, "learning_rate": 1.939737002361897e-05, "loss": 1.6548, "step": 4201 }, { "epoch": 13.777049180327868, "grad_norm": 7.208018779754639, "learning_rate": 1.9397006913407175e-05, "loss": 1.5649, "step": 4202 }, { "epoch": 13.780327868852458, "grad_norm": 9.20479679107666, "learning_rate": 1.9396643697234355e-05, "loss": 1.6028, "step": 4203 }, { "epoch": 13.783606557377048, "grad_norm": 8.396806716918945, "learning_rate": 1.9396280375104605e-05, "loss": 1.7192, "step": 4204 }, { "epoch": 13.78688524590164, "grad_norm": 10.721236228942871, "learning_rate": 1.9395916947022028e-05, "loss": 1.7109, "step": 4205 }, { "epoch": 13.790163934426229, "grad_norm": 8.360424041748047, "learning_rate": 1.9395553412990716e-05, "loss": 1.7695, "step": 4206 }, { "epoch": 13.79344262295082, "grad_norm": 9.445950508117676, "learning_rate": 1.939518977301477e-05, "loss": 1.7559, "step": 4207 }, { "epoch": 13.79672131147541, "grad_norm": 18.73833656311035, "learning_rate": 1.9394826027098292e-05, "loss": 1.6309, "step": 4208 }, { "epoch": 13.8, "grad_norm": 7.120131969451904, "learning_rate": 1.9394462175245382e-05, "loss": 1.6958, "step": 4209 }, { "epoch": 13.80327868852459, "grad_norm": 6.926362991333008, "learning_rate": 1.939409821746014e-05, "loss": 1.7695, "step": 4210 }, { "epoch": 13.806557377049181, "grad_norm": 9.997129440307617, "learning_rate": 1.9393734153746678e-05, "loss": 1.6956, "step": 4211 }, { "epoch": 13.809836065573771, "grad_norm": 8.961267471313477, "learning_rate": 1.9393369984109094e-05, "loss": 1.7437, "step": 4212 }, { "epoch": 13.813114754098361, "grad_norm": 8.945545196533203, "learning_rate": 1.93930057085515e-05, "loss": 1.7378, "step": 4213 }, { "epoch": 13.816393442622951, "grad_norm": 7.982609272003174, "learning_rate": 1.9392641327077995e-05, "loss": 1.6296, "step": 4214 }, { "epoch": 13.819672131147541, "grad_norm": 9.445098876953125, "learning_rate": 1.9392276839692694e-05, "loss": 1.7344, "step": 4215 }, { "epoch": 13.822950819672132, "grad_norm": 9.052252769470215, "learning_rate": 1.939191224639971e-05, "loss": 1.4839, "step": 4216 }, { "epoch": 13.826229508196722, "grad_norm": 8.133589744567871, "learning_rate": 1.9391547547203146e-05, "loss": 1.6326, "step": 4217 }, { "epoch": 13.829508196721312, "grad_norm": 8.087217330932617, "learning_rate": 1.9391182742107123e-05, "loss": 1.5295, "step": 4218 }, { "epoch": 13.832786885245902, "grad_norm": 14.96584701538086, "learning_rate": 1.9390817831115745e-05, "loss": 1.5378, "step": 4219 }, { "epoch": 13.836065573770492, "grad_norm": 8.647777557373047, "learning_rate": 1.9390452814233133e-05, "loss": 1.8042, "step": 4220 }, { "epoch": 13.839344262295082, "grad_norm": 10.052943229675293, "learning_rate": 1.9390087691463407e-05, "loss": 1.7173, "step": 4221 }, { "epoch": 13.842622950819672, "grad_norm": 7.0726189613342285, "learning_rate": 1.9389722462810673e-05, "loss": 1.6714, "step": 4222 }, { "epoch": 13.845901639344262, "grad_norm": 12.18053913116455, "learning_rate": 1.938935712827906e-05, "loss": 1.6562, "step": 4223 }, { "epoch": 13.849180327868853, "grad_norm": 8.009459495544434, "learning_rate": 1.938899168787268e-05, "loss": 1.5986, "step": 4224 }, { "epoch": 13.852459016393443, "grad_norm": 9.125934600830078, "learning_rate": 1.938862614159566e-05, "loss": 1.5703, "step": 4225 }, { "epoch": 13.855737704918033, "grad_norm": 7.860286235809326, "learning_rate": 1.9388260489452113e-05, "loss": 1.8162, "step": 4226 }, { "epoch": 13.859016393442623, "grad_norm": 9.445369720458984, "learning_rate": 1.9387894731446172e-05, "loss": 1.7153, "step": 4227 }, { "epoch": 13.862295081967213, "grad_norm": 6.679192543029785, "learning_rate": 1.9387528867581957e-05, "loss": 1.7944, "step": 4228 }, { "epoch": 13.865573770491803, "grad_norm": 8.419344902038574, "learning_rate": 1.938716289786359e-05, "loss": 1.6694, "step": 4229 }, { "epoch": 13.868852459016393, "grad_norm": 8.41236400604248, "learning_rate": 1.9386796822295206e-05, "loss": 1.7449, "step": 4230 }, { "epoch": 13.872131147540983, "grad_norm": 7.306895732879639, "learning_rate": 1.9386430640880923e-05, "loss": 1.7368, "step": 4231 }, { "epoch": 13.875409836065574, "grad_norm": 8.655171394348145, "learning_rate": 1.9386064353624877e-05, "loss": 1.7275, "step": 4232 }, { "epoch": 13.878688524590164, "grad_norm": 9.617527961730957, "learning_rate": 1.9385697960531196e-05, "loss": 1.6533, "step": 4233 }, { "epoch": 13.881967213114754, "grad_norm": 9.099882125854492, "learning_rate": 1.9385331461604012e-05, "loss": 1.6538, "step": 4234 }, { "epoch": 13.885245901639344, "grad_norm": 8.934633255004883, "learning_rate": 1.938496485684746e-05, "loss": 1.8232, "step": 4235 }, { "epoch": 13.888524590163934, "grad_norm": 15.932384490966797, "learning_rate": 1.9384598146265668e-05, "loss": 1.5386, "step": 4236 }, { "epoch": 13.891803278688524, "grad_norm": 8.84284496307373, "learning_rate": 1.9384231329862776e-05, "loss": 1.8035, "step": 4237 }, { "epoch": 13.895081967213114, "grad_norm": 9.387417793273926, "learning_rate": 1.9383864407642918e-05, "loss": 1.6008, "step": 4238 }, { "epoch": 13.898360655737704, "grad_norm": 10.066659927368164, "learning_rate": 1.9383497379610233e-05, "loss": 1.6694, "step": 4239 }, { "epoch": 13.901639344262295, "grad_norm": 6.6204986572265625, "learning_rate": 1.938313024576886e-05, "loss": 1.6978, "step": 4240 }, { "epoch": 13.904918032786885, "grad_norm": 8.59158706665039, "learning_rate": 1.9382763006122932e-05, "loss": 1.6831, "step": 4241 }, { "epoch": 13.908196721311475, "grad_norm": 9.081477165222168, "learning_rate": 1.9382395660676597e-05, "loss": 1.6787, "step": 4242 }, { "epoch": 13.911475409836065, "grad_norm": 11.246665954589844, "learning_rate": 1.9382028209434e-05, "loss": 1.5283, "step": 4243 }, { "epoch": 13.914754098360655, "grad_norm": 7.931468963623047, "learning_rate": 1.9381660652399276e-05, "loss": 1.895, "step": 4244 }, { "epoch": 13.918032786885245, "grad_norm": 9.660533905029297, "learning_rate": 1.938129298957658e-05, "loss": 1.5596, "step": 4245 }, { "epoch": 13.921311475409835, "grad_norm": 12.1267671585083, "learning_rate": 1.9380925220970043e-05, "loss": 1.5884, "step": 4246 }, { "epoch": 13.924590163934425, "grad_norm": 7.189838886260986, "learning_rate": 1.9380557346583826e-05, "loss": 1.7266, "step": 4247 }, { "epoch": 13.927868852459017, "grad_norm": 8.679364204406738, "learning_rate": 1.938018936642207e-05, "loss": 1.7585, "step": 4248 }, { "epoch": 13.931147540983606, "grad_norm": 8.456376075744629, "learning_rate": 1.9379821280488924e-05, "loss": 1.6592, "step": 4249 }, { "epoch": 13.934426229508198, "grad_norm": 9.078510284423828, "learning_rate": 1.9379453088788543e-05, "loss": 1.7334, "step": 4250 }, { "epoch": 13.937704918032788, "grad_norm": 8.091521263122559, "learning_rate": 1.9379084791325073e-05, "loss": 1.8198, "step": 4251 }, { "epoch": 13.940983606557378, "grad_norm": 9.69383430480957, "learning_rate": 1.9378716388102675e-05, "loss": 1.6206, "step": 4252 }, { "epoch": 13.944262295081968, "grad_norm": 9.171814918518066, "learning_rate": 1.9378347879125497e-05, "loss": 1.6587, "step": 4253 }, { "epoch": 13.947540983606558, "grad_norm": 9.878392219543457, "learning_rate": 1.937797926439769e-05, "loss": 1.6514, "step": 4254 }, { "epoch": 13.950819672131148, "grad_norm": 13.953185081481934, "learning_rate": 1.9377610543923423e-05, "loss": 1.5879, "step": 4255 }, { "epoch": 13.954098360655738, "grad_norm": 7.991023063659668, "learning_rate": 1.9377241717706846e-05, "loss": 1.7341, "step": 4256 }, { "epoch": 13.957377049180328, "grad_norm": 8.277174949645996, "learning_rate": 1.9376872785752117e-05, "loss": 1.3591, "step": 4257 }, { "epoch": 13.960655737704919, "grad_norm": 8.720085144042969, "learning_rate": 1.9376503748063397e-05, "loss": 1.5432, "step": 4258 }, { "epoch": 13.963934426229509, "grad_norm": 8.36055850982666, "learning_rate": 1.937613460464485e-05, "loss": 1.77, "step": 4259 }, { "epoch": 13.967213114754099, "grad_norm": 7.7973785400390625, "learning_rate": 1.9375765355500635e-05, "loss": 1.7866, "step": 4260 }, { "epoch": 13.970491803278689, "grad_norm": 9.651106834411621, "learning_rate": 1.937539600063492e-05, "loss": 1.7847, "step": 4261 }, { "epoch": 13.973770491803279, "grad_norm": 8.734148025512695, "learning_rate": 1.9375026540051864e-05, "loss": 1.9746, "step": 4262 }, { "epoch": 13.97704918032787, "grad_norm": 6.833339691162109, "learning_rate": 1.9374656973755638e-05, "loss": 1.7578, "step": 4263 }, { "epoch": 13.98032786885246, "grad_norm": 12.717832565307617, "learning_rate": 1.9374287301750405e-05, "loss": 1.7949, "step": 4264 }, { "epoch": 13.98360655737705, "grad_norm": 8.558756828308105, "learning_rate": 1.937391752404034e-05, "loss": 1.6118, "step": 4265 }, { "epoch": 13.98688524590164, "grad_norm": 6.836415767669678, "learning_rate": 1.9373547640629604e-05, "loss": 1.7639, "step": 4266 }, { "epoch": 13.99016393442623, "grad_norm": 16.177471160888672, "learning_rate": 1.9373177651522374e-05, "loss": 1.5264, "step": 4267 }, { "epoch": 13.99344262295082, "grad_norm": 11.03646183013916, "learning_rate": 1.937280755672282e-05, "loss": 1.6716, "step": 4268 }, { "epoch": 13.99672131147541, "grad_norm": 12.515852928161621, "learning_rate": 1.9372437356235117e-05, "loss": 1.5603, "step": 4269 }, { "epoch": 14.0, "grad_norm": 18.494098663330078, "learning_rate": 1.937206705006344e-05, "loss": 1.6053, "step": 4270 }, { "epoch": 14.00327868852459, "grad_norm": 7.61881160736084, "learning_rate": 1.937169663821196e-05, "loss": 1.5647, "step": 4271 }, { "epoch": 14.00655737704918, "grad_norm": 14.78549575805664, "learning_rate": 1.9371326120684854e-05, "loss": 1.771, "step": 4272 }, { "epoch": 14.00983606557377, "grad_norm": 7.368114948272705, "learning_rate": 1.9370955497486304e-05, "loss": 1.803, "step": 4273 }, { "epoch": 14.01311475409836, "grad_norm": 9.634485244750977, "learning_rate": 1.9370584768620485e-05, "loss": 1.4797, "step": 4274 }, { "epoch": 14.01639344262295, "grad_norm": 11.082958221435547, "learning_rate": 1.9370213934091585e-05, "loss": 1.6631, "step": 4275 }, { "epoch": 14.01967213114754, "grad_norm": 8.422038078308105, "learning_rate": 1.9369842993903778e-05, "loss": 1.5295, "step": 4276 }, { "epoch": 14.02295081967213, "grad_norm": 8.383923530578613, "learning_rate": 1.9369471948061246e-05, "loss": 1.5112, "step": 4277 }, { "epoch": 14.026229508196721, "grad_norm": 7.645330905914307, "learning_rate": 1.9369100796568182e-05, "loss": 1.574, "step": 4278 }, { "epoch": 14.029508196721311, "grad_norm": 8.203909873962402, "learning_rate": 1.936872953942876e-05, "loss": 1.5916, "step": 4279 }, { "epoch": 14.032786885245901, "grad_norm": 7.1903605461120605, "learning_rate": 1.9368358176647174e-05, "loss": 1.6143, "step": 4280 }, { "epoch": 14.036065573770491, "grad_norm": 7.623129367828369, "learning_rate": 1.9367986708227608e-05, "loss": 1.4822, "step": 4281 }, { "epoch": 14.039344262295081, "grad_norm": 8.489445686340332, "learning_rate": 1.9367615134174252e-05, "loss": 1.7529, "step": 4282 }, { "epoch": 14.042622950819672, "grad_norm": 12.210674285888672, "learning_rate": 1.9367243454491297e-05, "loss": 1.5315, "step": 4283 }, { "epoch": 14.045901639344262, "grad_norm": 7.386124610900879, "learning_rate": 1.936687166918293e-05, "loss": 1.647, "step": 4284 }, { "epoch": 14.049180327868852, "grad_norm": 9.361241340637207, "learning_rate": 1.9366499778253346e-05, "loss": 1.5684, "step": 4285 }, { "epoch": 14.052459016393442, "grad_norm": 7.6344895362854, "learning_rate": 1.936612778170674e-05, "loss": 1.8477, "step": 4286 }, { "epoch": 14.055737704918032, "grad_norm": 8.341824531555176, "learning_rate": 1.9365755679547304e-05, "loss": 1.6514, "step": 4287 }, { "epoch": 14.059016393442622, "grad_norm": 8.205276489257812, "learning_rate": 1.9365383471779235e-05, "loss": 1.5327, "step": 4288 }, { "epoch": 14.062295081967212, "grad_norm": 10.93684196472168, "learning_rate": 1.9365011158406727e-05, "loss": 1.5786, "step": 4289 }, { "epoch": 14.065573770491802, "grad_norm": 8.603989601135254, "learning_rate": 1.9364638739433984e-05, "loss": 1.4761, "step": 4290 }, { "epoch": 14.068852459016393, "grad_norm": 8.528665542602539, "learning_rate": 1.93642662148652e-05, "loss": 1.7278, "step": 4291 }, { "epoch": 14.072131147540984, "grad_norm": 8.649527549743652, "learning_rate": 1.936389358470458e-05, "loss": 1.6306, "step": 4292 }, { "epoch": 14.075409836065575, "grad_norm": 11.67628288269043, "learning_rate": 1.9363520848956325e-05, "loss": 1.7109, "step": 4293 }, { "epoch": 14.078688524590165, "grad_norm": 12.257633209228516, "learning_rate": 1.9363148007624634e-05, "loss": 1.6973, "step": 4294 }, { "epoch": 14.081967213114755, "grad_norm": 8.869423866271973, "learning_rate": 1.9362775060713718e-05, "loss": 1.7583, "step": 4295 }, { "epoch": 14.085245901639345, "grad_norm": 6.564986228942871, "learning_rate": 1.9362402008227775e-05, "loss": 1.5398, "step": 4296 }, { "epoch": 14.088524590163935, "grad_norm": 6.9957275390625, "learning_rate": 1.9362028850171016e-05, "loss": 1.5928, "step": 4297 }, { "epoch": 14.091803278688525, "grad_norm": 8.193568229675293, "learning_rate": 1.9361655586547648e-05, "loss": 1.6821, "step": 4298 }, { "epoch": 14.095081967213115, "grad_norm": 8.932961463928223, "learning_rate": 1.936128221736188e-05, "loss": 1.458, "step": 4299 }, { "epoch": 14.098360655737705, "grad_norm": 7.830617427825928, "learning_rate": 1.936090874261792e-05, "loss": 1.689, "step": 4300 }, { "epoch": 14.101639344262296, "grad_norm": 8.493889808654785, "learning_rate": 1.936053516231998e-05, "loss": 1.6118, "step": 4301 }, { "epoch": 14.104918032786886, "grad_norm": 8.27542495727539, "learning_rate": 1.9360161476472274e-05, "loss": 1.4243, "step": 4302 }, { "epoch": 14.108196721311476, "grad_norm": 8.414350509643555, "learning_rate": 1.935978768507902e-05, "loss": 1.5605, "step": 4303 }, { "epoch": 14.111475409836066, "grad_norm": 11.964371681213379, "learning_rate": 1.9359413788144423e-05, "loss": 1.5249, "step": 4304 }, { "epoch": 14.114754098360656, "grad_norm": 18.274879455566406, "learning_rate": 1.9359039785672707e-05, "loss": 1.6008, "step": 4305 }, { "epoch": 14.118032786885246, "grad_norm": 10.764945983886719, "learning_rate": 1.9358665677668082e-05, "loss": 1.5815, "step": 4306 }, { "epoch": 14.121311475409836, "grad_norm": 8.497701644897461, "learning_rate": 1.9358291464134776e-05, "loss": 1.4502, "step": 4307 }, { "epoch": 14.124590163934426, "grad_norm": 7.243794918060303, "learning_rate": 1.9357917145077004e-05, "loss": 1.6748, "step": 4308 }, { "epoch": 14.127868852459017, "grad_norm": 15.440591812133789, "learning_rate": 1.9357542720498982e-05, "loss": 1.7808, "step": 4309 }, { "epoch": 14.131147540983607, "grad_norm": 7.591920375823975, "learning_rate": 1.9357168190404937e-05, "loss": 1.2952, "step": 4310 }, { "epoch": 14.134426229508197, "grad_norm": 13.965633392333984, "learning_rate": 1.9356793554799094e-05, "loss": 1.5073, "step": 4311 }, { "epoch": 14.137704918032787, "grad_norm": 9.993762016296387, "learning_rate": 1.9356418813685676e-05, "loss": 1.5837, "step": 4312 }, { "epoch": 14.140983606557377, "grad_norm": 9.824346542358398, "learning_rate": 1.9356043967068903e-05, "loss": 1.5308, "step": 4313 }, { "epoch": 14.144262295081967, "grad_norm": 19.3183536529541, "learning_rate": 1.9355669014953008e-05, "loss": 1.5222, "step": 4314 }, { "epoch": 14.147540983606557, "grad_norm": 8.91763973236084, "learning_rate": 1.9355293957342222e-05, "loss": 1.8047, "step": 4315 }, { "epoch": 14.150819672131147, "grad_norm": 10.909320831298828, "learning_rate": 1.9354918794240768e-05, "loss": 1.5186, "step": 4316 }, { "epoch": 14.154098360655738, "grad_norm": 7.3830108642578125, "learning_rate": 1.9354543525652872e-05, "loss": 1.8713, "step": 4317 }, { "epoch": 14.157377049180328, "grad_norm": 16.3859806060791, "learning_rate": 1.9354168151582778e-05, "loss": 1.6587, "step": 4318 }, { "epoch": 14.160655737704918, "grad_norm": 7.59255838394165, "learning_rate": 1.935379267203471e-05, "loss": 1.7207, "step": 4319 }, { "epoch": 14.163934426229508, "grad_norm": 8.946880340576172, "learning_rate": 1.9353417087012903e-05, "loss": 1.4771, "step": 4320 }, { "epoch": 14.167213114754098, "grad_norm": 9.887306213378906, "learning_rate": 1.9353041396521593e-05, "loss": 1.5449, "step": 4321 }, { "epoch": 14.170491803278688, "grad_norm": 9.79620361328125, "learning_rate": 1.9352665600565018e-05, "loss": 1.5103, "step": 4322 }, { "epoch": 14.173770491803278, "grad_norm": 7.594998359680176, "learning_rate": 1.9352289699147416e-05, "loss": 1.5835, "step": 4323 }, { "epoch": 14.177049180327868, "grad_norm": 8.270757675170898, "learning_rate": 1.935191369227302e-05, "loss": 1.5247, "step": 4324 }, { "epoch": 14.180327868852459, "grad_norm": 9.269370079040527, "learning_rate": 1.9351537579946075e-05, "loss": 1.6179, "step": 4325 }, { "epoch": 14.183606557377049, "grad_norm": 11.408978462219238, "learning_rate": 1.935116136217082e-05, "loss": 1.6843, "step": 4326 }, { "epoch": 14.186885245901639, "grad_norm": 9.951693534851074, "learning_rate": 1.9350785038951498e-05, "loss": 1.6091, "step": 4327 }, { "epoch": 14.190163934426229, "grad_norm": 7.961082935333252, "learning_rate": 1.9350408610292353e-05, "loss": 1.6047, "step": 4328 }, { "epoch": 14.193442622950819, "grad_norm": 10.893746376037598, "learning_rate": 1.935003207619763e-05, "loss": 1.4072, "step": 4329 }, { "epoch": 14.19672131147541, "grad_norm": 8.148015975952148, "learning_rate": 1.9349655436671572e-05, "loss": 1.6377, "step": 4330 }, { "epoch": 14.2, "grad_norm": 10.21658992767334, "learning_rate": 1.9349278691718426e-05, "loss": 1.6038, "step": 4331 }, { "epoch": 14.20327868852459, "grad_norm": 7.685039043426514, "learning_rate": 1.9348901841342448e-05, "loss": 1.666, "step": 4332 }, { "epoch": 14.20655737704918, "grad_norm": 10.869845390319824, "learning_rate": 1.9348524885547876e-05, "loss": 1.6187, "step": 4333 }, { "epoch": 14.20983606557377, "grad_norm": 8.860671043395996, "learning_rate": 1.9348147824338967e-05, "loss": 1.7764, "step": 4334 }, { "epoch": 14.21311475409836, "grad_norm": 9.435059547424316, "learning_rate": 1.934777065771997e-05, "loss": 1.6042, "step": 4335 }, { "epoch": 14.216393442622952, "grad_norm": 15.13699722290039, "learning_rate": 1.9347393385695143e-05, "loss": 1.7446, "step": 4336 }, { "epoch": 14.219672131147542, "grad_norm": 8.254712104797363, "learning_rate": 1.9347016008268736e-05, "loss": 1.5874, "step": 4337 }, { "epoch": 14.222950819672132, "grad_norm": 9.277929306030273, "learning_rate": 1.9346638525445003e-05, "loss": 1.5598, "step": 4338 }, { "epoch": 14.226229508196722, "grad_norm": 9.873025894165039, "learning_rate": 1.9346260937228206e-05, "loss": 1.6018, "step": 4339 }, { "epoch": 14.229508196721312, "grad_norm": 8.951099395751953, "learning_rate": 1.9345883243622597e-05, "loss": 1.644, "step": 4340 }, { "epoch": 14.232786885245902, "grad_norm": 8.632999420166016, "learning_rate": 1.9345505444632436e-05, "loss": 1.6223, "step": 4341 }, { "epoch": 14.236065573770492, "grad_norm": 8.936878204345703, "learning_rate": 1.9345127540261984e-05, "loss": 1.8499, "step": 4342 }, { "epoch": 14.239344262295083, "grad_norm": 7.480454921722412, "learning_rate": 1.9344749530515504e-05, "loss": 1.4905, "step": 4343 }, { "epoch": 14.242622950819673, "grad_norm": 8.945237159729004, "learning_rate": 1.9344371415397258e-05, "loss": 1.4736, "step": 4344 }, { "epoch": 14.245901639344263, "grad_norm": 9.595574378967285, "learning_rate": 1.9343993194911508e-05, "loss": 1.6522, "step": 4345 }, { "epoch": 14.249180327868853, "grad_norm": 9.361688613891602, "learning_rate": 1.934361486906252e-05, "loss": 1.6565, "step": 4346 }, { "epoch": 14.252459016393443, "grad_norm": 8.975278854370117, "learning_rate": 1.934323643785456e-05, "loss": 1.7656, "step": 4347 }, { "epoch": 14.255737704918033, "grad_norm": 7.0991129875183105, "learning_rate": 1.934285790129189e-05, "loss": 1.7031, "step": 4348 }, { "epoch": 14.259016393442623, "grad_norm": 12.158851623535156, "learning_rate": 1.9342479259378787e-05, "loss": 1.717, "step": 4349 }, { "epoch": 14.262295081967213, "grad_norm": 9.733933448791504, "learning_rate": 1.9342100512119514e-05, "loss": 1.6224, "step": 4350 }, { "epoch": 14.265573770491804, "grad_norm": 6.766531467437744, "learning_rate": 1.9341721659518347e-05, "loss": 1.6543, "step": 4351 }, { "epoch": 14.268852459016394, "grad_norm": 9.392786026000977, "learning_rate": 1.9341342701579554e-05, "loss": 1.4597, "step": 4352 }, { "epoch": 14.272131147540984, "grad_norm": 11.580540657043457, "learning_rate": 1.934096363830741e-05, "loss": 1.4165, "step": 4353 }, { "epoch": 14.275409836065574, "grad_norm": 11.188146591186523, "learning_rate": 1.934058446970619e-05, "loss": 1.7454, "step": 4354 }, { "epoch": 14.278688524590164, "grad_norm": 8.497528076171875, "learning_rate": 1.9340205195780165e-05, "loss": 1.4858, "step": 4355 }, { "epoch": 14.281967213114754, "grad_norm": 7.815674304962158, "learning_rate": 1.9339825816533614e-05, "loss": 1.6519, "step": 4356 }, { "epoch": 14.285245901639344, "grad_norm": 6.462090969085693, "learning_rate": 1.9339446331970822e-05, "loss": 1.5203, "step": 4357 }, { "epoch": 14.288524590163934, "grad_norm": 11.088310241699219, "learning_rate": 1.933906674209606e-05, "loss": 1.3826, "step": 4358 }, { "epoch": 14.291803278688525, "grad_norm": 7.4757914543151855, "learning_rate": 1.933868704691361e-05, "loss": 1.5916, "step": 4359 }, { "epoch": 14.295081967213115, "grad_norm": 6.899500370025635, "learning_rate": 1.9338307246427753e-05, "loss": 1.6416, "step": 4360 }, { "epoch": 14.298360655737705, "grad_norm": 6.546374797821045, "learning_rate": 1.933792734064277e-05, "loss": 1.6458, "step": 4361 }, { "epoch": 14.301639344262295, "grad_norm": 7.487833023071289, "learning_rate": 1.933754732956295e-05, "loss": 1.6199, "step": 4362 }, { "epoch": 14.304918032786885, "grad_norm": 11.559814453125, "learning_rate": 1.9337167213192573e-05, "loss": 1.5198, "step": 4363 }, { "epoch": 14.308196721311475, "grad_norm": 7.044945240020752, "learning_rate": 1.933678699153593e-05, "loss": 1.488, "step": 4364 }, { "epoch": 14.311475409836065, "grad_norm": 9.404251098632812, "learning_rate": 1.9336406664597307e-05, "loss": 1.366, "step": 4365 }, { "epoch": 14.314754098360655, "grad_norm": 18.63395881652832, "learning_rate": 1.933602623238099e-05, "loss": 1.5601, "step": 4366 }, { "epoch": 14.318032786885245, "grad_norm": 12.825553894042969, "learning_rate": 1.9335645694891266e-05, "loss": 1.4513, "step": 4367 }, { "epoch": 14.321311475409836, "grad_norm": 11.297211647033691, "learning_rate": 1.9335265052132434e-05, "loss": 1.5337, "step": 4368 }, { "epoch": 14.324590163934426, "grad_norm": 7.928412914276123, "learning_rate": 1.9334884304108782e-05, "loss": 1.5439, "step": 4369 }, { "epoch": 14.327868852459016, "grad_norm": 9.99484634399414, "learning_rate": 1.9334503450824607e-05, "loss": 1.5046, "step": 4370 }, { "epoch": 14.331147540983606, "grad_norm": 12.05523681640625, "learning_rate": 1.9334122492284194e-05, "loss": 1.6592, "step": 4371 }, { "epoch": 14.334426229508196, "grad_norm": 11.910283088684082, "learning_rate": 1.933374142849185e-05, "loss": 1.7754, "step": 4372 }, { "epoch": 14.337704918032786, "grad_norm": 10.289871215820312, "learning_rate": 1.9333360259451864e-05, "loss": 1.709, "step": 4373 }, { "epoch": 14.340983606557376, "grad_norm": 6.9045586585998535, "learning_rate": 1.9332978985168536e-05, "loss": 1.5632, "step": 4374 }, { "epoch": 14.344262295081966, "grad_norm": 8.034867286682129, "learning_rate": 1.933259760564617e-05, "loss": 1.5271, "step": 4375 }, { "epoch": 14.347540983606557, "grad_norm": 8.031217575073242, "learning_rate": 1.933221612088906e-05, "loss": 1.4412, "step": 4376 }, { "epoch": 14.350819672131147, "grad_norm": 8.030874252319336, "learning_rate": 1.933183453090151e-05, "loss": 1.634, "step": 4377 }, { "epoch": 14.354098360655737, "grad_norm": 8.726683616638184, "learning_rate": 1.9331452835687823e-05, "loss": 1.5874, "step": 4378 }, { "epoch": 14.357377049180329, "grad_norm": 8.76218318939209, "learning_rate": 1.93310710352523e-05, "loss": 1.5381, "step": 4379 }, { "epoch": 14.360655737704919, "grad_norm": 12.291126251220703, "learning_rate": 1.9330689129599257e-05, "loss": 1.6282, "step": 4380 }, { "epoch": 14.363934426229509, "grad_norm": 8.333836555480957, "learning_rate": 1.9330307118732985e-05, "loss": 1.5654, "step": 4381 }, { "epoch": 14.3672131147541, "grad_norm": 8.774056434631348, "learning_rate": 1.93299250026578e-05, "loss": 1.7661, "step": 4382 }, { "epoch": 14.37049180327869, "grad_norm": 8.354362487792969, "learning_rate": 1.9329542781378014e-05, "loss": 1.615, "step": 4383 }, { "epoch": 14.37377049180328, "grad_norm": 9.10221004486084, "learning_rate": 1.932916045489793e-05, "loss": 1.6699, "step": 4384 }, { "epoch": 14.37704918032787, "grad_norm": 7.200887203216553, "learning_rate": 1.9328778023221866e-05, "loss": 1.6604, "step": 4385 }, { "epoch": 14.38032786885246, "grad_norm": 438.35882568359375, "learning_rate": 1.9328395486354127e-05, "loss": 1.52, "step": 4386 }, { "epoch": 14.38360655737705, "grad_norm": 9.136695861816406, "learning_rate": 1.9328012844299028e-05, "loss": 1.5613, "step": 4387 }, { "epoch": 14.38688524590164, "grad_norm": 13.92342758178711, "learning_rate": 1.932763009706089e-05, "loss": 1.5002, "step": 4388 }, { "epoch": 14.39016393442623, "grad_norm": 9.135313987731934, "learning_rate": 1.932724724464402e-05, "loss": 1.4773, "step": 4389 }, { "epoch": 14.39344262295082, "grad_norm": 7.936758518218994, "learning_rate": 1.932686428705274e-05, "loss": 1.686, "step": 4390 }, { "epoch": 14.39672131147541, "grad_norm": 10.541463851928711, "learning_rate": 1.932648122429137e-05, "loss": 1.9048, "step": 4391 }, { "epoch": 14.4, "grad_norm": 9.797664642333984, "learning_rate": 1.9326098056364224e-05, "loss": 1.7, "step": 4392 }, { "epoch": 14.40327868852459, "grad_norm": 19.605337142944336, "learning_rate": 1.9325714783275627e-05, "loss": 1.5208, "step": 4393 }, { "epoch": 14.40655737704918, "grad_norm": 26.743335723876953, "learning_rate": 1.9325331405029903e-05, "loss": 1.7061, "step": 4394 }, { "epoch": 14.40983606557377, "grad_norm": 14.794509887695312, "learning_rate": 1.9324947921631366e-05, "loss": 1.8506, "step": 4395 }, { "epoch": 14.41311475409836, "grad_norm": 8.202848434448242, "learning_rate": 1.932456433308435e-05, "loss": 1.7563, "step": 4396 }, { "epoch": 14.416393442622951, "grad_norm": 12.828094482421875, "learning_rate": 1.9324180639393173e-05, "loss": 1.8677, "step": 4397 }, { "epoch": 14.419672131147541, "grad_norm": 19.0810546875, "learning_rate": 1.9323796840562166e-05, "loss": 1.6938, "step": 4398 }, { "epoch": 14.422950819672131, "grad_norm": 24.888002395629883, "learning_rate": 1.9323412936595655e-05, "loss": 1.6567, "step": 4399 }, { "epoch": 14.426229508196721, "grad_norm": 10.250091552734375, "learning_rate": 1.9323028927497968e-05, "loss": 1.7246, "step": 4400 }, { "epoch": 14.429508196721311, "grad_norm": 10.003995895385742, "learning_rate": 1.932264481327344e-05, "loss": 1.8416, "step": 4401 }, { "epoch": 14.432786885245902, "grad_norm": 7.920346736907959, "learning_rate": 1.9322260593926394e-05, "loss": 1.7842, "step": 4402 }, { "epoch": 14.436065573770492, "grad_norm": 13.461569786071777, "learning_rate": 1.932187626946117e-05, "loss": 1.6338, "step": 4403 }, { "epoch": 14.439344262295082, "grad_norm": 13.450008392333984, "learning_rate": 1.93214918398821e-05, "loss": 1.6584, "step": 4404 }, { "epoch": 14.442622950819672, "grad_norm": 17.150007247924805, "learning_rate": 1.9321107305193516e-05, "loss": 1.7627, "step": 4405 }, { "epoch": 14.445901639344262, "grad_norm": 10.108270645141602, "learning_rate": 1.9320722665399755e-05, "loss": 1.6914, "step": 4406 }, { "epoch": 14.449180327868852, "grad_norm": 13.048121452331543, "learning_rate": 1.932033792050515e-05, "loss": 1.5801, "step": 4407 }, { "epoch": 14.452459016393442, "grad_norm": 15.473933219909668, "learning_rate": 1.9319953070514052e-05, "loss": 1.4819, "step": 4408 }, { "epoch": 14.455737704918032, "grad_norm": 9.481607437133789, "learning_rate": 1.931956811543079e-05, "loss": 1.9331, "step": 4409 }, { "epoch": 14.459016393442623, "grad_norm": 10.285446166992188, "learning_rate": 1.9319183055259708e-05, "loss": 1.676, "step": 4410 }, { "epoch": 14.462295081967213, "grad_norm": 79.67132568359375, "learning_rate": 1.931879789000515e-05, "loss": 1.6001, "step": 4411 }, { "epoch": 14.465573770491803, "grad_norm": 15.464369773864746, "learning_rate": 1.9318412619671453e-05, "loss": 1.6157, "step": 4412 }, { "epoch": 14.468852459016393, "grad_norm": 17.644126892089844, "learning_rate": 1.9318027244262964e-05, "loss": 1.7751, "step": 4413 }, { "epoch": 14.472131147540983, "grad_norm": 9.682342529296875, "learning_rate": 1.9317641763784034e-05, "loss": 1.697, "step": 4414 }, { "epoch": 14.475409836065573, "grad_norm": 13.936300277709961, "learning_rate": 1.9317256178239e-05, "loss": 1.5791, "step": 4415 }, { "epoch": 14.478688524590163, "grad_norm": 10.910991668701172, "learning_rate": 1.931687048763222e-05, "loss": 1.7124, "step": 4416 }, { "epoch": 14.481967213114753, "grad_norm": 10.720891952514648, "learning_rate": 1.9316484691968035e-05, "loss": 1.769, "step": 4417 }, { "epoch": 14.485245901639344, "grad_norm": 11.015676498413086, "learning_rate": 1.93160987912508e-05, "loss": 1.6416, "step": 4418 }, { "epoch": 14.488524590163934, "grad_norm": 9.076830863952637, "learning_rate": 1.931571278548486e-05, "loss": 1.6584, "step": 4419 }, { "epoch": 14.491803278688524, "grad_norm": 14.225931167602539, "learning_rate": 1.931532667467458e-05, "loss": 1.6621, "step": 4420 }, { "epoch": 14.495081967213114, "grad_norm": 10.975401878356934, "learning_rate": 1.9314940458824303e-05, "loss": 1.6975, "step": 4421 }, { "epoch": 14.498360655737706, "grad_norm": 11.193243980407715, "learning_rate": 1.9314554137938388e-05, "loss": 1.4968, "step": 4422 }, { "epoch": 14.501639344262294, "grad_norm": 9.91901969909668, "learning_rate": 1.9314167712021186e-05, "loss": 1.5312, "step": 4423 }, { "epoch": 14.504918032786886, "grad_norm": 11.64225959777832, "learning_rate": 1.9313781181077066e-05, "loss": 1.627, "step": 4424 }, { "epoch": 14.508196721311476, "grad_norm": 12.864988327026367, "learning_rate": 1.9313394545110375e-05, "loss": 1.6589, "step": 4425 }, { "epoch": 14.511475409836066, "grad_norm": 10.71990966796875, "learning_rate": 1.9313007804125476e-05, "loss": 1.749, "step": 4426 }, { "epoch": 14.514754098360656, "grad_norm": 77.61092376708984, "learning_rate": 1.9312620958126733e-05, "loss": 1.6106, "step": 4427 }, { "epoch": 14.518032786885247, "grad_norm": 9.956932067871094, "learning_rate": 1.9312234007118503e-05, "loss": 1.575, "step": 4428 }, { "epoch": 14.521311475409837, "grad_norm": 9.086175918579102, "learning_rate": 1.9311846951105154e-05, "loss": 1.8174, "step": 4429 }, { "epoch": 14.524590163934427, "grad_norm": 7.9392619132995605, "learning_rate": 1.931145979009105e-05, "loss": 1.793, "step": 4430 }, { "epoch": 14.527868852459017, "grad_norm": 12.037826538085938, "learning_rate": 1.9311072524080554e-05, "loss": 1.7019, "step": 4431 }, { "epoch": 14.531147540983607, "grad_norm": 13.294632911682129, "learning_rate": 1.9310685153078034e-05, "loss": 1.9111, "step": 4432 }, { "epoch": 14.534426229508197, "grad_norm": 11.298171043395996, "learning_rate": 1.931029767708786e-05, "loss": 1.8013, "step": 4433 }, { "epoch": 14.537704918032787, "grad_norm": 16.361467361450195, "learning_rate": 1.93099100961144e-05, "loss": 1.7202, "step": 4434 }, { "epoch": 14.540983606557377, "grad_norm": 11.216754913330078, "learning_rate": 1.930952241016202e-05, "loss": 1.7136, "step": 4435 }, { "epoch": 14.544262295081968, "grad_norm": 9.276288032531738, "learning_rate": 1.9309134619235096e-05, "loss": 1.7329, "step": 4436 }, { "epoch": 14.547540983606558, "grad_norm": 18.86239242553711, "learning_rate": 1.9308746723338005e-05, "loss": 1.8223, "step": 4437 }, { "epoch": 14.550819672131148, "grad_norm": 7.655670642852783, "learning_rate": 1.930835872247511e-05, "loss": 1.8408, "step": 4438 }, { "epoch": 14.554098360655738, "grad_norm": 10.267099380493164, "learning_rate": 1.9307970616650794e-05, "loss": 1.8501, "step": 4439 }, { "epoch": 14.557377049180328, "grad_norm": 7.666110515594482, "learning_rate": 1.9307582405869435e-05, "loss": 1.5991, "step": 4440 }, { "epoch": 14.560655737704918, "grad_norm": 15.559757232666016, "learning_rate": 1.9307194090135402e-05, "loss": 1.9243, "step": 4441 }, { "epoch": 14.563934426229508, "grad_norm": 10.078330039978027, "learning_rate": 1.9306805669453083e-05, "loss": 1.7388, "step": 4442 }, { "epoch": 14.567213114754098, "grad_norm": 12.893906593322754, "learning_rate": 1.9306417143826854e-05, "loss": 1.6287, "step": 4443 }, { "epoch": 14.570491803278689, "grad_norm": 8.661218643188477, "learning_rate": 1.930602851326109e-05, "loss": 1.7075, "step": 4444 }, { "epoch": 14.573770491803279, "grad_norm": 9.09465503692627, "learning_rate": 1.930563977776018e-05, "loss": 1.6982, "step": 4445 }, { "epoch": 14.577049180327869, "grad_norm": 8.893826484680176, "learning_rate": 1.930525093732851e-05, "loss": 1.7002, "step": 4446 }, { "epoch": 14.580327868852459, "grad_norm": 10.067910194396973, "learning_rate": 1.9304861991970454e-05, "loss": 1.5723, "step": 4447 }, { "epoch": 14.583606557377049, "grad_norm": 12.141783714294434, "learning_rate": 1.9304472941690412e-05, "loss": 1.7849, "step": 4448 }, { "epoch": 14.58688524590164, "grad_norm": 10.364639282226562, "learning_rate": 1.930408378649276e-05, "loss": 1.7095, "step": 4449 }, { "epoch": 14.59016393442623, "grad_norm": 9.9079008102417, "learning_rate": 1.9303694526381886e-05, "loss": 1.7183, "step": 4450 }, { "epoch": 14.59344262295082, "grad_norm": 13.036176681518555, "learning_rate": 1.9303305161362188e-05, "loss": 1.8687, "step": 4451 }, { "epoch": 14.59672131147541, "grad_norm": 11.058475494384766, "learning_rate": 1.930291569143805e-05, "loss": 1.7285, "step": 4452 }, { "epoch": 14.6, "grad_norm": 13.285865783691406, "learning_rate": 1.9302526116613863e-05, "loss": 1.531, "step": 4453 }, { "epoch": 14.60327868852459, "grad_norm": 7.851236820220947, "learning_rate": 1.9302136436894022e-05, "loss": 1.6853, "step": 4454 }, { "epoch": 14.60655737704918, "grad_norm": 8.070691108703613, "learning_rate": 1.9301746652282923e-05, "loss": 1.6392, "step": 4455 }, { "epoch": 14.60983606557377, "grad_norm": 9.958656311035156, "learning_rate": 1.930135676278496e-05, "loss": 1.6589, "step": 4456 }, { "epoch": 14.61311475409836, "grad_norm": 8.746475219726562, "learning_rate": 1.9300966768404526e-05, "loss": 1.6167, "step": 4457 }, { "epoch": 14.61639344262295, "grad_norm": 11.333358764648438, "learning_rate": 1.930057666914602e-05, "loss": 1.6111, "step": 4458 }, { "epoch": 14.61967213114754, "grad_norm": 9.121363639831543, "learning_rate": 1.9300186465013845e-05, "loss": 1.6284, "step": 4459 }, { "epoch": 14.62295081967213, "grad_norm": 8.302974700927734, "learning_rate": 1.9299796156012397e-05, "loss": 1.5645, "step": 4460 }, { "epoch": 14.62622950819672, "grad_norm": 15.526643753051758, "learning_rate": 1.9299405742146078e-05, "loss": 1.6523, "step": 4461 }, { "epoch": 14.62950819672131, "grad_norm": 13.193619728088379, "learning_rate": 1.929901522341929e-05, "loss": 1.6338, "step": 4462 }, { "epoch": 14.6327868852459, "grad_norm": 8.393623352050781, "learning_rate": 1.9298624599836435e-05, "loss": 1.6245, "step": 4463 }, { "epoch": 14.636065573770491, "grad_norm": 8.686366081237793, "learning_rate": 1.929823387140192e-05, "loss": 1.7056, "step": 4464 }, { "epoch": 14.639344262295083, "grad_norm": 8.56516170501709, "learning_rate": 1.9297843038120153e-05, "loss": 1.6367, "step": 4465 }, { "epoch": 14.642622950819671, "grad_norm": 9.133932113647461, "learning_rate": 1.9297452099995534e-05, "loss": 1.6924, "step": 4466 }, { "epoch": 14.645901639344263, "grad_norm": 8.595069885253906, "learning_rate": 1.929706105703248e-05, "loss": 1.7539, "step": 4467 }, { "epoch": 14.649180327868853, "grad_norm": 8.5179443359375, "learning_rate": 1.9296669909235394e-05, "loss": 1.792, "step": 4468 }, { "epoch": 14.652459016393443, "grad_norm": 11.06851863861084, "learning_rate": 1.929627865660869e-05, "loss": 1.6309, "step": 4469 }, { "epoch": 14.655737704918034, "grad_norm": 19.130111694335938, "learning_rate": 1.9295887299156777e-05, "loss": 1.5991, "step": 4470 }, { "epoch": 14.659016393442624, "grad_norm": 7.948769569396973, "learning_rate": 1.929549583688407e-05, "loss": 1.8521, "step": 4471 }, { "epoch": 14.662295081967214, "grad_norm": 9.092312812805176, "learning_rate": 1.929510426979498e-05, "loss": 1.5576, "step": 4472 }, { "epoch": 14.665573770491804, "grad_norm": 9.299659729003906, "learning_rate": 1.9294712597893933e-05, "loss": 1.6675, "step": 4473 }, { "epoch": 14.668852459016394, "grad_norm": 6.530439376831055, "learning_rate": 1.9294320821185328e-05, "loss": 1.6394, "step": 4474 }, { "epoch": 14.672131147540984, "grad_norm": 8.725584983825684, "learning_rate": 1.92939289396736e-05, "loss": 1.625, "step": 4475 }, { "epoch": 14.675409836065574, "grad_norm": 9.814778327941895, "learning_rate": 1.929353695336315e-05, "loss": 1.6309, "step": 4476 }, { "epoch": 14.678688524590164, "grad_norm": 9.841285705566406, "learning_rate": 1.9293144862258416e-05, "loss": 1.6372, "step": 4477 }, { "epoch": 14.681967213114755, "grad_norm": 9.108449935913086, "learning_rate": 1.929275266636381e-05, "loss": 1.7061, "step": 4478 }, { "epoch": 14.685245901639345, "grad_norm": 9.09070110321045, "learning_rate": 1.9292360365683757e-05, "loss": 1.7534, "step": 4479 }, { "epoch": 14.688524590163935, "grad_norm": 7.759023189544678, "learning_rate": 1.9291967960222676e-05, "loss": 1.4965, "step": 4480 }, { "epoch": 14.691803278688525, "grad_norm": 7.561288356781006, "learning_rate": 1.9291575449984995e-05, "loss": 1.7666, "step": 4481 }, { "epoch": 14.695081967213115, "grad_norm": 11.307239532470703, "learning_rate": 1.929118283497514e-05, "loss": 1.7139, "step": 4482 }, { "epoch": 14.698360655737705, "grad_norm": 8.770818710327148, "learning_rate": 1.929079011519754e-05, "loss": 1.5291, "step": 4483 }, { "epoch": 14.701639344262295, "grad_norm": 11.585281372070312, "learning_rate": 1.929039729065662e-05, "loss": 1.6689, "step": 4484 }, { "epoch": 14.704918032786885, "grad_norm": 9.408661842346191, "learning_rate": 1.9290004361356813e-05, "loss": 1.6353, "step": 4485 }, { "epoch": 14.708196721311475, "grad_norm": 8.768043518066406, "learning_rate": 1.9289611327302546e-05, "loss": 1.8652, "step": 4486 }, { "epoch": 14.711475409836066, "grad_norm": 10.715054512023926, "learning_rate": 1.9289218188498252e-05, "loss": 1.7415, "step": 4487 }, { "epoch": 14.714754098360656, "grad_norm": 7.302587032318115, "learning_rate": 1.9288824944948367e-05, "loss": 1.9175, "step": 4488 }, { "epoch": 14.718032786885246, "grad_norm": 8.969926834106445, "learning_rate": 1.928843159665732e-05, "loss": 1.7412, "step": 4489 }, { "epoch": 14.721311475409836, "grad_norm": 8.193726539611816, "learning_rate": 1.9288038143629547e-05, "loss": 1.626, "step": 4490 }, { "epoch": 14.724590163934426, "grad_norm": 18.739946365356445, "learning_rate": 1.9287644585869493e-05, "loss": 1.5829, "step": 4491 }, { "epoch": 14.727868852459016, "grad_norm": 7.289856433868408, "learning_rate": 1.9287250923381587e-05, "loss": 1.7393, "step": 4492 }, { "epoch": 14.731147540983606, "grad_norm": 12.853858947753906, "learning_rate": 1.928685715617027e-05, "loss": 1.6206, "step": 4493 }, { "epoch": 14.734426229508196, "grad_norm": 10.05057430267334, "learning_rate": 1.928646328423998e-05, "loss": 1.7034, "step": 4494 }, { "epoch": 14.737704918032787, "grad_norm": 9.039133071899414, "learning_rate": 1.9286069307595166e-05, "loss": 1.4824, "step": 4495 }, { "epoch": 14.740983606557377, "grad_norm": 8.962372779846191, "learning_rate": 1.9285675226240263e-05, "loss": 1.6257, "step": 4496 }, { "epoch": 14.744262295081967, "grad_norm": 7.931955337524414, "learning_rate": 1.9285281040179717e-05, "loss": 1.5063, "step": 4497 }, { "epoch": 14.747540983606557, "grad_norm": 7.186501502990723, "learning_rate": 1.9284886749417974e-05, "loss": 1.7915, "step": 4498 }, { "epoch": 14.750819672131147, "grad_norm": 7.860422611236572, "learning_rate": 1.928449235395948e-05, "loss": 1.7114, "step": 4499 }, { "epoch": 14.754098360655737, "grad_norm": 9.462503433227539, "learning_rate": 1.9284097853808678e-05, "loss": 1.7366, "step": 4500 }, { "epoch": 14.757377049180327, "grad_norm": 9.554061889648438, "learning_rate": 1.9283703248970022e-05, "loss": 1.6245, "step": 4501 }, { "epoch": 14.760655737704917, "grad_norm": 8.76281452178955, "learning_rate": 1.928330853944796e-05, "loss": 1.6692, "step": 4502 }, { "epoch": 14.763934426229508, "grad_norm": 8.84903335571289, "learning_rate": 1.9282913725246937e-05, "loss": 1.667, "step": 4503 }, { "epoch": 14.767213114754098, "grad_norm": 11.791482925415039, "learning_rate": 1.928251880637141e-05, "loss": 1.5991, "step": 4504 }, { "epoch": 14.770491803278688, "grad_norm": 13.964775085449219, "learning_rate": 1.928212378282584e-05, "loss": 1.4739, "step": 4505 }, { "epoch": 14.773770491803278, "grad_norm": 9.759378433227539, "learning_rate": 1.9281728654614667e-05, "loss": 1.6982, "step": 4506 }, { "epoch": 14.777049180327868, "grad_norm": 8.713841438293457, "learning_rate": 1.9281333421742358e-05, "loss": 1.6843, "step": 4507 }, { "epoch": 14.780327868852458, "grad_norm": 8.1864595413208, "learning_rate": 1.9280938084213358e-05, "loss": 1.8125, "step": 4508 }, { "epoch": 14.783606557377048, "grad_norm": 14.63178825378418, "learning_rate": 1.9280542642032137e-05, "loss": 1.5698, "step": 4509 }, { "epoch": 14.78688524590164, "grad_norm": 8.383896827697754, "learning_rate": 1.9280147095203143e-05, "loss": 1.5452, "step": 4510 }, { "epoch": 14.790163934426229, "grad_norm": 8.183445930480957, "learning_rate": 1.9279751443730844e-05, "loss": 1.5103, "step": 4511 }, { "epoch": 14.79344262295082, "grad_norm": 9.297981262207031, "learning_rate": 1.92793556876197e-05, "loss": 1.5581, "step": 4512 }, { "epoch": 14.79672131147541, "grad_norm": 10.023659706115723, "learning_rate": 1.9278959826874174e-05, "loss": 1.6021, "step": 4513 }, { "epoch": 14.8, "grad_norm": 9.251008987426758, "learning_rate": 1.9278563861498726e-05, "loss": 1.5483, "step": 4514 }, { "epoch": 14.80327868852459, "grad_norm": 14.832979202270508, "learning_rate": 1.9278167791497823e-05, "loss": 1.5537, "step": 4515 }, { "epoch": 14.806557377049181, "grad_norm": 9.086749076843262, "learning_rate": 1.9277771616875933e-05, "loss": 1.5901, "step": 4516 }, { "epoch": 14.809836065573771, "grad_norm": 11.311331748962402, "learning_rate": 1.927737533763752e-05, "loss": 1.6604, "step": 4517 }, { "epoch": 14.813114754098361, "grad_norm": 8.149856567382812, "learning_rate": 1.9276978953787052e-05, "loss": 1.7556, "step": 4518 }, { "epoch": 14.816393442622951, "grad_norm": 9.825830459594727, "learning_rate": 1.9276582465329003e-05, "loss": 1.6157, "step": 4519 }, { "epoch": 14.819672131147541, "grad_norm": 7.305123329162598, "learning_rate": 1.927618587226784e-05, "loss": 1.7385, "step": 4520 }, { "epoch": 14.822950819672132, "grad_norm": 30.594043731689453, "learning_rate": 1.927578917460804e-05, "loss": 1.5249, "step": 4521 }, { "epoch": 14.826229508196722, "grad_norm": 9.91596508026123, "learning_rate": 1.927539237235407e-05, "loss": 1.6204, "step": 4522 }, { "epoch": 14.829508196721312, "grad_norm": 10.341180801391602, "learning_rate": 1.9274995465510406e-05, "loss": 1.6655, "step": 4523 }, { "epoch": 14.832786885245902, "grad_norm": 8.802774429321289, "learning_rate": 1.9274598454081527e-05, "loss": 1.6646, "step": 4524 }, { "epoch": 14.836065573770492, "grad_norm": 35.47300720214844, "learning_rate": 1.927420133807191e-05, "loss": 1.6938, "step": 4525 }, { "epoch": 14.839344262295082, "grad_norm": 9.302033424377441, "learning_rate": 1.9273804117486024e-05, "loss": 1.7571, "step": 4526 }, { "epoch": 14.842622950819672, "grad_norm": 8.037651062011719, "learning_rate": 1.9273406792328355e-05, "loss": 1.4414, "step": 4527 }, { "epoch": 14.845901639344262, "grad_norm": 8.112993240356445, "learning_rate": 1.9273009362603385e-05, "loss": 1.5439, "step": 4528 }, { "epoch": 14.849180327868853, "grad_norm": 6.920777320861816, "learning_rate": 1.9272611828315592e-05, "loss": 1.5598, "step": 4529 }, { "epoch": 14.852459016393443, "grad_norm": 19.981239318847656, "learning_rate": 1.927221418946946e-05, "loss": 1.6726, "step": 4530 }, { "epoch": 14.855737704918033, "grad_norm": 8.58700942993164, "learning_rate": 1.9271816446069472e-05, "loss": 1.8572, "step": 4531 }, { "epoch": 14.859016393442623, "grad_norm": 12.941055297851562, "learning_rate": 1.9271418598120114e-05, "loss": 1.6184, "step": 4532 }, { "epoch": 14.862295081967213, "grad_norm": 9.96876335144043, "learning_rate": 1.927102064562587e-05, "loss": 1.6272, "step": 4533 }, { "epoch": 14.865573770491803, "grad_norm": 8.902267456054688, "learning_rate": 1.927062258859123e-05, "loss": 1.6877, "step": 4534 }, { "epoch": 14.868852459016393, "grad_norm": 8.2984619140625, "learning_rate": 1.927022442702068e-05, "loss": 1.7134, "step": 4535 }, { "epoch": 14.872131147540983, "grad_norm": 6.58944034576416, "learning_rate": 1.9269826160918714e-05, "loss": 1.7512, "step": 4536 }, { "epoch": 14.875409836065574, "grad_norm": 10.857598304748535, "learning_rate": 1.9269427790289813e-05, "loss": 1.5935, "step": 4537 }, { "epoch": 14.878688524590164, "grad_norm": 9.409242630004883, "learning_rate": 1.9269029315138483e-05, "loss": 1.6025, "step": 4538 }, { "epoch": 14.881967213114754, "grad_norm": 7.73929500579834, "learning_rate": 1.9268630735469207e-05, "loss": 1.8364, "step": 4539 }, { "epoch": 14.885245901639344, "grad_norm": 8.087221145629883, "learning_rate": 1.9268232051286483e-05, "loss": 1.6289, "step": 4540 }, { "epoch": 14.888524590163934, "grad_norm": 8.53503704071045, "learning_rate": 1.9267833262594805e-05, "loss": 1.5093, "step": 4541 }, { "epoch": 14.891803278688524, "grad_norm": 12.436113357543945, "learning_rate": 1.9267434369398672e-05, "loss": 1.7046, "step": 4542 }, { "epoch": 14.895081967213114, "grad_norm": 7.170222282409668, "learning_rate": 1.926703537170258e-05, "loss": 1.5205, "step": 4543 }, { "epoch": 14.898360655737704, "grad_norm": 10.682517051696777, "learning_rate": 1.926663626951103e-05, "loss": 1.6096, "step": 4544 }, { "epoch": 14.901639344262295, "grad_norm": 7.443078994750977, "learning_rate": 1.926623706282852e-05, "loss": 1.8005, "step": 4545 }, { "epoch": 14.904918032786885, "grad_norm": 8.17945384979248, "learning_rate": 1.9265837751659554e-05, "loss": 1.5742, "step": 4546 }, { "epoch": 14.908196721311475, "grad_norm": 23.030487060546875, "learning_rate": 1.9265438336008633e-05, "loss": 1.6152, "step": 4547 }, { "epoch": 14.911475409836065, "grad_norm": 8.69357681274414, "learning_rate": 1.926503881588026e-05, "loss": 1.6572, "step": 4548 }, { "epoch": 14.914754098360655, "grad_norm": 9.422393798828125, "learning_rate": 1.9264639191278944e-05, "loss": 1.553, "step": 4549 }, { "epoch": 14.918032786885245, "grad_norm": 8.264982223510742, "learning_rate": 1.9264239462209187e-05, "loss": 1.8452, "step": 4550 }, { "epoch": 14.921311475409835, "grad_norm": 9.645124435424805, "learning_rate": 1.9263839628675497e-05, "loss": 1.4951, "step": 4551 }, { "epoch": 14.924590163934425, "grad_norm": 8.484789848327637, "learning_rate": 1.9263439690682384e-05, "loss": 1.731, "step": 4552 }, { "epoch": 14.927868852459017, "grad_norm": 10.777974128723145, "learning_rate": 1.9263039648234354e-05, "loss": 1.4321, "step": 4553 }, { "epoch": 14.931147540983606, "grad_norm": 7.712950706481934, "learning_rate": 1.9262639501335926e-05, "loss": 1.624, "step": 4554 }, { "epoch": 14.934426229508198, "grad_norm": 9.636205673217773, "learning_rate": 1.9262239249991603e-05, "loss": 1.3223, "step": 4555 }, { "epoch": 14.937704918032788, "grad_norm": 6.735140323638916, "learning_rate": 1.9261838894205903e-05, "loss": 1.5061, "step": 4556 }, { "epoch": 14.940983606557378, "grad_norm": 15.072264671325684, "learning_rate": 1.9261438433983337e-05, "loss": 1.6436, "step": 4557 }, { "epoch": 14.944262295081968, "grad_norm": 7.327390193939209, "learning_rate": 1.926103786932843e-05, "loss": 1.5947, "step": 4558 }, { "epoch": 14.947540983606558, "grad_norm": 7.897475242614746, "learning_rate": 1.9260637200245685e-05, "loss": 1.6021, "step": 4559 }, { "epoch": 14.950819672131148, "grad_norm": 6.405426502227783, "learning_rate": 1.9260236426739628e-05, "loss": 1.6924, "step": 4560 }, { "epoch": 14.954098360655738, "grad_norm": 9.788792610168457, "learning_rate": 1.925983554881478e-05, "loss": 1.718, "step": 4561 }, { "epoch": 14.957377049180328, "grad_norm": 10.343052864074707, "learning_rate": 1.9259434566475656e-05, "loss": 1.5593, "step": 4562 }, { "epoch": 14.960655737704919, "grad_norm": 6.974435806274414, "learning_rate": 1.9259033479726783e-05, "loss": 1.7148, "step": 4563 }, { "epoch": 14.963934426229509, "grad_norm": 8.325489044189453, "learning_rate": 1.9258632288572676e-05, "loss": 1.4651, "step": 4564 }, { "epoch": 14.967213114754099, "grad_norm": 11.486846923828125, "learning_rate": 1.9258230993017866e-05, "loss": 1.8555, "step": 4565 }, { "epoch": 14.970491803278689, "grad_norm": 8.891592979431152, "learning_rate": 1.9257829593066876e-05, "loss": 1.5862, "step": 4566 }, { "epoch": 14.973770491803279, "grad_norm": 11.4635648727417, "learning_rate": 1.925742808872423e-05, "loss": 1.6646, "step": 4567 }, { "epoch": 14.97704918032787, "grad_norm": 9.12259292602539, "learning_rate": 1.925702647999446e-05, "loss": 1.6577, "step": 4568 }, { "epoch": 14.98032786885246, "grad_norm": 9.729551315307617, "learning_rate": 1.925662476688209e-05, "loss": 1.7324, "step": 4569 }, { "epoch": 14.98360655737705, "grad_norm": 11.200374603271484, "learning_rate": 1.9256222949391648e-05, "loss": 1.7095, "step": 4570 }, { "epoch": 14.98688524590164, "grad_norm": 8.05246639251709, "learning_rate": 1.9255821027527673e-05, "loss": 1.6409, "step": 4571 }, { "epoch": 14.99016393442623, "grad_norm": 9.537615776062012, "learning_rate": 1.9255419001294687e-05, "loss": 1.5112, "step": 4572 }, { "epoch": 14.99344262295082, "grad_norm": 9.208584785461426, "learning_rate": 1.9255016870697233e-05, "loss": 1.4392, "step": 4573 }, { "epoch": 14.99672131147541, "grad_norm": 10.248440742492676, "learning_rate": 1.925461463573984e-05, "loss": 1.5134, "step": 4574 }, { "epoch": 15.0, "grad_norm": 7.131521701812744, "learning_rate": 1.9254212296427043e-05, "loss": 1.5115, "step": 4575 }, { "epoch": 15.00327868852459, "grad_norm": 10.061299324035645, "learning_rate": 1.9253809852763382e-05, "loss": 1.5808, "step": 4576 }, { "epoch": 15.00655737704918, "grad_norm": 7.319720268249512, "learning_rate": 1.9253407304753395e-05, "loss": 1.5479, "step": 4577 }, { "epoch": 15.00983606557377, "grad_norm": 13.26547908782959, "learning_rate": 1.9253004652401616e-05, "loss": 1.5491, "step": 4578 }, { "epoch": 15.01311475409836, "grad_norm": 7.749277114868164, "learning_rate": 1.925260189571259e-05, "loss": 1.5366, "step": 4579 }, { "epoch": 15.01639344262295, "grad_norm": 6.882004737854004, "learning_rate": 1.9252199034690857e-05, "loss": 1.4734, "step": 4580 }, { "epoch": 15.01967213114754, "grad_norm": 7.209424018859863, "learning_rate": 1.925179606934096e-05, "loss": 1.7607, "step": 4581 }, { "epoch": 15.02295081967213, "grad_norm": 14.38086223602295, "learning_rate": 1.9251392999667446e-05, "loss": 1.7004, "step": 4582 }, { "epoch": 15.026229508196721, "grad_norm": 10.02975845336914, "learning_rate": 1.925098982567485e-05, "loss": 1.6177, "step": 4583 }, { "epoch": 15.029508196721311, "grad_norm": 10.547574043273926, "learning_rate": 1.9250586547367734e-05, "loss": 1.6033, "step": 4584 }, { "epoch": 15.032786885245901, "grad_norm": 13.597888946533203, "learning_rate": 1.925018316475063e-05, "loss": 1.7021, "step": 4585 }, { "epoch": 15.036065573770491, "grad_norm": 9.456025123596191, "learning_rate": 1.9249779677828096e-05, "loss": 1.5793, "step": 4586 }, { "epoch": 15.039344262295081, "grad_norm": 7.635336875915527, "learning_rate": 1.9249376086604677e-05, "loss": 1.5153, "step": 4587 }, { "epoch": 15.042622950819672, "grad_norm": 7.88023042678833, "learning_rate": 1.9248972391084925e-05, "loss": 1.5671, "step": 4588 }, { "epoch": 15.045901639344262, "grad_norm": 24.374431610107422, "learning_rate": 1.9248568591273395e-05, "loss": 1.6238, "step": 4589 }, { "epoch": 15.049180327868852, "grad_norm": 10.692521095275879, "learning_rate": 1.9248164687174636e-05, "loss": 1.6675, "step": 4590 }, { "epoch": 15.052459016393442, "grad_norm": 8.027568817138672, "learning_rate": 1.9247760678793206e-05, "loss": 1.4275, "step": 4591 }, { "epoch": 15.055737704918032, "grad_norm": 8.154793739318848, "learning_rate": 1.9247356566133662e-05, "loss": 1.5684, "step": 4592 }, { "epoch": 15.059016393442622, "grad_norm": 7.191619396209717, "learning_rate": 1.9246952349200553e-05, "loss": 1.6904, "step": 4593 }, { "epoch": 15.062295081967212, "grad_norm": 11.048150062561035, "learning_rate": 1.9246548027998444e-05, "loss": 1.6301, "step": 4594 }, { "epoch": 15.065573770491802, "grad_norm": 8.33261489868164, "learning_rate": 1.924614360253189e-05, "loss": 1.4922, "step": 4595 }, { "epoch": 15.068852459016393, "grad_norm": 8.392163276672363, "learning_rate": 1.9245739072805458e-05, "loss": 1.6211, "step": 4596 }, { "epoch": 15.072131147540984, "grad_norm": 8.9502534866333, "learning_rate": 1.92453344388237e-05, "loss": 1.4888, "step": 4597 }, { "epoch": 15.075409836065575, "grad_norm": 8.182008743286133, "learning_rate": 1.9244929700591185e-05, "loss": 1.4475, "step": 4598 }, { "epoch": 15.078688524590165, "grad_norm": 9.442512512207031, "learning_rate": 1.9244524858112474e-05, "loss": 1.7441, "step": 4599 }, { "epoch": 15.081967213114755, "grad_norm": 8.799263954162598, "learning_rate": 1.9244119911392136e-05, "loss": 1.3599, "step": 4600 }, { "epoch": 15.085245901639345, "grad_norm": 7.3723978996276855, "learning_rate": 1.924371486043473e-05, "loss": 1.6179, "step": 4601 }, { "epoch": 15.088524590163935, "grad_norm": 8.790154457092285, "learning_rate": 1.9243309705244833e-05, "loss": 1.5286, "step": 4602 }, { "epoch": 15.091803278688525, "grad_norm": 7.5529680252075195, "learning_rate": 1.9242904445827006e-05, "loss": 1.8062, "step": 4603 }, { "epoch": 15.095081967213115, "grad_norm": 11.181670188903809, "learning_rate": 1.9242499082185823e-05, "loss": 1.46, "step": 4604 }, { "epoch": 15.098360655737705, "grad_norm": 6.755658149719238, "learning_rate": 1.9242093614325852e-05, "loss": 1.6833, "step": 4605 }, { "epoch": 15.101639344262296, "grad_norm": 6.700267791748047, "learning_rate": 1.9241688042251667e-05, "loss": 1.5774, "step": 4606 }, { "epoch": 15.104918032786886, "grad_norm": 8.844646453857422, "learning_rate": 1.9241282365967838e-05, "loss": 1.4877, "step": 4607 }, { "epoch": 15.108196721311476, "grad_norm": 8.342638969421387, "learning_rate": 1.9240876585478943e-05, "loss": 1.5723, "step": 4608 }, { "epoch": 15.111475409836066, "grad_norm": 7.2179036140441895, "learning_rate": 1.9240470700789556e-05, "loss": 1.7371, "step": 4609 }, { "epoch": 15.114754098360656, "grad_norm": 10.049314498901367, "learning_rate": 1.9240064711904254e-05, "loss": 1.5232, "step": 4610 }, { "epoch": 15.118032786885246, "grad_norm": 14.660560607910156, "learning_rate": 1.9239658618827617e-05, "loss": 1.5754, "step": 4611 }, { "epoch": 15.121311475409836, "grad_norm": 8.684211730957031, "learning_rate": 1.923925242156422e-05, "loss": 1.592, "step": 4612 }, { "epoch": 15.124590163934426, "grad_norm": 12.13690185546875, "learning_rate": 1.9238846120118646e-05, "loss": 1.5698, "step": 4613 }, { "epoch": 15.127868852459017, "grad_norm": 10.031571388244629, "learning_rate": 1.9238439714495478e-05, "loss": 1.5991, "step": 4614 }, { "epoch": 15.131147540983607, "grad_norm": 8.448850631713867, "learning_rate": 1.9238033204699293e-05, "loss": 1.6062, "step": 4615 }, { "epoch": 15.134426229508197, "grad_norm": 7.7097907066345215, "learning_rate": 1.9237626590734684e-05, "loss": 1.5002, "step": 4616 }, { "epoch": 15.137704918032787, "grad_norm": 14.746013641357422, "learning_rate": 1.9237219872606225e-05, "loss": 1.5295, "step": 4617 }, { "epoch": 15.140983606557377, "grad_norm": 10.318989753723145, "learning_rate": 1.923681305031851e-05, "loss": 1.5864, "step": 4618 }, { "epoch": 15.144262295081967, "grad_norm": 11.241744041442871, "learning_rate": 1.9236406123876126e-05, "loss": 1.364, "step": 4619 }, { "epoch": 15.147540983606557, "grad_norm": 11.109684944152832, "learning_rate": 1.9235999093283655e-05, "loss": 1.5713, "step": 4620 }, { "epoch": 15.150819672131147, "grad_norm": 6.965385913848877, "learning_rate": 1.9235591958545693e-05, "loss": 1.5054, "step": 4621 }, { "epoch": 15.154098360655738, "grad_norm": 11.861509323120117, "learning_rate": 1.923518471966683e-05, "loss": 1.5278, "step": 4622 }, { "epoch": 15.157377049180328, "grad_norm": 8.556235313415527, "learning_rate": 1.9234777376651656e-05, "loss": 1.6216, "step": 4623 }, { "epoch": 15.160655737704918, "grad_norm": 10.491715431213379, "learning_rate": 1.923436992950477e-05, "loss": 1.3704, "step": 4624 }, { "epoch": 15.163934426229508, "grad_norm": 9.743182182312012, "learning_rate": 1.9233962378230753e-05, "loss": 1.626, "step": 4625 }, { "epoch": 15.167213114754098, "grad_norm": 8.077900886535645, "learning_rate": 1.9233554722834213e-05, "loss": 1.5117, "step": 4626 }, { "epoch": 15.170491803278688, "grad_norm": 9.083403587341309, "learning_rate": 1.9233146963319746e-05, "loss": 1.5571, "step": 4627 }, { "epoch": 15.173770491803278, "grad_norm": 8.361519813537598, "learning_rate": 1.9232739099691942e-05, "loss": 1.5497, "step": 4628 }, { "epoch": 15.177049180327868, "grad_norm": 9.775463104248047, "learning_rate": 1.923233113195541e-05, "loss": 1.5269, "step": 4629 }, { "epoch": 15.180327868852459, "grad_norm": 7.235503673553467, "learning_rate": 1.9231923060114742e-05, "loss": 1.5522, "step": 4630 }, { "epoch": 15.183606557377049, "grad_norm": 8.256921768188477, "learning_rate": 1.9231514884174544e-05, "loss": 1.6592, "step": 4631 }, { "epoch": 15.186885245901639, "grad_norm": 11.731782913208008, "learning_rate": 1.923110660413942e-05, "loss": 1.345, "step": 4632 }, { "epoch": 15.190163934426229, "grad_norm": 8.73988151550293, "learning_rate": 1.923069822001397e-05, "loss": 1.6807, "step": 4633 }, { "epoch": 15.193442622950819, "grad_norm": 7.816914081573486, "learning_rate": 1.9230289731802796e-05, "loss": 1.6064, "step": 4634 }, { "epoch": 15.19672131147541, "grad_norm": 7.284728050231934, "learning_rate": 1.9229881139510512e-05, "loss": 1.8042, "step": 4635 }, { "epoch": 15.2, "grad_norm": 6.811606407165527, "learning_rate": 1.922947244314172e-05, "loss": 1.6113, "step": 4636 }, { "epoch": 15.20327868852459, "grad_norm": 7.972897052764893, "learning_rate": 1.922906364270103e-05, "loss": 1.5996, "step": 4637 }, { "epoch": 15.20655737704918, "grad_norm": 7.855531215667725, "learning_rate": 1.9228654738193047e-05, "loss": 1.4473, "step": 4638 }, { "epoch": 15.20983606557377, "grad_norm": 12.607911109924316, "learning_rate": 1.9228245729622393e-05, "loss": 1.5801, "step": 4639 }, { "epoch": 15.21311475409836, "grad_norm": 7.927937030792236, "learning_rate": 1.922783661699367e-05, "loss": 1.4348, "step": 4640 }, { "epoch": 15.216393442622952, "grad_norm": 8.069917678833008, "learning_rate": 1.9227427400311497e-05, "loss": 1.4189, "step": 4641 }, { "epoch": 15.219672131147542, "grad_norm": 7.097611427307129, "learning_rate": 1.9227018079580487e-05, "loss": 1.6025, "step": 4642 }, { "epoch": 15.222950819672132, "grad_norm": 9.925882339477539, "learning_rate": 1.922660865480525e-05, "loss": 1.6267, "step": 4643 }, { "epoch": 15.226229508196722, "grad_norm": 20.497787475585938, "learning_rate": 1.922619912599041e-05, "loss": 1.5566, "step": 4644 }, { "epoch": 15.229508196721312, "grad_norm": 9.059619903564453, "learning_rate": 1.922578949314058e-05, "loss": 1.5371, "step": 4645 }, { "epoch": 15.232786885245902, "grad_norm": 8.785792350769043, "learning_rate": 1.9225379756260383e-05, "loss": 1.542, "step": 4646 }, { "epoch": 15.236065573770492, "grad_norm": 6.064661979675293, "learning_rate": 1.922496991535444e-05, "loss": 1.5658, "step": 4647 }, { "epoch": 15.239344262295083, "grad_norm": 8.474519729614258, "learning_rate": 1.9224559970427362e-05, "loss": 1.5732, "step": 4648 }, { "epoch": 15.242622950819673, "grad_norm": 7.306848049163818, "learning_rate": 1.9224149921483784e-05, "loss": 1.5627, "step": 4649 }, { "epoch": 15.245901639344263, "grad_norm": 22.2325382232666, "learning_rate": 1.9223739768528328e-05, "loss": 1.6973, "step": 4650 }, { "epoch": 15.249180327868853, "grad_norm": 9.445137023925781, "learning_rate": 1.922332951156561e-05, "loss": 1.593, "step": 4651 }, { "epoch": 15.252459016393443, "grad_norm": 10.147903442382812, "learning_rate": 1.9222919150600266e-05, "loss": 1.509, "step": 4652 }, { "epoch": 15.255737704918033, "grad_norm": 7.946404933929443, "learning_rate": 1.9222508685636917e-05, "loss": 1.6218, "step": 4653 }, { "epoch": 15.259016393442623, "grad_norm": 14.008086204528809, "learning_rate": 1.9222098116680193e-05, "loss": 1.4561, "step": 4654 }, { "epoch": 15.262295081967213, "grad_norm": 8.29315185546875, "learning_rate": 1.9221687443734724e-05, "loss": 1.7698, "step": 4655 }, { "epoch": 15.265573770491804, "grad_norm": 13.430312156677246, "learning_rate": 1.9221276666805142e-05, "loss": 1.3474, "step": 4656 }, { "epoch": 15.268852459016394, "grad_norm": 8.669612884521484, "learning_rate": 1.922086578589608e-05, "loss": 1.5491, "step": 4657 }, { "epoch": 15.272131147540984, "grad_norm": 9.866212844848633, "learning_rate": 1.9220454801012164e-05, "loss": 1.6562, "step": 4658 }, { "epoch": 15.275409836065574, "grad_norm": 9.179747581481934, "learning_rate": 1.9220043712158038e-05, "loss": 1.5217, "step": 4659 }, { "epoch": 15.278688524590164, "grad_norm": 8.560012817382812, "learning_rate": 1.9219632519338332e-05, "loss": 1.5928, "step": 4660 }, { "epoch": 15.281967213114754, "grad_norm": 7.335213661193848, "learning_rate": 1.921922122255768e-05, "loss": 1.7017, "step": 4661 }, { "epoch": 15.285245901639344, "grad_norm": 7.870155334472656, "learning_rate": 1.9218809821820725e-05, "loss": 1.5864, "step": 4662 }, { "epoch": 15.288524590163934, "grad_norm": 11.891434669494629, "learning_rate": 1.9218398317132107e-05, "loss": 1.6455, "step": 4663 }, { "epoch": 15.291803278688525, "grad_norm": 7.2154622077941895, "learning_rate": 1.921798670849646e-05, "loss": 1.7009, "step": 4664 }, { "epoch": 15.295081967213115, "grad_norm": 78.65385437011719, "learning_rate": 1.9217574995918428e-05, "loss": 1.5383, "step": 4665 }, { "epoch": 15.298360655737705, "grad_norm": 9.051289558410645, "learning_rate": 1.9217163179402657e-05, "loss": 1.4916, "step": 4666 }, { "epoch": 15.301639344262295, "grad_norm": 7.172353744506836, "learning_rate": 1.9216751258953786e-05, "loss": 1.605, "step": 4667 }, { "epoch": 15.304918032786885, "grad_norm": 14.542760848999023, "learning_rate": 1.921633923457646e-05, "loss": 1.5015, "step": 4668 }, { "epoch": 15.308196721311475, "grad_norm": 6.770301818847656, "learning_rate": 1.921592710627533e-05, "loss": 1.5586, "step": 4669 }, { "epoch": 15.311475409836065, "grad_norm": 14.242073059082031, "learning_rate": 1.9215514874055037e-05, "loss": 1.4714, "step": 4670 }, { "epoch": 15.314754098360655, "grad_norm": 9.538846015930176, "learning_rate": 1.921510253792023e-05, "loss": 1.7456, "step": 4671 }, { "epoch": 15.318032786885245, "grad_norm": 7.529801368713379, "learning_rate": 1.9214690097875565e-05, "loss": 1.726, "step": 4672 }, { "epoch": 15.321311475409836, "grad_norm": 6.562735557556152, "learning_rate": 1.9214277553925687e-05, "loss": 1.8335, "step": 4673 }, { "epoch": 15.324590163934426, "grad_norm": 8.513991355895996, "learning_rate": 1.921386490607525e-05, "loss": 1.6357, "step": 4674 }, { "epoch": 15.327868852459016, "grad_norm": 9.347460746765137, "learning_rate": 1.9213452154328905e-05, "loss": 1.7524, "step": 4675 }, { "epoch": 15.331147540983606, "grad_norm": 13.983185768127441, "learning_rate": 1.9213039298691306e-05, "loss": 1.5859, "step": 4676 }, { "epoch": 15.334426229508196, "grad_norm": 12.691304206848145, "learning_rate": 1.9212626339167114e-05, "loss": 1.5811, "step": 4677 }, { "epoch": 15.337704918032786, "grad_norm": 10.695256233215332, "learning_rate": 1.9212213275760976e-05, "loss": 1.6501, "step": 4678 }, { "epoch": 15.340983606557376, "grad_norm": 8.308568000793457, "learning_rate": 1.921180010847756e-05, "loss": 1.4812, "step": 4679 }, { "epoch": 15.344262295081966, "grad_norm": 8.91743278503418, "learning_rate": 1.9211386837321517e-05, "loss": 1.5713, "step": 4680 }, { "epoch": 15.347540983606557, "grad_norm": 7.533475875854492, "learning_rate": 1.921097346229751e-05, "loss": 1.5066, "step": 4681 }, { "epoch": 15.350819672131147, "grad_norm": 12.64835262298584, "learning_rate": 1.9210559983410198e-05, "loss": 1.5461, "step": 4682 }, { "epoch": 15.354098360655737, "grad_norm": 7.773105144500732, "learning_rate": 1.921014640066425e-05, "loss": 1.3174, "step": 4683 }, { "epoch": 15.357377049180329, "grad_norm": 16.30866050720215, "learning_rate": 1.9209732714064324e-05, "loss": 1.4249, "step": 4684 }, { "epoch": 15.360655737704919, "grad_norm": 15.508742332458496, "learning_rate": 1.9209318923615085e-05, "loss": 1.5266, "step": 4685 }, { "epoch": 15.363934426229509, "grad_norm": 7.288305759429932, "learning_rate": 1.92089050293212e-05, "loss": 1.6624, "step": 4686 }, { "epoch": 15.3672131147541, "grad_norm": 7.032402038574219, "learning_rate": 1.9208491031187333e-05, "loss": 1.4089, "step": 4687 }, { "epoch": 15.37049180327869, "grad_norm": 8.085877418518066, "learning_rate": 1.920807692921816e-05, "loss": 1.4104, "step": 4688 }, { "epoch": 15.37377049180328, "grad_norm": 14.842211723327637, "learning_rate": 1.9207662723418343e-05, "loss": 1.5398, "step": 4689 }, { "epoch": 15.37704918032787, "grad_norm": 7.069154739379883, "learning_rate": 1.9207248413792555e-05, "loss": 1.4309, "step": 4690 }, { "epoch": 15.38032786885246, "grad_norm": 10.202994346618652, "learning_rate": 1.9206834000345468e-05, "loss": 1.3901, "step": 4691 }, { "epoch": 15.38360655737705, "grad_norm": 9.566800117492676, "learning_rate": 1.9206419483081757e-05, "loss": 1.3118, "step": 4692 }, { "epoch": 15.38688524590164, "grad_norm": 8.690803527832031, "learning_rate": 1.9206004862006094e-05, "loss": 1.226, "step": 4693 }, { "epoch": 15.39016393442623, "grad_norm": 7.615360736846924, "learning_rate": 1.9205590137123152e-05, "loss": 1.5923, "step": 4694 }, { "epoch": 15.39344262295082, "grad_norm": 9.140518188476562, "learning_rate": 1.9205175308437614e-05, "loss": 1.4636, "step": 4695 }, { "epoch": 15.39672131147541, "grad_norm": 8.42721939086914, "learning_rate": 1.9204760375954147e-05, "loss": 1.5613, "step": 4696 }, { "epoch": 15.4, "grad_norm": 7.701144695281982, "learning_rate": 1.9204345339677442e-05, "loss": 1.6047, "step": 4697 }, { "epoch": 15.40327868852459, "grad_norm": 8.064120292663574, "learning_rate": 1.920393019961217e-05, "loss": 1.301, "step": 4698 }, { "epoch": 15.40655737704918, "grad_norm": 11.469313621520996, "learning_rate": 1.9203514955763018e-05, "loss": 1.5583, "step": 4699 }, { "epoch": 15.40983606557377, "grad_norm": 11.045919418334961, "learning_rate": 1.9203099608134663e-05, "loss": 1.6714, "step": 4700 }, { "epoch": 15.41311475409836, "grad_norm": 9.380766868591309, "learning_rate": 1.9202684156731793e-05, "loss": 1.6476, "step": 4701 }, { "epoch": 15.416393442622951, "grad_norm": 9.519185066223145, "learning_rate": 1.920226860155909e-05, "loss": 1.5339, "step": 4702 }, { "epoch": 15.419672131147541, "grad_norm": 6.20302677154541, "learning_rate": 1.9201852942621237e-05, "loss": 1.7717, "step": 4703 }, { "epoch": 15.422950819672131, "grad_norm": 7.440988540649414, "learning_rate": 1.9201437179922927e-05, "loss": 1.4998, "step": 4704 }, { "epoch": 15.426229508196721, "grad_norm": 10.036214828491211, "learning_rate": 1.9201021313468847e-05, "loss": 1.6025, "step": 4705 }, { "epoch": 15.429508196721311, "grad_norm": 7.390755653381348, "learning_rate": 1.9200605343263684e-05, "loss": 1.532, "step": 4706 }, { "epoch": 15.432786885245902, "grad_norm": 18.68461036682129, "learning_rate": 1.9200189269312133e-05, "loss": 1.4631, "step": 4707 }, { "epoch": 15.436065573770492, "grad_norm": 7.474997520446777, "learning_rate": 1.919977309161888e-05, "loss": 1.8755, "step": 4708 }, { "epoch": 15.439344262295082, "grad_norm": 6.719567775726318, "learning_rate": 1.9199356810188617e-05, "loss": 1.6406, "step": 4709 }, { "epoch": 15.442622950819672, "grad_norm": 6.453608989715576, "learning_rate": 1.9198940425026045e-05, "loss": 1.522, "step": 4710 }, { "epoch": 15.445901639344262, "grad_norm": 7.987715244293213, "learning_rate": 1.9198523936135852e-05, "loss": 1.6514, "step": 4711 }, { "epoch": 15.449180327868852, "grad_norm": 8.11986255645752, "learning_rate": 1.9198107343522737e-05, "loss": 1.3574, "step": 4712 }, { "epoch": 15.452459016393442, "grad_norm": 9.177483558654785, "learning_rate": 1.9197690647191404e-05, "loss": 1.402, "step": 4713 }, { "epoch": 15.455737704918032, "grad_norm": 8.471920013427734, "learning_rate": 1.919727384714654e-05, "loss": 1.5186, "step": 4714 }, { "epoch": 15.459016393442623, "grad_norm": 7.1087470054626465, "learning_rate": 1.919685694339285e-05, "loss": 1.571, "step": 4715 }, { "epoch": 15.462295081967213, "grad_norm": 8.696074485778809, "learning_rate": 1.9196439935935043e-05, "loss": 1.8569, "step": 4716 }, { "epoch": 15.465573770491803, "grad_norm": 10.279294967651367, "learning_rate": 1.9196022824777808e-05, "loss": 1.6047, "step": 4717 }, { "epoch": 15.468852459016393, "grad_norm": 8.635638236999512, "learning_rate": 1.9195605609925857e-05, "loss": 1.6001, "step": 4718 }, { "epoch": 15.472131147540983, "grad_norm": 6.516118049621582, "learning_rate": 1.9195188291383887e-05, "loss": 1.6003, "step": 4719 }, { "epoch": 15.475409836065573, "grad_norm": 7.215735912322998, "learning_rate": 1.919477086915661e-05, "loss": 1.4961, "step": 4720 }, { "epoch": 15.478688524590163, "grad_norm": 8.300944328308105, "learning_rate": 1.9194353343248733e-05, "loss": 1.5747, "step": 4721 }, { "epoch": 15.481967213114753, "grad_norm": 8.975954055786133, "learning_rate": 1.9193935713664965e-05, "loss": 1.4839, "step": 4722 }, { "epoch": 15.485245901639344, "grad_norm": 6.953572750091553, "learning_rate": 1.919351798041001e-05, "loss": 1.4324, "step": 4723 }, { "epoch": 15.488524590163934, "grad_norm": 8.057967185974121, "learning_rate": 1.9193100143488578e-05, "loss": 1.5994, "step": 4724 }, { "epoch": 15.491803278688524, "grad_norm": 11.10765552520752, "learning_rate": 1.9192682202905385e-05, "loss": 1.5078, "step": 4725 }, { "epoch": 15.495081967213114, "grad_norm": 7.889439582824707, "learning_rate": 1.9192264158665145e-05, "loss": 1.4128, "step": 4726 }, { "epoch": 15.498360655737706, "grad_norm": 9.039682388305664, "learning_rate": 1.9191846010772566e-05, "loss": 1.5186, "step": 4727 }, { "epoch": 15.501639344262294, "grad_norm": 12.238734245300293, "learning_rate": 1.9191427759232366e-05, "loss": 1.6008, "step": 4728 }, { "epoch": 15.504918032786886, "grad_norm": 8.345104217529297, "learning_rate": 1.9191009404049262e-05, "loss": 1.4504, "step": 4729 }, { "epoch": 15.508196721311476, "grad_norm": 8.837240219116211, "learning_rate": 1.919059094522797e-05, "loss": 1.469, "step": 4730 }, { "epoch": 15.511475409836066, "grad_norm": 9.098609924316406, "learning_rate": 1.9190172382773215e-05, "loss": 1.6909, "step": 4731 }, { "epoch": 15.514754098360656, "grad_norm": 7.685744285583496, "learning_rate": 1.9189753716689707e-05, "loss": 1.4731, "step": 4732 }, { "epoch": 15.518032786885247, "grad_norm": 7.824804782867432, "learning_rate": 1.9189334946982167e-05, "loss": 1.6528, "step": 4733 }, { "epoch": 15.521311475409837, "grad_norm": 7.423306941986084, "learning_rate": 1.9188916073655324e-05, "loss": 1.4316, "step": 4734 }, { "epoch": 15.524590163934427, "grad_norm": 8.824625968933105, "learning_rate": 1.91884970967139e-05, "loss": 1.6953, "step": 4735 }, { "epoch": 15.527868852459017, "grad_norm": 8.968450546264648, "learning_rate": 1.9188078016162615e-05, "loss": 1.5991, "step": 4736 }, { "epoch": 15.531147540983607, "grad_norm": 15.539884567260742, "learning_rate": 1.91876588320062e-05, "loss": 1.4915, "step": 4737 }, { "epoch": 15.534426229508197, "grad_norm": 8.38217544555664, "learning_rate": 1.9187239544249373e-05, "loss": 1.281, "step": 4738 }, { "epoch": 15.537704918032787, "grad_norm": 7.498559951782227, "learning_rate": 1.9186820152896876e-05, "loss": 1.5736, "step": 4739 }, { "epoch": 15.540983606557377, "grad_norm": 5.674959182739258, "learning_rate": 1.9186400657953424e-05, "loss": 1.5225, "step": 4740 }, { "epoch": 15.544262295081968, "grad_norm": 9.14244270324707, "learning_rate": 1.9185981059423756e-05, "loss": 1.5422, "step": 4741 }, { "epoch": 15.547540983606558, "grad_norm": 7.497686862945557, "learning_rate": 1.91855613573126e-05, "loss": 1.3997, "step": 4742 }, { "epoch": 15.550819672131148, "grad_norm": 8.041558265686035, "learning_rate": 1.9185141551624687e-05, "loss": 1.6851, "step": 4743 }, { "epoch": 15.554098360655738, "grad_norm": 8.487998008728027, "learning_rate": 1.9184721642364753e-05, "loss": 1.5962, "step": 4744 }, { "epoch": 15.557377049180328, "grad_norm": 7.110544204711914, "learning_rate": 1.9184301629537537e-05, "loss": 1.7173, "step": 4745 }, { "epoch": 15.560655737704918, "grad_norm": 8.254914283752441, "learning_rate": 1.9183881513147768e-05, "loss": 1.417, "step": 4746 }, { "epoch": 15.563934426229508, "grad_norm": 7.5674967765808105, "learning_rate": 1.9183461293200184e-05, "loss": 1.4824, "step": 4747 }, { "epoch": 15.567213114754098, "grad_norm": 9.198649406433105, "learning_rate": 1.9183040969699525e-05, "loss": 1.6567, "step": 4748 }, { "epoch": 15.570491803278689, "grad_norm": 8.073134422302246, "learning_rate": 1.9182620542650536e-05, "loss": 1.6042, "step": 4749 }, { "epoch": 15.573770491803279, "grad_norm": 8.205108642578125, "learning_rate": 1.918220001205795e-05, "loss": 1.5498, "step": 4750 }, { "epoch": 15.577049180327869, "grad_norm": 7.495077610015869, "learning_rate": 1.9181779377926513e-05, "loss": 1.5664, "step": 4751 }, { "epoch": 15.580327868852459, "grad_norm": 6.934393882751465, "learning_rate": 1.9181358640260964e-05, "loss": 1.5337, "step": 4752 }, { "epoch": 15.583606557377049, "grad_norm": 8.272793769836426, "learning_rate": 1.9180937799066053e-05, "loss": 1.3933, "step": 4753 }, { "epoch": 15.58688524590164, "grad_norm": 8.759895324707031, "learning_rate": 1.9180516854346525e-05, "loss": 1.5088, "step": 4754 }, { "epoch": 15.59016393442623, "grad_norm": 9.170106887817383, "learning_rate": 1.918009580610712e-05, "loss": 1.5952, "step": 4755 }, { "epoch": 15.59344262295082, "grad_norm": 9.822393417358398, "learning_rate": 1.917967465435259e-05, "loss": 1.4491, "step": 4756 }, { "epoch": 15.59672131147541, "grad_norm": 7.398726463317871, "learning_rate": 1.9179253399087684e-05, "loss": 1.6562, "step": 4757 }, { "epoch": 15.6, "grad_norm": 8.434330940246582, "learning_rate": 1.9178832040317153e-05, "loss": 1.4977, "step": 4758 }, { "epoch": 15.60327868852459, "grad_norm": 7.757427215576172, "learning_rate": 1.9178410578045746e-05, "loss": 1.5652, "step": 4759 }, { "epoch": 15.60655737704918, "grad_norm": 6.334539413452148, "learning_rate": 1.917798901227822e-05, "loss": 1.6807, "step": 4760 }, { "epoch": 15.60983606557377, "grad_norm": 11.287483215332031, "learning_rate": 1.917756734301932e-05, "loss": 1.3926, "step": 4761 }, { "epoch": 15.61311475409836, "grad_norm": 7.45743465423584, "learning_rate": 1.9177145570273808e-05, "loss": 1.5642, "step": 4762 }, { "epoch": 15.61639344262295, "grad_norm": 8.131412506103516, "learning_rate": 1.917672369404644e-05, "loss": 1.6257, "step": 4763 }, { "epoch": 15.61967213114754, "grad_norm": 8.430331230163574, "learning_rate": 1.917630171434197e-05, "loss": 1.5647, "step": 4764 }, { "epoch": 15.62295081967213, "grad_norm": 8.556373596191406, "learning_rate": 1.9175879631165154e-05, "loss": 1.5508, "step": 4765 }, { "epoch": 15.62622950819672, "grad_norm": 7.6060099601745605, "learning_rate": 1.9175457444520758e-05, "loss": 1.6626, "step": 4766 }, { "epoch": 15.62950819672131, "grad_norm": 8.964372634887695, "learning_rate": 1.9175035154413538e-05, "loss": 1.4575, "step": 4767 }, { "epoch": 15.6327868852459, "grad_norm": 7.033228397369385, "learning_rate": 1.9174612760848257e-05, "loss": 1.6726, "step": 4768 }, { "epoch": 15.636065573770491, "grad_norm": 7.538151264190674, "learning_rate": 1.9174190263829678e-05, "loss": 1.595, "step": 4769 }, { "epoch": 15.639344262295083, "grad_norm": 7.762611389160156, "learning_rate": 1.9173767663362566e-05, "loss": 1.7495, "step": 4770 }, { "epoch": 15.642622950819671, "grad_norm": 15.018902778625488, "learning_rate": 1.9173344959451684e-05, "loss": 1.5977, "step": 4771 }, { "epoch": 15.645901639344263, "grad_norm": 7.301663875579834, "learning_rate": 1.9172922152101802e-05, "loss": 1.5006, "step": 4772 }, { "epoch": 15.649180327868853, "grad_norm": 6.419182777404785, "learning_rate": 1.917249924131768e-05, "loss": 1.5708, "step": 4773 }, { "epoch": 15.652459016393443, "grad_norm": 6.642709255218506, "learning_rate": 1.91720762271041e-05, "loss": 1.6226, "step": 4774 }, { "epoch": 15.655737704918034, "grad_norm": 7.199892044067383, "learning_rate": 1.9171653109465818e-05, "loss": 1.5808, "step": 4775 }, { "epoch": 15.659016393442624, "grad_norm": 6.682309627532959, "learning_rate": 1.917122988840761e-05, "loss": 1.6675, "step": 4776 }, { "epoch": 15.662295081967214, "grad_norm": 8.555474281311035, "learning_rate": 1.9170806563934254e-05, "loss": 1.7251, "step": 4777 }, { "epoch": 15.665573770491804, "grad_norm": 7.576327800750732, "learning_rate": 1.9170383136050515e-05, "loss": 1.4756, "step": 4778 }, { "epoch": 15.668852459016394, "grad_norm": 11.416282653808594, "learning_rate": 1.9169959604761174e-05, "loss": 1.6694, "step": 4779 }, { "epoch": 15.672131147540984, "grad_norm": 6.621777057647705, "learning_rate": 1.9169535970071004e-05, "loss": 1.6351, "step": 4780 }, { "epoch": 15.675409836065574, "grad_norm": 13.40967845916748, "learning_rate": 1.916911223198478e-05, "loss": 1.6392, "step": 4781 }, { "epoch": 15.678688524590164, "grad_norm": 7.296464920043945, "learning_rate": 1.9168688390507283e-05, "loss": 1.5122, "step": 4782 }, { "epoch": 15.681967213114755, "grad_norm": 6.248904705047607, "learning_rate": 1.916826444564329e-05, "loss": 1.7915, "step": 4783 }, { "epoch": 15.685245901639345, "grad_norm": 14.541531562805176, "learning_rate": 1.9167840397397585e-05, "loss": 1.5391, "step": 4784 }, { "epoch": 15.688524590163935, "grad_norm": 8.588814735412598, "learning_rate": 1.9167416245774947e-05, "loss": 1.4343, "step": 4785 }, { "epoch": 15.691803278688525, "grad_norm": 8.345013618469238, "learning_rate": 1.916699199078016e-05, "loss": 1.3656, "step": 4786 }, { "epoch": 15.695081967213115, "grad_norm": 9.354768753051758, "learning_rate": 1.9166567632418004e-05, "loss": 1.4873, "step": 4787 }, { "epoch": 15.698360655737705, "grad_norm": 13.18817138671875, "learning_rate": 1.916614317069327e-05, "loss": 1.4126, "step": 4788 }, { "epoch": 15.701639344262295, "grad_norm": 7.844508647918701, "learning_rate": 1.916571860561074e-05, "loss": 1.6545, "step": 4789 }, { "epoch": 15.704918032786885, "grad_norm": 8.265909194946289, "learning_rate": 1.91652939371752e-05, "loss": 1.3416, "step": 4790 }, { "epoch": 15.708196721311475, "grad_norm": 9.487588882446289, "learning_rate": 1.9164869165391445e-05, "loss": 1.4331, "step": 4791 }, { "epoch": 15.711475409836066, "grad_norm": 10.630083084106445, "learning_rate": 1.9164444290264262e-05, "loss": 1.8159, "step": 4792 }, { "epoch": 15.714754098360656, "grad_norm": 8.812634468078613, "learning_rate": 1.916401931179844e-05, "loss": 1.5635, "step": 4793 }, { "epoch": 15.718032786885246, "grad_norm": 11.455883979797363, "learning_rate": 1.916359422999877e-05, "loss": 1.5027, "step": 4794 }, { "epoch": 15.721311475409836, "grad_norm": 8.171388626098633, "learning_rate": 1.916316904487005e-05, "loss": 1.5088, "step": 4795 }, { "epoch": 15.724590163934426, "grad_norm": 9.823205947875977, "learning_rate": 1.9162743756417067e-05, "loss": 1.6738, "step": 4796 }, { "epoch": 15.727868852459016, "grad_norm": 10.204655647277832, "learning_rate": 1.9162318364644625e-05, "loss": 1.5591, "step": 4797 }, { "epoch": 15.731147540983606, "grad_norm": 8.559812545776367, "learning_rate": 1.916189286955752e-05, "loss": 1.4519, "step": 4798 }, { "epoch": 15.734426229508196, "grad_norm": 7.9518303871154785, "learning_rate": 1.916146727116054e-05, "loss": 1.3687, "step": 4799 }, { "epoch": 15.737704918032787, "grad_norm": 8.855232238769531, "learning_rate": 1.9161041569458496e-05, "loss": 1.4011, "step": 4800 }, { "epoch": 15.740983606557377, "grad_norm": 9.991811752319336, "learning_rate": 1.916061576445618e-05, "loss": 1.4797, "step": 4801 }, { "epoch": 15.744262295081967, "grad_norm": 7.167050838470459, "learning_rate": 1.91601898561584e-05, "loss": 1.5825, "step": 4802 }, { "epoch": 15.747540983606557, "grad_norm": 11.72457218170166, "learning_rate": 1.9159763844569953e-05, "loss": 1.4397, "step": 4803 }, { "epoch": 15.750819672131147, "grad_norm": 13.486937522888184, "learning_rate": 1.9159337729695647e-05, "loss": 1.5813, "step": 4804 }, { "epoch": 15.754098360655737, "grad_norm": 8.791569709777832, "learning_rate": 1.9158911511540284e-05, "loss": 1.5601, "step": 4805 }, { "epoch": 15.757377049180327, "grad_norm": 23.524198532104492, "learning_rate": 1.915848519010867e-05, "loss": 1.6182, "step": 4806 }, { "epoch": 15.760655737704917, "grad_norm": 7.127262115478516, "learning_rate": 1.9158058765405613e-05, "loss": 1.5815, "step": 4807 }, { "epoch": 15.763934426229508, "grad_norm": 8.554178237915039, "learning_rate": 1.915763223743592e-05, "loss": 1.5625, "step": 4808 }, { "epoch": 15.767213114754098, "grad_norm": 8.238202095031738, "learning_rate": 1.9157205606204405e-05, "loss": 1.421, "step": 4809 }, { "epoch": 15.770491803278688, "grad_norm": 9.935218811035156, "learning_rate": 1.9156778871715876e-05, "loss": 1.6211, "step": 4810 }, { "epoch": 15.773770491803278, "grad_norm": 7.933630466461182, "learning_rate": 1.915635203397514e-05, "loss": 1.6506, "step": 4811 }, { "epoch": 15.777049180327868, "grad_norm": 8.992918014526367, "learning_rate": 1.915592509298702e-05, "loss": 1.4966, "step": 4812 }, { "epoch": 15.780327868852458, "grad_norm": 7.966134548187256, "learning_rate": 1.915549804875632e-05, "loss": 1.5872, "step": 4813 }, { "epoch": 15.783606557377048, "grad_norm": 11.191150665283203, "learning_rate": 1.915507090128786e-05, "loss": 1.4277, "step": 4814 }, { "epoch": 15.78688524590164, "grad_norm": 8.070208549499512, "learning_rate": 1.9154643650586463e-05, "loss": 1.5278, "step": 4815 }, { "epoch": 15.790163934426229, "grad_norm": 9.755576133728027, "learning_rate": 1.9154216296656936e-05, "loss": 1.6675, "step": 4816 }, { "epoch": 15.79344262295082, "grad_norm": 9.593367576599121, "learning_rate": 1.91537888395041e-05, "loss": 1.75, "step": 4817 }, { "epoch": 15.79672131147541, "grad_norm": 7.610499382019043, "learning_rate": 1.9153361279132784e-05, "loss": 1.6948, "step": 4818 }, { "epoch": 15.8, "grad_norm": 13.251016616821289, "learning_rate": 1.91529336155478e-05, "loss": 1.6287, "step": 4819 }, { "epoch": 15.80327868852459, "grad_norm": 8.87399673461914, "learning_rate": 1.9152505848753966e-05, "loss": 1.5574, "step": 4820 }, { "epoch": 15.806557377049181, "grad_norm": 9.091657638549805, "learning_rate": 1.9152077978756118e-05, "loss": 1.3806, "step": 4821 }, { "epoch": 15.809836065573771, "grad_norm": 10.402572631835938, "learning_rate": 1.9151650005559075e-05, "loss": 1.5254, "step": 4822 }, { "epoch": 15.813114754098361, "grad_norm": 10.10389518737793, "learning_rate": 1.9151221929167663e-05, "loss": 1.3918, "step": 4823 }, { "epoch": 15.816393442622951, "grad_norm": 7.637540817260742, "learning_rate": 1.915079374958671e-05, "loss": 1.5068, "step": 4824 }, { "epoch": 15.819672131147541, "grad_norm": 8.155203819274902, "learning_rate": 1.915036546682104e-05, "loss": 1.6531, "step": 4825 }, { "epoch": 15.822950819672132, "grad_norm": 10.905927658081055, "learning_rate": 1.9149937080875487e-05, "loss": 1.5933, "step": 4826 }, { "epoch": 15.826229508196722, "grad_norm": 10.053316116333008, "learning_rate": 1.9149508591754878e-05, "loss": 1.5913, "step": 4827 }, { "epoch": 15.829508196721312, "grad_norm": 8.04518985748291, "learning_rate": 1.914907999946405e-05, "loss": 1.5718, "step": 4828 }, { "epoch": 15.832786885245902, "grad_norm": 11.037951469421387, "learning_rate": 1.914865130400783e-05, "loss": 1.6294, "step": 4829 }, { "epoch": 15.836065573770492, "grad_norm": 7.0547027587890625, "learning_rate": 1.9148222505391055e-05, "loss": 1.6514, "step": 4830 }, { "epoch": 15.839344262295082, "grad_norm": 6.990579128265381, "learning_rate": 1.914779360361856e-05, "loss": 1.637, "step": 4831 }, { "epoch": 15.842622950819672, "grad_norm": 9.322208404541016, "learning_rate": 1.9147364598695176e-05, "loss": 1.5352, "step": 4832 }, { "epoch": 15.845901639344262, "grad_norm": 10.771870613098145, "learning_rate": 1.9146935490625753e-05, "loss": 1.7139, "step": 4833 }, { "epoch": 15.849180327868853, "grad_norm": 8.357375144958496, "learning_rate": 1.914650627941512e-05, "loss": 1.3594, "step": 4834 }, { "epoch": 15.852459016393443, "grad_norm": 14.931387901306152, "learning_rate": 1.9146076965068114e-05, "loss": 1.6292, "step": 4835 }, { "epoch": 15.855737704918033, "grad_norm": 6.027149200439453, "learning_rate": 1.9145647547589586e-05, "loss": 1.738, "step": 4836 }, { "epoch": 15.859016393442623, "grad_norm": 8.87999439239502, "learning_rate": 1.914521802698437e-05, "loss": 1.6367, "step": 4837 }, { "epoch": 15.862295081967213, "grad_norm": 10.872405052185059, "learning_rate": 1.914478840325731e-05, "loss": 1.6646, "step": 4838 }, { "epoch": 15.865573770491803, "grad_norm": 9.93960189819336, "learning_rate": 1.914435867641326e-05, "loss": 1.4829, "step": 4839 }, { "epoch": 15.868852459016393, "grad_norm": 8.387932777404785, "learning_rate": 1.914392884645705e-05, "loss": 1.4736, "step": 4840 }, { "epoch": 15.872131147540983, "grad_norm": 10.684578895568848, "learning_rate": 1.9143498913393535e-05, "loss": 1.3713, "step": 4841 }, { "epoch": 15.875409836065574, "grad_norm": 7.049987316131592, "learning_rate": 1.914306887722757e-05, "loss": 1.4751, "step": 4842 }, { "epoch": 15.878688524590164, "grad_norm": 8.646108627319336, "learning_rate": 1.9142638737963994e-05, "loss": 1.8003, "step": 4843 }, { "epoch": 15.881967213114754, "grad_norm": 10.054579734802246, "learning_rate": 1.9142208495607658e-05, "loss": 1.3547, "step": 4844 }, { "epoch": 15.885245901639344, "grad_norm": 8.615581512451172, "learning_rate": 1.9141778150163417e-05, "loss": 1.6421, "step": 4845 }, { "epoch": 15.888524590163934, "grad_norm": 10.359512329101562, "learning_rate": 1.914134770163612e-05, "loss": 1.2855, "step": 4846 }, { "epoch": 15.891803278688524, "grad_norm": 8.035500526428223, "learning_rate": 1.9140917150030628e-05, "loss": 1.5564, "step": 4847 }, { "epoch": 15.895081967213114, "grad_norm": 8.331493377685547, "learning_rate": 1.914048649535179e-05, "loss": 1.6028, "step": 4848 }, { "epoch": 15.898360655737704, "grad_norm": 8.826908111572266, "learning_rate": 1.914005573760446e-05, "loss": 1.698, "step": 4849 }, { "epoch": 15.901639344262295, "grad_norm": 9.350910186767578, "learning_rate": 1.91396248767935e-05, "loss": 1.6704, "step": 4850 }, { "epoch": 15.904918032786885, "grad_norm": 7.541502475738525, "learning_rate": 1.9139193912923764e-05, "loss": 1.4751, "step": 4851 }, { "epoch": 15.908196721311475, "grad_norm": 9.583415985107422, "learning_rate": 1.913876284600012e-05, "loss": 1.6294, "step": 4852 }, { "epoch": 15.911475409836065, "grad_norm": 8.02721881866455, "learning_rate": 1.9138331676027418e-05, "loss": 1.6345, "step": 4853 }, { "epoch": 15.914754098360655, "grad_norm": 7.803502559661865, "learning_rate": 1.9137900403010526e-05, "loss": 1.6184, "step": 4854 }, { "epoch": 15.918032786885245, "grad_norm": 7.585573196411133, "learning_rate": 1.9137469026954306e-05, "loss": 1.4314, "step": 4855 }, { "epoch": 15.921311475409835, "grad_norm": 9.289884567260742, "learning_rate": 1.9137037547863625e-05, "loss": 1.6235, "step": 4856 }, { "epoch": 15.924590163934425, "grad_norm": 8.571759223937988, "learning_rate": 1.9136605965743344e-05, "loss": 1.6646, "step": 4857 }, { "epoch": 15.927868852459017, "grad_norm": 8.02662467956543, "learning_rate": 1.9136174280598326e-05, "loss": 1.5264, "step": 4858 }, { "epoch": 15.931147540983606, "grad_norm": 9.687170028686523, "learning_rate": 1.9135742492433448e-05, "loss": 1.386, "step": 4859 }, { "epoch": 15.934426229508198, "grad_norm": 8.19593334197998, "learning_rate": 1.9135310601253575e-05, "loss": 1.5796, "step": 4860 }, { "epoch": 15.937704918032788, "grad_norm": 7.705286979675293, "learning_rate": 1.913487860706358e-05, "loss": 1.666, "step": 4861 }, { "epoch": 15.940983606557378, "grad_norm": 7.791813373565674, "learning_rate": 1.913444650986832e-05, "loss": 1.4871, "step": 4862 }, { "epoch": 15.944262295081968, "grad_norm": 11.186238288879395, "learning_rate": 1.913401430967269e-05, "loss": 1.7515, "step": 4863 }, { "epoch": 15.947540983606558, "grad_norm": 6.120187759399414, "learning_rate": 1.9133582006481547e-05, "loss": 1.4968, "step": 4864 }, { "epoch": 15.950819672131148, "grad_norm": 7.922364234924316, "learning_rate": 1.913314960029977e-05, "loss": 1.6978, "step": 4865 }, { "epoch": 15.954098360655738, "grad_norm": 8.049272537231445, "learning_rate": 1.9132717091132235e-05, "loss": 1.4548, "step": 4866 }, { "epoch": 15.957377049180328, "grad_norm": 7.004309177398682, "learning_rate": 1.913228447898382e-05, "loss": 1.8452, "step": 4867 }, { "epoch": 15.960655737704919, "grad_norm": 8.645654678344727, "learning_rate": 1.9131851763859402e-05, "loss": 1.5347, "step": 4868 }, { "epoch": 15.963934426229509, "grad_norm": 8.39875602722168, "learning_rate": 1.913141894576386e-05, "loss": 1.6479, "step": 4869 }, { "epoch": 15.967213114754099, "grad_norm": 17.052610397338867, "learning_rate": 1.9130986024702075e-05, "loss": 1.5215, "step": 4870 }, { "epoch": 15.970491803278689, "grad_norm": 7.839423179626465, "learning_rate": 1.9130553000678928e-05, "loss": 1.593, "step": 4871 }, { "epoch": 15.973770491803279, "grad_norm": 7.258364677429199, "learning_rate": 1.9130119873699303e-05, "loss": 1.6074, "step": 4872 }, { "epoch": 15.97704918032787, "grad_norm": 6.1590776443481445, "learning_rate": 1.9129686643768085e-05, "loss": 1.7185, "step": 4873 }, { "epoch": 15.98032786885246, "grad_norm": 6.4073662757873535, "learning_rate": 1.9129253310890156e-05, "loss": 1.6311, "step": 4874 }, { "epoch": 15.98360655737705, "grad_norm": 11.9574613571167, "learning_rate": 1.9128819875070406e-05, "loss": 1.688, "step": 4875 }, { "epoch": 15.98688524590164, "grad_norm": 8.107562065124512, "learning_rate": 1.9128386336313717e-05, "loss": 1.5537, "step": 4876 }, { "epoch": 15.99016393442623, "grad_norm": 9.74001693725586, "learning_rate": 1.9127952694624983e-05, "loss": 1.5103, "step": 4877 }, { "epoch": 15.99344262295082, "grad_norm": 7.736787796020508, "learning_rate": 1.9127518950009093e-05, "loss": 1.7305, "step": 4878 }, { "epoch": 15.99672131147541, "grad_norm": 10.380775451660156, "learning_rate": 1.9127085102470933e-05, "loss": 1.6299, "step": 4879 }, { "epoch": 16.0, "grad_norm": 9.395980834960938, "learning_rate": 1.9126651152015404e-05, "loss": 1.6416, "step": 4880 }, { "epoch": 16.003278688524592, "grad_norm": 7.4245524406433105, "learning_rate": 1.9126217098647388e-05, "loss": 1.3879, "step": 4881 }, { "epoch": 16.00655737704918, "grad_norm": 8.36386775970459, "learning_rate": 1.912578294237179e-05, "loss": 1.6604, "step": 4882 }, { "epoch": 16.009836065573772, "grad_norm": 7.891048908233643, "learning_rate": 1.9125348683193498e-05, "loss": 1.438, "step": 4883 }, { "epoch": 16.01311475409836, "grad_norm": 7.237792015075684, "learning_rate": 1.9124914321117413e-05, "loss": 1.4037, "step": 4884 }, { "epoch": 16.016393442622952, "grad_norm": 8.758609771728516, "learning_rate": 1.912447985614843e-05, "loss": 1.5234, "step": 4885 }, { "epoch": 16.01967213114754, "grad_norm": 8.56706714630127, "learning_rate": 1.912404528829145e-05, "loss": 1.4646, "step": 4886 }, { "epoch": 16.022950819672133, "grad_norm": 10.439284324645996, "learning_rate": 1.9123610617551374e-05, "loss": 1.6255, "step": 4887 }, { "epoch": 16.02622950819672, "grad_norm": 8.51159381866455, "learning_rate": 1.9123175843933103e-05, "loss": 1.4821, "step": 4888 }, { "epoch": 16.029508196721313, "grad_norm": 10.444500923156738, "learning_rate": 1.912274096744154e-05, "loss": 1.3918, "step": 4889 }, { "epoch": 16.0327868852459, "grad_norm": 7.817781925201416, "learning_rate": 1.9122305988081584e-05, "loss": 1.5237, "step": 4890 }, { "epoch": 16.036065573770493, "grad_norm": 9.213235855102539, "learning_rate": 1.912187090585814e-05, "loss": 1.582, "step": 4891 }, { "epoch": 16.03934426229508, "grad_norm": 7.769673824310303, "learning_rate": 1.9121435720776122e-05, "loss": 1.5156, "step": 4892 }, { "epoch": 16.042622950819673, "grad_norm": 6.315924644470215, "learning_rate": 1.912100043284043e-05, "loss": 1.457, "step": 4893 }, { "epoch": 16.04590163934426, "grad_norm": 8.735722541809082, "learning_rate": 1.9120565042055977e-05, "loss": 1.4004, "step": 4894 }, { "epoch": 16.049180327868854, "grad_norm": 7.155642986297607, "learning_rate": 1.912012954842767e-05, "loss": 1.5818, "step": 4895 }, { "epoch": 16.052459016393442, "grad_norm": 11.922045707702637, "learning_rate": 1.9119693951960417e-05, "loss": 1.4111, "step": 4896 }, { "epoch": 16.055737704918034, "grad_norm": 14.298126220703125, "learning_rate": 1.9119258252659134e-05, "loss": 1.3704, "step": 4897 }, { "epoch": 16.059016393442622, "grad_norm": 7.3794474601745605, "learning_rate": 1.9118822450528735e-05, "loss": 1.4204, "step": 4898 }, { "epoch": 16.062295081967214, "grad_norm": 8.549210548400879, "learning_rate": 1.911838654557413e-05, "loss": 1.5518, "step": 4899 }, { "epoch": 16.065573770491802, "grad_norm": 6.138871669769287, "learning_rate": 1.9117950537800235e-05, "loss": 1.4932, "step": 4900 }, { "epoch": 16.068852459016394, "grad_norm": 7.374150276184082, "learning_rate": 1.9117514427211966e-05, "loss": 1.5376, "step": 4901 }, { "epoch": 16.072131147540983, "grad_norm": 7.733357906341553, "learning_rate": 1.9117078213814243e-05, "loss": 1.4026, "step": 4902 }, { "epoch": 16.075409836065575, "grad_norm": 7.869483947753906, "learning_rate": 1.9116641897611986e-05, "loss": 1.5166, "step": 4903 }, { "epoch": 16.078688524590163, "grad_norm": 9.218618392944336, "learning_rate": 1.911620547861011e-05, "loss": 1.4194, "step": 4904 }, { "epoch": 16.081967213114755, "grad_norm": 7.598816871643066, "learning_rate": 1.911576895681354e-05, "loss": 1.4893, "step": 4905 }, { "epoch": 16.085245901639343, "grad_norm": 9.248200416564941, "learning_rate": 1.91153323322272e-05, "loss": 1.2771, "step": 4906 }, { "epoch": 16.088524590163935, "grad_norm": 8.322046279907227, "learning_rate": 1.9114895604856004e-05, "loss": 1.6201, "step": 4907 }, { "epoch": 16.091803278688523, "grad_norm": 7.393403053283691, "learning_rate": 1.911445877470489e-05, "loss": 1.5237, "step": 4908 }, { "epoch": 16.095081967213115, "grad_norm": 13.765730857849121, "learning_rate": 1.911402184177877e-05, "loss": 1.5271, "step": 4909 }, { "epoch": 16.098360655737704, "grad_norm": 7.855867862701416, "learning_rate": 1.9113584806082583e-05, "loss": 1.1948, "step": 4910 }, { "epoch": 16.101639344262296, "grad_norm": 6.613121509552002, "learning_rate": 1.9113147667621246e-05, "loss": 1.5952, "step": 4911 }, { "epoch": 16.104918032786884, "grad_norm": 7.627673149108887, "learning_rate": 1.9112710426399697e-05, "loss": 1.4236, "step": 4912 }, { "epoch": 16.108196721311476, "grad_norm": 8.149751663208008, "learning_rate": 1.9112273082422865e-05, "loss": 1.4049, "step": 4913 }, { "epoch": 16.111475409836064, "grad_norm": 7.626618385314941, "learning_rate": 1.9111835635695674e-05, "loss": 1.3397, "step": 4914 }, { "epoch": 16.114754098360656, "grad_norm": 8.621652603149414, "learning_rate": 1.911139808622307e-05, "loss": 1.5122, "step": 4915 }, { "epoch": 16.118032786885244, "grad_norm": 6.584492206573486, "learning_rate": 1.9110960434009973e-05, "loss": 1.5073, "step": 4916 }, { "epoch": 16.121311475409836, "grad_norm": 8.844049453735352, "learning_rate": 1.9110522679061326e-05, "loss": 1.4346, "step": 4917 }, { "epoch": 16.124590163934425, "grad_norm": 6.737952709197998, "learning_rate": 1.9110084821382064e-05, "loss": 1.6399, "step": 4918 }, { "epoch": 16.127868852459017, "grad_norm": 7.751693248748779, "learning_rate": 1.9109646860977122e-05, "loss": 1.5454, "step": 4919 }, { "epoch": 16.131147540983605, "grad_norm": 8.596935272216797, "learning_rate": 1.9109208797851438e-05, "loss": 1.5386, "step": 4920 }, { "epoch": 16.134426229508197, "grad_norm": 13.650845527648926, "learning_rate": 1.9108770632009958e-05, "loss": 1.604, "step": 4921 }, { "epoch": 16.137704918032785, "grad_norm": 8.343558311462402, "learning_rate": 1.9108332363457618e-05, "loss": 1.6357, "step": 4922 }, { "epoch": 16.140983606557377, "grad_norm": 7.6076436042785645, "learning_rate": 1.910789399219936e-05, "loss": 1.4668, "step": 4923 }, { "epoch": 16.14426229508197, "grad_norm": 8.687705993652344, "learning_rate": 1.9107455518240124e-05, "loss": 1.5037, "step": 4924 }, { "epoch": 16.147540983606557, "grad_norm": 7.487062454223633, "learning_rate": 1.910701694158486e-05, "loss": 1.4851, "step": 4925 }, { "epoch": 16.15081967213115, "grad_norm": 5.88943338394165, "learning_rate": 1.9106578262238515e-05, "loss": 1.7666, "step": 4926 }, { "epoch": 16.154098360655738, "grad_norm": 7.256438255310059, "learning_rate": 1.9106139480206024e-05, "loss": 1.5547, "step": 4927 }, { "epoch": 16.15737704918033, "grad_norm": 7.232799053192139, "learning_rate": 1.9105700595492347e-05, "loss": 1.429, "step": 4928 }, { "epoch": 16.160655737704918, "grad_norm": 6.8252339363098145, "learning_rate": 1.9105261608102427e-05, "loss": 1.4236, "step": 4929 }, { "epoch": 16.16393442622951, "grad_norm": 20.924551010131836, "learning_rate": 1.9104822518041218e-05, "loss": 1.3528, "step": 4930 }, { "epoch": 16.167213114754098, "grad_norm": 9.218170166015625, "learning_rate": 1.9104383325313662e-05, "loss": 1.4226, "step": 4931 }, { "epoch": 16.17049180327869, "grad_norm": 7.111834526062012, "learning_rate": 1.9103944029924724e-05, "loss": 1.5615, "step": 4932 }, { "epoch": 16.17377049180328, "grad_norm": 10.478367805480957, "learning_rate": 1.910350463187935e-05, "loss": 1.4095, "step": 4933 }, { "epoch": 16.17704918032787, "grad_norm": 18.32309913635254, "learning_rate": 1.9103065131182494e-05, "loss": 1.5549, "step": 4934 }, { "epoch": 16.18032786885246, "grad_norm": 8.961275100708008, "learning_rate": 1.9102625527839116e-05, "loss": 1.3921, "step": 4935 }, { "epoch": 16.18360655737705, "grad_norm": 6.904242992401123, "learning_rate": 1.910218582185417e-05, "loss": 1.3574, "step": 4936 }, { "epoch": 16.18688524590164, "grad_norm": 7.382119178771973, "learning_rate": 1.9101746013232613e-05, "loss": 1.5415, "step": 4937 }, { "epoch": 16.19016393442623, "grad_norm": 8.38260555267334, "learning_rate": 1.910130610197941e-05, "loss": 1.6608, "step": 4938 }, { "epoch": 16.19344262295082, "grad_norm": 8.77680492401123, "learning_rate": 1.9100866088099513e-05, "loss": 1.4282, "step": 4939 }, { "epoch": 16.19672131147541, "grad_norm": 8.601003646850586, "learning_rate": 1.9100425971597893e-05, "loss": 1.3413, "step": 4940 }, { "epoch": 16.2, "grad_norm": 11.895405769348145, "learning_rate": 1.9099985752479505e-05, "loss": 1.4895, "step": 4941 }, { "epoch": 16.20327868852459, "grad_norm": 6.952282428741455, "learning_rate": 1.9099545430749317e-05, "loss": 1.4752, "step": 4942 }, { "epoch": 16.20655737704918, "grad_norm": 7.966655254364014, "learning_rate": 1.9099105006412295e-05, "loss": 1.2881, "step": 4943 }, { "epoch": 16.20983606557377, "grad_norm": 13.631542205810547, "learning_rate": 1.90986644794734e-05, "loss": 1.6509, "step": 4944 }, { "epoch": 16.21311475409836, "grad_norm": 8.634750366210938, "learning_rate": 1.909822384993761e-05, "loss": 1.5527, "step": 4945 }, { "epoch": 16.21639344262295, "grad_norm": 9.176822662353516, "learning_rate": 1.9097783117809874e-05, "loss": 1.323, "step": 4946 }, { "epoch": 16.21967213114754, "grad_norm": 8.738844871520996, "learning_rate": 1.9097342283095185e-05, "loss": 1.3894, "step": 4947 }, { "epoch": 16.222950819672132, "grad_norm": 9.840709686279297, "learning_rate": 1.9096901345798496e-05, "loss": 1.5083, "step": 4948 }, { "epoch": 16.22622950819672, "grad_norm": 7.4121551513671875, "learning_rate": 1.909646030592479e-05, "loss": 1.3732, "step": 4949 }, { "epoch": 16.229508196721312, "grad_norm": 9.928022384643555, "learning_rate": 1.9096019163479033e-05, "loss": 1.5356, "step": 4950 }, { "epoch": 16.2327868852459, "grad_norm": 9.119237899780273, "learning_rate": 1.9095577918466205e-05, "loss": 1.4929, "step": 4951 }, { "epoch": 16.236065573770492, "grad_norm": 12.415729522705078, "learning_rate": 1.9095136570891277e-05, "loss": 1.6185, "step": 4952 }, { "epoch": 16.23934426229508, "grad_norm": 7.733429431915283, "learning_rate": 1.909469512075923e-05, "loss": 1.5974, "step": 4953 }, { "epoch": 16.242622950819673, "grad_norm": 6.928485870361328, "learning_rate": 1.909425356807504e-05, "loss": 1.5056, "step": 4954 }, { "epoch": 16.24590163934426, "grad_norm": 8.05333423614502, "learning_rate": 1.9093811912843684e-05, "loss": 1.4878, "step": 4955 }, { "epoch": 16.249180327868853, "grad_norm": 8.26458740234375, "learning_rate": 1.909337015507014e-05, "loss": 1.5156, "step": 4956 }, { "epoch": 16.25245901639344, "grad_norm": 6.533052921295166, "learning_rate": 1.9092928294759397e-05, "loss": 1.3833, "step": 4957 }, { "epoch": 16.255737704918033, "grad_norm": 7.564152717590332, "learning_rate": 1.9092486331916432e-05, "loss": 1.5771, "step": 4958 }, { "epoch": 16.25901639344262, "grad_norm": 9.500889778137207, "learning_rate": 1.909204426654623e-05, "loss": 1.489, "step": 4959 }, { "epoch": 16.262295081967213, "grad_norm": 11.246335983276367, "learning_rate": 1.9091602098653776e-05, "loss": 1.634, "step": 4960 }, { "epoch": 16.2655737704918, "grad_norm": 7.70950984954834, "learning_rate": 1.9091159828244055e-05, "loss": 1.6003, "step": 4961 }, { "epoch": 16.268852459016394, "grad_norm": 9.67523193359375, "learning_rate": 1.9090717455322052e-05, "loss": 1.5864, "step": 4962 }, { "epoch": 16.272131147540982, "grad_norm": 27.390710830688477, "learning_rate": 1.909027497989276e-05, "loss": 1.5801, "step": 4963 }, { "epoch": 16.275409836065574, "grad_norm": 12.847710609436035, "learning_rate": 1.9089832401961165e-05, "loss": 1.3398, "step": 4964 }, { "epoch": 16.278688524590162, "grad_norm": 10.941567420959473, "learning_rate": 1.908938972153226e-05, "loss": 1.3887, "step": 4965 }, { "epoch": 16.281967213114754, "grad_norm": 7.536496162414551, "learning_rate": 1.9088946938611034e-05, "loss": 1.302, "step": 4966 }, { "epoch": 16.285245901639342, "grad_norm": 6.91977596282959, "learning_rate": 1.9088504053202485e-05, "loss": 1.6384, "step": 4967 }, { "epoch": 16.288524590163934, "grad_norm": 6.473534107208252, "learning_rate": 1.9088061065311598e-05, "loss": 1.5811, "step": 4968 }, { "epoch": 16.291803278688526, "grad_norm": 7.313913822174072, "learning_rate": 1.9087617974943376e-05, "loss": 1.4854, "step": 4969 }, { "epoch": 16.295081967213115, "grad_norm": 8.679726600646973, "learning_rate": 1.9087174782102815e-05, "loss": 1.4978, "step": 4970 }, { "epoch": 16.298360655737707, "grad_norm": 6.776386260986328, "learning_rate": 1.9086731486794906e-05, "loss": 1.6289, "step": 4971 }, { "epoch": 16.301639344262295, "grad_norm": 6.583308696746826, "learning_rate": 1.908628808902465e-05, "loss": 1.6111, "step": 4972 }, { "epoch": 16.304918032786887, "grad_norm": 7.711925029754639, "learning_rate": 1.9085844588797054e-05, "loss": 1.3232, "step": 4973 }, { "epoch": 16.308196721311475, "grad_norm": 13.543785095214844, "learning_rate": 1.9085400986117108e-05, "loss": 1.4316, "step": 4974 }, { "epoch": 16.311475409836067, "grad_norm": 65.42059326171875, "learning_rate": 1.9084957280989824e-05, "loss": 1.5518, "step": 4975 }, { "epoch": 16.314754098360655, "grad_norm": 11.035801887512207, "learning_rate": 1.9084513473420198e-05, "loss": 1.665, "step": 4976 }, { "epoch": 16.318032786885247, "grad_norm": 8.724404335021973, "learning_rate": 1.9084069563413236e-05, "loss": 1.5703, "step": 4977 }, { "epoch": 16.321311475409836, "grad_norm": 9.03919792175293, "learning_rate": 1.908362555097395e-05, "loss": 1.4272, "step": 4978 }, { "epoch": 16.324590163934428, "grad_norm": 8.450577735900879, "learning_rate": 1.9083181436107336e-05, "loss": 1.498, "step": 4979 }, { "epoch": 16.327868852459016, "grad_norm": 11.537009239196777, "learning_rate": 1.908273721881841e-05, "loss": 1.3179, "step": 4980 }, { "epoch": 16.331147540983608, "grad_norm": 9.012798309326172, "learning_rate": 1.9082292899112175e-05, "loss": 1.6687, "step": 4981 }, { "epoch": 16.334426229508196, "grad_norm": 10.003662109375, "learning_rate": 1.9081848476993647e-05, "loss": 1.5232, "step": 4982 }, { "epoch": 16.337704918032788, "grad_norm": 10.232065200805664, "learning_rate": 1.9081403952467836e-05, "loss": 1.481, "step": 4983 }, { "epoch": 16.340983606557376, "grad_norm": 7.351672172546387, "learning_rate": 1.9080959325539746e-05, "loss": 1.6948, "step": 4984 }, { "epoch": 16.34426229508197, "grad_norm": 9.252422332763672, "learning_rate": 1.9080514596214406e-05, "loss": 1.3481, "step": 4985 }, { "epoch": 16.347540983606557, "grad_norm": 6.332942485809326, "learning_rate": 1.908006976449682e-05, "loss": 1.3911, "step": 4986 }, { "epoch": 16.35081967213115, "grad_norm": 6.898162841796875, "learning_rate": 1.9079624830392004e-05, "loss": 1.4529, "step": 4987 }, { "epoch": 16.354098360655737, "grad_norm": 10.254878997802734, "learning_rate": 1.9079179793904982e-05, "loss": 1.5066, "step": 4988 }, { "epoch": 16.35737704918033, "grad_norm": 7.867844104766846, "learning_rate": 1.9078734655040763e-05, "loss": 1.4512, "step": 4989 }, { "epoch": 16.360655737704917, "grad_norm": 15.44710922241211, "learning_rate": 1.9078289413804373e-05, "loss": 1.4773, "step": 4990 }, { "epoch": 16.36393442622951, "grad_norm": 10.084695816040039, "learning_rate": 1.907784407020083e-05, "loss": 1.4609, "step": 4991 }, { "epoch": 16.367213114754097, "grad_norm": 10.253591537475586, "learning_rate": 1.907739862423516e-05, "loss": 1.2356, "step": 4992 }, { "epoch": 16.37049180327869, "grad_norm": 8.849788665771484, "learning_rate": 1.907695307591238e-05, "loss": 1.5735, "step": 4993 }, { "epoch": 16.373770491803278, "grad_norm": 7.027200222015381, "learning_rate": 1.9076507425237518e-05, "loss": 1.3829, "step": 4994 }, { "epoch": 16.37704918032787, "grad_norm": 8.764825820922852, "learning_rate": 1.9076061672215595e-05, "loss": 1.4641, "step": 4995 }, { "epoch": 16.380327868852458, "grad_norm": 7.7662200927734375, "learning_rate": 1.907561581685164e-05, "loss": 1.5488, "step": 4996 }, { "epoch": 16.38360655737705, "grad_norm": 9.780139923095703, "learning_rate": 1.907516985915068e-05, "loss": 1.4702, "step": 4997 }, { "epoch": 16.386885245901638, "grad_norm": 8.39199447631836, "learning_rate": 1.907472379911775e-05, "loss": 1.4934, "step": 4998 }, { "epoch": 16.39016393442623, "grad_norm": 6.542855739593506, "learning_rate": 1.9074277636757867e-05, "loss": 1.5576, "step": 4999 }, { "epoch": 16.39344262295082, "grad_norm": 6.721310138702393, "learning_rate": 1.907383137207607e-05, "loss": 1.5674, "step": 5000 }, { "epoch": 16.39672131147541, "grad_norm": 8.000901222229004, "learning_rate": 1.907338500507739e-05, "loss": 1.5176, "step": 5001 }, { "epoch": 16.4, "grad_norm": 9.843482971191406, "learning_rate": 1.9072938535766864e-05, "loss": 1.5784, "step": 5002 }, { "epoch": 16.40327868852459, "grad_norm": 8.925456047058105, "learning_rate": 1.907249196414952e-05, "loss": 1.5559, "step": 5003 }, { "epoch": 16.40655737704918, "grad_norm": 19.650426864624023, "learning_rate": 1.9072045290230394e-05, "loss": 1.4993, "step": 5004 }, { "epoch": 16.40983606557377, "grad_norm": 13.315231323242188, "learning_rate": 1.9071598514014527e-05, "loss": 1.4896, "step": 5005 }, { "epoch": 16.41311475409836, "grad_norm": 7.680421829223633, "learning_rate": 1.9071151635506954e-05, "loss": 1.7742, "step": 5006 }, { "epoch": 16.41639344262295, "grad_norm": 8.28125, "learning_rate": 1.9070704654712715e-05, "loss": 1.6079, "step": 5007 }, { "epoch": 16.41967213114754, "grad_norm": 6.675710678100586, "learning_rate": 1.9070257571636852e-05, "loss": 1.4758, "step": 5008 }, { "epoch": 16.42295081967213, "grad_norm": 23.437082290649414, "learning_rate": 1.90698103862844e-05, "loss": 1.3669, "step": 5009 }, { "epoch": 16.42622950819672, "grad_norm": 10.657157897949219, "learning_rate": 1.906936309866041e-05, "loss": 1.377, "step": 5010 }, { "epoch": 16.42950819672131, "grad_norm": 6.693853378295898, "learning_rate": 1.9068915708769917e-05, "loss": 1.6414, "step": 5011 }, { "epoch": 16.432786885245903, "grad_norm": 9.56287956237793, "learning_rate": 1.906846821661797e-05, "loss": 1.3242, "step": 5012 }, { "epoch": 16.43606557377049, "grad_norm": 9.408652305603027, "learning_rate": 1.9068020622209616e-05, "loss": 1.4495, "step": 5013 }, { "epoch": 16.439344262295084, "grad_norm": 12.349905967712402, "learning_rate": 1.9067572925549903e-05, "loss": 1.5557, "step": 5014 }, { "epoch": 16.442622950819672, "grad_norm": 15.42333984375, "learning_rate": 1.9067125126643878e-05, "loss": 1.4961, "step": 5015 }, { "epoch": 16.445901639344264, "grad_norm": 11.85336971282959, "learning_rate": 1.9066677225496588e-05, "loss": 1.4036, "step": 5016 }, { "epoch": 16.449180327868852, "grad_norm": 8.992764472961426, "learning_rate": 1.9066229222113086e-05, "loss": 1.4309, "step": 5017 }, { "epoch": 16.452459016393444, "grad_norm": 10.776933670043945, "learning_rate": 1.9065781116498422e-05, "loss": 1.4331, "step": 5018 }, { "epoch": 16.455737704918032, "grad_norm": 13.806550979614258, "learning_rate": 1.906533290865765e-05, "loss": 1.4944, "step": 5019 }, { "epoch": 16.459016393442624, "grad_norm": 9.655937194824219, "learning_rate": 1.9064884598595826e-05, "loss": 1.396, "step": 5020 }, { "epoch": 16.462295081967213, "grad_norm": 9.88607120513916, "learning_rate": 1.9064436186318e-05, "loss": 1.4524, "step": 5021 }, { "epoch": 16.465573770491805, "grad_norm": 16.58027458190918, "learning_rate": 1.9063987671829234e-05, "loss": 1.5283, "step": 5022 }, { "epoch": 16.468852459016393, "grad_norm": 8.521886825561523, "learning_rate": 1.906353905513458e-05, "loss": 1.3892, "step": 5023 }, { "epoch": 16.472131147540985, "grad_norm": 9.010225296020508, "learning_rate": 1.90630903362391e-05, "loss": 1.4966, "step": 5024 }, { "epoch": 16.475409836065573, "grad_norm": 9.696020126342773, "learning_rate": 1.9062641515147856e-05, "loss": 1.4495, "step": 5025 }, { "epoch": 16.478688524590165, "grad_norm": 12.022340774536133, "learning_rate": 1.90621925918659e-05, "loss": 1.4639, "step": 5026 }, { "epoch": 16.481967213114753, "grad_norm": 9.769861221313477, "learning_rate": 1.9061743566398306e-05, "loss": 1.5703, "step": 5027 }, { "epoch": 16.485245901639345, "grad_norm": 9.260519981384277, "learning_rate": 1.906129443875013e-05, "loss": 1.3059, "step": 5028 }, { "epoch": 16.488524590163934, "grad_norm": 11.02465534210205, "learning_rate": 1.9060845208926438e-05, "loss": 1.4204, "step": 5029 }, { "epoch": 16.491803278688526, "grad_norm": 28.91628646850586, "learning_rate": 1.9060395876932296e-05, "loss": 1.5085, "step": 5030 }, { "epoch": 16.495081967213114, "grad_norm": 9.137979507446289, "learning_rate": 1.9059946442772768e-05, "loss": 1.554, "step": 5031 }, { "epoch": 16.498360655737706, "grad_norm": 10.228318214416504, "learning_rate": 1.9059496906452927e-05, "loss": 1.5552, "step": 5032 }, { "epoch": 16.501639344262294, "grad_norm": 11.097414016723633, "learning_rate": 1.9059047267977836e-05, "loss": 1.5762, "step": 5033 }, { "epoch": 16.504918032786886, "grad_norm": 8.99489688873291, "learning_rate": 1.9058597527352568e-05, "loss": 1.6089, "step": 5034 }, { "epoch": 16.508196721311474, "grad_norm": 8.004324913024902, "learning_rate": 1.9058147684582194e-05, "loss": 1.7026, "step": 5035 }, { "epoch": 16.511475409836066, "grad_norm": 7.607657432556152, "learning_rate": 1.9057697739671786e-05, "loss": 1.4066, "step": 5036 }, { "epoch": 16.514754098360655, "grad_norm": 10.892800331115723, "learning_rate": 1.9057247692626423e-05, "loss": 1.3623, "step": 5037 }, { "epoch": 16.518032786885247, "grad_norm": 13.020956993103027, "learning_rate": 1.9056797543451174e-05, "loss": 1.647, "step": 5038 }, { "epoch": 16.521311475409835, "grad_norm": 9.775672912597656, "learning_rate": 1.905634729215111e-05, "loss": 1.5605, "step": 5039 }, { "epoch": 16.524590163934427, "grad_norm": 12.565170288085938, "learning_rate": 1.905589693873132e-05, "loss": 1.3647, "step": 5040 }, { "epoch": 16.527868852459015, "grad_norm": 7.6862053871154785, "learning_rate": 1.9055446483196877e-05, "loss": 1.6953, "step": 5041 }, { "epoch": 16.531147540983607, "grad_norm": 7.982120037078857, "learning_rate": 1.9054995925552857e-05, "loss": 1.5125, "step": 5042 }, { "epoch": 16.534426229508195, "grad_norm": 9.451335906982422, "learning_rate": 1.905454526580434e-05, "loss": 1.5724, "step": 5043 }, { "epoch": 16.537704918032787, "grad_norm": 9.659408569335938, "learning_rate": 1.9054094503956416e-05, "loss": 1.6431, "step": 5044 }, { "epoch": 16.540983606557376, "grad_norm": 8.459065437316895, "learning_rate": 1.9053643640014162e-05, "loss": 1.4797, "step": 5045 }, { "epoch": 16.544262295081968, "grad_norm": 10.295465469360352, "learning_rate": 1.905319267398266e-05, "loss": 1.4136, "step": 5046 }, { "epoch": 16.547540983606556, "grad_norm": 9.936111450195312, "learning_rate": 1.9052741605867e-05, "loss": 1.448, "step": 5047 }, { "epoch": 16.550819672131148, "grad_norm": 11.260162353515625, "learning_rate": 1.9052290435672264e-05, "loss": 1.5835, "step": 5048 }, { "epoch": 16.554098360655736, "grad_norm": 8.497773170471191, "learning_rate": 1.905183916340354e-05, "loss": 1.4277, "step": 5049 }, { "epoch": 16.557377049180328, "grad_norm": 9.132645606994629, "learning_rate": 1.9051387789065922e-05, "loss": 1.26, "step": 5050 }, { "epoch": 16.560655737704916, "grad_norm": 9.216017723083496, "learning_rate": 1.9050936312664492e-05, "loss": 1.5808, "step": 5051 }, { "epoch": 16.56393442622951, "grad_norm": 15.98417854309082, "learning_rate": 1.9050484734204347e-05, "loss": 1.5796, "step": 5052 }, { "epoch": 16.567213114754097, "grad_norm": 7.050910472869873, "learning_rate": 1.9050033053690575e-05, "loss": 1.5745, "step": 5053 }, { "epoch": 16.57049180327869, "grad_norm": 12.389750480651855, "learning_rate": 1.904958127112827e-05, "loss": 1.5076, "step": 5054 }, { "epoch": 16.57377049180328, "grad_norm": 9.352683067321777, "learning_rate": 1.904912938652253e-05, "loss": 1.5117, "step": 5055 }, { "epoch": 16.57704918032787, "grad_norm": 9.480854034423828, "learning_rate": 1.9048677399878444e-05, "loss": 1.4822, "step": 5056 }, { "epoch": 16.58032786885246, "grad_norm": 8.294007301330566, "learning_rate": 1.9048225311201114e-05, "loss": 1.5457, "step": 5057 }, { "epoch": 16.58360655737705, "grad_norm": 10.692105293273926, "learning_rate": 1.9047773120495634e-05, "loss": 1.2241, "step": 5058 }, { "epoch": 16.58688524590164, "grad_norm": 8.254308700561523, "learning_rate": 1.9047320827767106e-05, "loss": 1.5654, "step": 5059 }, { "epoch": 16.59016393442623, "grad_norm": 8.800142288208008, "learning_rate": 1.904686843302063e-05, "loss": 1.446, "step": 5060 }, { "epoch": 16.59344262295082, "grad_norm": 8.648344993591309, "learning_rate": 1.9046415936261304e-05, "loss": 1.5803, "step": 5061 }, { "epoch": 16.59672131147541, "grad_norm": 8.594194412231445, "learning_rate": 1.9045963337494232e-05, "loss": 1.5344, "step": 5062 }, { "epoch": 16.6, "grad_norm": 8.39549732208252, "learning_rate": 1.904551063672452e-05, "loss": 1.4146, "step": 5063 }, { "epoch": 16.60327868852459, "grad_norm": 9.157149314880371, "learning_rate": 1.904505783395727e-05, "loss": 1.5164, "step": 5064 }, { "epoch": 16.60655737704918, "grad_norm": 8.422932624816895, "learning_rate": 1.9044604929197588e-05, "loss": 1.4932, "step": 5065 }, { "epoch": 16.60983606557377, "grad_norm": 10.348286628723145, "learning_rate": 1.904415192245058e-05, "loss": 1.6213, "step": 5066 }, { "epoch": 16.613114754098362, "grad_norm": 11.416069984436035, "learning_rate": 1.9043698813721357e-05, "loss": 1.4019, "step": 5067 }, { "epoch": 16.61639344262295, "grad_norm": 16.967546463012695, "learning_rate": 1.904324560301503e-05, "loss": 1.4738, "step": 5068 }, { "epoch": 16.619672131147542, "grad_norm": 23.323362350463867, "learning_rate": 1.9042792290336702e-05, "loss": 1.6079, "step": 5069 }, { "epoch": 16.62295081967213, "grad_norm": 8.285826683044434, "learning_rate": 1.904233887569149e-05, "loss": 1.6636, "step": 5070 }, { "epoch": 16.626229508196722, "grad_norm": 8.556781768798828, "learning_rate": 1.9041885359084507e-05, "loss": 1.5564, "step": 5071 }, { "epoch": 16.62950819672131, "grad_norm": 10.332049369812012, "learning_rate": 1.904143174052086e-05, "loss": 1.5825, "step": 5072 }, { "epoch": 16.632786885245903, "grad_norm": 11.677776336669922, "learning_rate": 1.9040978020005677e-05, "loss": 1.2981, "step": 5073 }, { "epoch": 16.63606557377049, "grad_norm": 7.775506019592285, "learning_rate": 1.9040524197544062e-05, "loss": 1.6226, "step": 5074 }, { "epoch": 16.639344262295083, "grad_norm": 9.499494552612305, "learning_rate": 1.9040070273141138e-05, "loss": 1.5813, "step": 5075 }, { "epoch": 16.64262295081967, "grad_norm": 8.564533233642578, "learning_rate": 1.9039616246802023e-05, "loss": 1.1775, "step": 5076 }, { "epoch": 16.645901639344263, "grad_norm": 7.037639617919922, "learning_rate": 1.9039162118531834e-05, "loss": 1.6982, "step": 5077 }, { "epoch": 16.64918032786885, "grad_norm": 9.248964309692383, "learning_rate": 1.9038707888335694e-05, "loss": 1.5234, "step": 5078 }, { "epoch": 16.652459016393443, "grad_norm": 10.172974586486816, "learning_rate": 1.9038253556218725e-05, "loss": 1.4858, "step": 5079 }, { "epoch": 16.65573770491803, "grad_norm": 9.355697631835938, "learning_rate": 1.903779912218605e-05, "loss": 1.5895, "step": 5080 }, { "epoch": 16.659016393442624, "grad_norm": 9.160849571228027, "learning_rate": 1.903734458624279e-05, "loss": 1.5486, "step": 5081 }, { "epoch": 16.662295081967212, "grad_norm": 12.684041976928711, "learning_rate": 1.9036889948394077e-05, "loss": 1.4209, "step": 5082 }, { "epoch": 16.665573770491804, "grad_norm": 9.286162376403809, "learning_rate": 1.9036435208645034e-05, "loss": 1.5493, "step": 5083 }, { "epoch": 16.668852459016392, "grad_norm": 9.250655174255371, "learning_rate": 1.903598036700079e-05, "loss": 1.8115, "step": 5084 }, { "epoch": 16.672131147540984, "grad_norm": 10.206916809082031, "learning_rate": 1.9035525423466468e-05, "loss": 1.4944, "step": 5085 }, { "epoch": 16.675409836065572, "grad_norm": 8.342544555664062, "learning_rate": 1.9035070378047204e-05, "loss": 1.5884, "step": 5086 }, { "epoch": 16.678688524590164, "grad_norm": 8.219439506530762, "learning_rate": 1.903461523074813e-05, "loss": 1.499, "step": 5087 }, { "epoch": 16.681967213114753, "grad_norm": 8.236669540405273, "learning_rate": 1.9034159981574372e-05, "loss": 1.4954, "step": 5088 }, { "epoch": 16.685245901639345, "grad_norm": 7.646697998046875, "learning_rate": 1.9033704630531066e-05, "loss": 1.5007, "step": 5089 }, { "epoch": 16.688524590163933, "grad_norm": 14.097946166992188, "learning_rate": 1.903324917762335e-05, "loss": 1.5718, "step": 5090 }, { "epoch": 16.691803278688525, "grad_norm": 9.284226417541504, "learning_rate": 1.903279362285636e-05, "loss": 1.6211, "step": 5091 }, { "epoch": 16.695081967213113, "grad_norm": 9.035076141357422, "learning_rate": 1.9032337966235225e-05, "loss": 1.7104, "step": 5092 }, { "epoch": 16.698360655737705, "grad_norm": 9.910948753356934, "learning_rate": 1.903188220776509e-05, "loss": 1.5459, "step": 5093 }, { "epoch": 16.701639344262293, "grad_norm": 10.382855415344238, "learning_rate": 1.9031426347451095e-05, "loss": 1.3032, "step": 5094 }, { "epoch": 16.704918032786885, "grad_norm": 6.680116653442383, "learning_rate": 1.9030970385298377e-05, "loss": 1.4336, "step": 5095 }, { "epoch": 16.708196721311474, "grad_norm": 18.520551681518555, "learning_rate": 1.9030514321312074e-05, "loss": 1.3245, "step": 5096 }, { "epoch": 16.711475409836066, "grad_norm": 15.62069320678711, "learning_rate": 1.9030058155497334e-05, "loss": 1.5063, "step": 5097 }, { "epoch": 16.714754098360658, "grad_norm": 10.606794357299805, "learning_rate": 1.90296018878593e-05, "loss": 1.533, "step": 5098 }, { "epoch": 16.718032786885246, "grad_norm": 9.143725395202637, "learning_rate": 1.9029145518403117e-05, "loss": 1.3704, "step": 5099 }, { "epoch": 16.721311475409838, "grad_norm": 8.26407241821289, "learning_rate": 1.902868904713393e-05, "loss": 1.4126, "step": 5100 }, { "epoch": 16.724590163934426, "grad_norm": 9.075414657592773, "learning_rate": 1.9028232474056886e-05, "loss": 1.5281, "step": 5101 }, { "epoch": 16.727868852459018, "grad_norm": 7.625262260437012, "learning_rate": 1.9027775799177133e-05, "loss": 1.6401, "step": 5102 }, { "epoch": 16.731147540983606, "grad_norm": 7.259300231933594, "learning_rate": 1.9027319022499823e-05, "loss": 1.5352, "step": 5103 }, { "epoch": 16.7344262295082, "grad_norm": 12.9739408493042, "learning_rate": 1.9026862144030108e-05, "loss": 1.4106, "step": 5104 }, { "epoch": 16.737704918032787, "grad_norm": 8.866393089294434, "learning_rate": 1.902640516377313e-05, "loss": 1.5435, "step": 5105 }, { "epoch": 16.74098360655738, "grad_norm": 8.177452087402344, "learning_rate": 1.9025948081734053e-05, "loss": 1.6797, "step": 5106 }, { "epoch": 16.744262295081967, "grad_norm": 9.76906681060791, "learning_rate": 1.9025490897918027e-05, "loss": 1.5847, "step": 5107 }, { "epoch": 16.74754098360656, "grad_norm": 8.127374649047852, "learning_rate": 1.9025033612330203e-05, "loss": 1.6125, "step": 5108 }, { "epoch": 16.750819672131147, "grad_norm": 12.3309326171875, "learning_rate": 1.9024576224975747e-05, "loss": 1.4705, "step": 5109 }, { "epoch": 16.75409836065574, "grad_norm": 8.846883773803711, "learning_rate": 1.9024118735859807e-05, "loss": 1.472, "step": 5110 }, { "epoch": 16.757377049180327, "grad_norm": 12.762275695800781, "learning_rate": 1.9023661144987547e-05, "loss": 1.5774, "step": 5111 }, { "epoch": 16.76065573770492, "grad_norm": 10.409838676452637, "learning_rate": 1.9023203452364125e-05, "loss": 1.7556, "step": 5112 }, { "epoch": 16.763934426229508, "grad_norm": 8.592623710632324, "learning_rate": 1.9022745657994702e-05, "loss": 1.3416, "step": 5113 }, { "epoch": 16.7672131147541, "grad_norm": 7.436367988586426, "learning_rate": 1.902228776188444e-05, "loss": 1.48, "step": 5114 }, { "epoch": 16.770491803278688, "grad_norm": 7.687219619750977, "learning_rate": 1.90218297640385e-05, "loss": 1.4839, "step": 5115 }, { "epoch": 16.77377049180328, "grad_norm": 9.647697448730469, "learning_rate": 1.9021371664462058e-05, "loss": 1.5608, "step": 5116 }, { "epoch": 16.777049180327868, "grad_norm": 9.561129570007324, "learning_rate": 1.9020913463160263e-05, "loss": 1.429, "step": 5117 }, { "epoch": 16.78032786885246, "grad_norm": 15.448119163513184, "learning_rate": 1.902045516013829e-05, "loss": 1.5085, "step": 5118 }, { "epoch": 16.78360655737705, "grad_norm": 8.50027084350586, "learning_rate": 1.9019996755401306e-05, "loss": 1.4977, "step": 5119 }, { "epoch": 16.78688524590164, "grad_norm": 44.011329650878906, "learning_rate": 1.9019538248954483e-05, "loss": 1.2539, "step": 5120 }, { "epoch": 16.79016393442623, "grad_norm": 8.303200721740723, "learning_rate": 1.9019079640802988e-05, "loss": 1.7456, "step": 5121 }, { "epoch": 16.79344262295082, "grad_norm": 17.464515686035156, "learning_rate": 1.901862093095199e-05, "loss": 1.6074, "step": 5122 }, { "epoch": 16.79672131147541, "grad_norm": 7.160317897796631, "learning_rate": 1.9018162119406668e-05, "loss": 1.5754, "step": 5123 }, { "epoch": 16.8, "grad_norm": 10.557757377624512, "learning_rate": 1.9017703206172187e-05, "loss": 1.5234, "step": 5124 }, { "epoch": 16.80327868852459, "grad_norm": 8.98908519744873, "learning_rate": 1.9017244191253726e-05, "loss": 1.5669, "step": 5125 }, { "epoch": 16.80655737704918, "grad_norm": 12.297932624816895, "learning_rate": 1.9016785074656464e-05, "loss": 1.3955, "step": 5126 }, { "epoch": 16.80983606557377, "grad_norm": 10.79130744934082, "learning_rate": 1.9016325856385575e-05, "loss": 1.3958, "step": 5127 }, { "epoch": 16.81311475409836, "grad_norm": 30.919363021850586, "learning_rate": 1.9015866536446236e-05, "loss": 1.729, "step": 5128 }, { "epoch": 16.81639344262295, "grad_norm": 9.35461139678955, "learning_rate": 1.901540711484363e-05, "loss": 1.7065, "step": 5129 }, { "epoch": 16.81967213114754, "grad_norm": 8.303970336914062, "learning_rate": 1.9014947591582933e-05, "loss": 1.5502, "step": 5130 }, { "epoch": 16.82295081967213, "grad_norm": 19.434267044067383, "learning_rate": 1.9014487966669328e-05, "loss": 1.5496, "step": 5131 }, { "epoch": 16.82622950819672, "grad_norm": 6.782968997955322, "learning_rate": 1.9014028240108e-05, "loss": 1.6006, "step": 5132 }, { "epoch": 16.82950819672131, "grad_norm": 8.8649320602417, "learning_rate": 1.9013568411904133e-05, "loss": 1.4771, "step": 5133 }, { "epoch": 16.832786885245902, "grad_norm": 9.611631393432617, "learning_rate": 1.9013108482062908e-05, "loss": 1.7271, "step": 5134 }, { "epoch": 16.83606557377049, "grad_norm": 13.409951210021973, "learning_rate": 1.9012648450589513e-05, "loss": 1.624, "step": 5135 }, { "epoch": 16.839344262295082, "grad_norm": 7.4284138679504395, "learning_rate": 1.9012188317489136e-05, "loss": 1.498, "step": 5136 }, { "epoch": 16.84262295081967, "grad_norm": 14.543658256530762, "learning_rate": 1.9011728082766967e-05, "loss": 1.6084, "step": 5137 }, { "epoch": 16.845901639344262, "grad_norm": 8.393194198608398, "learning_rate": 1.9011267746428193e-05, "loss": 1.4788, "step": 5138 }, { "epoch": 16.84918032786885, "grad_norm": 11.926824569702148, "learning_rate": 1.901080730847801e-05, "loss": 1.489, "step": 5139 }, { "epoch": 16.852459016393443, "grad_norm": 15.365937232971191, "learning_rate": 1.90103467689216e-05, "loss": 1.6587, "step": 5140 }, { "epoch": 16.855737704918035, "grad_norm": 13.407890319824219, "learning_rate": 1.9009886127764164e-05, "loss": 1.5894, "step": 5141 }, { "epoch": 16.859016393442623, "grad_norm": 6.986405372619629, "learning_rate": 1.9009425385010894e-05, "loss": 1.6799, "step": 5142 }, { "epoch": 16.862295081967215, "grad_norm": 9.971433639526367, "learning_rate": 1.9008964540666984e-05, "loss": 1.6533, "step": 5143 }, { "epoch": 16.865573770491803, "grad_norm": 12.199600219726562, "learning_rate": 1.9008503594737632e-05, "loss": 1.5637, "step": 5144 }, { "epoch": 16.868852459016395, "grad_norm": 8.67577838897705, "learning_rate": 1.9008042547228038e-05, "loss": 1.6265, "step": 5145 }, { "epoch": 16.872131147540983, "grad_norm": 14.919572830200195, "learning_rate": 1.9007581398143394e-05, "loss": 1.3831, "step": 5146 }, { "epoch": 16.875409836065575, "grad_norm": 8.308844566345215, "learning_rate": 1.9007120147488908e-05, "loss": 1.5542, "step": 5147 }, { "epoch": 16.878688524590164, "grad_norm": 9.860864639282227, "learning_rate": 1.9006658795269775e-05, "loss": 1.2903, "step": 5148 }, { "epoch": 16.881967213114756, "grad_norm": 11.868879318237305, "learning_rate": 1.9006197341491202e-05, "loss": 1.437, "step": 5149 }, { "epoch": 16.885245901639344, "grad_norm": 12.420906066894531, "learning_rate": 1.9005735786158386e-05, "loss": 1.519, "step": 5150 }, { "epoch": 16.888524590163936, "grad_norm": 7.96370792388916, "learning_rate": 1.9005274129276538e-05, "loss": 1.5918, "step": 5151 }, { "epoch": 16.891803278688524, "grad_norm": 8.972087860107422, "learning_rate": 1.900481237085086e-05, "loss": 1.6077, "step": 5152 }, { "epoch": 16.895081967213116, "grad_norm": 20.63373565673828, "learning_rate": 1.9004350510886564e-05, "loss": 1.4351, "step": 5153 }, { "epoch": 16.898360655737704, "grad_norm": 8.536465644836426, "learning_rate": 1.900388854938885e-05, "loss": 1.5254, "step": 5154 }, { "epoch": 16.901639344262296, "grad_norm": 9.339862823486328, "learning_rate": 1.900342648636293e-05, "loss": 1.4929, "step": 5155 }, { "epoch": 16.904918032786885, "grad_norm": 7.7634100914001465, "learning_rate": 1.9002964321814016e-05, "loss": 1.668, "step": 5156 }, { "epoch": 16.908196721311477, "grad_norm": 8.067708015441895, "learning_rate": 1.900250205574732e-05, "loss": 1.6221, "step": 5157 }, { "epoch": 16.911475409836065, "grad_norm": 11.06637191772461, "learning_rate": 1.900203968816805e-05, "loss": 1.5554, "step": 5158 }, { "epoch": 16.914754098360657, "grad_norm": 7.478052616119385, "learning_rate": 1.9001577219081428e-05, "loss": 1.6584, "step": 5159 }, { "epoch": 16.918032786885245, "grad_norm": 8.294966697692871, "learning_rate": 1.900111464849266e-05, "loss": 1.4783, "step": 5160 }, { "epoch": 16.921311475409837, "grad_norm": 12.310277938842773, "learning_rate": 1.9000651976406966e-05, "loss": 1.5051, "step": 5161 }, { "epoch": 16.924590163934425, "grad_norm": 9.566818237304688, "learning_rate": 1.9000189202829562e-05, "loss": 1.6357, "step": 5162 }, { "epoch": 16.927868852459017, "grad_norm": 9.10171890258789, "learning_rate": 1.899972632776567e-05, "loss": 1.7283, "step": 5163 }, { "epoch": 16.931147540983606, "grad_norm": 12.901915550231934, "learning_rate": 1.89992633512205e-05, "loss": 1.698, "step": 5164 }, { "epoch": 16.934426229508198, "grad_norm": 8.638260841369629, "learning_rate": 1.899880027319928e-05, "loss": 1.3357, "step": 5165 }, { "epoch": 16.937704918032786, "grad_norm": 9.646981239318848, "learning_rate": 1.8998337093707237e-05, "loss": 1.624, "step": 5166 }, { "epoch": 16.940983606557378, "grad_norm": 9.420180320739746, "learning_rate": 1.8997873812749583e-05, "loss": 1.5017, "step": 5167 }, { "epoch": 16.944262295081966, "grad_norm": 13.601848602294922, "learning_rate": 1.8997410430331548e-05, "loss": 1.45, "step": 5168 }, { "epoch": 16.947540983606558, "grad_norm": 7.993776798248291, "learning_rate": 1.8996946946458356e-05, "loss": 1.4878, "step": 5169 }, { "epoch": 16.950819672131146, "grad_norm": 6.854208469390869, "learning_rate": 1.8996483361135228e-05, "loss": 1.5168, "step": 5170 }, { "epoch": 16.95409836065574, "grad_norm": 7.904080390930176, "learning_rate": 1.89960196743674e-05, "loss": 1.5297, "step": 5171 }, { "epoch": 16.957377049180327, "grad_norm": 8.117511749267578, "learning_rate": 1.8995555886160094e-05, "loss": 1.4775, "step": 5172 }, { "epoch": 16.96065573770492, "grad_norm": 6.818753242492676, "learning_rate": 1.8995091996518544e-05, "loss": 1.55, "step": 5173 }, { "epoch": 16.963934426229507, "grad_norm": 8.321391105651855, "learning_rate": 1.899462800544798e-05, "loss": 1.5383, "step": 5174 }, { "epoch": 16.9672131147541, "grad_norm": 10.449907302856445, "learning_rate": 1.8994163912953633e-05, "loss": 1.6196, "step": 5175 }, { "epoch": 16.970491803278687, "grad_norm": 8.027862548828125, "learning_rate": 1.8993699719040734e-05, "loss": 1.6011, "step": 5176 }, { "epoch": 16.97377049180328, "grad_norm": 6.720288276672363, "learning_rate": 1.8993235423714522e-05, "loss": 1.6602, "step": 5177 }, { "epoch": 16.977049180327867, "grad_norm": 8.386101722717285, "learning_rate": 1.899277102698023e-05, "loss": 1.5151, "step": 5178 }, { "epoch": 16.98032786885246, "grad_norm": 11.023128509521484, "learning_rate": 1.8992306528843094e-05, "loss": 1.5176, "step": 5179 }, { "epoch": 16.983606557377048, "grad_norm": 7.281102180480957, "learning_rate": 1.8991841929308353e-05, "loss": 1.5432, "step": 5180 }, { "epoch": 16.98688524590164, "grad_norm": 8.749135971069336, "learning_rate": 1.899137722838124e-05, "loss": 1.6145, "step": 5181 }, { "epoch": 16.990163934426228, "grad_norm": 9.044461250305176, "learning_rate": 1.8990912426067006e-05, "loss": 1.3628, "step": 5182 }, { "epoch": 16.99344262295082, "grad_norm": 9.679009437561035, "learning_rate": 1.8990447522370886e-05, "loss": 1.4529, "step": 5183 }, { "epoch": 16.99672131147541, "grad_norm": 7.92471170425415, "learning_rate": 1.898998251729812e-05, "loss": 1.5586, "step": 5184 }, { "epoch": 17.0, "grad_norm": 6.4932122230529785, "learning_rate": 1.8989517410853956e-05, "loss": 1.428, "step": 5185 }, { "epoch": 17.003278688524592, "grad_norm": 8.674429893493652, "learning_rate": 1.8989052203043633e-05, "loss": 1.2954, "step": 5186 }, { "epoch": 17.00655737704918, "grad_norm": 7.514732360839844, "learning_rate": 1.8988586893872404e-05, "loss": 1.3726, "step": 5187 }, { "epoch": 17.009836065573772, "grad_norm": 7.484908103942871, "learning_rate": 1.898812148334551e-05, "loss": 1.4119, "step": 5188 }, { "epoch": 17.01311475409836, "grad_norm": 8.825096130371094, "learning_rate": 1.89876559714682e-05, "loss": 1.5215, "step": 5189 }, { "epoch": 17.016393442622952, "grad_norm": 8.192490577697754, "learning_rate": 1.8987190358245727e-05, "loss": 1.6106, "step": 5190 }, { "epoch": 17.01967213114754, "grad_norm": 10.837349891662598, "learning_rate": 1.8986724643683337e-05, "loss": 1.4072, "step": 5191 }, { "epoch": 17.022950819672133, "grad_norm": 9.398548126220703, "learning_rate": 1.898625882778628e-05, "loss": 1.4705, "step": 5192 }, { "epoch": 17.02622950819672, "grad_norm": 8.321000099182129, "learning_rate": 1.8985792910559814e-05, "loss": 1.5486, "step": 5193 }, { "epoch": 17.029508196721313, "grad_norm": 7.551253318786621, "learning_rate": 1.898532689200919e-05, "loss": 1.5811, "step": 5194 }, { "epoch": 17.0327868852459, "grad_norm": 14.718335151672363, "learning_rate": 1.8984860772139665e-05, "loss": 1.3672, "step": 5195 }, { "epoch": 17.036065573770493, "grad_norm": 8.214298248291016, "learning_rate": 1.8984394550956488e-05, "loss": 1.3923, "step": 5196 }, { "epoch": 17.03934426229508, "grad_norm": 32.490596771240234, "learning_rate": 1.8983928228464924e-05, "loss": 1.615, "step": 5197 }, { "epoch": 17.042622950819673, "grad_norm": 12.27561092376709, "learning_rate": 1.898346180467023e-05, "loss": 1.3271, "step": 5198 }, { "epoch": 17.04590163934426, "grad_norm": 8.706827163696289, "learning_rate": 1.8982995279577662e-05, "loss": 1.6836, "step": 5199 }, { "epoch": 17.049180327868854, "grad_norm": 7.32559061050415, "learning_rate": 1.898252865319248e-05, "loss": 1.4504, "step": 5200 }, { "epoch": 17.052459016393442, "grad_norm": 8.896888732910156, "learning_rate": 1.898206192551995e-05, "loss": 1.3093, "step": 5201 }, { "epoch": 17.055737704918034, "grad_norm": 9.6004638671875, "learning_rate": 1.898159509656533e-05, "loss": 1.2837, "step": 5202 }, { "epoch": 17.059016393442622, "grad_norm": 7.447723865509033, "learning_rate": 1.8981128166333893e-05, "loss": 1.5073, "step": 5203 }, { "epoch": 17.062295081967214, "grad_norm": 8.204702377319336, "learning_rate": 1.8980661134830894e-05, "loss": 1.3064, "step": 5204 }, { "epoch": 17.065573770491802, "grad_norm": 12.76715087890625, "learning_rate": 1.8980194002061602e-05, "loss": 1.4646, "step": 5205 }, { "epoch": 17.068852459016394, "grad_norm": 7.31166934967041, "learning_rate": 1.8979726768031287e-05, "loss": 1.4312, "step": 5206 }, { "epoch": 17.072131147540983, "grad_norm": 7.3371806144714355, "learning_rate": 1.8979259432745217e-05, "loss": 1.374, "step": 5207 }, { "epoch": 17.075409836065575, "grad_norm": 9.732880592346191, "learning_rate": 1.897879199620866e-05, "loss": 1.4924, "step": 5208 }, { "epoch": 17.078688524590163, "grad_norm": 9.787437438964844, "learning_rate": 1.897832445842689e-05, "loss": 1.334, "step": 5209 }, { "epoch": 17.081967213114755, "grad_norm": 8.097512245178223, "learning_rate": 1.897785681940517e-05, "loss": 1.6099, "step": 5210 }, { "epoch": 17.085245901639343, "grad_norm": 7.928719520568848, "learning_rate": 1.8977389079148784e-05, "loss": 1.55, "step": 5211 }, { "epoch": 17.088524590163935, "grad_norm": 8.273520469665527, "learning_rate": 1.8976921237663003e-05, "loss": 1.3345, "step": 5212 }, { "epoch": 17.091803278688523, "grad_norm": 10.844110488891602, "learning_rate": 1.89764532949531e-05, "loss": 1.4204, "step": 5213 }, { "epoch": 17.095081967213115, "grad_norm": 9.422962188720703, "learning_rate": 1.8975985251024355e-05, "loss": 1.2688, "step": 5214 }, { "epoch": 17.098360655737704, "grad_norm": 7.0970869064331055, "learning_rate": 1.897551710588204e-05, "loss": 1.5913, "step": 5215 }, { "epoch": 17.101639344262296, "grad_norm": 10.25952434539795, "learning_rate": 1.897504885953144e-05, "loss": 1.4712, "step": 5216 }, { "epoch": 17.104918032786884, "grad_norm": 10.527787208557129, "learning_rate": 1.897458051197783e-05, "loss": 1.3428, "step": 5217 }, { "epoch": 17.108196721311476, "grad_norm": 7.885066509246826, "learning_rate": 1.8974112063226498e-05, "loss": 1.439, "step": 5218 }, { "epoch": 17.111475409836064, "grad_norm": 7.407318115234375, "learning_rate": 1.8973643513282716e-05, "loss": 1.5164, "step": 5219 }, { "epoch": 17.114754098360656, "grad_norm": 15.730359077453613, "learning_rate": 1.8973174862151776e-05, "loss": 1.52, "step": 5220 }, { "epoch": 17.118032786885244, "grad_norm": 9.016157150268555, "learning_rate": 1.8972706109838962e-05, "loss": 1.5264, "step": 5221 }, { "epoch": 17.121311475409836, "grad_norm": 8.570069313049316, "learning_rate": 1.8972237256349553e-05, "loss": 1.3904, "step": 5222 }, { "epoch": 17.124590163934425, "grad_norm": 10.231650352478027, "learning_rate": 1.8971768301688846e-05, "loss": 1.5205, "step": 5223 }, { "epoch": 17.127868852459017, "grad_norm": 8.554409980773926, "learning_rate": 1.8971299245862117e-05, "loss": 1.3801, "step": 5224 }, { "epoch": 17.131147540983605, "grad_norm": 8.899978637695312, "learning_rate": 1.8970830088874664e-05, "loss": 1.5273, "step": 5225 }, { "epoch": 17.134426229508197, "grad_norm": 8.249234199523926, "learning_rate": 1.8970360830731772e-05, "loss": 1.6133, "step": 5226 }, { "epoch": 17.137704918032785, "grad_norm": 9.503390312194824, "learning_rate": 1.8969891471438736e-05, "loss": 1.3936, "step": 5227 }, { "epoch": 17.140983606557377, "grad_norm": 7.440091609954834, "learning_rate": 1.8969422011000852e-05, "loss": 1.6138, "step": 5228 }, { "epoch": 17.14426229508197, "grad_norm": 7.7627387046813965, "learning_rate": 1.8968952449423403e-05, "loss": 1.5374, "step": 5229 }, { "epoch": 17.147540983606557, "grad_norm": 10.162819862365723, "learning_rate": 1.8968482786711693e-05, "loss": 1.3503, "step": 5230 }, { "epoch": 17.15081967213115, "grad_norm": 8.388864517211914, "learning_rate": 1.8968013022871012e-05, "loss": 1.2063, "step": 5231 }, { "epoch": 17.154098360655738, "grad_norm": 8.325652122497559, "learning_rate": 1.8967543157906662e-05, "loss": 1.407, "step": 5232 }, { "epoch": 17.15737704918033, "grad_norm": 7.136242389678955, "learning_rate": 1.8967073191823936e-05, "loss": 1.5383, "step": 5233 }, { "epoch": 17.160655737704918, "grad_norm": 10.965339660644531, "learning_rate": 1.896660312462814e-05, "loss": 1.4126, "step": 5234 }, { "epoch": 17.16393442622951, "grad_norm": 8.425226211547852, "learning_rate": 1.896613295632457e-05, "loss": 1.6025, "step": 5235 }, { "epoch": 17.167213114754098, "grad_norm": 10.234370231628418, "learning_rate": 1.896566268691853e-05, "loss": 1.4866, "step": 5236 }, { "epoch": 17.17049180327869, "grad_norm": 16.25861167907715, "learning_rate": 1.8965192316415318e-05, "loss": 1.3823, "step": 5237 }, { "epoch": 17.17377049180328, "grad_norm": 9.943150520324707, "learning_rate": 1.8964721844820242e-05, "loss": 1.3801, "step": 5238 }, { "epoch": 17.17704918032787, "grad_norm": 8.715669631958008, "learning_rate": 1.8964251272138604e-05, "loss": 1.3013, "step": 5239 }, { "epoch": 17.18032786885246, "grad_norm": 9.743325233459473, "learning_rate": 1.8963780598375717e-05, "loss": 1.3271, "step": 5240 }, { "epoch": 17.18360655737705, "grad_norm": 8.090828895568848, "learning_rate": 1.8963309823536883e-05, "loss": 1.3396, "step": 5241 }, { "epoch": 17.18688524590164, "grad_norm": 7.922239780426025, "learning_rate": 1.8962838947627408e-05, "loss": 1.5642, "step": 5242 }, { "epoch": 17.19016393442623, "grad_norm": 8.5822114944458, "learning_rate": 1.8962367970652608e-05, "loss": 1.5515, "step": 5243 }, { "epoch": 17.19344262295082, "grad_norm": 10.065010070800781, "learning_rate": 1.896189689261779e-05, "loss": 1.4048, "step": 5244 }, { "epoch": 17.19672131147541, "grad_norm": 5.797851085662842, "learning_rate": 1.8961425713528262e-05, "loss": 1.5808, "step": 5245 }, { "epoch": 17.2, "grad_norm": 6.435488700866699, "learning_rate": 1.896095443338935e-05, "loss": 1.5596, "step": 5246 }, { "epoch": 17.20327868852459, "grad_norm": 9.465907096862793, "learning_rate": 1.896048305220635e-05, "loss": 1.4209, "step": 5247 }, { "epoch": 17.20655737704918, "grad_norm": 11.392313003540039, "learning_rate": 1.8960011569984592e-05, "loss": 1.2544, "step": 5248 }, { "epoch": 17.20983606557377, "grad_norm": 9.968788146972656, "learning_rate": 1.8959539986729392e-05, "loss": 1.5715, "step": 5249 }, { "epoch": 17.21311475409836, "grad_norm": 7.651429176330566, "learning_rate": 1.895906830244606e-05, "loss": 1.1821, "step": 5250 }, { "epoch": 17.21639344262295, "grad_norm": 11.591606140136719, "learning_rate": 1.8958596517139914e-05, "loss": 1.3035, "step": 5251 }, { "epoch": 17.21967213114754, "grad_norm": 8.216171264648438, "learning_rate": 1.8958124630816278e-05, "loss": 1.408, "step": 5252 }, { "epoch": 17.222950819672132, "grad_norm": 9.565545082092285, "learning_rate": 1.8957652643480476e-05, "loss": 1.613, "step": 5253 }, { "epoch": 17.22622950819672, "grad_norm": 9.240152359008789, "learning_rate": 1.8957180555137825e-05, "loss": 1.4846, "step": 5254 }, { "epoch": 17.229508196721312, "grad_norm": 9.798702239990234, "learning_rate": 1.895670836579365e-05, "loss": 1.3296, "step": 5255 }, { "epoch": 17.2327868852459, "grad_norm": 17.64131736755371, "learning_rate": 1.8956236075453277e-05, "loss": 1.4414, "step": 5256 }, { "epoch": 17.236065573770492, "grad_norm": 9.45158863067627, "learning_rate": 1.895576368412203e-05, "loss": 1.3816, "step": 5257 }, { "epoch": 17.23934426229508, "grad_norm": 7.7935028076171875, "learning_rate": 1.8955291191805237e-05, "loss": 1.4001, "step": 5258 }, { "epoch": 17.242622950819673, "grad_norm": 10.263077735900879, "learning_rate": 1.895481859850822e-05, "loss": 1.395, "step": 5259 }, { "epoch": 17.24590163934426, "grad_norm": 9.777775764465332, "learning_rate": 1.8954345904236317e-05, "loss": 1.402, "step": 5260 }, { "epoch": 17.249180327868853, "grad_norm": 7.604602813720703, "learning_rate": 1.8953873108994852e-05, "loss": 1.4211, "step": 5261 }, { "epoch": 17.25245901639344, "grad_norm": 8.926627159118652, "learning_rate": 1.8953400212789158e-05, "loss": 1.4692, "step": 5262 }, { "epoch": 17.255737704918033, "grad_norm": 8.440860748291016, "learning_rate": 1.895292721562457e-05, "loss": 1.5952, "step": 5263 }, { "epoch": 17.25901639344262, "grad_norm": 8.911730766296387, "learning_rate": 1.8952454117506414e-05, "loss": 1.377, "step": 5264 }, { "epoch": 17.262295081967213, "grad_norm": 8.836308479309082, "learning_rate": 1.8951980918440033e-05, "loss": 1.4287, "step": 5265 }, { "epoch": 17.2655737704918, "grad_norm": 8.290578842163086, "learning_rate": 1.8951507618430758e-05, "loss": 1.6904, "step": 5266 }, { "epoch": 17.268852459016394, "grad_norm": 8.93047046661377, "learning_rate": 1.8951034217483927e-05, "loss": 1.5164, "step": 5267 }, { "epoch": 17.272131147540982, "grad_norm": 11.602684020996094, "learning_rate": 1.895056071560488e-05, "loss": 1.5146, "step": 5268 }, { "epoch": 17.275409836065574, "grad_norm": 7.00722599029541, "learning_rate": 1.8950087112798955e-05, "loss": 1.522, "step": 5269 }, { "epoch": 17.278688524590162, "grad_norm": 6.9373555183410645, "learning_rate": 1.894961340907149e-05, "loss": 1.5266, "step": 5270 }, { "epoch": 17.281967213114754, "grad_norm": 10.036805152893066, "learning_rate": 1.8949139604427832e-05, "loss": 1.4805, "step": 5271 }, { "epoch": 17.285245901639342, "grad_norm": 8.952600479125977, "learning_rate": 1.8948665698873317e-05, "loss": 1.233, "step": 5272 }, { "epoch": 17.288524590163934, "grad_norm": 9.429550170898438, "learning_rate": 1.894819169241329e-05, "loss": 1.4456, "step": 5273 }, { "epoch": 17.291803278688526, "grad_norm": 13.007466316223145, "learning_rate": 1.8947717585053102e-05, "loss": 1.2961, "step": 5274 }, { "epoch": 17.295081967213115, "grad_norm": 22.395687103271484, "learning_rate": 1.8947243376798095e-05, "loss": 1.4172, "step": 5275 }, { "epoch": 17.298360655737707, "grad_norm": 12.85626220703125, "learning_rate": 1.8946769067653614e-05, "loss": 1.6025, "step": 5276 }, { "epoch": 17.301639344262295, "grad_norm": 10.845804214477539, "learning_rate": 1.894629465762501e-05, "loss": 1.2891, "step": 5277 }, { "epoch": 17.304918032786887, "grad_norm": 7.627501964569092, "learning_rate": 1.894582014671763e-05, "loss": 1.5415, "step": 5278 }, { "epoch": 17.308196721311475, "grad_norm": 9.439506530761719, "learning_rate": 1.8945345534936834e-05, "loss": 1.4255, "step": 5279 }, { "epoch": 17.311475409836067, "grad_norm": 13.93928337097168, "learning_rate": 1.8944870822287957e-05, "loss": 1.4578, "step": 5280 }, { "epoch": 17.314754098360655, "grad_norm": 26.233722686767578, "learning_rate": 1.8944396008776366e-05, "loss": 1.4546, "step": 5281 }, { "epoch": 17.318032786885247, "grad_norm": 7.698297500610352, "learning_rate": 1.894392109440741e-05, "loss": 1.4799, "step": 5282 }, { "epoch": 17.321311475409836, "grad_norm": 13.264187812805176, "learning_rate": 1.8943446079186442e-05, "loss": 1.5198, "step": 5283 }, { "epoch": 17.324590163934428, "grad_norm": 7.797306060791016, "learning_rate": 1.8942970963118822e-05, "loss": 1.543, "step": 5284 }, { "epoch": 17.327868852459016, "grad_norm": 13.39547061920166, "learning_rate": 1.8942495746209907e-05, "loss": 1.4507, "step": 5285 }, { "epoch": 17.331147540983608, "grad_norm": 9.211943626403809, "learning_rate": 1.8942020428465054e-05, "loss": 1.5312, "step": 5286 }, { "epoch": 17.334426229508196, "grad_norm": 8.816401481628418, "learning_rate": 1.8941545009889623e-05, "loss": 1.4966, "step": 5287 }, { "epoch": 17.337704918032788, "grad_norm": 10.011903762817383, "learning_rate": 1.8941069490488976e-05, "loss": 1.5398, "step": 5288 }, { "epoch": 17.340983606557376, "grad_norm": 10.583654403686523, "learning_rate": 1.894059387026847e-05, "loss": 1.3635, "step": 5289 }, { "epoch": 17.34426229508197, "grad_norm": 8.713057518005371, "learning_rate": 1.8940118149233478e-05, "loss": 1.5542, "step": 5290 }, { "epoch": 17.347540983606557, "grad_norm": 9.182790756225586, "learning_rate": 1.8939642327389353e-05, "loss": 1.4038, "step": 5291 }, { "epoch": 17.35081967213115, "grad_norm": 13.12893009185791, "learning_rate": 1.893916640474147e-05, "loss": 1.6016, "step": 5292 }, { "epoch": 17.354098360655737, "grad_norm": 9.91674518585205, "learning_rate": 1.8938690381295193e-05, "loss": 1.5012, "step": 5293 }, { "epoch": 17.35737704918033, "grad_norm": 9.594267845153809, "learning_rate": 1.893821425705588e-05, "loss": 1.3992, "step": 5294 }, { "epoch": 17.360655737704917, "grad_norm": 7.464132785797119, "learning_rate": 1.893773803202892e-05, "loss": 1.6169, "step": 5295 }, { "epoch": 17.36393442622951, "grad_norm": 7.383868217468262, "learning_rate": 1.893726170621966e-05, "loss": 1.4451, "step": 5296 }, { "epoch": 17.367213114754097, "grad_norm": 7.586705207824707, "learning_rate": 1.8936785279633488e-05, "loss": 1.5903, "step": 5297 }, { "epoch": 17.37049180327869, "grad_norm": 28.134443283081055, "learning_rate": 1.8936308752275767e-05, "loss": 1.5208, "step": 5298 }, { "epoch": 17.373770491803278, "grad_norm": 8.227849960327148, "learning_rate": 1.8935832124151872e-05, "loss": 1.3792, "step": 5299 }, { "epoch": 17.37704918032787, "grad_norm": 6.365107536315918, "learning_rate": 1.893535539526718e-05, "loss": 1.4132, "step": 5300 }, { "epoch": 17.380327868852458, "grad_norm": 9.713379859924316, "learning_rate": 1.893487856562707e-05, "loss": 1.3744, "step": 5301 }, { "epoch": 17.38360655737705, "grad_norm": 10.157432556152344, "learning_rate": 1.893440163523691e-05, "loss": 1.5349, "step": 5302 }, { "epoch": 17.386885245901638, "grad_norm": 6.930978775024414, "learning_rate": 1.8933924604102083e-05, "loss": 1.4607, "step": 5303 }, { "epoch": 17.39016393442623, "grad_norm": 6.167233467102051, "learning_rate": 1.8933447472227965e-05, "loss": 1.6663, "step": 5304 }, { "epoch": 17.39344262295082, "grad_norm": 13.761475563049316, "learning_rate": 1.8932970239619943e-05, "loss": 1.2654, "step": 5305 }, { "epoch": 17.39672131147541, "grad_norm": 8.003153800964355, "learning_rate": 1.893249290628339e-05, "loss": 1.6074, "step": 5306 }, { "epoch": 17.4, "grad_norm": 6.848921775817871, "learning_rate": 1.8932015472223692e-05, "loss": 1.3582, "step": 5307 }, { "epoch": 17.40327868852459, "grad_norm": 12.031671524047852, "learning_rate": 1.8931537937446235e-05, "loss": 1.5005, "step": 5308 }, { "epoch": 17.40655737704918, "grad_norm": 7.599471092224121, "learning_rate": 1.89310603019564e-05, "loss": 1.4615, "step": 5309 }, { "epoch": 17.40983606557377, "grad_norm": 10.588016510009766, "learning_rate": 1.8930582565759576e-05, "loss": 1.4357, "step": 5310 }, { "epoch": 17.41311475409836, "grad_norm": 10.244454383850098, "learning_rate": 1.8930104728861142e-05, "loss": 1.5476, "step": 5311 }, { "epoch": 17.41639344262295, "grad_norm": 8.64493179321289, "learning_rate": 1.8929626791266494e-05, "loss": 1.4832, "step": 5312 }, { "epoch": 17.41967213114754, "grad_norm": 7.886257648468018, "learning_rate": 1.8929148752981024e-05, "loss": 1.5261, "step": 5313 }, { "epoch": 17.42295081967213, "grad_norm": 8.570425987243652, "learning_rate": 1.8928670614010116e-05, "loss": 1.5454, "step": 5314 }, { "epoch": 17.42622950819672, "grad_norm": 7.059185028076172, "learning_rate": 1.892819237435916e-05, "loss": 1.3049, "step": 5315 }, { "epoch": 17.42950819672131, "grad_norm": 7.380618095397949, "learning_rate": 1.8927714034033557e-05, "loss": 1.4241, "step": 5316 }, { "epoch": 17.432786885245903, "grad_norm": 8.450316429138184, "learning_rate": 1.8927235593038693e-05, "loss": 1.5361, "step": 5317 }, { "epoch": 17.43606557377049, "grad_norm": 8.081077575683594, "learning_rate": 1.8926757051379967e-05, "loss": 1.4241, "step": 5318 }, { "epoch": 17.439344262295084, "grad_norm": 6.636048316955566, "learning_rate": 1.8926278409062773e-05, "loss": 1.5142, "step": 5319 }, { "epoch": 17.442622950819672, "grad_norm": 11.13736343383789, "learning_rate": 1.892579966609251e-05, "loss": 1.5613, "step": 5320 }, { "epoch": 17.445901639344264, "grad_norm": 10.411565780639648, "learning_rate": 1.8925320822474573e-05, "loss": 1.3379, "step": 5321 }, { "epoch": 17.449180327868852, "grad_norm": 7.547125816345215, "learning_rate": 1.8924841878214366e-05, "loss": 1.4194, "step": 5322 }, { "epoch": 17.452459016393444, "grad_norm": 9.90507984161377, "learning_rate": 1.8924362833317286e-05, "loss": 1.4011, "step": 5323 }, { "epoch": 17.455737704918032, "grad_norm": 6.757145404815674, "learning_rate": 1.8923883687788734e-05, "loss": 1.5728, "step": 5324 }, { "epoch": 17.459016393442624, "grad_norm": 11.720161437988281, "learning_rate": 1.892340444163412e-05, "loss": 1.3145, "step": 5325 }, { "epoch": 17.462295081967213, "grad_norm": 8.959077835083008, "learning_rate": 1.892292509485884e-05, "loss": 1.5486, "step": 5326 }, { "epoch": 17.465573770491805, "grad_norm": 8.415810585021973, "learning_rate": 1.8922445647468302e-05, "loss": 1.2722, "step": 5327 }, { "epoch": 17.468852459016393, "grad_norm": 6.573002338409424, "learning_rate": 1.8921966099467912e-05, "loss": 1.3503, "step": 5328 }, { "epoch": 17.472131147540985, "grad_norm": 8.530378341674805, "learning_rate": 1.8921486450863078e-05, "loss": 1.4338, "step": 5329 }, { "epoch": 17.475409836065573, "grad_norm": 8.215868949890137, "learning_rate": 1.8921006701659207e-05, "loss": 1.4536, "step": 5330 }, { "epoch": 17.478688524590165, "grad_norm": 9.814072608947754, "learning_rate": 1.892052685186171e-05, "loss": 1.3926, "step": 5331 }, { "epoch": 17.481967213114753, "grad_norm": 7.886608600616455, "learning_rate": 1.8920046901476e-05, "loss": 1.4882, "step": 5332 }, { "epoch": 17.485245901639345, "grad_norm": 12.20085334777832, "learning_rate": 1.8919566850507488e-05, "loss": 1.5911, "step": 5333 }, { "epoch": 17.488524590163934, "grad_norm": 8.888171195983887, "learning_rate": 1.891908669896158e-05, "loss": 1.4297, "step": 5334 }, { "epoch": 17.491803278688526, "grad_norm": 10.019767761230469, "learning_rate": 1.8918606446843702e-05, "loss": 1.3557, "step": 5335 }, { "epoch": 17.495081967213114, "grad_norm": 9.392210960388184, "learning_rate": 1.891812609415926e-05, "loss": 1.2471, "step": 5336 }, { "epoch": 17.498360655737706, "grad_norm": 10.497026443481445, "learning_rate": 1.8917645640913673e-05, "loss": 1.2635, "step": 5337 }, { "epoch": 17.501639344262294, "grad_norm": 7.811776638031006, "learning_rate": 1.891716508711236e-05, "loss": 1.4685, "step": 5338 }, { "epoch": 17.504918032786886, "grad_norm": 7.629563808441162, "learning_rate": 1.8916684432760745e-05, "loss": 1.4202, "step": 5339 }, { "epoch": 17.508196721311474, "grad_norm": 6.671821594238281, "learning_rate": 1.8916203677864234e-05, "loss": 1.3838, "step": 5340 }, { "epoch": 17.511475409836066, "grad_norm": 9.660783767700195, "learning_rate": 1.891572282242826e-05, "loss": 1.3553, "step": 5341 }, { "epoch": 17.514754098360655, "grad_norm": 8.432360649108887, "learning_rate": 1.891524186645824e-05, "loss": 1.494, "step": 5342 }, { "epoch": 17.518032786885247, "grad_norm": 9.889093399047852, "learning_rate": 1.8914760809959597e-05, "loss": 1.7012, "step": 5343 }, { "epoch": 17.521311475409835, "grad_norm": 14.398622512817383, "learning_rate": 1.891427965293776e-05, "loss": 1.3767, "step": 5344 }, { "epoch": 17.524590163934427, "grad_norm": 14.611983299255371, "learning_rate": 1.8913798395398147e-05, "loss": 1.3777, "step": 5345 }, { "epoch": 17.527868852459015, "grad_norm": 9.585142135620117, "learning_rate": 1.891331703734619e-05, "loss": 1.3433, "step": 5346 }, { "epoch": 17.531147540983607, "grad_norm": 11.435277938842773, "learning_rate": 1.8912835578787322e-05, "loss": 1.4854, "step": 5347 }, { "epoch": 17.534426229508195, "grad_norm": 8.800565719604492, "learning_rate": 1.8912354019726963e-05, "loss": 1.4006, "step": 5348 }, { "epoch": 17.537704918032787, "grad_norm": 7.8851399421691895, "learning_rate": 1.891187236017054e-05, "loss": 1.3401, "step": 5349 }, { "epoch": 17.540983606557376, "grad_norm": 12.405420303344727, "learning_rate": 1.8911390600123496e-05, "loss": 1.5793, "step": 5350 }, { "epoch": 17.544262295081968, "grad_norm": 15.059341430664062, "learning_rate": 1.8910908739591257e-05, "loss": 1.6445, "step": 5351 }, { "epoch": 17.547540983606556, "grad_norm": 7.258525371551514, "learning_rate": 1.8910426778579257e-05, "loss": 1.5359, "step": 5352 }, { "epoch": 17.550819672131148, "grad_norm": 7.848976135253906, "learning_rate": 1.8909944717092927e-05, "loss": 1.6711, "step": 5353 }, { "epoch": 17.554098360655736, "grad_norm": 9.239972114562988, "learning_rate": 1.890946255513771e-05, "loss": 1.6284, "step": 5354 }, { "epoch": 17.557377049180328, "grad_norm": 8.69980239868164, "learning_rate": 1.890898029271904e-05, "loss": 1.5923, "step": 5355 }, { "epoch": 17.560655737704916, "grad_norm": 8.173318862915039, "learning_rate": 1.890849792984235e-05, "loss": 1.3591, "step": 5356 }, { "epoch": 17.56393442622951, "grad_norm": 14.036402702331543, "learning_rate": 1.8908015466513086e-05, "loss": 1.3557, "step": 5357 }, { "epoch": 17.567213114754097, "grad_norm": 8.071876525878906, "learning_rate": 1.8907532902736686e-05, "loss": 1.5432, "step": 5358 }, { "epoch": 17.57049180327869, "grad_norm": 11.947210311889648, "learning_rate": 1.8907050238518587e-05, "loss": 1.4673, "step": 5359 }, { "epoch": 17.57377049180328, "grad_norm": 10.043256759643555, "learning_rate": 1.890656747386424e-05, "loss": 1.4717, "step": 5360 }, { "epoch": 17.57704918032787, "grad_norm": 27.088092803955078, "learning_rate": 1.8906084608779084e-05, "loss": 1.6471, "step": 5361 }, { "epoch": 17.58032786885246, "grad_norm": 7.260936737060547, "learning_rate": 1.890560164326856e-05, "loss": 1.3748, "step": 5362 }, { "epoch": 17.58360655737705, "grad_norm": 9.086104393005371, "learning_rate": 1.8905118577338123e-05, "loss": 1.3027, "step": 5363 }, { "epoch": 17.58688524590164, "grad_norm": 8.875235557556152, "learning_rate": 1.890463541099321e-05, "loss": 1.5403, "step": 5364 }, { "epoch": 17.59016393442623, "grad_norm": 9.634775161743164, "learning_rate": 1.890415214423928e-05, "loss": 1.5068, "step": 5365 }, { "epoch": 17.59344262295082, "grad_norm": 11.287786483764648, "learning_rate": 1.890366877708177e-05, "loss": 1.5081, "step": 5366 }, { "epoch": 17.59672131147541, "grad_norm": 12.95934009552002, "learning_rate": 1.8903185309526142e-05, "loss": 1.3848, "step": 5367 }, { "epoch": 17.6, "grad_norm": 8.728068351745605, "learning_rate": 1.8902701741577844e-05, "loss": 1.3918, "step": 5368 }, { "epoch": 17.60327868852459, "grad_norm": 10.142816543579102, "learning_rate": 1.8902218073242325e-05, "loss": 1.4167, "step": 5369 }, { "epoch": 17.60655737704918, "grad_norm": 10.610684394836426, "learning_rate": 1.890173430452504e-05, "loss": 1.3416, "step": 5370 }, { "epoch": 17.60983606557377, "grad_norm": 14.70905876159668, "learning_rate": 1.8901250435431447e-05, "loss": 1.6785, "step": 5371 }, { "epoch": 17.613114754098362, "grad_norm": 30.465396881103516, "learning_rate": 1.8900766465966998e-05, "loss": 1.4922, "step": 5372 }, { "epoch": 17.61639344262295, "grad_norm": 6.339456081390381, "learning_rate": 1.8900282396137153e-05, "loss": 1.458, "step": 5373 }, { "epoch": 17.619672131147542, "grad_norm": 11.835405349731445, "learning_rate": 1.8899798225947373e-05, "loss": 1.2017, "step": 5374 }, { "epoch": 17.62295081967213, "grad_norm": 9.21015739440918, "learning_rate": 1.889931395540311e-05, "loss": 1.6648, "step": 5375 }, { "epoch": 17.626229508196722, "grad_norm": 10.668731689453125, "learning_rate": 1.8898829584509832e-05, "loss": 1.6436, "step": 5376 }, { "epoch": 17.62950819672131, "grad_norm": 10.665947914123535, "learning_rate": 1.8898345113273e-05, "loss": 1.4954, "step": 5377 }, { "epoch": 17.632786885245903, "grad_norm": 8.268571853637695, "learning_rate": 1.8897860541698073e-05, "loss": 1.5527, "step": 5378 }, { "epoch": 17.63606557377049, "grad_norm": 8.50771713256836, "learning_rate": 1.8897375869790515e-05, "loss": 1.4651, "step": 5379 }, { "epoch": 17.639344262295083, "grad_norm": 6.26059627532959, "learning_rate": 1.8896891097555797e-05, "loss": 1.6035, "step": 5380 }, { "epoch": 17.64262295081967, "grad_norm": 12.012002944946289, "learning_rate": 1.889640622499938e-05, "loss": 1.3062, "step": 5381 }, { "epoch": 17.645901639344263, "grad_norm": 10.347001075744629, "learning_rate": 1.889592125212673e-05, "loss": 1.5106, "step": 5382 }, { "epoch": 17.64918032786885, "grad_norm": 7.280503273010254, "learning_rate": 1.889543617894332e-05, "loss": 1.4607, "step": 5383 }, { "epoch": 17.652459016393443, "grad_norm": 11.358933448791504, "learning_rate": 1.889495100545462e-05, "loss": 1.4825, "step": 5384 }, { "epoch": 17.65573770491803, "grad_norm": 7.837279796600342, "learning_rate": 1.8894465731666095e-05, "loss": 1.3344, "step": 5385 }, { "epoch": 17.659016393442624, "grad_norm": 11.708612442016602, "learning_rate": 1.8893980357583227e-05, "loss": 1.4251, "step": 5386 }, { "epoch": 17.662295081967212, "grad_norm": 16.25047492980957, "learning_rate": 1.8893494883211478e-05, "loss": 1.7178, "step": 5387 }, { "epoch": 17.665573770491804, "grad_norm": 9.443652153015137, "learning_rate": 1.8893009308556327e-05, "loss": 1.5273, "step": 5388 }, { "epoch": 17.668852459016392, "grad_norm": 6.201911926269531, "learning_rate": 1.8892523633623252e-05, "loss": 1.4861, "step": 5389 }, { "epoch": 17.672131147540984, "grad_norm": 7.717549800872803, "learning_rate": 1.889203785841773e-05, "loss": 1.3348, "step": 5390 }, { "epoch": 17.675409836065572, "grad_norm": 12.169188499450684, "learning_rate": 1.889155198294523e-05, "loss": 1.4248, "step": 5391 }, { "epoch": 17.678688524590164, "grad_norm": 9.028136253356934, "learning_rate": 1.8891066007211242e-05, "loss": 1.4758, "step": 5392 }, { "epoch": 17.681967213114753, "grad_norm": 10.004429817199707, "learning_rate": 1.8890579931221237e-05, "loss": 1.6111, "step": 5393 }, { "epoch": 17.685245901639345, "grad_norm": 9.935419082641602, "learning_rate": 1.8890093754980698e-05, "loss": 1.4934, "step": 5394 }, { "epoch": 17.688524590163933, "grad_norm": 8.078093528747559, "learning_rate": 1.8889607478495107e-05, "loss": 1.6455, "step": 5395 }, { "epoch": 17.691803278688525, "grad_norm": 9.156832695007324, "learning_rate": 1.8889121101769958e-05, "loss": 1.4399, "step": 5396 }, { "epoch": 17.695081967213113, "grad_norm": 6.87352180480957, "learning_rate": 1.8888634624810717e-05, "loss": 1.6147, "step": 5397 }, { "epoch": 17.698360655737705, "grad_norm": 9.605731964111328, "learning_rate": 1.888814804762288e-05, "loss": 1.5532, "step": 5398 }, { "epoch": 17.701639344262293, "grad_norm": 8.860075950622559, "learning_rate": 1.8887661370211937e-05, "loss": 1.5808, "step": 5399 }, { "epoch": 17.704918032786885, "grad_norm": 6.98176383972168, "learning_rate": 1.8887174592583365e-05, "loss": 1.7927, "step": 5400 }, { "epoch": 17.708196721311474, "grad_norm": 6.5297770500183105, "learning_rate": 1.8886687714742665e-05, "loss": 1.3799, "step": 5401 }, { "epoch": 17.711475409836066, "grad_norm": 11.98838996887207, "learning_rate": 1.888620073669532e-05, "loss": 1.5784, "step": 5402 }, { "epoch": 17.714754098360658, "grad_norm": 7.078723907470703, "learning_rate": 1.888571365844682e-05, "loss": 1.6548, "step": 5403 }, { "epoch": 17.718032786885246, "grad_norm": 9.271154403686523, "learning_rate": 1.888522648000266e-05, "loss": 1.5288, "step": 5404 }, { "epoch": 17.721311475409838, "grad_norm": 12.109509468078613, "learning_rate": 1.8884739201368337e-05, "loss": 1.5154, "step": 5405 }, { "epoch": 17.724590163934426, "grad_norm": 8.620353698730469, "learning_rate": 1.888425182254934e-05, "loss": 1.4368, "step": 5406 }, { "epoch": 17.727868852459018, "grad_norm": 12.012712478637695, "learning_rate": 1.8883764343551165e-05, "loss": 1.5652, "step": 5407 }, { "epoch": 17.731147540983606, "grad_norm": 8.67398738861084, "learning_rate": 1.8883276764379307e-05, "loss": 1.4141, "step": 5408 }, { "epoch": 17.7344262295082, "grad_norm": 7.603647232055664, "learning_rate": 1.888278908503927e-05, "loss": 1.5498, "step": 5409 }, { "epoch": 17.737704918032787, "grad_norm": 12.480513572692871, "learning_rate": 1.888230130553655e-05, "loss": 1.4509, "step": 5410 }, { "epoch": 17.74098360655738, "grad_norm": 7.924636363983154, "learning_rate": 1.8881813425876648e-05, "loss": 1.7734, "step": 5411 }, { "epoch": 17.744262295081967, "grad_norm": 8.751424789428711, "learning_rate": 1.888132544606507e-05, "loss": 1.4106, "step": 5412 }, { "epoch": 17.74754098360656, "grad_norm": 8.775043487548828, "learning_rate": 1.8880837366107306e-05, "loss": 1.3899, "step": 5413 }, { "epoch": 17.750819672131147, "grad_norm": 9.057461738586426, "learning_rate": 1.888034918600887e-05, "loss": 1.438, "step": 5414 }, { "epoch": 17.75409836065574, "grad_norm": 8.478311538696289, "learning_rate": 1.8879860905775264e-05, "loss": 1.5608, "step": 5415 }, { "epoch": 17.757377049180327, "grad_norm": 12.662374496459961, "learning_rate": 1.887937252541199e-05, "loss": 1.4475, "step": 5416 }, { "epoch": 17.76065573770492, "grad_norm": 7.745804309844971, "learning_rate": 1.8878884044924563e-05, "loss": 1.4187, "step": 5417 }, { "epoch": 17.763934426229508, "grad_norm": 10.588851928710938, "learning_rate": 1.8878395464318483e-05, "loss": 1.3521, "step": 5418 }, { "epoch": 17.7672131147541, "grad_norm": 9.756278991699219, "learning_rate": 1.8877906783599267e-05, "loss": 1.3113, "step": 5419 }, { "epoch": 17.770491803278688, "grad_norm": 6.590897083282471, "learning_rate": 1.887741800277242e-05, "loss": 1.5239, "step": 5420 }, { "epoch": 17.77377049180328, "grad_norm": 8.567337036132812, "learning_rate": 1.887692912184345e-05, "loss": 1.4797, "step": 5421 }, { "epoch": 17.777049180327868, "grad_norm": 9.597769737243652, "learning_rate": 1.8876440140817883e-05, "loss": 1.5154, "step": 5422 }, { "epoch": 17.78032786885246, "grad_norm": 7.523346900939941, "learning_rate": 1.887595105970122e-05, "loss": 1.583, "step": 5423 }, { "epoch": 17.78360655737705, "grad_norm": 8.235913276672363, "learning_rate": 1.8875461878498977e-05, "loss": 1.3433, "step": 5424 }, { "epoch": 17.78688524590164, "grad_norm": 10.22326374053955, "learning_rate": 1.8874972597216678e-05, "loss": 1.5967, "step": 5425 }, { "epoch": 17.79016393442623, "grad_norm": 7.9751691818237305, "learning_rate": 1.8874483215859835e-05, "loss": 1.699, "step": 5426 }, { "epoch": 17.79344262295082, "grad_norm": 9.099581718444824, "learning_rate": 1.887399373443396e-05, "loss": 1.4839, "step": 5427 }, { "epoch": 17.79672131147541, "grad_norm": 8.594701766967773, "learning_rate": 1.8873504152944585e-05, "loss": 1.5083, "step": 5428 }, { "epoch": 17.8, "grad_norm": 8.43868637084961, "learning_rate": 1.8873014471397225e-05, "loss": 1.2346, "step": 5429 }, { "epoch": 17.80327868852459, "grad_norm": 131.1882781982422, "learning_rate": 1.88725246897974e-05, "loss": 1.5332, "step": 5430 }, { "epoch": 17.80655737704918, "grad_norm": 9.15156364440918, "learning_rate": 1.8872034808150633e-05, "loss": 1.3899, "step": 5431 }, { "epoch": 17.80983606557377, "grad_norm": 27.948877334594727, "learning_rate": 1.8871544826462448e-05, "loss": 1.7637, "step": 5432 }, { "epoch": 17.81311475409836, "grad_norm": 6.339897155761719, "learning_rate": 1.8871054744738374e-05, "loss": 1.3887, "step": 5433 }, { "epoch": 17.81639344262295, "grad_norm": 8.768489837646484, "learning_rate": 1.887056456298393e-05, "loss": 1.5591, "step": 5434 }, { "epoch": 17.81967213114754, "grad_norm": 8.144115447998047, "learning_rate": 1.8870074281204652e-05, "loss": 1.4651, "step": 5435 }, { "epoch": 17.82295081967213, "grad_norm": 7.399721622467041, "learning_rate": 1.886958389940606e-05, "loss": 1.3872, "step": 5436 }, { "epoch": 17.82622950819672, "grad_norm": 14.376477241516113, "learning_rate": 1.886909341759369e-05, "loss": 1.5635, "step": 5437 }, { "epoch": 17.82950819672131, "grad_norm": 7.857174396514893, "learning_rate": 1.886860283577307e-05, "loss": 1.5845, "step": 5438 }, { "epoch": 17.832786885245902, "grad_norm": 8.129916191101074, "learning_rate": 1.8868112153949735e-05, "loss": 1.4443, "step": 5439 }, { "epoch": 17.83606557377049, "grad_norm": 14.022039413452148, "learning_rate": 1.886762137212921e-05, "loss": 1.5563, "step": 5440 }, { "epoch": 17.839344262295082, "grad_norm": 5.691249847412109, "learning_rate": 1.8867130490317036e-05, "loss": 1.6787, "step": 5441 }, { "epoch": 17.84262295081967, "grad_norm": 17.75613784790039, "learning_rate": 1.8866639508518744e-05, "loss": 1.4517, "step": 5442 }, { "epoch": 17.845901639344262, "grad_norm": 8.10845947265625, "learning_rate": 1.8866148426739878e-05, "loss": 1.4099, "step": 5443 }, { "epoch": 17.84918032786885, "grad_norm": 11.013771057128906, "learning_rate": 1.8865657244985964e-05, "loss": 1.4856, "step": 5444 }, { "epoch": 17.852459016393443, "grad_norm": 9.783138275146484, "learning_rate": 1.8865165963262552e-05, "loss": 1.4795, "step": 5445 }, { "epoch": 17.855737704918035, "grad_norm": 10.289151191711426, "learning_rate": 1.886467458157517e-05, "loss": 1.4561, "step": 5446 }, { "epoch": 17.859016393442623, "grad_norm": 14.491978645324707, "learning_rate": 1.886418309992937e-05, "loss": 1.4524, "step": 5447 }, { "epoch": 17.862295081967215, "grad_norm": 16.08396339416504, "learning_rate": 1.8863691518330688e-05, "loss": 1.427, "step": 5448 }, { "epoch": 17.865573770491803, "grad_norm": 13.941269874572754, "learning_rate": 1.886319983678467e-05, "loss": 1.217, "step": 5449 }, { "epoch": 17.868852459016395, "grad_norm": 10.23324203491211, "learning_rate": 1.8862708055296852e-05, "loss": 1.5022, "step": 5450 }, { "epoch": 17.872131147540983, "grad_norm": 17.53517723083496, "learning_rate": 1.8862216173872792e-05, "loss": 1.395, "step": 5451 }, { "epoch": 17.875409836065575, "grad_norm": 12.031365394592285, "learning_rate": 1.8861724192518027e-05, "loss": 1.5754, "step": 5452 }, { "epoch": 17.878688524590164, "grad_norm": 14.208266258239746, "learning_rate": 1.886123211123811e-05, "loss": 1.592, "step": 5453 }, { "epoch": 17.881967213114756, "grad_norm": 10.500535011291504, "learning_rate": 1.8860739930038585e-05, "loss": 1.5281, "step": 5454 }, { "epoch": 17.885245901639344, "grad_norm": 7.213810920715332, "learning_rate": 1.8860247648925007e-05, "loss": 1.5002, "step": 5455 }, { "epoch": 17.888524590163936, "grad_norm": 11.626531600952148, "learning_rate": 1.8859755267902923e-05, "loss": 1.3206, "step": 5456 }, { "epoch": 17.891803278688524, "grad_norm": 10.00550651550293, "learning_rate": 1.8859262786977888e-05, "loss": 1.3888, "step": 5457 }, { "epoch": 17.895081967213116, "grad_norm": 12.608622550964355, "learning_rate": 1.8858770206155454e-05, "loss": 1.438, "step": 5458 }, { "epoch": 17.898360655737704, "grad_norm": 15.045890808105469, "learning_rate": 1.8858277525441174e-05, "loss": 1.4402, "step": 5459 }, { "epoch": 17.901639344262296, "grad_norm": 18.999332427978516, "learning_rate": 1.8857784744840605e-05, "loss": 1.4177, "step": 5460 }, { "epoch": 17.904918032786885, "grad_norm": 10.215024948120117, "learning_rate": 1.88572918643593e-05, "loss": 1.3948, "step": 5461 }, { "epoch": 17.908196721311477, "grad_norm": 14.452589988708496, "learning_rate": 1.885679888400282e-05, "loss": 1.5393, "step": 5462 }, { "epoch": 17.911475409836065, "grad_norm": 23.534706115722656, "learning_rate": 1.8856305803776728e-05, "loss": 1.6277, "step": 5463 }, { "epoch": 17.914754098360657, "grad_norm": 9.61208724975586, "learning_rate": 1.8855812623686577e-05, "loss": 1.5151, "step": 5464 }, { "epoch": 17.918032786885245, "grad_norm": 9.851929664611816, "learning_rate": 1.8855319343737934e-05, "loss": 1.4626, "step": 5465 }, { "epoch": 17.921311475409837, "grad_norm": 8.11524772644043, "learning_rate": 1.8854825963936355e-05, "loss": 1.5415, "step": 5466 }, { "epoch": 17.924590163934425, "grad_norm": 8.620658874511719, "learning_rate": 1.885433248428741e-05, "loss": 1.5645, "step": 5467 }, { "epoch": 17.927868852459017, "grad_norm": 9.016486167907715, "learning_rate": 1.8853838904796657e-05, "loss": 1.488, "step": 5468 }, { "epoch": 17.931147540983606, "grad_norm": 7.775198936462402, "learning_rate": 1.8853345225469665e-05, "loss": 1.5894, "step": 5469 }, { "epoch": 17.934426229508198, "grad_norm": 8.743369102478027, "learning_rate": 1.8852851446312e-05, "loss": 1.4268, "step": 5470 }, { "epoch": 17.937704918032786, "grad_norm": 9.275167465209961, "learning_rate": 1.8852357567329232e-05, "loss": 1.5127, "step": 5471 }, { "epoch": 17.940983606557378, "grad_norm": 7.187248229980469, "learning_rate": 1.8851863588526932e-05, "loss": 1.3223, "step": 5472 }, { "epoch": 17.944262295081966, "grad_norm": 16.244348526000977, "learning_rate": 1.885136950991066e-05, "loss": 1.6587, "step": 5473 }, { "epoch": 17.947540983606558, "grad_norm": 8.32331657409668, "learning_rate": 1.8850875331485996e-05, "loss": 1.3984, "step": 5474 }, { "epoch": 17.950819672131146, "grad_norm": 18.008821487426758, "learning_rate": 1.8850381053258507e-05, "loss": 1.5347, "step": 5475 }, { "epoch": 17.95409836065574, "grad_norm": 6.635096073150635, "learning_rate": 1.8849886675233777e-05, "loss": 1.4856, "step": 5476 }, { "epoch": 17.957377049180327, "grad_norm": 14.423796653747559, "learning_rate": 1.8849392197417367e-05, "loss": 1.5737, "step": 5477 }, { "epoch": 17.96065573770492, "grad_norm": 10.632131576538086, "learning_rate": 1.884889761981486e-05, "loss": 1.5076, "step": 5478 }, { "epoch": 17.963934426229507, "grad_norm": 24.568811416625977, "learning_rate": 1.884840294243183e-05, "loss": 1.4294, "step": 5479 }, { "epoch": 17.9672131147541, "grad_norm": 9.922660827636719, "learning_rate": 1.884790816527386e-05, "loss": 1.6782, "step": 5480 }, { "epoch": 17.970491803278687, "grad_norm": 11.345142364501953, "learning_rate": 1.8847413288346526e-05, "loss": 1.3181, "step": 5481 }, { "epoch": 17.97377049180328, "grad_norm": 34.85976791381836, "learning_rate": 1.8846918311655405e-05, "loss": 1.4277, "step": 5482 }, { "epoch": 17.977049180327867, "grad_norm": 9.096114158630371, "learning_rate": 1.8846423235206087e-05, "loss": 1.5813, "step": 5483 }, { "epoch": 17.98032786885246, "grad_norm": 12.855316162109375, "learning_rate": 1.8845928059004144e-05, "loss": 1.3499, "step": 5484 }, { "epoch": 17.983606557377048, "grad_norm": 13.50810432434082, "learning_rate": 1.8845432783055165e-05, "loss": 1.5171, "step": 5485 }, { "epoch": 17.98688524590164, "grad_norm": 9.491978645324707, "learning_rate": 1.8844937407364735e-05, "loss": 1.3606, "step": 5486 }, { "epoch": 17.990163934426228, "grad_norm": 11.315908432006836, "learning_rate": 1.884444193193844e-05, "loss": 1.4265, "step": 5487 }, { "epoch": 17.99344262295082, "grad_norm": 9.360760688781738, "learning_rate": 1.8843946356781865e-05, "loss": 1.5771, "step": 5488 }, { "epoch": 17.99672131147541, "grad_norm": 7.611891269683838, "learning_rate": 1.8843450681900605e-05, "loss": 1.4956, "step": 5489 }, { "epoch": 18.0, "grad_norm": 10.7274169921875, "learning_rate": 1.8842954907300236e-05, "loss": 1.2266, "step": 5490 }, { "epoch": 18.003278688524592, "grad_norm": 16.449499130249023, "learning_rate": 1.884245903298636e-05, "loss": 1.2753, "step": 5491 }, { "epoch": 18.00655737704918, "grad_norm": 11.767072677612305, "learning_rate": 1.8841963058964564e-05, "loss": 1.2996, "step": 5492 }, { "epoch": 18.009836065573772, "grad_norm": 14.281088829040527, "learning_rate": 1.8841466985240443e-05, "loss": 1.3496, "step": 5493 }, { "epoch": 18.01311475409836, "grad_norm": 8.327179908752441, "learning_rate": 1.8840970811819588e-05, "loss": 1.4471, "step": 5494 }, { "epoch": 18.016393442622952, "grad_norm": 11.637178421020508, "learning_rate": 1.884047453870759e-05, "loss": 1.3616, "step": 5495 }, { "epoch": 18.01967213114754, "grad_norm": 14.830780982971191, "learning_rate": 1.8839978165910057e-05, "loss": 1.5068, "step": 5496 }, { "epoch": 18.022950819672133, "grad_norm": 7.52707576751709, "learning_rate": 1.8839481693432575e-05, "loss": 1.3105, "step": 5497 }, { "epoch": 18.02622950819672, "grad_norm": 9.947242736816406, "learning_rate": 1.8838985121280745e-05, "loss": 1.3821, "step": 5498 }, { "epoch": 18.029508196721313, "grad_norm": 10.48604679107666, "learning_rate": 1.883848844946017e-05, "loss": 1.3953, "step": 5499 }, { "epoch": 18.0327868852459, "grad_norm": 9.645222663879395, "learning_rate": 1.8837991677976447e-05, "loss": 1.4927, "step": 5500 }, { "epoch": 18.036065573770493, "grad_norm": 13.552383422851562, "learning_rate": 1.883749480683518e-05, "loss": 1.3389, "step": 5501 }, { "epoch": 18.03934426229508, "grad_norm": 9.168352127075195, "learning_rate": 1.8836997836041968e-05, "loss": 1.3606, "step": 5502 }, { "epoch": 18.042622950819673, "grad_norm": 11.387288093566895, "learning_rate": 1.883650076560242e-05, "loss": 1.5442, "step": 5503 }, { "epoch": 18.04590163934426, "grad_norm": 9.171950340270996, "learning_rate": 1.8836003595522135e-05, "loss": 1.427, "step": 5504 }, { "epoch": 18.049180327868854, "grad_norm": 10.48697280883789, "learning_rate": 1.8835506325806726e-05, "loss": 1.2971, "step": 5505 }, { "epoch": 18.052459016393442, "grad_norm": 11.09500503540039, "learning_rate": 1.8835008956461795e-05, "loss": 1.3918, "step": 5506 }, { "epoch": 18.055737704918034, "grad_norm": 8.430598258972168, "learning_rate": 1.883451148749295e-05, "loss": 1.5095, "step": 5507 }, { "epoch": 18.059016393442622, "grad_norm": 8.976431846618652, "learning_rate": 1.8834013918905805e-05, "loss": 1.4531, "step": 5508 }, { "epoch": 18.062295081967214, "grad_norm": 19.47614288330078, "learning_rate": 1.8833516250705966e-05, "loss": 1.4854, "step": 5509 }, { "epoch": 18.065573770491802, "grad_norm": 24.304824829101562, "learning_rate": 1.8833018482899047e-05, "loss": 1.3589, "step": 5510 }, { "epoch": 18.068852459016394, "grad_norm": 9.57711410522461, "learning_rate": 1.883252061549066e-05, "loss": 1.241, "step": 5511 }, { "epoch": 18.072131147540983, "grad_norm": 6.595380783081055, "learning_rate": 1.883202264848642e-05, "loss": 1.4502, "step": 5512 }, { "epoch": 18.075409836065575, "grad_norm": 10.44453239440918, "learning_rate": 1.8831524581891946e-05, "loss": 1.5249, "step": 5513 }, { "epoch": 18.078688524590163, "grad_norm": 9.003952026367188, "learning_rate": 1.8831026415712845e-05, "loss": 1.6339, "step": 5514 }, { "epoch": 18.081967213114755, "grad_norm": 9.459230422973633, "learning_rate": 1.883052814995474e-05, "loss": 1.3599, "step": 5515 }, { "epoch": 18.085245901639343, "grad_norm": 8.118230819702148, "learning_rate": 1.8830029784623248e-05, "loss": 1.3054, "step": 5516 }, { "epoch": 18.088524590163935, "grad_norm": 16.878360748291016, "learning_rate": 1.8829531319723992e-05, "loss": 1.5828, "step": 5517 }, { "epoch": 18.091803278688523, "grad_norm": 8.625945091247559, "learning_rate": 1.8829032755262585e-05, "loss": 1.4165, "step": 5518 }, { "epoch": 18.095081967213115, "grad_norm": 13.520465850830078, "learning_rate": 1.882853409124466e-05, "loss": 1.3408, "step": 5519 }, { "epoch": 18.098360655737704, "grad_norm": 8.76867389678955, "learning_rate": 1.882803532767583e-05, "loss": 1.2998, "step": 5520 }, { "epoch": 18.101639344262296, "grad_norm": 13.646285057067871, "learning_rate": 1.8827536464561726e-05, "loss": 1.3064, "step": 5521 }, { "epoch": 18.104918032786884, "grad_norm": 8.153837203979492, "learning_rate": 1.8827037501907966e-05, "loss": 1.1821, "step": 5522 }, { "epoch": 18.108196721311476, "grad_norm": 9.708085060119629, "learning_rate": 1.882653843972018e-05, "loss": 1.4355, "step": 5523 }, { "epoch": 18.111475409836064, "grad_norm": 7.542160511016846, "learning_rate": 1.8826039278004e-05, "loss": 1.2438, "step": 5524 }, { "epoch": 18.114754098360656, "grad_norm": 7.691033363342285, "learning_rate": 1.8825540016765046e-05, "loss": 1.4392, "step": 5525 }, { "epoch": 18.118032786885244, "grad_norm": 8.203259468078613, "learning_rate": 1.8825040656008955e-05, "loss": 1.3003, "step": 5526 }, { "epoch": 18.121311475409836, "grad_norm": 8.743029594421387, "learning_rate": 1.8824541195741353e-05, "loss": 1.3286, "step": 5527 }, { "epoch": 18.124590163934425, "grad_norm": 8.48466968536377, "learning_rate": 1.8824041635967875e-05, "loss": 1.5188, "step": 5528 }, { "epoch": 18.127868852459017, "grad_norm": 7.287288188934326, "learning_rate": 1.8823541976694155e-05, "loss": 1.5283, "step": 5529 }, { "epoch": 18.131147540983605, "grad_norm": 10.20109748840332, "learning_rate": 1.8823042217925823e-05, "loss": 1.2048, "step": 5530 }, { "epoch": 18.134426229508197, "grad_norm": 7.009385585784912, "learning_rate": 1.8822542359668515e-05, "loss": 1.4807, "step": 5531 }, { "epoch": 18.137704918032785, "grad_norm": 10.411758422851562, "learning_rate": 1.882204240192787e-05, "loss": 1.3892, "step": 5532 }, { "epoch": 18.140983606557377, "grad_norm": 10.075823783874512, "learning_rate": 1.8821542344709525e-05, "loss": 1.3384, "step": 5533 }, { "epoch": 18.14426229508197, "grad_norm": 8.860199928283691, "learning_rate": 1.8821042188019115e-05, "loss": 1.3364, "step": 5534 }, { "epoch": 18.147540983606557, "grad_norm": 17.839262008666992, "learning_rate": 1.8820541931862287e-05, "loss": 1.5088, "step": 5535 }, { "epoch": 18.15081967213115, "grad_norm": 9.686392784118652, "learning_rate": 1.8820041576244678e-05, "loss": 1.5342, "step": 5536 }, { "epoch": 18.154098360655738, "grad_norm": 54.386436462402344, "learning_rate": 1.8819541121171926e-05, "loss": 1.3268, "step": 5537 }, { "epoch": 18.15737704918033, "grad_norm": 8.716695785522461, "learning_rate": 1.8819040566649677e-05, "loss": 1.5435, "step": 5538 }, { "epoch": 18.160655737704918, "grad_norm": 10.299678802490234, "learning_rate": 1.8818539912683577e-05, "loss": 1.5498, "step": 5539 }, { "epoch": 18.16393442622951, "grad_norm": 7.830200672149658, "learning_rate": 1.8818039159279272e-05, "loss": 1.5674, "step": 5540 }, { "epoch": 18.167213114754098, "grad_norm": 11.760506629943848, "learning_rate": 1.881753830644241e-05, "loss": 1.5916, "step": 5541 }, { "epoch": 18.17049180327869, "grad_norm": 7.093559265136719, "learning_rate": 1.881703735417863e-05, "loss": 1.3137, "step": 5542 }, { "epoch": 18.17377049180328, "grad_norm": 6.56692361831665, "learning_rate": 1.8816536302493586e-05, "loss": 1.5085, "step": 5543 }, { "epoch": 18.17704918032787, "grad_norm": 26.433578491210938, "learning_rate": 1.8816035151392933e-05, "loss": 1.2632, "step": 5544 }, { "epoch": 18.18032786885246, "grad_norm": 6.624031066894531, "learning_rate": 1.8815533900882315e-05, "loss": 1.4924, "step": 5545 }, { "epoch": 18.18360655737705, "grad_norm": 11.222123146057129, "learning_rate": 1.8815032550967386e-05, "loss": 1.4607, "step": 5546 }, { "epoch": 18.18688524590164, "grad_norm": 11.011024475097656, "learning_rate": 1.8814531101653802e-05, "loss": 1.2858, "step": 5547 }, { "epoch": 18.19016393442623, "grad_norm": 9.10645580291748, "learning_rate": 1.8814029552947213e-05, "loss": 1.2233, "step": 5548 }, { "epoch": 18.19344262295082, "grad_norm": 15.741768836975098, "learning_rate": 1.8813527904853276e-05, "loss": 1.2764, "step": 5549 }, { "epoch": 18.19672131147541, "grad_norm": 8.731093406677246, "learning_rate": 1.881302615737765e-05, "loss": 1.3689, "step": 5550 }, { "epoch": 18.2, "grad_norm": 11.640625, "learning_rate": 1.881252431052599e-05, "loss": 1.3718, "step": 5551 }, { "epoch": 18.20327868852459, "grad_norm": 8.522912979125977, "learning_rate": 1.8812022364303953e-05, "loss": 1.4915, "step": 5552 }, { "epoch": 18.20655737704918, "grad_norm": 12.428662300109863, "learning_rate": 1.8811520318717206e-05, "loss": 1.2537, "step": 5553 }, { "epoch": 18.20983606557377, "grad_norm": 7.581199645996094, "learning_rate": 1.8811018173771404e-05, "loss": 1.4846, "step": 5554 }, { "epoch": 18.21311475409836, "grad_norm": 10.607147216796875, "learning_rate": 1.881051592947221e-05, "loss": 1.512, "step": 5555 }, { "epoch": 18.21639344262295, "grad_norm": 7.008500099182129, "learning_rate": 1.881001358582529e-05, "loss": 1.4185, "step": 5556 }, { "epoch": 18.21967213114754, "grad_norm": 8.0316801071167, "learning_rate": 1.8809511142836304e-05, "loss": 1.4675, "step": 5557 }, { "epoch": 18.222950819672132, "grad_norm": 10.719928741455078, "learning_rate": 1.8809008600510924e-05, "loss": 1.563, "step": 5558 }, { "epoch": 18.22622950819672, "grad_norm": 7.658203601837158, "learning_rate": 1.880850595885481e-05, "loss": 1.4563, "step": 5559 }, { "epoch": 18.229508196721312, "grad_norm": 7.23634672164917, "learning_rate": 1.8808003217873633e-05, "loss": 1.3596, "step": 5560 }, { "epoch": 18.2327868852459, "grad_norm": 8.198729515075684, "learning_rate": 1.8807500377573063e-05, "loss": 1.5452, "step": 5561 }, { "epoch": 18.236065573770492, "grad_norm": 48.913856506347656, "learning_rate": 1.8806997437958768e-05, "loss": 1.4875, "step": 5562 }, { "epoch": 18.23934426229508, "grad_norm": 9.775596618652344, "learning_rate": 1.8806494399036422e-05, "loss": 1.3762, "step": 5563 }, { "epoch": 18.242622950819673, "grad_norm": 6.642462730407715, "learning_rate": 1.880599126081169e-05, "loss": 1.5142, "step": 5564 }, { "epoch": 18.24590163934426, "grad_norm": 11.69786262512207, "learning_rate": 1.8805488023290254e-05, "loss": 1.5179, "step": 5565 }, { "epoch": 18.249180327868853, "grad_norm": 9.692583084106445, "learning_rate": 1.8804984686477788e-05, "loss": 1.3325, "step": 5566 }, { "epoch": 18.25245901639344, "grad_norm": 9.153143882751465, "learning_rate": 1.8804481250379962e-05, "loss": 1.3997, "step": 5567 }, { "epoch": 18.255737704918033, "grad_norm": 7.431546688079834, "learning_rate": 1.8803977715002454e-05, "loss": 1.1321, "step": 5568 }, { "epoch": 18.25901639344262, "grad_norm": 9.099260330200195, "learning_rate": 1.8803474080350944e-05, "loss": 1.2843, "step": 5569 }, { "epoch": 18.262295081967213, "grad_norm": 16.316171646118164, "learning_rate": 1.880297034643111e-05, "loss": 1.4468, "step": 5570 }, { "epoch": 18.2655737704918, "grad_norm": 13.786361694335938, "learning_rate": 1.8802466513248635e-05, "loss": 1.4802, "step": 5571 }, { "epoch": 18.268852459016394, "grad_norm": 10.968769073486328, "learning_rate": 1.8801962580809198e-05, "loss": 1.6091, "step": 5572 }, { "epoch": 18.272131147540982, "grad_norm": 10.186875343322754, "learning_rate": 1.8801458549118478e-05, "loss": 1.2599, "step": 5573 }, { "epoch": 18.275409836065574, "grad_norm": 11.925747871398926, "learning_rate": 1.880095441818216e-05, "loss": 1.5667, "step": 5574 }, { "epoch": 18.278688524590162, "grad_norm": 11.600239753723145, "learning_rate": 1.8800450188005936e-05, "loss": 1.6827, "step": 5575 }, { "epoch": 18.281967213114754, "grad_norm": 7.925594806671143, "learning_rate": 1.879994585859548e-05, "loss": 1.4198, "step": 5576 }, { "epoch": 18.285245901639342, "grad_norm": 7.65798807144165, "learning_rate": 1.879944142995649e-05, "loss": 1.2085, "step": 5577 }, { "epoch": 18.288524590163934, "grad_norm": 8.052207946777344, "learning_rate": 1.8798936902094648e-05, "loss": 1.3098, "step": 5578 }, { "epoch": 18.291803278688526, "grad_norm": 9.383814811706543, "learning_rate": 1.8798432275015644e-05, "loss": 1.4385, "step": 5579 }, { "epoch": 18.295081967213115, "grad_norm": 9.28731918334961, "learning_rate": 1.8797927548725164e-05, "loss": 1.4651, "step": 5580 }, { "epoch": 18.298360655737707, "grad_norm": 10.355934143066406, "learning_rate": 1.8797422723228908e-05, "loss": 1.3544, "step": 5581 }, { "epoch": 18.301639344262295, "grad_norm": 7.616357326507568, "learning_rate": 1.8796917798532562e-05, "loss": 1.4009, "step": 5582 }, { "epoch": 18.304918032786887, "grad_norm": 9.75713062286377, "learning_rate": 1.879641277464182e-05, "loss": 1.3472, "step": 5583 }, { "epoch": 18.308196721311475, "grad_norm": 9.117118835449219, "learning_rate": 1.8795907651562378e-05, "loss": 1.4661, "step": 5584 }, { "epoch": 18.311475409836067, "grad_norm": 10.90739631652832, "learning_rate": 1.8795402429299935e-05, "loss": 1.3328, "step": 5585 }, { "epoch": 18.314754098360655, "grad_norm": 13.846322059631348, "learning_rate": 1.8794897107860183e-05, "loss": 1.4604, "step": 5586 }, { "epoch": 18.318032786885247, "grad_norm": 7.853305816650391, "learning_rate": 1.8794391687248824e-05, "loss": 1.4729, "step": 5587 }, { "epoch": 18.321311475409836, "grad_norm": 6.799876689910889, "learning_rate": 1.8793886167471548e-05, "loss": 1.5591, "step": 5588 }, { "epoch": 18.324590163934428, "grad_norm": 10.186819076538086, "learning_rate": 1.879338054853407e-05, "loss": 1.5132, "step": 5589 }, { "epoch": 18.327868852459016, "grad_norm": 7.614936351776123, "learning_rate": 1.879287483044208e-05, "loss": 1.3983, "step": 5590 }, { "epoch": 18.331147540983608, "grad_norm": 6.98232889175415, "learning_rate": 1.879236901320128e-05, "loss": 1.4763, "step": 5591 }, { "epoch": 18.334426229508196, "grad_norm": 28.448707580566406, "learning_rate": 1.8791863096817385e-05, "loss": 1.4707, "step": 5592 }, { "epoch": 18.337704918032788, "grad_norm": 6.886091232299805, "learning_rate": 1.8791357081296088e-05, "loss": 1.4775, "step": 5593 }, { "epoch": 18.340983606557376, "grad_norm": 6.886336326599121, "learning_rate": 1.87908509666431e-05, "loss": 1.4275, "step": 5594 }, { "epoch": 18.34426229508197, "grad_norm": 11.51620101928711, "learning_rate": 1.8790344752864126e-05, "loss": 1.2739, "step": 5595 }, { "epoch": 18.347540983606557, "grad_norm": 7.564685821533203, "learning_rate": 1.8789838439964876e-05, "loss": 1.3254, "step": 5596 }, { "epoch": 18.35081967213115, "grad_norm": 14.459113121032715, "learning_rate": 1.8789332027951058e-05, "loss": 1.408, "step": 5597 }, { "epoch": 18.354098360655737, "grad_norm": 9.55578899383545, "learning_rate": 1.8788825516828385e-05, "loss": 1.5586, "step": 5598 }, { "epoch": 18.35737704918033, "grad_norm": 7.375198841094971, "learning_rate": 1.8788318906602566e-05, "loss": 1.4888, "step": 5599 }, { "epoch": 18.360655737704917, "grad_norm": 7.951760292053223, "learning_rate": 1.8787812197279313e-05, "loss": 1.4077, "step": 5600 }, { "epoch": 18.36393442622951, "grad_norm": 9.862276077270508, "learning_rate": 1.8787305388864338e-05, "loss": 1.3311, "step": 5601 }, { "epoch": 18.367213114754097, "grad_norm": 8.361034393310547, "learning_rate": 1.878679848136336e-05, "loss": 1.6443, "step": 5602 }, { "epoch": 18.37049180327869, "grad_norm": 9.613348007202148, "learning_rate": 1.8786291474782095e-05, "loss": 1.3293, "step": 5603 }, { "epoch": 18.373770491803278, "grad_norm": 8.85464859008789, "learning_rate": 1.8785784369126254e-05, "loss": 1.4985, "step": 5604 }, { "epoch": 18.37704918032787, "grad_norm": 7.333346366882324, "learning_rate": 1.878527716440156e-05, "loss": 1.5337, "step": 5605 }, { "epoch": 18.380327868852458, "grad_norm": 6.410733222961426, "learning_rate": 1.8784769860613737e-05, "loss": 1.3528, "step": 5606 }, { "epoch": 18.38360655737705, "grad_norm": 7.674012660980225, "learning_rate": 1.8784262457768496e-05, "loss": 1.4385, "step": 5607 }, { "epoch": 18.386885245901638, "grad_norm": 27.61484146118164, "learning_rate": 1.8783754955871564e-05, "loss": 1.3607, "step": 5608 }, { "epoch": 18.39016393442623, "grad_norm": 9.0385160446167, "learning_rate": 1.878324735492866e-05, "loss": 1.4773, "step": 5609 }, { "epoch": 18.39344262295082, "grad_norm": 9.426460266113281, "learning_rate": 1.878273965494551e-05, "loss": 1.3174, "step": 5610 }, { "epoch": 18.39672131147541, "grad_norm": 7.279082775115967, "learning_rate": 1.878223185592784e-05, "loss": 1.3054, "step": 5611 }, { "epoch": 18.4, "grad_norm": 6.6554484367370605, "learning_rate": 1.8781723957881374e-05, "loss": 1.4744, "step": 5612 }, { "epoch": 18.40327868852459, "grad_norm": 8.68543815612793, "learning_rate": 1.8781215960811837e-05, "loss": 1.3123, "step": 5613 }, { "epoch": 18.40655737704918, "grad_norm": 10.399298667907715, "learning_rate": 1.8780707864724966e-05, "loss": 1.4629, "step": 5614 }, { "epoch": 18.40983606557377, "grad_norm": 7.6685638427734375, "learning_rate": 1.878019966962648e-05, "loss": 1.4746, "step": 5615 }, { "epoch": 18.41311475409836, "grad_norm": 7.44167423248291, "learning_rate": 1.8779691375522114e-05, "loss": 1.6228, "step": 5616 }, { "epoch": 18.41639344262295, "grad_norm": 6.709864616394043, "learning_rate": 1.87791829824176e-05, "loss": 1.3286, "step": 5617 }, { "epoch": 18.41967213114754, "grad_norm": 10.57207202911377, "learning_rate": 1.8778674490318668e-05, "loss": 1.5701, "step": 5618 }, { "epoch": 18.42295081967213, "grad_norm": 8.821362495422363, "learning_rate": 1.8778165899231055e-05, "loss": 1.4402, "step": 5619 }, { "epoch": 18.42622950819672, "grad_norm": 8.031661987304688, "learning_rate": 1.8777657209160492e-05, "loss": 1.6333, "step": 5620 }, { "epoch": 18.42950819672131, "grad_norm": 6.494778633117676, "learning_rate": 1.877714842011272e-05, "loss": 1.4749, "step": 5621 }, { "epoch": 18.432786885245903, "grad_norm": 6.850954055786133, "learning_rate": 1.8776639532093476e-05, "loss": 1.6934, "step": 5622 }, { "epoch": 18.43606557377049, "grad_norm": 8.750444412231445, "learning_rate": 1.877613054510849e-05, "loss": 1.5122, "step": 5623 }, { "epoch": 18.439344262295084, "grad_norm": 8.934494972229004, "learning_rate": 1.8775621459163514e-05, "loss": 1.6108, "step": 5624 }, { "epoch": 18.442622950819672, "grad_norm": 9.499272346496582, "learning_rate": 1.8775112274264276e-05, "loss": 1.4319, "step": 5625 }, { "epoch": 18.445901639344264, "grad_norm": 11.513618469238281, "learning_rate": 1.8774602990416527e-05, "loss": 1.4253, "step": 5626 }, { "epoch": 18.449180327868852, "grad_norm": 7.823112487792969, "learning_rate": 1.8774093607626005e-05, "loss": 1.3997, "step": 5627 }, { "epoch": 18.452459016393444, "grad_norm": 9.679957389831543, "learning_rate": 1.8773584125898453e-05, "loss": 1.4014, "step": 5628 }, { "epoch": 18.455737704918032, "grad_norm": 7.58380126953125, "learning_rate": 1.8773074545239618e-05, "loss": 1.6343, "step": 5629 }, { "epoch": 18.459016393442624, "grad_norm": 30.883316040039062, "learning_rate": 1.8772564865655246e-05, "loss": 1.3574, "step": 5630 }, { "epoch": 18.462295081967213, "grad_norm": 8.232741355895996, "learning_rate": 1.8772055087151087e-05, "loss": 1.3696, "step": 5631 }, { "epoch": 18.465573770491805, "grad_norm": 7.231961727142334, "learning_rate": 1.877154520973288e-05, "loss": 1.4648, "step": 5632 }, { "epoch": 18.468852459016393, "grad_norm": 7.716813087463379, "learning_rate": 1.8771035233406386e-05, "loss": 1.4446, "step": 5633 }, { "epoch": 18.472131147540985, "grad_norm": 9.5672607421875, "learning_rate": 1.877052515817735e-05, "loss": 1.3855, "step": 5634 }, { "epoch": 18.475409836065573, "grad_norm": 6.954891204833984, "learning_rate": 1.8770014984051524e-05, "loss": 1.3384, "step": 5635 }, { "epoch": 18.478688524590165, "grad_norm": 7.383896350860596, "learning_rate": 1.876950471103466e-05, "loss": 1.5605, "step": 5636 }, { "epoch": 18.481967213114753, "grad_norm": 15.276644706726074, "learning_rate": 1.876899433913251e-05, "loss": 1.571, "step": 5637 }, { "epoch": 18.485245901639345, "grad_norm": 7.787586212158203, "learning_rate": 1.876848386835083e-05, "loss": 1.6084, "step": 5638 }, { "epoch": 18.488524590163934, "grad_norm": 7.40824031829834, "learning_rate": 1.8767973298695384e-05, "loss": 1.5249, "step": 5639 }, { "epoch": 18.491803278688526, "grad_norm": 8.570989608764648, "learning_rate": 1.876746263017192e-05, "loss": 1.4236, "step": 5640 }, { "epoch": 18.495081967213114, "grad_norm": 7.85215425491333, "learning_rate": 1.8766951862786196e-05, "loss": 1.3325, "step": 5641 }, { "epoch": 18.498360655737706, "grad_norm": 12.984559059143066, "learning_rate": 1.8766440996543976e-05, "loss": 1.5452, "step": 5642 }, { "epoch": 18.501639344262294, "grad_norm": 8.585888862609863, "learning_rate": 1.876593003145102e-05, "loss": 1.2546, "step": 5643 }, { "epoch": 18.504918032786886, "grad_norm": 9.069802284240723, "learning_rate": 1.8765418967513085e-05, "loss": 1.364, "step": 5644 }, { "epoch": 18.508196721311474, "grad_norm": 12.593541145324707, "learning_rate": 1.876490780473594e-05, "loss": 1.4946, "step": 5645 }, { "epoch": 18.511475409836066, "grad_norm": 8.422029495239258, "learning_rate": 1.876439654312535e-05, "loss": 1.4412, "step": 5646 }, { "epoch": 18.514754098360655, "grad_norm": 16.42731475830078, "learning_rate": 1.876388518268707e-05, "loss": 1.4612, "step": 5647 }, { "epoch": 18.518032786885247, "grad_norm": 7.870600700378418, "learning_rate": 1.8763373723426873e-05, "loss": 1.5383, "step": 5648 }, { "epoch": 18.521311475409835, "grad_norm": 7.094788551330566, "learning_rate": 1.876286216535053e-05, "loss": 1.4133, "step": 5649 }, { "epoch": 18.524590163934427, "grad_norm": 10.315038681030273, "learning_rate": 1.8762350508463804e-05, "loss": 1.5659, "step": 5650 }, { "epoch": 18.527868852459015, "grad_norm": 13.684473991394043, "learning_rate": 1.8761838752772462e-05, "loss": 1.3221, "step": 5651 }, { "epoch": 18.531147540983607, "grad_norm": 7.826561450958252, "learning_rate": 1.8761326898282283e-05, "loss": 1.5034, "step": 5652 }, { "epoch": 18.534426229508195, "grad_norm": 7.381956577301025, "learning_rate": 1.876081494499903e-05, "loss": 1.2964, "step": 5653 }, { "epoch": 18.537704918032787, "grad_norm": 8.29567813873291, "learning_rate": 1.876030289292848e-05, "loss": 1.4873, "step": 5654 }, { "epoch": 18.540983606557376, "grad_norm": 6.636821746826172, "learning_rate": 1.8759790742076406e-05, "loss": 1.6609, "step": 5655 }, { "epoch": 18.544262295081968, "grad_norm": 8.480496406555176, "learning_rate": 1.8759278492448586e-05, "loss": 1.3547, "step": 5656 }, { "epoch": 18.547540983606556, "grad_norm": 8.684870719909668, "learning_rate": 1.875876614405079e-05, "loss": 1.2522, "step": 5657 }, { "epoch": 18.550819672131148, "grad_norm": 7.29861307144165, "learning_rate": 1.8758253696888803e-05, "loss": 1.4705, "step": 5658 }, { "epoch": 18.554098360655736, "grad_norm": 7.641143321990967, "learning_rate": 1.8757741150968397e-05, "loss": 1.3269, "step": 5659 }, { "epoch": 18.557377049180328, "grad_norm": 15.744382858276367, "learning_rate": 1.8757228506295354e-05, "loss": 1.4509, "step": 5660 }, { "epoch": 18.560655737704916, "grad_norm": 14.286890983581543, "learning_rate": 1.8756715762875454e-05, "loss": 1.5532, "step": 5661 }, { "epoch": 18.56393442622951, "grad_norm": 7.3538618087768555, "learning_rate": 1.875620292071448e-05, "loss": 1.4875, "step": 5662 }, { "epoch": 18.567213114754097, "grad_norm": 6.856424808502197, "learning_rate": 1.8755689979818214e-05, "loss": 1.3774, "step": 5663 }, { "epoch": 18.57049180327869, "grad_norm": 7.619325637817383, "learning_rate": 1.875517694019244e-05, "loss": 1.3416, "step": 5664 }, { "epoch": 18.57377049180328, "grad_norm": 8.350316047668457, "learning_rate": 1.875466380184294e-05, "loss": 1.2885, "step": 5665 }, { "epoch": 18.57704918032787, "grad_norm": 9.715645790100098, "learning_rate": 1.8754150564775505e-05, "loss": 1.5186, "step": 5666 }, { "epoch": 18.58032786885246, "grad_norm": 8.8082914352417, "learning_rate": 1.875363722899592e-05, "loss": 1.334, "step": 5667 }, { "epoch": 18.58360655737705, "grad_norm": 7.007417678833008, "learning_rate": 1.8753123794509974e-05, "loss": 1.655, "step": 5668 }, { "epoch": 18.58688524590164, "grad_norm": 8.520768165588379, "learning_rate": 1.875261026132346e-05, "loss": 1.4807, "step": 5669 }, { "epoch": 18.59016393442623, "grad_norm": 7.404495716094971, "learning_rate": 1.875209662944216e-05, "loss": 1.4421, "step": 5670 }, { "epoch": 18.59344262295082, "grad_norm": 8.439949035644531, "learning_rate": 1.875158289887187e-05, "loss": 1.3687, "step": 5671 }, { "epoch": 18.59672131147541, "grad_norm": 11.428464889526367, "learning_rate": 1.875106906961839e-05, "loss": 1.3621, "step": 5672 }, { "epoch": 18.6, "grad_norm": 8.198221206665039, "learning_rate": 1.87505551416875e-05, "loss": 1.3269, "step": 5673 }, { "epoch": 18.60327868852459, "grad_norm": 36.2695198059082, "learning_rate": 1.8750041115085005e-05, "loss": 1.2432, "step": 5674 }, { "epoch": 18.60655737704918, "grad_norm": 8.099735260009766, "learning_rate": 1.87495269898167e-05, "loss": 1.2773, "step": 5675 }, { "epoch": 18.60983606557377, "grad_norm": 8.559089660644531, "learning_rate": 1.874901276588838e-05, "loss": 1.332, "step": 5676 }, { "epoch": 18.613114754098362, "grad_norm": 9.102998733520508, "learning_rate": 1.8748498443305846e-05, "loss": 1.6592, "step": 5677 }, { "epoch": 18.61639344262295, "grad_norm": 8.040339469909668, "learning_rate": 1.8747984022074892e-05, "loss": 1.3801, "step": 5678 }, { "epoch": 18.619672131147542, "grad_norm": 8.305706977844238, "learning_rate": 1.8747469502201326e-05, "loss": 1.4006, "step": 5679 }, { "epoch": 18.62295081967213, "grad_norm": 10.24997329711914, "learning_rate": 1.8746954883690944e-05, "loss": 1.4119, "step": 5680 }, { "epoch": 18.626229508196722, "grad_norm": 10.2232084274292, "learning_rate": 1.8746440166549554e-05, "loss": 1.3284, "step": 5681 }, { "epoch": 18.62950819672131, "grad_norm": 9.755813598632812, "learning_rate": 1.8745925350782955e-05, "loss": 1.3916, "step": 5682 }, { "epoch": 18.632786885245903, "grad_norm": 7.520615100860596, "learning_rate": 1.8745410436396954e-05, "loss": 1.335, "step": 5683 }, { "epoch": 18.63606557377049, "grad_norm": 8.197097778320312, "learning_rate": 1.8744895423397354e-05, "loss": 1.3652, "step": 5684 }, { "epoch": 18.639344262295083, "grad_norm": 7.388245105743408, "learning_rate": 1.8744380311789973e-05, "loss": 1.4927, "step": 5685 }, { "epoch": 18.64262295081967, "grad_norm": 9.085187911987305, "learning_rate": 1.874386510158061e-05, "loss": 1.4001, "step": 5686 }, { "epoch": 18.645901639344263, "grad_norm": 7.637387752532959, "learning_rate": 1.8743349792775074e-05, "loss": 1.4019, "step": 5687 }, { "epoch": 18.64918032786885, "grad_norm": 8.308878898620605, "learning_rate": 1.8742834385379177e-05, "loss": 1.322, "step": 5688 }, { "epoch": 18.652459016393443, "grad_norm": 8.533103942871094, "learning_rate": 1.8742318879398738e-05, "loss": 1.3726, "step": 5689 }, { "epoch": 18.65573770491803, "grad_norm": 8.373300552368164, "learning_rate": 1.8741803274839558e-05, "loss": 1.3457, "step": 5690 }, { "epoch": 18.659016393442624, "grad_norm": 7.964229583740234, "learning_rate": 1.874128757170746e-05, "loss": 1.4915, "step": 5691 }, { "epoch": 18.662295081967212, "grad_norm": 7.750272750854492, "learning_rate": 1.8740771770008256e-05, "loss": 1.4373, "step": 5692 }, { "epoch": 18.665573770491804, "grad_norm": 7.832116603851318, "learning_rate": 1.874025586974776e-05, "loss": 1.5549, "step": 5693 }, { "epoch": 18.668852459016392, "grad_norm": 9.33836555480957, "learning_rate": 1.873973987093179e-05, "loss": 1.5171, "step": 5694 }, { "epoch": 18.672131147540984, "grad_norm": 10.82204818725586, "learning_rate": 1.8739223773566173e-05, "loss": 1.2042, "step": 5695 }, { "epoch": 18.675409836065572, "grad_norm": 7.299262046813965, "learning_rate": 1.8738707577656717e-05, "loss": 1.5667, "step": 5696 }, { "epoch": 18.678688524590164, "grad_norm": 8.072340965270996, "learning_rate": 1.873819128320925e-05, "loss": 1.6643, "step": 5697 }, { "epoch": 18.681967213114753, "grad_norm": 7.545629978179932, "learning_rate": 1.8737674890229585e-05, "loss": 1.4304, "step": 5698 }, { "epoch": 18.685245901639345, "grad_norm": 8.277815818786621, "learning_rate": 1.8737158398723558e-05, "loss": 1.5081, "step": 5699 }, { "epoch": 18.688524590163933, "grad_norm": 6.551368236541748, "learning_rate": 1.873664180869698e-05, "loss": 1.5122, "step": 5700 }, { "epoch": 18.691803278688525, "grad_norm": 10.493257522583008, "learning_rate": 1.8736125120155683e-05, "loss": 1.2605, "step": 5701 }, { "epoch": 18.695081967213113, "grad_norm": 9.828604698181152, "learning_rate": 1.873560833310549e-05, "loss": 1.4741, "step": 5702 }, { "epoch": 18.698360655737705, "grad_norm": 15.833396911621094, "learning_rate": 1.8735091447552235e-05, "loss": 1.4351, "step": 5703 }, { "epoch": 18.701639344262293, "grad_norm": 9.86988353729248, "learning_rate": 1.873457446350174e-05, "loss": 1.2622, "step": 5704 }, { "epoch": 18.704918032786885, "grad_norm": 6.6152753829956055, "learning_rate": 1.8734057380959834e-05, "loss": 1.4048, "step": 5705 }, { "epoch": 18.708196721311474, "grad_norm": 8.351329803466797, "learning_rate": 1.873354019993235e-05, "loss": 1.3992, "step": 5706 }, { "epoch": 18.711475409836066, "grad_norm": 12.43202018737793, "learning_rate": 1.8733022920425125e-05, "loss": 1.3848, "step": 5707 }, { "epoch": 18.714754098360658, "grad_norm": 8.511462211608887, "learning_rate": 1.8732505542443983e-05, "loss": 1.4417, "step": 5708 }, { "epoch": 18.718032786885246, "grad_norm": 8.137946128845215, "learning_rate": 1.8731988065994756e-05, "loss": 1.356, "step": 5709 }, { "epoch": 18.721311475409838, "grad_norm": 10.869983673095703, "learning_rate": 1.8731470491083292e-05, "loss": 1.3513, "step": 5710 }, { "epoch": 18.724590163934426, "grad_norm": 8.787209510803223, "learning_rate": 1.873095281771542e-05, "loss": 1.519, "step": 5711 }, { "epoch": 18.727868852459018, "grad_norm": 16.1871337890625, "learning_rate": 1.8730435045896973e-05, "loss": 1.4905, "step": 5712 }, { "epoch": 18.731147540983606, "grad_norm": 8.213484764099121, "learning_rate": 1.8729917175633794e-05, "loss": 1.5105, "step": 5713 }, { "epoch": 18.7344262295082, "grad_norm": 7.667931079864502, "learning_rate": 1.8729399206931722e-05, "loss": 1.594, "step": 5714 }, { "epoch": 18.737704918032787, "grad_norm": 11.310383796691895, "learning_rate": 1.87288811397966e-05, "loss": 1.4585, "step": 5715 }, { "epoch": 18.74098360655738, "grad_norm": 8.382898330688477, "learning_rate": 1.8728362974234268e-05, "loss": 1.3165, "step": 5716 }, { "epoch": 18.744262295081967, "grad_norm": 10.541345596313477, "learning_rate": 1.8727844710250564e-05, "loss": 1.2923, "step": 5717 }, { "epoch": 18.74754098360656, "grad_norm": 9.007438659667969, "learning_rate": 1.872732634785134e-05, "loss": 1.4844, "step": 5718 }, { "epoch": 18.750819672131147, "grad_norm": 16.14845848083496, "learning_rate": 1.8726807887042434e-05, "loss": 1.3572, "step": 5719 }, { "epoch": 18.75409836065574, "grad_norm": 9.25307559967041, "learning_rate": 1.87262893278297e-05, "loss": 1.3499, "step": 5720 }, { "epoch": 18.757377049180327, "grad_norm": 7.128683567047119, "learning_rate": 1.8725770670218978e-05, "loss": 1.5791, "step": 5721 }, { "epoch": 18.76065573770492, "grad_norm": 6.946247577667236, "learning_rate": 1.8725251914216115e-05, "loss": 1.4814, "step": 5722 }, { "epoch": 18.763934426229508, "grad_norm": 7.77070426940918, "learning_rate": 1.8724733059826968e-05, "loss": 1.2429, "step": 5723 }, { "epoch": 18.7672131147541, "grad_norm": 8.656712532043457, "learning_rate": 1.8724214107057386e-05, "loss": 1.4626, "step": 5724 }, { "epoch": 18.770491803278688, "grad_norm": 9.269207954406738, "learning_rate": 1.872369505591322e-05, "loss": 1.363, "step": 5725 }, { "epoch": 18.77377049180328, "grad_norm": 9.079788208007812, "learning_rate": 1.872317590640032e-05, "loss": 1.4255, "step": 5726 }, { "epoch": 18.777049180327868, "grad_norm": 9.264305114746094, "learning_rate": 1.8722656658524544e-05, "loss": 1.5208, "step": 5727 }, { "epoch": 18.78032786885246, "grad_norm": 7.842609405517578, "learning_rate": 1.8722137312291743e-05, "loss": 1.4529, "step": 5728 }, { "epoch": 18.78360655737705, "grad_norm": 7.732379913330078, "learning_rate": 1.872161786770777e-05, "loss": 1.3119, "step": 5729 }, { "epoch": 18.78688524590164, "grad_norm": 13.551188468933105, "learning_rate": 1.8721098324778494e-05, "loss": 1.4792, "step": 5730 }, { "epoch": 18.79016393442623, "grad_norm": 9.184735298156738, "learning_rate": 1.8720578683509765e-05, "loss": 1.3794, "step": 5731 }, { "epoch": 18.79344262295082, "grad_norm": 7.754308223724365, "learning_rate": 1.8720058943907444e-05, "loss": 1.4934, "step": 5732 }, { "epoch": 18.79672131147541, "grad_norm": 10.692769050598145, "learning_rate": 1.8719539105977394e-05, "loss": 1.3959, "step": 5733 }, { "epoch": 18.8, "grad_norm": 10.030674934387207, "learning_rate": 1.871901916972547e-05, "loss": 1.4719, "step": 5734 }, { "epoch": 18.80327868852459, "grad_norm": 11.318829536437988, "learning_rate": 1.8718499135157545e-05, "loss": 1.5283, "step": 5735 }, { "epoch": 18.80655737704918, "grad_norm": 11.464491844177246, "learning_rate": 1.8717979002279473e-05, "loss": 1.3806, "step": 5736 }, { "epoch": 18.80983606557377, "grad_norm": 8.979096412658691, "learning_rate": 1.8717458771097125e-05, "loss": 1.4033, "step": 5737 }, { "epoch": 18.81311475409836, "grad_norm": 7.856903076171875, "learning_rate": 1.8716938441616362e-05, "loss": 1.3286, "step": 5738 }, { "epoch": 18.81639344262295, "grad_norm": 9.585583686828613, "learning_rate": 1.871641801384306e-05, "loss": 1.4197, "step": 5739 }, { "epoch": 18.81967213114754, "grad_norm": 10.078999519348145, "learning_rate": 1.871589748778308e-05, "loss": 1.4961, "step": 5740 }, { "epoch": 18.82295081967213, "grad_norm": 9.123442649841309, "learning_rate": 1.871537686344229e-05, "loss": 1.3735, "step": 5741 }, { "epoch": 18.82622950819672, "grad_norm": 10.249605178833008, "learning_rate": 1.8714856140826572e-05, "loss": 1.4092, "step": 5742 }, { "epoch": 18.82950819672131, "grad_norm": 13.388093948364258, "learning_rate": 1.8714335319941783e-05, "loss": 1.5955, "step": 5743 }, { "epoch": 18.832786885245902, "grad_norm": 8.274534225463867, "learning_rate": 1.8713814400793807e-05, "loss": 1.3203, "step": 5744 }, { "epoch": 18.83606557377049, "grad_norm": 9.105462074279785, "learning_rate": 1.871329338338851e-05, "loss": 1.5193, "step": 5745 }, { "epoch": 18.839344262295082, "grad_norm": 11.169293403625488, "learning_rate": 1.871277226773177e-05, "loss": 1.5725, "step": 5746 }, { "epoch": 18.84262295081967, "grad_norm": 9.5751953125, "learning_rate": 1.871225105382947e-05, "loss": 1.5, "step": 5747 }, { "epoch": 18.845901639344262, "grad_norm": 7.588006496429443, "learning_rate": 1.8711729741687475e-05, "loss": 1.5078, "step": 5748 }, { "epoch": 18.84918032786885, "grad_norm": 9.187796592712402, "learning_rate": 1.871120833131167e-05, "loss": 1.5874, "step": 5749 }, { "epoch": 18.852459016393443, "grad_norm": 10.522089004516602, "learning_rate": 1.8710686822707935e-05, "loss": 1.4802, "step": 5750 }, { "epoch": 18.855737704918035, "grad_norm": 7.197164535522461, "learning_rate": 1.871016521588215e-05, "loss": 1.4387, "step": 5751 }, { "epoch": 18.859016393442623, "grad_norm": 9.803361892700195, "learning_rate": 1.8709643510840192e-05, "loss": 1.2063, "step": 5752 }, { "epoch": 18.862295081967215, "grad_norm": 9.316662788391113, "learning_rate": 1.870912170758795e-05, "loss": 1.4172, "step": 5753 }, { "epoch": 18.865573770491803, "grad_norm": 9.007020950317383, "learning_rate": 1.8708599806131308e-05, "loss": 1.208, "step": 5754 }, { "epoch": 18.868852459016395, "grad_norm": 8.035162925720215, "learning_rate": 1.8708077806476144e-05, "loss": 1.5322, "step": 5755 }, { "epoch": 18.872131147540983, "grad_norm": 13.47609806060791, "learning_rate": 1.8707555708628354e-05, "loss": 1.3926, "step": 5756 }, { "epoch": 18.875409836065575, "grad_norm": 12.289198875427246, "learning_rate": 1.8707033512593815e-05, "loss": 1.592, "step": 5757 }, { "epoch": 18.878688524590164, "grad_norm": 9.235445976257324, "learning_rate": 1.8706511218378424e-05, "loss": 1.317, "step": 5758 }, { "epoch": 18.881967213114756, "grad_norm": 8.515436172485352, "learning_rate": 1.8705988825988062e-05, "loss": 1.4929, "step": 5759 }, { "epoch": 18.885245901639344, "grad_norm": 10.365198135375977, "learning_rate": 1.8705466335428624e-05, "loss": 1.3347, "step": 5760 }, { "epoch": 18.888524590163936, "grad_norm": 7.941636562347412, "learning_rate": 1.8704943746706007e-05, "loss": 1.4878, "step": 5761 }, { "epoch": 18.891803278688524, "grad_norm": 8.187708854675293, "learning_rate": 1.8704421059826094e-05, "loss": 1.4934, "step": 5762 }, { "epoch": 18.895081967213116, "grad_norm": 9.298574447631836, "learning_rate": 1.8703898274794785e-05, "loss": 1.2515, "step": 5763 }, { "epoch": 18.898360655737704, "grad_norm": 8.12692642211914, "learning_rate": 1.870337539161797e-05, "loss": 1.3652, "step": 5764 }, { "epoch": 18.901639344262296, "grad_norm": 9.363301277160645, "learning_rate": 1.8702852410301556e-05, "loss": 1.5222, "step": 5765 }, { "epoch": 18.904918032786885, "grad_norm": 9.530213356018066, "learning_rate": 1.8702329330851426e-05, "loss": 1.5979, "step": 5766 }, { "epoch": 18.908196721311477, "grad_norm": 8.105963706970215, "learning_rate": 1.8701806153273486e-05, "loss": 1.4749, "step": 5767 }, { "epoch": 18.911475409836065, "grad_norm": 6.923146724700928, "learning_rate": 1.8701282877573632e-05, "loss": 1.3027, "step": 5768 }, { "epoch": 18.914754098360657, "grad_norm": 18.693418502807617, "learning_rate": 1.8700759503757768e-05, "loss": 1.6733, "step": 5769 }, { "epoch": 18.918032786885245, "grad_norm": 10.920429229736328, "learning_rate": 1.87002360318318e-05, "loss": 1.481, "step": 5770 }, { "epoch": 18.921311475409837, "grad_norm": 6.690215587615967, "learning_rate": 1.8699712461801617e-05, "loss": 1.5894, "step": 5771 }, { "epoch": 18.924590163934425, "grad_norm": 10.09395694732666, "learning_rate": 1.869918879367313e-05, "loss": 1.4612, "step": 5772 }, { "epoch": 18.927868852459017, "grad_norm": 11.641096115112305, "learning_rate": 1.869866502745225e-05, "loss": 1.5874, "step": 5773 }, { "epoch": 18.931147540983606, "grad_norm": 11.853036880493164, "learning_rate": 1.8698141163144873e-05, "loss": 1.5088, "step": 5774 }, { "epoch": 18.934426229508198, "grad_norm": 11.426163673400879, "learning_rate": 1.8697617200756914e-05, "loss": 1.354, "step": 5775 }, { "epoch": 18.937704918032786, "grad_norm": 13.663561820983887, "learning_rate": 1.8697093140294272e-05, "loss": 1.4971, "step": 5776 }, { "epoch": 18.940983606557378, "grad_norm": 8.834726333618164, "learning_rate": 1.8696568981762867e-05, "loss": 1.5918, "step": 5777 }, { "epoch": 18.944262295081966, "grad_norm": 16.201656341552734, "learning_rate": 1.869604472516861e-05, "loss": 1.3694, "step": 5778 }, { "epoch": 18.947540983606558, "grad_norm": 8.659014701843262, "learning_rate": 1.8695520370517397e-05, "loss": 1.3201, "step": 5779 }, { "epoch": 18.950819672131146, "grad_norm": 8.763788223266602, "learning_rate": 1.8694995917815157e-05, "loss": 1.5466, "step": 5780 }, { "epoch": 18.95409836065574, "grad_norm": 8.551032066345215, "learning_rate": 1.8694471367067795e-05, "loss": 1.3274, "step": 5781 }, { "epoch": 18.957377049180327, "grad_norm": 10.11640453338623, "learning_rate": 1.869394671828123e-05, "loss": 1.3147, "step": 5782 }, { "epoch": 18.96065573770492, "grad_norm": 7.556556701660156, "learning_rate": 1.8693421971461373e-05, "loss": 1.5952, "step": 5783 }, { "epoch": 18.963934426229507, "grad_norm": 38.063777923583984, "learning_rate": 1.8692897126614146e-05, "loss": 1.4607, "step": 5784 }, { "epoch": 18.9672131147541, "grad_norm": 10.129253387451172, "learning_rate": 1.8692372183745466e-05, "loss": 1.1896, "step": 5785 }, { "epoch": 18.970491803278687, "grad_norm": 8.768479347229004, "learning_rate": 1.8691847142861253e-05, "loss": 1.3904, "step": 5786 }, { "epoch": 18.97377049180328, "grad_norm": 8.446194648742676, "learning_rate": 1.8691322003967423e-05, "loss": 1.6587, "step": 5787 }, { "epoch": 18.977049180327867, "grad_norm": 7.626687049865723, "learning_rate": 1.8690796767069903e-05, "loss": 1.4421, "step": 5788 }, { "epoch": 18.98032786885246, "grad_norm": 12.035118103027344, "learning_rate": 1.869027143217461e-05, "loss": 1.6416, "step": 5789 }, { "epoch": 18.983606557377048, "grad_norm": 7.955839157104492, "learning_rate": 1.8689745999287477e-05, "loss": 1.4194, "step": 5790 }, { "epoch": 18.98688524590164, "grad_norm": 7.510252475738525, "learning_rate": 1.868922046841442e-05, "loss": 1.3861, "step": 5791 }, { "epoch": 18.990163934426228, "grad_norm": 9.153772354125977, "learning_rate": 1.8688694839561368e-05, "loss": 1.4749, "step": 5792 }, { "epoch": 18.99344262295082, "grad_norm": 11.34606647491455, "learning_rate": 1.8688169112734248e-05, "loss": 1.3462, "step": 5793 }, { "epoch": 18.99672131147541, "grad_norm": 8.628372192382812, "learning_rate": 1.8687643287938982e-05, "loss": 1.5757, "step": 5794 }, { "epoch": 19.0, "grad_norm": 10.938878059387207, "learning_rate": 1.8687117365181514e-05, "loss": 1.3037, "step": 5795 }, { "epoch": 19.003278688524592, "grad_norm": 8.7100830078125, "learning_rate": 1.8686591344467758e-05, "loss": 1.2532, "step": 5796 }, { "epoch": 19.00655737704918, "grad_norm": 8.31673812866211, "learning_rate": 1.8686065225803657e-05, "loss": 1.3606, "step": 5797 }, { "epoch": 19.009836065573772, "grad_norm": 15.425695419311523, "learning_rate": 1.8685539009195138e-05, "loss": 1.2402, "step": 5798 }, { "epoch": 19.01311475409836, "grad_norm": 8.711438179016113, "learning_rate": 1.8685012694648136e-05, "loss": 1.4812, "step": 5799 }, { "epoch": 19.016393442622952, "grad_norm": 10.568920135498047, "learning_rate": 1.8684486282168585e-05, "loss": 1.3853, "step": 5800 }, { "epoch": 19.01967213114754, "grad_norm": 6.828817367553711, "learning_rate": 1.8683959771762425e-05, "loss": 1.5818, "step": 5801 }, { "epoch": 19.022950819672133, "grad_norm": 10.631808280944824, "learning_rate": 1.8683433163435588e-05, "loss": 1.2942, "step": 5802 }, { "epoch": 19.02622950819672, "grad_norm": 9.970907211303711, "learning_rate": 1.8682906457194012e-05, "loss": 1.2097, "step": 5803 }, { "epoch": 19.029508196721313, "grad_norm": 8.886046409606934, "learning_rate": 1.8682379653043637e-05, "loss": 1.2732, "step": 5804 }, { "epoch": 19.0327868852459, "grad_norm": 12.566886901855469, "learning_rate": 1.868185275099041e-05, "loss": 1.5881, "step": 5805 }, { "epoch": 19.036065573770493, "grad_norm": 7.8848161697387695, "learning_rate": 1.868132575104026e-05, "loss": 1.3958, "step": 5806 }, { "epoch": 19.03934426229508, "grad_norm": 7.843966960906982, "learning_rate": 1.8680798653199137e-05, "loss": 1.4448, "step": 5807 }, { "epoch": 19.042622950819673, "grad_norm": 8.883245468139648, "learning_rate": 1.8680271457472986e-05, "loss": 1.2952, "step": 5808 }, { "epoch": 19.04590163934426, "grad_norm": 9.986730575561523, "learning_rate": 1.8679744163867743e-05, "loss": 1.3589, "step": 5809 }, { "epoch": 19.049180327868854, "grad_norm": 11.923543930053711, "learning_rate": 1.8679216772389364e-05, "loss": 1.3204, "step": 5810 }, { "epoch": 19.052459016393442, "grad_norm": 11.11159610748291, "learning_rate": 1.8678689283043792e-05, "loss": 1.2676, "step": 5811 }, { "epoch": 19.055737704918034, "grad_norm": 8.126717567443848, "learning_rate": 1.8678161695836977e-05, "loss": 1.4238, "step": 5812 }, { "epoch": 19.059016393442622, "grad_norm": 8.184572219848633, "learning_rate": 1.8677634010774864e-05, "loss": 1.2183, "step": 5813 }, { "epoch": 19.062295081967214, "grad_norm": 7.858686447143555, "learning_rate": 1.8677106227863404e-05, "loss": 1.2444, "step": 5814 }, { "epoch": 19.065573770491802, "grad_norm": 8.170656204223633, "learning_rate": 1.867657834710855e-05, "loss": 1.3503, "step": 5815 }, { "epoch": 19.068852459016394, "grad_norm": 8.720105171203613, "learning_rate": 1.8676050368516255e-05, "loss": 1.2023, "step": 5816 }, { "epoch": 19.072131147540983, "grad_norm": 10.307565689086914, "learning_rate": 1.8675522292092466e-05, "loss": 1.4353, "step": 5817 }, { "epoch": 19.075409836065575, "grad_norm": 8.84920883178711, "learning_rate": 1.8674994117843147e-05, "loss": 1.4111, "step": 5818 }, { "epoch": 19.078688524590163, "grad_norm": 8.336915016174316, "learning_rate": 1.8674465845774253e-05, "loss": 1.2219, "step": 5819 }, { "epoch": 19.081967213114755, "grad_norm": 7.90780782699585, "learning_rate": 1.867393747589173e-05, "loss": 1.3459, "step": 5820 }, { "epoch": 19.085245901639343, "grad_norm": 12.995648384094238, "learning_rate": 1.867340900820155e-05, "loss": 1.3877, "step": 5821 }, { "epoch": 19.088524590163935, "grad_norm": 7.711339473724365, "learning_rate": 1.867288044270966e-05, "loss": 1.3472, "step": 5822 }, { "epoch": 19.091803278688523, "grad_norm": 6.939466953277588, "learning_rate": 1.867235177942203e-05, "loss": 1.551, "step": 5823 }, { "epoch": 19.095081967213115, "grad_norm": 6.583888053894043, "learning_rate": 1.8671823018344615e-05, "loss": 1.3853, "step": 5824 }, { "epoch": 19.098360655737704, "grad_norm": 10.258466720581055, "learning_rate": 1.867129415948338e-05, "loss": 1.3599, "step": 5825 }, { "epoch": 19.101639344262296, "grad_norm": 9.393264770507812, "learning_rate": 1.8670765202844284e-05, "loss": 1.4575, "step": 5826 }, { "epoch": 19.104918032786884, "grad_norm": 20.113601684570312, "learning_rate": 1.86702361484333e-05, "loss": 1.4551, "step": 5827 }, { "epoch": 19.108196721311476, "grad_norm": 7.8220744132995605, "learning_rate": 1.8669706996256383e-05, "loss": 1.498, "step": 5828 }, { "epoch": 19.111475409836064, "grad_norm": 10.83771800994873, "learning_rate": 1.8669177746319508e-05, "loss": 1.4224, "step": 5829 }, { "epoch": 19.114754098360656, "grad_norm": 10.195075035095215, "learning_rate": 1.866864839862864e-05, "loss": 1.2471, "step": 5830 }, { "epoch": 19.118032786885244, "grad_norm": 7.946230411529541, "learning_rate": 1.8668118953189748e-05, "loss": 1.3933, "step": 5831 }, { "epoch": 19.121311475409836, "grad_norm": 7.400454044342041, "learning_rate": 1.8667589410008802e-05, "loss": 1.3433, "step": 5832 }, { "epoch": 19.124590163934425, "grad_norm": 8.016301155090332, "learning_rate": 1.8667059769091778e-05, "loss": 1.1533, "step": 5833 }, { "epoch": 19.127868852459017, "grad_norm": 8.17029094696045, "learning_rate": 1.8666530030444638e-05, "loss": 1.3616, "step": 5834 }, { "epoch": 19.131147540983605, "grad_norm": 7.290432929992676, "learning_rate": 1.8666000194073365e-05, "loss": 1.1711, "step": 5835 }, { "epoch": 19.134426229508197, "grad_norm": 6.432207107543945, "learning_rate": 1.8665470259983926e-05, "loss": 1.3097, "step": 5836 }, { "epoch": 19.137704918032785, "grad_norm": 9.560962677001953, "learning_rate": 1.86649402281823e-05, "loss": 1.3535, "step": 5837 }, { "epoch": 19.140983606557377, "grad_norm": 11.11164665222168, "learning_rate": 1.8664410098674467e-05, "loss": 1.3945, "step": 5838 }, { "epoch": 19.14426229508197, "grad_norm": 8.965797424316406, "learning_rate": 1.8663879871466397e-05, "loss": 1.3203, "step": 5839 }, { "epoch": 19.147540983606557, "grad_norm": 7.379488468170166, "learning_rate": 1.8663349546564074e-05, "loss": 1.3455, "step": 5840 }, { "epoch": 19.15081967213115, "grad_norm": 7.944657802581787, "learning_rate": 1.866281912397348e-05, "loss": 1.4141, "step": 5841 }, { "epoch": 19.154098360655738, "grad_norm": 7.0535054206848145, "learning_rate": 1.8662288603700595e-05, "loss": 1.3245, "step": 5842 }, { "epoch": 19.15737704918033, "grad_norm": 9.450200080871582, "learning_rate": 1.8661757985751398e-05, "loss": 1.0657, "step": 5843 }, { "epoch": 19.160655737704918, "grad_norm": 6.473608493804932, "learning_rate": 1.866122727013187e-05, "loss": 1.4521, "step": 5844 }, { "epoch": 19.16393442622951, "grad_norm": 7.274924278259277, "learning_rate": 1.8660696456848e-05, "loss": 1.5723, "step": 5845 }, { "epoch": 19.167213114754098, "grad_norm": 8.388411521911621, "learning_rate": 1.8660165545905774e-05, "loss": 1.4413, "step": 5846 }, { "epoch": 19.17049180327869, "grad_norm": 8.397058486938477, "learning_rate": 1.865963453731118e-05, "loss": 1.2015, "step": 5847 }, { "epoch": 19.17377049180328, "grad_norm": 7.105687618255615, "learning_rate": 1.86591034310702e-05, "loss": 1.3525, "step": 5848 }, { "epoch": 19.17704918032787, "grad_norm": 9.93787956237793, "learning_rate": 1.8658572227188824e-05, "loss": 1.2769, "step": 5849 }, { "epoch": 19.18032786885246, "grad_norm": 19.253976821899414, "learning_rate": 1.8658040925673044e-05, "loss": 1.4277, "step": 5850 }, { "epoch": 19.18360655737705, "grad_norm": 7.439272880554199, "learning_rate": 1.865750952652885e-05, "loss": 1.343, "step": 5851 }, { "epoch": 19.18688524590164, "grad_norm": 6.789249420166016, "learning_rate": 1.8656978029762238e-05, "loss": 1.4854, "step": 5852 }, { "epoch": 19.19016393442623, "grad_norm": 8.273358345031738, "learning_rate": 1.8656446435379196e-05, "loss": 1.3937, "step": 5853 }, { "epoch": 19.19344262295082, "grad_norm": 13.90998649597168, "learning_rate": 1.865591474338572e-05, "loss": 1.4524, "step": 5854 }, { "epoch": 19.19672131147541, "grad_norm": 9.149178504943848, "learning_rate": 1.8655382953787805e-05, "loss": 1.4097, "step": 5855 }, { "epoch": 19.2, "grad_norm": 15.19316291809082, "learning_rate": 1.865485106659145e-05, "loss": 1.229, "step": 5856 }, { "epoch": 19.20327868852459, "grad_norm": 7.720294952392578, "learning_rate": 1.8654319081802645e-05, "loss": 1.325, "step": 5857 }, { "epoch": 19.20655737704918, "grad_norm": 9.425862312316895, "learning_rate": 1.8653786999427398e-05, "loss": 1.4397, "step": 5858 }, { "epoch": 19.20983606557377, "grad_norm": 10.813791275024414, "learning_rate": 1.8653254819471705e-05, "loss": 1.5505, "step": 5859 }, { "epoch": 19.21311475409836, "grad_norm": 7.466488838195801, "learning_rate": 1.8652722541941565e-05, "loss": 1.3125, "step": 5860 }, { "epoch": 19.21639344262295, "grad_norm": 7.94448709487915, "learning_rate": 1.8652190166842983e-05, "loss": 1.3853, "step": 5861 }, { "epoch": 19.21967213114754, "grad_norm": 10.72591781616211, "learning_rate": 1.865165769418196e-05, "loss": 1.3867, "step": 5862 }, { "epoch": 19.222950819672132, "grad_norm": 9.456504821777344, "learning_rate": 1.8651125123964502e-05, "loss": 1.3206, "step": 5863 }, { "epoch": 19.22622950819672, "grad_norm": 9.194424629211426, "learning_rate": 1.8650592456196613e-05, "loss": 1.4092, "step": 5864 }, { "epoch": 19.229508196721312, "grad_norm": 12.374114990234375, "learning_rate": 1.86500596908843e-05, "loss": 1.2944, "step": 5865 }, { "epoch": 19.2327868852459, "grad_norm": 8.647334098815918, "learning_rate": 1.864952682803357e-05, "loss": 1.2017, "step": 5866 }, { "epoch": 19.236065573770492, "grad_norm": 9.20249080657959, "learning_rate": 1.8648993867650432e-05, "loss": 1.4202, "step": 5867 }, { "epoch": 19.23934426229508, "grad_norm": 9.001497268676758, "learning_rate": 1.8648460809740895e-05, "loss": 1.2595, "step": 5868 }, { "epoch": 19.242622950819673, "grad_norm": 10.139727592468262, "learning_rate": 1.864792765431097e-05, "loss": 1.165, "step": 5869 }, { "epoch": 19.24590163934426, "grad_norm": 11.224538803100586, "learning_rate": 1.864739440136667e-05, "loss": 1.239, "step": 5870 }, { "epoch": 19.249180327868853, "grad_norm": 9.280838966369629, "learning_rate": 1.8646861050914008e-05, "loss": 1.437, "step": 5871 }, { "epoch": 19.25245901639344, "grad_norm": 8.65218734741211, "learning_rate": 1.8646327602958996e-05, "loss": 1.4509, "step": 5872 }, { "epoch": 19.255737704918033, "grad_norm": 9.479018211364746, "learning_rate": 1.864579405750765e-05, "loss": 1.1527, "step": 5873 }, { "epoch": 19.25901639344262, "grad_norm": 7.709451675415039, "learning_rate": 1.8645260414565988e-05, "loss": 1.3218, "step": 5874 }, { "epoch": 19.262295081967213, "grad_norm": 9.524618148803711, "learning_rate": 1.8644726674140023e-05, "loss": 1.3604, "step": 5875 }, { "epoch": 19.2655737704918, "grad_norm": 9.653271675109863, "learning_rate": 1.864419283623578e-05, "loss": 1.3665, "step": 5876 }, { "epoch": 19.268852459016394, "grad_norm": 8.216551780700684, "learning_rate": 1.8643658900859273e-05, "loss": 1.3811, "step": 5877 }, { "epoch": 19.272131147540982, "grad_norm": 9.638235092163086, "learning_rate": 1.8643124868016525e-05, "loss": 1.2825, "step": 5878 }, { "epoch": 19.275409836065574, "grad_norm": 9.818253517150879, "learning_rate": 1.8642590737713556e-05, "loss": 1.2628, "step": 5879 }, { "epoch": 19.278688524590162, "grad_norm": 8.113272666931152, "learning_rate": 1.864205650995639e-05, "loss": 1.4102, "step": 5880 }, { "epoch": 19.281967213114754, "grad_norm": 7.722725868225098, "learning_rate": 1.8641522184751057e-05, "loss": 1.2698, "step": 5881 }, { "epoch": 19.285245901639342, "grad_norm": 10.215914726257324, "learning_rate": 1.8640987762103574e-05, "loss": 1.4092, "step": 5882 }, { "epoch": 19.288524590163934, "grad_norm": 11.359482765197754, "learning_rate": 1.8640453242019963e-05, "loss": 1.3108, "step": 5883 }, { "epoch": 19.291803278688526, "grad_norm": 10.9368257522583, "learning_rate": 1.8639918624506265e-05, "loss": 1.3684, "step": 5884 }, { "epoch": 19.295081967213115, "grad_norm": 6.738132476806641, "learning_rate": 1.86393839095685e-05, "loss": 1.2112, "step": 5885 }, { "epoch": 19.298360655737707, "grad_norm": 9.367899894714355, "learning_rate": 1.86388490972127e-05, "loss": 1.5464, "step": 5886 }, { "epoch": 19.301639344262295, "grad_norm": 7.517656326293945, "learning_rate": 1.8638314187444894e-05, "loss": 1.5437, "step": 5887 }, { "epoch": 19.304918032786887, "grad_norm": 9.703985214233398, "learning_rate": 1.863777918027111e-05, "loss": 1.4265, "step": 5888 }, { "epoch": 19.308196721311475, "grad_norm": 7.148262023925781, "learning_rate": 1.863724407569739e-05, "loss": 1.6473, "step": 5889 }, { "epoch": 19.311475409836067, "grad_norm": 9.608821868896484, "learning_rate": 1.8636708873729758e-05, "loss": 1.1191, "step": 5890 }, { "epoch": 19.314754098360655, "grad_norm": 17.34554100036621, "learning_rate": 1.8636173574374253e-05, "loss": 1.2615, "step": 5891 }, { "epoch": 19.318032786885247, "grad_norm": 8.611981391906738, "learning_rate": 1.8635638177636916e-05, "loss": 1.4231, "step": 5892 }, { "epoch": 19.321311475409836, "grad_norm": 7.000396251678467, "learning_rate": 1.8635102683523777e-05, "loss": 1.2975, "step": 5893 }, { "epoch": 19.324590163934428, "grad_norm": 9.529603004455566, "learning_rate": 1.863456709204088e-05, "loss": 1.5107, "step": 5894 }, { "epoch": 19.327868852459016, "grad_norm": 9.8159818649292, "learning_rate": 1.8634031403194255e-05, "loss": 1.2023, "step": 5895 }, { "epoch": 19.331147540983608, "grad_norm": 8.90146541595459, "learning_rate": 1.8633495616989953e-05, "loss": 1.225, "step": 5896 }, { "epoch": 19.334426229508196, "grad_norm": 11.613015174865723, "learning_rate": 1.8632959733434012e-05, "loss": 1.2644, "step": 5897 }, { "epoch": 19.337704918032788, "grad_norm": 8.601458549499512, "learning_rate": 1.8632423752532474e-05, "loss": 1.2417, "step": 5898 }, { "epoch": 19.340983606557376, "grad_norm": 12.961302757263184, "learning_rate": 1.863188767429138e-05, "loss": 1.2789, "step": 5899 }, { "epoch": 19.34426229508197, "grad_norm": 15.882302284240723, "learning_rate": 1.8631351498716782e-05, "loss": 1.3074, "step": 5900 }, { "epoch": 19.347540983606557, "grad_norm": 20.25116729736328, "learning_rate": 1.8630815225814717e-05, "loss": 1.3406, "step": 5901 }, { "epoch": 19.35081967213115, "grad_norm": 7.109716892242432, "learning_rate": 1.8630278855591236e-05, "loss": 1.4753, "step": 5902 }, { "epoch": 19.354098360655737, "grad_norm": 18.227642059326172, "learning_rate": 1.862974238805239e-05, "loss": 1.4949, "step": 5903 }, { "epoch": 19.35737704918033, "grad_norm": 10.11056900024414, "learning_rate": 1.8629205823204225e-05, "loss": 1.429, "step": 5904 }, { "epoch": 19.360655737704917, "grad_norm": 6.798585414886475, "learning_rate": 1.8628669161052793e-05, "loss": 1.3329, "step": 5905 }, { "epoch": 19.36393442622951, "grad_norm": 9.243972778320312, "learning_rate": 1.8628132401604148e-05, "loss": 1.1489, "step": 5906 }, { "epoch": 19.367213114754097, "grad_norm": 9.551400184631348, "learning_rate": 1.8627595544864335e-05, "loss": 1.4099, "step": 5907 }, { "epoch": 19.37049180327869, "grad_norm": 7.214020252227783, "learning_rate": 1.8627058590839415e-05, "loss": 1.3936, "step": 5908 }, { "epoch": 19.373770491803278, "grad_norm": 8.885588645935059, "learning_rate": 1.8626521539535436e-05, "loss": 1.4324, "step": 5909 }, { "epoch": 19.37704918032787, "grad_norm": 12.600717544555664, "learning_rate": 1.862598439095846e-05, "loss": 1.5747, "step": 5910 }, { "epoch": 19.380327868852458, "grad_norm": 6.105042934417725, "learning_rate": 1.8625447145114536e-05, "loss": 1.5928, "step": 5911 }, { "epoch": 19.38360655737705, "grad_norm": 9.617730140686035, "learning_rate": 1.862490980200973e-05, "loss": 1.3345, "step": 5912 }, { "epoch": 19.386885245901638, "grad_norm": 7.6866044998168945, "learning_rate": 1.8624372361650103e-05, "loss": 1.4548, "step": 5913 }, { "epoch": 19.39016393442623, "grad_norm": 9.689250946044922, "learning_rate": 1.8623834824041704e-05, "loss": 1.4404, "step": 5914 }, { "epoch": 19.39344262295082, "grad_norm": 7.727320671081543, "learning_rate": 1.8623297189190603e-05, "loss": 1.4788, "step": 5915 }, { "epoch": 19.39672131147541, "grad_norm": 7.3553853034973145, "learning_rate": 1.862275945710286e-05, "loss": 1.4094, "step": 5916 }, { "epoch": 19.4, "grad_norm": 7.151291847229004, "learning_rate": 1.862222162778454e-05, "loss": 1.3608, "step": 5917 }, { "epoch": 19.40327868852459, "grad_norm": 9.504226684570312, "learning_rate": 1.8621683701241706e-05, "loss": 1.3154, "step": 5918 }, { "epoch": 19.40655737704918, "grad_norm": 10.846951484680176, "learning_rate": 1.8621145677480424e-05, "loss": 1.4036, "step": 5919 }, { "epoch": 19.40983606557377, "grad_norm": 8.60598373413086, "learning_rate": 1.862060755650676e-05, "loss": 1.4595, "step": 5920 }, { "epoch": 19.41311475409836, "grad_norm": 8.444146156311035, "learning_rate": 1.8620069338326786e-05, "loss": 1.4871, "step": 5921 }, { "epoch": 19.41639344262295, "grad_norm": 10.259262084960938, "learning_rate": 1.861953102294656e-05, "loss": 1.1891, "step": 5922 }, { "epoch": 19.41967213114754, "grad_norm": 10.986629486083984, "learning_rate": 1.8618992610372166e-05, "loss": 1.3579, "step": 5923 }, { "epoch": 19.42295081967213, "grad_norm": 9.2542085647583, "learning_rate": 1.8618454100609668e-05, "loss": 1.345, "step": 5924 }, { "epoch": 19.42622950819672, "grad_norm": 10.318565368652344, "learning_rate": 1.8617915493665138e-05, "loss": 1.4478, "step": 5925 }, { "epoch": 19.42950819672131, "grad_norm": 9.893271446228027, "learning_rate": 1.8617376789544652e-05, "loss": 1.2074, "step": 5926 }, { "epoch": 19.432786885245903, "grad_norm": 15.17852783203125, "learning_rate": 1.861683798825428e-05, "loss": 1.4067, "step": 5927 }, { "epoch": 19.43606557377049, "grad_norm": 8.880885124206543, "learning_rate": 1.8616299089800103e-05, "loss": 1.7773, "step": 5928 }, { "epoch": 19.439344262295084, "grad_norm": 12.543628692626953, "learning_rate": 1.8615760094188196e-05, "loss": 1.4126, "step": 5929 }, { "epoch": 19.442622950819672, "grad_norm": 7.794452667236328, "learning_rate": 1.8615221001424633e-05, "loss": 1.1121, "step": 5930 }, { "epoch": 19.445901639344264, "grad_norm": 8.19049072265625, "learning_rate": 1.8614681811515495e-05, "loss": 1.3235, "step": 5931 }, { "epoch": 19.449180327868852, "grad_norm": 8.434455871582031, "learning_rate": 1.8614142524466863e-05, "loss": 1.4734, "step": 5932 }, { "epoch": 19.452459016393444, "grad_norm": 7.8588385581970215, "learning_rate": 1.861360314028482e-05, "loss": 1.4033, "step": 5933 }, { "epoch": 19.455737704918032, "grad_norm": 7.719192028045654, "learning_rate": 1.8613063658975443e-05, "loss": 1.4893, "step": 5934 }, { "epoch": 19.459016393442624, "grad_norm": 7.063138484954834, "learning_rate": 1.861252408054482e-05, "loss": 1.3093, "step": 5935 }, { "epoch": 19.462295081967213, "grad_norm": 9.93870735168457, "learning_rate": 1.8611984404999033e-05, "loss": 1.3389, "step": 5936 }, { "epoch": 19.465573770491805, "grad_norm": 8.461309432983398, "learning_rate": 1.8611444632344166e-05, "loss": 1.2698, "step": 5937 }, { "epoch": 19.468852459016393, "grad_norm": 11.792213439941406, "learning_rate": 1.861090476258631e-05, "loss": 1.2061, "step": 5938 }, { "epoch": 19.472131147540985, "grad_norm": 9.521915435791016, "learning_rate": 1.8610364795731545e-05, "loss": 1.2712, "step": 5939 }, { "epoch": 19.475409836065573, "grad_norm": 13.254783630371094, "learning_rate": 1.8609824731785968e-05, "loss": 1.394, "step": 5940 }, { "epoch": 19.478688524590165, "grad_norm": 9.200394630432129, "learning_rate": 1.8609284570755663e-05, "loss": 1.3572, "step": 5941 }, { "epoch": 19.481967213114753, "grad_norm": 8.016918182373047, "learning_rate": 1.8608744312646726e-05, "loss": 1.4509, "step": 5942 }, { "epoch": 19.485245901639345, "grad_norm": 9.719833374023438, "learning_rate": 1.8608203957465245e-05, "loss": 1.3286, "step": 5943 }, { "epoch": 19.488524590163934, "grad_norm": 13.145890235900879, "learning_rate": 1.860766350521731e-05, "loss": 1.4119, "step": 5944 }, { "epoch": 19.491803278688526, "grad_norm": 7.64846658706665, "learning_rate": 1.8607122955909024e-05, "loss": 1.3997, "step": 5945 }, { "epoch": 19.495081967213114, "grad_norm": 15.541510581970215, "learning_rate": 1.860658230954648e-05, "loss": 1.3274, "step": 5946 }, { "epoch": 19.498360655737706, "grad_norm": 7.6639933586120605, "learning_rate": 1.8606041566135765e-05, "loss": 1.3176, "step": 5947 }, { "epoch": 19.501639344262294, "grad_norm": 10.452496528625488, "learning_rate": 1.860550072568299e-05, "loss": 1.4181, "step": 5948 }, { "epoch": 19.504918032786886, "grad_norm": 9.205144882202148, "learning_rate": 1.860495978819424e-05, "loss": 1.4587, "step": 5949 }, { "epoch": 19.508196721311474, "grad_norm": 8.344523429870605, "learning_rate": 1.8604418753675622e-05, "loss": 1.2455, "step": 5950 }, { "epoch": 19.511475409836066, "grad_norm": 10.903095245361328, "learning_rate": 1.8603877622133242e-05, "loss": 1.427, "step": 5951 }, { "epoch": 19.514754098360655, "grad_norm": 7.979176044464111, "learning_rate": 1.8603336393573195e-05, "loss": 1.4695, "step": 5952 }, { "epoch": 19.518032786885247, "grad_norm": 7.692610263824463, "learning_rate": 1.860279506800158e-05, "loss": 1.4622, "step": 5953 }, { "epoch": 19.521311475409835, "grad_norm": 11.425785064697266, "learning_rate": 1.8602253645424508e-05, "loss": 1.4146, "step": 5954 }, { "epoch": 19.524590163934427, "grad_norm": 10.131711959838867, "learning_rate": 1.8601712125848084e-05, "loss": 1.1687, "step": 5955 }, { "epoch": 19.527868852459015, "grad_norm": 7.777642250061035, "learning_rate": 1.8601170509278412e-05, "loss": 1.4221, "step": 5956 }, { "epoch": 19.531147540983607, "grad_norm": 11.254684448242188, "learning_rate": 1.8600628795721598e-05, "loss": 1.2993, "step": 5957 }, { "epoch": 19.534426229508195, "grad_norm": 8.184942245483398, "learning_rate": 1.8600086985183753e-05, "loss": 1.51, "step": 5958 }, { "epoch": 19.537704918032787, "grad_norm": 8.986394882202148, "learning_rate": 1.8599545077670983e-05, "loss": 1.2998, "step": 5959 }, { "epoch": 19.540983606557376, "grad_norm": 8.857073783874512, "learning_rate": 1.8599003073189404e-05, "loss": 1.3323, "step": 5960 }, { "epoch": 19.544262295081968, "grad_norm": 8.155108451843262, "learning_rate": 1.8598460971745124e-05, "loss": 1.5759, "step": 5961 }, { "epoch": 19.547540983606556, "grad_norm": 9.258818626403809, "learning_rate": 1.859791877334426e-05, "loss": 1.2397, "step": 5962 }, { "epoch": 19.550819672131148, "grad_norm": 7.700468063354492, "learning_rate": 1.8597376477992913e-05, "loss": 1.2725, "step": 5963 }, { "epoch": 19.554098360655736, "grad_norm": 9.831090927124023, "learning_rate": 1.8596834085697214e-05, "loss": 1.205, "step": 5964 }, { "epoch": 19.557377049180328, "grad_norm": 8.70452880859375, "learning_rate": 1.859629159646327e-05, "loss": 1.48, "step": 5965 }, { "epoch": 19.560655737704916, "grad_norm": 10.832600593566895, "learning_rate": 1.8595749010297203e-05, "loss": 1.3696, "step": 5966 }, { "epoch": 19.56393442622951, "grad_norm": 7.027812957763672, "learning_rate": 1.8595206327205125e-05, "loss": 1.5103, "step": 5967 }, { "epoch": 19.567213114754097, "grad_norm": 8.421210289001465, "learning_rate": 1.8594663547193163e-05, "loss": 1.4263, "step": 5968 }, { "epoch": 19.57049180327869, "grad_norm": 9.877931594848633, "learning_rate": 1.8594120670267427e-05, "loss": 1.2269, "step": 5969 }, { "epoch": 19.57377049180328, "grad_norm": 11.290816307067871, "learning_rate": 1.8593577696434048e-05, "loss": 1.269, "step": 5970 }, { "epoch": 19.57704918032787, "grad_norm": 8.929183006286621, "learning_rate": 1.8593034625699148e-05, "loss": 1.2826, "step": 5971 }, { "epoch": 19.58032786885246, "grad_norm": 32.301513671875, "learning_rate": 1.8592491458068846e-05, "loss": 1.3896, "step": 5972 }, { "epoch": 19.58360655737705, "grad_norm": 10.053234100341797, "learning_rate": 1.8591948193549267e-05, "loss": 1.3265, "step": 5973 }, { "epoch": 19.58688524590164, "grad_norm": 8.098123550415039, "learning_rate": 1.8591404832146544e-05, "loss": 1.5015, "step": 5974 }, { "epoch": 19.59016393442623, "grad_norm": 9.450011253356934, "learning_rate": 1.8590861373866792e-05, "loss": 1.5398, "step": 5975 }, { "epoch": 19.59344262295082, "grad_norm": 8.769155502319336, "learning_rate": 1.8590317818716152e-05, "loss": 1.4194, "step": 5976 }, { "epoch": 19.59672131147541, "grad_norm": 9.074950218200684, "learning_rate": 1.858977416670074e-05, "loss": 1.3156, "step": 5977 }, { "epoch": 19.6, "grad_norm": 10.177846908569336, "learning_rate": 1.85892304178267e-05, "loss": 1.3357, "step": 5978 }, { "epoch": 19.60327868852459, "grad_norm": 8.964207649230957, "learning_rate": 1.8588686572100153e-05, "loss": 1.4973, "step": 5979 }, { "epoch": 19.60655737704918, "grad_norm": 7.5822577476501465, "learning_rate": 1.8588142629527233e-05, "loss": 1.4153, "step": 5980 }, { "epoch": 19.60983606557377, "grad_norm": 19.918373107910156, "learning_rate": 1.858759859011408e-05, "loss": 1.4902, "step": 5981 }, { "epoch": 19.613114754098362, "grad_norm": 9.227676391601562, "learning_rate": 1.858705445386682e-05, "loss": 1.4856, "step": 5982 }, { "epoch": 19.61639344262295, "grad_norm": 9.042496681213379, "learning_rate": 1.8586510220791596e-05, "loss": 1.4331, "step": 5983 }, { "epoch": 19.619672131147542, "grad_norm": 7.986794471740723, "learning_rate": 1.858596589089454e-05, "loss": 1.3048, "step": 5984 }, { "epoch": 19.62295081967213, "grad_norm": 11.388798713684082, "learning_rate": 1.858542146418179e-05, "loss": 1.4329, "step": 5985 }, { "epoch": 19.626229508196722, "grad_norm": 8.960982322692871, "learning_rate": 1.858487694065949e-05, "loss": 1.4751, "step": 5986 }, { "epoch": 19.62950819672131, "grad_norm": 10.764372825622559, "learning_rate": 1.8584332320333775e-05, "loss": 1.2712, "step": 5987 }, { "epoch": 19.632786885245903, "grad_norm": 9.432048797607422, "learning_rate": 1.8583787603210787e-05, "loss": 1.3713, "step": 5988 }, { "epoch": 19.63606557377049, "grad_norm": 37.39448165893555, "learning_rate": 1.8583242789296668e-05, "loss": 1.0365, "step": 5989 }, { "epoch": 19.639344262295083, "grad_norm": 6.696474552154541, "learning_rate": 1.858269787859756e-05, "loss": 1.3638, "step": 5990 }, { "epoch": 19.64262295081967, "grad_norm": 10.009222030639648, "learning_rate": 1.8582152871119615e-05, "loss": 1.3438, "step": 5991 }, { "epoch": 19.645901639344263, "grad_norm": 8.76695728302002, "learning_rate": 1.858160776686897e-05, "loss": 1.3784, "step": 5992 }, { "epoch": 19.64918032786885, "grad_norm": 8.415204048156738, "learning_rate": 1.858106256585178e-05, "loss": 1.3933, "step": 5993 }, { "epoch": 19.652459016393443, "grad_norm": 10.788945198059082, "learning_rate": 1.858051726807418e-05, "loss": 1.3274, "step": 5994 }, { "epoch": 19.65573770491803, "grad_norm": 11.128988265991211, "learning_rate": 1.857997187354233e-05, "loss": 1.2424, "step": 5995 }, { "epoch": 19.659016393442624, "grad_norm": 8.915677070617676, "learning_rate": 1.857942638226238e-05, "loss": 1.5547, "step": 5996 }, { "epoch": 19.662295081967212, "grad_norm": 8.033583641052246, "learning_rate": 1.857888079424047e-05, "loss": 1.4656, "step": 5997 }, { "epoch": 19.665573770491804, "grad_norm": 7.077258110046387, "learning_rate": 1.8578335109482763e-05, "loss": 1.3551, "step": 5998 }, { "epoch": 19.668852459016392, "grad_norm": 8.70209789276123, "learning_rate": 1.8577789327995406e-05, "loss": 1.6223, "step": 5999 }, { "epoch": 19.672131147540984, "grad_norm": 9.40170669555664, "learning_rate": 1.8577243449784558e-05, "loss": 1.4592, "step": 6000 }, { "epoch": 19.675409836065572, "grad_norm": 7.214683532714844, "learning_rate": 1.857669747485637e-05, "loss": 1.4691, "step": 6001 }, { "epoch": 19.678688524590164, "grad_norm": 7.063951015472412, "learning_rate": 1.8576151403217003e-05, "loss": 1.4504, "step": 6002 }, { "epoch": 19.681967213114753, "grad_norm": 8.302841186523438, "learning_rate": 1.857560523487261e-05, "loss": 1.4155, "step": 6003 }, { "epoch": 19.685245901639345, "grad_norm": 8.261292457580566, "learning_rate": 1.8575058969829353e-05, "loss": 1.407, "step": 6004 }, { "epoch": 19.688524590163933, "grad_norm": 8.870393753051758, "learning_rate": 1.857451260809339e-05, "loss": 1.4712, "step": 6005 }, { "epoch": 19.691803278688525, "grad_norm": 7.5365729331970215, "learning_rate": 1.857396614967088e-05, "loss": 1.2161, "step": 6006 }, { "epoch": 19.695081967213113, "grad_norm": 7.573945999145508, "learning_rate": 1.857341959456799e-05, "loss": 1.3167, "step": 6007 }, { "epoch": 19.698360655737705, "grad_norm": 7.806207656860352, "learning_rate": 1.857287294279088e-05, "loss": 1.5286, "step": 6008 }, { "epoch": 19.701639344262293, "grad_norm": 11.185795783996582, "learning_rate": 1.857232619434571e-05, "loss": 1.3879, "step": 6009 }, { "epoch": 19.704918032786885, "grad_norm": 10.686799049377441, "learning_rate": 1.8571779349238653e-05, "loss": 1.3141, "step": 6010 }, { "epoch": 19.708196721311474, "grad_norm": 7.907397270202637, "learning_rate": 1.857123240747587e-05, "loss": 1.2394, "step": 6011 }, { "epoch": 19.711475409836066, "grad_norm": 9.818852424621582, "learning_rate": 1.8570685369063528e-05, "loss": 1.2729, "step": 6012 }, { "epoch": 19.714754098360658, "grad_norm": 9.159998893737793, "learning_rate": 1.85701382340078e-05, "loss": 1.3667, "step": 6013 }, { "epoch": 19.718032786885246, "grad_norm": 7.35371732711792, "learning_rate": 1.856959100231485e-05, "loss": 1.3303, "step": 6014 }, { "epoch": 19.721311475409838, "grad_norm": 17.116439819335938, "learning_rate": 1.8569043673990854e-05, "loss": 1.3429, "step": 6015 }, { "epoch": 19.724590163934426, "grad_norm": 7.728044509887695, "learning_rate": 1.8568496249041977e-05, "loss": 1.3228, "step": 6016 }, { "epoch": 19.727868852459018, "grad_norm": 10.165249824523926, "learning_rate": 1.85679487274744e-05, "loss": 1.3362, "step": 6017 }, { "epoch": 19.731147540983606, "grad_norm": 7.288064002990723, "learning_rate": 1.856740110929429e-05, "loss": 1.3413, "step": 6018 }, { "epoch": 19.7344262295082, "grad_norm": 6.522739410400391, "learning_rate": 1.856685339450783e-05, "loss": 1.6267, "step": 6019 }, { "epoch": 19.737704918032787, "grad_norm": 7.432081699371338, "learning_rate": 1.8566305583121187e-05, "loss": 1.5317, "step": 6020 }, { "epoch": 19.74098360655738, "grad_norm": 6.663694858551025, "learning_rate": 1.856575767514054e-05, "loss": 1.323, "step": 6021 }, { "epoch": 19.744262295081967, "grad_norm": 8.120278358459473, "learning_rate": 1.8565209670572072e-05, "loss": 1.2869, "step": 6022 }, { "epoch": 19.74754098360656, "grad_norm": 10.415215492248535, "learning_rate": 1.8564661569421956e-05, "loss": 1.4434, "step": 6023 }, { "epoch": 19.750819672131147, "grad_norm": 7.319741249084473, "learning_rate": 1.856411337169638e-05, "loss": 1.384, "step": 6024 }, { "epoch": 19.75409836065574, "grad_norm": 12.621076583862305, "learning_rate": 1.856356507740152e-05, "loss": 1.3987, "step": 6025 }, { "epoch": 19.757377049180327, "grad_norm": 9.135287284851074, "learning_rate": 1.8563016686543557e-05, "loss": 1.1777, "step": 6026 }, { "epoch": 19.76065573770492, "grad_norm": 7.942331790924072, "learning_rate": 1.856246819912868e-05, "loss": 1.4421, "step": 6027 }, { "epoch": 19.763934426229508, "grad_norm": 12.725056648254395, "learning_rate": 1.856191961516307e-05, "loss": 1.4456, "step": 6028 }, { "epoch": 19.7672131147541, "grad_norm": 6.997255802154541, "learning_rate": 1.8561370934652915e-05, "loss": 1.3716, "step": 6029 }, { "epoch": 19.770491803278688, "grad_norm": 10.192909240722656, "learning_rate": 1.8560822157604402e-05, "loss": 1.5593, "step": 6030 }, { "epoch": 19.77377049180328, "grad_norm": 6.630135536193848, "learning_rate": 1.856027328402372e-05, "loss": 1.3494, "step": 6031 }, { "epoch": 19.777049180327868, "grad_norm": 7.438174724578857, "learning_rate": 1.855972431391705e-05, "loss": 1.1189, "step": 6032 }, { "epoch": 19.78032786885246, "grad_norm": 7.49081563949585, "learning_rate": 1.8559175247290593e-05, "loss": 1.3877, "step": 6033 }, { "epoch": 19.78360655737705, "grad_norm": 7.020524978637695, "learning_rate": 1.8558626084150538e-05, "loss": 1.3755, "step": 6034 }, { "epoch": 19.78688524590164, "grad_norm": 7.610602855682373, "learning_rate": 1.8558076824503072e-05, "loss": 1.3335, "step": 6035 }, { "epoch": 19.79016393442623, "grad_norm": 6.565340518951416, "learning_rate": 1.8557527468354387e-05, "loss": 1.2505, "step": 6036 }, { "epoch": 19.79344262295082, "grad_norm": 12.020517349243164, "learning_rate": 1.8556978015710688e-05, "loss": 1.4927, "step": 6037 }, { "epoch": 19.79672131147541, "grad_norm": 8.662442207336426, "learning_rate": 1.8556428466578166e-05, "loss": 1.3374, "step": 6038 }, { "epoch": 19.8, "grad_norm": 6.203099250793457, "learning_rate": 1.8555878820963014e-05, "loss": 1.4685, "step": 6039 }, { "epoch": 19.80327868852459, "grad_norm": 8.968151092529297, "learning_rate": 1.855532907887143e-05, "loss": 1.3704, "step": 6040 }, { "epoch": 19.80655737704918, "grad_norm": 8.016606330871582, "learning_rate": 1.855477924030962e-05, "loss": 1.4551, "step": 6041 }, { "epoch": 19.80983606557377, "grad_norm": 5.834217548370361, "learning_rate": 1.8554229305283778e-05, "loss": 1.488, "step": 6042 }, { "epoch": 19.81311475409836, "grad_norm": 7.277266502380371, "learning_rate": 1.8553679273800104e-05, "loss": 1.3315, "step": 6043 }, { "epoch": 19.81639344262295, "grad_norm": 8.778764724731445, "learning_rate": 1.8553129145864806e-05, "loss": 1.5308, "step": 6044 }, { "epoch": 19.81967213114754, "grad_norm": 7.31365966796875, "learning_rate": 1.8552578921484083e-05, "loss": 1.4392, "step": 6045 }, { "epoch": 19.82295081967213, "grad_norm": 18.141027450561523, "learning_rate": 1.855202860066414e-05, "loss": 1.4863, "step": 6046 }, { "epoch": 19.82622950819672, "grad_norm": 7.0979132652282715, "learning_rate": 1.855147818341118e-05, "loss": 1.3575, "step": 6047 }, { "epoch": 19.82950819672131, "grad_norm": 7.002731800079346, "learning_rate": 1.8550927669731417e-05, "loss": 1.5691, "step": 6048 }, { "epoch": 19.832786885245902, "grad_norm": 13.993189811706543, "learning_rate": 1.855037705963105e-05, "loss": 1.384, "step": 6049 }, { "epoch": 19.83606557377049, "grad_norm": 8.4501953125, "learning_rate": 1.854982635311629e-05, "loss": 1.4038, "step": 6050 }, { "epoch": 19.839344262295082, "grad_norm": 8.661941528320312, "learning_rate": 1.854927555019335e-05, "loss": 1.3882, "step": 6051 }, { "epoch": 19.84262295081967, "grad_norm": 11.512383460998535, "learning_rate": 1.854872465086844e-05, "loss": 1.375, "step": 6052 }, { "epoch": 19.845901639344262, "grad_norm": 7.0326714515686035, "learning_rate": 1.8548173655147773e-05, "loss": 1.4185, "step": 6053 }, { "epoch": 19.84918032786885, "grad_norm": 6.530059337615967, "learning_rate": 1.854762256303756e-05, "loss": 1.5645, "step": 6054 }, { "epoch": 19.852459016393443, "grad_norm": 7.001465320587158, "learning_rate": 1.854707137454401e-05, "loss": 1.3633, "step": 6055 }, { "epoch": 19.855737704918035, "grad_norm": 9.642216682434082, "learning_rate": 1.854652008967335e-05, "loss": 1.3062, "step": 6056 }, { "epoch": 19.859016393442623, "grad_norm": 7.437513828277588, "learning_rate": 1.8545968708431785e-05, "loss": 1.301, "step": 6057 }, { "epoch": 19.862295081967215, "grad_norm": 9.312566757202148, "learning_rate": 1.854541723082554e-05, "loss": 1.4182, "step": 6058 }, { "epoch": 19.865573770491803, "grad_norm": 7.846425533294678, "learning_rate": 1.854486565686083e-05, "loss": 1.3423, "step": 6059 }, { "epoch": 19.868852459016395, "grad_norm": 8.279914855957031, "learning_rate": 1.8544313986543875e-05, "loss": 1.24, "step": 6060 }, { "epoch": 19.872131147540983, "grad_norm": 9.451187133789062, "learning_rate": 1.8543762219880896e-05, "loss": 1.2577, "step": 6061 }, { "epoch": 19.875409836065575, "grad_norm": 7.786389350891113, "learning_rate": 1.8543210356878118e-05, "loss": 1.4773, "step": 6062 }, { "epoch": 19.878688524590164, "grad_norm": 11.548321723937988, "learning_rate": 1.8542658397541754e-05, "loss": 1.3091, "step": 6063 }, { "epoch": 19.881967213114756, "grad_norm": 8.11103343963623, "learning_rate": 1.854210634187804e-05, "loss": 1.6265, "step": 6064 }, { "epoch": 19.885245901639344, "grad_norm": 13.362306594848633, "learning_rate": 1.8541554189893192e-05, "loss": 1.5151, "step": 6065 }, { "epoch": 19.888524590163936, "grad_norm": 8.181281089782715, "learning_rate": 1.8541001941593442e-05, "loss": 1.3912, "step": 6066 }, { "epoch": 19.891803278688524, "grad_norm": 9.641404151916504, "learning_rate": 1.8540449596985013e-05, "loss": 1.478, "step": 6067 }, { "epoch": 19.895081967213116, "grad_norm": 7.754452705383301, "learning_rate": 1.8539897156074135e-05, "loss": 1.4365, "step": 6068 }, { "epoch": 19.898360655737704, "grad_norm": 10.213297843933105, "learning_rate": 1.8539344618867036e-05, "loss": 1.4912, "step": 6069 }, { "epoch": 19.901639344262296, "grad_norm": 8.876334190368652, "learning_rate": 1.853879198536995e-05, "loss": 1.552, "step": 6070 }, { "epoch": 19.904918032786885, "grad_norm": 6.69220495223999, "learning_rate": 1.8538239255589107e-05, "loss": 1.4922, "step": 6071 }, { "epoch": 19.908196721311477, "grad_norm": 8.601639747619629, "learning_rate": 1.853768642953074e-05, "loss": 1.3662, "step": 6072 }, { "epoch": 19.911475409836065, "grad_norm": 7.436435222625732, "learning_rate": 1.8537133507201075e-05, "loss": 1.3037, "step": 6073 }, { "epoch": 19.914754098360657, "grad_norm": 9.283783912658691, "learning_rate": 1.8536580488606358e-05, "loss": 1.5579, "step": 6074 }, { "epoch": 19.918032786885245, "grad_norm": 10.005884170532227, "learning_rate": 1.8536027373752818e-05, "loss": 1.3838, "step": 6075 }, { "epoch": 19.921311475409837, "grad_norm": 15.38759994506836, "learning_rate": 1.85354741626467e-05, "loss": 1.4382, "step": 6076 }, { "epoch": 19.924590163934425, "grad_norm": 7.212333679199219, "learning_rate": 1.8534920855294228e-05, "loss": 1.4438, "step": 6077 }, { "epoch": 19.927868852459017, "grad_norm": 11.11390209197998, "learning_rate": 1.8534367451701654e-05, "loss": 1.5066, "step": 6078 }, { "epoch": 19.931147540983606, "grad_norm": 9.073598861694336, "learning_rate": 1.8533813951875214e-05, "loss": 1.3813, "step": 6079 }, { "epoch": 19.934426229508198, "grad_norm": 12.518562316894531, "learning_rate": 1.8533260355821145e-05, "loss": 1.355, "step": 6080 }, { "epoch": 19.937704918032786, "grad_norm": 10.384223937988281, "learning_rate": 1.8532706663545695e-05, "loss": 1.2246, "step": 6081 }, { "epoch": 19.940983606557378, "grad_norm": 6.543493747711182, "learning_rate": 1.853215287505511e-05, "loss": 1.5049, "step": 6082 }, { "epoch": 19.944262295081966, "grad_norm": 7.942398548126221, "learning_rate": 1.8531598990355623e-05, "loss": 1.343, "step": 6083 }, { "epoch": 19.947540983606558, "grad_norm": 29.794340133666992, "learning_rate": 1.853104500945349e-05, "loss": 1.3796, "step": 6084 }, { "epoch": 19.950819672131146, "grad_norm": 7.046882629394531, "learning_rate": 1.8530490932354953e-05, "loss": 1.4722, "step": 6085 }, { "epoch": 19.95409836065574, "grad_norm": 8.872758865356445, "learning_rate": 1.8529936759066264e-05, "loss": 1.325, "step": 6086 }, { "epoch": 19.957377049180327, "grad_norm": 9.544448852539062, "learning_rate": 1.8529382489593666e-05, "loss": 1.5161, "step": 6087 }, { "epoch": 19.96065573770492, "grad_norm": 10.231402397155762, "learning_rate": 1.8528828123943415e-05, "loss": 1.4153, "step": 6088 }, { "epoch": 19.963934426229507, "grad_norm": 18.20576286315918, "learning_rate": 1.8528273662121758e-05, "loss": 1.355, "step": 6089 }, { "epoch": 19.9672131147541, "grad_norm": 7.341126441955566, "learning_rate": 1.8527719104134946e-05, "loss": 1.3704, "step": 6090 }, { "epoch": 19.970491803278687, "grad_norm": 9.549093246459961, "learning_rate": 1.8527164449989237e-05, "loss": 1.4597, "step": 6091 }, { "epoch": 19.97377049180328, "grad_norm": 7.80355978012085, "learning_rate": 1.8526609699690886e-05, "loss": 1.3279, "step": 6092 }, { "epoch": 19.977049180327867, "grad_norm": 7.552994251251221, "learning_rate": 1.852605485324614e-05, "loss": 1.4106, "step": 6093 }, { "epoch": 19.98032786885246, "grad_norm": 6.554324626922607, "learning_rate": 1.852549991066126e-05, "loss": 1.1555, "step": 6094 }, { "epoch": 19.983606557377048, "grad_norm": 7.5543599128723145, "learning_rate": 1.852494487194251e-05, "loss": 1.4038, "step": 6095 }, { "epoch": 19.98688524590164, "grad_norm": 8.014880180358887, "learning_rate": 1.852438973709614e-05, "loss": 1.363, "step": 6096 }, { "epoch": 19.990163934426228, "grad_norm": 5.4370317459106445, "learning_rate": 1.852383450612841e-05, "loss": 1.6467, "step": 6097 }, { "epoch": 19.99344262295082, "grad_norm": 8.591913223266602, "learning_rate": 1.8523279179045586e-05, "loss": 1.408, "step": 6098 }, { "epoch": 19.99672131147541, "grad_norm": 10.412559509277344, "learning_rate": 1.8522723755853924e-05, "loss": 1.3672, "step": 6099 }, { "epoch": 20.0, "grad_norm": 9.672107696533203, "learning_rate": 1.8522168236559693e-05, "loss": 1.1729, "step": 6100 }, { "epoch": 20.003278688524592, "grad_norm": 11.143481254577637, "learning_rate": 1.8521612621169157e-05, "loss": 1.2522, "step": 6101 }, { "epoch": 20.00655737704918, "grad_norm": 8.75096607208252, "learning_rate": 1.852105690968857e-05, "loss": 1.3113, "step": 6102 }, { "epoch": 20.009836065573772, "grad_norm": 7.741184711456299, "learning_rate": 1.8520501102124217e-05, "loss": 1.2341, "step": 6103 }, { "epoch": 20.01311475409836, "grad_norm": 9.499117851257324, "learning_rate": 1.851994519848235e-05, "loss": 1.1632, "step": 6104 }, { "epoch": 20.016393442622952, "grad_norm": 7.803379535675049, "learning_rate": 1.851938919876924e-05, "loss": 1.3284, "step": 6105 }, { "epoch": 20.01967213114754, "grad_norm": 16.53290367126465, "learning_rate": 1.8518833102991163e-05, "loss": 1.2573, "step": 6106 }, { "epoch": 20.022950819672133, "grad_norm": 8.22087287902832, "learning_rate": 1.8518276911154384e-05, "loss": 1.3601, "step": 6107 }, { "epoch": 20.02622950819672, "grad_norm": 7.406254291534424, "learning_rate": 1.8517720623265174e-05, "loss": 1.4343, "step": 6108 }, { "epoch": 20.029508196721313, "grad_norm": 7.833783149719238, "learning_rate": 1.851716423932981e-05, "loss": 1.3782, "step": 6109 }, { "epoch": 20.0327868852459, "grad_norm": 6.826525688171387, "learning_rate": 1.8516607759354562e-05, "loss": 1.3621, "step": 6110 }, { "epoch": 20.036065573770493, "grad_norm": 9.189133644104004, "learning_rate": 1.851605118334571e-05, "loss": 1.2695, "step": 6111 }, { "epoch": 20.03934426229508, "grad_norm": 7.071382999420166, "learning_rate": 1.8515494511309524e-05, "loss": 1.1792, "step": 6112 }, { "epoch": 20.042622950819673, "grad_norm": 7.9467668533325195, "learning_rate": 1.8514937743252284e-05, "loss": 1.2637, "step": 6113 }, { "epoch": 20.04590163934426, "grad_norm": 6.951395034790039, "learning_rate": 1.8514380879180265e-05, "loss": 1.2673, "step": 6114 }, { "epoch": 20.049180327868854, "grad_norm": 9.405558586120605, "learning_rate": 1.8513823919099752e-05, "loss": 1.1901, "step": 6115 }, { "epoch": 20.052459016393442, "grad_norm": 11.688520431518555, "learning_rate": 1.851326686301702e-05, "loss": 1.3171, "step": 6116 }, { "epoch": 20.055737704918034, "grad_norm": 7.0247802734375, "learning_rate": 1.8512709710938355e-05, "loss": 1.3855, "step": 6117 }, { "epoch": 20.059016393442622, "grad_norm": 8.659720420837402, "learning_rate": 1.8512152462870035e-05, "loss": 1.396, "step": 6118 }, { "epoch": 20.062295081967214, "grad_norm": 11.937788963317871, "learning_rate": 1.851159511881835e-05, "loss": 1.2827, "step": 6119 }, { "epoch": 20.065573770491802, "grad_norm": 12.9078369140625, "learning_rate": 1.8511037678789575e-05, "loss": 1.2803, "step": 6120 }, { "epoch": 20.068852459016394, "grad_norm": 11.150814056396484, "learning_rate": 1.8510480142790002e-05, "loss": 1.4297, "step": 6121 }, { "epoch": 20.072131147540983, "grad_norm": 7.343381881713867, "learning_rate": 1.8509922510825917e-05, "loss": 1.3744, "step": 6122 }, { "epoch": 20.075409836065575, "grad_norm": 9.328131675720215, "learning_rate": 1.8509364782903606e-05, "loss": 1.3364, "step": 6123 }, { "epoch": 20.078688524590163, "grad_norm": 7.278066158294678, "learning_rate": 1.8508806959029362e-05, "loss": 1.4062, "step": 6124 }, { "epoch": 20.081967213114755, "grad_norm": 6.855672836303711, "learning_rate": 1.8508249039209474e-05, "loss": 1.293, "step": 6125 }, { "epoch": 20.085245901639343, "grad_norm": 6.138515472412109, "learning_rate": 1.850769102345023e-05, "loss": 1.2407, "step": 6126 }, { "epoch": 20.088524590163935, "grad_norm": 9.094207763671875, "learning_rate": 1.8507132911757925e-05, "loss": 1.5232, "step": 6127 }, { "epoch": 20.091803278688523, "grad_norm": 8.131207466125488, "learning_rate": 1.8506574704138847e-05, "loss": 1.2434, "step": 6128 }, { "epoch": 20.095081967213115, "grad_norm": 7.656022071838379, "learning_rate": 1.85060164005993e-05, "loss": 1.4189, "step": 6129 }, { "epoch": 20.098360655737704, "grad_norm": 8.237183570861816, "learning_rate": 1.8505458001145567e-05, "loss": 1.2394, "step": 6130 }, { "epoch": 20.101639344262296, "grad_norm": 8.736248016357422, "learning_rate": 1.8504899505783956e-05, "loss": 1.0612, "step": 6131 }, { "epoch": 20.104918032786884, "grad_norm": 12.39680290222168, "learning_rate": 1.8504340914520763e-05, "loss": 1.4458, "step": 6132 }, { "epoch": 20.108196721311476, "grad_norm": 7.823732376098633, "learning_rate": 1.850378222736228e-05, "loss": 1.4099, "step": 6133 }, { "epoch": 20.111475409836064, "grad_norm": 32.666648864746094, "learning_rate": 1.850322344431481e-05, "loss": 1.1824, "step": 6134 }, { "epoch": 20.114754098360656, "grad_norm": 8.051243782043457, "learning_rate": 1.850266456538466e-05, "loss": 1.3835, "step": 6135 }, { "epoch": 20.118032786885244, "grad_norm": 7.631539344787598, "learning_rate": 1.8502105590578117e-05, "loss": 1.421, "step": 6136 }, { "epoch": 20.121311475409836, "grad_norm": 9.46081256866455, "learning_rate": 1.8501546519901503e-05, "loss": 1.2837, "step": 6137 }, { "epoch": 20.124590163934425, "grad_norm": 24.181987762451172, "learning_rate": 1.8500987353361108e-05, "loss": 1.3428, "step": 6138 }, { "epoch": 20.127868852459017, "grad_norm": 10.32526683807373, "learning_rate": 1.8500428090963244e-05, "loss": 1.2981, "step": 6139 }, { "epoch": 20.131147540983605, "grad_norm": 10.253111839294434, "learning_rate": 1.849986873271421e-05, "loss": 1.3263, "step": 6140 }, { "epoch": 20.134426229508197, "grad_norm": 8.252104759216309, "learning_rate": 1.8499309278620323e-05, "loss": 1.3915, "step": 6141 }, { "epoch": 20.137704918032785, "grad_norm": 14.567801475524902, "learning_rate": 1.8498749728687886e-05, "loss": 1.3296, "step": 6142 }, { "epoch": 20.140983606557377, "grad_norm": 28.981348037719727, "learning_rate": 1.849819008292321e-05, "loss": 1.3606, "step": 6143 }, { "epoch": 20.14426229508197, "grad_norm": 8.571141242980957, "learning_rate": 1.8497630341332603e-05, "loss": 1.5486, "step": 6144 }, { "epoch": 20.147540983606557, "grad_norm": 7.328765392303467, "learning_rate": 1.849707050392238e-05, "loss": 1.272, "step": 6145 }, { "epoch": 20.15081967213115, "grad_norm": 8.980973243713379, "learning_rate": 1.8496510570698852e-05, "loss": 1.3883, "step": 6146 }, { "epoch": 20.154098360655738, "grad_norm": 7.960147380828857, "learning_rate": 1.8495950541668334e-05, "loss": 1.3073, "step": 6147 }, { "epoch": 20.15737704918033, "grad_norm": 8.523369789123535, "learning_rate": 1.849539041683714e-05, "loss": 1.386, "step": 6148 }, { "epoch": 20.160655737704918, "grad_norm": 7.673922538757324, "learning_rate": 1.8494830196211584e-05, "loss": 1.4121, "step": 6149 }, { "epoch": 20.16393442622951, "grad_norm": 8.137182235717773, "learning_rate": 1.8494269879797986e-05, "loss": 1.2727, "step": 6150 }, { "epoch": 20.167213114754098, "grad_norm": 8.560172080993652, "learning_rate": 1.849370946760266e-05, "loss": 1.2175, "step": 6151 }, { "epoch": 20.17049180327869, "grad_norm": 10.240826606750488, "learning_rate": 1.8493148959631936e-05, "loss": 1.5781, "step": 6152 }, { "epoch": 20.17377049180328, "grad_norm": 7.852731227874756, "learning_rate": 1.8492588355892125e-05, "loss": 1.3633, "step": 6153 }, { "epoch": 20.17704918032787, "grad_norm": 10.623722076416016, "learning_rate": 1.8492027656389547e-05, "loss": 1.1533, "step": 6154 }, { "epoch": 20.18032786885246, "grad_norm": 21.586589813232422, "learning_rate": 1.8491466861130528e-05, "loss": 1.3599, "step": 6155 }, { "epoch": 20.18360655737705, "grad_norm": 34.25944900512695, "learning_rate": 1.8490905970121393e-05, "loss": 1.3536, "step": 6156 }, { "epoch": 20.18688524590164, "grad_norm": 9.323585510253906, "learning_rate": 1.8490344983368462e-05, "loss": 1.124, "step": 6157 }, { "epoch": 20.19016393442623, "grad_norm": 13.131192207336426, "learning_rate": 1.848978390087807e-05, "loss": 1.3335, "step": 6158 }, { "epoch": 20.19344262295082, "grad_norm": 8.55349063873291, "learning_rate": 1.848922272265653e-05, "loss": 1.5208, "step": 6159 }, { "epoch": 20.19672131147541, "grad_norm": 7.871082305908203, "learning_rate": 1.8488661448710183e-05, "loss": 1.542, "step": 6160 }, { "epoch": 20.2, "grad_norm": 8.95598030090332, "learning_rate": 1.8488100079045345e-05, "loss": 1.3643, "step": 6161 }, { "epoch": 20.20327868852459, "grad_norm": 7.093752384185791, "learning_rate": 1.848753861366836e-05, "loss": 1.0465, "step": 6162 }, { "epoch": 20.20655737704918, "grad_norm": 7.047847270965576, "learning_rate": 1.848697705258555e-05, "loss": 1.3599, "step": 6163 }, { "epoch": 20.20983606557377, "grad_norm": 8.677289009094238, "learning_rate": 1.8486415395803247e-05, "loss": 1.2227, "step": 6164 }, { "epoch": 20.21311475409836, "grad_norm": 9.364516258239746, "learning_rate": 1.848585364332779e-05, "loss": 1.2593, "step": 6165 }, { "epoch": 20.21639344262295, "grad_norm": 11.192185401916504, "learning_rate": 1.8485291795165508e-05, "loss": 1.3621, "step": 6166 }, { "epoch": 20.21967213114754, "grad_norm": 7.9011454582214355, "learning_rate": 1.848472985132274e-05, "loss": 1.1995, "step": 6167 }, { "epoch": 20.222950819672132, "grad_norm": 8.847905158996582, "learning_rate": 1.848416781180582e-05, "loss": 1.2151, "step": 6168 }, { "epoch": 20.22622950819672, "grad_norm": 8.69105339050293, "learning_rate": 1.848360567662109e-05, "loss": 1.4004, "step": 6169 }, { "epoch": 20.229508196721312, "grad_norm": 9.071680068969727, "learning_rate": 1.8483043445774883e-05, "loss": 1.3859, "step": 6170 }, { "epoch": 20.2327868852459, "grad_norm": 7.868447780609131, "learning_rate": 1.8482481119273538e-05, "loss": 1.3535, "step": 6171 }, { "epoch": 20.236065573770492, "grad_norm": 6.931913375854492, "learning_rate": 1.8481918697123402e-05, "loss": 1.4089, "step": 6172 }, { "epoch": 20.23934426229508, "grad_norm": 7.396856307983398, "learning_rate": 1.8481356179330812e-05, "loss": 1.2028, "step": 6173 }, { "epoch": 20.242622950819673, "grad_norm": 10.342813491821289, "learning_rate": 1.8480793565902114e-05, "loss": 1.1646, "step": 6174 }, { "epoch": 20.24590163934426, "grad_norm": 6.702258110046387, "learning_rate": 1.848023085684365e-05, "loss": 1.3972, "step": 6175 }, { "epoch": 20.249180327868853, "grad_norm": 9.800132751464844, "learning_rate": 1.8479668052161764e-05, "loss": 1.1948, "step": 6176 }, { "epoch": 20.25245901639344, "grad_norm": 8.278546333312988, "learning_rate": 1.847910515186281e-05, "loss": 1.2571, "step": 6177 }, { "epoch": 20.255737704918033, "grad_norm": 8.51552677154541, "learning_rate": 1.8478542155953125e-05, "loss": 1.4514, "step": 6178 }, { "epoch": 20.25901639344262, "grad_norm": 7.250391483306885, "learning_rate": 1.8477979064439062e-05, "loss": 1.4048, "step": 6179 }, { "epoch": 20.262295081967213, "grad_norm": 12.832372665405273, "learning_rate": 1.847741587732697e-05, "loss": 1.5081, "step": 6180 }, { "epoch": 20.2655737704918, "grad_norm": 8.631757736206055, "learning_rate": 1.8476852594623202e-05, "loss": 1.1902, "step": 6181 }, { "epoch": 20.268852459016394, "grad_norm": 9.753762245178223, "learning_rate": 1.8476289216334103e-05, "loss": 1.3767, "step": 6182 }, { "epoch": 20.272131147540982, "grad_norm": 7.784601211547852, "learning_rate": 1.8475725742466034e-05, "loss": 1.4341, "step": 6183 }, { "epoch": 20.275409836065574, "grad_norm": 8.990860939025879, "learning_rate": 1.8475162173025346e-05, "loss": 1.5071, "step": 6184 }, { "epoch": 20.278688524590162, "grad_norm": 8.442390441894531, "learning_rate": 1.8474598508018387e-05, "loss": 1.2427, "step": 6185 }, { "epoch": 20.281967213114754, "grad_norm": 9.782971382141113, "learning_rate": 1.847403474745152e-05, "loss": 1.332, "step": 6186 }, { "epoch": 20.285245901639342, "grad_norm": 10.757795333862305, "learning_rate": 1.8473470891331103e-05, "loss": 1.2794, "step": 6187 }, { "epoch": 20.288524590163934, "grad_norm": 7.162297248840332, "learning_rate": 1.847290693966349e-05, "loss": 1.3767, "step": 6188 }, { "epoch": 20.291803278688526, "grad_norm": 8.98888874053955, "learning_rate": 1.8472342892455043e-05, "loss": 1.4812, "step": 6189 }, { "epoch": 20.295081967213115, "grad_norm": 10.952305793762207, "learning_rate": 1.847177874971212e-05, "loss": 1.4385, "step": 6190 }, { "epoch": 20.298360655737707, "grad_norm": 11.587091445922852, "learning_rate": 1.8471214511441084e-05, "loss": 1.3, "step": 6191 }, { "epoch": 20.301639344262295, "grad_norm": 13.31020450592041, "learning_rate": 1.8470650177648294e-05, "loss": 1.511, "step": 6192 }, { "epoch": 20.304918032786887, "grad_norm": 8.938477516174316, "learning_rate": 1.8470085748340118e-05, "loss": 1.4387, "step": 6193 }, { "epoch": 20.308196721311475, "grad_norm": 9.190852165222168, "learning_rate": 1.8469521223522915e-05, "loss": 1.2639, "step": 6194 }, { "epoch": 20.311475409836067, "grad_norm": 6.978446006774902, "learning_rate": 1.846895660320306e-05, "loss": 1.2974, "step": 6195 }, { "epoch": 20.314754098360655, "grad_norm": 7.448197841644287, "learning_rate": 1.846839188738691e-05, "loss": 1.5371, "step": 6196 }, { "epoch": 20.318032786885247, "grad_norm": 8.317472457885742, "learning_rate": 1.8467827076080835e-05, "loss": 1.427, "step": 6197 }, { "epoch": 20.321311475409836, "grad_norm": 7.819760799407959, "learning_rate": 1.8467262169291208e-05, "loss": 1.2269, "step": 6198 }, { "epoch": 20.324590163934428, "grad_norm": 14.008392333984375, "learning_rate": 1.8466697167024396e-05, "loss": 1.2412, "step": 6199 }, { "epoch": 20.327868852459016, "grad_norm": 8.390521049499512, "learning_rate": 1.846613206928677e-05, "loss": 1.2109, "step": 6200 }, { "epoch": 20.331147540983608, "grad_norm": 9.38710880279541, "learning_rate": 1.8465566876084705e-05, "loss": 1.2499, "step": 6201 }, { "epoch": 20.334426229508196, "grad_norm": 7.308473587036133, "learning_rate": 1.8465001587424565e-05, "loss": 1.2986, "step": 6202 }, { "epoch": 20.337704918032788, "grad_norm": 9.630352973937988, "learning_rate": 1.846443620331274e-05, "loss": 1.3239, "step": 6203 }, { "epoch": 20.340983606557376, "grad_norm": 8.492533683776855, "learning_rate": 1.8463870723755588e-05, "loss": 1.2673, "step": 6204 }, { "epoch": 20.34426229508197, "grad_norm": 6.46112585067749, "learning_rate": 1.8463305148759498e-05, "loss": 1.3049, "step": 6205 }, { "epoch": 20.347540983606557, "grad_norm": 9.114988327026367, "learning_rate": 1.8462739478330837e-05, "loss": 1.2793, "step": 6206 }, { "epoch": 20.35081967213115, "grad_norm": 12.175674438476562, "learning_rate": 1.8462173712475997e-05, "loss": 1.4985, "step": 6207 }, { "epoch": 20.354098360655737, "grad_norm": 10.651782035827637, "learning_rate": 1.8461607851201348e-05, "loss": 1.3923, "step": 6208 }, { "epoch": 20.35737704918033, "grad_norm": 12.570039749145508, "learning_rate": 1.8461041894513268e-05, "loss": 1.1755, "step": 6209 }, { "epoch": 20.360655737704917, "grad_norm": 13.781831741333008, "learning_rate": 1.8460475842418148e-05, "loss": 1.3131, "step": 6210 }, { "epoch": 20.36393442622951, "grad_norm": 8.540324211120605, "learning_rate": 1.845990969492236e-05, "loss": 1.3879, "step": 6211 }, { "epoch": 20.367213114754097, "grad_norm": 16.222185134887695, "learning_rate": 1.84593434520323e-05, "loss": 1.2708, "step": 6212 }, { "epoch": 20.37049180327869, "grad_norm": 11.7189359664917, "learning_rate": 1.8458777113754343e-05, "loss": 1.2549, "step": 6213 }, { "epoch": 20.373770491803278, "grad_norm": 10.114566802978516, "learning_rate": 1.8458210680094882e-05, "loss": 1.2086, "step": 6214 }, { "epoch": 20.37704918032787, "grad_norm": 8.751230239868164, "learning_rate": 1.8457644151060304e-05, "loss": 1.2781, "step": 6215 }, { "epoch": 20.380327868852458, "grad_norm": 9.944451332092285, "learning_rate": 1.8457077526656992e-05, "loss": 1.3221, "step": 6216 }, { "epoch": 20.38360655737705, "grad_norm": 9.33166217803955, "learning_rate": 1.8456510806891333e-05, "loss": 1.4744, "step": 6217 }, { "epoch": 20.386885245901638, "grad_norm": 8.157172203063965, "learning_rate": 1.845594399176973e-05, "loss": 1.2623, "step": 6218 }, { "epoch": 20.39016393442623, "grad_norm": 10.561598777770996, "learning_rate": 1.845537708129856e-05, "loss": 1.3835, "step": 6219 }, { "epoch": 20.39344262295082, "grad_norm": 12.136903762817383, "learning_rate": 1.8454810075484228e-05, "loss": 1.2585, "step": 6220 }, { "epoch": 20.39672131147541, "grad_norm": 10.181530952453613, "learning_rate": 1.8454242974333117e-05, "loss": 1.2283, "step": 6221 }, { "epoch": 20.4, "grad_norm": 10.76869010925293, "learning_rate": 1.8453675777851627e-05, "loss": 1.459, "step": 6222 }, { "epoch": 20.40327868852459, "grad_norm": 8.597542762756348, "learning_rate": 1.8453108486046153e-05, "loss": 1.2549, "step": 6223 }, { "epoch": 20.40655737704918, "grad_norm": 8.853398323059082, "learning_rate": 1.8452541098923093e-05, "loss": 1.2694, "step": 6224 }, { "epoch": 20.40983606557377, "grad_norm": 24.472991943359375, "learning_rate": 1.8451973616488846e-05, "loss": 1.4697, "step": 6225 }, { "epoch": 20.41311475409836, "grad_norm": 10.571724891662598, "learning_rate": 1.8451406038749803e-05, "loss": 1.1035, "step": 6226 }, { "epoch": 20.41639344262295, "grad_norm": 11.146543502807617, "learning_rate": 1.845083836571237e-05, "loss": 1.5002, "step": 6227 }, { "epoch": 20.41967213114754, "grad_norm": 10.747232437133789, "learning_rate": 1.8450270597382952e-05, "loss": 1.285, "step": 6228 }, { "epoch": 20.42295081967213, "grad_norm": 9.682169914245605, "learning_rate": 1.8449702733767948e-05, "loss": 1.4424, "step": 6229 }, { "epoch": 20.42622950819672, "grad_norm": 8.438727378845215, "learning_rate": 1.8449134774873755e-05, "loss": 1.3737, "step": 6230 }, { "epoch": 20.42950819672131, "grad_norm": 9.273889541625977, "learning_rate": 1.8448566720706784e-05, "loss": 1.3423, "step": 6231 }, { "epoch": 20.432786885245903, "grad_norm": 7.488100051879883, "learning_rate": 1.844799857127344e-05, "loss": 1.272, "step": 6232 }, { "epoch": 20.43606557377049, "grad_norm": 7.982758045196533, "learning_rate": 1.8447430326580127e-05, "loss": 1.2048, "step": 6233 }, { "epoch": 20.439344262295084, "grad_norm": 9.331907272338867, "learning_rate": 1.8446861986633255e-05, "loss": 1.2297, "step": 6234 }, { "epoch": 20.442622950819672, "grad_norm": 11.107112884521484, "learning_rate": 1.8446293551439232e-05, "loss": 1.4076, "step": 6235 }, { "epoch": 20.445901639344264, "grad_norm": 6.70396089553833, "learning_rate": 1.8445725021004466e-05, "loss": 1.4158, "step": 6236 }, { "epoch": 20.449180327868852, "grad_norm": 7.564201354980469, "learning_rate": 1.844515639533537e-05, "loss": 1.4919, "step": 6237 }, { "epoch": 20.452459016393444, "grad_norm": 12.976723670959473, "learning_rate": 1.8444587674438357e-05, "loss": 1.3021, "step": 6238 }, { "epoch": 20.455737704918032, "grad_norm": 8.61697006225586, "learning_rate": 1.8444018858319835e-05, "loss": 1.3994, "step": 6239 }, { "epoch": 20.459016393442624, "grad_norm": 37.62699508666992, "learning_rate": 1.844344994698622e-05, "loss": 1.2074, "step": 6240 }, { "epoch": 20.462295081967213, "grad_norm": 10.787440299987793, "learning_rate": 1.844288094044393e-05, "loss": 1.3809, "step": 6241 }, { "epoch": 20.465573770491805, "grad_norm": 15.136363983154297, "learning_rate": 1.8442311838699378e-05, "loss": 1.1718, "step": 6242 }, { "epoch": 20.468852459016393, "grad_norm": 9.173669815063477, "learning_rate": 1.8441742641758983e-05, "loss": 1.4027, "step": 6243 }, { "epoch": 20.472131147540985, "grad_norm": 7.523420333862305, "learning_rate": 1.844117334962916e-05, "loss": 1.2659, "step": 6244 }, { "epoch": 20.475409836065573, "grad_norm": 9.721637725830078, "learning_rate": 1.8440603962316333e-05, "loss": 1.3567, "step": 6245 }, { "epoch": 20.478688524590165, "grad_norm": 9.356410026550293, "learning_rate": 1.8440034479826922e-05, "loss": 1.2942, "step": 6246 }, { "epoch": 20.481967213114753, "grad_norm": 8.950504302978516, "learning_rate": 1.8439464902167342e-05, "loss": 1.3257, "step": 6247 }, { "epoch": 20.485245901639345, "grad_norm": 8.796825408935547, "learning_rate": 1.8438895229344023e-05, "loss": 1.4651, "step": 6248 }, { "epoch": 20.488524590163934, "grad_norm": 9.67078971862793, "learning_rate": 1.8438325461363387e-05, "loss": 1.447, "step": 6249 }, { "epoch": 20.491803278688526, "grad_norm": 14.112467765808105, "learning_rate": 1.8437755598231857e-05, "loss": 1.3221, "step": 6250 }, { "epoch": 20.495081967213114, "grad_norm": 13.288495063781738, "learning_rate": 1.8437185639955858e-05, "loss": 1.3999, "step": 6251 }, { "epoch": 20.498360655737706, "grad_norm": 12.960515975952148, "learning_rate": 1.8436615586541822e-05, "loss": 1.2037, "step": 6252 }, { "epoch": 20.501639344262294, "grad_norm": 20.438581466674805, "learning_rate": 1.843604543799617e-05, "loss": 1.4683, "step": 6253 }, { "epoch": 20.504918032786886, "grad_norm": 10.494003295898438, "learning_rate": 1.8435475194325338e-05, "loss": 1.3508, "step": 6254 }, { "epoch": 20.508196721311474, "grad_norm": 10.62585735321045, "learning_rate": 1.8434904855535748e-05, "loss": 1.5071, "step": 6255 }, { "epoch": 20.511475409836066, "grad_norm": 8.16122055053711, "learning_rate": 1.8434334421633838e-05, "loss": 1.479, "step": 6256 }, { "epoch": 20.514754098360655, "grad_norm": 11.154123306274414, "learning_rate": 1.843376389262604e-05, "loss": 1.4495, "step": 6257 }, { "epoch": 20.518032786885247, "grad_norm": 8.960631370544434, "learning_rate": 1.8433193268518784e-05, "loss": 1.3716, "step": 6258 }, { "epoch": 20.521311475409835, "grad_norm": 9.098949432373047, "learning_rate": 1.8432622549318505e-05, "loss": 1.4463, "step": 6259 }, { "epoch": 20.524590163934427, "grad_norm": 12.537405967712402, "learning_rate": 1.8432051735031636e-05, "loss": 1.229, "step": 6260 }, { "epoch": 20.527868852459015, "grad_norm": 12.469073295593262, "learning_rate": 1.843148082566462e-05, "loss": 1.0642, "step": 6261 }, { "epoch": 20.531147540983607, "grad_norm": 8.669642448425293, "learning_rate": 1.843090982122389e-05, "loss": 1.4341, "step": 6262 }, { "epoch": 20.534426229508195, "grad_norm": 9.144691467285156, "learning_rate": 1.8430338721715886e-05, "loss": 1.4065, "step": 6263 }, { "epoch": 20.537704918032787, "grad_norm": 13.297725677490234, "learning_rate": 1.842976752714705e-05, "loss": 1.2119, "step": 6264 }, { "epoch": 20.540983606557376, "grad_norm": 8.477569580078125, "learning_rate": 1.8429196237523816e-05, "loss": 1.3152, "step": 6265 }, { "epoch": 20.544262295081968, "grad_norm": 8.164541244506836, "learning_rate": 1.8428624852852634e-05, "loss": 1.2319, "step": 6266 }, { "epoch": 20.547540983606556, "grad_norm": 14.301515579223633, "learning_rate": 1.842805337313994e-05, "loss": 1.2056, "step": 6267 }, { "epoch": 20.550819672131148, "grad_norm": 7.890411853790283, "learning_rate": 1.8427481798392183e-05, "loss": 1.5383, "step": 6268 }, { "epoch": 20.554098360655736, "grad_norm": 10.888723373413086, "learning_rate": 1.8426910128615806e-05, "loss": 1.3875, "step": 6269 }, { "epoch": 20.557377049180328, "grad_norm": 9.67883586883545, "learning_rate": 1.8426338363817257e-05, "loss": 1.4102, "step": 6270 }, { "epoch": 20.560655737704916, "grad_norm": 8.046887397766113, "learning_rate": 1.842576650400298e-05, "loss": 1.1708, "step": 6271 }, { "epoch": 20.56393442622951, "grad_norm": 8.91104507446289, "learning_rate": 1.8425194549179423e-05, "loss": 1.415, "step": 6272 }, { "epoch": 20.567213114754097, "grad_norm": 12.72077465057373, "learning_rate": 1.842462249935304e-05, "loss": 1.3696, "step": 6273 }, { "epoch": 20.57049180327869, "grad_norm": 8.896607398986816, "learning_rate": 1.842405035453028e-05, "loss": 1.155, "step": 6274 }, { "epoch": 20.57377049180328, "grad_norm": 7.548302173614502, "learning_rate": 1.8423478114717593e-05, "loss": 1.2693, "step": 6275 }, { "epoch": 20.57704918032787, "grad_norm": 11.196722030639648, "learning_rate": 1.842290577992143e-05, "loss": 1.217, "step": 6276 }, { "epoch": 20.58032786885246, "grad_norm": 17.417375564575195, "learning_rate": 1.842233335014825e-05, "loss": 1.2769, "step": 6277 }, { "epoch": 20.58360655737705, "grad_norm": 15.1220703125, "learning_rate": 1.84217608254045e-05, "loss": 1.5463, "step": 6278 }, { "epoch": 20.58688524590164, "grad_norm": 13.306121826171875, "learning_rate": 1.8421188205696645e-05, "loss": 1.1511, "step": 6279 }, { "epoch": 20.59016393442623, "grad_norm": 9.348933219909668, "learning_rate": 1.8420615491031134e-05, "loss": 1.4275, "step": 6280 }, { "epoch": 20.59344262295082, "grad_norm": 8.273395538330078, "learning_rate": 1.8420042681414426e-05, "loss": 1.4258, "step": 6281 }, { "epoch": 20.59672131147541, "grad_norm": 26.357391357421875, "learning_rate": 1.841946977685299e-05, "loss": 1.5222, "step": 6282 }, { "epoch": 20.6, "grad_norm": 14.929738998413086, "learning_rate": 1.8418896777353272e-05, "loss": 1.2539, "step": 6283 }, { "epoch": 20.60327868852459, "grad_norm": 10.647074699401855, "learning_rate": 1.841832368292174e-05, "loss": 1.3795, "step": 6284 }, { "epoch": 20.60655737704918, "grad_norm": 8.29302978515625, "learning_rate": 1.8417750493564857e-05, "loss": 1.5864, "step": 6285 }, { "epoch": 20.60983606557377, "grad_norm": 9.22007942199707, "learning_rate": 1.8417177209289082e-05, "loss": 1.3342, "step": 6286 }, { "epoch": 20.613114754098362, "grad_norm": 12.276195526123047, "learning_rate": 1.8416603830100885e-05, "loss": 1.2632, "step": 6287 }, { "epoch": 20.61639344262295, "grad_norm": 8.761259078979492, "learning_rate": 1.8416030356006728e-05, "loss": 1.3373, "step": 6288 }, { "epoch": 20.619672131147542, "grad_norm": 9.46788501739502, "learning_rate": 1.841545678701308e-05, "loss": 1.3413, "step": 6289 }, { "epoch": 20.62295081967213, "grad_norm": 9.686439514160156, "learning_rate": 1.8414883123126402e-05, "loss": 1.3149, "step": 6290 }, { "epoch": 20.626229508196722, "grad_norm": 10.355820655822754, "learning_rate": 1.8414309364353173e-05, "loss": 1.5925, "step": 6291 }, { "epoch": 20.62950819672131, "grad_norm": 10.91043472290039, "learning_rate": 1.8413735510699856e-05, "loss": 1.3152, "step": 6292 }, { "epoch": 20.632786885245903, "grad_norm": 26.377613067626953, "learning_rate": 1.841316156217292e-05, "loss": 1.47, "step": 6293 }, { "epoch": 20.63606557377049, "grad_norm": 7.734337329864502, "learning_rate": 1.8412587518778845e-05, "loss": 1.2874, "step": 6294 }, { "epoch": 20.639344262295083, "grad_norm": 10.107001304626465, "learning_rate": 1.8412013380524095e-05, "loss": 1.2692, "step": 6295 }, { "epoch": 20.64262295081967, "grad_norm": 11.868860244750977, "learning_rate": 1.841143914741515e-05, "loss": 1.5332, "step": 6296 }, { "epoch": 20.645901639344263, "grad_norm": 15.243014335632324, "learning_rate": 1.841086481945848e-05, "loss": 1.4197, "step": 6297 }, { "epoch": 20.64918032786885, "grad_norm": 8.91750717163086, "learning_rate": 1.8410290396660567e-05, "loss": 1.5659, "step": 6298 }, { "epoch": 20.652459016393443, "grad_norm": 7.624995231628418, "learning_rate": 1.840971587902788e-05, "loss": 1.5195, "step": 6299 }, { "epoch": 20.65573770491803, "grad_norm": 9.864726066589355, "learning_rate": 1.8409141266566908e-05, "loss": 1.3596, "step": 6300 }, { "epoch": 20.659016393442624, "grad_norm": 9.535372734069824, "learning_rate": 1.840856655928412e-05, "loss": 1.2319, "step": 6301 }, { "epoch": 20.662295081967212, "grad_norm": 11.041155815124512, "learning_rate": 1.8407991757186007e-05, "loss": 1.4397, "step": 6302 }, { "epoch": 20.665573770491804, "grad_norm": 14.633535385131836, "learning_rate": 1.8407416860279045e-05, "loss": 1.16, "step": 6303 }, { "epoch": 20.668852459016392, "grad_norm": 11.415567398071289, "learning_rate": 1.8406841868569712e-05, "loss": 1.47, "step": 6304 }, { "epoch": 20.672131147540984, "grad_norm": 9.386392593383789, "learning_rate": 1.8406266782064498e-05, "loss": 1.3442, "step": 6305 }, { "epoch": 20.675409836065572, "grad_norm": 8.929351806640625, "learning_rate": 1.8405691600769886e-05, "loss": 1.2009, "step": 6306 }, { "epoch": 20.678688524590164, "grad_norm": 13.421119689941406, "learning_rate": 1.8405116324692362e-05, "loss": 1.2123, "step": 6307 }, { "epoch": 20.681967213114753, "grad_norm": 13.351691246032715, "learning_rate": 1.840454095383841e-05, "loss": 1.3318, "step": 6308 }, { "epoch": 20.685245901639345, "grad_norm": 10.660076141357422, "learning_rate": 1.840396548821452e-05, "loss": 1.4778, "step": 6309 }, { "epoch": 20.688524590163933, "grad_norm": 13.250536918640137, "learning_rate": 1.8403389927827186e-05, "loss": 1.3459, "step": 6310 }, { "epoch": 20.691803278688525, "grad_norm": 14.134561538696289, "learning_rate": 1.840281427268289e-05, "loss": 1.3141, "step": 6311 }, { "epoch": 20.695081967213113, "grad_norm": 13.864128112792969, "learning_rate": 1.8402238522788124e-05, "loss": 1.4187, "step": 6312 }, { "epoch": 20.698360655737705, "grad_norm": 9.477717399597168, "learning_rate": 1.8401662678149387e-05, "loss": 1.2523, "step": 6313 }, { "epoch": 20.701639344262293, "grad_norm": 12.467284202575684, "learning_rate": 1.8401086738773166e-05, "loss": 1.3621, "step": 6314 }, { "epoch": 20.704918032786885, "grad_norm": 9.680435180664062, "learning_rate": 1.8400510704665955e-05, "loss": 1.4336, "step": 6315 }, { "epoch": 20.708196721311474, "grad_norm": 11.768270492553711, "learning_rate": 1.8399934575834254e-05, "loss": 1.4915, "step": 6316 }, { "epoch": 20.711475409836066, "grad_norm": 7.357112884521484, "learning_rate": 1.8399358352284554e-05, "loss": 1.4056, "step": 6317 }, { "epoch": 20.714754098360658, "grad_norm": 11.630696296691895, "learning_rate": 1.8398782034023357e-05, "loss": 1.2849, "step": 6318 }, { "epoch": 20.718032786885246, "grad_norm": 8.983837127685547, "learning_rate": 1.8398205621057164e-05, "loss": 1.3708, "step": 6319 }, { "epoch": 20.721311475409838, "grad_norm": 9.567397117614746, "learning_rate": 1.8397629113392464e-05, "loss": 1.0542, "step": 6320 }, { "epoch": 20.724590163934426, "grad_norm": 73.12342071533203, "learning_rate": 1.8397052511035766e-05, "loss": 1.4338, "step": 6321 }, { "epoch": 20.727868852459018, "grad_norm": 9.802146911621094, "learning_rate": 1.8396475813993574e-05, "loss": 1.4955, "step": 6322 }, { "epoch": 20.731147540983606, "grad_norm": 13.969940185546875, "learning_rate": 1.8395899022272384e-05, "loss": 1.2015, "step": 6323 }, { "epoch": 20.7344262295082, "grad_norm": 13.508036613464355, "learning_rate": 1.8395322135878705e-05, "loss": 1.375, "step": 6324 }, { "epoch": 20.737704918032787, "grad_norm": 15.084892272949219, "learning_rate": 1.8394745154819037e-05, "loss": 1.3071, "step": 6325 }, { "epoch": 20.74098360655738, "grad_norm": 10.11515998840332, "learning_rate": 1.839416807909989e-05, "loss": 1.6763, "step": 6326 }, { "epoch": 20.744262295081967, "grad_norm": 11.11331844329834, "learning_rate": 1.8393590908727773e-05, "loss": 1.3335, "step": 6327 }, { "epoch": 20.74754098360656, "grad_norm": 8.504098892211914, "learning_rate": 1.839301364370919e-05, "loss": 1.2086, "step": 6328 }, { "epoch": 20.750819672131147, "grad_norm": 11.518229484558105, "learning_rate": 1.839243628405065e-05, "loss": 1.1947, "step": 6329 }, { "epoch": 20.75409836065574, "grad_norm": 7.49586820602417, "learning_rate": 1.8391858829758667e-05, "loss": 1.4436, "step": 6330 }, { "epoch": 20.757377049180327, "grad_norm": 12.1475830078125, "learning_rate": 1.8391281280839746e-05, "loss": 1.2734, "step": 6331 }, { "epoch": 20.76065573770492, "grad_norm": 8.860130310058594, "learning_rate": 1.8390703637300408e-05, "loss": 1.311, "step": 6332 }, { "epoch": 20.763934426229508, "grad_norm": 11.952997207641602, "learning_rate": 1.839012589914716e-05, "loss": 1.3086, "step": 6333 }, { "epoch": 20.7672131147541, "grad_norm": 17.327713012695312, "learning_rate": 1.838954806638652e-05, "loss": 1.2373, "step": 6334 }, { "epoch": 20.770491803278688, "grad_norm": 12.328404426574707, "learning_rate": 1.8388970139025006e-05, "loss": 1.3884, "step": 6335 }, { "epoch": 20.77377049180328, "grad_norm": 17.593185424804688, "learning_rate": 1.8388392117069128e-05, "loss": 1.4895, "step": 6336 }, { "epoch": 20.777049180327868, "grad_norm": 9.061001777648926, "learning_rate": 1.8387814000525406e-05, "loss": 1.4824, "step": 6337 }, { "epoch": 20.78032786885246, "grad_norm": 6.801525592803955, "learning_rate": 1.8387235789400363e-05, "loss": 1.603, "step": 6338 }, { "epoch": 20.78360655737705, "grad_norm": 10.735387802124023, "learning_rate": 1.8386657483700516e-05, "loss": 1.4231, "step": 6339 }, { "epoch": 20.78688524590164, "grad_norm": 10.250723838806152, "learning_rate": 1.8386079083432382e-05, "loss": 1.2324, "step": 6340 }, { "epoch": 20.79016393442623, "grad_norm": 8.133975982666016, "learning_rate": 1.838550058860249e-05, "loss": 1.4634, "step": 6341 }, { "epoch": 20.79344262295082, "grad_norm": 11.95905590057373, "learning_rate": 1.838492199921736e-05, "loss": 1.3896, "step": 6342 }, { "epoch": 20.79672131147541, "grad_norm": 7.076842308044434, "learning_rate": 1.8384343315283515e-05, "loss": 1.3765, "step": 6343 }, { "epoch": 20.8, "grad_norm": 11.011161804199219, "learning_rate": 1.8383764536807486e-05, "loss": 1.3357, "step": 6344 }, { "epoch": 20.80327868852459, "grad_norm": 9.471604347229004, "learning_rate": 1.838318566379579e-05, "loss": 1.3755, "step": 6345 }, { "epoch": 20.80655737704918, "grad_norm": 9.47075366973877, "learning_rate": 1.838260669625496e-05, "loss": 1.302, "step": 6346 }, { "epoch": 20.80983606557377, "grad_norm": 17.955528259277344, "learning_rate": 1.8382027634191523e-05, "loss": 1.5322, "step": 6347 }, { "epoch": 20.81311475409836, "grad_norm": 9.28707218170166, "learning_rate": 1.838144847761201e-05, "loss": 1.3174, "step": 6348 }, { "epoch": 20.81639344262295, "grad_norm": 9.250998497009277, "learning_rate": 1.8380869226522954e-05, "loss": 1.3291, "step": 6349 }, { "epoch": 20.81967213114754, "grad_norm": 9.573119163513184, "learning_rate": 1.8380289880930878e-05, "loss": 1.3838, "step": 6350 }, { "epoch": 20.82295081967213, "grad_norm": 8.44811725616455, "learning_rate": 1.8379710440842323e-05, "loss": 1.5042, "step": 6351 }, { "epoch": 20.82622950819672, "grad_norm": 13.582762718200684, "learning_rate": 1.8379130906263824e-05, "loss": 1.2291, "step": 6352 }, { "epoch": 20.82950819672131, "grad_norm": 9.307693481445312, "learning_rate": 1.8378551277201906e-05, "loss": 1.2246, "step": 6353 }, { "epoch": 20.832786885245902, "grad_norm": 9.226236343383789, "learning_rate": 1.8377971553663113e-05, "loss": 1.2268, "step": 6354 }, { "epoch": 20.83606557377049, "grad_norm": 8.87761116027832, "learning_rate": 1.8377391735653983e-05, "loss": 1.3889, "step": 6355 }, { "epoch": 20.839344262295082, "grad_norm": 15.371622085571289, "learning_rate": 1.8376811823181045e-05, "loss": 1.2493, "step": 6356 }, { "epoch": 20.84262295081967, "grad_norm": 8.32937240600586, "learning_rate": 1.837623181625085e-05, "loss": 1.2935, "step": 6357 }, { "epoch": 20.845901639344262, "grad_norm": 8.134034156799316, "learning_rate": 1.8375651714869926e-05, "loss": 1.2158, "step": 6358 }, { "epoch": 20.84918032786885, "grad_norm": 12.91006088256836, "learning_rate": 1.8375071519044826e-05, "loss": 1.0326, "step": 6359 }, { "epoch": 20.852459016393443, "grad_norm": 7.93954610824585, "learning_rate": 1.837449122878208e-05, "loss": 1.429, "step": 6360 }, { "epoch": 20.855737704918035, "grad_norm": 10.19822883605957, "learning_rate": 1.837391084408824e-05, "loss": 1.396, "step": 6361 }, { "epoch": 20.859016393442623, "grad_norm": 8.283929824829102, "learning_rate": 1.8373330364969856e-05, "loss": 1.3295, "step": 6362 }, { "epoch": 20.862295081967215, "grad_norm": 62.53342819213867, "learning_rate": 1.837274979143346e-05, "loss": 1.4434, "step": 6363 }, { "epoch": 20.865573770491803, "grad_norm": 12.212371826171875, "learning_rate": 1.8372169123485605e-05, "loss": 1.3542, "step": 6364 }, { "epoch": 20.868852459016395, "grad_norm": 10.660745620727539, "learning_rate": 1.837158836113284e-05, "loss": 1.3307, "step": 6365 }, { "epoch": 20.872131147540983, "grad_norm": 9.260540008544922, "learning_rate": 1.837100750438171e-05, "loss": 1.3254, "step": 6366 }, { "epoch": 20.875409836065575, "grad_norm": 9.118934631347656, "learning_rate": 1.837042655323877e-05, "loss": 1.5806, "step": 6367 }, { "epoch": 20.878688524590164, "grad_norm": 19.84263801574707, "learning_rate": 1.836984550771056e-05, "loss": 1.4453, "step": 6368 }, { "epoch": 20.881967213114756, "grad_norm": 8.151179313659668, "learning_rate": 1.8369264367803646e-05, "loss": 1.2605, "step": 6369 }, { "epoch": 20.885245901639344, "grad_norm": 18.96211814880371, "learning_rate": 1.8368683133524576e-05, "loss": 1.4194, "step": 6370 }, { "epoch": 20.888524590163936, "grad_norm": 7.647970199584961, "learning_rate": 1.83681018048799e-05, "loss": 1.4321, "step": 6371 }, { "epoch": 20.891803278688524, "grad_norm": 12.583844184875488, "learning_rate": 1.8367520381876173e-05, "loss": 1.3191, "step": 6372 }, { "epoch": 20.895081967213116, "grad_norm": 6.89016580581665, "learning_rate": 1.8366938864519955e-05, "loss": 1.4529, "step": 6373 }, { "epoch": 20.898360655737704, "grad_norm": 8.194912910461426, "learning_rate": 1.8366357252817803e-05, "loss": 1.3821, "step": 6374 }, { "epoch": 20.901639344262296, "grad_norm": 9.028703689575195, "learning_rate": 1.8365775546776272e-05, "loss": 1.3196, "step": 6375 }, { "epoch": 20.904918032786885, "grad_norm": 9.16824722290039, "learning_rate": 1.8365193746401926e-05, "loss": 1.1929, "step": 6376 }, { "epoch": 20.908196721311477, "grad_norm": 9.17095947265625, "learning_rate": 1.8364611851701322e-05, "loss": 1.5874, "step": 6377 }, { "epoch": 20.911475409836065, "grad_norm": 8.212725639343262, "learning_rate": 1.8364029862681022e-05, "loss": 1.3455, "step": 6378 }, { "epoch": 20.914754098360657, "grad_norm": 13.785562515258789, "learning_rate": 1.836344777934759e-05, "loss": 1.3831, "step": 6379 }, { "epoch": 20.918032786885245, "grad_norm": 10.170683860778809, "learning_rate": 1.836286560170759e-05, "loss": 1.4873, "step": 6380 }, { "epoch": 20.921311475409837, "grad_norm": 17.690105438232422, "learning_rate": 1.8362283329767577e-05, "loss": 1.1633, "step": 6381 }, { "epoch": 20.924590163934425, "grad_norm": 9.164934158325195, "learning_rate": 1.8361700963534134e-05, "loss": 1.3958, "step": 6382 }, { "epoch": 20.927868852459017, "grad_norm": 9.301403045654297, "learning_rate": 1.8361118503013814e-05, "loss": 1.3589, "step": 6383 }, { "epoch": 20.931147540983606, "grad_norm": 16.670001983642578, "learning_rate": 1.836053594821319e-05, "loss": 1.3591, "step": 6384 }, { "epoch": 20.934426229508198, "grad_norm": 9.890294075012207, "learning_rate": 1.8359953299138832e-05, "loss": 1.2975, "step": 6385 }, { "epoch": 20.937704918032786, "grad_norm": 9.8230619430542, "learning_rate": 1.8359370555797308e-05, "loss": 1.5132, "step": 6386 }, { "epoch": 20.940983606557378, "grad_norm": 6.932863235473633, "learning_rate": 1.835878771819519e-05, "loss": 1.4956, "step": 6387 }, { "epoch": 20.944262295081966, "grad_norm": 7.6394243240356445, "learning_rate": 1.8358204786339046e-05, "loss": 1.281, "step": 6388 }, { "epoch": 20.947540983606558, "grad_norm": 10.513296127319336, "learning_rate": 1.8357621760235454e-05, "loss": 1.3033, "step": 6389 }, { "epoch": 20.950819672131146, "grad_norm": 10.731789588928223, "learning_rate": 1.835703863989099e-05, "loss": 1.3169, "step": 6390 }, { "epoch": 20.95409836065574, "grad_norm": 11.265450477600098, "learning_rate": 1.8356455425312224e-05, "loss": 1.4186, "step": 6391 }, { "epoch": 20.957377049180327, "grad_norm": 8.742026329040527, "learning_rate": 1.8355872116505735e-05, "loss": 1.3867, "step": 6392 }, { "epoch": 20.96065573770492, "grad_norm": 9.21419906616211, "learning_rate": 1.83552887134781e-05, "loss": 1.2419, "step": 6393 }, { "epoch": 20.963934426229507, "grad_norm": 7.637161731719971, "learning_rate": 1.8354705216235896e-05, "loss": 1.1763, "step": 6394 }, { "epoch": 20.9672131147541, "grad_norm": 7.863580226898193, "learning_rate": 1.8354121624785707e-05, "loss": 1.5303, "step": 6395 }, { "epoch": 20.970491803278687, "grad_norm": 8.547701835632324, "learning_rate": 1.8353537939134107e-05, "loss": 1.2888, "step": 6396 }, { "epoch": 20.97377049180328, "grad_norm": 8.105032920837402, "learning_rate": 1.8352954159287683e-05, "loss": 1.1775, "step": 6397 }, { "epoch": 20.977049180327867, "grad_norm": 8.403120040893555, "learning_rate": 1.8352370285253018e-05, "loss": 1.1089, "step": 6398 }, { "epoch": 20.98032786885246, "grad_norm": 8.964822769165039, "learning_rate": 1.835178631703669e-05, "loss": 1.4016, "step": 6399 }, { "epoch": 20.983606557377048, "grad_norm": 5.941895008087158, "learning_rate": 1.8351202254645287e-05, "loss": 1.4536, "step": 6400 }, { "epoch": 20.98688524590164, "grad_norm": 7.213320732116699, "learning_rate": 1.8350618098085398e-05, "loss": 1.479, "step": 6401 }, { "epoch": 20.990163934426228, "grad_norm": 9.22203540802002, "learning_rate": 1.8350033847363608e-05, "loss": 1.3374, "step": 6402 }, { "epoch": 20.99344262295082, "grad_norm": 7.251187801361084, "learning_rate": 1.8349449502486505e-05, "loss": 1.3511, "step": 6403 }, { "epoch": 20.99672131147541, "grad_norm": 6.488349914550781, "learning_rate": 1.8348865063460675e-05, "loss": 1.3101, "step": 6404 }, { "epoch": 21.0, "grad_norm": 9.065382957458496, "learning_rate": 1.8348280530292712e-05, "loss": 1.4073, "step": 6405 }, { "epoch": 21.003278688524592, "grad_norm": 9.183079719543457, "learning_rate": 1.8347695902989208e-05, "loss": 1.4805, "step": 6406 }, { "epoch": 21.00655737704918, "grad_norm": 10.086148262023926, "learning_rate": 1.834711118155675e-05, "loss": 1.2207, "step": 6407 }, { "epoch": 21.009836065573772, "grad_norm": 6.7176079750061035, "learning_rate": 1.8346526366001936e-05, "loss": 1.3306, "step": 6408 }, { "epoch": 21.01311475409836, "grad_norm": 12.074140548706055, "learning_rate": 1.834594145633136e-05, "loss": 1.3254, "step": 6409 }, { "epoch": 21.016393442622952, "grad_norm": 8.826446533203125, "learning_rate": 1.8345356452551616e-05, "loss": 1.2701, "step": 6410 }, { "epoch": 21.01967213114754, "grad_norm": 11.086338996887207, "learning_rate": 1.83447713546693e-05, "loss": 1.2263, "step": 6411 }, { "epoch": 21.022950819672133, "grad_norm": 7.7809014320373535, "learning_rate": 1.8344186162691012e-05, "loss": 1.1163, "step": 6412 }, { "epoch": 21.02622950819672, "grad_norm": 10.001270294189453, "learning_rate": 1.8343600876623347e-05, "loss": 1.1067, "step": 6413 }, { "epoch": 21.029508196721313, "grad_norm": 7.1323370933532715, "learning_rate": 1.834301549647291e-05, "loss": 1.3362, "step": 6414 }, { "epoch": 21.0327868852459, "grad_norm": 8.509360313415527, "learning_rate": 1.8342430022246295e-05, "loss": 1.3245, "step": 6415 }, { "epoch": 21.036065573770493, "grad_norm": 9.904582023620605, "learning_rate": 1.8341844453950108e-05, "loss": 1.3721, "step": 6416 }, { "epoch": 21.03934426229508, "grad_norm": 8.812411308288574, "learning_rate": 1.8341258791590953e-05, "loss": 1.3684, "step": 6417 }, { "epoch": 21.042622950819673, "grad_norm": 16.41347312927246, "learning_rate": 1.8340673035175433e-05, "loss": 1.1608, "step": 6418 }, { "epoch": 21.04590163934426, "grad_norm": 10.19005012512207, "learning_rate": 1.8340087184710152e-05, "loss": 1.3589, "step": 6419 }, { "epoch": 21.049180327868854, "grad_norm": 9.590958595275879, "learning_rate": 1.8339501240201717e-05, "loss": 1.3491, "step": 6420 }, { "epoch": 21.052459016393442, "grad_norm": 9.453815460205078, "learning_rate": 1.8338915201656735e-05, "loss": 1.4951, "step": 6421 }, { "epoch": 21.055737704918034, "grad_norm": 7.302789688110352, "learning_rate": 1.833832906908181e-05, "loss": 1.2449, "step": 6422 }, { "epoch": 21.059016393442622, "grad_norm": 9.107691764831543, "learning_rate": 1.8337742842483558e-05, "loss": 1.2546, "step": 6423 }, { "epoch": 21.062295081967214, "grad_norm": 14.187833786010742, "learning_rate": 1.8337156521868587e-05, "loss": 1.3955, "step": 6424 }, { "epoch": 21.065573770491802, "grad_norm": 7.87606143951416, "learning_rate": 1.8336570107243507e-05, "loss": 1.1587, "step": 6425 }, { "epoch": 21.068852459016394, "grad_norm": 9.751505851745605, "learning_rate": 1.833598359861493e-05, "loss": 1.2974, "step": 6426 }, { "epoch": 21.072131147540983, "grad_norm": 8.3711576461792, "learning_rate": 1.8335396995989474e-05, "loss": 1.2415, "step": 6427 }, { "epoch": 21.075409836065575, "grad_norm": 9.954106330871582, "learning_rate": 1.8334810299373746e-05, "loss": 1.1431, "step": 6428 }, { "epoch": 21.078688524590163, "grad_norm": 7.170368671417236, "learning_rate": 1.8334223508774366e-05, "loss": 1.3848, "step": 6429 }, { "epoch": 21.081967213114755, "grad_norm": 9.158522605895996, "learning_rate": 1.8333636624197954e-05, "loss": 1.5493, "step": 6430 }, { "epoch": 21.085245901639343, "grad_norm": 7.248652935028076, "learning_rate": 1.833304964565112e-05, "loss": 1.3381, "step": 6431 }, { "epoch": 21.088524590163935, "grad_norm": 6.531339168548584, "learning_rate": 1.8332462573140494e-05, "loss": 1.5776, "step": 6432 }, { "epoch": 21.091803278688523, "grad_norm": 7.981857776641846, "learning_rate": 1.8331875406672683e-05, "loss": 1.3389, "step": 6433 }, { "epoch": 21.095081967213115, "grad_norm": 8.396663665771484, "learning_rate": 1.8331288146254315e-05, "loss": 1.3099, "step": 6434 }, { "epoch": 21.098360655737704, "grad_norm": 9.622721672058105, "learning_rate": 1.833070079189201e-05, "loss": 1.3413, "step": 6435 }, { "epoch": 21.101639344262296, "grad_norm": 13.702903747558594, "learning_rate": 1.8330113343592394e-05, "loss": 1.4211, "step": 6436 }, { "epoch": 21.104918032786884, "grad_norm": 8.842732429504395, "learning_rate": 1.8329525801362086e-05, "loss": 1.4475, "step": 6437 }, { "epoch": 21.108196721311476, "grad_norm": 42.87361526489258, "learning_rate": 1.8328938165207716e-05, "loss": 1.1282, "step": 6438 }, { "epoch": 21.111475409836064, "grad_norm": 10.746010780334473, "learning_rate": 1.8328350435135908e-05, "loss": 1.355, "step": 6439 }, { "epoch": 21.114754098360656, "grad_norm": 8.429924011230469, "learning_rate": 1.832776261115329e-05, "loss": 1.3364, "step": 6440 }, { "epoch": 21.118032786885244, "grad_norm": 10.661102294921875, "learning_rate": 1.832717469326649e-05, "loss": 1.4128, "step": 6441 }, { "epoch": 21.121311475409836, "grad_norm": 9.49242877960205, "learning_rate": 1.8326586681482137e-05, "loss": 1.426, "step": 6442 }, { "epoch": 21.124590163934425, "grad_norm": 9.075321197509766, "learning_rate": 1.8325998575806863e-05, "loss": 1.1006, "step": 6443 }, { "epoch": 21.127868852459017, "grad_norm": 8.874678611755371, "learning_rate": 1.8325410376247295e-05, "loss": 1.3506, "step": 6444 }, { "epoch": 21.131147540983605, "grad_norm": 8.656864166259766, "learning_rate": 1.832482208281007e-05, "loss": 1.5093, "step": 6445 }, { "epoch": 21.134426229508197, "grad_norm": 15.622310638427734, "learning_rate": 1.832423369550182e-05, "loss": 1.2307, "step": 6446 }, { "epoch": 21.137704918032785, "grad_norm": 130.27174377441406, "learning_rate": 1.8323645214329183e-05, "loss": 1.24, "step": 6447 }, { "epoch": 21.140983606557377, "grad_norm": 7.167647361755371, "learning_rate": 1.8323056639298788e-05, "loss": 1.3118, "step": 6448 }, { "epoch": 21.14426229508197, "grad_norm": 13.45563793182373, "learning_rate": 1.8322467970417278e-05, "loss": 1.2067, "step": 6449 }, { "epoch": 21.147540983606557, "grad_norm": 9.202033996582031, "learning_rate": 1.8321879207691287e-05, "loss": 1.299, "step": 6450 }, { "epoch": 21.15081967213115, "grad_norm": 19.51340103149414, "learning_rate": 1.8321290351127455e-05, "loss": 1.274, "step": 6451 }, { "epoch": 21.154098360655738, "grad_norm": 12.574007034301758, "learning_rate": 1.8320701400732424e-05, "loss": 1.0714, "step": 6452 }, { "epoch": 21.15737704918033, "grad_norm": 11.91700553894043, "learning_rate": 1.832011235651283e-05, "loss": 1.3921, "step": 6453 }, { "epoch": 21.160655737704918, "grad_norm": 7.674149513244629, "learning_rate": 1.8319523218475323e-05, "loss": 1.2626, "step": 6454 }, { "epoch": 21.16393442622951, "grad_norm": 17.210664749145508, "learning_rate": 1.831893398662654e-05, "loss": 1.2539, "step": 6455 }, { "epoch": 21.167213114754098, "grad_norm": 37.284610748291016, "learning_rate": 1.8318344660973125e-05, "loss": 1.2576, "step": 6456 }, { "epoch": 21.17049180327869, "grad_norm": 10.621808052062988, "learning_rate": 1.8317755241521727e-05, "loss": 1.1465, "step": 6457 }, { "epoch": 21.17377049180328, "grad_norm": 17.92538070678711, "learning_rate": 1.831716572827899e-05, "loss": 1.2278, "step": 6458 }, { "epoch": 21.17704918032787, "grad_norm": 13.57617473602295, "learning_rate": 1.831657612125156e-05, "loss": 1.2292, "step": 6459 }, { "epoch": 21.18032786885246, "grad_norm": 17.059019088745117, "learning_rate": 1.831598642044609e-05, "loss": 1.2748, "step": 6460 }, { "epoch": 21.18360655737705, "grad_norm": 12.532966613769531, "learning_rate": 1.8315396625869226e-05, "loss": 1.3773, "step": 6461 }, { "epoch": 21.18688524590164, "grad_norm": 10.919483184814453, "learning_rate": 1.8314806737527616e-05, "loss": 1.2167, "step": 6462 }, { "epoch": 21.19016393442623, "grad_norm": 7.809255599975586, "learning_rate": 1.8314216755427922e-05, "loss": 1.2515, "step": 6463 }, { "epoch": 21.19344262295082, "grad_norm": 13.360515594482422, "learning_rate": 1.831362667957678e-05, "loss": 1.2058, "step": 6464 }, { "epoch": 21.19672131147541, "grad_norm": 11.965765953063965, "learning_rate": 1.831303650998086e-05, "loss": 1.37, "step": 6465 }, { "epoch": 21.2, "grad_norm": 42.10258102416992, "learning_rate": 1.831244624664681e-05, "loss": 1.2987, "step": 6466 }, { "epoch": 21.20327868852459, "grad_norm": 6.879916191101074, "learning_rate": 1.8311855889581283e-05, "loss": 1.4337, "step": 6467 }, { "epoch": 21.20655737704918, "grad_norm": 12.22519302368164, "learning_rate": 1.8311265438790937e-05, "loss": 1.2506, "step": 6468 }, { "epoch": 21.20983606557377, "grad_norm": 10.000604629516602, "learning_rate": 1.8310674894282433e-05, "loss": 1.3667, "step": 6469 }, { "epoch": 21.21311475409836, "grad_norm": 11.303496360778809, "learning_rate": 1.831008425606243e-05, "loss": 1.4404, "step": 6470 }, { "epoch": 21.21639344262295, "grad_norm": 12.029296875, "learning_rate": 1.8309493524137587e-05, "loss": 1.4211, "step": 6471 }, { "epoch": 21.21967213114754, "grad_norm": 25.18040657043457, "learning_rate": 1.8308902698514565e-05, "loss": 1.4586, "step": 6472 }, { "epoch": 21.222950819672132, "grad_norm": 9.524029731750488, "learning_rate": 1.8308311779200027e-05, "loss": 1.3044, "step": 6473 }, { "epoch": 21.22622950819672, "grad_norm": 9.213274955749512, "learning_rate": 1.8307720766200633e-05, "loss": 1.5098, "step": 6474 }, { "epoch": 21.229508196721312, "grad_norm": 9.674110412597656, "learning_rate": 1.830712965952305e-05, "loss": 1.2727, "step": 6475 }, { "epoch": 21.2327868852459, "grad_norm": 12.217728614807129, "learning_rate": 1.830653845917394e-05, "loss": 1.1187, "step": 6476 }, { "epoch": 21.236065573770492, "grad_norm": 14.183650970458984, "learning_rate": 1.8305947165159975e-05, "loss": 1.2891, "step": 6477 }, { "epoch": 21.23934426229508, "grad_norm": 12.317159652709961, "learning_rate": 1.830535577748782e-05, "loss": 1.449, "step": 6478 }, { "epoch": 21.242622950819673, "grad_norm": 17.79361915588379, "learning_rate": 1.8304764296164145e-05, "loss": 1.223, "step": 6479 }, { "epoch": 21.24590163934426, "grad_norm": 11.158480644226074, "learning_rate": 1.8304172721195615e-05, "loss": 1.3, "step": 6480 }, { "epoch": 21.249180327868853, "grad_norm": 10.883320808410645, "learning_rate": 1.8303581052588905e-05, "loss": 1.3464, "step": 6481 }, { "epoch": 21.25245901639344, "grad_norm": 19.077880859375, "learning_rate": 1.830298929035068e-05, "loss": 1.2861, "step": 6482 }, { "epoch": 21.255737704918033, "grad_norm": 12.877900123596191, "learning_rate": 1.8302397434487625e-05, "loss": 1.3623, "step": 6483 }, { "epoch": 21.25901639344262, "grad_norm": 9.777122497558594, "learning_rate": 1.8301805485006402e-05, "loss": 1.1997, "step": 6484 }, { "epoch": 21.262295081967213, "grad_norm": 10.80992317199707, "learning_rate": 1.8301213441913693e-05, "loss": 1.1171, "step": 6485 }, { "epoch": 21.2655737704918, "grad_norm": 12.202701568603516, "learning_rate": 1.8300621305216172e-05, "loss": 1.3617, "step": 6486 }, { "epoch": 21.268852459016394, "grad_norm": 8.330087661743164, "learning_rate": 1.8300029074920512e-05, "loss": 1.1882, "step": 6487 }, { "epoch": 21.272131147540982, "grad_norm": 8.897217750549316, "learning_rate": 1.8299436751033396e-05, "loss": 1.1648, "step": 6488 }, { "epoch": 21.275409836065574, "grad_norm": 10.61178207397461, "learning_rate": 1.8298844333561502e-05, "loss": 1.2708, "step": 6489 }, { "epoch": 21.278688524590162, "grad_norm": 15.365729331970215, "learning_rate": 1.829825182251151e-05, "loss": 1.5137, "step": 6490 }, { "epoch": 21.281967213114754, "grad_norm": 8.65590763092041, "learning_rate": 1.8297659217890095e-05, "loss": 1.4431, "step": 6491 }, { "epoch": 21.285245901639342, "grad_norm": 14.018333435058594, "learning_rate": 1.829706651970395e-05, "loss": 1.2854, "step": 6492 }, { "epoch": 21.288524590163934, "grad_norm": 9.532332420349121, "learning_rate": 1.8296473727959755e-05, "loss": 1.2793, "step": 6493 }, { "epoch": 21.291803278688526, "grad_norm": 15.8489990234375, "learning_rate": 1.829588084266419e-05, "loss": 0.9955, "step": 6494 }, { "epoch": 21.295081967213115, "grad_norm": 9.507746696472168, "learning_rate": 1.8295287863823944e-05, "loss": 1.2656, "step": 6495 }, { "epoch": 21.298360655737707, "grad_norm": 9.190896987915039, "learning_rate": 1.82946947914457e-05, "loss": 1.4797, "step": 6496 }, { "epoch": 21.301639344262295, "grad_norm": 12.510631561279297, "learning_rate": 1.829410162553615e-05, "loss": 1.1943, "step": 6497 }, { "epoch": 21.304918032786887, "grad_norm": 9.127114295959473, "learning_rate": 1.8293508366101977e-05, "loss": 1.3093, "step": 6498 }, { "epoch": 21.308196721311475, "grad_norm": 8.536416053771973, "learning_rate": 1.8292915013149878e-05, "loss": 1.3077, "step": 6499 }, { "epoch": 21.311475409836067, "grad_norm": 20.468582153320312, "learning_rate": 1.8292321566686538e-05, "loss": 1.2084, "step": 6500 }, { "epoch": 21.314754098360655, "grad_norm": 69.01753997802734, "learning_rate": 1.829172802671865e-05, "loss": 1.2501, "step": 6501 }, { "epoch": 21.318032786885247, "grad_norm": 13.90781307220459, "learning_rate": 1.829113439325291e-05, "loss": 0.9753, "step": 6502 }, { "epoch": 21.321311475409836, "grad_norm": 9.706311225891113, "learning_rate": 1.8290540666296008e-05, "loss": 1.3431, "step": 6503 }, { "epoch": 21.324590163934428, "grad_norm": 5.705052852630615, "learning_rate": 1.8289946845854638e-05, "loss": 1.562, "step": 6504 }, { "epoch": 21.327868852459016, "grad_norm": 8.428292274475098, "learning_rate": 1.82893529319355e-05, "loss": 1.2454, "step": 6505 }, { "epoch": 21.331147540983608, "grad_norm": 14.948293685913086, "learning_rate": 1.8288758924545287e-05, "loss": 1.2947, "step": 6506 }, { "epoch": 21.334426229508196, "grad_norm": 9.598677635192871, "learning_rate": 1.82881648236907e-05, "loss": 1.4938, "step": 6507 }, { "epoch": 21.337704918032788, "grad_norm": 7.7920966148376465, "learning_rate": 1.828757062937844e-05, "loss": 1.3003, "step": 6508 }, { "epoch": 21.340983606557376, "grad_norm": 10.729846954345703, "learning_rate": 1.82869763416152e-05, "loss": 1.2214, "step": 6509 }, { "epoch": 21.34426229508197, "grad_norm": 8.204353332519531, "learning_rate": 1.828638196040769e-05, "loss": 1.2717, "step": 6510 }, { "epoch": 21.347540983606557, "grad_norm": 8.294573783874512, "learning_rate": 1.8285787485762602e-05, "loss": 1.2114, "step": 6511 }, { "epoch": 21.35081967213115, "grad_norm": 7.9261579513549805, "learning_rate": 1.828519291768665e-05, "loss": 1.5537, "step": 6512 }, { "epoch": 21.354098360655737, "grad_norm": 32.263641357421875, "learning_rate": 1.828459825618653e-05, "loss": 1.1019, "step": 6513 }, { "epoch": 21.35737704918033, "grad_norm": 18.68490219116211, "learning_rate": 1.8284003501268953e-05, "loss": 1.3953, "step": 6514 }, { "epoch": 21.360655737704917, "grad_norm": 8.87709903717041, "learning_rate": 1.8283408652940623e-05, "loss": 1.3643, "step": 6515 }, { "epoch": 21.36393442622951, "grad_norm": 9.195331573486328, "learning_rate": 1.8282813711208246e-05, "loss": 1.3621, "step": 6516 }, { "epoch": 21.367213114754097, "grad_norm": 15.129886627197266, "learning_rate": 1.8282218676078532e-05, "loss": 1.3933, "step": 6517 }, { "epoch": 21.37049180327869, "grad_norm": 12.593951225280762, "learning_rate": 1.8281623547558195e-05, "loss": 1.3102, "step": 6518 }, { "epoch": 21.373770491803278, "grad_norm": 13.529362678527832, "learning_rate": 1.828102832565394e-05, "loss": 1.3225, "step": 6519 }, { "epoch": 21.37704918032787, "grad_norm": 10.191488265991211, "learning_rate": 1.8280433010372476e-05, "loss": 1.3672, "step": 6520 }, { "epoch": 21.380327868852458, "grad_norm": 10.735076904296875, "learning_rate": 1.8279837601720525e-05, "loss": 1.2388, "step": 6521 }, { "epoch": 21.38360655737705, "grad_norm": 8.627421379089355, "learning_rate": 1.8279242099704798e-05, "loss": 1.3958, "step": 6522 }, { "epoch": 21.386885245901638, "grad_norm": 9.922264099121094, "learning_rate": 1.8278646504332006e-05, "loss": 1.2925, "step": 6523 }, { "epoch": 21.39016393442623, "grad_norm": 11.345307350158691, "learning_rate": 1.8278050815608865e-05, "loss": 1.345, "step": 6524 }, { "epoch": 21.39344262295082, "grad_norm": 6.520942687988281, "learning_rate": 1.8277455033542097e-05, "loss": 1.52, "step": 6525 }, { "epoch": 21.39672131147541, "grad_norm": 15.975083351135254, "learning_rate": 1.8276859158138414e-05, "loss": 1.3048, "step": 6526 }, { "epoch": 21.4, "grad_norm": 12.620408058166504, "learning_rate": 1.827626318940454e-05, "loss": 1.3496, "step": 6527 }, { "epoch": 21.40327868852459, "grad_norm": 9.867778778076172, "learning_rate": 1.8275667127347194e-05, "loss": 1.623, "step": 6528 }, { "epoch": 21.40655737704918, "grad_norm": 24.609495162963867, "learning_rate": 1.8275070971973097e-05, "loss": 1.31, "step": 6529 }, { "epoch": 21.40983606557377, "grad_norm": 9.773468017578125, "learning_rate": 1.8274474723288967e-05, "loss": 1.1191, "step": 6530 }, { "epoch": 21.41311475409836, "grad_norm": 8.039523124694824, "learning_rate": 1.827387838130154e-05, "loss": 1.3306, "step": 6531 }, { "epoch": 21.41639344262295, "grad_norm": 9.787378311157227, "learning_rate": 1.8273281946017524e-05, "loss": 1.502, "step": 6532 }, { "epoch": 21.41967213114754, "grad_norm": 9.760950088500977, "learning_rate": 1.827268541744365e-05, "loss": 1.4161, "step": 6533 }, { "epoch": 21.42295081967213, "grad_norm": 9.2996187210083, "learning_rate": 1.8272088795586654e-05, "loss": 1.2759, "step": 6534 }, { "epoch": 21.42622950819672, "grad_norm": 8.318441390991211, "learning_rate": 1.827149208045325e-05, "loss": 1.4597, "step": 6535 }, { "epoch": 21.42950819672131, "grad_norm": 11.200879096984863, "learning_rate": 1.8270895272050175e-05, "loss": 1.3884, "step": 6536 }, { "epoch": 21.432786885245903, "grad_norm": 11.031292915344238, "learning_rate": 1.8270298370384155e-05, "loss": 1.3235, "step": 6537 }, { "epoch": 21.43606557377049, "grad_norm": 7.555553913116455, "learning_rate": 1.8269701375461925e-05, "loss": 1.5183, "step": 6538 }, { "epoch": 21.439344262295084, "grad_norm": 10.173242568969727, "learning_rate": 1.826910428729021e-05, "loss": 1.2312, "step": 6539 }, { "epoch": 21.442622950819672, "grad_norm": 10.642006874084473, "learning_rate": 1.826850710587575e-05, "loss": 1.4202, "step": 6540 }, { "epoch": 21.445901639344264, "grad_norm": 8.052522659301758, "learning_rate": 1.826790983122527e-05, "loss": 1.4839, "step": 6541 }, { "epoch": 21.449180327868852, "grad_norm": 9.092856407165527, "learning_rate": 1.8267312463345514e-05, "loss": 1.4033, "step": 6542 }, { "epoch": 21.452459016393444, "grad_norm": 11.134171485900879, "learning_rate": 1.8266715002243214e-05, "loss": 0.9775, "step": 6543 }, { "epoch": 21.455737704918032, "grad_norm": 7.960520267486572, "learning_rate": 1.8266117447925108e-05, "loss": 1.2377, "step": 6544 }, { "epoch": 21.459016393442624, "grad_norm": 8.029797554016113, "learning_rate": 1.826551980039793e-05, "loss": 1.4075, "step": 6545 }, { "epoch": 21.462295081967213, "grad_norm": 9.368011474609375, "learning_rate": 1.8264922059668425e-05, "loss": 1.2634, "step": 6546 }, { "epoch": 21.465573770491805, "grad_norm": 10.044761657714844, "learning_rate": 1.826432422574333e-05, "loss": 1.2299, "step": 6547 }, { "epoch": 21.468852459016393, "grad_norm": 7.448816776275635, "learning_rate": 1.8263726298629384e-05, "loss": 1.3138, "step": 6548 }, { "epoch": 21.472131147540985, "grad_norm": 13.656652450561523, "learning_rate": 1.8263128278333334e-05, "loss": 1.2897, "step": 6549 }, { "epoch": 21.475409836065573, "grad_norm": 7.2910542488098145, "learning_rate": 1.826253016486192e-05, "loss": 1.1995, "step": 6550 }, { "epoch": 21.478688524590165, "grad_norm": 7.38451623916626, "learning_rate": 1.8261931958221892e-05, "loss": 1.3706, "step": 6551 }, { "epoch": 21.481967213114753, "grad_norm": 11.345658302307129, "learning_rate": 1.8261333658419985e-05, "loss": 1.3555, "step": 6552 }, { "epoch": 21.485245901639345, "grad_norm": 9.248167991638184, "learning_rate": 1.8260735265462957e-05, "loss": 1.3795, "step": 6553 }, { "epoch": 21.488524590163934, "grad_norm": 7.545040130615234, "learning_rate": 1.8260136779357546e-05, "loss": 1.1316, "step": 6554 }, { "epoch": 21.491803278688526, "grad_norm": 10.260719299316406, "learning_rate": 1.8259538200110505e-05, "loss": 1.1589, "step": 6555 }, { "epoch": 21.495081967213114, "grad_norm": 8.683096885681152, "learning_rate": 1.8258939527728583e-05, "loss": 1.2244, "step": 6556 }, { "epoch": 21.498360655737706, "grad_norm": 6.641821384429932, "learning_rate": 1.8258340762218532e-05, "loss": 1.4717, "step": 6557 }, { "epoch": 21.501639344262294, "grad_norm": 8.601326942443848, "learning_rate": 1.82577419035871e-05, "loss": 1.2612, "step": 6558 }, { "epoch": 21.504918032786886, "grad_norm": 16.909759521484375, "learning_rate": 1.8257142951841043e-05, "loss": 1.4983, "step": 6559 }, { "epoch": 21.508196721311474, "grad_norm": 11.316946983337402, "learning_rate": 1.8256543906987116e-05, "loss": 1.2711, "step": 6560 }, { "epoch": 21.511475409836066, "grad_norm": 19.157377243041992, "learning_rate": 1.825594476903207e-05, "loss": 1.255, "step": 6561 }, { "epoch": 21.514754098360655, "grad_norm": 10.760077476501465, "learning_rate": 1.8255345537982664e-05, "loss": 1.3219, "step": 6562 }, { "epoch": 21.518032786885247, "grad_norm": 13.56705093383789, "learning_rate": 1.825474621384565e-05, "loss": 1.2744, "step": 6563 }, { "epoch": 21.521311475409835, "grad_norm": 6.998266696929932, "learning_rate": 1.8254146796627793e-05, "loss": 1.2291, "step": 6564 }, { "epoch": 21.524590163934427, "grad_norm": 8.584768295288086, "learning_rate": 1.8253547286335848e-05, "loss": 1.499, "step": 6565 }, { "epoch": 21.527868852459015, "grad_norm": 8.251226425170898, "learning_rate": 1.8252947682976575e-05, "loss": 1.329, "step": 6566 }, { "epoch": 21.531147540983607, "grad_norm": 9.276113510131836, "learning_rate": 1.8252347986556736e-05, "loss": 1.2168, "step": 6567 }, { "epoch": 21.534426229508195, "grad_norm": 9.988510131835938, "learning_rate": 1.8251748197083098e-05, "loss": 1.3279, "step": 6568 }, { "epoch": 21.537704918032787, "grad_norm": 8.513280868530273, "learning_rate": 1.8251148314562416e-05, "loss": 1.2878, "step": 6569 }, { "epoch": 21.540983606557376, "grad_norm": 7.560437202453613, "learning_rate": 1.8250548339001456e-05, "loss": 1.4651, "step": 6570 }, { "epoch": 21.544262295081968, "grad_norm": 9.94267749786377, "learning_rate": 1.8249948270406985e-05, "loss": 1.442, "step": 6571 }, { "epoch": 21.547540983606556, "grad_norm": 7.735778331756592, "learning_rate": 1.8249348108785772e-05, "loss": 1.252, "step": 6572 }, { "epoch": 21.550819672131148, "grad_norm": 9.471702575683594, "learning_rate": 1.824874785414458e-05, "loss": 1.3623, "step": 6573 }, { "epoch": 21.554098360655736, "grad_norm": 7.542616367340088, "learning_rate": 1.8248147506490175e-05, "loss": 1.4373, "step": 6574 }, { "epoch": 21.557377049180328, "grad_norm": 8.179489135742188, "learning_rate": 1.824754706582934e-05, "loss": 1.1494, "step": 6575 }, { "epoch": 21.560655737704916, "grad_norm": 9.40566349029541, "learning_rate": 1.824694653216883e-05, "loss": 1.3774, "step": 6576 }, { "epoch": 21.56393442622951, "grad_norm": 7.716579914093018, "learning_rate": 1.8246345905515427e-05, "loss": 1.371, "step": 6577 }, { "epoch": 21.567213114754097, "grad_norm": 7.760453224182129, "learning_rate": 1.82457451858759e-05, "loss": 1.2944, "step": 6578 }, { "epoch": 21.57049180327869, "grad_norm": 9.154837608337402, "learning_rate": 1.8245144373257018e-05, "loss": 1.2449, "step": 6579 }, { "epoch": 21.57377049180328, "grad_norm": 8.265761375427246, "learning_rate": 1.8244543467665564e-05, "loss": 1.3433, "step": 6580 }, { "epoch": 21.57704918032787, "grad_norm": 9.035888671875, "learning_rate": 1.8243942469108308e-05, "loss": 1.276, "step": 6581 }, { "epoch": 21.58032786885246, "grad_norm": 14.710308074951172, "learning_rate": 1.8243341377592032e-05, "loss": 1.4021, "step": 6582 }, { "epoch": 21.58360655737705, "grad_norm": 9.743270874023438, "learning_rate": 1.824274019312351e-05, "loss": 1.3083, "step": 6583 }, { "epoch": 21.58688524590164, "grad_norm": 10.931258201599121, "learning_rate": 1.824213891570952e-05, "loss": 1.3184, "step": 6584 }, { "epoch": 21.59016393442623, "grad_norm": 9.6176118850708, "learning_rate": 1.8241537545356846e-05, "loss": 1.1245, "step": 6585 }, { "epoch": 21.59344262295082, "grad_norm": 25.588335037231445, "learning_rate": 1.824093608207227e-05, "loss": 1.1979, "step": 6586 }, { "epoch": 21.59672131147541, "grad_norm": 7.230180740356445, "learning_rate": 1.8240334525862565e-05, "loss": 1.4065, "step": 6587 }, { "epoch": 21.6, "grad_norm": 8.171823501586914, "learning_rate": 1.8239732876734525e-05, "loss": 1.3918, "step": 6588 }, { "epoch": 21.60327868852459, "grad_norm": 15.714916229248047, "learning_rate": 1.823913113469493e-05, "loss": 1.2988, "step": 6589 }, { "epoch": 21.60655737704918, "grad_norm": 5.668178558349609, "learning_rate": 1.823852929975056e-05, "loss": 1.5256, "step": 6590 }, { "epoch": 21.60983606557377, "grad_norm": 7.047585964202881, "learning_rate": 1.823792737190821e-05, "loss": 1.162, "step": 6591 }, { "epoch": 21.613114754098362, "grad_norm": 10.081779479980469, "learning_rate": 1.8237325351174663e-05, "loss": 1.1737, "step": 6592 }, { "epoch": 21.61639344262295, "grad_norm": 14.140932083129883, "learning_rate": 1.823672323755671e-05, "loss": 1.4736, "step": 6593 }, { "epoch": 21.619672131147542, "grad_norm": 17.49262809753418, "learning_rate": 1.8236121031061136e-05, "loss": 1.3237, "step": 6594 }, { "epoch": 21.62295081967213, "grad_norm": 10.463812828063965, "learning_rate": 1.8235518731694735e-05, "loss": 1.0963, "step": 6595 }, { "epoch": 21.626229508196722, "grad_norm": 9.710658073425293, "learning_rate": 1.8234916339464294e-05, "loss": 1.2856, "step": 6596 }, { "epoch": 21.62950819672131, "grad_norm": 30.695293426513672, "learning_rate": 1.8234313854376613e-05, "loss": 1.095, "step": 6597 }, { "epoch": 21.632786885245903, "grad_norm": 9.309293746948242, "learning_rate": 1.823371127643848e-05, "loss": 1.2004, "step": 6598 }, { "epoch": 21.63606557377049, "grad_norm": 12.034807205200195, "learning_rate": 1.8233108605656694e-05, "loss": 1.209, "step": 6599 }, { "epoch": 21.639344262295083, "grad_norm": 8.035743713378906, "learning_rate": 1.8232505842038047e-05, "loss": 1.0591, "step": 6600 }, { "epoch": 21.64262295081967, "grad_norm": 7.461005210876465, "learning_rate": 1.8231902985589336e-05, "loss": 1.3674, "step": 6601 }, { "epoch": 21.645901639344263, "grad_norm": 10.005292892456055, "learning_rate": 1.823130003631736e-05, "loss": 1.4685, "step": 6602 }, { "epoch": 21.64918032786885, "grad_norm": 8.028305053710938, "learning_rate": 1.8230696994228917e-05, "loss": 1.4626, "step": 6603 }, { "epoch": 21.652459016393443, "grad_norm": 11.46522331237793, "learning_rate": 1.823009385933081e-05, "loss": 1.3074, "step": 6604 }, { "epoch": 21.65573770491803, "grad_norm": 9.352941513061523, "learning_rate": 1.8229490631629834e-05, "loss": 1.3926, "step": 6605 }, { "epoch": 21.659016393442624, "grad_norm": 9.039230346679688, "learning_rate": 1.8228887311132798e-05, "loss": 1.2244, "step": 6606 }, { "epoch": 21.662295081967212, "grad_norm": 13.333240509033203, "learning_rate": 1.82282838978465e-05, "loss": 1.1711, "step": 6607 }, { "epoch": 21.665573770491804, "grad_norm": 9.56229305267334, "learning_rate": 1.8227680391777746e-05, "loss": 1.4133, "step": 6608 }, { "epoch": 21.668852459016392, "grad_norm": 8.908061981201172, "learning_rate": 1.822707679293334e-05, "loss": 1.4475, "step": 6609 }, { "epoch": 21.672131147540984, "grad_norm": 7.667943954467773, "learning_rate": 1.822647310132009e-05, "loss": 1.3877, "step": 6610 }, { "epoch": 21.675409836065572, "grad_norm": 47.83904266357422, "learning_rate": 1.82258693169448e-05, "loss": 1.4065, "step": 6611 }, { "epoch": 21.678688524590164, "grad_norm": 9.434056282043457, "learning_rate": 1.8225265439814286e-05, "loss": 1.0875, "step": 6612 }, { "epoch": 21.681967213114753, "grad_norm": 10.556572914123535, "learning_rate": 1.8224661469935348e-05, "loss": 1.2456, "step": 6613 }, { "epoch": 21.685245901639345, "grad_norm": 7.665876388549805, "learning_rate": 1.8224057407314803e-05, "loss": 1.3125, "step": 6614 }, { "epoch": 21.688524590163933, "grad_norm": 14.377902030944824, "learning_rate": 1.822345325195946e-05, "loss": 1.2422, "step": 6615 }, { "epoch": 21.691803278688525, "grad_norm": 17.53681182861328, "learning_rate": 1.822284900387613e-05, "loss": 1.3914, "step": 6616 }, { "epoch": 21.695081967213113, "grad_norm": 27.39217758178711, "learning_rate": 1.8222244663071628e-05, "loss": 1.3174, "step": 6617 }, { "epoch": 21.698360655737705, "grad_norm": 12.435818672180176, "learning_rate": 1.8221640229552767e-05, "loss": 1.3137, "step": 6618 }, { "epoch": 21.701639344262293, "grad_norm": 8.231149673461914, "learning_rate": 1.8221035703326367e-05, "loss": 1.3748, "step": 6619 }, { "epoch": 21.704918032786885, "grad_norm": 10.67856502532959, "learning_rate": 1.8220431084399238e-05, "loss": 1.4054, "step": 6620 }, { "epoch": 21.708196721311474, "grad_norm": 9.522255897521973, "learning_rate": 1.8219826372778203e-05, "loss": 1.3977, "step": 6621 }, { "epoch": 21.711475409836066, "grad_norm": 10.480567932128906, "learning_rate": 1.8219221568470076e-05, "loss": 1.0001, "step": 6622 }, { "epoch": 21.714754098360658, "grad_norm": 11.143928527832031, "learning_rate": 1.8218616671481685e-05, "loss": 1.561, "step": 6623 }, { "epoch": 21.718032786885246, "grad_norm": 52.877098083496094, "learning_rate": 1.8218011681819846e-05, "loss": 1.2859, "step": 6624 }, { "epoch": 21.721311475409838, "grad_norm": 8.160953521728516, "learning_rate": 1.821740659949138e-05, "loss": 1.2351, "step": 6625 }, { "epoch": 21.724590163934426, "grad_norm": 8.757712364196777, "learning_rate": 1.821680142450311e-05, "loss": 1.2129, "step": 6626 }, { "epoch": 21.727868852459018, "grad_norm": 19.355880737304688, "learning_rate": 1.8216196156861858e-05, "loss": 1.2484, "step": 6627 }, { "epoch": 21.731147540983606, "grad_norm": 17.740341186523438, "learning_rate": 1.8215590796574454e-05, "loss": 1.3282, "step": 6628 }, { "epoch": 21.7344262295082, "grad_norm": 9.682607650756836, "learning_rate": 1.821498534364772e-05, "loss": 1.6184, "step": 6629 }, { "epoch": 21.737704918032787, "grad_norm": 11.477693557739258, "learning_rate": 1.821437979808849e-05, "loss": 1.3523, "step": 6630 }, { "epoch": 21.74098360655738, "grad_norm": 13.364489555358887, "learning_rate": 1.821377415990358e-05, "loss": 1.4807, "step": 6631 }, { "epoch": 21.744262295081967, "grad_norm": 12.424422264099121, "learning_rate": 1.8213168429099833e-05, "loss": 1.3497, "step": 6632 }, { "epoch": 21.74754098360656, "grad_norm": 18.79233741760254, "learning_rate": 1.8212562605684067e-05, "loss": 1.4998, "step": 6633 }, { "epoch": 21.750819672131147, "grad_norm": 9.384282112121582, "learning_rate": 1.821195668966312e-05, "loss": 1.4224, "step": 6634 }, { "epoch": 21.75409836065574, "grad_norm": 19.834369659423828, "learning_rate": 1.8211350681043824e-05, "loss": 1.1646, "step": 6635 }, { "epoch": 21.757377049180327, "grad_norm": 8.323535919189453, "learning_rate": 1.821074457983301e-05, "loss": 1.4182, "step": 6636 }, { "epoch": 21.76065573770492, "grad_norm": 71.69844055175781, "learning_rate": 1.8210138386037516e-05, "loss": 1.3418, "step": 6637 }, { "epoch": 21.763934426229508, "grad_norm": 9.508346557617188, "learning_rate": 1.8209532099664177e-05, "loss": 1.292, "step": 6638 }, { "epoch": 21.7672131147541, "grad_norm": 8.48861026763916, "learning_rate": 1.8208925720719823e-05, "loss": 1.2603, "step": 6639 }, { "epoch": 21.770491803278688, "grad_norm": 8.87651538848877, "learning_rate": 1.8208319249211298e-05, "loss": 1.4619, "step": 6640 }, { "epoch": 21.77377049180328, "grad_norm": 10.610176086425781, "learning_rate": 1.820771268514544e-05, "loss": 1.3438, "step": 6641 }, { "epoch": 21.777049180327868, "grad_norm": 9.46677017211914, "learning_rate": 1.8207106028529086e-05, "loss": 1.4341, "step": 6642 }, { "epoch": 21.78032786885246, "grad_norm": 10.375398635864258, "learning_rate": 1.8206499279369077e-05, "loss": 1.3521, "step": 6643 }, { "epoch": 21.78360655737705, "grad_norm": 26.195924758911133, "learning_rate": 1.820589243767226e-05, "loss": 1.491, "step": 6644 }, { "epoch": 21.78688524590164, "grad_norm": 18.0054931640625, "learning_rate": 1.8205285503445473e-05, "loss": 1.3428, "step": 6645 }, { "epoch": 21.79016393442623, "grad_norm": 10.514472961425781, "learning_rate": 1.820467847669556e-05, "loss": 1.3066, "step": 6646 }, { "epoch": 21.79344262295082, "grad_norm": 11.109662055969238, "learning_rate": 1.8204071357429365e-05, "loss": 1.3225, "step": 6647 }, { "epoch": 21.79672131147541, "grad_norm": 13.910751342773438, "learning_rate": 1.8203464145653737e-05, "loss": 1.2642, "step": 6648 }, { "epoch": 21.8, "grad_norm": 13.651087760925293, "learning_rate": 1.8202856841375517e-05, "loss": 1.3884, "step": 6649 }, { "epoch": 21.80327868852459, "grad_norm": 21.662939071655273, "learning_rate": 1.8202249444601564e-05, "loss": 1.2836, "step": 6650 }, { "epoch": 21.80655737704918, "grad_norm": 12.50398063659668, "learning_rate": 1.8201641955338714e-05, "loss": 1.2876, "step": 6651 }, { "epoch": 21.80983606557377, "grad_norm": 18.21487045288086, "learning_rate": 1.820103437359383e-05, "loss": 1.2874, "step": 6652 }, { "epoch": 21.81311475409836, "grad_norm": 9.715670585632324, "learning_rate": 1.8200426699373753e-05, "loss": 1.4226, "step": 6653 }, { "epoch": 21.81639344262295, "grad_norm": 16.667612075805664, "learning_rate": 1.819981893268534e-05, "loss": 1.3867, "step": 6654 }, { "epoch": 21.81967213114754, "grad_norm": 12.128231048583984, "learning_rate": 1.819921107353544e-05, "loss": 1.1865, "step": 6655 }, { "epoch": 21.82295081967213, "grad_norm": 9.110513687133789, "learning_rate": 1.819860312193091e-05, "loss": 1.3062, "step": 6656 }, { "epoch": 21.82622950819672, "grad_norm": 13.530403137207031, "learning_rate": 1.819799507787861e-05, "loss": 1.2974, "step": 6657 }, { "epoch": 21.82950819672131, "grad_norm": 10.145020484924316, "learning_rate": 1.8197386941385385e-05, "loss": 1.5042, "step": 6658 }, { "epoch": 21.832786885245902, "grad_norm": 10.854630470275879, "learning_rate": 1.8196778712458106e-05, "loss": 1.3602, "step": 6659 }, { "epoch": 21.83606557377049, "grad_norm": 9.799246788024902, "learning_rate": 1.819617039110362e-05, "loss": 1.332, "step": 6660 }, { "epoch": 21.839344262295082, "grad_norm": 11.795405387878418, "learning_rate": 1.8195561977328792e-05, "loss": 1.3284, "step": 6661 }, { "epoch": 21.84262295081967, "grad_norm": 9.680768013000488, "learning_rate": 1.819495347114048e-05, "loss": 1.2041, "step": 6662 }, { "epoch": 21.845901639344262, "grad_norm": 8.85918140411377, "learning_rate": 1.819434487254555e-05, "loss": 1.1908, "step": 6663 }, { "epoch": 21.84918032786885, "grad_norm": 9.264301300048828, "learning_rate": 1.819373618155086e-05, "loss": 1.2327, "step": 6664 }, { "epoch": 21.852459016393443, "grad_norm": 18.048978805541992, "learning_rate": 1.8193127398163277e-05, "loss": 1.1134, "step": 6665 }, { "epoch": 21.855737704918035, "grad_norm": 8.435343742370605, "learning_rate": 1.8192518522389662e-05, "loss": 1.3567, "step": 6666 }, { "epoch": 21.859016393442623, "grad_norm": 22.058408737182617, "learning_rate": 1.8191909554236885e-05, "loss": 1.2339, "step": 6667 }, { "epoch": 21.862295081967215, "grad_norm": 9.129594802856445, "learning_rate": 1.8191300493711808e-05, "loss": 1.4546, "step": 6668 }, { "epoch": 21.865573770491803, "grad_norm": 18.153413772583008, "learning_rate": 1.81906913408213e-05, "loss": 1.458, "step": 6669 }, { "epoch": 21.868852459016395, "grad_norm": 13.46239948272705, "learning_rate": 1.8190082095572233e-05, "loss": 1.4543, "step": 6670 }, { "epoch": 21.872131147540983, "grad_norm": 8.657156944274902, "learning_rate": 1.8189472757971474e-05, "loss": 1.3948, "step": 6671 }, { "epoch": 21.875409836065575, "grad_norm": 9.632523536682129, "learning_rate": 1.8188863328025896e-05, "loss": 1.1921, "step": 6672 }, { "epoch": 21.878688524590164, "grad_norm": 8.960701942443848, "learning_rate": 1.8188253805742366e-05, "loss": 1.2316, "step": 6673 }, { "epoch": 21.881967213114756, "grad_norm": 10.013967514038086, "learning_rate": 1.8187644191127766e-05, "loss": 1.3127, "step": 6674 }, { "epoch": 21.885245901639344, "grad_norm": 12.56972599029541, "learning_rate": 1.8187034484188963e-05, "loss": 1.1423, "step": 6675 }, { "epoch": 21.888524590163936, "grad_norm": 8.387907981872559, "learning_rate": 1.8186424684932832e-05, "loss": 1.292, "step": 6676 }, { "epoch": 21.891803278688524, "grad_norm": 8.916770935058594, "learning_rate": 1.818581479336625e-05, "loss": 1.3829, "step": 6677 }, { "epoch": 21.895081967213116, "grad_norm": 8.561923027038574, "learning_rate": 1.8185204809496097e-05, "loss": 1.2595, "step": 6678 }, { "epoch": 21.898360655737704, "grad_norm": 17.144031524658203, "learning_rate": 1.818459473332925e-05, "loss": 1.3213, "step": 6679 }, { "epoch": 21.901639344262296, "grad_norm": 9.180726051330566, "learning_rate": 1.8183984564872588e-05, "loss": 1.3457, "step": 6680 }, { "epoch": 21.904918032786885, "grad_norm": 12.394440650939941, "learning_rate": 1.818337430413299e-05, "loss": 1.3022, "step": 6681 }, { "epoch": 21.908196721311477, "grad_norm": 8.379620552062988, "learning_rate": 1.818276395111734e-05, "loss": 1.2057, "step": 6682 }, { "epoch": 21.911475409836065, "grad_norm": 10.290314674377441, "learning_rate": 1.8182153505832513e-05, "loss": 1.3279, "step": 6683 }, { "epoch": 21.914754098360657, "grad_norm": 11.106452941894531, "learning_rate": 1.8181542968285402e-05, "loss": 1.4541, "step": 6684 }, { "epoch": 21.918032786885245, "grad_norm": 9.572174072265625, "learning_rate": 1.8180932338482885e-05, "loss": 1.4211, "step": 6685 }, { "epoch": 21.921311475409837, "grad_norm": 18.21620750427246, "learning_rate": 1.8180321616431853e-05, "loss": 1.4744, "step": 6686 }, { "epoch": 21.924590163934425, "grad_norm": 7.453968048095703, "learning_rate": 1.8179710802139187e-05, "loss": 1.3132, "step": 6687 }, { "epoch": 21.927868852459017, "grad_norm": 9.602834701538086, "learning_rate": 1.817909989561178e-05, "loss": 1.575, "step": 6688 }, { "epoch": 21.931147540983606, "grad_norm": 20.031105041503906, "learning_rate": 1.8178488896856512e-05, "loss": 1.2233, "step": 6689 }, { "epoch": 21.934426229508198, "grad_norm": 10.191178321838379, "learning_rate": 1.8177877805880283e-05, "loss": 1.2549, "step": 6690 }, { "epoch": 21.937704918032786, "grad_norm": 8.557232856750488, "learning_rate": 1.8177266622689976e-05, "loss": 1.3538, "step": 6691 }, { "epoch": 21.940983606557378, "grad_norm": 7.874271869659424, "learning_rate": 1.8176655347292485e-05, "loss": 1.3621, "step": 6692 }, { "epoch": 21.944262295081966, "grad_norm": 7.644613742828369, "learning_rate": 1.8176043979694706e-05, "loss": 1.2192, "step": 6693 }, { "epoch": 21.947540983606558, "grad_norm": 9.013508796691895, "learning_rate": 1.817543251990353e-05, "loss": 1.1238, "step": 6694 }, { "epoch": 21.950819672131146, "grad_norm": 8.022117614746094, "learning_rate": 1.8174820967925852e-05, "loss": 1.4602, "step": 6695 }, { "epoch": 21.95409836065574, "grad_norm": 10.62290096282959, "learning_rate": 1.8174209323768567e-05, "loss": 1.2639, "step": 6696 }, { "epoch": 21.957377049180327, "grad_norm": 7.384097576141357, "learning_rate": 1.8173597587438572e-05, "loss": 1.4485, "step": 6697 }, { "epoch": 21.96065573770492, "grad_norm": 11.228002548217773, "learning_rate": 1.817298575894277e-05, "loss": 1.0171, "step": 6698 }, { "epoch": 21.963934426229507, "grad_norm": 11.99096393585205, "learning_rate": 1.8172373838288052e-05, "loss": 1.1815, "step": 6699 }, { "epoch": 21.9672131147541, "grad_norm": 14.127434730529785, "learning_rate": 1.8171761825481323e-05, "loss": 1.2913, "step": 6700 }, { "epoch": 21.970491803278687, "grad_norm": 10.356224060058594, "learning_rate": 1.817114972052948e-05, "loss": 1.3645, "step": 6701 }, { "epoch": 21.97377049180328, "grad_norm": 27.073556900024414, "learning_rate": 1.8170537523439432e-05, "loss": 1.3295, "step": 6702 }, { "epoch": 21.977049180327867, "grad_norm": 10.1146879196167, "learning_rate": 1.8169925234218076e-05, "loss": 1.2428, "step": 6703 }, { "epoch": 21.98032786885246, "grad_norm": 9.86782455444336, "learning_rate": 1.816931285287232e-05, "loss": 1.2905, "step": 6704 }, { "epoch": 21.983606557377048, "grad_norm": 7.698418140411377, "learning_rate": 1.8168700379409067e-05, "loss": 1.4519, "step": 6705 }, { "epoch": 21.98688524590164, "grad_norm": 8.802701950073242, "learning_rate": 1.8168087813835223e-05, "loss": 1.2557, "step": 6706 }, { "epoch": 21.990163934426228, "grad_norm": 11.70771312713623, "learning_rate": 1.8167475156157697e-05, "loss": 1.1567, "step": 6707 }, { "epoch": 21.99344262295082, "grad_norm": 8.591588973999023, "learning_rate": 1.8166862406383396e-05, "loss": 1.0977, "step": 6708 }, { "epoch": 21.99672131147541, "grad_norm": 7.414114952087402, "learning_rate": 1.8166249564519233e-05, "loss": 1.3397, "step": 6709 }, { "epoch": 22.0, "grad_norm": 9.626557350158691, "learning_rate": 1.816563663057211e-05, "loss": 1.5011, "step": 6710 }, { "epoch": 22.003278688524592, "grad_norm": 12.764628410339355, "learning_rate": 1.816502360454895e-05, "loss": 1.1476, "step": 6711 }, { "epoch": 22.00655737704918, "grad_norm": 6.614801406860352, "learning_rate": 1.8164410486456655e-05, "loss": 1.3594, "step": 6712 }, { "epoch": 22.009836065573772, "grad_norm": 7.294346332550049, "learning_rate": 1.8163797276302143e-05, "loss": 1.2916, "step": 6713 }, { "epoch": 22.01311475409836, "grad_norm": 8.229291915893555, "learning_rate": 1.8163183974092327e-05, "loss": 1.2095, "step": 6714 }, { "epoch": 22.016393442622952, "grad_norm": 7.286703109741211, "learning_rate": 1.8162570579834126e-05, "loss": 1.0724, "step": 6715 }, { "epoch": 22.01967213114754, "grad_norm": 8.298209190368652, "learning_rate": 1.8161957093534456e-05, "loss": 1.0856, "step": 6716 }, { "epoch": 22.022950819672133, "grad_norm": 6.461541652679443, "learning_rate": 1.8161343515200234e-05, "loss": 1.2637, "step": 6717 }, { "epoch": 22.02622950819672, "grad_norm": 14.746843338012695, "learning_rate": 1.816072984483838e-05, "loss": 1.2881, "step": 6718 }, { "epoch": 22.029508196721313, "grad_norm": 9.217018127441406, "learning_rate": 1.8160116082455808e-05, "loss": 1.104, "step": 6719 }, { "epoch": 22.0327868852459, "grad_norm": 8.70562744140625, "learning_rate": 1.8159502228059443e-05, "loss": 1.4161, "step": 6720 }, { "epoch": 22.036065573770493, "grad_norm": 12.45873737335205, "learning_rate": 1.8158888281656206e-05, "loss": 1.2283, "step": 6721 }, { "epoch": 22.03934426229508, "grad_norm": 7.414708137512207, "learning_rate": 1.8158274243253023e-05, "loss": 1.2498, "step": 6722 }, { "epoch": 22.042622950819673, "grad_norm": 7.06107234954834, "learning_rate": 1.8157660112856814e-05, "loss": 1.5674, "step": 6723 }, { "epoch": 22.04590163934426, "grad_norm": 7.068535327911377, "learning_rate": 1.8157045890474505e-05, "loss": 1.2747, "step": 6724 }, { "epoch": 22.049180327868854, "grad_norm": 9.810957908630371, "learning_rate": 1.8156431576113026e-05, "loss": 1.0579, "step": 6725 }, { "epoch": 22.052459016393442, "grad_norm": 7.901889801025391, "learning_rate": 1.8155817169779294e-05, "loss": 1.1797, "step": 6726 }, { "epoch": 22.055737704918034, "grad_norm": 14.839754104614258, "learning_rate": 1.8155202671480247e-05, "loss": 1.1276, "step": 6727 }, { "epoch": 22.059016393442622, "grad_norm": 9.698760986328125, "learning_rate": 1.815458808122281e-05, "loss": 1.2498, "step": 6728 }, { "epoch": 22.062295081967214, "grad_norm": 7.985068321228027, "learning_rate": 1.8153973399013917e-05, "loss": 1.2917, "step": 6729 }, { "epoch": 22.065573770491802, "grad_norm": 8.983705520629883, "learning_rate": 1.815335862486049e-05, "loss": 1.3359, "step": 6730 }, { "epoch": 22.068852459016394, "grad_norm": 7.691589832305908, "learning_rate": 1.8152743758769472e-05, "loss": 1.1655, "step": 6731 }, { "epoch": 22.072131147540983, "grad_norm": 10.80333423614502, "learning_rate": 1.815212880074779e-05, "loss": 1.3826, "step": 6732 }, { "epoch": 22.075409836065575, "grad_norm": 9.839681625366211, "learning_rate": 1.8151513750802377e-05, "loss": 1.1661, "step": 6733 }, { "epoch": 22.078688524590163, "grad_norm": 8.859357833862305, "learning_rate": 1.8150898608940175e-05, "loss": 1.431, "step": 6734 }, { "epoch": 22.081967213114755, "grad_norm": 8.036565780639648, "learning_rate": 1.8150283375168112e-05, "loss": 1.4529, "step": 6735 }, { "epoch": 22.085245901639343, "grad_norm": 8.26585865020752, "learning_rate": 1.8149668049493137e-05, "loss": 1.2617, "step": 6736 }, { "epoch": 22.088524590163935, "grad_norm": 7.934204578399658, "learning_rate": 1.8149052631922175e-05, "loss": 1.1105, "step": 6737 }, { "epoch": 22.091803278688523, "grad_norm": 7.998476982116699, "learning_rate": 1.8148437122462175e-05, "loss": 1.2859, "step": 6738 }, { "epoch": 22.095081967213115, "grad_norm": 7.464719295501709, "learning_rate": 1.8147821521120073e-05, "loss": 1.2621, "step": 6739 }, { "epoch": 22.098360655737704, "grad_norm": 16.5397891998291, "learning_rate": 1.8147205827902808e-05, "loss": 1.1899, "step": 6740 }, { "epoch": 22.101639344262296, "grad_norm": 11.07158374786377, "learning_rate": 1.8146590042817332e-05, "loss": 1.3181, "step": 6741 }, { "epoch": 22.104918032786884, "grad_norm": 7.89854621887207, "learning_rate": 1.814597416587058e-05, "loss": 1.183, "step": 6742 }, { "epoch": 22.108196721311476, "grad_norm": 7.682772636413574, "learning_rate": 1.81453581970695e-05, "loss": 1.1128, "step": 6743 }, { "epoch": 22.111475409836064, "grad_norm": 5.994963645935059, "learning_rate": 1.814474213642104e-05, "loss": 1.5273, "step": 6744 }, { "epoch": 22.114754098360656, "grad_norm": 9.99893569946289, "learning_rate": 1.814412598393214e-05, "loss": 1.0101, "step": 6745 }, { "epoch": 22.118032786885244, "grad_norm": 11.337214469909668, "learning_rate": 1.8143509739609753e-05, "loss": 1.166, "step": 6746 }, { "epoch": 22.121311475409836, "grad_norm": 7.353456974029541, "learning_rate": 1.8142893403460827e-05, "loss": 1.335, "step": 6747 }, { "epoch": 22.124590163934425, "grad_norm": 10.611628532409668, "learning_rate": 1.8142276975492312e-05, "loss": 1.215, "step": 6748 }, { "epoch": 22.127868852459017, "grad_norm": 10.507198333740234, "learning_rate": 1.8141660455711156e-05, "loss": 1.4565, "step": 6749 }, { "epoch": 22.131147540983605, "grad_norm": 8.235396385192871, "learning_rate": 1.8141043844124316e-05, "loss": 1.1404, "step": 6750 }, { "epoch": 22.134426229508197, "grad_norm": 56.842037200927734, "learning_rate": 1.814042714073874e-05, "loss": 1.1978, "step": 6751 }, { "epoch": 22.137704918032785, "grad_norm": 9.76860237121582, "learning_rate": 1.8139810345561385e-05, "loss": 1.1758, "step": 6752 }, { "epoch": 22.140983606557377, "grad_norm": 10.957060813903809, "learning_rate": 1.8139193458599204e-05, "loss": 1.4006, "step": 6753 }, { "epoch": 22.14426229508197, "grad_norm": 8.081157684326172, "learning_rate": 1.8138576479859154e-05, "loss": 1.4102, "step": 6754 }, { "epoch": 22.147540983606557, "grad_norm": 7.185187816619873, "learning_rate": 1.8137959409348193e-05, "loss": 1.2617, "step": 6755 }, { "epoch": 22.15081967213115, "grad_norm": 14.659690856933594, "learning_rate": 1.8137342247073278e-05, "loss": 1.2554, "step": 6756 }, { "epoch": 22.154098360655738, "grad_norm": 13.671372413635254, "learning_rate": 1.8136724993041374e-05, "loss": 1.5516, "step": 6757 }, { "epoch": 22.15737704918033, "grad_norm": 7.7390947341918945, "learning_rate": 1.813610764725943e-05, "loss": 1.5488, "step": 6758 }, { "epoch": 22.160655737704918, "grad_norm": 8.680244445800781, "learning_rate": 1.813549020973441e-05, "loss": 1.1985, "step": 6759 }, { "epoch": 22.16393442622951, "grad_norm": 8.430973052978516, "learning_rate": 1.8134872680473285e-05, "loss": 1.281, "step": 6760 }, { "epoch": 22.167213114754098, "grad_norm": 6.4945454597473145, "learning_rate": 1.8134255059483008e-05, "loss": 1.429, "step": 6761 }, { "epoch": 22.17049180327869, "grad_norm": 7.7648844718933105, "learning_rate": 1.8133637346770552e-05, "loss": 1.4097, "step": 6762 }, { "epoch": 22.17377049180328, "grad_norm": 16.323013305664062, "learning_rate": 1.8133019542342873e-05, "loss": 1.2625, "step": 6763 }, { "epoch": 22.17704918032787, "grad_norm": 7.754722595214844, "learning_rate": 1.8132401646206947e-05, "loss": 1.1299, "step": 6764 }, { "epoch": 22.18032786885246, "grad_norm": 33.6076774597168, "learning_rate": 1.8131783658369736e-05, "loss": 1.189, "step": 6765 }, { "epoch": 22.18360655737705, "grad_norm": 14.522248268127441, "learning_rate": 1.8131165578838207e-05, "loss": 1.1812, "step": 6766 }, { "epoch": 22.18688524590164, "grad_norm": 7.475881099700928, "learning_rate": 1.813054740761933e-05, "loss": 1.1139, "step": 6767 }, { "epoch": 22.19016393442623, "grad_norm": 12.076748847961426, "learning_rate": 1.8129929144720082e-05, "loss": 1.0884, "step": 6768 }, { "epoch": 22.19344262295082, "grad_norm": 10.039093971252441, "learning_rate": 1.8129310790147428e-05, "loss": 1.2146, "step": 6769 }, { "epoch": 22.19672131147541, "grad_norm": 10.341459274291992, "learning_rate": 1.8128692343908346e-05, "loss": 1.1039, "step": 6770 }, { "epoch": 22.2, "grad_norm": 7.617304801940918, "learning_rate": 1.81280738060098e-05, "loss": 1.4492, "step": 6771 }, { "epoch": 22.20327868852459, "grad_norm": 10.880980491638184, "learning_rate": 1.8127455176458775e-05, "loss": 1.1602, "step": 6772 }, { "epoch": 22.20655737704918, "grad_norm": 14.992938995361328, "learning_rate": 1.812683645526224e-05, "loss": 1.1613, "step": 6773 }, { "epoch": 22.20983606557377, "grad_norm": 11.012959480285645, "learning_rate": 1.8126217642427174e-05, "loss": 1.2484, "step": 6774 }, { "epoch": 22.21311475409836, "grad_norm": 7.849508762359619, "learning_rate": 1.8125598737960558e-05, "loss": 1.1083, "step": 6775 }, { "epoch": 22.21639344262295, "grad_norm": 7.619724273681641, "learning_rate": 1.8124979741869368e-05, "loss": 1.2576, "step": 6776 }, { "epoch": 22.21967213114754, "grad_norm": 10.07224178314209, "learning_rate": 1.812436065416058e-05, "loss": 1.2529, "step": 6777 }, { "epoch": 22.222950819672132, "grad_norm": 8.803756713867188, "learning_rate": 1.8123741474841178e-05, "loss": 1.4695, "step": 6778 }, { "epoch": 22.22622950819672, "grad_norm": 15.606170654296875, "learning_rate": 1.8123122203918147e-05, "loss": 1.2197, "step": 6779 }, { "epoch": 22.229508196721312, "grad_norm": 7.669405460357666, "learning_rate": 1.8122502841398467e-05, "loss": 1.3406, "step": 6780 }, { "epoch": 22.2327868852459, "grad_norm": 7.762104511260986, "learning_rate": 1.8121883387289122e-05, "loss": 0.9851, "step": 6781 }, { "epoch": 22.236065573770492, "grad_norm": 7.268455982208252, "learning_rate": 1.8121263841597097e-05, "loss": 1.2991, "step": 6782 }, { "epoch": 22.23934426229508, "grad_norm": 13.234543800354004, "learning_rate": 1.8120644204329376e-05, "loss": 1.2264, "step": 6783 }, { "epoch": 22.242622950819673, "grad_norm": 17.072189331054688, "learning_rate": 1.812002447549295e-05, "loss": 1.21, "step": 6784 }, { "epoch": 22.24590163934426, "grad_norm": 9.886459350585938, "learning_rate": 1.811940465509481e-05, "loss": 1.0131, "step": 6785 }, { "epoch": 22.249180327868853, "grad_norm": 10.1576509475708, "learning_rate": 1.811878474314193e-05, "loss": 1.2842, "step": 6786 }, { "epoch": 22.25245901639344, "grad_norm": 9.168148040771484, "learning_rate": 1.811816473964132e-05, "loss": 1.1678, "step": 6787 }, { "epoch": 22.255737704918033, "grad_norm": 10.225179672241211, "learning_rate": 1.8117544644599955e-05, "loss": 1.3243, "step": 6788 }, { "epoch": 22.25901639344262, "grad_norm": 8.43603515625, "learning_rate": 1.8116924458024838e-05, "loss": 1.2825, "step": 6789 }, { "epoch": 22.262295081967213, "grad_norm": 9.927977561950684, "learning_rate": 1.811630417992296e-05, "loss": 1.1885, "step": 6790 }, { "epoch": 22.2655737704918, "grad_norm": 11.336075782775879, "learning_rate": 1.811568381030131e-05, "loss": 1.1276, "step": 6791 }, { "epoch": 22.268852459016394, "grad_norm": 15.092801094055176, "learning_rate": 1.8115063349166887e-05, "loss": 1.1589, "step": 6792 }, { "epoch": 22.272131147540982, "grad_norm": 10.730076789855957, "learning_rate": 1.811444279652669e-05, "loss": 1.1194, "step": 6793 }, { "epoch": 22.275409836065574, "grad_norm": 8.42851448059082, "learning_rate": 1.811382215238771e-05, "loss": 1.5256, "step": 6794 }, { "epoch": 22.278688524590162, "grad_norm": 12.80811882019043, "learning_rate": 1.811320141675695e-05, "loss": 1.0784, "step": 6795 }, { "epoch": 22.281967213114754, "grad_norm": 13.140615463256836, "learning_rate": 1.8112580589641412e-05, "loss": 1.1207, "step": 6796 }, { "epoch": 22.285245901639342, "grad_norm": 9.294510841369629, "learning_rate": 1.811195967104809e-05, "loss": 1.3149, "step": 6797 }, { "epoch": 22.288524590163934, "grad_norm": 8.683487892150879, "learning_rate": 1.8111338660983988e-05, "loss": 1.2238, "step": 6798 }, { "epoch": 22.291803278688526, "grad_norm": 13.078751564025879, "learning_rate": 1.811071755945611e-05, "loss": 1.3381, "step": 6799 }, { "epoch": 22.295081967213115, "grad_norm": 101.95185089111328, "learning_rate": 1.8110096366471458e-05, "loss": 1.3486, "step": 6800 }, { "epoch": 22.298360655737707, "grad_norm": 8.507079124450684, "learning_rate": 1.8109475082037033e-05, "loss": 1.3109, "step": 6801 }, { "epoch": 22.301639344262295, "grad_norm": 7.828509330749512, "learning_rate": 1.810885370615985e-05, "loss": 1.1985, "step": 6802 }, { "epoch": 22.304918032786887, "grad_norm": 11.504549026489258, "learning_rate": 1.810823223884691e-05, "loss": 1.2512, "step": 6803 }, { "epoch": 22.308196721311475, "grad_norm": 7.375008583068848, "learning_rate": 1.810761068010522e-05, "loss": 1.1692, "step": 6804 }, { "epoch": 22.311475409836067, "grad_norm": 24.17344856262207, "learning_rate": 1.810698902994179e-05, "loss": 1.4458, "step": 6805 }, { "epoch": 22.314754098360655, "grad_norm": 8.181312561035156, "learning_rate": 1.8106367288363625e-05, "loss": 1.1768, "step": 6806 }, { "epoch": 22.318032786885247, "grad_norm": 10.893041610717773, "learning_rate": 1.8105745455377748e-05, "loss": 1.4194, "step": 6807 }, { "epoch": 22.321311475409836, "grad_norm": 10.836397171020508, "learning_rate": 1.8105123530991158e-05, "loss": 1.3647, "step": 6808 }, { "epoch": 22.324590163934428, "grad_norm": 11.654704093933105, "learning_rate": 1.810450151521088e-05, "loss": 1.1494, "step": 6809 }, { "epoch": 22.327868852459016, "grad_norm": 11.701438903808594, "learning_rate": 1.8103879408043913e-05, "loss": 1.2412, "step": 6810 }, { "epoch": 22.331147540983608, "grad_norm": 8.554985046386719, "learning_rate": 1.810325720949728e-05, "loss": 1.2207, "step": 6811 }, { "epoch": 22.334426229508196, "grad_norm": 17.096254348754883, "learning_rate": 1.8102634919578e-05, "loss": 1.1753, "step": 6812 }, { "epoch": 22.337704918032788, "grad_norm": 11.179826736450195, "learning_rate": 1.8102012538293087e-05, "loss": 1.1448, "step": 6813 }, { "epoch": 22.340983606557376, "grad_norm": 8.439013481140137, "learning_rate": 1.8101390065649555e-05, "loss": 1.47, "step": 6814 }, { "epoch": 22.34426229508197, "grad_norm": 14.706488609313965, "learning_rate": 1.810076750165443e-05, "loss": 1.1633, "step": 6815 }, { "epoch": 22.347540983606557, "grad_norm": 9.20828914642334, "learning_rate": 1.8100144846314728e-05, "loss": 1.2715, "step": 6816 }, { "epoch": 22.35081967213115, "grad_norm": 7.89480447769165, "learning_rate": 1.809952209963747e-05, "loss": 1.3193, "step": 6817 }, { "epoch": 22.354098360655737, "grad_norm": 13.397088050842285, "learning_rate": 1.8098899261629678e-05, "loss": 1.0907, "step": 6818 }, { "epoch": 22.35737704918033, "grad_norm": 13.671568870544434, "learning_rate": 1.8098276332298378e-05, "loss": 1.5059, "step": 6819 }, { "epoch": 22.360655737704917, "grad_norm": 9.70606803894043, "learning_rate": 1.809765331165059e-05, "loss": 1.2222, "step": 6820 }, { "epoch": 22.36393442622951, "grad_norm": 10.060266494750977, "learning_rate": 1.8097030199693345e-05, "loss": 1.2677, "step": 6821 }, { "epoch": 22.367213114754097, "grad_norm": 10.391365051269531, "learning_rate": 1.8096406996433664e-05, "loss": 1.3762, "step": 6822 }, { "epoch": 22.37049180327869, "grad_norm": 17.77635955810547, "learning_rate": 1.809578370187858e-05, "loss": 1.5032, "step": 6823 }, { "epoch": 22.373770491803278, "grad_norm": 10.207234382629395, "learning_rate": 1.809516031603511e-05, "loss": 1.5153, "step": 6824 }, { "epoch": 22.37704918032787, "grad_norm": 8.104140281677246, "learning_rate": 1.8094536838910294e-05, "loss": 1.1631, "step": 6825 }, { "epoch": 22.380327868852458, "grad_norm": 11.418628692626953, "learning_rate": 1.8093913270511158e-05, "loss": 1.2166, "step": 6826 }, { "epoch": 22.38360655737705, "grad_norm": 8.534577369689941, "learning_rate": 1.8093289610844737e-05, "loss": 1.3813, "step": 6827 }, { "epoch": 22.386885245901638, "grad_norm": 10.932342529296875, "learning_rate": 1.8092665859918058e-05, "loss": 1.385, "step": 6828 }, { "epoch": 22.39016393442623, "grad_norm": 18.186328887939453, "learning_rate": 1.8092042017738158e-05, "loss": 1.2651, "step": 6829 }, { "epoch": 22.39344262295082, "grad_norm": 10.675835609436035, "learning_rate": 1.8091418084312067e-05, "loss": 1.2767, "step": 6830 }, { "epoch": 22.39672131147541, "grad_norm": 9.499977111816406, "learning_rate": 1.809079405964683e-05, "loss": 1.2959, "step": 6831 }, { "epoch": 22.4, "grad_norm": 8.336307525634766, "learning_rate": 1.8090169943749477e-05, "loss": 1.2529, "step": 6832 }, { "epoch": 22.40327868852459, "grad_norm": 8.637076377868652, "learning_rate": 1.8089545736627044e-05, "loss": 1.5166, "step": 6833 }, { "epoch": 22.40655737704918, "grad_norm": 7.121801853179932, "learning_rate": 1.8088921438286573e-05, "loss": 1.2117, "step": 6834 }, { "epoch": 22.40983606557377, "grad_norm": 7.761410236358643, "learning_rate": 1.80882970487351e-05, "loss": 1.2186, "step": 6835 }, { "epoch": 22.41311475409836, "grad_norm": 8.121712684631348, "learning_rate": 1.808767256797967e-05, "loss": 1.4401, "step": 6836 }, { "epoch": 22.41639344262295, "grad_norm": 8.152992248535156, "learning_rate": 1.8087047996027323e-05, "loss": 1.4106, "step": 6837 }, { "epoch": 22.41967213114754, "grad_norm": 8.380359649658203, "learning_rate": 1.80864233328851e-05, "loss": 1.26, "step": 6838 }, { "epoch": 22.42295081967213, "grad_norm": 13.237255096435547, "learning_rate": 1.8085798578560047e-05, "loss": 1.2407, "step": 6839 }, { "epoch": 22.42622950819672, "grad_norm": 11.113016128540039, "learning_rate": 1.808517373305921e-05, "loss": 1.2527, "step": 6840 }, { "epoch": 22.42950819672131, "grad_norm": 15.462684631347656, "learning_rate": 1.808454879638963e-05, "loss": 1.2349, "step": 6841 }, { "epoch": 22.432786885245903, "grad_norm": 8.451069831848145, "learning_rate": 1.8083923768558354e-05, "loss": 1.5454, "step": 6842 }, { "epoch": 22.43606557377049, "grad_norm": 8.82750415802002, "learning_rate": 1.8083298649572438e-05, "loss": 1.1887, "step": 6843 }, { "epoch": 22.439344262295084, "grad_norm": 14.475844383239746, "learning_rate": 1.8082673439438926e-05, "loss": 1.4174, "step": 6844 }, { "epoch": 22.442622950819672, "grad_norm": 10.99057388305664, "learning_rate": 1.8082048138164862e-05, "loss": 1.1024, "step": 6845 }, { "epoch": 22.445901639344264, "grad_norm": 8.610727310180664, "learning_rate": 1.8081422745757303e-05, "loss": 1.1088, "step": 6846 }, { "epoch": 22.449180327868852, "grad_norm": 15.320402145385742, "learning_rate": 1.80807972622233e-05, "loss": 1.2306, "step": 6847 }, { "epoch": 22.452459016393444, "grad_norm": 11.75052547454834, "learning_rate": 1.808017168756991e-05, "loss": 1.3821, "step": 6848 }, { "epoch": 22.455737704918032, "grad_norm": 8.097177505493164, "learning_rate": 1.8079546021804178e-05, "loss": 1.3508, "step": 6849 }, { "epoch": 22.459016393442624, "grad_norm": 9.8597412109375, "learning_rate": 1.8078920264933165e-05, "loss": 1.2476, "step": 6850 }, { "epoch": 22.462295081967213, "grad_norm": 12.036516189575195, "learning_rate": 1.8078294416963925e-05, "loss": 1.3071, "step": 6851 }, { "epoch": 22.465573770491805, "grad_norm": 15.49632453918457, "learning_rate": 1.8077668477903518e-05, "loss": 1.2332, "step": 6852 }, { "epoch": 22.468852459016393, "grad_norm": 10.97325611114502, "learning_rate": 1.8077042447759002e-05, "loss": 1.2156, "step": 6853 }, { "epoch": 22.472131147540985, "grad_norm": 9.503751754760742, "learning_rate": 1.807641632653743e-05, "loss": 1.423, "step": 6854 }, { "epoch": 22.475409836065573, "grad_norm": 7.989438056945801, "learning_rate": 1.8075790114245873e-05, "loss": 1.2, "step": 6855 }, { "epoch": 22.478688524590165, "grad_norm": 11.252107620239258, "learning_rate": 1.8075163810891378e-05, "loss": 1.4646, "step": 6856 }, { "epoch": 22.481967213114753, "grad_norm": 12.01799488067627, "learning_rate": 1.807453741648102e-05, "loss": 1.1633, "step": 6857 }, { "epoch": 22.485245901639345, "grad_norm": 8.77159595489502, "learning_rate": 1.8073910931021855e-05, "loss": 1.4644, "step": 6858 }, { "epoch": 22.488524590163934, "grad_norm": 9.308894157409668, "learning_rate": 1.8073284354520952e-05, "loss": 1.2866, "step": 6859 }, { "epoch": 22.491803278688526, "grad_norm": 9.182234764099121, "learning_rate": 1.807265768698537e-05, "loss": 1.3469, "step": 6860 }, { "epoch": 22.495081967213114, "grad_norm": 9.000635147094727, "learning_rate": 1.8072030928422183e-05, "loss": 1.354, "step": 6861 }, { "epoch": 22.498360655737706, "grad_norm": 13.310651779174805, "learning_rate": 1.8071404078838454e-05, "loss": 1.1432, "step": 6862 }, { "epoch": 22.501639344262294, "grad_norm": 10.553226470947266, "learning_rate": 1.807077713824125e-05, "loss": 1.2882, "step": 6863 }, { "epoch": 22.504918032786886, "grad_norm": 8.879755020141602, "learning_rate": 1.807015010663764e-05, "loss": 1.2686, "step": 6864 }, { "epoch": 22.508196721311474, "grad_norm": 9.82708740234375, "learning_rate": 1.8069522984034703e-05, "loss": 1.3348, "step": 6865 }, { "epoch": 22.511475409836066, "grad_norm": 7.170140266418457, "learning_rate": 1.80688957704395e-05, "loss": 1.4116, "step": 6866 }, { "epoch": 22.514754098360655, "grad_norm": 8.59340763092041, "learning_rate": 1.806826846585911e-05, "loss": 1.3533, "step": 6867 }, { "epoch": 22.518032786885247, "grad_norm": 8.731557846069336, "learning_rate": 1.8067641070300602e-05, "loss": 1.1272, "step": 6868 }, { "epoch": 22.521311475409835, "grad_norm": 7.531580924987793, "learning_rate": 1.8067013583771052e-05, "loss": 1.2866, "step": 6869 }, { "epoch": 22.524590163934427, "grad_norm": 26.877708435058594, "learning_rate": 1.806638600627754e-05, "loss": 1.2888, "step": 6870 }, { "epoch": 22.527868852459015, "grad_norm": 11.389907836914062, "learning_rate": 1.8065758337827135e-05, "loss": 1.271, "step": 6871 }, { "epoch": 22.531147540983607, "grad_norm": 12.171027183532715, "learning_rate": 1.806513057842692e-05, "loss": 1.1378, "step": 6872 }, { "epoch": 22.534426229508195, "grad_norm": 8.926263809204102, "learning_rate": 1.8064502728083973e-05, "loss": 1.182, "step": 6873 }, { "epoch": 22.537704918032787, "grad_norm": 8.981622695922852, "learning_rate": 1.806387478680537e-05, "loss": 1.3635, "step": 6874 }, { "epoch": 22.540983606557376, "grad_norm": 10.39754581451416, "learning_rate": 1.80632467545982e-05, "loss": 1.168, "step": 6875 }, { "epoch": 22.544262295081968, "grad_norm": 9.956761360168457, "learning_rate": 1.806261863146953e-05, "loss": 1.2563, "step": 6876 }, { "epoch": 22.547540983606556, "grad_norm": 11.37752914428711, "learning_rate": 1.806199041742646e-05, "loss": 1.2234, "step": 6877 }, { "epoch": 22.550819672131148, "grad_norm": 8.75468921661377, "learning_rate": 1.806136211247606e-05, "loss": 1.2983, "step": 6878 }, { "epoch": 22.554098360655736, "grad_norm": 9.275050163269043, "learning_rate": 1.8060733716625427e-05, "loss": 1.2397, "step": 6879 }, { "epoch": 22.557377049180328, "grad_norm": 11.689332008361816, "learning_rate": 1.8060105229881635e-05, "loss": 1.2158, "step": 6880 }, { "epoch": 22.560655737704916, "grad_norm": 9.73781967163086, "learning_rate": 1.8059476652251778e-05, "loss": 1.1101, "step": 6881 }, { "epoch": 22.56393442622951, "grad_norm": 9.193904876708984, "learning_rate": 1.8058847983742943e-05, "loss": 1.2629, "step": 6882 }, { "epoch": 22.567213114754097, "grad_norm": 12.972116470336914, "learning_rate": 1.8058219224362217e-05, "loss": 1.1274, "step": 6883 }, { "epoch": 22.57049180327869, "grad_norm": 23.889738082885742, "learning_rate": 1.805759037411669e-05, "loss": 1.3025, "step": 6884 }, { "epoch": 22.57377049180328, "grad_norm": 12.788161277770996, "learning_rate": 1.8056961433013455e-05, "loss": 1.0649, "step": 6885 }, { "epoch": 22.57704918032787, "grad_norm": 8.857391357421875, "learning_rate": 1.80563324010596e-05, "loss": 1.1677, "step": 6886 }, { "epoch": 22.58032786885246, "grad_norm": 9.746606826782227, "learning_rate": 1.805570327826222e-05, "loss": 1.2568, "step": 6887 }, { "epoch": 22.58360655737705, "grad_norm": 11.508660316467285, "learning_rate": 1.8055074064628416e-05, "loss": 1.0205, "step": 6888 }, { "epoch": 22.58688524590164, "grad_norm": 29.817975997924805, "learning_rate": 1.805444476016527e-05, "loss": 1.1211, "step": 6889 }, { "epoch": 22.59016393442623, "grad_norm": 10.187378883361816, "learning_rate": 1.805381536487989e-05, "loss": 1.1965, "step": 6890 }, { "epoch": 22.59344262295082, "grad_norm": 12.161118507385254, "learning_rate": 1.8053185878779364e-05, "loss": 1.2629, "step": 6891 }, { "epoch": 22.59672131147541, "grad_norm": 22.064027786254883, "learning_rate": 1.80525563018708e-05, "loss": 1.4005, "step": 6892 }, { "epoch": 22.6, "grad_norm": 10.198338508605957, "learning_rate": 1.8051926634161282e-05, "loss": 1.325, "step": 6893 }, { "epoch": 22.60327868852459, "grad_norm": 6.925838470458984, "learning_rate": 1.8051296875657928e-05, "loss": 1.3875, "step": 6894 }, { "epoch": 22.60655737704918, "grad_norm": 10.210555076599121, "learning_rate": 1.8050667026367827e-05, "loss": 1.0477, "step": 6895 }, { "epoch": 22.60983606557377, "grad_norm": 13.283584594726562, "learning_rate": 1.805003708629808e-05, "loss": 1.2515, "step": 6896 }, { "epoch": 22.613114754098362, "grad_norm": 9.509961128234863, "learning_rate": 1.8049407055455802e-05, "loss": 1.2281, "step": 6897 }, { "epoch": 22.61639344262295, "grad_norm": 10.388809204101562, "learning_rate": 1.804877693384809e-05, "loss": 1.3057, "step": 6898 }, { "epoch": 22.619672131147542, "grad_norm": 8.960524559020996, "learning_rate": 1.8048146721482044e-05, "loss": 1.3938, "step": 6899 }, { "epoch": 22.62295081967213, "grad_norm": 9.928884506225586, "learning_rate": 1.804751641836478e-05, "loss": 1.0762, "step": 6900 }, { "epoch": 22.626229508196722, "grad_norm": 7.991311550140381, "learning_rate": 1.80468860245034e-05, "loss": 1.3525, "step": 6901 }, { "epoch": 22.62950819672131, "grad_norm": 9.91824722290039, "learning_rate": 1.804625553990501e-05, "loss": 1.2644, "step": 6902 }, { "epoch": 22.632786885245903, "grad_norm": 38.75449752807617, "learning_rate": 1.8045624964576727e-05, "loss": 1.0146, "step": 6903 }, { "epoch": 22.63606557377049, "grad_norm": 13.562603950500488, "learning_rate": 1.8044994298525657e-05, "loss": 1.2385, "step": 6904 }, { "epoch": 22.639344262295083, "grad_norm": 9.565614700317383, "learning_rate": 1.804436354175891e-05, "loss": 1.3167, "step": 6905 }, { "epoch": 22.64262295081967, "grad_norm": 13.972830772399902, "learning_rate": 1.80437326942836e-05, "loss": 1.2466, "step": 6906 }, { "epoch": 22.645901639344263, "grad_norm": 9.635268211364746, "learning_rate": 1.804310175610684e-05, "loss": 1.1752, "step": 6907 }, { "epoch": 22.64918032786885, "grad_norm": 22.86267852783203, "learning_rate": 1.8042470727235746e-05, "loss": 1.4619, "step": 6908 }, { "epoch": 22.652459016393443, "grad_norm": 12.006753921508789, "learning_rate": 1.8041839607677432e-05, "loss": 1.3708, "step": 6909 }, { "epoch": 22.65573770491803, "grad_norm": 14.569563865661621, "learning_rate": 1.8041208397439017e-05, "loss": 1.2107, "step": 6910 }, { "epoch": 22.659016393442624, "grad_norm": 12.813419342041016, "learning_rate": 1.8040577096527616e-05, "loss": 1.2346, "step": 6911 }, { "epoch": 22.662295081967212, "grad_norm": 8.378348350524902, "learning_rate": 1.803994570495035e-05, "loss": 1.3077, "step": 6912 }, { "epoch": 22.665573770491804, "grad_norm": 15.272356033325195, "learning_rate": 1.803931422271433e-05, "loss": 1.2852, "step": 6913 }, { "epoch": 22.668852459016392, "grad_norm": 11.5318021774292, "learning_rate": 1.8038682649826687e-05, "loss": 1.1813, "step": 6914 }, { "epoch": 22.672131147540984, "grad_norm": 9.956487655639648, "learning_rate": 1.803805098629454e-05, "loss": 1.259, "step": 6915 }, { "epoch": 22.675409836065572, "grad_norm": 15.221031188964844, "learning_rate": 1.803741923212501e-05, "loss": 1.363, "step": 6916 }, { "epoch": 22.678688524590164, "grad_norm": 9.410361289978027, "learning_rate": 1.803678738732522e-05, "loss": 1.2079, "step": 6917 }, { "epoch": 22.681967213114753, "grad_norm": 9.04275894165039, "learning_rate": 1.8036155451902298e-05, "loss": 1.0342, "step": 6918 }, { "epoch": 22.685245901639345, "grad_norm": 10.153725624084473, "learning_rate": 1.8035523425863368e-05, "loss": 1.0898, "step": 6919 }, { "epoch": 22.688524590163933, "grad_norm": 8.781412124633789, "learning_rate": 1.8034891309215555e-05, "loss": 1.3129, "step": 6920 }, { "epoch": 22.691803278688525, "grad_norm": 12.367467880249023, "learning_rate": 1.803425910196599e-05, "loss": 1.3098, "step": 6921 }, { "epoch": 22.695081967213113, "grad_norm": 9.021690368652344, "learning_rate": 1.80336268041218e-05, "loss": 1.3616, "step": 6922 }, { "epoch": 22.698360655737705, "grad_norm": 17.866363525390625, "learning_rate": 1.803299441569011e-05, "loss": 1.3542, "step": 6923 }, { "epoch": 22.701639344262293, "grad_norm": 10.537566184997559, "learning_rate": 1.8032361936678063e-05, "loss": 1.2495, "step": 6924 }, { "epoch": 22.704918032786885, "grad_norm": 8.185550689697266, "learning_rate": 1.803172936709278e-05, "loss": 1.2328, "step": 6925 }, { "epoch": 22.708196721311474, "grad_norm": 10.316000938415527, "learning_rate": 1.80310967069414e-05, "loss": 1.1665, "step": 6926 }, { "epoch": 22.711475409836066, "grad_norm": 9.787848472595215, "learning_rate": 1.803046395623105e-05, "loss": 1.1881, "step": 6927 }, { "epoch": 22.714754098360658, "grad_norm": 8.389756202697754, "learning_rate": 1.8029831114968872e-05, "loss": 1.2678, "step": 6928 }, { "epoch": 22.718032786885246, "grad_norm": 9.65555191040039, "learning_rate": 1.8029198183162e-05, "loss": 1.3313, "step": 6929 }, { "epoch": 22.721311475409838, "grad_norm": 13.07250690460205, "learning_rate": 1.8028565160817567e-05, "loss": 1.4453, "step": 6930 }, { "epoch": 22.724590163934426, "grad_norm": 6.401609420776367, "learning_rate": 1.8027932047942717e-05, "loss": 1.3123, "step": 6931 }, { "epoch": 22.727868852459018, "grad_norm": 7.277573108673096, "learning_rate": 1.8027298844544585e-05, "loss": 1.2693, "step": 6932 }, { "epoch": 22.731147540983606, "grad_norm": 10.686644554138184, "learning_rate": 1.802666555063031e-05, "loss": 1.3379, "step": 6933 }, { "epoch": 22.7344262295082, "grad_norm": 8.124231338500977, "learning_rate": 1.802603216620704e-05, "loss": 1.3972, "step": 6934 }, { "epoch": 22.737704918032787, "grad_norm": 8.319644927978516, "learning_rate": 1.802539869128191e-05, "loss": 1.2842, "step": 6935 }, { "epoch": 22.74098360655738, "grad_norm": 11.18932819366455, "learning_rate": 1.8024765125862064e-05, "loss": 1.5098, "step": 6936 }, { "epoch": 22.744262295081967, "grad_norm": 11.093052864074707, "learning_rate": 1.8024131469954652e-05, "loss": 1.1133, "step": 6937 }, { "epoch": 22.74754098360656, "grad_norm": 7.4072747230529785, "learning_rate": 1.802349772356681e-05, "loss": 1.2515, "step": 6938 }, { "epoch": 22.750819672131147, "grad_norm": 9.162259101867676, "learning_rate": 1.8022863886705692e-05, "loss": 1.2106, "step": 6939 }, { "epoch": 22.75409836065574, "grad_norm": 25.95260238647461, "learning_rate": 1.8022229959378438e-05, "loss": 1.1483, "step": 6940 }, { "epoch": 22.757377049180327, "grad_norm": 10.604398727416992, "learning_rate": 1.8021595941592206e-05, "loss": 1.1599, "step": 6941 }, { "epoch": 22.76065573770492, "grad_norm": 7.8548688888549805, "learning_rate": 1.8020961833354133e-05, "loss": 1.397, "step": 6942 }, { "epoch": 22.763934426229508, "grad_norm": 7.21918249130249, "learning_rate": 1.802032763467138e-05, "loss": 1.249, "step": 6943 }, { "epoch": 22.7672131147541, "grad_norm": 9.372376441955566, "learning_rate": 1.8019693345551093e-05, "loss": 1.3904, "step": 6944 }, { "epoch": 22.770491803278688, "grad_norm": 10.96058177947998, "learning_rate": 1.8019058966000426e-05, "loss": 1.4497, "step": 6945 }, { "epoch": 22.77377049180328, "grad_norm": 10.313456535339355, "learning_rate": 1.8018424496026528e-05, "loss": 1.3967, "step": 6946 }, { "epoch": 22.777049180327868, "grad_norm": 13.470430374145508, "learning_rate": 1.801778993563656e-05, "loss": 1.4716, "step": 6947 }, { "epoch": 22.78032786885246, "grad_norm": 8.7125244140625, "learning_rate": 1.8017155284837672e-05, "loss": 1.4222, "step": 6948 }, { "epoch": 22.78360655737705, "grad_norm": 9.973340034484863, "learning_rate": 1.8016520543637025e-05, "loss": 1.3694, "step": 6949 }, { "epoch": 22.78688524590164, "grad_norm": 10.46109390258789, "learning_rate": 1.801588571204177e-05, "loss": 1.2656, "step": 6950 }, { "epoch": 22.79016393442623, "grad_norm": 8.362810134887695, "learning_rate": 1.8015250790059075e-05, "loss": 1.3904, "step": 6951 }, { "epoch": 22.79344262295082, "grad_norm": 8.832826614379883, "learning_rate": 1.801461577769609e-05, "loss": 1.3242, "step": 6952 }, { "epoch": 22.79672131147541, "grad_norm": 10.545952796936035, "learning_rate": 1.8013980674959975e-05, "loss": 1.3928, "step": 6953 }, { "epoch": 22.8, "grad_norm": 14.22304630279541, "learning_rate": 1.8013345481857903e-05, "loss": 1.0702, "step": 6954 }, { "epoch": 22.80327868852459, "grad_norm": 8.835540771484375, "learning_rate": 1.8012710198397022e-05, "loss": 1.2532, "step": 6955 }, { "epoch": 22.80655737704918, "grad_norm": 8.099288940429688, "learning_rate": 1.801207482458451e-05, "loss": 1.106, "step": 6956 }, { "epoch": 22.80983606557377, "grad_norm": 8.999001502990723, "learning_rate": 1.8011439360427517e-05, "loss": 1.3667, "step": 6957 }, { "epoch": 22.81311475409836, "grad_norm": 13.26650333404541, "learning_rate": 1.8010803805933217e-05, "loss": 1.303, "step": 6958 }, { "epoch": 22.81639344262295, "grad_norm": 8.710965156555176, "learning_rate": 1.801016816110878e-05, "loss": 1.2788, "step": 6959 }, { "epoch": 22.81967213114754, "grad_norm": 7.6531219482421875, "learning_rate": 1.800953242596136e-05, "loss": 1.1096, "step": 6960 }, { "epoch": 22.82295081967213, "grad_norm": 10.425765991210938, "learning_rate": 1.8008896600498142e-05, "loss": 1.1393, "step": 6961 }, { "epoch": 22.82622950819672, "grad_norm": 11.334968566894531, "learning_rate": 1.8008260684726282e-05, "loss": 1.363, "step": 6962 }, { "epoch": 22.82950819672131, "grad_norm": 8.607468605041504, "learning_rate": 1.800762467865296e-05, "loss": 1.3635, "step": 6963 }, { "epoch": 22.832786885245902, "grad_norm": 8.577007293701172, "learning_rate": 1.800698858228534e-05, "loss": 1.1956, "step": 6964 }, { "epoch": 22.83606557377049, "grad_norm": 8.079314231872559, "learning_rate": 1.8006352395630604e-05, "loss": 1.1855, "step": 6965 }, { "epoch": 22.839344262295082, "grad_norm": 7.584301471710205, "learning_rate": 1.8005716118695916e-05, "loss": 1.345, "step": 6966 }, { "epoch": 22.84262295081967, "grad_norm": 9.48681354522705, "learning_rate": 1.8005079751488455e-05, "loss": 1.3787, "step": 6967 }, { "epoch": 22.845901639344262, "grad_norm": 7.293821334838867, "learning_rate": 1.8004443294015396e-05, "loss": 1.5581, "step": 6968 }, { "epoch": 22.84918032786885, "grad_norm": 7.9839558601379395, "learning_rate": 1.800380674628392e-05, "loss": 1.1664, "step": 6969 }, { "epoch": 22.852459016393443, "grad_norm": 7.856146335601807, "learning_rate": 1.8003170108301198e-05, "loss": 1.3157, "step": 6970 }, { "epoch": 22.855737704918035, "grad_norm": 8.837114334106445, "learning_rate": 1.8002533380074413e-05, "loss": 1.251, "step": 6971 }, { "epoch": 22.859016393442623, "grad_norm": 9.500969886779785, "learning_rate": 1.8001896561610746e-05, "loss": 1.2922, "step": 6972 }, { "epoch": 22.862295081967215, "grad_norm": 11.446029663085938, "learning_rate": 1.800125965291737e-05, "loss": 1.0726, "step": 6973 }, { "epoch": 22.865573770491803, "grad_norm": 9.778746604919434, "learning_rate": 1.8000622654001476e-05, "loss": 1.201, "step": 6974 }, { "epoch": 22.868852459016395, "grad_norm": 11.574994087219238, "learning_rate": 1.7999985564870243e-05, "loss": 1.1475, "step": 6975 }, { "epoch": 22.872131147540983, "grad_norm": 15.645569801330566, "learning_rate": 1.799934838553085e-05, "loss": 1.1083, "step": 6976 }, { "epoch": 22.875409836065575, "grad_norm": 12.682771682739258, "learning_rate": 1.7998711115990494e-05, "loss": 1.1907, "step": 6977 }, { "epoch": 22.878688524590164, "grad_norm": 8.735747337341309, "learning_rate": 1.799807375625635e-05, "loss": 1.4526, "step": 6978 }, { "epoch": 22.881967213114756, "grad_norm": 9.249789237976074, "learning_rate": 1.7997436306335608e-05, "loss": 1.2825, "step": 6979 }, { "epoch": 22.885245901639344, "grad_norm": 11.03038501739502, "learning_rate": 1.799679876623546e-05, "loss": 1.3555, "step": 6980 }, { "epoch": 22.888524590163936, "grad_norm": 8.950128555297852, "learning_rate": 1.7996161135963085e-05, "loss": 1.2581, "step": 6981 }, { "epoch": 22.891803278688524, "grad_norm": 9.900246620178223, "learning_rate": 1.7995523415525684e-05, "loss": 1.4399, "step": 6982 }, { "epoch": 22.895081967213116, "grad_norm": 7.974553108215332, "learning_rate": 1.799488560493044e-05, "loss": 1.3289, "step": 6983 }, { "epoch": 22.898360655737704, "grad_norm": 13.697607040405273, "learning_rate": 1.799424770418455e-05, "loss": 1.2842, "step": 6984 }, { "epoch": 22.901639344262296, "grad_norm": 9.421948432922363, "learning_rate": 1.7993609713295204e-05, "loss": 1.2812, "step": 6985 }, { "epoch": 22.904918032786885, "grad_norm": 7.707674503326416, "learning_rate": 1.7992971632269603e-05, "loss": 1.2977, "step": 6986 }, { "epoch": 22.908196721311477, "grad_norm": 11.944683074951172, "learning_rate": 1.799233346111493e-05, "loss": 1.1477, "step": 6987 }, { "epoch": 22.911475409836065, "grad_norm": 7.749465465545654, "learning_rate": 1.7991695199838388e-05, "loss": 1.2275, "step": 6988 }, { "epoch": 22.914754098360657, "grad_norm": 12.02343463897705, "learning_rate": 1.7991056848447175e-05, "loss": 1.231, "step": 6989 }, { "epoch": 22.918032786885245, "grad_norm": 10.903786659240723, "learning_rate": 1.7990418406948488e-05, "loss": 1.0649, "step": 6990 }, { "epoch": 22.921311475409837, "grad_norm": 8.772539138793945, "learning_rate": 1.7989779875349524e-05, "loss": 1.3452, "step": 6991 }, { "epoch": 22.924590163934425, "grad_norm": 12.924076080322266, "learning_rate": 1.7989141253657486e-05, "loss": 1.3796, "step": 6992 }, { "epoch": 22.927868852459017, "grad_norm": 9.46132755279541, "learning_rate": 1.798850254187957e-05, "loss": 1.3665, "step": 6993 }, { "epoch": 22.931147540983606, "grad_norm": 9.2204008102417, "learning_rate": 1.7987863740022985e-05, "loss": 1.1444, "step": 6994 }, { "epoch": 22.934426229508198, "grad_norm": 8.363957405090332, "learning_rate": 1.7987224848094932e-05, "loss": 1.3369, "step": 6995 }, { "epoch": 22.937704918032786, "grad_norm": 8.114672660827637, "learning_rate": 1.798658586610261e-05, "loss": 1.3315, "step": 6996 }, { "epoch": 22.940983606557378, "grad_norm": 12.337542533874512, "learning_rate": 1.7985946794053234e-05, "loss": 1.1521, "step": 6997 }, { "epoch": 22.944262295081966, "grad_norm": 8.157447814941406, "learning_rate": 1.7985307631954e-05, "loss": 1.0485, "step": 6998 }, { "epoch": 22.947540983606558, "grad_norm": 9.020703315734863, "learning_rate": 1.798466837981212e-05, "loss": 1.3903, "step": 6999 }, { "epoch": 22.950819672131146, "grad_norm": 17.837547302246094, "learning_rate": 1.7984029037634804e-05, "loss": 1.2803, "step": 7000 }, { "epoch": 22.95409836065574, "grad_norm": 8.894123077392578, "learning_rate": 1.798338960542926e-05, "loss": 1.3668, "step": 7001 }, { "epoch": 22.957377049180327, "grad_norm": 8.102474212646484, "learning_rate": 1.7982750083202698e-05, "loss": 1.3219, "step": 7002 }, { "epoch": 22.96065573770492, "grad_norm": 8.325446128845215, "learning_rate": 1.7982110470962325e-05, "loss": 1.0531, "step": 7003 }, { "epoch": 22.963934426229507, "grad_norm": 7.938252925872803, "learning_rate": 1.798147076871536e-05, "loss": 1.3115, "step": 7004 }, { "epoch": 22.9672131147541, "grad_norm": 7.1200666427612305, "learning_rate": 1.7980830976469015e-05, "loss": 1.4016, "step": 7005 }, { "epoch": 22.970491803278687, "grad_norm": 8.962170600891113, "learning_rate": 1.7980191094230497e-05, "loss": 1.2869, "step": 7006 }, { "epoch": 22.97377049180328, "grad_norm": 10.336400032043457, "learning_rate": 1.7979551122007035e-05, "loss": 1.3075, "step": 7007 }, { "epoch": 22.977049180327867, "grad_norm": 8.23541259765625, "learning_rate": 1.797891105980583e-05, "loss": 1.4902, "step": 7008 }, { "epoch": 22.98032786885246, "grad_norm": 6.986274242401123, "learning_rate": 1.797827090763411e-05, "loss": 1.3181, "step": 7009 }, { "epoch": 22.983606557377048, "grad_norm": 10.38596248626709, "learning_rate": 1.797763066549909e-05, "loss": 1.2683, "step": 7010 }, { "epoch": 22.98688524590164, "grad_norm": 7.999907493591309, "learning_rate": 1.797699033340799e-05, "loss": 1.366, "step": 7011 }, { "epoch": 22.990163934426228, "grad_norm": 8.320189476013184, "learning_rate": 1.797634991136803e-05, "loss": 1.2705, "step": 7012 }, { "epoch": 22.99344262295082, "grad_norm": 8.401735305786133, "learning_rate": 1.7975709399386432e-05, "loss": 1.2908, "step": 7013 }, { "epoch": 22.99672131147541, "grad_norm": 11.874272346496582, "learning_rate": 1.797506879747042e-05, "loss": 1.2109, "step": 7014 }, { "epoch": 23.0, "grad_norm": 19.53700065612793, "learning_rate": 1.797442810562721e-05, "loss": 1.105, "step": 7015 }, { "epoch": 23.003278688524592, "grad_norm": 7.2453413009643555, "learning_rate": 1.7973787323864035e-05, "loss": 1.0911, "step": 7016 }, { "epoch": 23.00655737704918, "grad_norm": 11.179036140441895, "learning_rate": 1.7973146452188114e-05, "loss": 1.2546, "step": 7017 }, { "epoch": 23.009836065573772, "grad_norm": 11.634724617004395, "learning_rate": 1.7972505490606682e-05, "loss": 1.1874, "step": 7018 }, { "epoch": 23.01311475409836, "grad_norm": 11.015631675720215, "learning_rate": 1.7971864439126957e-05, "loss": 1.1812, "step": 7019 }, { "epoch": 23.016393442622952, "grad_norm": 9.838406562805176, "learning_rate": 1.7971223297756172e-05, "loss": 1.4084, "step": 7020 }, { "epoch": 23.01967213114754, "grad_norm": 8.285629272460938, "learning_rate": 1.7970582066501557e-05, "loss": 1.2584, "step": 7021 }, { "epoch": 23.022950819672133, "grad_norm": 9.41459846496582, "learning_rate": 1.7969940745370344e-05, "loss": 1.0066, "step": 7022 }, { "epoch": 23.02622950819672, "grad_norm": 9.24980354309082, "learning_rate": 1.796929933436976e-05, "loss": 1.229, "step": 7023 }, { "epoch": 23.029508196721313, "grad_norm": 7.854450702667236, "learning_rate": 1.7968657833507043e-05, "loss": 1.2083, "step": 7024 }, { "epoch": 23.0327868852459, "grad_norm": 8.32956314086914, "learning_rate": 1.796801624278942e-05, "loss": 1.0447, "step": 7025 }, { "epoch": 23.036065573770493, "grad_norm": 11.411928176879883, "learning_rate": 1.796737456222413e-05, "loss": 1.181, "step": 7026 }, { "epoch": 23.03934426229508, "grad_norm": 16.227413177490234, "learning_rate": 1.796673279181841e-05, "loss": 1.1875, "step": 7027 }, { "epoch": 23.042622950819673, "grad_norm": 10.306917190551758, "learning_rate": 1.7966090931579493e-05, "loss": 1.2534, "step": 7028 }, { "epoch": 23.04590163934426, "grad_norm": 12.01530647277832, "learning_rate": 1.7965448981514617e-05, "loss": 1.2415, "step": 7029 }, { "epoch": 23.049180327868854, "grad_norm": 7.7121453285217285, "learning_rate": 1.7964806941631024e-05, "loss": 1.2968, "step": 7030 }, { "epoch": 23.052459016393442, "grad_norm": 7.593451976776123, "learning_rate": 1.796416481193595e-05, "loss": 1.1545, "step": 7031 }, { "epoch": 23.055737704918034, "grad_norm": 8.339926719665527, "learning_rate": 1.7963522592436638e-05, "loss": 1.3619, "step": 7032 }, { "epoch": 23.059016393442622, "grad_norm": 9.396197319030762, "learning_rate": 1.7962880283140328e-05, "loss": 1.2234, "step": 7033 }, { "epoch": 23.062295081967214, "grad_norm": 9.2750883102417, "learning_rate": 1.7962237884054264e-05, "loss": 1.5013, "step": 7034 }, { "epoch": 23.065573770491802, "grad_norm": 9.043614387512207, "learning_rate": 1.7961595395185685e-05, "loss": 1.3611, "step": 7035 }, { "epoch": 23.068852459016394, "grad_norm": 8.203218460083008, "learning_rate": 1.7960952816541847e-05, "loss": 1.152, "step": 7036 }, { "epoch": 23.072131147540983, "grad_norm": 13.277657508850098, "learning_rate": 1.796031014812998e-05, "loss": 1.2915, "step": 7037 }, { "epoch": 23.075409836065575, "grad_norm": 11.986470222473145, "learning_rate": 1.795966738995735e-05, "loss": 1.1888, "step": 7038 }, { "epoch": 23.078688524590163, "grad_norm": 6.97630500793457, "learning_rate": 1.7959024542031187e-05, "loss": 1.1824, "step": 7039 }, { "epoch": 23.081967213114755, "grad_norm": 7.265919208526611, "learning_rate": 1.795838160435875e-05, "loss": 1.2222, "step": 7040 }, { "epoch": 23.085245901639343, "grad_norm": 7.453884601593018, "learning_rate": 1.7957738576947283e-05, "loss": 1.1218, "step": 7041 }, { "epoch": 23.088524590163935, "grad_norm": 7.664487361907959, "learning_rate": 1.795709545980404e-05, "loss": 1.4075, "step": 7042 }, { "epoch": 23.091803278688523, "grad_norm": 9.828523635864258, "learning_rate": 1.7956452252936275e-05, "loss": 1.1042, "step": 7043 }, { "epoch": 23.095081967213115, "grad_norm": 8.882673263549805, "learning_rate": 1.7955808956351237e-05, "loss": 1.377, "step": 7044 }, { "epoch": 23.098360655737704, "grad_norm": 7.957663536071777, "learning_rate": 1.7955165570056184e-05, "loss": 1.1885, "step": 7045 }, { "epoch": 23.101639344262296, "grad_norm": 9.437053680419922, "learning_rate": 1.7954522094058363e-05, "loss": 1.3546, "step": 7046 }, { "epoch": 23.104918032786884, "grad_norm": 7.524319171905518, "learning_rate": 1.7953878528365035e-05, "loss": 1.1937, "step": 7047 }, { "epoch": 23.108196721311476, "grad_norm": 13.270665168762207, "learning_rate": 1.795323487298346e-05, "loss": 1.1577, "step": 7048 }, { "epoch": 23.111475409836064, "grad_norm": 7.688432216644287, "learning_rate": 1.795259112792089e-05, "loss": 1.2748, "step": 7049 }, { "epoch": 23.114754098360656, "grad_norm": 9.760720252990723, "learning_rate": 1.7951947293184587e-05, "loss": 1.0488, "step": 7050 }, { "epoch": 23.118032786885244, "grad_norm": 11.153817176818848, "learning_rate": 1.7951303368781808e-05, "loss": 1.4756, "step": 7051 }, { "epoch": 23.121311475409836, "grad_norm": 8.526394844055176, "learning_rate": 1.7950659354719823e-05, "loss": 1.2864, "step": 7052 }, { "epoch": 23.124590163934425, "grad_norm": 7.425290584564209, "learning_rate": 1.795001525100588e-05, "loss": 1.1824, "step": 7053 }, { "epoch": 23.127868852459017, "grad_norm": 8.932516098022461, "learning_rate": 1.7949371057647255e-05, "loss": 1.265, "step": 7054 }, { "epoch": 23.131147540983605, "grad_norm": 42.237300872802734, "learning_rate": 1.79487267746512e-05, "loss": 1.2627, "step": 7055 }, { "epoch": 23.134426229508197, "grad_norm": 8.444655418395996, "learning_rate": 1.794808240202499e-05, "loss": 1.2224, "step": 7056 }, { "epoch": 23.137704918032785, "grad_norm": 20.01763153076172, "learning_rate": 1.7947437939775887e-05, "loss": 1.1366, "step": 7057 }, { "epoch": 23.140983606557377, "grad_norm": 7.394561290740967, "learning_rate": 1.7946793387911156e-05, "loss": 1.1025, "step": 7058 }, { "epoch": 23.14426229508197, "grad_norm": 8.1972017288208, "learning_rate": 1.794614874643807e-05, "loss": 1.396, "step": 7059 }, { "epoch": 23.147540983606557, "grad_norm": 9.913368225097656, "learning_rate": 1.7945504015363894e-05, "loss": 1.2809, "step": 7060 }, { "epoch": 23.15081967213115, "grad_norm": 17.41218376159668, "learning_rate": 1.7944859194695896e-05, "loss": 1.2073, "step": 7061 }, { "epoch": 23.154098360655738, "grad_norm": 11.676180839538574, "learning_rate": 1.7944214284441353e-05, "loss": 0.9995, "step": 7062 }, { "epoch": 23.15737704918033, "grad_norm": 7.244586944580078, "learning_rate": 1.7943569284607533e-05, "loss": 1.2695, "step": 7063 }, { "epoch": 23.160655737704918, "grad_norm": 10.005070686340332, "learning_rate": 1.7942924195201707e-05, "loss": 1.3667, "step": 7064 }, { "epoch": 23.16393442622951, "grad_norm": 9.63204574584961, "learning_rate": 1.7942279016231156e-05, "loss": 1.3376, "step": 7065 }, { "epoch": 23.167213114754098, "grad_norm": 8.19906234741211, "learning_rate": 1.794163374770315e-05, "loss": 1.2302, "step": 7066 }, { "epoch": 23.17049180327869, "grad_norm": 11.169666290283203, "learning_rate": 1.7940988389624968e-05, "loss": 1.3547, "step": 7067 }, { "epoch": 23.17377049180328, "grad_norm": 9.998291969299316, "learning_rate": 1.7940342942003884e-05, "loss": 1.4854, "step": 7068 }, { "epoch": 23.17704918032787, "grad_norm": 13.33810043334961, "learning_rate": 1.7939697404847175e-05, "loss": 1.0889, "step": 7069 }, { "epoch": 23.18032786885246, "grad_norm": 8.848636627197266, "learning_rate": 1.7939051778162126e-05, "loss": 1.458, "step": 7070 }, { "epoch": 23.18360655737705, "grad_norm": 8.97909164428711, "learning_rate": 1.7938406061956012e-05, "loss": 1.3225, "step": 7071 }, { "epoch": 23.18688524590164, "grad_norm": 8.871482849121094, "learning_rate": 1.7937760256236117e-05, "loss": 1.3359, "step": 7072 }, { "epoch": 23.19016393442623, "grad_norm": 7.712123870849609, "learning_rate": 1.793711436100972e-05, "loss": 1.3225, "step": 7073 }, { "epoch": 23.19344262295082, "grad_norm": 8.570713996887207, "learning_rate": 1.793646837628411e-05, "loss": 0.8871, "step": 7074 }, { "epoch": 23.19672131147541, "grad_norm": 6.965730667114258, "learning_rate": 1.7935822302066564e-05, "loss": 1.3059, "step": 7075 }, { "epoch": 23.2, "grad_norm": 9.330788612365723, "learning_rate": 1.793517613836437e-05, "loss": 1.344, "step": 7076 }, { "epoch": 23.20327868852459, "grad_norm": 8.87459659576416, "learning_rate": 1.7934529885184817e-05, "loss": 1.2954, "step": 7077 }, { "epoch": 23.20655737704918, "grad_norm": 18.423757553100586, "learning_rate": 1.7933883542535185e-05, "loss": 0.9527, "step": 7078 }, { "epoch": 23.20983606557377, "grad_norm": 10.146336555480957, "learning_rate": 1.7933237110422773e-05, "loss": 1.0812, "step": 7079 }, { "epoch": 23.21311475409836, "grad_norm": 7.7730512619018555, "learning_rate": 1.793259058885486e-05, "loss": 1.2141, "step": 7080 }, { "epoch": 23.21639344262295, "grad_norm": 13.818502426147461, "learning_rate": 1.7931943977838742e-05, "loss": 1.1731, "step": 7081 }, { "epoch": 23.21967213114754, "grad_norm": 13.35306453704834, "learning_rate": 1.793129727738171e-05, "loss": 0.9668, "step": 7082 }, { "epoch": 23.222950819672132, "grad_norm": 8.222099304199219, "learning_rate": 1.7930650487491047e-05, "loss": 1.2321, "step": 7083 }, { "epoch": 23.22622950819672, "grad_norm": 9.777535438537598, "learning_rate": 1.7930003608174062e-05, "loss": 1.3176, "step": 7084 }, { "epoch": 23.229508196721312, "grad_norm": 10.567349433898926, "learning_rate": 1.792935663943804e-05, "loss": 1.4082, "step": 7085 }, { "epoch": 23.2327868852459, "grad_norm": 7.563667297363281, "learning_rate": 1.7928709581290276e-05, "loss": 1.2795, "step": 7086 }, { "epoch": 23.236065573770492, "grad_norm": 17.04847526550293, "learning_rate": 1.7928062433738065e-05, "loss": 1.0045, "step": 7087 }, { "epoch": 23.23934426229508, "grad_norm": 12.102298736572266, "learning_rate": 1.792741519678871e-05, "loss": 1.0725, "step": 7088 }, { "epoch": 23.242622950819673, "grad_norm": 9.35317325592041, "learning_rate": 1.7926767870449507e-05, "loss": 0.952, "step": 7089 }, { "epoch": 23.24590163934426, "grad_norm": 11.131791114807129, "learning_rate": 1.7926120454727753e-05, "loss": 1.2166, "step": 7090 }, { "epoch": 23.249180327868853, "grad_norm": 7.618372917175293, "learning_rate": 1.792547294963075e-05, "loss": 1.1531, "step": 7091 }, { "epoch": 23.25245901639344, "grad_norm": 8.318041801452637, "learning_rate": 1.79248253551658e-05, "loss": 1.4246, "step": 7092 }, { "epoch": 23.255737704918033, "grad_norm": 10.420082092285156, "learning_rate": 1.7924177671340205e-05, "loss": 1.1826, "step": 7093 }, { "epoch": 23.25901639344262, "grad_norm": 206.55276489257812, "learning_rate": 1.7923529898161263e-05, "loss": 1.3201, "step": 7094 }, { "epoch": 23.262295081967213, "grad_norm": 17.441314697265625, "learning_rate": 1.7922882035636287e-05, "loss": 1.0063, "step": 7095 }, { "epoch": 23.2655737704918, "grad_norm": 8.181283950805664, "learning_rate": 1.7922234083772577e-05, "loss": 1.1643, "step": 7096 }, { "epoch": 23.268852459016394, "grad_norm": 13.13608169555664, "learning_rate": 1.7921586042577442e-05, "loss": 1.0188, "step": 7097 }, { "epoch": 23.272131147540982, "grad_norm": 19.641298294067383, "learning_rate": 1.7920937912058187e-05, "loss": 1.1348, "step": 7098 }, { "epoch": 23.275409836065574, "grad_norm": 7.358631134033203, "learning_rate": 1.7920289692222123e-05, "loss": 1.3589, "step": 7099 }, { "epoch": 23.278688524590162, "grad_norm": 9.291637420654297, "learning_rate": 1.791964138307656e-05, "loss": 1.3867, "step": 7100 }, { "epoch": 23.281967213114754, "grad_norm": 10.98044204711914, "learning_rate": 1.7918992984628798e-05, "loss": 1.1222, "step": 7101 }, { "epoch": 23.285245901639342, "grad_norm": 12.338889122009277, "learning_rate": 1.791834449688616e-05, "loss": 1.2496, "step": 7102 }, { "epoch": 23.288524590163934, "grad_norm": 12.837870597839355, "learning_rate": 1.7917695919855957e-05, "loss": 1.0981, "step": 7103 }, { "epoch": 23.291803278688526, "grad_norm": 13.249223709106445, "learning_rate": 1.79170472535455e-05, "loss": 1.3337, "step": 7104 }, { "epoch": 23.295081967213115, "grad_norm": 9.070639610290527, "learning_rate": 1.79163984979621e-05, "loss": 1.2079, "step": 7105 }, { "epoch": 23.298360655737707, "grad_norm": 9.76421070098877, "learning_rate": 1.7915749653113078e-05, "loss": 1.3823, "step": 7106 }, { "epoch": 23.301639344262295, "grad_norm": 9.5735502243042, "learning_rate": 1.7915100719005748e-05, "loss": 1.3726, "step": 7107 }, { "epoch": 23.304918032786887, "grad_norm": 11.822087287902832, "learning_rate": 1.791445169564743e-05, "loss": 1.1639, "step": 7108 }, { "epoch": 23.308196721311475, "grad_norm": 8.12729549407959, "learning_rate": 1.791380258304544e-05, "loss": 1.1107, "step": 7109 }, { "epoch": 23.311475409836067, "grad_norm": 11.364545822143555, "learning_rate": 1.7913153381207095e-05, "loss": 1.3057, "step": 7110 }, { "epoch": 23.314754098360655, "grad_norm": 9.61807918548584, "learning_rate": 1.791250409013972e-05, "loss": 1.1254, "step": 7111 }, { "epoch": 23.318032786885247, "grad_norm": 9.216632843017578, "learning_rate": 1.791185470985063e-05, "loss": 1.3467, "step": 7112 }, { "epoch": 23.321311475409836, "grad_norm": 21.52824592590332, "learning_rate": 1.7911205240347157e-05, "loss": 1.0934, "step": 7113 }, { "epoch": 23.324590163934428, "grad_norm": 8.547080993652344, "learning_rate": 1.7910555681636616e-05, "loss": 1.2783, "step": 7114 }, { "epoch": 23.327868852459016, "grad_norm": 11.298737525939941, "learning_rate": 1.7909906033726337e-05, "loss": 1.257, "step": 7115 }, { "epoch": 23.331147540983608, "grad_norm": 11.707683563232422, "learning_rate": 1.790925629662364e-05, "loss": 1.2747, "step": 7116 }, { "epoch": 23.334426229508196, "grad_norm": 11.59877872467041, "learning_rate": 1.790860647033586e-05, "loss": 1.2668, "step": 7117 }, { "epoch": 23.337704918032788, "grad_norm": 17.038602828979492, "learning_rate": 1.790795655487032e-05, "loss": 1.1151, "step": 7118 }, { "epoch": 23.340983606557376, "grad_norm": 8.863365173339844, "learning_rate": 1.790730655023434e-05, "loss": 1.2532, "step": 7119 }, { "epoch": 23.34426229508197, "grad_norm": 20.581533432006836, "learning_rate": 1.7906656456435263e-05, "loss": 1.3018, "step": 7120 }, { "epoch": 23.347540983606557, "grad_norm": 10.980864524841309, "learning_rate": 1.790600627348041e-05, "loss": 1.1449, "step": 7121 }, { "epoch": 23.35081967213115, "grad_norm": 13.677046775817871, "learning_rate": 1.790535600137712e-05, "loss": 1.2613, "step": 7122 }, { "epoch": 23.354098360655737, "grad_norm": 9.286945343017578, "learning_rate": 1.7904705640132717e-05, "loss": 1.2531, "step": 7123 }, { "epoch": 23.35737704918033, "grad_norm": 10.921445846557617, "learning_rate": 1.7904055189754544e-05, "loss": 1.1846, "step": 7124 }, { "epoch": 23.360655737704917, "grad_norm": 12.288107872009277, "learning_rate": 1.790340465024993e-05, "loss": 1.2188, "step": 7125 }, { "epoch": 23.36393442622951, "grad_norm": 13.049298286437988, "learning_rate": 1.7902754021626206e-05, "loss": 1.3367, "step": 7126 }, { "epoch": 23.367213114754097, "grad_norm": 13.082582473754883, "learning_rate": 1.790210330389072e-05, "loss": 1.3235, "step": 7127 }, { "epoch": 23.37049180327869, "grad_norm": 49.10982131958008, "learning_rate": 1.79014524970508e-05, "loss": 1.3772, "step": 7128 }, { "epoch": 23.373770491803278, "grad_norm": 9.150348663330078, "learning_rate": 1.7900801601113786e-05, "loss": 1.373, "step": 7129 }, { "epoch": 23.37704918032787, "grad_norm": 11.241909980773926, "learning_rate": 1.790015061608702e-05, "loss": 1.3961, "step": 7130 }, { "epoch": 23.380327868852458, "grad_norm": 11.123283386230469, "learning_rate": 1.7899499541977844e-05, "loss": 1.3223, "step": 7131 }, { "epoch": 23.38360655737705, "grad_norm": 8.241081237792969, "learning_rate": 1.7898848378793595e-05, "loss": 1.3005, "step": 7132 }, { "epoch": 23.386885245901638, "grad_norm": 9.841570854187012, "learning_rate": 1.789819712654162e-05, "loss": 1.0688, "step": 7133 }, { "epoch": 23.39016393442623, "grad_norm": 14.279935836791992, "learning_rate": 1.789754578522926e-05, "loss": 1.5022, "step": 7134 }, { "epoch": 23.39344262295082, "grad_norm": 12.905267715454102, "learning_rate": 1.7896894354863855e-05, "loss": 1.1941, "step": 7135 }, { "epoch": 23.39672131147541, "grad_norm": 9.14857006072998, "learning_rate": 1.789624283545276e-05, "loss": 1.1602, "step": 7136 }, { "epoch": 23.4, "grad_norm": 10.60505199432373, "learning_rate": 1.7895591227003316e-05, "loss": 1.2737, "step": 7137 }, { "epoch": 23.40327868852459, "grad_norm": 9.550522804260254, "learning_rate": 1.789493952952287e-05, "loss": 1.2515, "step": 7138 }, { "epoch": 23.40655737704918, "grad_norm": 13.686518669128418, "learning_rate": 1.7894287743018775e-05, "loss": 1.2168, "step": 7139 }, { "epoch": 23.40983606557377, "grad_norm": 8.020023345947266, "learning_rate": 1.7893635867498378e-05, "loss": 1.2511, "step": 7140 }, { "epoch": 23.41311475409836, "grad_norm": 8.845495223999023, "learning_rate": 1.7892983902969028e-05, "loss": 1.2474, "step": 7141 }, { "epoch": 23.41639344262295, "grad_norm": 11.545717239379883, "learning_rate": 1.7892331849438077e-05, "loss": 1.3677, "step": 7142 }, { "epoch": 23.41967213114754, "grad_norm": 11.012598037719727, "learning_rate": 1.789167970691288e-05, "loss": 1.45, "step": 7143 }, { "epoch": 23.42295081967213, "grad_norm": 7.803735733032227, "learning_rate": 1.7891027475400785e-05, "loss": 1.2947, "step": 7144 }, { "epoch": 23.42622950819672, "grad_norm": 7.735674858093262, "learning_rate": 1.7890375154909155e-05, "loss": 1.0884, "step": 7145 }, { "epoch": 23.42950819672131, "grad_norm": 9.402568817138672, "learning_rate": 1.788972274544534e-05, "loss": 1.407, "step": 7146 }, { "epoch": 23.432786885245903, "grad_norm": 13.157989501953125, "learning_rate": 1.7889070247016697e-05, "loss": 1.3242, "step": 7147 }, { "epoch": 23.43606557377049, "grad_norm": 9.31546401977539, "learning_rate": 1.7888417659630587e-05, "loss": 1.2123, "step": 7148 }, { "epoch": 23.439344262295084, "grad_norm": 8.699505805969238, "learning_rate": 1.7887764983294365e-05, "loss": 1.1458, "step": 7149 }, { "epoch": 23.442622950819672, "grad_norm": 10.061485290527344, "learning_rate": 1.7887112218015387e-05, "loss": 1.1277, "step": 7150 }, { "epoch": 23.445901639344264, "grad_norm": 8.300249099731445, "learning_rate": 1.7886459363801024e-05, "loss": 1.3242, "step": 7151 }, { "epoch": 23.449180327868852, "grad_norm": 7.857874393463135, "learning_rate": 1.788580642065863e-05, "loss": 1.2024, "step": 7152 }, { "epoch": 23.452459016393444, "grad_norm": 7.064905643463135, "learning_rate": 1.788515338859557e-05, "loss": 1.4849, "step": 7153 }, { "epoch": 23.455737704918032, "grad_norm": 12.486347198486328, "learning_rate": 1.7884500267619204e-05, "loss": 1.1521, "step": 7154 }, { "epoch": 23.459016393442624, "grad_norm": 8.310063362121582, "learning_rate": 1.78838470577369e-05, "loss": 1.416, "step": 7155 }, { "epoch": 23.462295081967213, "grad_norm": 8.542635917663574, "learning_rate": 1.7883193758956028e-05, "loss": 1.3108, "step": 7156 }, { "epoch": 23.465573770491805, "grad_norm": 9.025233268737793, "learning_rate": 1.7882540371283946e-05, "loss": 1.1434, "step": 7157 }, { "epoch": 23.468852459016393, "grad_norm": 12.660050392150879, "learning_rate": 1.7881886894728028e-05, "loss": 1.2166, "step": 7158 }, { "epoch": 23.472131147540985, "grad_norm": 7.525148391723633, "learning_rate": 1.7881233329295637e-05, "loss": 1.3142, "step": 7159 }, { "epoch": 23.475409836065573, "grad_norm": 8.841891288757324, "learning_rate": 1.7880579674994147e-05, "loss": 1.3103, "step": 7160 }, { "epoch": 23.478688524590165, "grad_norm": 11.456666946411133, "learning_rate": 1.787992593183093e-05, "loss": 1.2998, "step": 7161 }, { "epoch": 23.481967213114753, "grad_norm": 7.10093355178833, "learning_rate": 1.7879272099813353e-05, "loss": 1.6064, "step": 7162 }, { "epoch": 23.485245901639345, "grad_norm": 13.157092094421387, "learning_rate": 1.787861817894879e-05, "loss": 1.443, "step": 7163 }, { "epoch": 23.488524590163934, "grad_norm": 8.199141502380371, "learning_rate": 1.7877964169244615e-05, "loss": 1.1047, "step": 7164 }, { "epoch": 23.491803278688526, "grad_norm": 13.0100736618042, "learning_rate": 1.7877310070708206e-05, "loss": 1.2004, "step": 7165 }, { "epoch": 23.495081967213114, "grad_norm": 12.164849281311035, "learning_rate": 1.787665588334693e-05, "loss": 1.1542, "step": 7166 }, { "epoch": 23.498360655737706, "grad_norm": 9.811907768249512, "learning_rate": 1.7876001607168178e-05, "loss": 1.187, "step": 7167 }, { "epoch": 23.501639344262294, "grad_norm": 18.100143432617188, "learning_rate": 1.787534724217931e-05, "loss": 1.1091, "step": 7168 }, { "epoch": 23.504918032786886, "grad_norm": 10.30068302154541, "learning_rate": 1.7874692788387718e-05, "loss": 1.1802, "step": 7169 }, { "epoch": 23.508196721311474, "grad_norm": 6.599440574645996, "learning_rate": 1.7874038245800775e-05, "loss": 1.3256, "step": 7170 }, { "epoch": 23.511475409836066, "grad_norm": 9.730841636657715, "learning_rate": 1.7873383614425866e-05, "loss": 1.1672, "step": 7171 }, { "epoch": 23.514754098360655, "grad_norm": 9.903806686401367, "learning_rate": 1.787272889427037e-05, "loss": 1.3914, "step": 7172 }, { "epoch": 23.518032786885247, "grad_norm": 8.842599868774414, "learning_rate": 1.787207408534167e-05, "loss": 1.0278, "step": 7173 }, { "epoch": 23.521311475409835, "grad_norm": 20.171838760375977, "learning_rate": 1.787141918764715e-05, "loss": 1.1455, "step": 7174 }, { "epoch": 23.524590163934427, "grad_norm": 9.661331176757812, "learning_rate": 1.7870764201194194e-05, "loss": 1.4634, "step": 7175 }, { "epoch": 23.527868852459015, "grad_norm": 17.004451751708984, "learning_rate": 1.7870109125990186e-05, "loss": 1.4069, "step": 7176 }, { "epoch": 23.531147540983607, "grad_norm": 7.68392276763916, "learning_rate": 1.786945396204252e-05, "loss": 1.0884, "step": 7177 }, { "epoch": 23.534426229508195, "grad_norm": 9.76550006866455, "learning_rate": 1.7868798709358572e-05, "loss": 1.2194, "step": 7178 }, { "epoch": 23.537704918032787, "grad_norm": 7.88798189163208, "learning_rate": 1.7868143367945742e-05, "loss": 1.3037, "step": 7179 }, { "epoch": 23.540983606557376, "grad_norm": 8.196422576904297, "learning_rate": 1.7867487937811412e-05, "loss": 1.4009, "step": 7180 }, { "epoch": 23.544262295081968, "grad_norm": 7.593638896942139, "learning_rate": 1.7866832418962978e-05, "loss": 1.3601, "step": 7181 }, { "epoch": 23.547540983606556, "grad_norm": 8.113970756530762, "learning_rate": 1.786617681140783e-05, "loss": 1.3406, "step": 7182 }, { "epoch": 23.550819672131148, "grad_norm": 35.9350471496582, "learning_rate": 1.7865521115153354e-05, "loss": 1.1943, "step": 7183 }, { "epoch": 23.554098360655736, "grad_norm": 13.112014770507812, "learning_rate": 1.786486533020695e-05, "loss": 1.3816, "step": 7184 }, { "epoch": 23.557377049180328, "grad_norm": 12.03991985321045, "learning_rate": 1.786420945657602e-05, "loss": 0.8204, "step": 7185 }, { "epoch": 23.560655737704916, "grad_norm": 8.30135440826416, "learning_rate": 1.786355349426794e-05, "loss": 1.3855, "step": 7186 }, { "epoch": 23.56393442622951, "grad_norm": 8.48781967163086, "learning_rate": 1.786289744329013e-05, "loss": 1.0833, "step": 7187 }, { "epoch": 23.567213114754097, "grad_norm": 9.278565406799316, "learning_rate": 1.7862241303649967e-05, "loss": 1.1669, "step": 7188 }, { "epoch": 23.57049180327869, "grad_norm": 7.683532238006592, "learning_rate": 1.786158507535486e-05, "loss": 1.3835, "step": 7189 }, { "epoch": 23.57377049180328, "grad_norm": 15.561071395874023, "learning_rate": 1.786092875841221e-05, "loss": 1.0352, "step": 7190 }, { "epoch": 23.57704918032787, "grad_norm": 6.838441371917725, "learning_rate": 1.7860272352829415e-05, "loss": 1.3845, "step": 7191 }, { "epoch": 23.58032786885246, "grad_norm": 13.237520217895508, "learning_rate": 1.785961585861388e-05, "loss": 1.1158, "step": 7192 }, { "epoch": 23.58360655737705, "grad_norm": 8.732830047607422, "learning_rate": 1.7858959275772997e-05, "loss": 1.254, "step": 7193 }, { "epoch": 23.58688524590164, "grad_norm": 13.60378360748291, "learning_rate": 1.7858302604314183e-05, "loss": 1.2668, "step": 7194 }, { "epoch": 23.59016393442623, "grad_norm": 7.927389144897461, "learning_rate": 1.785764584424483e-05, "loss": 1.4941, "step": 7195 }, { "epoch": 23.59344262295082, "grad_norm": 8.538721084594727, "learning_rate": 1.7856988995572354e-05, "loss": 1.2637, "step": 7196 }, { "epoch": 23.59672131147541, "grad_norm": 8.670414924621582, "learning_rate": 1.785633205830416e-05, "loss": 1.2239, "step": 7197 }, { "epoch": 23.6, "grad_norm": 8.200368881225586, "learning_rate": 1.7855675032447648e-05, "loss": 1.2021, "step": 7198 }, { "epoch": 23.60327868852459, "grad_norm": 9.164191246032715, "learning_rate": 1.7855017918010237e-05, "loss": 1.3657, "step": 7199 }, { "epoch": 23.60655737704918, "grad_norm": 8.402499198913574, "learning_rate": 1.785436071499933e-05, "loss": 1.2441, "step": 7200 }, { "epoch": 23.60983606557377, "grad_norm": 6.582836151123047, "learning_rate": 1.7853703423422337e-05, "loss": 1.1221, "step": 7201 }, { "epoch": 23.613114754098362, "grad_norm": 10.405816078186035, "learning_rate": 1.7853046043286676e-05, "loss": 1.2183, "step": 7202 }, { "epoch": 23.61639344262295, "grad_norm": 7.364508628845215, "learning_rate": 1.7852388574599754e-05, "loss": 0.9725, "step": 7203 }, { "epoch": 23.619672131147542, "grad_norm": 8.438652038574219, "learning_rate": 1.7851731017368985e-05, "loss": 1.2529, "step": 7204 }, { "epoch": 23.62295081967213, "grad_norm": 9.7052640914917, "learning_rate": 1.7851073371601786e-05, "loss": 1.2201, "step": 7205 }, { "epoch": 23.626229508196722, "grad_norm": 6.441378116607666, "learning_rate": 1.7850415637305572e-05, "loss": 1.2953, "step": 7206 }, { "epoch": 23.62950819672131, "grad_norm": 12.087183952331543, "learning_rate": 1.784975781448776e-05, "loss": 0.9912, "step": 7207 }, { "epoch": 23.632786885245903, "grad_norm": 8.549850463867188, "learning_rate": 1.784909990315577e-05, "loss": 1.314, "step": 7208 }, { "epoch": 23.63606557377049, "grad_norm": 7.04699182510376, "learning_rate": 1.784844190331701e-05, "loss": 1.3289, "step": 7209 }, { "epoch": 23.639344262295083, "grad_norm": 6.795557022094727, "learning_rate": 1.7847783814978916e-05, "loss": 1.3204, "step": 7210 }, { "epoch": 23.64262295081967, "grad_norm": 7.245025157928467, "learning_rate": 1.7847125638148892e-05, "loss": 1.1619, "step": 7211 }, { "epoch": 23.645901639344263, "grad_norm": 12.24201774597168, "learning_rate": 1.784646737283437e-05, "loss": 1.3043, "step": 7212 }, { "epoch": 23.64918032786885, "grad_norm": 6.269240379333496, "learning_rate": 1.7845809019042774e-05, "loss": 1.3792, "step": 7213 }, { "epoch": 23.652459016393443, "grad_norm": 10.507488250732422, "learning_rate": 1.784515057678152e-05, "loss": 1.0801, "step": 7214 }, { "epoch": 23.65573770491803, "grad_norm": 9.173726081848145, "learning_rate": 1.7844492046058036e-05, "loss": 1.2983, "step": 7215 }, { "epoch": 23.659016393442624, "grad_norm": 7.800487995147705, "learning_rate": 1.7843833426879747e-05, "loss": 1.4117, "step": 7216 }, { "epoch": 23.662295081967212, "grad_norm": 9.034647941589355, "learning_rate": 1.7843174719254084e-05, "loss": 1.2778, "step": 7217 }, { "epoch": 23.665573770491804, "grad_norm": 12.160881042480469, "learning_rate": 1.784251592318847e-05, "loss": 0.9998, "step": 7218 }, { "epoch": 23.668852459016392, "grad_norm": 10.606008529663086, "learning_rate": 1.7841857038690332e-05, "loss": 1.2039, "step": 7219 }, { "epoch": 23.672131147540984, "grad_norm": 10.672924995422363, "learning_rate": 1.7841198065767107e-05, "loss": 1.111, "step": 7220 }, { "epoch": 23.675409836065572, "grad_norm": 7.257762432098389, "learning_rate": 1.7840539004426218e-05, "loss": 1.3788, "step": 7221 }, { "epoch": 23.678688524590164, "grad_norm": 8.364612579345703, "learning_rate": 1.7839879854675103e-05, "loss": 1.2712, "step": 7222 }, { "epoch": 23.681967213114753, "grad_norm": 9.074821472167969, "learning_rate": 1.7839220616521186e-05, "loss": 1.1533, "step": 7223 }, { "epoch": 23.685245901639345, "grad_norm": 8.156413078308105, "learning_rate": 1.783856128997191e-05, "loss": 1.3081, "step": 7224 }, { "epoch": 23.688524590163933, "grad_norm": 11.481118202209473, "learning_rate": 1.78379018750347e-05, "loss": 1.223, "step": 7225 }, { "epoch": 23.691803278688525, "grad_norm": 8.184572219848633, "learning_rate": 1.7837242371717e-05, "loss": 1.3518, "step": 7226 }, { "epoch": 23.695081967213113, "grad_norm": 8.92507266998291, "learning_rate": 1.7836582780026246e-05, "loss": 1.2067, "step": 7227 }, { "epoch": 23.698360655737705, "grad_norm": 7.703985214233398, "learning_rate": 1.783592309996987e-05, "loss": 1.2803, "step": 7228 }, { "epoch": 23.701639344262293, "grad_norm": 8.220107078552246, "learning_rate": 1.7835263331555317e-05, "loss": 1.0128, "step": 7229 }, { "epoch": 23.704918032786885, "grad_norm": 7.824337959289551, "learning_rate": 1.783460347479002e-05, "loss": 1.4137, "step": 7230 }, { "epoch": 23.708196721311474, "grad_norm": 7.374887466430664, "learning_rate": 1.7833943529681425e-05, "loss": 0.9852, "step": 7231 }, { "epoch": 23.711475409836066, "grad_norm": 9.53270435333252, "learning_rate": 1.7833283496236974e-05, "loss": 1.1799, "step": 7232 }, { "epoch": 23.714754098360658, "grad_norm": 8.643567085266113, "learning_rate": 1.7832623374464103e-05, "loss": 1.2385, "step": 7233 }, { "epoch": 23.718032786885246, "grad_norm": 8.862717628479004, "learning_rate": 1.7831963164370257e-05, "loss": 1.1282, "step": 7234 }, { "epoch": 23.721311475409838, "grad_norm": 7.7189741134643555, "learning_rate": 1.783130286596289e-05, "loss": 1.1968, "step": 7235 }, { "epoch": 23.724590163934426, "grad_norm": 8.405766487121582, "learning_rate": 1.7830642479249436e-05, "loss": 1.3022, "step": 7236 }, { "epoch": 23.727868852459018, "grad_norm": 13.537520408630371, "learning_rate": 1.7829982004237348e-05, "loss": 0.9459, "step": 7237 }, { "epoch": 23.731147540983606, "grad_norm": 9.134112358093262, "learning_rate": 1.782932144093407e-05, "loss": 1.2491, "step": 7238 }, { "epoch": 23.7344262295082, "grad_norm": 9.196579933166504, "learning_rate": 1.7828660789347054e-05, "loss": 1.1881, "step": 7239 }, { "epoch": 23.737704918032787, "grad_norm": 9.884831428527832, "learning_rate": 1.7828000049483745e-05, "loss": 1.208, "step": 7240 }, { "epoch": 23.74098360655738, "grad_norm": 7.859843730926514, "learning_rate": 1.7827339221351598e-05, "loss": 1.1904, "step": 7241 }, { "epoch": 23.744262295081967, "grad_norm": 10.08886432647705, "learning_rate": 1.7826678304958065e-05, "loss": 1.2144, "step": 7242 }, { "epoch": 23.74754098360656, "grad_norm": 8.539373397827148, "learning_rate": 1.7826017300310593e-05, "loss": 1.0369, "step": 7243 }, { "epoch": 23.750819672131147, "grad_norm": 13.418244361877441, "learning_rate": 1.782535620741664e-05, "loss": 1.2847, "step": 7244 }, { "epoch": 23.75409836065574, "grad_norm": 7.491549968719482, "learning_rate": 1.782469502628366e-05, "loss": 1.2317, "step": 7245 }, { "epoch": 23.757377049180327, "grad_norm": 7.774018287658691, "learning_rate": 1.7824033756919112e-05, "loss": 1.3423, "step": 7246 }, { "epoch": 23.76065573770492, "grad_norm": 36.11772918701172, "learning_rate": 1.7823372399330443e-05, "loss": 1.3793, "step": 7247 }, { "epoch": 23.763934426229508, "grad_norm": 10.652432441711426, "learning_rate": 1.782271095352512e-05, "loss": 1.0467, "step": 7248 }, { "epoch": 23.7672131147541, "grad_norm": 8.885942459106445, "learning_rate": 1.7822049419510594e-05, "loss": 1.2261, "step": 7249 }, { "epoch": 23.770491803278688, "grad_norm": 9.416556358337402, "learning_rate": 1.782138779729433e-05, "loss": 1.1643, "step": 7250 }, { "epoch": 23.77377049180328, "grad_norm": 9.170167922973633, "learning_rate": 1.7820726086883783e-05, "loss": 1.3816, "step": 7251 }, { "epoch": 23.777049180327868, "grad_norm": 7.611317157745361, "learning_rate": 1.782006428828642e-05, "loss": 1.4067, "step": 7252 }, { "epoch": 23.78032786885246, "grad_norm": 12.977231979370117, "learning_rate": 1.7819402401509703e-05, "loss": 1.1541, "step": 7253 }, { "epoch": 23.78360655737705, "grad_norm": 7.706293106079102, "learning_rate": 1.781874042656109e-05, "loss": 1.3855, "step": 7254 }, { "epoch": 23.78688524590164, "grad_norm": 9.917338371276855, "learning_rate": 1.7818078363448053e-05, "loss": 1.3525, "step": 7255 }, { "epoch": 23.79016393442623, "grad_norm": 7.855100154876709, "learning_rate": 1.781741621217805e-05, "loss": 1.2993, "step": 7256 }, { "epoch": 23.79344262295082, "grad_norm": 8.304184913635254, "learning_rate": 1.7816753972758552e-05, "loss": 1.0913, "step": 7257 }, { "epoch": 23.79672131147541, "grad_norm": 7.606873035430908, "learning_rate": 1.781609164519703e-05, "loss": 1.3555, "step": 7258 }, { "epoch": 23.8, "grad_norm": 10.213632583618164, "learning_rate": 1.7815429229500946e-05, "loss": 1.6592, "step": 7259 }, { "epoch": 23.80327868852459, "grad_norm": 12.5699462890625, "learning_rate": 1.781476672567777e-05, "loss": 1.3555, "step": 7260 }, { "epoch": 23.80655737704918, "grad_norm": 13.64445972442627, "learning_rate": 1.7814104133734976e-05, "loss": 1.3192, "step": 7261 }, { "epoch": 23.80983606557377, "grad_norm": 11.111879348754883, "learning_rate": 1.781344145368003e-05, "loss": 1.217, "step": 7262 }, { "epoch": 23.81311475409836, "grad_norm": 9.244961738586426, "learning_rate": 1.781277868552041e-05, "loss": 1.4797, "step": 7263 }, { "epoch": 23.81639344262295, "grad_norm": 9.403325080871582, "learning_rate": 1.7812115829263585e-05, "loss": 0.9565, "step": 7264 }, { "epoch": 23.81967213114754, "grad_norm": 9.68964958190918, "learning_rate": 1.781145288491703e-05, "loss": 1.183, "step": 7265 }, { "epoch": 23.82295081967213, "grad_norm": 7.068607807159424, "learning_rate": 1.7810789852488225e-05, "loss": 1.335, "step": 7266 }, { "epoch": 23.82622950819672, "grad_norm": 7.075607776641846, "learning_rate": 1.781012673198464e-05, "loss": 1.2539, "step": 7267 }, { "epoch": 23.82950819672131, "grad_norm": 8.418706893920898, "learning_rate": 1.7809463523413762e-05, "loss": 1.2759, "step": 7268 }, { "epoch": 23.832786885245902, "grad_norm": 6.655417442321777, "learning_rate": 1.7808800226783058e-05, "loss": 1.2102, "step": 7269 }, { "epoch": 23.83606557377049, "grad_norm": 7.334166049957275, "learning_rate": 1.7808136842100015e-05, "loss": 1.3563, "step": 7270 }, { "epoch": 23.839344262295082, "grad_norm": 6.117486953735352, "learning_rate": 1.780747336937211e-05, "loss": 1.4899, "step": 7271 }, { "epoch": 23.84262295081967, "grad_norm": 8.02966022491455, "learning_rate": 1.7806809808606825e-05, "loss": 1.2827, "step": 7272 }, { "epoch": 23.845901639344262, "grad_norm": 6.5798163414001465, "learning_rate": 1.780614615981164e-05, "loss": 1.4963, "step": 7273 }, { "epoch": 23.84918032786885, "grad_norm": 10.038601875305176, "learning_rate": 1.7805482422994042e-05, "loss": 1.0457, "step": 7274 }, { "epoch": 23.852459016393443, "grad_norm": 12.627583503723145, "learning_rate": 1.7804818598161517e-05, "loss": 1.2994, "step": 7275 }, { "epoch": 23.855737704918035, "grad_norm": 9.618637084960938, "learning_rate": 1.7804154685321543e-05, "loss": 1.1836, "step": 7276 }, { "epoch": 23.859016393442623, "grad_norm": 8.614688873291016, "learning_rate": 1.780349068448161e-05, "loss": 1.3064, "step": 7277 }, { "epoch": 23.862295081967215, "grad_norm": 8.416584014892578, "learning_rate": 1.780282659564921e-05, "loss": 1.2506, "step": 7278 }, { "epoch": 23.865573770491803, "grad_norm": 10.210958480834961, "learning_rate": 1.7802162418831826e-05, "loss": 1.3213, "step": 7279 }, { "epoch": 23.868852459016395, "grad_norm": 8.076135635375977, "learning_rate": 1.7801498154036946e-05, "loss": 1.2338, "step": 7280 }, { "epoch": 23.872131147540983, "grad_norm": 9.938669204711914, "learning_rate": 1.7800833801272064e-05, "loss": 1.2462, "step": 7281 }, { "epoch": 23.875409836065575, "grad_norm": 7.899815082550049, "learning_rate": 1.780016936054467e-05, "loss": 1.0093, "step": 7282 }, { "epoch": 23.878688524590164, "grad_norm": 7.760792255401611, "learning_rate": 1.779950483186226e-05, "loss": 1.0106, "step": 7283 }, { "epoch": 23.881967213114756, "grad_norm": 7.339454650878906, "learning_rate": 1.779884021523232e-05, "loss": 1.2588, "step": 7284 }, { "epoch": 23.885245901639344, "grad_norm": 8.527061462402344, "learning_rate": 1.779817551066235e-05, "loss": 1.3508, "step": 7285 }, { "epoch": 23.888524590163936, "grad_norm": 8.070666313171387, "learning_rate": 1.779751071815984e-05, "loss": 1.2007, "step": 7286 }, { "epoch": 23.891803278688524, "grad_norm": 6.537155628204346, "learning_rate": 1.779684583773229e-05, "loss": 1.3627, "step": 7287 }, { "epoch": 23.895081967213116, "grad_norm": 8.648849487304688, "learning_rate": 1.77961808693872e-05, "loss": 1.1152, "step": 7288 }, { "epoch": 23.898360655737704, "grad_norm": 8.73868179321289, "learning_rate": 1.7795515813132063e-05, "loss": 1.1394, "step": 7289 }, { "epoch": 23.901639344262296, "grad_norm": 9.667346954345703, "learning_rate": 1.7794850668974378e-05, "loss": 1.2549, "step": 7290 }, { "epoch": 23.904918032786885, "grad_norm": 9.565971374511719, "learning_rate": 1.7794185436921646e-05, "loss": 1.3333, "step": 7291 }, { "epoch": 23.908196721311477, "grad_norm": 6.6044487953186035, "learning_rate": 1.7793520116981372e-05, "loss": 1.233, "step": 7292 }, { "epoch": 23.911475409836065, "grad_norm": 8.773494720458984, "learning_rate": 1.7792854709161057e-05, "loss": 1.2979, "step": 7293 }, { "epoch": 23.914754098360657, "grad_norm": 8.013781547546387, "learning_rate": 1.7792189213468195e-05, "loss": 1.405, "step": 7294 }, { "epoch": 23.918032786885245, "grad_norm": 8.663928031921387, "learning_rate": 1.7791523629910305e-05, "loss": 1.032, "step": 7295 }, { "epoch": 23.921311475409837, "grad_norm": 15.488967895507812, "learning_rate": 1.7790857958494885e-05, "loss": 1.2323, "step": 7296 }, { "epoch": 23.924590163934425, "grad_norm": 5.741009712219238, "learning_rate": 1.779019219922944e-05, "loss": 1.3726, "step": 7297 }, { "epoch": 23.927868852459017, "grad_norm": 7.862154483795166, "learning_rate": 1.7789526352121477e-05, "loss": 1.1459, "step": 7298 }, { "epoch": 23.931147540983606, "grad_norm": 8.71338939666748, "learning_rate": 1.7788860417178508e-05, "loss": 1.3909, "step": 7299 }, { "epoch": 23.934426229508198, "grad_norm": 13.434000968933105, "learning_rate": 1.7788194394408034e-05, "loss": 1.3491, "step": 7300 }, { "epoch": 23.937704918032786, "grad_norm": 9.061213493347168, "learning_rate": 1.7787528283817575e-05, "loss": 1.1956, "step": 7301 }, { "epoch": 23.940983606557378, "grad_norm": 7.613790512084961, "learning_rate": 1.7786862085414633e-05, "loss": 1.2434, "step": 7302 }, { "epoch": 23.944262295081966, "grad_norm": 6.4997239112854, "learning_rate": 1.778619579920673e-05, "loss": 1.2778, "step": 7303 }, { "epoch": 23.947540983606558, "grad_norm": 6.8585638999938965, "learning_rate": 1.778552942520137e-05, "loss": 1.1929, "step": 7304 }, { "epoch": 23.950819672131146, "grad_norm": 9.744657516479492, "learning_rate": 1.7784862963406075e-05, "loss": 1.0652, "step": 7305 }, { "epoch": 23.95409836065574, "grad_norm": 8.092781066894531, "learning_rate": 1.7784196413828352e-05, "loss": 1.2756, "step": 7306 }, { "epoch": 23.957377049180327, "grad_norm": 8.194183349609375, "learning_rate": 1.7783529776475722e-05, "loss": 1.1638, "step": 7307 }, { "epoch": 23.96065573770492, "grad_norm": 7.034293174743652, "learning_rate": 1.77828630513557e-05, "loss": 1.329, "step": 7308 }, { "epoch": 23.963934426229507, "grad_norm": 8.094721794128418, "learning_rate": 1.7782196238475807e-05, "loss": 0.8629, "step": 7309 }, { "epoch": 23.9672131147541, "grad_norm": 7.551657676696777, "learning_rate": 1.778152933784356e-05, "loss": 1.3225, "step": 7310 }, { "epoch": 23.970491803278687, "grad_norm": 16.515050888061523, "learning_rate": 1.7780862349466475e-05, "loss": 1.3325, "step": 7311 }, { "epoch": 23.97377049180328, "grad_norm": 7.51099157333374, "learning_rate": 1.778019527335208e-05, "loss": 1.1035, "step": 7312 }, { "epoch": 23.977049180327867, "grad_norm": 9.322549819946289, "learning_rate": 1.7779528109507894e-05, "loss": 1.4619, "step": 7313 }, { "epoch": 23.98032786885246, "grad_norm": 9.549121856689453, "learning_rate": 1.777886085794144e-05, "loss": 1.0947, "step": 7314 }, { "epoch": 23.983606557377048, "grad_norm": 8.529422760009766, "learning_rate": 1.7778193518660242e-05, "loss": 1.4297, "step": 7315 }, { "epoch": 23.98688524590164, "grad_norm": 7.524857521057129, "learning_rate": 1.7777526091671823e-05, "loss": 1.1499, "step": 7316 }, { "epoch": 23.990163934426228, "grad_norm": 7.865795612335205, "learning_rate": 1.7776858576983713e-05, "loss": 1.1956, "step": 7317 }, { "epoch": 23.99344262295082, "grad_norm": 9.88709831237793, "learning_rate": 1.7776190974603435e-05, "loss": 1.283, "step": 7318 }, { "epoch": 23.99672131147541, "grad_norm": 7.957291126251221, "learning_rate": 1.7775523284538522e-05, "loss": 1.3381, "step": 7319 }, { "epoch": 24.0, "grad_norm": 9.14069938659668, "learning_rate": 1.7774855506796497e-05, "loss": 1.064, "step": 7320 }, { "epoch": 24.003278688524592, "grad_norm": 18.424217224121094, "learning_rate": 1.777418764138489e-05, "loss": 1.1229, "step": 7321 }, { "epoch": 24.00655737704918, "grad_norm": 9.186418533325195, "learning_rate": 1.7773519688311235e-05, "loss": 1.3083, "step": 7322 }, { "epoch": 24.009836065573772, "grad_norm": 7.757462501525879, "learning_rate": 1.7772851647583068e-05, "loss": 1.3608, "step": 7323 }, { "epoch": 24.01311475409836, "grad_norm": 10.972811698913574, "learning_rate": 1.777218351920791e-05, "loss": 1.0846, "step": 7324 }, { "epoch": 24.016393442622952, "grad_norm": 10.17823600769043, "learning_rate": 1.7771515303193304e-05, "loss": 0.9537, "step": 7325 }, { "epoch": 24.01967213114754, "grad_norm": 38.40754318237305, "learning_rate": 1.7770846999546784e-05, "loss": 1.09, "step": 7326 }, { "epoch": 24.022950819672133, "grad_norm": 8.476258277893066, "learning_rate": 1.7770178608275885e-05, "loss": 1.3728, "step": 7327 }, { "epoch": 24.02622950819672, "grad_norm": 7.767606735229492, "learning_rate": 1.7769510129388142e-05, "loss": 1.241, "step": 7328 }, { "epoch": 24.029508196721313, "grad_norm": 7.079670429229736, "learning_rate": 1.7768841562891094e-05, "loss": 1.1709, "step": 7329 }, { "epoch": 24.0327868852459, "grad_norm": 7.635005950927734, "learning_rate": 1.776817290879228e-05, "loss": 1.0497, "step": 7330 }, { "epoch": 24.036065573770493, "grad_norm": 6.963351726531982, "learning_rate": 1.7767504167099238e-05, "loss": 1.1038, "step": 7331 }, { "epoch": 24.03934426229508, "grad_norm": 7.912164688110352, "learning_rate": 1.7766835337819514e-05, "loss": 1.0896, "step": 7332 }, { "epoch": 24.042622950819673, "grad_norm": 9.39943790435791, "learning_rate": 1.7766166420960643e-05, "loss": 1.179, "step": 7333 }, { "epoch": 24.04590163934426, "grad_norm": 7.245376110076904, "learning_rate": 1.7765497416530173e-05, "loss": 1.0898, "step": 7334 }, { "epoch": 24.049180327868854, "grad_norm": 6.771124839782715, "learning_rate": 1.7764828324535645e-05, "loss": 1.0569, "step": 7335 }, { "epoch": 24.052459016393442, "grad_norm": 6.727597236633301, "learning_rate": 1.7764159144984603e-05, "loss": 1.1106, "step": 7336 }, { "epoch": 24.055737704918034, "grad_norm": 7.106828689575195, "learning_rate": 1.7763489877884598e-05, "loss": 1.1622, "step": 7337 }, { "epoch": 24.059016393442622, "grad_norm": 7.931841850280762, "learning_rate": 1.776282052324317e-05, "loss": 1.236, "step": 7338 }, { "epoch": 24.062295081967214, "grad_norm": 6.147015571594238, "learning_rate": 1.7762151081067863e-05, "loss": 1.2399, "step": 7339 }, { "epoch": 24.065573770491802, "grad_norm": 10.616979598999023, "learning_rate": 1.7761481551366238e-05, "loss": 1.2839, "step": 7340 }, { "epoch": 24.068852459016394, "grad_norm": 7.860185146331787, "learning_rate": 1.7760811934145842e-05, "loss": 1.1558, "step": 7341 }, { "epoch": 24.072131147540983, "grad_norm": 7.0898613929748535, "learning_rate": 1.7760142229414213e-05, "loss": 1.2217, "step": 7342 }, { "epoch": 24.075409836065575, "grad_norm": 8.67089557647705, "learning_rate": 1.775947243717892e-05, "loss": 1.3254, "step": 7343 }, { "epoch": 24.078688524590163, "grad_norm": 12.041911125183105, "learning_rate": 1.7758802557447503e-05, "loss": 1.364, "step": 7344 }, { "epoch": 24.081967213114755, "grad_norm": 8.75302505493164, "learning_rate": 1.7758132590227522e-05, "loss": 1.2532, "step": 7345 }, { "epoch": 24.085245901639343, "grad_norm": 9.326406478881836, "learning_rate": 1.7757462535526532e-05, "loss": 0.9501, "step": 7346 }, { "epoch": 24.088524590163935, "grad_norm": 8.29487133026123, "learning_rate": 1.775679239335208e-05, "loss": 1.1102, "step": 7347 }, { "epoch": 24.091803278688523, "grad_norm": 7.6472601890563965, "learning_rate": 1.7756122163711734e-05, "loss": 1.259, "step": 7348 }, { "epoch": 24.095081967213115, "grad_norm": 9.019111633300781, "learning_rate": 1.7755451846613045e-05, "loss": 1.3113, "step": 7349 }, { "epoch": 24.098360655737704, "grad_norm": 8.216048240661621, "learning_rate": 1.775478144206357e-05, "loss": 0.9751, "step": 7350 }, { "epoch": 24.101639344262296, "grad_norm": 8.947550773620605, "learning_rate": 1.7754110950070874e-05, "loss": 1.4438, "step": 7351 }, { "epoch": 24.104918032786884, "grad_norm": 6.525865077972412, "learning_rate": 1.7753440370642513e-05, "loss": 1.2917, "step": 7352 }, { "epoch": 24.108196721311476, "grad_norm": 8.981352806091309, "learning_rate": 1.7752769703786055e-05, "loss": 1.0267, "step": 7353 }, { "epoch": 24.111475409836064, "grad_norm": 10.496613502502441, "learning_rate": 1.7752098949509053e-05, "loss": 1.1877, "step": 7354 }, { "epoch": 24.114754098360656, "grad_norm": 7.5392608642578125, "learning_rate": 1.7751428107819075e-05, "loss": 1.2544, "step": 7355 }, { "epoch": 24.118032786885244, "grad_norm": 7.9794816970825195, "learning_rate": 1.7750757178723686e-05, "loss": 1.1735, "step": 7356 }, { "epoch": 24.121311475409836, "grad_norm": 7.200768947601318, "learning_rate": 1.7750086162230455e-05, "loss": 1.2888, "step": 7357 }, { "epoch": 24.124590163934425, "grad_norm": 6.685603141784668, "learning_rate": 1.774941505834694e-05, "loss": 1.1978, "step": 7358 }, { "epoch": 24.127868852459017, "grad_norm": 9.182787895202637, "learning_rate": 1.7748743867080715e-05, "loss": 0.9544, "step": 7359 }, { "epoch": 24.131147540983605, "grad_norm": 7.53884220123291, "learning_rate": 1.7748072588439345e-05, "loss": 1.1731, "step": 7360 }, { "epoch": 24.134426229508197, "grad_norm": 9.055030822753906, "learning_rate": 1.7747401222430403e-05, "loss": 1.0984, "step": 7361 }, { "epoch": 24.137704918032785, "grad_norm": 7.222362518310547, "learning_rate": 1.7746729769061454e-05, "loss": 1.0533, "step": 7362 }, { "epoch": 24.140983606557377, "grad_norm": 8.330340385437012, "learning_rate": 1.7746058228340073e-05, "loss": 1.2311, "step": 7363 }, { "epoch": 24.14426229508197, "grad_norm": 7.990321159362793, "learning_rate": 1.774538660027383e-05, "loss": 1.0775, "step": 7364 }, { "epoch": 24.147540983606557, "grad_norm": 6.8599982261657715, "learning_rate": 1.7744714884870303e-05, "loss": 1.2114, "step": 7365 }, { "epoch": 24.15081967213115, "grad_norm": 7.691117763519287, "learning_rate": 1.7744043082137062e-05, "loss": 1.0118, "step": 7366 }, { "epoch": 24.154098360655738, "grad_norm": 6.606563568115234, "learning_rate": 1.7743371192081683e-05, "loss": 0.9862, "step": 7367 }, { "epoch": 24.15737704918033, "grad_norm": 7.8449811935424805, "learning_rate": 1.7742699214711745e-05, "loss": 1.075, "step": 7368 }, { "epoch": 24.160655737704918, "grad_norm": 7.802924633026123, "learning_rate": 1.774202715003482e-05, "loss": 1.1462, "step": 7369 }, { "epoch": 24.16393442622951, "grad_norm": 7.866354465484619, "learning_rate": 1.774135499805849e-05, "loss": 1.2714, "step": 7370 }, { "epoch": 24.167213114754098, "grad_norm": 7.8900041580200195, "learning_rate": 1.7740682758790334e-05, "loss": 1.1056, "step": 7371 }, { "epoch": 24.17049180327869, "grad_norm": 6.526989936828613, "learning_rate": 1.774001043223793e-05, "loss": 1.2275, "step": 7372 }, { "epoch": 24.17377049180328, "grad_norm": 8.702393531799316, "learning_rate": 1.7739338018408864e-05, "loss": 1.1272, "step": 7373 }, { "epoch": 24.17704918032787, "grad_norm": 8.781269073486328, "learning_rate": 1.7738665517310713e-05, "loss": 0.9542, "step": 7374 }, { "epoch": 24.18032786885246, "grad_norm": 6.870723247528076, "learning_rate": 1.7737992928951066e-05, "loss": 1.2582, "step": 7375 }, { "epoch": 24.18360655737705, "grad_norm": 7.562155246734619, "learning_rate": 1.7737320253337497e-05, "loss": 0.9987, "step": 7376 }, { "epoch": 24.18688524590164, "grad_norm": 9.094918251037598, "learning_rate": 1.77366474904776e-05, "loss": 1.217, "step": 7377 }, { "epoch": 24.19016393442623, "grad_norm": 7.887197494506836, "learning_rate": 1.7735974640378958e-05, "loss": 1.1819, "step": 7378 }, { "epoch": 24.19344262295082, "grad_norm": 5.889809608459473, "learning_rate": 1.7735301703049156e-05, "loss": 1.1165, "step": 7379 }, { "epoch": 24.19672131147541, "grad_norm": 6.870424747467041, "learning_rate": 1.7734628678495787e-05, "loss": 1.0239, "step": 7380 }, { "epoch": 24.2, "grad_norm": 5.81177282333374, "learning_rate": 1.7733955566726438e-05, "loss": 1.2736, "step": 7381 }, { "epoch": 24.20327868852459, "grad_norm": 8.761881828308105, "learning_rate": 1.77332823677487e-05, "loss": 0.9995, "step": 7382 }, { "epoch": 24.20655737704918, "grad_norm": 7.1726861000061035, "learning_rate": 1.7732609081570162e-05, "loss": 1.0114, "step": 7383 }, { "epoch": 24.20983606557377, "grad_norm": 6.738405704498291, "learning_rate": 1.7731935708198417e-05, "loss": 1.2224, "step": 7384 }, { "epoch": 24.21311475409836, "grad_norm": 7.228145122528076, "learning_rate": 1.7731262247641056e-05, "loss": 0.9093, "step": 7385 }, { "epoch": 24.21639344262295, "grad_norm": 6.340682506561279, "learning_rate": 1.7730588699905675e-05, "loss": 1.1494, "step": 7386 }, { "epoch": 24.21967213114754, "grad_norm": 8.467281341552734, "learning_rate": 1.772991506499987e-05, "loss": 1.4012, "step": 7387 }, { "epoch": 24.222950819672132, "grad_norm": 7.142617225646973, "learning_rate": 1.7729241342931235e-05, "loss": 1.1169, "step": 7388 }, { "epoch": 24.22622950819672, "grad_norm": 7.117769718170166, "learning_rate": 1.7728567533707367e-05, "loss": 0.9705, "step": 7389 }, { "epoch": 24.229508196721312, "grad_norm": 5.363097190856934, "learning_rate": 1.7727893637335864e-05, "loss": 1.4176, "step": 7390 }, { "epoch": 24.2327868852459, "grad_norm": 8.913989067077637, "learning_rate": 1.7727219653824326e-05, "loss": 1.3905, "step": 7391 }, { "epoch": 24.236065573770492, "grad_norm": 8.069242477416992, "learning_rate": 1.772654558318035e-05, "loss": 1.3271, "step": 7392 }, { "epoch": 24.23934426229508, "grad_norm": 9.564178466796875, "learning_rate": 1.7725871425411544e-05, "loss": 1.2898, "step": 7393 }, { "epoch": 24.242622950819673, "grad_norm": 11.692468643188477, "learning_rate": 1.77251971805255e-05, "loss": 1.2405, "step": 7394 }, { "epoch": 24.24590163934426, "grad_norm": 5.955313205718994, "learning_rate": 1.7724522848529827e-05, "loss": 1.2631, "step": 7395 }, { "epoch": 24.249180327868853, "grad_norm": 6.570806503295898, "learning_rate": 1.7723848429432127e-05, "loss": 1.3391, "step": 7396 }, { "epoch": 24.25245901639344, "grad_norm": 7.606823921203613, "learning_rate": 1.7723173923240006e-05, "loss": 1.2642, "step": 7397 }, { "epoch": 24.255737704918033, "grad_norm": 11.829866409301758, "learning_rate": 1.772249932996107e-05, "loss": 1.3866, "step": 7398 }, { "epoch": 24.25901639344262, "grad_norm": 8.068306922912598, "learning_rate": 1.7721824649602923e-05, "loss": 1.2257, "step": 7399 }, { "epoch": 24.262295081967213, "grad_norm": 8.886075973510742, "learning_rate": 1.7721149882173176e-05, "loss": 1.2374, "step": 7400 }, { "epoch": 24.2655737704918, "grad_norm": 15.195263862609863, "learning_rate": 1.7720475027679433e-05, "loss": 1.0762, "step": 7401 }, { "epoch": 24.268852459016394, "grad_norm": 6.480534553527832, "learning_rate": 1.771980008612931e-05, "loss": 1.2988, "step": 7402 }, { "epoch": 24.272131147540982, "grad_norm": 8.879258155822754, "learning_rate": 1.7719125057530413e-05, "loss": 1.0066, "step": 7403 }, { "epoch": 24.275409836065574, "grad_norm": 11.056426048278809, "learning_rate": 1.7718449941890355e-05, "loss": 1.1949, "step": 7404 }, { "epoch": 24.278688524590162, "grad_norm": 8.223645210266113, "learning_rate": 1.771777473921675e-05, "loss": 1.2346, "step": 7405 }, { "epoch": 24.281967213114754, "grad_norm": 7.113921642303467, "learning_rate": 1.7717099449517208e-05, "loss": 1.1, "step": 7406 }, { "epoch": 24.285245901639342, "grad_norm": 6.879621982574463, "learning_rate": 1.771642407279935e-05, "loss": 1.0982, "step": 7407 }, { "epoch": 24.288524590163934, "grad_norm": 6.467372894287109, "learning_rate": 1.7715748609070782e-05, "loss": 1.2344, "step": 7408 }, { "epoch": 24.291803278688526, "grad_norm": 7.782687664031982, "learning_rate": 1.771507305833913e-05, "loss": 1.1174, "step": 7409 }, { "epoch": 24.295081967213115, "grad_norm": 6.870420455932617, "learning_rate": 1.7714397420612007e-05, "loss": 1.3306, "step": 7410 }, { "epoch": 24.298360655737707, "grad_norm": 9.040794372558594, "learning_rate": 1.7713721695897028e-05, "loss": 1.1173, "step": 7411 }, { "epoch": 24.301639344262295, "grad_norm": 8.077656745910645, "learning_rate": 1.771304588420182e-05, "loss": 1.1498, "step": 7412 }, { "epoch": 24.304918032786887, "grad_norm": 7.874416828155518, "learning_rate": 1.7712369985534e-05, "loss": 0.9537, "step": 7413 }, { "epoch": 24.308196721311475, "grad_norm": 8.334412574768066, "learning_rate": 1.771169399990119e-05, "loss": 1.23, "step": 7414 }, { "epoch": 24.311475409836067, "grad_norm": 8.898695945739746, "learning_rate": 1.771101792731101e-05, "loss": 1.0594, "step": 7415 }, { "epoch": 24.314754098360655, "grad_norm": 6.290560722351074, "learning_rate": 1.7710341767771088e-05, "loss": 1.3076, "step": 7416 }, { "epoch": 24.318032786885247, "grad_norm": 6.4136481285095215, "learning_rate": 1.7709665521289045e-05, "loss": 1.1716, "step": 7417 }, { "epoch": 24.321311475409836, "grad_norm": 7.322346210479736, "learning_rate": 1.770898918787251e-05, "loss": 1.0475, "step": 7418 }, { "epoch": 24.324590163934428, "grad_norm": 6.751878261566162, "learning_rate": 1.7708312767529104e-05, "loss": 0.9664, "step": 7419 }, { "epoch": 24.327868852459016, "grad_norm": 8.391515731811523, "learning_rate": 1.7707636260266453e-05, "loss": 1.411, "step": 7420 }, { "epoch": 24.331147540983608, "grad_norm": 7.260112762451172, "learning_rate": 1.7706959666092195e-05, "loss": 1.2124, "step": 7421 }, { "epoch": 24.334426229508196, "grad_norm": 8.273789405822754, "learning_rate": 1.770628298501395e-05, "loss": 1.134, "step": 7422 }, { "epoch": 24.337704918032788, "grad_norm": 6.487095355987549, "learning_rate": 1.7705606217039354e-05, "loss": 1.3015, "step": 7423 }, { "epoch": 24.340983606557376, "grad_norm": 5.843881607055664, "learning_rate": 1.7704929362176035e-05, "loss": 0.9949, "step": 7424 }, { "epoch": 24.34426229508197, "grad_norm": 9.782795906066895, "learning_rate": 1.770425242043163e-05, "loss": 1.1819, "step": 7425 }, { "epoch": 24.347540983606557, "grad_norm": 6.864906311035156, "learning_rate": 1.7703575391813764e-05, "loss": 1.3263, "step": 7426 }, { "epoch": 24.35081967213115, "grad_norm": 9.84756851196289, "learning_rate": 1.7702898276330077e-05, "loss": 1.0638, "step": 7427 }, { "epoch": 24.354098360655737, "grad_norm": 8.86546516418457, "learning_rate": 1.77022210739882e-05, "loss": 1.1511, "step": 7428 }, { "epoch": 24.35737704918033, "grad_norm": 8.123442649841309, "learning_rate": 1.770154378479578e-05, "loss": 1.1498, "step": 7429 }, { "epoch": 24.360655737704917, "grad_norm": 5.566918849945068, "learning_rate": 1.770086640876044e-05, "loss": 1.4224, "step": 7430 }, { "epoch": 24.36393442622951, "grad_norm": 11.076251029968262, "learning_rate": 1.7700188945889825e-05, "loss": 0.9446, "step": 7431 }, { "epoch": 24.367213114754097, "grad_norm": 12.231165885925293, "learning_rate": 1.7699511396191573e-05, "loss": 1.1522, "step": 7432 }, { "epoch": 24.37049180327869, "grad_norm": 6.9435296058654785, "learning_rate": 1.7698833759673328e-05, "loss": 1.1287, "step": 7433 }, { "epoch": 24.373770491803278, "grad_norm": 20.035192489624023, "learning_rate": 1.7698156036342723e-05, "loss": 1.1769, "step": 7434 }, { "epoch": 24.37704918032787, "grad_norm": 8.090957641601562, "learning_rate": 1.7697478226207407e-05, "loss": 0.9939, "step": 7435 }, { "epoch": 24.380327868852458, "grad_norm": 8.564011573791504, "learning_rate": 1.769680032927502e-05, "loss": 1.4832, "step": 7436 }, { "epoch": 24.38360655737705, "grad_norm": 6.574256896972656, "learning_rate": 1.7696122345553205e-05, "loss": 1.1107, "step": 7437 }, { "epoch": 24.386885245901638, "grad_norm": 8.053852081298828, "learning_rate": 1.7695444275049613e-05, "loss": 1.2358, "step": 7438 }, { "epoch": 24.39016393442623, "grad_norm": 8.776965141296387, "learning_rate": 1.7694766117771884e-05, "loss": 1.1425, "step": 7439 }, { "epoch": 24.39344262295082, "grad_norm": 25.95384979248047, "learning_rate": 1.7694087873727663e-05, "loss": 1.1362, "step": 7440 }, { "epoch": 24.39672131147541, "grad_norm": 6.436905384063721, "learning_rate": 1.7693409542924606e-05, "loss": 1.5354, "step": 7441 }, { "epoch": 24.4, "grad_norm": 8.540474891662598, "learning_rate": 1.7692731125370355e-05, "loss": 1.343, "step": 7442 }, { "epoch": 24.40327868852459, "grad_norm": 6.84485387802124, "learning_rate": 1.7692052621072558e-05, "loss": 1.183, "step": 7443 }, { "epoch": 24.40655737704918, "grad_norm": 5.674765586853027, "learning_rate": 1.769137403003888e-05, "loss": 1.343, "step": 7444 }, { "epoch": 24.40983606557377, "grad_norm": 7.393068313598633, "learning_rate": 1.7690695352276953e-05, "loss": 1.3273, "step": 7445 }, { "epoch": 24.41311475409836, "grad_norm": 8.587717056274414, "learning_rate": 1.7690016587794447e-05, "loss": 1.1659, "step": 7446 }, { "epoch": 24.41639344262295, "grad_norm": 11.024603843688965, "learning_rate": 1.7689337736599002e-05, "loss": 1.2322, "step": 7447 }, { "epoch": 24.41967213114754, "grad_norm": 10.72114372253418, "learning_rate": 1.7688658798698283e-05, "loss": 1.2106, "step": 7448 }, { "epoch": 24.42295081967213, "grad_norm": 8.215873718261719, "learning_rate": 1.768797977409994e-05, "loss": 1.3037, "step": 7449 }, { "epoch": 24.42622950819672, "grad_norm": 7.093799114227295, "learning_rate": 1.7687300662811636e-05, "loss": 1.0229, "step": 7450 }, { "epoch": 24.42950819672131, "grad_norm": 7.73971700668335, "learning_rate": 1.7686621464841017e-05, "loss": 1.2759, "step": 7451 }, { "epoch": 24.432786885245903, "grad_norm": 10.585577964782715, "learning_rate": 1.7685942180195757e-05, "loss": 1.3532, "step": 7452 }, { "epoch": 24.43606557377049, "grad_norm": 8.672255516052246, "learning_rate": 1.7685262808883502e-05, "loss": 1.2386, "step": 7453 }, { "epoch": 24.439344262295084, "grad_norm": 7.424099445343018, "learning_rate": 1.7684583350911917e-05, "loss": 1.149, "step": 7454 }, { "epoch": 24.442622950819672, "grad_norm": 8.654640197753906, "learning_rate": 1.768390380628867e-05, "loss": 1.2089, "step": 7455 }, { "epoch": 24.445901639344264, "grad_norm": 8.908886909484863, "learning_rate": 1.7683224175021414e-05, "loss": 1.1505, "step": 7456 }, { "epoch": 24.449180327868852, "grad_norm": 9.003252983093262, "learning_rate": 1.768254445711782e-05, "loss": 1.1755, "step": 7457 }, { "epoch": 24.452459016393444, "grad_norm": 7.773183345794678, "learning_rate": 1.7681864652585546e-05, "loss": 1.217, "step": 7458 }, { "epoch": 24.455737704918032, "grad_norm": 6.158716201782227, "learning_rate": 1.768118476143226e-05, "loss": 1.3567, "step": 7459 }, { "epoch": 24.459016393442624, "grad_norm": 7.492519378662109, "learning_rate": 1.7680504783665635e-05, "loss": 0.9786, "step": 7460 }, { "epoch": 24.462295081967213, "grad_norm": 8.312092781066895, "learning_rate": 1.767982471929333e-05, "loss": 1.0386, "step": 7461 }, { "epoch": 24.465573770491805, "grad_norm": 11.822065353393555, "learning_rate": 1.7679144568323016e-05, "loss": 1.244, "step": 7462 }, { "epoch": 24.468852459016393, "grad_norm": 6.287604331970215, "learning_rate": 1.7678464330762363e-05, "loss": 1.3555, "step": 7463 }, { "epoch": 24.472131147540985, "grad_norm": 18.85452651977539, "learning_rate": 1.7677784006619042e-05, "loss": 1.1188, "step": 7464 }, { "epoch": 24.475409836065573, "grad_norm": 6.98207426071167, "learning_rate": 1.767710359590072e-05, "loss": 0.8672, "step": 7465 }, { "epoch": 24.478688524590165, "grad_norm": 7.583587646484375, "learning_rate": 1.7676423098615078e-05, "loss": 1.0226, "step": 7466 }, { "epoch": 24.481967213114753, "grad_norm": 6.174720764160156, "learning_rate": 1.767574251476978e-05, "loss": 1.2433, "step": 7467 }, { "epoch": 24.485245901639345, "grad_norm": 7.63121223449707, "learning_rate": 1.7675061844372504e-05, "loss": 1.2876, "step": 7468 }, { "epoch": 24.488524590163934, "grad_norm": 7.795904159545898, "learning_rate": 1.7674381087430927e-05, "loss": 1.2607, "step": 7469 }, { "epoch": 24.491803278688526, "grad_norm": 37.1788330078125, "learning_rate": 1.7673700243952726e-05, "loss": 1.0753, "step": 7470 }, { "epoch": 24.495081967213114, "grad_norm": 7.5088629722595215, "learning_rate": 1.7673019313945573e-05, "loss": 1.2943, "step": 7471 }, { "epoch": 24.498360655737706, "grad_norm": 7.581496715545654, "learning_rate": 1.767233829741715e-05, "loss": 1.2246, "step": 7472 }, { "epoch": 24.501639344262294, "grad_norm": 16.08201026916504, "learning_rate": 1.7671657194375137e-05, "loss": 1.1121, "step": 7473 }, { "epoch": 24.504918032786886, "grad_norm": 8.65156364440918, "learning_rate": 1.7670976004827212e-05, "loss": 0.9761, "step": 7474 }, { "epoch": 24.508196721311474, "grad_norm": 8.859366416931152, "learning_rate": 1.7670294728781055e-05, "loss": 1.2527, "step": 7475 }, { "epoch": 24.511475409836066, "grad_norm": 8.121987342834473, "learning_rate": 1.766961336624435e-05, "loss": 0.9525, "step": 7476 }, { "epoch": 24.514754098360655, "grad_norm": 9.70322322845459, "learning_rate": 1.766893191722478e-05, "loss": 1.1628, "step": 7477 }, { "epoch": 24.518032786885247, "grad_norm": 10.24108600616455, "learning_rate": 1.7668250381730036e-05, "loss": 1.2581, "step": 7478 }, { "epoch": 24.521311475409835, "grad_norm": 7.604228973388672, "learning_rate": 1.7667568759767787e-05, "loss": 1.1433, "step": 7479 }, { "epoch": 24.524590163934427, "grad_norm": 8.681170463562012, "learning_rate": 1.7666887051345727e-05, "loss": 0.9395, "step": 7480 }, { "epoch": 24.527868852459015, "grad_norm": 8.023681640625, "learning_rate": 1.766620525647155e-05, "loss": 1.2385, "step": 7481 }, { "epoch": 24.531147540983607, "grad_norm": 9.66079044342041, "learning_rate": 1.7665523375152934e-05, "loss": 0.9576, "step": 7482 }, { "epoch": 24.534426229508195, "grad_norm": 9.218110084533691, "learning_rate": 1.7664841407397575e-05, "loss": 1.3008, "step": 7483 }, { "epoch": 24.537704918032787, "grad_norm": 10.012674331665039, "learning_rate": 1.766415935321316e-05, "loss": 1.2144, "step": 7484 }, { "epoch": 24.540983606557376, "grad_norm": 9.827874183654785, "learning_rate": 1.7663477212607375e-05, "loss": 1.1847, "step": 7485 }, { "epoch": 24.544262295081968, "grad_norm": 9.468673706054688, "learning_rate": 1.766279498558792e-05, "loss": 1.2234, "step": 7486 }, { "epoch": 24.547540983606556, "grad_norm": 7.824769020080566, "learning_rate": 1.7662112672162485e-05, "loss": 1.2396, "step": 7487 }, { "epoch": 24.550819672131148, "grad_norm": 10.141990661621094, "learning_rate": 1.7661430272338764e-05, "loss": 1.0996, "step": 7488 }, { "epoch": 24.554098360655736, "grad_norm": 9.606060028076172, "learning_rate": 1.7660747786124445e-05, "loss": 1.2053, "step": 7489 }, { "epoch": 24.557377049180328, "grad_norm": 19.54848289489746, "learning_rate": 1.7660065213527233e-05, "loss": 1.0504, "step": 7490 }, { "epoch": 24.560655737704916, "grad_norm": 10.209521293640137, "learning_rate": 1.7659382554554822e-05, "loss": 1.3527, "step": 7491 }, { "epoch": 24.56393442622951, "grad_norm": 8.459670066833496, "learning_rate": 1.7658699809214906e-05, "loss": 1.0387, "step": 7492 }, { "epoch": 24.567213114754097, "grad_norm": 8.944852828979492, "learning_rate": 1.765801697751519e-05, "loss": 1.1979, "step": 7493 }, { "epoch": 24.57049180327869, "grad_norm": 5.926830291748047, "learning_rate": 1.765733405946337e-05, "loss": 1.0394, "step": 7494 }, { "epoch": 24.57377049180328, "grad_norm": 8.233747482299805, "learning_rate": 1.7656651055067146e-05, "loss": 1.1713, "step": 7495 }, { "epoch": 24.57704918032787, "grad_norm": 7.8706865310668945, "learning_rate": 1.7655967964334218e-05, "loss": 1.2101, "step": 7496 }, { "epoch": 24.58032786885246, "grad_norm": 7.95872163772583, "learning_rate": 1.7655284787272292e-05, "loss": 1.1334, "step": 7497 }, { "epoch": 24.58360655737705, "grad_norm": 10.426100730895996, "learning_rate": 1.7654601523889073e-05, "loss": 1.2898, "step": 7498 }, { "epoch": 24.58688524590164, "grad_norm": 7.5655975341796875, "learning_rate": 1.765391817419226e-05, "loss": 1.0121, "step": 7499 }, { "epoch": 24.59016393442623, "grad_norm": 8.318973541259766, "learning_rate": 1.7653234738189565e-05, "loss": 1.0271, "step": 7500 }, { "epoch": 24.59344262295082, "grad_norm": 7.270852088928223, "learning_rate": 1.7652551215888688e-05, "loss": 1.2981, "step": 7501 }, { "epoch": 24.59672131147541, "grad_norm": 9.298344612121582, "learning_rate": 1.765186760729734e-05, "loss": 1.2407, "step": 7502 }, { "epoch": 24.6, "grad_norm": 8.35423469543457, "learning_rate": 1.7651183912423228e-05, "loss": 1.2439, "step": 7503 }, { "epoch": 24.60327868852459, "grad_norm": 14.202654838562012, "learning_rate": 1.7650500131274064e-05, "loss": 1.2128, "step": 7504 }, { "epoch": 24.60655737704918, "grad_norm": 6.925654411315918, "learning_rate": 1.764981626385756e-05, "loss": 1.2461, "step": 7505 }, { "epoch": 24.60983606557377, "grad_norm": 8.337213516235352, "learning_rate": 1.7649132310181416e-05, "loss": 1.2257, "step": 7506 }, { "epoch": 24.613114754098362, "grad_norm": 11.724298477172852, "learning_rate": 1.7648448270253356e-05, "loss": 1.1, "step": 7507 }, { "epoch": 24.61639344262295, "grad_norm": 7.582003116607666, "learning_rate": 1.764776414408109e-05, "loss": 1.1221, "step": 7508 }, { "epoch": 24.619672131147542, "grad_norm": 6.458169460296631, "learning_rate": 1.764707993167233e-05, "loss": 1.2517, "step": 7509 }, { "epoch": 24.62295081967213, "grad_norm": 7.924469470977783, "learning_rate": 1.7646395633034793e-05, "loss": 1.1326, "step": 7510 }, { "epoch": 24.626229508196722, "grad_norm": 14.044720649719238, "learning_rate": 1.7645711248176198e-05, "loss": 1.5151, "step": 7511 }, { "epoch": 24.62950819672131, "grad_norm": 12.71950626373291, "learning_rate": 1.7645026777104254e-05, "loss": 1.2688, "step": 7512 }, { "epoch": 24.632786885245903, "grad_norm": 7.085191249847412, "learning_rate": 1.7644342219826688e-05, "loss": 1.1858, "step": 7513 }, { "epoch": 24.63606557377049, "grad_norm": 6.493353843688965, "learning_rate": 1.7643657576351213e-05, "loss": 1.2451, "step": 7514 }, { "epoch": 24.639344262295083, "grad_norm": 8.763490676879883, "learning_rate": 1.7642972846685552e-05, "loss": 1.0797, "step": 7515 }, { "epoch": 24.64262295081967, "grad_norm": 11.446795463562012, "learning_rate": 1.764228803083743e-05, "loss": 1.1796, "step": 7516 }, { "epoch": 24.645901639344263, "grad_norm": 7.074670791625977, "learning_rate": 1.764160312881456e-05, "loss": 1.3508, "step": 7517 }, { "epoch": 24.64918032786885, "grad_norm": 11.836524963378906, "learning_rate": 1.764091814062467e-05, "loss": 1.238, "step": 7518 }, { "epoch": 24.652459016393443, "grad_norm": 7.045542240142822, "learning_rate": 1.7640233066275484e-05, "loss": 1.1688, "step": 7519 }, { "epoch": 24.65573770491803, "grad_norm": 6.318970203399658, "learning_rate": 1.7639547905774724e-05, "loss": 1.259, "step": 7520 }, { "epoch": 24.659016393442624, "grad_norm": 7.358802318572998, "learning_rate": 1.763886265913012e-05, "loss": 1.3125, "step": 7521 }, { "epoch": 24.662295081967212, "grad_norm": 13.915374755859375, "learning_rate": 1.7638177326349394e-05, "loss": 1.1865, "step": 7522 }, { "epoch": 24.665573770491804, "grad_norm": 6.587783336639404, "learning_rate": 1.763749190744028e-05, "loss": 1.2883, "step": 7523 }, { "epoch": 24.668852459016392, "grad_norm": 6.890915870666504, "learning_rate": 1.7636806402410503e-05, "loss": 1.2125, "step": 7524 }, { "epoch": 24.672131147540984, "grad_norm": 10.150712013244629, "learning_rate": 1.7636120811267798e-05, "loss": 1.3976, "step": 7525 }, { "epoch": 24.675409836065572, "grad_norm": 6.878763675689697, "learning_rate": 1.7635435134019887e-05, "loss": 1.1792, "step": 7526 }, { "epoch": 24.678688524590164, "grad_norm": 7.872745513916016, "learning_rate": 1.7634749370674506e-05, "loss": 1.2083, "step": 7527 }, { "epoch": 24.681967213114753, "grad_norm": 7.877538204193115, "learning_rate": 1.763406352123939e-05, "loss": 1.1917, "step": 7528 }, { "epoch": 24.685245901639345, "grad_norm": 7.822597503662109, "learning_rate": 1.763337758572227e-05, "loss": 1.2556, "step": 7529 }, { "epoch": 24.688524590163933, "grad_norm": 10.84537124633789, "learning_rate": 1.763269156413088e-05, "loss": 1.1995, "step": 7530 }, { "epoch": 24.691803278688525, "grad_norm": 7.989884376525879, "learning_rate": 1.7632005456472954e-05, "loss": 1.2815, "step": 7531 }, { "epoch": 24.695081967213113, "grad_norm": 8.695324897766113, "learning_rate": 1.763131926275623e-05, "loss": 1.1993, "step": 7532 }, { "epoch": 24.698360655737705, "grad_norm": 7.720767498016357, "learning_rate": 1.763063298298845e-05, "loss": 1.291, "step": 7533 }, { "epoch": 24.701639344262293, "grad_norm": 19.338623046875, "learning_rate": 1.7629946617177355e-05, "loss": 1.2568, "step": 7534 }, { "epoch": 24.704918032786885, "grad_norm": 8.512701034545898, "learning_rate": 1.762926016533067e-05, "loss": 1.2463, "step": 7535 }, { "epoch": 24.708196721311474, "grad_norm": 8.487397193908691, "learning_rate": 1.7628573627456148e-05, "loss": 1.3042, "step": 7536 }, { "epoch": 24.711475409836066, "grad_norm": 7.61692476272583, "learning_rate": 1.7627887003561525e-05, "loss": 1.0912, "step": 7537 }, { "epoch": 24.714754098360658, "grad_norm": 7.3937153816223145, "learning_rate": 1.7627200293654545e-05, "loss": 1.2085, "step": 7538 }, { "epoch": 24.718032786885246, "grad_norm": 13.902698516845703, "learning_rate": 1.7626513497742954e-05, "loss": 1.132, "step": 7539 }, { "epoch": 24.721311475409838, "grad_norm": 8.54470443725586, "learning_rate": 1.7625826615834492e-05, "loss": 1.3867, "step": 7540 }, { "epoch": 24.724590163934426, "grad_norm": 7.495753765106201, "learning_rate": 1.7625139647936906e-05, "loss": 1.0607, "step": 7541 }, { "epoch": 24.727868852459018, "grad_norm": 11.64258098602295, "learning_rate": 1.7624452594057944e-05, "loss": 1.1611, "step": 7542 }, { "epoch": 24.731147540983606, "grad_norm": 7.428439617156982, "learning_rate": 1.7623765454205348e-05, "loss": 1.1646, "step": 7543 }, { "epoch": 24.7344262295082, "grad_norm": 7.217948913574219, "learning_rate": 1.7623078228386873e-05, "loss": 1.1215, "step": 7544 }, { "epoch": 24.737704918032787, "grad_norm": 10.683721542358398, "learning_rate": 1.762239091661026e-05, "loss": 1.2273, "step": 7545 }, { "epoch": 24.74098360655738, "grad_norm": 8.687527656555176, "learning_rate": 1.7621703518883272e-05, "loss": 1.2867, "step": 7546 }, { "epoch": 24.744262295081967, "grad_norm": 19.79900360107422, "learning_rate": 1.7621016035213646e-05, "loss": 1.3733, "step": 7547 }, { "epoch": 24.74754098360656, "grad_norm": 48.6368408203125, "learning_rate": 1.7620328465609144e-05, "loss": 1.1858, "step": 7548 }, { "epoch": 24.750819672131147, "grad_norm": 11.870978355407715, "learning_rate": 1.7619640810077512e-05, "loss": 1.131, "step": 7549 }, { "epoch": 24.75409836065574, "grad_norm": 8.476663589477539, "learning_rate": 1.761895306862651e-05, "loss": 0.9772, "step": 7550 }, { "epoch": 24.757377049180327, "grad_norm": 21.054746627807617, "learning_rate": 1.761826524126389e-05, "loss": 1.3655, "step": 7551 }, { "epoch": 24.76065573770492, "grad_norm": 6.312690258026123, "learning_rate": 1.7617577327997408e-05, "loss": 1.2625, "step": 7552 }, { "epoch": 24.763934426229508, "grad_norm": 7.755091667175293, "learning_rate": 1.761688932883482e-05, "loss": 1.2313, "step": 7553 }, { "epoch": 24.7672131147541, "grad_norm": 8.19339656829834, "learning_rate": 1.7616201243783887e-05, "loss": 1.2357, "step": 7554 }, { "epoch": 24.770491803278688, "grad_norm": 8.255220413208008, "learning_rate": 1.7615513072852365e-05, "loss": 1.2072, "step": 7555 }, { "epoch": 24.77377049180328, "grad_norm": 11.613554954528809, "learning_rate": 1.7614824816048016e-05, "loss": 1.3682, "step": 7556 }, { "epoch": 24.777049180327868, "grad_norm": 13.908638954162598, "learning_rate": 1.76141364733786e-05, "loss": 1.4329, "step": 7557 }, { "epoch": 24.78032786885246, "grad_norm": 8.588838577270508, "learning_rate": 1.7613448044851876e-05, "loss": 1.1262, "step": 7558 }, { "epoch": 24.78360655737705, "grad_norm": 7.452268123626709, "learning_rate": 1.7612759530475613e-05, "loss": 1.0154, "step": 7559 }, { "epoch": 24.78688524590164, "grad_norm": 7.354078769683838, "learning_rate": 1.761207093025757e-05, "loss": 1.3649, "step": 7560 }, { "epoch": 24.79016393442623, "grad_norm": 6.94096565246582, "learning_rate": 1.761138224420551e-05, "loss": 1.2621, "step": 7561 }, { "epoch": 24.79344262295082, "grad_norm": 7.228749752044678, "learning_rate": 1.7610693472327206e-05, "loss": 1.3108, "step": 7562 }, { "epoch": 24.79672131147541, "grad_norm": 11.647721290588379, "learning_rate": 1.7610004614630414e-05, "loss": 1.1442, "step": 7563 }, { "epoch": 24.8, "grad_norm": 8.955480575561523, "learning_rate": 1.7609315671122912e-05, "loss": 1.2888, "step": 7564 }, { "epoch": 24.80327868852459, "grad_norm": 6.805329322814941, "learning_rate": 1.7608626641812464e-05, "loss": 1.4573, "step": 7565 }, { "epoch": 24.80655737704918, "grad_norm": 8.585407257080078, "learning_rate": 1.7607937526706838e-05, "loss": 1.1982, "step": 7566 }, { "epoch": 24.80983606557377, "grad_norm": 8.137834548950195, "learning_rate": 1.7607248325813806e-05, "loss": 1.2705, "step": 7567 }, { "epoch": 24.81311475409836, "grad_norm": 14.078903198242188, "learning_rate": 1.760655903914114e-05, "loss": 1.2229, "step": 7568 }, { "epoch": 24.81639344262295, "grad_norm": 9.191569328308105, "learning_rate": 1.7605869666696613e-05, "loss": 1.2814, "step": 7569 }, { "epoch": 24.81967213114754, "grad_norm": 8.723236083984375, "learning_rate": 1.7605180208487996e-05, "loss": 0.9668, "step": 7570 }, { "epoch": 24.82295081967213, "grad_norm": 9.950310707092285, "learning_rate": 1.7604490664523066e-05, "loss": 1.2737, "step": 7571 }, { "epoch": 24.82622950819672, "grad_norm": 15.648223876953125, "learning_rate": 1.7603801034809597e-05, "loss": 1.2272, "step": 7572 }, { "epoch": 24.82950819672131, "grad_norm": 8.902923583984375, "learning_rate": 1.7603111319355366e-05, "loss": 1.2255, "step": 7573 }, { "epoch": 24.832786885245902, "grad_norm": 9.97433853149414, "learning_rate": 1.7602421518168147e-05, "loss": 1.3125, "step": 7574 }, { "epoch": 24.83606557377049, "grad_norm": 11.87671184539795, "learning_rate": 1.760173163125572e-05, "loss": 1.3096, "step": 7575 }, { "epoch": 24.839344262295082, "grad_norm": 7.7200236320495605, "learning_rate": 1.760104165862587e-05, "loss": 1.1765, "step": 7576 }, { "epoch": 24.84262295081967, "grad_norm": 8.981040954589844, "learning_rate": 1.760035160028637e-05, "loss": 1.1448, "step": 7577 }, { "epoch": 24.845901639344262, "grad_norm": 7.292300701141357, "learning_rate": 1.7599661456245e-05, "loss": 1.2977, "step": 7578 }, { "epoch": 24.84918032786885, "grad_norm": 6.4547271728515625, "learning_rate": 1.759897122650955e-05, "loss": 1.1364, "step": 7579 }, { "epoch": 24.852459016393443, "grad_norm": 7.286811351776123, "learning_rate": 1.7598280911087794e-05, "loss": 1.1462, "step": 7580 }, { "epoch": 24.855737704918035, "grad_norm": 8.966805458068848, "learning_rate": 1.7597590509987524e-05, "loss": 1.3672, "step": 7581 }, { "epoch": 24.859016393442623, "grad_norm": 14.63886833190918, "learning_rate": 1.7596900023216523e-05, "loss": 1.1653, "step": 7582 }, { "epoch": 24.862295081967215, "grad_norm": 6.805157661437988, "learning_rate": 1.7596209450782573e-05, "loss": 1.3977, "step": 7583 }, { "epoch": 24.865573770491803, "grad_norm": 8.454237937927246, "learning_rate": 1.7595518792693463e-05, "loss": 1.2502, "step": 7584 }, { "epoch": 24.868852459016395, "grad_norm": 6.723571300506592, "learning_rate": 1.7594828048956983e-05, "loss": 1.2378, "step": 7585 }, { "epoch": 24.872131147540983, "grad_norm": 7.8018975257873535, "learning_rate": 1.7594137219580917e-05, "loss": 1.2266, "step": 7586 }, { "epoch": 24.875409836065575, "grad_norm": 8.347740173339844, "learning_rate": 1.759344630457306e-05, "loss": 1.2234, "step": 7587 }, { "epoch": 24.878688524590164, "grad_norm": 8.715951919555664, "learning_rate": 1.75927553039412e-05, "loss": 1.2593, "step": 7588 }, { "epoch": 24.881967213114756, "grad_norm": 22.583110809326172, "learning_rate": 1.759206421769313e-05, "loss": 1.157, "step": 7589 }, { "epoch": 24.885245901639344, "grad_norm": 8.157798767089844, "learning_rate": 1.759137304583664e-05, "loss": 1.3688, "step": 7590 }, { "epoch": 24.888524590163936, "grad_norm": 8.761933326721191, "learning_rate": 1.759068178837953e-05, "loss": 1.1584, "step": 7591 }, { "epoch": 24.891803278688524, "grad_norm": 13.07568073272705, "learning_rate": 1.758999044532959e-05, "loss": 1.0947, "step": 7592 }, { "epoch": 24.895081967213116, "grad_norm": 8.824180603027344, "learning_rate": 1.758929901669461e-05, "loss": 0.9642, "step": 7593 }, { "epoch": 24.898360655737704, "grad_norm": 7.530888080596924, "learning_rate": 1.75886075024824e-05, "loss": 1.0945, "step": 7594 }, { "epoch": 24.901639344262296, "grad_norm": 9.610173225402832, "learning_rate": 1.7587915902700748e-05, "loss": 1.0875, "step": 7595 }, { "epoch": 24.904918032786885, "grad_norm": 13.541111946105957, "learning_rate": 1.7587224217357456e-05, "loss": 1.1531, "step": 7596 }, { "epoch": 24.908196721311477, "grad_norm": 8.131160736083984, "learning_rate": 1.758653244646032e-05, "loss": 1.0837, "step": 7597 }, { "epoch": 24.911475409836065, "grad_norm": 10.386931419372559, "learning_rate": 1.7585840590017143e-05, "loss": 1.1763, "step": 7598 }, { "epoch": 24.914754098360657, "grad_norm": 8.189921379089355, "learning_rate": 1.7585148648035726e-05, "loss": 1.323, "step": 7599 }, { "epoch": 24.918032786885245, "grad_norm": 14.812488555908203, "learning_rate": 1.7584456620523873e-05, "loss": 1.28, "step": 7600 }, { "epoch": 24.921311475409837, "grad_norm": 7.822157382965088, "learning_rate": 1.7583764507489386e-05, "loss": 1.2913, "step": 7601 }, { "epoch": 24.924590163934425, "grad_norm": 6.798226833343506, "learning_rate": 1.7583072308940066e-05, "loss": 1.3691, "step": 7602 }, { "epoch": 24.927868852459017, "grad_norm": 6.895071506500244, "learning_rate": 1.7582380024883722e-05, "loss": 1.1421, "step": 7603 }, { "epoch": 24.931147540983606, "grad_norm": 7.785478591918945, "learning_rate": 1.758168765532816e-05, "loss": 1.3943, "step": 7604 }, { "epoch": 24.934426229508198, "grad_norm": 9.352822303771973, "learning_rate": 1.7580995200281187e-05, "loss": 1.1996, "step": 7605 }, { "epoch": 24.937704918032786, "grad_norm": 7.8459391593933105, "learning_rate": 1.758030265975061e-05, "loss": 1.0248, "step": 7606 }, { "epoch": 24.940983606557378, "grad_norm": 9.726497650146484, "learning_rate": 1.757961003374424e-05, "loss": 1.1326, "step": 7607 }, { "epoch": 24.944262295081966, "grad_norm": 7.604334831237793, "learning_rate": 1.7578917322269885e-05, "loss": 1.3435, "step": 7608 }, { "epoch": 24.947540983606558, "grad_norm": 10.64186954498291, "learning_rate": 1.757822452533536e-05, "loss": 1.1152, "step": 7609 }, { "epoch": 24.950819672131146, "grad_norm": 8.734298706054688, "learning_rate": 1.757753164294847e-05, "loss": 1.2218, "step": 7610 }, { "epoch": 24.95409836065574, "grad_norm": 8.192754745483398, "learning_rate": 1.7576838675117038e-05, "loss": 1.3135, "step": 7611 }, { "epoch": 24.957377049180327, "grad_norm": 6.650279998779297, "learning_rate": 1.7576145621848865e-05, "loss": 1.2329, "step": 7612 }, { "epoch": 24.96065573770492, "grad_norm": 8.532095909118652, "learning_rate": 1.7575452483151778e-05, "loss": 1.3992, "step": 7613 }, { "epoch": 24.963934426229507, "grad_norm": 9.054408073425293, "learning_rate": 1.7574759259033586e-05, "loss": 1.3362, "step": 7614 }, { "epoch": 24.9672131147541, "grad_norm": 8.21865177154541, "learning_rate": 1.7574065949502107e-05, "loss": 1.3488, "step": 7615 }, { "epoch": 24.970491803278687, "grad_norm": 7.842900276184082, "learning_rate": 1.757337255456516e-05, "loss": 1.1814, "step": 7616 }, { "epoch": 24.97377049180328, "grad_norm": 9.805048942565918, "learning_rate": 1.757267907423056e-05, "loss": 1.1045, "step": 7617 }, { "epoch": 24.977049180327867, "grad_norm": 10.503933906555176, "learning_rate": 1.757198550850613e-05, "loss": 1.1345, "step": 7618 }, { "epoch": 24.98032786885246, "grad_norm": 9.619786262512207, "learning_rate": 1.7571291857399696e-05, "loss": 1.1674, "step": 7619 }, { "epoch": 24.983606557377048, "grad_norm": 8.668025016784668, "learning_rate": 1.757059812091907e-05, "loss": 1.098, "step": 7620 }, { "epoch": 24.98688524590164, "grad_norm": 6.785635471343994, "learning_rate": 1.7569904299072084e-05, "loss": 1.3875, "step": 7621 }, { "epoch": 24.990163934426228, "grad_norm": 9.688241958618164, "learning_rate": 1.756921039186655e-05, "loss": 1.1948, "step": 7622 }, { "epoch": 24.99344262295082, "grad_norm": 8.417081832885742, "learning_rate": 1.75685163993103e-05, "loss": 1.0782, "step": 7623 }, { "epoch": 24.99672131147541, "grad_norm": 7.695891857147217, "learning_rate": 1.756782232141116e-05, "loss": 1.2296, "step": 7624 }, { "epoch": 25.0, "grad_norm": 8.200347900390625, "learning_rate": 1.7567128158176955e-05, "loss": 0.9967, "step": 7625 }, { "epoch": 25.003278688524592, "grad_norm": 8.208074569702148, "learning_rate": 1.756643390961551e-05, "loss": 1.0676, "step": 7626 }, { "epoch": 25.00655737704918, "grad_norm": 7.071081161499023, "learning_rate": 1.7565739575734657e-05, "loss": 1.2108, "step": 7627 }, { "epoch": 25.009836065573772, "grad_norm": 9.361170768737793, "learning_rate": 1.7565045156542224e-05, "loss": 1.0807, "step": 7628 }, { "epoch": 25.01311475409836, "grad_norm": 6.2202019691467285, "learning_rate": 1.756435065204604e-05, "loss": 1.0759, "step": 7629 }, { "epoch": 25.016393442622952, "grad_norm": 6.422909736633301, "learning_rate": 1.756365606225394e-05, "loss": 1.2366, "step": 7630 }, { "epoch": 25.01967213114754, "grad_norm": 6.492117404937744, "learning_rate": 1.756296138717375e-05, "loss": 1.124, "step": 7631 }, { "epoch": 25.022950819672133, "grad_norm": 6.480733871459961, "learning_rate": 1.7562266626813312e-05, "loss": 1.2911, "step": 7632 }, { "epoch": 25.02622950819672, "grad_norm": 8.916438102722168, "learning_rate": 1.756157178118045e-05, "loss": 1.2632, "step": 7633 }, { "epoch": 25.029508196721313, "grad_norm": 5.3344292640686035, "learning_rate": 1.7560876850283007e-05, "loss": 1.2993, "step": 7634 }, { "epoch": 25.0327868852459, "grad_norm": 7.306712627410889, "learning_rate": 1.7560181834128817e-05, "loss": 1.051, "step": 7635 }, { "epoch": 25.036065573770493, "grad_norm": 7.517611980438232, "learning_rate": 1.7559486732725714e-05, "loss": 1.1492, "step": 7636 }, { "epoch": 25.03934426229508, "grad_norm": 7.948747158050537, "learning_rate": 1.755879154608154e-05, "loss": 1.2605, "step": 7637 }, { "epoch": 25.042622950819673, "grad_norm": 6.190670013427734, "learning_rate": 1.7558096274204128e-05, "loss": 1.0315, "step": 7638 }, { "epoch": 25.04590163934426, "grad_norm": 7.554588317871094, "learning_rate": 1.7557400917101324e-05, "loss": 1.3295, "step": 7639 }, { "epoch": 25.049180327868854, "grad_norm": 7.522397041320801, "learning_rate": 1.755670547478097e-05, "loss": 1.074, "step": 7640 }, { "epoch": 25.052459016393442, "grad_norm": 7.822667598724365, "learning_rate": 1.75560099472509e-05, "loss": 0.9429, "step": 7641 }, { "epoch": 25.055737704918034, "grad_norm": 8.362262725830078, "learning_rate": 1.7555314334518965e-05, "loss": 1.1539, "step": 7642 }, { "epoch": 25.059016393442622, "grad_norm": 7.272496700286865, "learning_rate": 1.7554618636593004e-05, "loss": 0.9611, "step": 7643 }, { "epoch": 25.062295081967214, "grad_norm": 7.1248321533203125, "learning_rate": 1.7553922853480862e-05, "loss": 1.2754, "step": 7644 }, { "epoch": 25.065573770491802, "grad_norm": 6.516973972320557, "learning_rate": 1.755322698519039e-05, "loss": 1.3499, "step": 7645 }, { "epoch": 25.068852459016394, "grad_norm": 7.810155391693115, "learning_rate": 1.7552531031729424e-05, "loss": 1.166, "step": 7646 }, { "epoch": 25.072131147540983, "grad_norm": 6.659343719482422, "learning_rate": 1.7551834993105825e-05, "loss": 1.2187, "step": 7647 }, { "epoch": 25.075409836065575, "grad_norm": 8.328780174255371, "learning_rate": 1.755113886932743e-05, "loss": 1.2527, "step": 7648 }, { "epoch": 25.078688524590163, "grad_norm": 39.44688415527344, "learning_rate": 1.7550442660402094e-05, "loss": 1.0524, "step": 7649 }, { "epoch": 25.081967213114755, "grad_norm": 39.57716369628906, "learning_rate": 1.7549746366337665e-05, "loss": 1.0115, "step": 7650 }, { "epoch": 25.085245901639343, "grad_norm": 5.70328426361084, "learning_rate": 1.7549049987141997e-05, "loss": 1.2281, "step": 7651 }, { "epoch": 25.088524590163935, "grad_norm": 8.806371688842773, "learning_rate": 1.7548353522822946e-05, "loss": 0.9803, "step": 7652 }, { "epoch": 25.091803278688523, "grad_norm": 5.971502304077148, "learning_rate": 1.7547656973388353e-05, "loss": 1.2793, "step": 7653 }, { "epoch": 25.095081967213115, "grad_norm": 7.286867141723633, "learning_rate": 1.7546960338846085e-05, "loss": 1.2083, "step": 7654 }, { "epoch": 25.098360655737704, "grad_norm": 8.48098373413086, "learning_rate": 1.7546263619203992e-05, "loss": 1.2197, "step": 7655 }, { "epoch": 25.101639344262296, "grad_norm": 8.330631256103516, "learning_rate": 1.7545566814469928e-05, "loss": 1.3206, "step": 7656 }, { "epoch": 25.104918032786884, "grad_norm": 9.05217456817627, "learning_rate": 1.7544869924651754e-05, "loss": 1.1199, "step": 7657 }, { "epoch": 25.108196721311476, "grad_norm": 10.05444049835205, "learning_rate": 1.754417294975733e-05, "loss": 1.1614, "step": 7658 }, { "epoch": 25.111475409836064, "grad_norm": 8.869338989257812, "learning_rate": 1.754347588979451e-05, "loss": 1.192, "step": 7659 }, { "epoch": 25.114754098360656, "grad_norm": 7.249868869781494, "learning_rate": 1.754277874477115e-05, "loss": 1.1644, "step": 7660 }, { "epoch": 25.118032786885244, "grad_norm": 6.793003559112549, "learning_rate": 1.7542081514695122e-05, "loss": 1.0193, "step": 7661 }, { "epoch": 25.121311475409836, "grad_norm": 7.781829833984375, "learning_rate": 1.7541384199574285e-05, "loss": 1.1249, "step": 7662 }, { "epoch": 25.124590163934425, "grad_norm": 7.284098148345947, "learning_rate": 1.75406867994165e-05, "loss": 1.2993, "step": 7663 }, { "epoch": 25.127868852459017, "grad_norm": 7.499817371368408, "learning_rate": 1.7539989314229628e-05, "loss": 1.2063, "step": 7664 }, { "epoch": 25.131147540983605, "grad_norm": 11.686310768127441, "learning_rate": 1.753929174402154e-05, "loss": 1.2864, "step": 7665 }, { "epoch": 25.134426229508197, "grad_norm": 7.7737717628479, "learning_rate": 1.7538594088800097e-05, "loss": 0.9846, "step": 7666 }, { "epoch": 25.137704918032785, "grad_norm": 7.476858139038086, "learning_rate": 1.7537896348573166e-05, "loss": 1.1359, "step": 7667 }, { "epoch": 25.140983606557377, "grad_norm": 8.144535064697266, "learning_rate": 1.7537198523348617e-05, "loss": 0.8549, "step": 7668 }, { "epoch": 25.14426229508197, "grad_norm": 7.35689115524292, "learning_rate": 1.753650061313432e-05, "loss": 0.9247, "step": 7669 }, { "epoch": 25.147540983606557, "grad_norm": 16.313243865966797, "learning_rate": 1.7535802617938143e-05, "loss": 1.065, "step": 7670 }, { "epoch": 25.15081967213115, "grad_norm": 7.665318965911865, "learning_rate": 1.7535104537767952e-05, "loss": 1.3531, "step": 7671 }, { "epoch": 25.154098360655738, "grad_norm": 7.572041034698486, "learning_rate": 1.7534406372631626e-05, "loss": 1.0657, "step": 7672 }, { "epoch": 25.15737704918033, "grad_norm": 8.906612396240234, "learning_rate": 1.7533708122537034e-05, "loss": 1.037, "step": 7673 }, { "epoch": 25.160655737704918, "grad_norm": 8.377792358398438, "learning_rate": 1.7533009787492048e-05, "loss": 1.1123, "step": 7674 }, { "epoch": 25.16393442622951, "grad_norm": 7.52640962600708, "learning_rate": 1.7532311367504548e-05, "loss": 1.1058, "step": 7675 }, { "epoch": 25.167213114754098, "grad_norm": 7.675042629241943, "learning_rate": 1.75316128625824e-05, "loss": 0.9857, "step": 7676 }, { "epoch": 25.17049180327869, "grad_norm": 11.00903034210205, "learning_rate": 1.7530914272733493e-05, "loss": 1.1227, "step": 7677 }, { "epoch": 25.17377049180328, "grad_norm": 9.964029312133789, "learning_rate": 1.7530215597965692e-05, "loss": 1.111, "step": 7678 }, { "epoch": 25.17704918032787, "grad_norm": 5.255417823791504, "learning_rate": 1.752951683828688e-05, "loss": 1.2472, "step": 7679 }, { "epoch": 25.18032786885246, "grad_norm": 7.146359920501709, "learning_rate": 1.7528817993704942e-05, "loss": 1.0559, "step": 7680 }, { "epoch": 25.18360655737705, "grad_norm": 10.236071586608887, "learning_rate": 1.752811906422775e-05, "loss": 1.179, "step": 7681 }, { "epoch": 25.18688524590164, "grad_norm": 14.374273300170898, "learning_rate": 1.7527420049863192e-05, "loss": 0.9592, "step": 7682 }, { "epoch": 25.19016393442623, "grad_norm": 7.509006023406982, "learning_rate": 1.752672095061914e-05, "loss": 1.1067, "step": 7683 }, { "epoch": 25.19344262295082, "grad_norm": 10.007627487182617, "learning_rate": 1.7526021766503487e-05, "loss": 1.2437, "step": 7684 }, { "epoch": 25.19672131147541, "grad_norm": 7.662177562713623, "learning_rate": 1.7525322497524114e-05, "loss": 1.2079, "step": 7685 }, { "epoch": 25.2, "grad_norm": 8.412443161010742, "learning_rate": 1.7524623143688905e-05, "loss": 1.2649, "step": 7686 }, { "epoch": 25.20327868852459, "grad_norm": 7.917557239532471, "learning_rate": 1.7523923705005742e-05, "loss": 1.0855, "step": 7687 }, { "epoch": 25.20655737704918, "grad_norm": 9.438036918640137, "learning_rate": 1.7523224181482522e-05, "loss": 1.283, "step": 7688 }, { "epoch": 25.20983606557377, "grad_norm": 9.684894561767578, "learning_rate": 1.7522524573127127e-05, "loss": 1.2473, "step": 7689 }, { "epoch": 25.21311475409836, "grad_norm": 6.5685343742370605, "learning_rate": 1.7521824879947446e-05, "loss": 1.3167, "step": 7690 }, { "epoch": 25.21639344262295, "grad_norm": 7.1631646156311035, "learning_rate": 1.7521125101951365e-05, "loss": 1.1824, "step": 7691 }, { "epoch": 25.21967213114754, "grad_norm": 26.42700958251953, "learning_rate": 1.752042523914678e-05, "loss": 1.2939, "step": 7692 }, { "epoch": 25.222950819672132, "grad_norm": 7.305550575256348, "learning_rate": 1.7519725291541584e-05, "loss": 1.3762, "step": 7693 }, { "epoch": 25.22622950819672, "grad_norm": 6.820261001586914, "learning_rate": 1.7519025259143663e-05, "loss": 1.1732, "step": 7694 }, { "epoch": 25.229508196721312, "grad_norm": 8.891149520874023, "learning_rate": 1.7518325141960916e-05, "loss": 1.0182, "step": 7695 }, { "epoch": 25.2327868852459, "grad_norm": 7.678470134735107, "learning_rate": 1.7517624940001236e-05, "loss": 1.1903, "step": 7696 }, { "epoch": 25.236065573770492, "grad_norm": 6.591206073760986, "learning_rate": 1.7516924653272518e-05, "loss": 0.9954, "step": 7697 }, { "epoch": 25.23934426229508, "grad_norm": 10.494763374328613, "learning_rate": 1.751622428178266e-05, "loss": 1.252, "step": 7698 }, { "epoch": 25.242622950819673, "grad_norm": 8.254827499389648, "learning_rate": 1.751552382553956e-05, "loss": 1.2488, "step": 7699 }, { "epoch": 25.24590163934426, "grad_norm": 6.460284233093262, "learning_rate": 1.751482328455111e-05, "loss": 1.2332, "step": 7700 }, { "epoch": 25.249180327868853, "grad_norm": 10.02614974975586, "learning_rate": 1.7514122658825217e-05, "loss": 0.8317, "step": 7701 }, { "epoch": 25.25245901639344, "grad_norm": 9.118891716003418, "learning_rate": 1.7513421948369775e-05, "loss": 1.1879, "step": 7702 }, { "epoch": 25.255737704918033, "grad_norm": 6.6318745613098145, "learning_rate": 1.751272115319269e-05, "loss": 1.2299, "step": 7703 }, { "epoch": 25.25901639344262, "grad_norm": 9.226394653320312, "learning_rate": 1.7512020273301868e-05, "loss": 1.1096, "step": 7704 }, { "epoch": 25.262295081967213, "grad_norm": 9.310705184936523, "learning_rate": 1.7511319308705198e-05, "loss": 1.087, "step": 7705 }, { "epoch": 25.2655737704918, "grad_norm": 19.731943130493164, "learning_rate": 1.75106182594106e-05, "loss": 1.3032, "step": 7706 }, { "epoch": 25.268852459016394, "grad_norm": 10.940019607543945, "learning_rate": 1.750991712542597e-05, "loss": 1.037, "step": 7707 }, { "epoch": 25.272131147540982, "grad_norm": 11.370244026184082, "learning_rate": 1.7509215906759215e-05, "loss": 1.2212, "step": 7708 }, { "epoch": 25.275409836065574, "grad_norm": 9.226962089538574, "learning_rate": 1.7508514603418244e-05, "loss": 1.0461, "step": 7709 }, { "epoch": 25.278688524590162, "grad_norm": 10.912370681762695, "learning_rate": 1.7507813215410966e-05, "loss": 1.0703, "step": 7710 }, { "epoch": 25.281967213114754, "grad_norm": 24.59866714477539, "learning_rate": 1.7507111742745282e-05, "loss": 1.0238, "step": 7711 }, { "epoch": 25.285245901639342, "grad_norm": 16.270082473754883, "learning_rate": 1.7506410185429112e-05, "loss": 1.3237, "step": 7712 }, { "epoch": 25.288524590163934, "grad_norm": 12.437858581542969, "learning_rate": 1.7505708543470362e-05, "loss": 0.9597, "step": 7713 }, { "epoch": 25.291803278688526, "grad_norm": 8.27236557006836, "learning_rate": 1.7505006816876944e-05, "loss": 1.0913, "step": 7714 }, { "epoch": 25.295081967213115, "grad_norm": 8.292820930480957, "learning_rate": 1.7504305005656772e-05, "loss": 1.0134, "step": 7715 }, { "epoch": 25.298360655737707, "grad_norm": 7.968771934509277, "learning_rate": 1.750360310981776e-05, "loss": 0.949, "step": 7716 }, { "epoch": 25.301639344262295, "grad_norm": 7.816679000854492, "learning_rate": 1.7502901129367814e-05, "loss": 1.0751, "step": 7717 }, { "epoch": 25.304918032786887, "grad_norm": 9.286224365234375, "learning_rate": 1.750219906431486e-05, "loss": 1.1521, "step": 7718 }, { "epoch": 25.308196721311475, "grad_norm": 6.807562351226807, "learning_rate": 1.7501496914666814e-05, "loss": 1.0603, "step": 7719 }, { "epoch": 25.311475409836067, "grad_norm": 9.19538402557373, "learning_rate": 1.750079468043159e-05, "loss": 1.0924, "step": 7720 }, { "epoch": 25.314754098360655, "grad_norm": 7.797105312347412, "learning_rate": 1.7500092361617105e-05, "loss": 1.0959, "step": 7721 }, { "epoch": 25.318032786885247, "grad_norm": 7.304067611694336, "learning_rate": 1.7499389958231284e-05, "loss": 1.2477, "step": 7722 }, { "epoch": 25.321311475409836, "grad_norm": 13.231961250305176, "learning_rate": 1.749868747028204e-05, "loss": 1.1595, "step": 7723 }, { "epoch": 25.324590163934428, "grad_norm": 6.91468620300293, "learning_rate": 1.74979848977773e-05, "loss": 1.1721, "step": 7724 }, { "epoch": 25.327868852459016, "grad_norm": 6.519208908081055, "learning_rate": 1.749728224072498e-05, "loss": 1.0355, "step": 7725 }, { "epoch": 25.331147540983608, "grad_norm": 7.6250715255737305, "learning_rate": 1.7496579499133016e-05, "loss": 1.1542, "step": 7726 }, { "epoch": 25.334426229508196, "grad_norm": 7.978590488433838, "learning_rate": 1.7495876673009314e-05, "loss": 1.0291, "step": 7727 }, { "epoch": 25.337704918032788, "grad_norm": 7.788833141326904, "learning_rate": 1.7495173762361817e-05, "loss": 1.2461, "step": 7728 }, { "epoch": 25.340983606557376, "grad_norm": 6.928246021270752, "learning_rate": 1.749447076719844e-05, "loss": 1.3579, "step": 7729 }, { "epoch": 25.34426229508197, "grad_norm": 22.931909561157227, "learning_rate": 1.749376768752711e-05, "loss": 1.1787, "step": 7730 }, { "epoch": 25.347540983606557, "grad_norm": 18.928489685058594, "learning_rate": 1.749306452335576e-05, "loss": 1.3337, "step": 7731 }, { "epoch": 25.35081967213115, "grad_norm": 7.1083292961120605, "learning_rate": 1.749236127469232e-05, "loss": 1.1887, "step": 7732 }, { "epoch": 25.354098360655737, "grad_norm": 14.788049697875977, "learning_rate": 1.7491657941544714e-05, "loss": 1.2179, "step": 7733 }, { "epoch": 25.35737704918033, "grad_norm": 7.368479251861572, "learning_rate": 1.7490954523920872e-05, "loss": 1.3203, "step": 7734 }, { "epoch": 25.360655737704917, "grad_norm": 10.678389549255371, "learning_rate": 1.7490251021828734e-05, "loss": 1.2838, "step": 7735 }, { "epoch": 25.36393442622951, "grad_norm": 31.723283767700195, "learning_rate": 1.7489547435276223e-05, "loss": 1.123, "step": 7736 }, { "epoch": 25.367213114754097, "grad_norm": 8.2265043258667, "learning_rate": 1.748884376427128e-05, "loss": 1.2833, "step": 7737 }, { "epoch": 25.37049180327869, "grad_norm": 8.255866050720215, "learning_rate": 1.7488140008821837e-05, "loss": 1.0154, "step": 7738 }, { "epoch": 25.373770491803278, "grad_norm": 9.980694770812988, "learning_rate": 1.7487436168935832e-05, "loss": 1.1772, "step": 7739 }, { "epoch": 25.37704918032787, "grad_norm": 7.962507247924805, "learning_rate": 1.7486732244621195e-05, "loss": 1.1185, "step": 7740 }, { "epoch": 25.380327868852458, "grad_norm": 13.678108215332031, "learning_rate": 1.748602823588587e-05, "loss": 1.0686, "step": 7741 }, { "epoch": 25.38360655737705, "grad_norm": 8.735373497009277, "learning_rate": 1.7485324142737793e-05, "loss": 1.2168, "step": 7742 }, { "epoch": 25.386885245901638, "grad_norm": 8.986571311950684, "learning_rate": 1.7484619965184903e-05, "loss": 1.1825, "step": 7743 }, { "epoch": 25.39016393442623, "grad_norm": 9.124197006225586, "learning_rate": 1.748391570323514e-05, "loss": 0.9788, "step": 7744 }, { "epoch": 25.39344262295082, "grad_norm": 9.59714412689209, "learning_rate": 1.7483211356896447e-05, "loss": 1.115, "step": 7745 }, { "epoch": 25.39672131147541, "grad_norm": 10.189932823181152, "learning_rate": 1.748250692617677e-05, "loss": 1.2087, "step": 7746 }, { "epoch": 25.4, "grad_norm": 12.056053161621094, "learning_rate": 1.748180241108404e-05, "loss": 1.1686, "step": 7747 }, { "epoch": 25.40327868852459, "grad_norm": 7.46405553817749, "learning_rate": 1.7481097811626214e-05, "loss": 1.3943, "step": 7748 }, { "epoch": 25.40655737704918, "grad_norm": 6.71895694732666, "learning_rate": 1.7480393127811228e-05, "loss": 1.2612, "step": 7749 }, { "epoch": 25.40983606557377, "grad_norm": 10.164816856384277, "learning_rate": 1.7479688359647037e-05, "loss": 1.4744, "step": 7750 }, { "epoch": 25.41311475409836, "grad_norm": 7.448090553283691, "learning_rate": 1.7478983507141577e-05, "loss": 0.9995, "step": 7751 }, { "epoch": 25.41639344262295, "grad_norm": 7.901515960693359, "learning_rate": 1.7478278570302802e-05, "loss": 1.2976, "step": 7752 }, { "epoch": 25.41967213114754, "grad_norm": 14.584538459777832, "learning_rate": 1.7477573549138666e-05, "loss": 1.1678, "step": 7753 }, { "epoch": 25.42295081967213, "grad_norm": 9.780791282653809, "learning_rate": 1.747686844365711e-05, "loss": 1.1775, "step": 7754 }, { "epoch": 25.42622950819672, "grad_norm": 10.132853507995605, "learning_rate": 1.7476163253866087e-05, "loss": 1.0295, "step": 7755 }, { "epoch": 25.42950819672131, "grad_norm": 24.581315994262695, "learning_rate": 1.7475457979773552e-05, "loss": 1.1353, "step": 7756 }, { "epoch": 25.432786885245903, "grad_norm": 9.776805877685547, "learning_rate": 1.7474752621387456e-05, "loss": 1.0416, "step": 7757 }, { "epoch": 25.43606557377049, "grad_norm": 7.545591354370117, "learning_rate": 1.747404717871575e-05, "loss": 1.145, "step": 7758 }, { "epoch": 25.439344262295084, "grad_norm": 8.56039047241211, "learning_rate": 1.7473341651766394e-05, "loss": 1.0208, "step": 7759 }, { "epoch": 25.442622950819672, "grad_norm": 7.381712436676025, "learning_rate": 1.747263604054734e-05, "loss": 1.2737, "step": 7760 }, { "epoch": 25.445901639344264, "grad_norm": 8.128575325012207, "learning_rate": 1.7471930345066542e-05, "loss": 1.1409, "step": 7761 }, { "epoch": 25.449180327868852, "grad_norm": 8.59476375579834, "learning_rate": 1.7471224565331966e-05, "loss": 1.345, "step": 7762 }, { "epoch": 25.452459016393444, "grad_norm": 10.805184364318848, "learning_rate": 1.747051870135156e-05, "loss": 1.2051, "step": 7763 }, { "epoch": 25.455737704918032, "grad_norm": 9.357346534729004, "learning_rate": 1.746981275313329e-05, "loss": 1.109, "step": 7764 }, { "epoch": 25.459016393442624, "grad_norm": 9.217819213867188, "learning_rate": 1.7469106720685113e-05, "loss": 1.2642, "step": 7765 }, { "epoch": 25.462295081967213, "grad_norm": 11.083588600158691, "learning_rate": 1.7468400604014997e-05, "loss": 1.2866, "step": 7766 }, { "epoch": 25.465573770491805, "grad_norm": 10.310277938842773, "learning_rate": 1.7467694403130893e-05, "loss": 1.2754, "step": 7767 }, { "epoch": 25.468852459016393, "grad_norm": 6.30837869644165, "learning_rate": 1.7466988118040775e-05, "loss": 1.2625, "step": 7768 }, { "epoch": 25.472131147540985, "grad_norm": 8.582866668701172, "learning_rate": 1.74662817487526e-05, "loss": 1.1406, "step": 7769 }, { "epoch": 25.475409836065573, "grad_norm": 8.42650032043457, "learning_rate": 1.7465575295274333e-05, "loss": 1.1403, "step": 7770 }, { "epoch": 25.478688524590165, "grad_norm": 12.582891464233398, "learning_rate": 1.7464868757613948e-05, "loss": 1.3549, "step": 7771 }, { "epoch": 25.481967213114753, "grad_norm": 16.32135009765625, "learning_rate": 1.74641621357794e-05, "loss": 1.3313, "step": 7772 }, { "epoch": 25.485245901639345, "grad_norm": 12.41606616973877, "learning_rate": 1.7463455429778666e-05, "loss": 1.2754, "step": 7773 }, { "epoch": 25.488524590163934, "grad_norm": 9.2321195602417, "learning_rate": 1.746274863961971e-05, "loss": 1.0588, "step": 7774 }, { "epoch": 25.491803278688526, "grad_norm": 10.129035949707031, "learning_rate": 1.746204176531051e-05, "loss": 1.1826, "step": 7775 }, { "epoch": 25.495081967213114, "grad_norm": 8.792076110839844, "learning_rate": 1.7461334806859023e-05, "loss": 1.0745, "step": 7776 }, { "epoch": 25.498360655737706, "grad_norm": 8.404053688049316, "learning_rate": 1.746062776427323e-05, "loss": 1.2548, "step": 7777 }, { "epoch": 25.501639344262294, "grad_norm": 11.522366523742676, "learning_rate": 1.7459920637561107e-05, "loss": 1.1825, "step": 7778 }, { "epoch": 25.504918032786886, "grad_norm": 14.88731575012207, "learning_rate": 1.7459213426730617e-05, "loss": 1.1375, "step": 7779 }, { "epoch": 25.508196721311474, "grad_norm": 8.412707328796387, "learning_rate": 1.745850613178974e-05, "loss": 1.1333, "step": 7780 }, { "epoch": 25.511475409836066, "grad_norm": 8.436331748962402, "learning_rate": 1.7457798752746453e-05, "loss": 1.1602, "step": 7781 }, { "epoch": 25.514754098360655, "grad_norm": 10.324048042297363, "learning_rate": 1.745709128960873e-05, "loss": 1.142, "step": 7782 }, { "epoch": 25.518032786885247, "grad_norm": 9.357919692993164, "learning_rate": 1.7456383742384552e-05, "loss": 0.8668, "step": 7783 }, { "epoch": 25.521311475409835, "grad_norm": 8.13143253326416, "learning_rate": 1.7455676111081894e-05, "loss": 1.1443, "step": 7784 }, { "epoch": 25.524590163934427, "grad_norm": 8.849763870239258, "learning_rate": 1.7454968395708735e-05, "loss": 1.1171, "step": 7785 }, { "epoch": 25.527868852459015, "grad_norm": 9.855085372924805, "learning_rate": 1.7454260596273055e-05, "loss": 1.0277, "step": 7786 }, { "epoch": 25.531147540983607, "grad_norm": 10.147029876708984, "learning_rate": 1.7453552712782837e-05, "loss": 1.1294, "step": 7787 }, { "epoch": 25.534426229508195, "grad_norm": 8.614059448242188, "learning_rate": 1.7452844745246062e-05, "loss": 1.012, "step": 7788 }, { "epoch": 25.537704918032787, "grad_norm": 7.646617412567139, "learning_rate": 1.7452136693670714e-05, "loss": 0.8467, "step": 7789 }, { "epoch": 25.540983606557376, "grad_norm": 8.519281387329102, "learning_rate": 1.7451428558064778e-05, "loss": 1.1517, "step": 7790 }, { "epoch": 25.544262295081968, "grad_norm": 8.608692169189453, "learning_rate": 1.7450720338436236e-05, "loss": 0.9556, "step": 7791 }, { "epoch": 25.547540983606556, "grad_norm": 9.583911895751953, "learning_rate": 1.7450012034793074e-05, "loss": 1.1265, "step": 7792 }, { "epoch": 25.550819672131148, "grad_norm": 8.287041664123535, "learning_rate": 1.744930364714328e-05, "loss": 1.1521, "step": 7793 }, { "epoch": 25.554098360655736, "grad_norm": 19.392719268798828, "learning_rate": 1.7448595175494846e-05, "loss": 1.2869, "step": 7794 }, { "epoch": 25.557377049180328, "grad_norm": 9.09469223022461, "learning_rate": 1.744788661985575e-05, "loss": 1.3809, "step": 7795 }, { "epoch": 25.560655737704916, "grad_norm": 9.079522132873535, "learning_rate": 1.7447177980233996e-05, "loss": 1.0751, "step": 7796 }, { "epoch": 25.56393442622951, "grad_norm": 8.350625991821289, "learning_rate": 1.7446469256637564e-05, "loss": 0.944, "step": 7797 }, { "epoch": 25.567213114754097, "grad_norm": 8.268899917602539, "learning_rate": 1.7445760449074448e-05, "loss": 1.114, "step": 7798 }, { "epoch": 25.57049180327869, "grad_norm": 9.65589714050293, "learning_rate": 1.744505155755264e-05, "loss": 1.5551, "step": 7799 }, { "epoch": 25.57377049180328, "grad_norm": 8.457277297973633, "learning_rate": 1.7444342582080137e-05, "loss": 1.2496, "step": 7800 }, { "epoch": 25.57704918032787, "grad_norm": 8.590764999389648, "learning_rate": 1.7443633522664933e-05, "loss": 1.0892, "step": 7801 }, { "epoch": 25.58032786885246, "grad_norm": 8.751941680908203, "learning_rate": 1.744292437931502e-05, "loss": 1.2629, "step": 7802 }, { "epoch": 25.58360655737705, "grad_norm": 31.583391189575195, "learning_rate": 1.7442215152038397e-05, "loss": 1.4071, "step": 7803 }, { "epoch": 25.58688524590164, "grad_norm": 7.2659759521484375, "learning_rate": 1.7441505840843057e-05, "loss": 1.135, "step": 7804 }, { "epoch": 25.59016393442623, "grad_norm": 7.684582710266113, "learning_rate": 1.7440796445737004e-05, "loss": 1.3762, "step": 7805 }, { "epoch": 25.59344262295082, "grad_norm": 7.020382404327393, "learning_rate": 1.7440086966728235e-05, "loss": 1.2911, "step": 7806 }, { "epoch": 25.59672131147541, "grad_norm": 8.649347305297852, "learning_rate": 1.7439377403824748e-05, "loss": 1.2798, "step": 7807 }, { "epoch": 25.6, "grad_norm": 8.386260032653809, "learning_rate": 1.7438667757034547e-05, "loss": 1.0321, "step": 7808 }, { "epoch": 25.60327868852459, "grad_norm": 9.856185913085938, "learning_rate": 1.743795802636563e-05, "loss": 0.9986, "step": 7809 }, { "epoch": 25.60655737704918, "grad_norm": 7.563595294952393, "learning_rate": 1.7437248211826007e-05, "loss": 1.174, "step": 7810 }, { "epoch": 25.60983606557377, "grad_norm": 8.513736724853516, "learning_rate": 1.7436538313423673e-05, "loss": 1.1963, "step": 7811 }, { "epoch": 25.613114754098362, "grad_norm": 5.865447521209717, "learning_rate": 1.743582833116664e-05, "loss": 1.3328, "step": 7812 }, { "epoch": 25.61639344262295, "grad_norm": 7.825433254241943, "learning_rate": 1.7435118265062912e-05, "loss": 1.1937, "step": 7813 }, { "epoch": 25.619672131147542, "grad_norm": 13.992701530456543, "learning_rate": 1.7434408115120494e-05, "loss": 0.9869, "step": 7814 }, { "epoch": 25.62295081967213, "grad_norm": 7.385585784912109, "learning_rate": 1.7433697881347394e-05, "loss": 1.1998, "step": 7815 }, { "epoch": 25.626229508196722, "grad_norm": 7.956852912902832, "learning_rate": 1.7432987563751623e-05, "loss": 1.168, "step": 7816 }, { "epoch": 25.62950819672131, "grad_norm": 7.804642200469971, "learning_rate": 1.7432277162341186e-05, "loss": 1.3223, "step": 7817 }, { "epoch": 25.632786885245903, "grad_norm": 8.407990455627441, "learning_rate": 1.74315666771241e-05, "loss": 1.0657, "step": 7818 }, { "epoch": 25.63606557377049, "grad_norm": 9.933746337890625, "learning_rate": 1.743085610810837e-05, "loss": 1.2482, "step": 7819 }, { "epoch": 25.639344262295083, "grad_norm": 8.819634437561035, "learning_rate": 1.7430145455302013e-05, "loss": 1.2434, "step": 7820 }, { "epoch": 25.64262295081967, "grad_norm": 7.355724334716797, "learning_rate": 1.742943471871304e-05, "loss": 1.28, "step": 7821 }, { "epoch": 25.645901639344263, "grad_norm": 9.74036693572998, "learning_rate": 1.7428723898349464e-05, "loss": 0.9381, "step": 7822 }, { "epoch": 25.64918032786885, "grad_norm": 6.749017238616943, "learning_rate": 1.7428012994219304e-05, "loss": 1.2253, "step": 7823 }, { "epoch": 25.652459016393443, "grad_norm": 7.655999183654785, "learning_rate": 1.7427302006330572e-05, "loss": 1.0599, "step": 7824 }, { "epoch": 25.65573770491803, "grad_norm": 8.69649887084961, "learning_rate": 1.7426590934691292e-05, "loss": 1.2245, "step": 7825 }, { "epoch": 25.659016393442624, "grad_norm": 10.37717056274414, "learning_rate": 1.7425879779309473e-05, "loss": 1.2133, "step": 7826 }, { "epoch": 25.662295081967212, "grad_norm": 10.556108474731445, "learning_rate": 1.7425168540193144e-05, "loss": 1.2378, "step": 7827 }, { "epoch": 25.665573770491804, "grad_norm": 8.05792236328125, "learning_rate": 1.7424457217350316e-05, "loss": 1.3545, "step": 7828 }, { "epoch": 25.668852459016392, "grad_norm": 6.940547466278076, "learning_rate": 1.742374581078901e-05, "loss": 1.3333, "step": 7829 }, { "epoch": 25.672131147540984, "grad_norm": 8.170183181762695, "learning_rate": 1.7423034320517256e-05, "loss": 0.981, "step": 7830 }, { "epoch": 25.675409836065572, "grad_norm": 12.666655540466309, "learning_rate": 1.742232274654307e-05, "loss": 1.1665, "step": 7831 }, { "epoch": 25.678688524590164, "grad_norm": 10.4921293258667, "learning_rate": 1.742161108887448e-05, "loss": 1.2058, "step": 7832 }, { "epoch": 25.681967213114753, "grad_norm": 7.042827606201172, "learning_rate": 1.7420899347519504e-05, "loss": 1.3744, "step": 7833 }, { "epoch": 25.685245901639345, "grad_norm": 17.60443878173828, "learning_rate": 1.7420187522486176e-05, "loss": 0.9484, "step": 7834 }, { "epoch": 25.688524590163933, "grad_norm": 6.923544883728027, "learning_rate": 1.7419475613782516e-05, "loss": 1.2684, "step": 7835 }, { "epoch": 25.691803278688525, "grad_norm": 7.690544605255127, "learning_rate": 1.7418763621416556e-05, "loss": 1.0249, "step": 7836 }, { "epoch": 25.695081967213113, "grad_norm": 7.940443992614746, "learning_rate": 1.7418051545396323e-05, "loss": 1.1528, "step": 7837 }, { "epoch": 25.698360655737705, "grad_norm": 8.139277458190918, "learning_rate": 1.7417339385729846e-05, "loss": 1.2042, "step": 7838 }, { "epoch": 25.701639344262293, "grad_norm": 8.668038368225098, "learning_rate": 1.7416627142425154e-05, "loss": 1.0826, "step": 7839 }, { "epoch": 25.704918032786885, "grad_norm": 9.08133602142334, "learning_rate": 1.741591481549028e-05, "loss": 1.2286, "step": 7840 }, { "epoch": 25.708196721311474, "grad_norm": 8.945905685424805, "learning_rate": 1.7415202404933256e-05, "loss": 1.1638, "step": 7841 }, { "epoch": 25.711475409836066, "grad_norm": 9.051992416381836, "learning_rate": 1.7414489910762114e-05, "loss": 1.1133, "step": 7842 }, { "epoch": 25.714754098360658, "grad_norm": 14.411506652832031, "learning_rate": 1.741377733298489e-05, "loss": 1.3298, "step": 7843 }, { "epoch": 25.718032786885246, "grad_norm": 8.900948524475098, "learning_rate": 1.7413064671609618e-05, "loss": 1.4036, "step": 7844 }, { "epoch": 25.721311475409838, "grad_norm": 9.436444282531738, "learning_rate": 1.7412351926644336e-05, "loss": 1.1505, "step": 7845 }, { "epoch": 25.724590163934426, "grad_norm": 11.044333457946777, "learning_rate": 1.7411639098097076e-05, "loss": 1.2432, "step": 7846 }, { "epoch": 25.727868852459018, "grad_norm": 8.528478622436523, "learning_rate": 1.741092618597588e-05, "loss": 1.0828, "step": 7847 }, { "epoch": 25.731147540983606, "grad_norm": 17.72016143798828, "learning_rate": 1.7410213190288788e-05, "loss": 1.0633, "step": 7848 }, { "epoch": 25.7344262295082, "grad_norm": 23.445125579833984, "learning_rate": 1.7409500111043834e-05, "loss": 1.0594, "step": 7849 }, { "epoch": 25.737704918032787, "grad_norm": 8.61736011505127, "learning_rate": 1.7408786948249065e-05, "loss": 1.0577, "step": 7850 }, { "epoch": 25.74098360655738, "grad_norm": 12.685980796813965, "learning_rate": 1.740807370191252e-05, "loss": 1.1226, "step": 7851 }, { "epoch": 25.744262295081967, "grad_norm": 42.1259651184082, "learning_rate": 1.740736037204224e-05, "loss": 1.1499, "step": 7852 }, { "epoch": 25.74754098360656, "grad_norm": 8.57598876953125, "learning_rate": 1.740664695864627e-05, "loss": 1.2595, "step": 7853 }, { "epoch": 25.750819672131147, "grad_norm": 9.748259544372559, "learning_rate": 1.7405933461732658e-05, "loss": 1.2012, "step": 7854 }, { "epoch": 25.75409836065574, "grad_norm": 9.599103927612305, "learning_rate": 1.740521988130944e-05, "loss": 1.004, "step": 7855 }, { "epoch": 25.757377049180327, "grad_norm": 9.710850715637207, "learning_rate": 1.7404506217384672e-05, "loss": 1.291, "step": 7856 }, { "epoch": 25.76065573770492, "grad_norm": 9.291095733642578, "learning_rate": 1.7403792469966397e-05, "loss": 1.2344, "step": 7857 }, { "epoch": 25.763934426229508, "grad_norm": 12.832453727722168, "learning_rate": 1.7403078639062668e-05, "loss": 1.0715, "step": 7858 }, { "epoch": 25.7672131147541, "grad_norm": 9.10926342010498, "learning_rate": 1.7402364724681524e-05, "loss": 1.2297, "step": 7859 }, { "epoch": 25.770491803278688, "grad_norm": 8.801647186279297, "learning_rate": 1.7401650726831023e-05, "loss": 1.3181, "step": 7860 }, { "epoch": 25.77377049180328, "grad_norm": 17.52680778503418, "learning_rate": 1.7400936645519215e-05, "loss": 1.1125, "step": 7861 }, { "epoch": 25.777049180327868, "grad_norm": 8.40669059753418, "learning_rate": 1.7400222480754152e-05, "loss": 1.3711, "step": 7862 }, { "epoch": 25.78032786885246, "grad_norm": 7.6108317375183105, "learning_rate": 1.7399508232543883e-05, "loss": 1.0479, "step": 7863 }, { "epoch": 25.78360655737705, "grad_norm": 10.425859451293945, "learning_rate": 1.7398793900896468e-05, "loss": 1.1531, "step": 7864 }, { "epoch": 25.78688524590164, "grad_norm": 8.813886642456055, "learning_rate": 1.739807948581996e-05, "loss": 1.2479, "step": 7865 }, { "epoch": 25.79016393442623, "grad_norm": 7.0056352615356445, "learning_rate": 1.7397364987322412e-05, "loss": 1.2845, "step": 7866 }, { "epoch": 25.79344262295082, "grad_norm": 6.412454605102539, "learning_rate": 1.7396650405411882e-05, "loss": 1.3077, "step": 7867 }, { "epoch": 25.79672131147541, "grad_norm": 18.594097137451172, "learning_rate": 1.7395935740096426e-05, "loss": 1.4265, "step": 7868 }, { "epoch": 25.8, "grad_norm": 12.003809928894043, "learning_rate": 1.739522099138411e-05, "loss": 1.0807, "step": 7869 }, { "epoch": 25.80327868852459, "grad_norm": 8.782392501831055, "learning_rate": 1.7394506159282984e-05, "loss": 1.1927, "step": 7870 }, { "epoch": 25.80655737704918, "grad_norm": 8.857528686523438, "learning_rate": 1.7393791243801115e-05, "loss": 1.259, "step": 7871 }, { "epoch": 25.80983606557377, "grad_norm": 10.127907752990723, "learning_rate": 1.7393076244946562e-05, "loss": 1.0603, "step": 7872 }, { "epoch": 25.81311475409836, "grad_norm": 10.46725845336914, "learning_rate": 1.7392361162727385e-05, "loss": 1.2466, "step": 7873 }, { "epoch": 25.81639344262295, "grad_norm": 10.06640911102295, "learning_rate": 1.7391645997151652e-05, "loss": 1.2449, "step": 7874 }, { "epoch": 25.81967213114754, "grad_norm": 8.849757194519043, "learning_rate": 1.7390930748227423e-05, "loss": 1.0304, "step": 7875 }, { "epoch": 25.82295081967213, "grad_norm": 6.9491448402404785, "learning_rate": 1.739021541596277e-05, "loss": 1.3002, "step": 7876 }, { "epoch": 25.82622950819672, "grad_norm": 9.310403823852539, "learning_rate": 1.738950000036575e-05, "loss": 1.2603, "step": 7877 }, { "epoch": 25.82950819672131, "grad_norm": 14.189422607421875, "learning_rate": 1.7388784501444435e-05, "loss": 1.1401, "step": 7878 }, { "epoch": 25.832786885245902, "grad_norm": 9.989075660705566, "learning_rate": 1.7388068919206893e-05, "loss": 1.1936, "step": 7879 }, { "epoch": 25.83606557377049, "grad_norm": 9.682625770568848, "learning_rate": 1.738735325366119e-05, "loss": 0.9946, "step": 7880 }, { "epoch": 25.839344262295082, "grad_norm": 8.579397201538086, "learning_rate": 1.73866375048154e-05, "loss": 1.1816, "step": 7881 }, { "epoch": 25.84262295081967, "grad_norm": 10.770938873291016, "learning_rate": 1.7385921672677595e-05, "loss": 0.905, "step": 7882 }, { "epoch": 25.845901639344262, "grad_norm": 9.301168441772461, "learning_rate": 1.738520575725584e-05, "loss": 1.1963, "step": 7883 }, { "epoch": 25.84918032786885, "grad_norm": 12.34523868560791, "learning_rate": 1.738448975855821e-05, "loss": 1.0472, "step": 7884 }, { "epoch": 25.852459016393443, "grad_norm": 14.015814781188965, "learning_rate": 1.7383773676592782e-05, "loss": 1.2288, "step": 7885 }, { "epoch": 25.855737704918035, "grad_norm": 7.508119583129883, "learning_rate": 1.7383057511367633e-05, "loss": 1.0006, "step": 7886 }, { "epoch": 25.859016393442623, "grad_norm": 10.363926887512207, "learning_rate": 1.7382341262890827e-05, "loss": 1.022, "step": 7887 }, { "epoch": 25.862295081967215, "grad_norm": 9.385868072509766, "learning_rate": 1.738162493117045e-05, "loss": 1.0988, "step": 7888 }, { "epoch": 25.865573770491803, "grad_norm": 8.003314971923828, "learning_rate": 1.7380908516214577e-05, "loss": 1.1511, "step": 7889 }, { "epoch": 25.868852459016395, "grad_norm": 8.027520179748535, "learning_rate": 1.7380192018031287e-05, "loss": 1.4475, "step": 7890 }, { "epoch": 25.872131147540983, "grad_norm": 8.65847396850586, "learning_rate": 1.7379475436628656e-05, "loss": 1.2748, "step": 7891 }, { "epoch": 25.875409836065575, "grad_norm": 9.862615585327148, "learning_rate": 1.7378758772014772e-05, "loss": 1.2559, "step": 7892 }, { "epoch": 25.878688524590164, "grad_norm": 8.173262596130371, "learning_rate": 1.7378042024197705e-05, "loss": 1.0369, "step": 7893 }, { "epoch": 25.881967213114756, "grad_norm": 12.109822273254395, "learning_rate": 1.7377325193185547e-05, "loss": 1.0217, "step": 7894 }, { "epoch": 25.885245901639344, "grad_norm": 11.545575141906738, "learning_rate": 1.7376608278986375e-05, "loss": 1.4653, "step": 7895 }, { "epoch": 25.888524590163936, "grad_norm": 33.143192291259766, "learning_rate": 1.7375891281608276e-05, "loss": 1.1198, "step": 7896 }, { "epoch": 25.891803278688524, "grad_norm": 8.051617622375488, "learning_rate": 1.737517420105933e-05, "loss": 1.2942, "step": 7897 }, { "epoch": 25.895081967213116, "grad_norm": 9.799962997436523, "learning_rate": 1.7374457037347634e-05, "loss": 1.219, "step": 7898 }, { "epoch": 25.898360655737704, "grad_norm": 8.67716121673584, "learning_rate": 1.7373739790481263e-05, "loss": 1.4005, "step": 7899 }, { "epoch": 25.901639344262296, "grad_norm": 9.05630111694336, "learning_rate": 1.737302246046831e-05, "loss": 1.1821, "step": 7900 }, { "epoch": 25.904918032786885, "grad_norm": 35.076499938964844, "learning_rate": 1.7372305047316863e-05, "loss": 1.199, "step": 7901 }, { "epoch": 25.908196721311477, "grad_norm": 8.891837120056152, "learning_rate": 1.7371587551035006e-05, "loss": 1.2606, "step": 7902 }, { "epoch": 25.911475409836065, "grad_norm": 6.948458194732666, "learning_rate": 1.7370869971630842e-05, "loss": 1.3045, "step": 7903 }, { "epoch": 25.914754098360657, "grad_norm": 9.298351287841797, "learning_rate": 1.7370152309112454e-05, "loss": 1.269, "step": 7904 }, { "epoch": 25.918032786885245, "grad_norm": 9.125036239624023, "learning_rate": 1.7369434563487933e-05, "loss": 1.2997, "step": 7905 }, { "epoch": 25.921311475409837, "grad_norm": 7.865637302398682, "learning_rate": 1.7368716734765377e-05, "loss": 1.3557, "step": 7906 }, { "epoch": 25.924590163934425, "grad_norm": 16.22856330871582, "learning_rate": 1.7367998822952876e-05, "loss": 1.1943, "step": 7907 }, { "epoch": 25.927868852459017, "grad_norm": 9.966531753540039, "learning_rate": 1.736728082805853e-05, "loss": 1.2466, "step": 7908 }, { "epoch": 25.931147540983606, "grad_norm": 14.007210731506348, "learning_rate": 1.7366562750090433e-05, "loss": 1.4968, "step": 7909 }, { "epoch": 25.934426229508198, "grad_norm": 8.981393814086914, "learning_rate": 1.736584458905668e-05, "loss": 1.0977, "step": 7910 }, { "epoch": 25.937704918032786, "grad_norm": 8.773497581481934, "learning_rate": 1.736512634496537e-05, "loss": 1.1643, "step": 7911 }, { "epoch": 25.940983606557378, "grad_norm": 13.669820785522461, "learning_rate": 1.7364408017824603e-05, "loss": 1.176, "step": 7912 }, { "epoch": 25.944262295081966, "grad_norm": 9.957890510559082, "learning_rate": 1.736368960764248e-05, "loss": 1.0079, "step": 7913 }, { "epoch": 25.947540983606558, "grad_norm": 7.800642967224121, "learning_rate": 1.7362971114427097e-05, "loss": 1.1621, "step": 7914 }, { "epoch": 25.950819672131146, "grad_norm": 8.771172523498535, "learning_rate": 1.736225253818656e-05, "loss": 1.1465, "step": 7915 }, { "epoch": 25.95409836065574, "grad_norm": 9.204360961914062, "learning_rate": 1.7361533878928976e-05, "loss": 1.1938, "step": 7916 }, { "epoch": 25.957377049180327, "grad_norm": 7.359714031219482, "learning_rate": 1.736081513666244e-05, "loss": 1.3232, "step": 7917 }, { "epoch": 25.96065573770492, "grad_norm": 8.708759307861328, "learning_rate": 1.7360096311395057e-05, "loss": 1.1257, "step": 7918 }, { "epoch": 25.963934426229507, "grad_norm": 13.80526351928711, "learning_rate": 1.7359377403134942e-05, "loss": 1.2471, "step": 7919 }, { "epoch": 25.9672131147541, "grad_norm": 8.706913948059082, "learning_rate": 1.735865841189019e-05, "loss": 1.1223, "step": 7920 }, { "epoch": 25.970491803278687, "grad_norm": 31.312116622924805, "learning_rate": 1.7357939337668914e-05, "loss": 1.1125, "step": 7921 }, { "epoch": 25.97377049180328, "grad_norm": 9.234532356262207, "learning_rate": 1.7357220180479223e-05, "loss": 1.145, "step": 7922 }, { "epoch": 25.977049180327867, "grad_norm": 11.423638343811035, "learning_rate": 1.7356500940329224e-05, "loss": 0.9868, "step": 7923 }, { "epoch": 25.98032786885246, "grad_norm": 7.689607620239258, "learning_rate": 1.735578161722703e-05, "loss": 1.3381, "step": 7924 }, { "epoch": 25.983606557377048, "grad_norm": 8.885924339294434, "learning_rate": 1.7355062211180745e-05, "loss": 1.0502, "step": 7925 }, { "epoch": 25.98688524590164, "grad_norm": 10.04664421081543, "learning_rate": 1.735434272219849e-05, "loss": 1.0887, "step": 7926 }, { "epoch": 25.990163934426228, "grad_norm": 7.9582133293151855, "learning_rate": 1.7353623150288374e-05, "loss": 1.0968, "step": 7927 }, { "epoch": 25.99344262295082, "grad_norm": 12.642607688903809, "learning_rate": 1.735290349545851e-05, "loss": 1.2039, "step": 7928 }, { "epoch": 25.99672131147541, "grad_norm": 12.732686996459961, "learning_rate": 1.7352183757717016e-05, "loss": 1.3123, "step": 7929 }, { "epoch": 26.0, "grad_norm": 8.349936485290527, "learning_rate": 1.7351463937072008e-05, "loss": 0.9931, "step": 7930 }, { "epoch": 26.003278688524592, "grad_norm": 7.16350793838501, "learning_rate": 1.7350744033531595e-05, "loss": 1.1448, "step": 7931 }, { "epoch": 26.00655737704918, "grad_norm": 19.579259872436523, "learning_rate": 1.7350024047103903e-05, "loss": 1.332, "step": 7932 }, { "epoch": 26.009836065573772, "grad_norm": 21.002840042114258, "learning_rate": 1.7349303977797048e-05, "loss": 1.1777, "step": 7933 }, { "epoch": 26.01311475409836, "grad_norm": 7.431830883026123, "learning_rate": 1.7348583825619147e-05, "loss": 0.9616, "step": 7934 }, { "epoch": 26.016393442622952, "grad_norm": 8.155316352844238, "learning_rate": 1.7347863590578326e-05, "loss": 0.886, "step": 7935 }, { "epoch": 26.01967213114754, "grad_norm": 8.971091270446777, "learning_rate": 1.7347143272682697e-05, "loss": 1.1011, "step": 7936 }, { "epoch": 26.022950819672133, "grad_norm": 8.222160339355469, "learning_rate": 1.7346422871940392e-05, "loss": 1.0667, "step": 7937 }, { "epoch": 26.02622950819672, "grad_norm": 13.64698600769043, "learning_rate": 1.7345702388359535e-05, "loss": 1.0776, "step": 7938 }, { "epoch": 26.029508196721313, "grad_norm": 10.491945266723633, "learning_rate": 1.734498182194824e-05, "loss": 1.2146, "step": 7939 }, { "epoch": 26.0327868852459, "grad_norm": 8.871785163879395, "learning_rate": 1.7344261172714642e-05, "loss": 1.1287, "step": 7940 }, { "epoch": 26.036065573770493, "grad_norm": 8.10602855682373, "learning_rate": 1.734354044066686e-05, "loss": 1.099, "step": 7941 }, { "epoch": 26.03934426229508, "grad_norm": 6.89113187789917, "learning_rate": 1.734281962581303e-05, "loss": 1.0618, "step": 7942 }, { "epoch": 26.042622950819673, "grad_norm": 7.301488876342773, "learning_rate": 1.734209872816127e-05, "loss": 0.994, "step": 7943 }, { "epoch": 26.04590163934426, "grad_norm": 8.03881549835205, "learning_rate": 1.7341377747719713e-05, "loss": 1.0923, "step": 7944 }, { "epoch": 26.049180327868854, "grad_norm": 7.307190418243408, "learning_rate": 1.7340656684496487e-05, "loss": 1.1499, "step": 7945 }, { "epoch": 26.052459016393442, "grad_norm": 8.397958755493164, "learning_rate": 1.7339935538499725e-05, "loss": 0.8923, "step": 7946 }, { "epoch": 26.055737704918034, "grad_norm": 9.134309768676758, "learning_rate": 1.733921430973756e-05, "loss": 0.8551, "step": 7947 }, { "epoch": 26.059016393442622, "grad_norm": 7.539096355438232, "learning_rate": 1.7338492998218125e-05, "loss": 1.2002, "step": 7948 }, { "epoch": 26.062295081967214, "grad_norm": 8.371731758117676, "learning_rate": 1.7337771603949547e-05, "loss": 1.0076, "step": 7949 }, { "epoch": 26.065573770491802, "grad_norm": 10.242279052734375, "learning_rate": 1.7337050126939966e-05, "loss": 0.9994, "step": 7950 }, { "epoch": 26.068852459016394, "grad_norm": 7.812270164489746, "learning_rate": 1.733632856719752e-05, "loss": 1.1499, "step": 7951 }, { "epoch": 26.072131147540983, "grad_norm": 7.902679443359375, "learning_rate": 1.7335606924730334e-05, "loss": 1.13, "step": 7952 }, { "epoch": 26.075409836065575, "grad_norm": 10.556936264038086, "learning_rate": 1.7334885199546557e-05, "loss": 1.3115, "step": 7953 }, { "epoch": 26.078688524590163, "grad_norm": 8.974616050720215, "learning_rate": 1.7334163391654323e-05, "loss": 1.2382, "step": 7954 }, { "epoch": 26.081967213114755, "grad_norm": 10.184659957885742, "learning_rate": 1.7333441501061772e-05, "loss": 1.1379, "step": 7955 }, { "epoch": 26.085245901639343, "grad_norm": 8.6761474609375, "learning_rate": 1.7332719527777044e-05, "loss": 1.2957, "step": 7956 }, { "epoch": 26.088524590163935, "grad_norm": 9.854816436767578, "learning_rate": 1.7331997471808276e-05, "loss": 1.1136, "step": 7957 }, { "epoch": 26.091803278688523, "grad_norm": 10.600801467895508, "learning_rate": 1.7331275333163614e-05, "loss": 1.1385, "step": 7958 }, { "epoch": 26.095081967213115, "grad_norm": 7.1464996337890625, "learning_rate": 1.73305531118512e-05, "loss": 1.0171, "step": 7959 }, { "epoch": 26.098360655737704, "grad_norm": 14.635032653808594, "learning_rate": 1.732983080787918e-05, "loss": 1.3003, "step": 7960 }, { "epoch": 26.101639344262296, "grad_norm": 7.9747772216796875, "learning_rate": 1.7329108421255694e-05, "loss": 1.0947, "step": 7961 }, { "epoch": 26.104918032786884, "grad_norm": 8.991509437561035, "learning_rate": 1.7328385951988892e-05, "loss": 0.9441, "step": 7962 }, { "epoch": 26.108196721311476, "grad_norm": 10.741771697998047, "learning_rate": 1.7327663400086918e-05, "loss": 1.1414, "step": 7963 }, { "epoch": 26.111475409836064, "grad_norm": 9.018608093261719, "learning_rate": 1.732694076555792e-05, "loss": 1.2493, "step": 7964 }, { "epoch": 26.114754098360656, "grad_norm": 6.469245433807373, "learning_rate": 1.7326218048410047e-05, "loss": 0.9731, "step": 7965 }, { "epoch": 26.118032786885244, "grad_norm": 9.237560272216797, "learning_rate": 1.732549524865145e-05, "loss": 0.9464, "step": 7966 }, { "epoch": 26.121311475409836, "grad_norm": 7.554790496826172, "learning_rate": 1.7324772366290274e-05, "loss": 1.2495, "step": 7967 }, { "epoch": 26.124590163934425, "grad_norm": 8.967986106872559, "learning_rate": 1.7324049401334676e-05, "loss": 0.8961, "step": 7968 }, { "epoch": 26.127868852459017, "grad_norm": 11.518265724182129, "learning_rate": 1.7323326353792806e-05, "loss": 1.2283, "step": 7969 }, { "epoch": 26.131147540983605, "grad_norm": 10.226186752319336, "learning_rate": 1.7322603223672816e-05, "loss": 1.082, "step": 7970 }, { "epoch": 26.134426229508197, "grad_norm": 6.925317764282227, "learning_rate": 1.732188001098286e-05, "loss": 1.1177, "step": 7971 }, { "epoch": 26.137704918032785, "grad_norm": 7.414592742919922, "learning_rate": 1.7321156715731096e-05, "loss": 1.1199, "step": 7972 }, { "epoch": 26.140983606557377, "grad_norm": 8.546889305114746, "learning_rate": 1.7320433337925676e-05, "loss": 1.107, "step": 7973 }, { "epoch": 26.14426229508197, "grad_norm": 9.6443510055542, "learning_rate": 1.731970987757476e-05, "loss": 0.9931, "step": 7974 }, { "epoch": 26.147540983606557, "grad_norm": 10.160064697265625, "learning_rate": 1.7318986334686505e-05, "loss": 0.9219, "step": 7975 }, { "epoch": 26.15081967213115, "grad_norm": 9.307185173034668, "learning_rate": 1.731826270926907e-05, "loss": 1.0511, "step": 7976 }, { "epoch": 26.154098360655738, "grad_norm": 8.575112342834473, "learning_rate": 1.731753900133061e-05, "loss": 0.9713, "step": 7977 }, { "epoch": 26.15737704918033, "grad_norm": 8.488640785217285, "learning_rate": 1.7316815210879295e-05, "loss": 0.9137, "step": 7978 }, { "epoch": 26.160655737704918, "grad_norm": 7.682087421417236, "learning_rate": 1.7316091337923276e-05, "loss": 1.3367, "step": 7979 }, { "epoch": 26.16393442622951, "grad_norm": 8.607882499694824, "learning_rate": 1.7315367382470724e-05, "loss": 1.1289, "step": 7980 }, { "epoch": 26.167213114754098, "grad_norm": 9.265679359436035, "learning_rate": 1.7314643344529797e-05, "loss": 1.0952, "step": 7981 }, { "epoch": 26.17049180327869, "grad_norm": 7.5208635330200195, "learning_rate": 1.731391922410866e-05, "loss": 1.1605, "step": 7982 }, { "epoch": 26.17377049180328, "grad_norm": 8.322064399719238, "learning_rate": 1.731319502121548e-05, "loss": 1.1968, "step": 7983 }, { "epoch": 26.17704918032787, "grad_norm": 8.38059139251709, "learning_rate": 1.731247073585842e-05, "loss": 1.0502, "step": 7984 }, { "epoch": 26.18032786885246, "grad_norm": 18.556716918945312, "learning_rate": 1.7311746368045653e-05, "loss": 1.5005, "step": 7985 }, { "epoch": 26.18360655737705, "grad_norm": 7.94036865234375, "learning_rate": 1.7311021917785343e-05, "loss": 1.0437, "step": 7986 }, { "epoch": 26.18688524590164, "grad_norm": 8.299339294433594, "learning_rate": 1.7310297385085658e-05, "loss": 1.0703, "step": 7987 }, { "epoch": 26.19016393442623, "grad_norm": 7.087713241577148, "learning_rate": 1.730957276995477e-05, "loss": 1.1407, "step": 7988 }, { "epoch": 26.19344262295082, "grad_norm": 6.6071624755859375, "learning_rate": 1.730884807240085e-05, "loss": 1.2162, "step": 7989 }, { "epoch": 26.19672131147541, "grad_norm": 7.881880283355713, "learning_rate": 1.7308123292432068e-05, "loss": 1.0187, "step": 7990 }, { "epoch": 26.2, "grad_norm": 8.81481647491455, "learning_rate": 1.7307398430056595e-05, "loss": 1.1619, "step": 7991 }, { "epoch": 26.20327868852459, "grad_norm": 6.617862701416016, "learning_rate": 1.7306673485282612e-05, "loss": 1.1064, "step": 7992 }, { "epoch": 26.20655737704918, "grad_norm": 9.764196395874023, "learning_rate": 1.7305948458118282e-05, "loss": 1.2175, "step": 7993 }, { "epoch": 26.20983606557377, "grad_norm": 7.535348892211914, "learning_rate": 1.7305223348571792e-05, "loss": 1.2344, "step": 7994 }, { "epoch": 26.21311475409836, "grad_norm": 7.7969279289245605, "learning_rate": 1.730449815665131e-05, "loss": 1.0106, "step": 7995 }, { "epoch": 26.21639344262295, "grad_norm": 9.531937599182129, "learning_rate": 1.7303772882365018e-05, "loss": 1.2795, "step": 7996 }, { "epoch": 26.21967213114754, "grad_norm": 5.956112384796143, "learning_rate": 1.730304752572109e-05, "loss": 1.1416, "step": 7997 }, { "epoch": 26.222950819672132, "grad_norm": 7.331029415130615, "learning_rate": 1.7302322086727712e-05, "loss": 1.1571, "step": 7998 }, { "epoch": 26.22622950819672, "grad_norm": 7.321688175201416, "learning_rate": 1.730159656539306e-05, "loss": 1.1703, "step": 7999 }, { "epoch": 26.229508196721312, "grad_norm": 7.215458869934082, "learning_rate": 1.730087096172531e-05, "loss": 1.1917, "step": 8000 }, { "epoch": 26.2327868852459, "grad_norm": 7.23848295211792, "learning_rate": 1.7300145275732654e-05, "loss": 1.1146, "step": 8001 }, { "epoch": 26.236065573770492, "grad_norm": 6.428265571594238, "learning_rate": 1.7299419507423267e-05, "loss": 1.2402, "step": 8002 }, { "epoch": 26.23934426229508, "grad_norm": 7.4938788414001465, "learning_rate": 1.7298693656805338e-05, "loss": 1.1473, "step": 8003 }, { "epoch": 26.242622950819673, "grad_norm": 7.3128485679626465, "learning_rate": 1.7297967723887044e-05, "loss": 0.9337, "step": 8004 }, { "epoch": 26.24590163934426, "grad_norm": 10.449673652648926, "learning_rate": 1.7297241708676583e-05, "loss": 1.0853, "step": 8005 }, { "epoch": 26.249180327868853, "grad_norm": 8.622359275817871, "learning_rate": 1.7296515611182133e-05, "loss": 1.2443, "step": 8006 }, { "epoch": 26.25245901639344, "grad_norm": 15.10363483428955, "learning_rate": 1.729578943141188e-05, "loss": 1.1978, "step": 8007 }, { "epoch": 26.255737704918033, "grad_norm": 8.371709823608398, "learning_rate": 1.7295063169374015e-05, "loss": 1.084, "step": 8008 }, { "epoch": 26.25901639344262, "grad_norm": 9.32931900024414, "learning_rate": 1.7294336825076728e-05, "loss": 1.1936, "step": 8009 }, { "epoch": 26.262295081967213, "grad_norm": 8.658563613891602, "learning_rate": 1.729361039852821e-05, "loss": 1.1191, "step": 8010 }, { "epoch": 26.2655737704918, "grad_norm": 10.463570594787598, "learning_rate": 1.7292883889736654e-05, "loss": 0.7994, "step": 8011 }, { "epoch": 26.268852459016394, "grad_norm": 8.488024711608887, "learning_rate": 1.7292157298710247e-05, "loss": 1.09, "step": 8012 }, { "epoch": 26.272131147540982, "grad_norm": 7.744126796722412, "learning_rate": 1.7291430625457186e-05, "loss": 1.1343, "step": 8013 }, { "epoch": 26.275409836065574, "grad_norm": 11.356134414672852, "learning_rate": 1.7290703869985665e-05, "loss": 0.9047, "step": 8014 }, { "epoch": 26.278688524590162, "grad_norm": 8.087620735168457, "learning_rate": 1.728997703230387e-05, "loss": 1.0804, "step": 8015 }, { "epoch": 26.281967213114754, "grad_norm": 8.732548713684082, "learning_rate": 1.7289250112420012e-05, "loss": 1.1524, "step": 8016 }, { "epoch": 26.285245901639342, "grad_norm": 7.237289905548096, "learning_rate": 1.7288523110342276e-05, "loss": 1.2635, "step": 8017 }, { "epoch": 26.288524590163934, "grad_norm": 6.889813423156738, "learning_rate": 1.7287796026078864e-05, "loss": 1.104, "step": 8018 }, { "epoch": 26.291803278688526, "grad_norm": 8.606697082519531, "learning_rate": 1.7287068859637975e-05, "loss": 1.2271, "step": 8019 }, { "epoch": 26.295081967213115, "grad_norm": 7.919652938842773, "learning_rate": 1.728634161102781e-05, "loss": 0.9969, "step": 8020 }, { "epoch": 26.298360655737707, "grad_norm": 9.764222145080566, "learning_rate": 1.7285614280256566e-05, "loss": 1.0903, "step": 8021 }, { "epoch": 26.301639344262295, "grad_norm": 15.009930610656738, "learning_rate": 1.7284886867332444e-05, "loss": 0.9016, "step": 8022 }, { "epoch": 26.304918032786887, "grad_norm": 11.700860023498535, "learning_rate": 1.7284159372263653e-05, "loss": 1.1147, "step": 8023 }, { "epoch": 26.308196721311475, "grad_norm": 7.32522439956665, "learning_rate": 1.7283431795058385e-05, "loss": 1.0071, "step": 8024 }, { "epoch": 26.311475409836067, "grad_norm": 7.594159126281738, "learning_rate": 1.7282704135724854e-05, "loss": 1.2166, "step": 8025 }, { "epoch": 26.314754098360655, "grad_norm": 9.172811508178711, "learning_rate": 1.728197639427126e-05, "loss": 1.0504, "step": 8026 }, { "epoch": 26.318032786885247, "grad_norm": 9.458439826965332, "learning_rate": 1.7281248570705814e-05, "loss": 1.0676, "step": 8027 }, { "epoch": 26.321311475409836, "grad_norm": 8.168164253234863, "learning_rate": 1.7280520665036717e-05, "loss": 0.9902, "step": 8028 }, { "epoch": 26.324590163934428, "grad_norm": 10.944893836975098, "learning_rate": 1.727979267727218e-05, "loss": 0.8843, "step": 8029 }, { "epoch": 26.327868852459016, "grad_norm": 9.700350761413574, "learning_rate": 1.7279064607420415e-05, "loss": 1.2408, "step": 8030 }, { "epoch": 26.331147540983608, "grad_norm": 9.398541450500488, "learning_rate": 1.7278336455489625e-05, "loss": 1.131, "step": 8031 }, { "epoch": 26.334426229508196, "grad_norm": 43.83219909667969, "learning_rate": 1.7277608221488024e-05, "loss": 1.1954, "step": 8032 }, { "epoch": 26.337704918032788, "grad_norm": 9.86408805847168, "learning_rate": 1.7276879905423824e-05, "loss": 1.1437, "step": 8033 }, { "epoch": 26.340983606557376, "grad_norm": 14.016325950622559, "learning_rate": 1.7276151507305235e-05, "loss": 1.1208, "step": 8034 }, { "epoch": 26.34426229508197, "grad_norm": 7.521488666534424, "learning_rate": 1.7275423027140474e-05, "loss": 1.1899, "step": 8035 }, { "epoch": 26.347540983606557, "grad_norm": 9.867048263549805, "learning_rate": 1.7274694464937756e-05, "loss": 0.9454, "step": 8036 }, { "epoch": 26.35081967213115, "grad_norm": 10.407925605773926, "learning_rate": 1.727396582070529e-05, "loss": 1.1509, "step": 8037 }, { "epoch": 26.354098360655737, "grad_norm": 8.684370994567871, "learning_rate": 1.72732370944513e-05, "loss": 1.2301, "step": 8038 }, { "epoch": 26.35737704918033, "grad_norm": 8.753337860107422, "learning_rate": 1.7272508286184e-05, "loss": 1.0833, "step": 8039 }, { "epoch": 26.360655737704917, "grad_norm": 9.284804344177246, "learning_rate": 1.7271779395911604e-05, "loss": 1.167, "step": 8040 }, { "epoch": 26.36393442622951, "grad_norm": 9.39419174194336, "learning_rate": 1.7271050423642334e-05, "loss": 0.8639, "step": 8041 }, { "epoch": 26.367213114754097, "grad_norm": 14.898763656616211, "learning_rate": 1.7270321369384414e-05, "loss": 1.1641, "step": 8042 }, { "epoch": 26.37049180327869, "grad_norm": 9.070464134216309, "learning_rate": 1.726959223314606e-05, "loss": 0.9954, "step": 8043 }, { "epoch": 26.373770491803278, "grad_norm": 10.16769790649414, "learning_rate": 1.7268863014935497e-05, "loss": 1.0564, "step": 8044 }, { "epoch": 26.37704918032787, "grad_norm": 13.343191146850586, "learning_rate": 1.7268133714760945e-05, "loss": 1.446, "step": 8045 }, { "epoch": 26.380327868852458, "grad_norm": 19.564775466918945, "learning_rate": 1.7267404332630625e-05, "loss": 1.0413, "step": 8046 }, { "epoch": 26.38360655737705, "grad_norm": 9.047127723693848, "learning_rate": 1.7266674868552765e-05, "loss": 0.9861, "step": 8047 }, { "epoch": 26.386885245901638, "grad_norm": 8.570148468017578, "learning_rate": 1.7265945322535594e-05, "loss": 1.0802, "step": 8048 }, { "epoch": 26.39016393442623, "grad_norm": 11.876596450805664, "learning_rate": 1.7265215694587335e-05, "loss": 1.17, "step": 8049 }, { "epoch": 26.39344262295082, "grad_norm": 12.323912620544434, "learning_rate": 1.7264485984716214e-05, "loss": 1.1322, "step": 8050 }, { "epoch": 26.39672131147541, "grad_norm": 8.616755485534668, "learning_rate": 1.7263756192930458e-05, "loss": 1.4537, "step": 8051 }, { "epoch": 26.4, "grad_norm": 11.740439414978027, "learning_rate": 1.72630263192383e-05, "loss": 1.0865, "step": 8052 }, { "epoch": 26.40327868852459, "grad_norm": 10.387157440185547, "learning_rate": 1.726229636364797e-05, "loss": 1.3259, "step": 8053 }, { "epoch": 26.40655737704918, "grad_norm": 9.485533714294434, "learning_rate": 1.7261566326167697e-05, "loss": 1.0867, "step": 8054 }, { "epoch": 26.40983606557377, "grad_norm": 7.033695220947266, "learning_rate": 1.726083620680571e-05, "loss": 1.3456, "step": 8055 }, { "epoch": 26.41311475409836, "grad_norm": 9.865442276000977, "learning_rate": 1.7260106005570252e-05, "loss": 1.22, "step": 8056 }, { "epoch": 26.41639344262295, "grad_norm": 6.5149431228637695, "learning_rate": 1.7259375722469547e-05, "loss": 1.0424, "step": 8057 }, { "epoch": 26.41967213114754, "grad_norm": 9.401248931884766, "learning_rate": 1.7258645357511832e-05, "loss": 1.1066, "step": 8058 }, { "epoch": 26.42295081967213, "grad_norm": 8.131828308105469, "learning_rate": 1.7257914910705343e-05, "loss": 0.9548, "step": 8059 }, { "epoch": 26.42622950819672, "grad_norm": 8.268580436706543, "learning_rate": 1.7257184382058318e-05, "loss": 1.2141, "step": 8060 }, { "epoch": 26.42950819672131, "grad_norm": 9.186773300170898, "learning_rate": 1.7256453771578993e-05, "loss": 1.1368, "step": 8061 }, { "epoch": 26.432786885245903, "grad_norm": 11.469234466552734, "learning_rate": 1.7255723079275607e-05, "loss": 1.0786, "step": 8062 }, { "epoch": 26.43606557377049, "grad_norm": 9.099955558776855, "learning_rate": 1.72549923051564e-05, "loss": 1.098, "step": 8063 }, { "epoch": 26.439344262295084, "grad_norm": 9.684370994567871, "learning_rate": 1.725426144922961e-05, "loss": 0.8588, "step": 8064 }, { "epoch": 26.442622950819672, "grad_norm": 14.407356262207031, "learning_rate": 1.7253530511503483e-05, "loss": 1.126, "step": 8065 }, { "epoch": 26.445901639344264, "grad_norm": 8.222886085510254, "learning_rate": 1.7252799491986256e-05, "loss": 1.0128, "step": 8066 }, { "epoch": 26.449180327868852, "grad_norm": 9.301934242248535, "learning_rate": 1.7252068390686174e-05, "loss": 0.9395, "step": 8067 }, { "epoch": 26.452459016393444, "grad_norm": 20.69354820251465, "learning_rate": 1.725133720761148e-05, "loss": 1.1252, "step": 8068 }, { "epoch": 26.455737704918032, "grad_norm": 8.877157211303711, "learning_rate": 1.725060594277042e-05, "loss": 1.2792, "step": 8069 }, { "epoch": 26.459016393442624, "grad_norm": 7.5609869956970215, "learning_rate": 1.7249874596171236e-05, "loss": 1.3169, "step": 8070 }, { "epoch": 26.462295081967213, "grad_norm": 10.47089958190918, "learning_rate": 1.7249143167822182e-05, "loss": 1.0449, "step": 8071 }, { "epoch": 26.465573770491805, "grad_norm": 9.082517623901367, "learning_rate": 1.72484116577315e-05, "loss": 1.1483, "step": 8072 }, { "epoch": 26.468852459016393, "grad_norm": 8.851373672485352, "learning_rate": 1.7247680065907443e-05, "loss": 1.1638, "step": 8073 }, { "epoch": 26.472131147540985, "grad_norm": 9.308815956115723, "learning_rate": 1.7246948392358255e-05, "loss": 0.8943, "step": 8074 }, { "epoch": 26.475409836065573, "grad_norm": 10.933077812194824, "learning_rate": 1.7246216637092184e-05, "loss": 1.1769, "step": 8075 }, { "epoch": 26.478688524590165, "grad_norm": 11.228545188903809, "learning_rate": 1.7245484800117492e-05, "loss": 1.0387, "step": 8076 }, { "epoch": 26.481967213114753, "grad_norm": 8.806920051574707, "learning_rate": 1.7244752881442424e-05, "loss": 1.2412, "step": 8077 }, { "epoch": 26.485245901639345, "grad_norm": 7.966861724853516, "learning_rate": 1.7244020881075236e-05, "loss": 1.1993, "step": 8078 }, { "epoch": 26.488524590163934, "grad_norm": 10.606344223022461, "learning_rate": 1.724328879902418e-05, "loss": 1.3562, "step": 8079 }, { "epoch": 26.491803278688526, "grad_norm": 23.67561149597168, "learning_rate": 1.7242556635297512e-05, "loss": 1.0129, "step": 8080 }, { "epoch": 26.495081967213114, "grad_norm": 9.855433464050293, "learning_rate": 1.7241824389903486e-05, "loss": 1.1172, "step": 8081 }, { "epoch": 26.498360655737706, "grad_norm": 12.890491485595703, "learning_rate": 1.724109206285036e-05, "loss": 1.1118, "step": 8082 }, { "epoch": 26.501639344262294, "grad_norm": 13.766124725341797, "learning_rate": 1.7240359654146394e-05, "loss": 1.1271, "step": 8083 }, { "epoch": 26.504918032786886, "grad_norm": 13.60588550567627, "learning_rate": 1.7239627163799845e-05, "loss": 0.9644, "step": 8084 }, { "epoch": 26.508196721311474, "grad_norm": 9.470972061157227, "learning_rate": 1.7238894591818975e-05, "loss": 1.1317, "step": 8085 }, { "epoch": 26.511475409836066, "grad_norm": 10.36252212524414, "learning_rate": 1.7238161938212036e-05, "loss": 1.1509, "step": 8086 }, { "epoch": 26.514754098360655, "grad_norm": 19.06805992126465, "learning_rate": 1.7237429202987297e-05, "loss": 0.9167, "step": 8087 }, { "epoch": 26.518032786885247, "grad_norm": 7.9649786949157715, "learning_rate": 1.723669638615302e-05, "loss": 1.3484, "step": 8088 }, { "epoch": 26.521311475409835, "grad_norm": 9.245552062988281, "learning_rate": 1.7235963487717466e-05, "loss": 1.1906, "step": 8089 }, { "epoch": 26.524590163934427, "grad_norm": 9.556844711303711, "learning_rate": 1.72352305076889e-05, "loss": 0.9964, "step": 8090 }, { "epoch": 26.527868852459015, "grad_norm": 8.25495433807373, "learning_rate": 1.7234497446075588e-05, "loss": 1.2589, "step": 8091 }, { "epoch": 26.531147540983607, "grad_norm": 9.312047004699707, "learning_rate": 1.7233764302885794e-05, "loss": 0.9015, "step": 8092 }, { "epoch": 26.534426229508195, "grad_norm": 8.554790496826172, "learning_rate": 1.723303107812779e-05, "loss": 1.0536, "step": 8093 }, { "epoch": 26.537704918032787, "grad_norm": 6.780545711517334, "learning_rate": 1.7232297771809834e-05, "loss": 1.1719, "step": 8094 }, { "epoch": 26.540983606557376, "grad_norm": 8.048786163330078, "learning_rate": 1.7231564383940205e-05, "loss": 1.1519, "step": 8095 }, { "epoch": 26.544262295081968, "grad_norm": 8.234129905700684, "learning_rate": 1.723083091452717e-05, "loss": 1.0056, "step": 8096 }, { "epoch": 26.547540983606556, "grad_norm": 10.237488746643066, "learning_rate": 1.7230097363579e-05, "loss": 1.2678, "step": 8097 }, { "epoch": 26.550819672131148, "grad_norm": 10.475619316101074, "learning_rate": 1.722936373110396e-05, "loss": 1.076, "step": 8098 }, { "epoch": 26.554098360655736, "grad_norm": 7.048524379730225, "learning_rate": 1.7228630017110328e-05, "loss": 1.058, "step": 8099 }, { "epoch": 26.557377049180328, "grad_norm": 9.428479194641113, "learning_rate": 1.7227896221606378e-05, "loss": 1.0417, "step": 8100 }, { "epoch": 26.560655737704916, "grad_norm": 7.488323211669922, "learning_rate": 1.7227162344600382e-05, "loss": 0.9088, "step": 8101 }, { "epoch": 26.56393442622951, "grad_norm": 9.167396545410156, "learning_rate": 1.7226428386100614e-05, "loss": 0.9149, "step": 8102 }, { "epoch": 26.567213114754097, "grad_norm": 19.024234771728516, "learning_rate": 1.7225694346115355e-05, "loss": 1.0813, "step": 8103 }, { "epoch": 26.57049180327869, "grad_norm": 8.03447151184082, "learning_rate": 1.722496022465288e-05, "loss": 1.0441, "step": 8104 }, { "epoch": 26.57377049180328, "grad_norm": 7.825647830963135, "learning_rate": 1.7224226021721467e-05, "loss": 1.1732, "step": 8105 }, { "epoch": 26.57704918032787, "grad_norm": 7.275649547576904, "learning_rate": 1.722349173732939e-05, "loss": 1.0392, "step": 8106 }, { "epoch": 26.58032786885246, "grad_norm": 7.754336357116699, "learning_rate": 1.7222757371484937e-05, "loss": 1.1719, "step": 8107 }, { "epoch": 26.58360655737705, "grad_norm": 9.784832000732422, "learning_rate": 1.7222022924196384e-05, "loss": 0.9977, "step": 8108 }, { "epoch": 26.58688524590164, "grad_norm": 7.984748363494873, "learning_rate": 1.7221288395472013e-05, "loss": 0.9752, "step": 8109 }, { "epoch": 26.59016393442623, "grad_norm": 7.532914638519287, "learning_rate": 1.722055378532011e-05, "loss": 1.1072, "step": 8110 }, { "epoch": 26.59344262295082, "grad_norm": 8.184216499328613, "learning_rate": 1.7219819093748952e-05, "loss": 1.0845, "step": 8111 }, { "epoch": 26.59672131147541, "grad_norm": 7.701682090759277, "learning_rate": 1.721908432076683e-05, "loss": 1.3108, "step": 8112 }, { "epoch": 26.6, "grad_norm": 7.367243766784668, "learning_rate": 1.7218349466382024e-05, "loss": 1.1661, "step": 8113 }, { "epoch": 26.60327868852459, "grad_norm": 6.953467845916748, "learning_rate": 1.721761453060282e-05, "loss": 1.0538, "step": 8114 }, { "epoch": 26.60655737704918, "grad_norm": 9.307676315307617, "learning_rate": 1.7216879513437512e-05, "loss": 1.2058, "step": 8115 }, { "epoch": 26.60983606557377, "grad_norm": 9.731877326965332, "learning_rate": 1.7216144414894383e-05, "loss": 0.9754, "step": 8116 }, { "epoch": 26.613114754098362, "grad_norm": 9.540046691894531, "learning_rate": 1.721540923498172e-05, "loss": 1.0281, "step": 8117 }, { "epoch": 26.61639344262295, "grad_norm": 8.809809684753418, "learning_rate": 1.7214673973707818e-05, "loss": 0.9922, "step": 8118 }, { "epoch": 26.619672131147542, "grad_norm": 8.272934913635254, "learning_rate": 1.7213938631080968e-05, "loss": 1.2133, "step": 8119 }, { "epoch": 26.62295081967213, "grad_norm": 11.039185523986816, "learning_rate": 1.7213203207109454e-05, "loss": 1.2622, "step": 8120 }, { "epoch": 26.626229508196722, "grad_norm": 7.159582614898682, "learning_rate": 1.7212467701801577e-05, "loss": 1.1022, "step": 8121 }, { "epoch": 26.62950819672131, "grad_norm": 13.071576118469238, "learning_rate": 1.721173211516563e-05, "loss": 1.2415, "step": 8122 }, { "epoch": 26.632786885245903, "grad_norm": 9.664667129516602, "learning_rate": 1.7210996447209904e-05, "loss": 1.1067, "step": 8123 }, { "epoch": 26.63606557377049, "grad_norm": 8.140029907226562, "learning_rate": 1.7210260697942695e-05, "loss": 1.2781, "step": 8124 }, { "epoch": 26.639344262295083, "grad_norm": 7.810670852661133, "learning_rate": 1.7209524867372296e-05, "loss": 1.2129, "step": 8125 }, { "epoch": 26.64262295081967, "grad_norm": 8.477216720581055, "learning_rate": 1.7208788955507013e-05, "loss": 1.3466, "step": 8126 }, { "epoch": 26.645901639344263, "grad_norm": 7.558050632476807, "learning_rate": 1.7208052962355135e-05, "loss": 0.9856, "step": 8127 }, { "epoch": 26.64918032786885, "grad_norm": 7.526703357696533, "learning_rate": 1.7207316887924968e-05, "loss": 1.0884, "step": 8128 }, { "epoch": 26.652459016393443, "grad_norm": 9.63477897644043, "learning_rate": 1.7206580732224808e-05, "loss": 1.3681, "step": 8129 }, { "epoch": 26.65573770491803, "grad_norm": 10.29178524017334, "learning_rate": 1.7205844495262957e-05, "loss": 1.0657, "step": 8130 }, { "epoch": 26.659016393442624, "grad_norm": 8.36260986328125, "learning_rate": 1.720510817704772e-05, "loss": 1.1007, "step": 8131 }, { "epoch": 26.662295081967212, "grad_norm": 10.155651092529297, "learning_rate": 1.7204371777587394e-05, "loss": 1.0508, "step": 8132 }, { "epoch": 26.665573770491804, "grad_norm": 18.01807403564453, "learning_rate": 1.7203635296890288e-05, "loss": 1.286, "step": 8133 }, { "epoch": 26.668852459016392, "grad_norm": 9.004261016845703, "learning_rate": 1.72028987349647e-05, "loss": 1.0752, "step": 8134 }, { "epoch": 26.672131147540984, "grad_norm": 10.379697799682617, "learning_rate": 1.7202162091818943e-05, "loss": 1.2188, "step": 8135 }, { "epoch": 26.675409836065572, "grad_norm": 32.02645492553711, "learning_rate": 1.7201425367461323e-05, "loss": 1.053, "step": 8136 }, { "epoch": 26.678688524590164, "grad_norm": 9.21868896484375, "learning_rate": 1.720068856190014e-05, "loss": 1.012, "step": 8137 }, { "epoch": 26.681967213114753, "grad_norm": 6.868173599243164, "learning_rate": 1.7199951675143708e-05, "loss": 1.2598, "step": 8138 }, { "epoch": 26.685245901639345, "grad_norm": 8.57174015045166, "learning_rate": 1.7199214707200335e-05, "loss": 0.9528, "step": 8139 }, { "epoch": 26.688524590163933, "grad_norm": 8.25838851928711, "learning_rate": 1.719847765807833e-05, "loss": 0.98, "step": 8140 }, { "epoch": 26.691803278688525, "grad_norm": 7.176837921142578, "learning_rate": 1.7197740527786005e-05, "loss": 1.097, "step": 8141 }, { "epoch": 26.695081967213113, "grad_norm": 10.754717826843262, "learning_rate": 1.719700331633167e-05, "loss": 0.9818, "step": 8142 }, { "epoch": 26.698360655737705, "grad_norm": 6.8084940910339355, "learning_rate": 1.719626602372364e-05, "loss": 1.3081, "step": 8143 }, { "epoch": 26.701639344262293, "grad_norm": 10.011106491088867, "learning_rate": 1.719552864997023e-05, "loss": 1.1018, "step": 8144 }, { "epoch": 26.704918032786885, "grad_norm": 8.57207202911377, "learning_rate": 1.7194791195079754e-05, "loss": 1.2676, "step": 8145 }, { "epoch": 26.708196721311474, "grad_norm": 9.07150650024414, "learning_rate": 1.7194053659060524e-05, "loss": 0.9734, "step": 8146 }, { "epoch": 26.711475409836066, "grad_norm": 8.860947608947754, "learning_rate": 1.719331604192086e-05, "loss": 1.0929, "step": 8147 }, { "epoch": 26.714754098360658, "grad_norm": 7.890523910522461, "learning_rate": 1.719257834366908e-05, "loss": 1.0854, "step": 8148 }, { "epoch": 26.718032786885246, "grad_norm": 17.624202728271484, "learning_rate": 1.71918405643135e-05, "loss": 0.9633, "step": 8149 }, { "epoch": 26.721311475409838, "grad_norm": 7.7564377784729, "learning_rate": 1.719110270386244e-05, "loss": 0.9465, "step": 8150 }, { "epoch": 26.724590163934426, "grad_norm": 11.096532821655273, "learning_rate": 1.719036476232422e-05, "loss": 1.33, "step": 8151 }, { "epoch": 26.727868852459018, "grad_norm": 7.261961936950684, "learning_rate": 1.718962673970716e-05, "loss": 1.2776, "step": 8152 }, { "epoch": 26.731147540983606, "grad_norm": 21.943050384521484, "learning_rate": 1.7188888636019586e-05, "loss": 1.2063, "step": 8153 }, { "epoch": 26.7344262295082, "grad_norm": 14.119084358215332, "learning_rate": 1.7188150451269816e-05, "loss": 0.9396, "step": 8154 }, { "epoch": 26.737704918032787, "grad_norm": 8.158653259277344, "learning_rate": 1.7187412185466175e-05, "loss": 1.0266, "step": 8155 }, { "epoch": 26.74098360655738, "grad_norm": 7.48057746887207, "learning_rate": 1.718667383861699e-05, "loss": 1.2098, "step": 8156 }, { "epoch": 26.744262295081967, "grad_norm": 69.16698455810547, "learning_rate": 1.7185935410730582e-05, "loss": 1.1456, "step": 8157 }, { "epoch": 26.74754098360656, "grad_norm": 8.001325607299805, "learning_rate": 1.7185196901815286e-05, "loss": 1.2128, "step": 8158 }, { "epoch": 26.750819672131147, "grad_norm": 8.19916820526123, "learning_rate": 1.718445831187942e-05, "loss": 1.249, "step": 8159 }, { "epoch": 26.75409836065574, "grad_norm": 7.983745574951172, "learning_rate": 1.718371964093132e-05, "loss": 1.2283, "step": 8160 }, { "epoch": 26.757377049180327, "grad_norm": 10.885926246643066, "learning_rate": 1.718298088897931e-05, "loss": 1.0508, "step": 8161 }, { "epoch": 26.76065573770492, "grad_norm": 7.483017444610596, "learning_rate": 1.7182242056031722e-05, "loss": 1.1721, "step": 8162 }, { "epoch": 26.763934426229508, "grad_norm": 9.025735855102539, "learning_rate": 1.7181503142096888e-05, "loss": 1.1307, "step": 8163 }, { "epoch": 26.7672131147541, "grad_norm": 8.575666427612305, "learning_rate": 1.7180764147183137e-05, "loss": 1.1951, "step": 8164 }, { "epoch": 26.770491803278688, "grad_norm": 8.394344329833984, "learning_rate": 1.718002507129881e-05, "loss": 1.0914, "step": 8165 }, { "epoch": 26.77377049180328, "grad_norm": 143.07025146484375, "learning_rate": 1.717928591445223e-05, "loss": 1.0883, "step": 8166 }, { "epoch": 26.777049180327868, "grad_norm": 10.241331100463867, "learning_rate": 1.7178546676651735e-05, "loss": 0.9677, "step": 8167 }, { "epoch": 26.78032786885246, "grad_norm": 7.410435199737549, "learning_rate": 1.7177807357905663e-05, "loss": 1.272, "step": 8168 }, { "epoch": 26.78360655737705, "grad_norm": 6.6942667961120605, "learning_rate": 1.7177067958222354e-05, "loss": 1.0234, "step": 8169 }, { "epoch": 26.78688524590164, "grad_norm": 14.414176940917969, "learning_rate": 1.717632847761014e-05, "loss": 1.3097, "step": 8170 }, { "epoch": 26.79016393442623, "grad_norm": 7.319309234619141, "learning_rate": 1.7175588916077357e-05, "loss": 1.0764, "step": 8171 }, { "epoch": 26.79344262295082, "grad_norm": 11.113551139831543, "learning_rate": 1.717484927363235e-05, "loss": 1.2255, "step": 8172 }, { "epoch": 26.79672131147541, "grad_norm": 13.680069923400879, "learning_rate": 1.7174109550283462e-05, "loss": 1.4114, "step": 8173 }, { "epoch": 26.8, "grad_norm": 22.33130645751953, "learning_rate": 1.7173369746039026e-05, "loss": 0.9602, "step": 8174 }, { "epoch": 26.80327868852459, "grad_norm": 12.049067497253418, "learning_rate": 1.717262986090739e-05, "loss": 1.1887, "step": 8175 }, { "epoch": 26.80655737704918, "grad_norm": 22.424509048461914, "learning_rate": 1.7171889894896893e-05, "loss": 1.1138, "step": 8176 }, { "epoch": 26.80983606557377, "grad_norm": 10.787063598632812, "learning_rate": 1.717114984801588e-05, "loss": 1.1257, "step": 8177 }, { "epoch": 26.81311475409836, "grad_norm": 11.206567764282227, "learning_rate": 1.7170409720272697e-05, "loss": 1.1891, "step": 8178 }, { "epoch": 26.81639344262295, "grad_norm": 7.964565753936768, "learning_rate": 1.7169669511675688e-05, "loss": 1.4604, "step": 8179 }, { "epoch": 26.81967213114754, "grad_norm": 9.659854888916016, "learning_rate": 1.7168929222233202e-05, "loss": 1.1589, "step": 8180 }, { "epoch": 26.82295081967213, "grad_norm": 10.423059463500977, "learning_rate": 1.7168188851953585e-05, "loss": 1.1115, "step": 8181 }, { "epoch": 26.82622950819672, "grad_norm": 12.258753776550293, "learning_rate": 1.7167448400845185e-05, "loss": 1.0503, "step": 8182 }, { "epoch": 26.82950819672131, "grad_norm": 9.775435447692871, "learning_rate": 1.7166707868916354e-05, "loss": 1.261, "step": 8183 }, { "epoch": 26.832786885245902, "grad_norm": 18.478302001953125, "learning_rate": 1.7165967256175436e-05, "loss": 1.1238, "step": 8184 }, { "epoch": 26.83606557377049, "grad_norm": 9.136244773864746, "learning_rate": 1.7165226562630787e-05, "loss": 1.4011, "step": 8185 }, { "epoch": 26.839344262295082, "grad_norm": 7.902610778808594, "learning_rate": 1.7164485788290762e-05, "loss": 1.2446, "step": 8186 }, { "epoch": 26.84262295081967, "grad_norm": 10.430127143859863, "learning_rate": 1.716374493316371e-05, "loss": 1.1016, "step": 8187 }, { "epoch": 26.845901639344262, "grad_norm": 8.712282180786133, "learning_rate": 1.7163003997257984e-05, "loss": 1.1666, "step": 8188 }, { "epoch": 26.84918032786885, "grad_norm": 9.658251762390137, "learning_rate": 1.716226298058194e-05, "loss": 1.1765, "step": 8189 }, { "epoch": 26.852459016393443, "grad_norm": 69.65254211425781, "learning_rate": 1.7161521883143936e-05, "loss": 1.0903, "step": 8190 }, { "epoch": 26.855737704918035, "grad_norm": 6.727294445037842, "learning_rate": 1.7160780704952324e-05, "loss": 1.364, "step": 8191 }, { "epoch": 26.859016393442623, "grad_norm": 10.577710151672363, "learning_rate": 1.7160039446015466e-05, "loss": 1.3527, "step": 8192 }, { "epoch": 26.862295081967215, "grad_norm": 7.271927833557129, "learning_rate": 1.715929810634172e-05, "loss": 1.3186, "step": 8193 }, { "epoch": 26.865573770491803, "grad_norm": 10.664294242858887, "learning_rate": 1.715855668593944e-05, "loss": 1.2732, "step": 8194 }, { "epoch": 26.868852459016395, "grad_norm": 7.656557083129883, "learning_rate": 1.7157815184816993e-05, "loss": 1.3187, "step": 8195 }, { "epoch": 26.872131147540983, "grad_norm": 6.971402645111084, "learning_rate": 1.7157073602982736e-05, "loss": 1.162, "step": 8196 }, { "epoch": 26.875409836065575, "grad_norm": 6.977034568786621, "learning_rate": 1.7156331940445037e-05, "loss": 1.2048, "step": 8197 }, { "epoch": 26.878688524590164, "grad_norm": 9.441640853881836, "learning_rate": 1.715559019721225e-05, "loss": 1.3322, "step": 8198 }, { "epoch": 26.881967213114756, "grad_norm": 25.881816864013672, "learning_rate": 1.7154848373292744e-05, "loss": 1.2731, "step": 8199 }, { "epoch": 26.885245901639344, "grad_norm": 10.098684310913086, "learning_rate": 1.7154106468694885e-05, "loss": 1.0743, "step": 8200 }, { "epoch": 26.888524590163936, "grad_norm": 10.284218788146973, "learning_rate": 1.715336448342704e-05, "loss": 1.3184, "step": 8201 }, { "epoch": 26.891803278688524, "grad_norm": 10.01430606842041, "learning_rate": 1.7152622417497565e-05, "loss": 1.2715, "step": 8202 }, { "epoch": 26.895081967213116, "grad_norm": 7.920988082885742, "learning_rate": 1.715188027091484e-05, "loss": 0.9933, "step": 8203 }, { "epoch": 26.898360655737704, "grad_norm": 8.177373886108398, "learning_rate": 1.715113804368723e-05, "loss": 1.2063, "step": 8204 }, { "epoch": 26.901639344262296, "grad_norm": 11.534283638000488, "learning_rate": 1.7150395735823102e-05, "loss": 1.0614, "step": 8205 }, { "epoch": 26.904918032786885, "grad_norm": 7.705158233642578, "learning_rate": 1.7149653347330828e-05, "loss": 1.4849, "step": 8206 }, { "epoch": 26.908196721311477, "grad_norm": 7.547187805175781, "learning_rate": 1.7148910878218778e-05, "loss": 1.437, "step": 8207 }, { "epoch": 26.911475409836065, "grad_norm": 11.00927734375, "learning_rate": 1.7148168328495324e-05, "loss": 1.1068, "step": 8208 }, { "epoch": 26.914754098360657, "grad_norm": 8.050970077514648, "learning_rate": 1.714742569816884e-05, "loss": 1.1671, "step": 8209 }, { "epoch": 26.918032786885245, "grad_norm": 9.123199462890625, "learning_rate": 1.7146682987247702e-05, "loss": 0.9677, "step": 8210 }, { "epoch": 26.921311475409837, "grad_norm": 7.663415431976318, "learning_rate": 1.7145940195740282e-05, "loss": 0.9857, "step": 8211 }, { "epoch": 26.924590163934425, "grad_norm": 9.114689826965332, "learning_rate": 1.7145197323654957e-05, "loss": 1.2424, "step": 8212 }, { "epoch": 26.927868852459017, "grad_norm": 9.406057357788086, "learning_rate": 1.71444543710001e-05, "loss": 1.0843, "step": 8213 }, { "epoch": 26.931147540983606, "grad_norm": 7.848902702331543, "learning_rate": 1.714371133778409e-05, "loss": 1.3286, "step": 8214 }, { "epoch": 26.934426229508198, "grad_norm": 8.98974609375, "learning_rate": 1.7142968224015316e-05, "loss": 1.0325, "step": 8215 }, { "epoch": 26.937704918032786, "grad_norm": 8.272201538085938, "learning_rate": 1.714222502970214e-05, "loss": 1.3533, "step": 8216 }, { "epoch": 26.940983606557378, "grad_norm": 7.801852226257324, "learning_rate": 1.7141481754852957e-05, "loss": 1.4019, "step": 8217 }, { "epoch": 26.944262295081966, "grad_norm": 15.184041023254395, "learning_rate": 1.7140738399476138e-05, "loss": 1.1421, "step": 8218 }, { "epoch": 26.947540983606558, "grad_norm": 9.34776496887207, "learning_rate": 1.713999496358007e-05, "loss": 0.9843, "step": 8219 }, { "epoch": 26.950819672131146, "grad_norm": 10.687050819396973, "learning_rate": 1.7139251447173134e-05, "loss": 1.3506, "step": 8220 }, { "epoch": 26.95409836065574, "grad_norm": 7.225146293640137, "learning_rate": 1.7138507850263715e-05, "loss": 1.2638, "step": 8221 }, { "epoch": 26.957377049180327, "grad_norm": 8.224681854248047, "learning_rate": 1.71377641728602e-05, "loss": 1.1077, "step": 8222 }, { "epoch": 26.96065573770492, "grad_norm": 15.004661560058594, "learning_rate": 1.713702041497097e-05, "loss": 1.2424, "step": 8223 }, { "epoch": 26.963934426229507, "grad_norm": 7.136646270751953, "learning_rate": 1.7136276576604413e-05, "loss": 0.9786, "step": 8224 }, { "epoch": 26.9672131147541, "grad_norm": 12.935322761535645, "learning_rate": 1.713553265776892e-05, "loss": 1.1943, "step": 8225 }, { "epoch": 26.970491803278687, "grad_norm": 10.292351722717285, "learning_rate": 1.7134788658472877e-05, "loss": 1.0096, "step": 8226 }, { "epoch": 26.97377049180328, "grad_norm": 7.506153583526611, "learning_rate": 1.7134044578724673e-05, "loss": 1.2661, "step": 8227 }, { "epoch": 26.977049180327867, "grad_norm": 8.766919136047363, "learning_rate": 1.71333004185327e-05, "loss": 1.0334, "step": 8228 }, { "epoch": 26.98032786885246, "grad_norm": 7.400331020355225, "learning_rate": 1.7132556177905348e-05, "loss": 1.0899, "step": 8229 }, { "epoch": 26.983606557377048, "grad_norm": 9.209178924560547, "learning_rate": 1.713181185685101e-05, "loss": 1.1471, "step": 8230 }, { "epoch": 26.98688524590164, "grad_norm": 6.523952960968018, "learning_rate": 1.7131067455378074e-05, "loss": 1.3792, "step": 8231 }, { "epoch": 26.990163934426228, "grad_norm": 9.81091594696045, "learning_rate": 1.713032297349494e-05, "loss": 1.0864, "step": 8232 }, { "epoch": 26.99344262295082, "grad_norm": 11.497675895690918, "learning_rate": 1.7129578411210002e-05, "loss": 1.0898, "step": 8233 }, { "epoch": 26.99672131147541, "grad_norm": 11.285298347473145, "learning_rate": 1.7128833768531653e-05, "loss": 1.0621, "step": 8234 }, { "epoch": 27.0, "grad_norm": 7.518892288208008, "learning_rate": 1.7128089045468294e-05, "loss": 1.1746, "step": 8235 }, { "epoch": 27.003278688524592, "grad_norm": 6.890469074249268, "learning_rate": 1.712734424202832e-05, "loss": 1.2657, "step": 8236 }, { "epoch": 27.00655737704918, "grad_norm": 8.300965309143066, "learning_rate": 1.7126599358220124e-05, "loss": 1.3077, "step": 8237 }, { "epoch": 27.009836065573772, "grad_norm": 11.202507972717285, "learning_rate": 1.7125854394052113e-05, "loss": 1.1244, "step": 8238 }, { "epoch": 27.01311475409836, "grad_norm": 9.988237380981445, "learning_rate": 1.7125109349532687e-05, "loss": 1.0953, "step": 8239 }, { "epoch": 27.016393442622952, "grad_norm": 7.2097673416137695, "learning_rate": 1.712436422467024e-05, "loss": 1.2581, "step": 8240 }, { "epoch": 27.01967213114754, "grad_norm": 6.182057857513428, "learning_rate": 1.7123619019473184e-05, "loss": 1.0888, "step": 8241 }, { "epoch": 27.022950819672133, "grad_norm": 10.977827072143555, "learning_rate": 1.7122873733949913e-05, "loss": 0.9256, "step": 8242 }, { "epoch": 27.02622950819672, "grad_norm": 13.59389591217041, "learning_rate": 1.7122128368108836e-05, "loss": 0.848, "step": 8243 }, { "epoch": 27.029508196721313, "grad_norm": 7.581161022186279, "learning_rate": 1.7121382921958357e-05, "loss": 1.0753, "step": 8244 }, { "epoch": 27.0327868852459, "grad_norm": 8.387837409973145, "learning_rate": 1.712063739550688e-05, "loss": 0.8427, "step": 8245 }, { "epoch": 27.036065573770493, "grad_norm": 8.829533576965332, "learning_rate": 1.7119891788762814e-05, "loss": 0.962, "step": 8246 }, { "epoch": 27.03934426229508, "grad_norm": 7.474094390869141, "learning_rate": 1.7119146101734565e-05, "loss": 1.324, "step": 8247 }, { "epoch": 27.042622950819673, "grad_norm": 9.694604873657227, "learning_rate": 1.7118400334430544e-05, "loss": 1.0864, "step": 8248 }, { "epoch": 27.04590163934426, "grad_norm": 9.497859001159668, "learning_rate": 1.7117654486859154e-05, "loss": 1.0583, "step": 8249 }, { "epoch": 27.049180327868854, "grad_norm": 8.477645874023438, "learning_rate": 1.711690855902881e-05, "loss": 1.0587, "step": 8250 }, { "epoch": 27.052459016393442, "grad_norm": 8.397007942199707, "learning_rate": 1.7116162550947922e-05, "loss": 1.0403, "step": 8251 }, { "epoch": 27.055737704918034, "grad_norm": 7.518351078033447, "learning_rate": 1.7115416462624902e-05, "loss": 1.2312, "step": 8252 }, { "epoch": 27.059016393442622, "grad_norm": 8.985121726989746, "learning_rate": 1.7114670294068167e-05, "loss": 1.1442, "step": 8253 }, { "epoch": 27.062295081967214, "grad_norm": 13.016395568847656, "learning_rate": 1.7113924045286126e-05, "loss": 1.0909, "step": 8254 }, { "epoch": 27.065573770491802, "grad_norm": 7.15093469619751, "learning_rate": 1.711317771628719e-05, "loss": 1.1002, "step": 8255 }, { "epoch": 27.068852459016394, "grad_norm": 7.5278639793396, "learning_rate": 1.7112431307079785e-05, "loss": 1.2224, "step": 8256 }, { "epoch": 27.072131147540983, "grad_norm": 18.883262634277344, "learning_rate": 1.711168481767232e-05, "loss": 1.1304, "step": 8257 }, { "epoch": 27.075409836065575, "grad_norm": 8.192170143127441, "learning_rate": 1.7110938248073212e-05, "loss": 1.0796, "step": 8258 }, { "epoch": 27.078688524590163, "grad_norm": 11.068634986877441, "learning_rate": 1.7110191598290883e-05, "loss": 1.0925, "step": 8259 }, { "epoch": 27.081967213114755, "grad_norm": 7.223823547363281, "learning_rate": 1.7109444868333752e-05, "loss": 1.0239, "step": 8260 }, { "epoch": 27.085245901639343, "grad_norm": 8.724849700927734, "learning_rate": 1.7108698058210238e-05, "loss": 1.0773, "step": 8261 }, { "epoch": 27.088524590163935, "grad_norm": 5.683693885803223, "learning_rate": 1.7107951167928763e-05, "loss": 1.2454, "step": 8262 }, { "epoch": 27.091803278688523, "grad_norm": 12.712760925292969, "learning_rate": 1.7107204197497748e-05, "loss": 1.0452, "step": 8263 }, { "epoch": 27.095081967213115, "grad_norm": 7.847059726715088, "learning_rate": 1.7106457146925612e-05, "loss": 1.0669, "step": 8264 }, { "epoch": 27.098360655737704, "grad_norm": 7.728455543518066, "learning_rate": 1.7105710016220788e-05, "loss": 1.1616, "step": 8265 }, { "epoch": 27.101639344262296, "grad_norm": 8.763978004455566, "learning_rate": 1.7104962805391695e-05, "loss": 1.0529, "step": 8266 }, { "epoch": 27.104918032786884, "grad_norm": 9.953145027160645, "learning_rate": 1.7104215514446757e-05, "loss": 1.0752, "step": 8267 }, { "epoch": 27.108196721311476, "grad_norm": 9.504024505615234, "learning_rate": 1.7103468143394403e-05, "loss": 1.0361, "step": 8268 }, { "epoch": 27.111475409836064, "grad_norm": 9.781691551208496, "learning_rate": 1.710272069224306e-05, "loss": 1.1779, "step": 8269 }, { "epoch": 27.114754098360656, "grad_norm": 8.42358684539795, "learning_rate": 1.7101973161001156e-05, "loss": 1.1199, "step": 8270 }, { "epoch": 27.118032786885244, "grad_norm": 7.925786018371582, "learning_rate": 1.710122554967712e-05, "loss": 1.2074, "step": 8271 }, { "epoch": 27.121311475409836, "grad_norm": 6.987016677856445, "learning_rate": 1.7100477858279384e-05, "loss": 1.1378, "step": 8272 }, { "epoch": 27.124590163934425, "grad_norm": 6.590237617492676, "learning_rate": 1.7099730086816375e-05, "loss": 1.1666, "step": 8273 }, { "epoch": 27.127868852459017, "grad_norm": 6.798712730407715, "learning_rate": 1.7098982235296528e-05, "loss": 1.1707, "step": 8274 }, { "epoch": 27.131147540983605, "grad_norm": 7.052664756774902, "learning_rate": 1.709823430372828e-05, "loss": 1.0365, "step": 8275 }, { "epoch": 27.134426229508197, "grad_norm": 5.565577983856201, "learning_rate": 1.7097486292120056e-05, "loss": 1.2286, "step": 8276 }, { "epoch": 27.137704918032785, "grad_norm": 7.705189228057861, "learning_rate": 1.709673820048029e-05, "loss": 1.0468, "step": 8277 }, { "epoch": 27.140983606557377, "grad_norm": 7.96671724319458, "learning_rate": 1.709599002881743e-05, "loss": 1.0833, "step": 8278 }, { "epoch": 27.14426229508197, "grad_norm": 9.6786527633667, "learning_rate": 1.70952417771399e-05, "loss": 0.9067, "step": 8279 }, { "epoch": 27.147540983606557, "grad_norm": 7.856201171875, "learning_rate": 1.7094493445456143e-05, "loss": 0.9985, "step": 8280 }, { "epoch": 27.15081967213115, "grad_norm": 8.903485298156738, "learning_rate": 1.7093745033774597e-05, "loss": 1.0804, "step": 8281 }, { "epoch": 27.154098360655738, "grad_norm": 7.610154151916504, "learning_rate": 1.7092996542103698e-05, "loss": 1.118, "step": 8282 }, { "epoch": 27.15737704918033, "grad_norm": 7.68511438369751, "learning_rate": 1.709224797045189e-05, "loss": 1.2014, "step": 8283 }, { "epoch": 27.160655737704918, "grad_norm": 7.534532070159912, "learning_rate": 1.709149931882761e-05, "loss": 1.0055, "step": 8284 }, { "epoch": 27.16393442622951, "grad_norm": 21.683483123779297, "learning_rate": 1.70907505872393e-05, "loss": 1.1248, "step": 8285 }, { "epoch": 27.167213114754098, "grad_norm": 7.891564846038818, "learning_rate": 1.709000177569541e-05, "loss": 1.0222, "step": 8286 }, { "epoch": 27.17049180327869, "grad_norm": 6.314001560211182, "learning_rate": 1.7089252884204376e-05, "loss": 1.2418, "step": 8287 }, { "epoch": 27.17377049180328, "grad_norm": 11.762826919555664, "learning_rate": 1.7088503912774643e-05, "loss": 1.0307, "step": 8288 }, { "epoch": 27.17704918032787, "grad_norm": 8.155170440673828, "learning_rate": 1.708775486141466e-05, "loss": 1.0337, "step": 8289 }, { "epoch": 27.18032786885246, "grad_norm": 7.302341938018799, "learning_rate": 1.7087005730132874e-05, "loss": 1.2048, "step": 8290 }, { "epoch": 27.18360655737705, "grad_norm": 9.742903709411621, "learning_rate": 1.7086256518937726e-05, "loss": 1.1136, "step": 8291 }, { "epoch": 27.18688524590164, "grad_norm": 6.958106517791748, "learning_rate": 1.708550722783767e-05, "loss": 1.3047, "step": 8292 }, { "epoch": 27.19016393442623, "grad_norm": 8.371918678283691, "learning_rate": 1.708475785684115e-05, "loss": 1.0405, "step": 8293 }, { "epoch": 27.19344262295082, "grad_norm": 7.071326732635498, "learning_rate": 1.7084008405956623e-05, "loss": 1.1561, "step": 8294 }, { "epoch": 27.19672131147541, "grad_norm": 6.391499042510986, "learning_rate": 1.7083258875192532e-05, "loss": 1.1454, "step": 8295 }, { "epoch": 27.2, "grad_norm": 7.275261402130127, "learning_rate": 1.7082509264557333e-05, "loss": 1.0896, "step": 8296 }, { "epoch": 27.20327868852459, "grad_norm": 8.74742603302002, "learning_rate": 1.7081759574059478e-05, "loss": 1.1611, "step": 8297 }, { "epoch": 27.20655737704918, "grad_norm": 9.20472240447998, "learning_rate": 1.708100980370742e-05, "loss": 0.9811, "step": 8298 }, { "epoch": 27.20983606557377, "grad_norm": 7.731794834136963, "learning_rate": 1.7080259953509618e-05, "loss": 1.0322, "step": 8299 }, { "epoch": 27.21311475409836, "grad_norm": 9.88336181640625, "learning_rate": 1.707951002347452e-05, "loss": 0.9573, "step": 8300 }, { "epoch": 27.21639344262295, "grad_norm": 11.2067289352417, "learning_rate": 1.7078760013610587e-05, "loss": 1.0157, "step": 8301 }, { "epoch": 27.21967213114754, "grad_norm": 8.216870307922363, "learning_rate": 1.7078009923926276e-05, "loss": 1.1108, "step": 8302 }, { "epoch": 27.222950819672132, "grad_norm": 7.879986763000488, "learning_rate": 1.707725975443004e-05, "loss": 1.1551, "step": 8303 }, { "epoch": 27.22622950819672, "grad_norm": 6.805259704589844, "learning_rate": 1.7076509505130344e-05, "loss": 1.1188, "step": 8304 }, { "epoch": 27.229508196721312, "grad_norm": 7.058359622955322, "learning_rate": 1.7075759176035647e-05, "loss": 1.2256, "step": 8305 }, { "epoch": 27.2327868852459, "grad_norm": 7.556347846984863, "learning_rate": 1.707500876715441e-05, "loss": 1.0294, "step": 8306 }, { "epoch": 27.236065573770492, "grad_norm": 9.149974822998047, "learning_rate": 1.7074258278495087e-05, "loss": 1.1139, "step": 8307 }, { "epoch": 27.23934426229508, "grad_norm": 7.147390842437744, "learning_rate": 1.7073507710066152e-05, "loss": 1.2349, "step": 8308 }, { "epoch": 27.242622950819673, "grad_norm": 7.812697887420654, "learning_rate": 1.707275706187606e-05, "loss": 1.0985, "step": 8309 }, { "epoch": 27.24590163934426, "grad_norm": 7.186325550079346, "learning_rate": 1.7072006333933275e-05, "loss": 1.0984, "step": 8310 }, { "epoch": 27.249180327868853, "grad_norm": 18.673877716064453, "learning_rate": 1.707125552624627e-05, "loss": 1.0365, "step": 8311 }, { "epoch": 27.25245901639344, "grad_norm": 7.088891983032227, "learning_rate": 1.7070504638823507e-05, "loss": 1.1384, "step": 8312 }, { "epoch": 27.255737704918033, "grad_norm": 10.08851146697998, "learning_rate": 1.706975367167345e-05, "loss": 1.0944, "step": 8313 }, { "epoch": 27.25901639344262, "grad_norm": 8.003777503967285, "learning_rate": 1.7069002624804574e-05, "loss": 0.9727, "step": 8314 }, { "epoch": 27.262295081967213, "grad_norm": 8.607743263244629, "learning_rate": 1.706825149822534e-05, "loss": 1.1555, "step": 8315 }, { "epoch": 27.2655737704918, "grad_norm": 8.753692626953125, "learning_rate": 1.7067500291944222e-05, "loss": 0.9836, "step": 8316 }, { "epoch": 27.268852459016394, "grad_norm": 8.917814254760742, "learning_rate": 1.706674900596969e-05, "loss": 1.0945, "step": 8317 }, { "epoch": 27.272131147540982, "grad_norm": 18.052433013916016, "learning_rate": 1.7065997640310214e-05, "loss": 1.1384, "step": 8318 }, { "epoch": 27.275409836065574, "grad_norm": 7.411976337432861, "learning_rate": 1.706524619497427e-05, "loss": 1.1299, "step": 8319 }, { "epoch": 27.278688524590162, "grad_norm": 6.838738441467285, "learning_rate": 1.7064494669970328e-05, "loss": 1.2106, "step": 8320 }, { "epoch": 27.281967213114754, "grad_norm": 8.736778259277344, "learning_rate": 1.7063743065306865e-05, "loss": 1.1578, "step": 8321 }, { "epoch": 27.285245901639342, "grad_norm": 9.032706260681152, "learning_rate": 1.7062991380992355e-05, "loss": 1.0682, "step": 8322 }, { "epoch": 27.288524590163934, "grad_norm": 6.725803852081299, "learning_rate": 1.7062239617035266e-05, "loss": 0.9723, "step": 8323 }, { "epoch": 27.291803278688526, "grad_norm": 8.181610107421875, "learning_rate": 1.706148777344409e-05, "loss": 1.0007, "step": 8324 }, { "epoch": 27.295081967213115, "grad_norm": 7.405035972595215, "learning_rate": 1.7060735850227298e-05, "loss": 1.2241, "step": 8325 }, { "epoch": 27.298360655737707, "grad_norm": 6.981710910797119, "learning_rate": 1.7059983847393363e-05, "loss": 1.3213, "step": 8326 }, { "epoch": 27.301639344262295, "grad_norm": 7.180805206298828, "learning_rate": 1.705923176495077e-05, "loss": 0.8856, "step": 8327 }, { "epoch": 27.304918032786887, "grad_norm": 8.36699104309082, "learning_rate": 1.7058479602908e-05, "loss": 1.1134, "step": 8328 }, { "epoch": 27.308196721311475, "grad_norm": 12.981736183166504, "learning_rate": 1.7057727361273536e-05, "loss": 0.8583, "step": 8329 }, { "epoch": 27.311475409836067, "grad_norm": 7.757081508636475, "learning_rate": 1.7056975040055857e-05, "loss": 1.033, "step": 8330 }, { "epoch": 27.314754098360655, "grad_norm": 10.07559871673584, "learning_rate": 1.7056222639263447e-05, "loss": 0.7441, "step": 8331 }, { "epoch": 27.318032786885247, "grad_norm": 9.261987686157227, "learning_rate": 1.705547015890479e-05, "loss": 1.0802, "step": 8332 }, { "epoch": 27.321311475409836, "grad_norm": 7.349649906158447, "learning_rate": 1.7054717598988372e-05, "loss": 1.0475, "step": 8333 }, { "epoch": 27.324590163934428, "grad_norm": 8.946345329284668, "learning_rate": 1.7053964959522675e-05, "loss": 0.8961, "step": 8334 }, { "epoch": 27.327868852459016, "grad_norm": 7.734113693237305, "learning_rate": 1.705321224051619e-05, "loss": 1.1838, "step": 8335 }, { "epoch": 27.331147540983608, "grad_norm": 7.490147590637207, "learning_rate": 1.7052459441977402e-05, "loss": 1.1117, "step": 8336 }, { "epoch": 27.334426229508196, "grad_norm": 9.029290199279785, "learning_rate": 1.70517065639148e-05, "loss": 1.0639, "step": 8337 }, { "epoch": 27.337704918032788, "grad_norm": 12.30181884765625, "learning_rate": 1.7050953606336878e-05, "loss": 0.973, "step": 8338 }, { "epoch": 27.340983606557376, "grad_norm": 8.621901512145996, "learning_rate": 1.7050200569252126e-05, "loss": 0.8505, "step": 8339 }, { "epoch": 27.34426229508197, "grad_norm": 11.755837440490723, "learning_rate": 1.704944745266903e-05, "loss": 1.0642, "step": 8340 }, { "epoch": 27.347540983606557, "grad_norm": 6.649503707885742, "learning_rate": 1.704869425659608e-05, "loss": 1.2844, "step": 8341 }, { "epoch": 27.35081967213115, "grad_norm": 6.330160617828369, "learning_rate": 1.704794098104178e-05, "loss": 1.0841, "step": 8342 }, { "epoch": 27.354098360655737, "grad_norm": 7.944349765777588, "learning_rate": 1.704718762601461e-05, "loss": 1.2623, "step": 8343 }, { "epoch": 27.35737704918033, "grad_norm": 9.34280014038086, "learning_rate": 1.7046434191523077e-05, "loss": 1.0432, "step": 8344 }, { "epoch": 27.360655737704917, "grad_norm": 10.504414558410645, "learning_rate": 1.704568067757567e-05, "loss": 1.0991, "step": 8345 }, { "epoch": 27.36393442622951, "grad_norm": 7.645432949066162, "learning_rate": 1.704492708418089e-05, "loss": 1.197, "step": 8346 }, { "epoch": 27.367213114754097, "grad_norm": 12.33913516998291, "learning_rate": 1.704417341134723e-05, "loss": 1.0863, "step": 8347 }, { "epoch": 27.37049180327869, "grad_norm": 6.303176403045654, "learning_rate": 1.7043419659083193e-05, "loss": 1.1594, "step": 8348 }, { "epoch": 27.373770491803278, "grad_norm": 16.76757049560547, "learning_rate": 1.7042665827397273e-05, "loss": 1.0967, "step": 8349 }, { "epoch": 27.37704918032787, "grad_norm": 13.044958114624023, "learning_rate": 1.7041911916297976e-05, "loss": 0.9994, "step": 8350 }, { "epoch": 27.380327868852458, "grad_norm": 22.106685638427734, "learning_rate": 1.70411579257938e-05, "loss": 1.1415, "step": 8351 }, { "epoch": 27.38360655737705, "grad_norm": 16.21758460998535, "learning_rate": 1.7040403855893246e-05, "loss": 1.105, "step": 8352 }, { "epoch": 27.386885245901638, "grad_norm": 9.934895515441895, "learning_rate": 1.7039649706604818e-05, "loss": 1.1592, "step": 8353 }, { "epoch": 27.39016393442623, "grad_norm": 10.138269424438477, "learning_rate": 1.7038895477937023e-05, "loss": 1.0841, "step": 8354 }, { "epoch": 27.39344262295082, "grad_norm": 10.975831985473633, "learning_rate": 1.703814116989836e-05, "loss": 1.0367, "step": 8355 }, { "epoch": 27.39672131147541, "grad_norm": 8.049971580505371, "learning_rate": 1.703738678249734e-05, "loss": 0.8254, "step": 8356 }, { "epoch": 27.4, "grad_norm": 8.642194747924805, "learning_rate": 1.7036632315742464e-05, "loss": 1.1758, "step": 8357 }, { "epoch": 27.40327868852459, "grad_norm": 7.78520393371582, "learning_rate": 1.7035877769642245e-05, "loss": 1.2507, "step": 8358 }, { "epoch": 27.40655737704918, "grad_norm": 9.03990650177002, "learning_rate": 1.7035123144205186e-05, "loss": 1.2262, "step": 8359 }, { "epoch": 27.40983606557377, "grad_norm": 7.70888090133667, "learning_rate": 1.7034368439439802e-05, "loss": 1.1655, "step": 8360 }, { "epoch": 27.41311475409836, "grad_norm": 7.773831844329834, "learning_rate": 1.7033613655354597e-05, "loss": 1.0105, "step": 8361 }, { "epoch": 27.41639344262295, "grad_norm": 6.338193416595459, "learning_rate": 1.703285879195809e-05, "loss": 1.3411, "step": 8362 }, { "epoch": 27.41967213114754, "grad_norm": 10.138360023498535, "learning_rate": 1.703210384925878e-05, "loss": 1.179, "step": 8363 }, { "epoch": 27.42295081967213, "grad_norm": 10.371729850769043, "learning_rate": 1.7031348827265193e-05, "loss": 1.0492, "step": 8364 }, { "epoch": 27.42622950819672, "grad_norm": 10.246389389038086, "learning_rate": 1.7030593725985834e-05, "loss": 1.1206, "step": 8365 }, { "epoch": 27.42950819672131, "grad_norm": 10.434015274047852, "learning_rate": 1.7029838545429226e-05, "loss": 1.1333, "step": 8366 }, { "epoch": 27.432786885245903, "grad_norm": 7.266347408294678, "learning_rate": 1.7029083285603873e-05, "loss": 1.2695, "step": 8367 }, { "epoch": 27.43606557377049, "grad_norm": 6.6291728019714355, "learning_rate": 1.70283279465183e-05, "loss": 1.4, "step": 8368 }, { "epoch": 27.439344262295084, "grad_norm": 19.726465225219727, "learning_rate": 1.702757252818102e-05, "loss": 1.0492, "step": 8369 }, { "epoch": 27.442622950819672, "grad_norm": 42.96559143066406, "learning_rate": 1.7026817030600557e-05, "loss": 0.8734, "step": 8370 }, { "epoch": 27.445901639344264, "grad_norm": 10.783475875854492, "learning_rate": 1.7026061453785426e-05, "loss": 1.0608, "step": 8371 }, { "epoch": 27.449180327868852, "grad_norm": 6.8990678787231445, "learning_rate": 1.7025305797744142e-05, "loss": 1.0508, "step": 8372 }, { "epoch": 27.452459016393444, "grad_norm": 8.222190856933594, "learning_rate": 1.7024550062485237e-05, "loss": 1.2367, "step": 8373 }, { "epoch": 27.455737704918032, "grad_norm": 9.409012794494629, "learning_rate": 1.7023794248017222e-05, "loss": 1.1018, "step": 8374 }, { "epoch": 27.459016393442624, "grad_norm": 11.801344871520996, "learning_rate": 1.7023038354348624e-05, "loss": 1.0388, "step": 8375 }, { "epoch": 27.462295081967213, "grad_norm": 9.787871360778809, "learning_rate": 1.7022282381487968e-05, "loss": 1.0276, "step": 8376 }, { "epoch": 27.465573770491805, "grad_norm": 9.88520336151123, "learning_rate": 1.7021526329443775e-05, "loss": 0.9749, "step": 8377 }, { "epoch": 27.468852459016393, "grad_norm": 9.714505195617676, "learning_rate": 1.702077019822457e-05, "loss": 1.2069, "step": 8378 }, { "epoch": 27.472131147540985, "grad_norm": 9.279206275939941, "learning_rate": 1.702001398783889e-05, "loss": 1.0974, "step": 8379 }, { "epoch": 27.475409836065573, "grad_norm": 7.832500457763672, "learning_rate": 1.7019257698295243e-05, "loss": 1.1205, "step": 8380 }, { "epoch": 27.478688524590165, "grad_norm": 9.391134262084961, "learning_rate": 1.7018501329602176e-05, "loss": 0.9189, "step": 8381 }, { "epoch": 27.481967213114753, "grad_norm": 7.4898905754089355, "learning_rate": 1.7017744881768205e-05, "loss": 1.329, "step": 8382 }, { "epoch": 27.485245901639345, "grad_norm": 11.234368324279785, "learning_rate": 1.701698835480186e-05, "loss": 1.0177, "step": 8383 }, { "epoch": 27.488524590163934, "grad_norm": 12.938575744628906, "learning_rate": 1.701623174871168e-05, "loss": 1.1144, "step": 8384 }, { "epoch": 27.491803278688526, "grad_norm": 11.788830757141113, "learning_rate": 1.7015475063506192e-05, "loss": 0.9912, "step": 8385 }, { "epoch": 27.495081967213114, "grad_norm": 7.139122009277344, "learning_rate": 1.7014718299193925e-05, "loss": 1.0977, "step": 8386 }, { "epoch": 27.498360655737706, "grad_norm": 8.914368629455566, "learning_rate": 1.7013961455783418e-05, "loss": 1.1575, "step": 8387 }, { "epoch": 27.501639344262294, "grad_norm": 8.989141464233398, "learning_rate": 1.7013204533283208e-05, "loss": 1.0284, "step": 8388 }, { "epoch": 27.504918032786886, "grad_norm": 7.794639587402344, "learning_rate": 1.7012447531701817e-05, "loss": 1.2635, "step": 8389 }, { "epoch": 27.508196721311474, "grad_norm": 9.620209693908691, "learning_rate": 1.7011690451047793e-05, "loss": 1.2854, "step": 8390 }, { "epoch": 27.511475409836066, "grad_norm": 10.924023628234863, "learning_rate": 1.7010933291329668e-05, "loss": 1.0063, "step": 8391 }, { "epoch": 27.514754098360655, "grad_norm": 8.748761177062988, "learning_rate": 1.701017605255598e-05, "loss": 1.3201, "step": 8392 }, { "epoch": 27.518032786885247, "grad_norm": 13.738829612731934, "learning_rate": 1.7009418734735267e-05, "loss": 1.0514, "step": 8393 }, { "epoch": 27.521311475409835, "grad_norm": 10.647921562194824, "learning_rate": 1.7008661337876074e-05, "loss": 1.097, "step": 8394 }, { "epoch": 27.524590163934427, "grad_norm": 9.069189071655273, "learning_rate": 1.7007903861986933e-05, "loss": 1.0471, "step": 8395 }, { "epoch": 27.527868852459015, "grad_norm": 10.790716171264648, "learning_rate": 1.7007146307076394e-05, "loss": 1.2493, "step": 8396 }, { "epoch": 27.531147540983607, "grad_norm": 7.560586452484131, "learning_rate": 1.7006388673152988e-05, "loss": 0.9386, "step": 8397 }, { "epoch": 27.534426229508195, "grad_norm": 20.59333038330078, "learning_rate": 1.700563096022527e-05, "loss": 1.1172, "step": 8398 }, { "epoch": 27.537704918032787, "grad_norm": 7.126296043395996, "learning_rate": 1.700487316830178e-05, "loss": 1.2081, "step": 8399 }, { "epoch": 27.540983606557376, "grad_norm": 9.283344268798828, "learning_rate": 1.700411529739106e-05, "loss": 1.0928, "step": 8400 }, { "epoch": 27.544262295081968, "grad_norm": 9.524859428405762, "learning_rate": 1.7003357347501656e-05, "loss": 1.0974, "step": 8401 }, { "epoch": 27.547540983606556, "grad_norm": 6.307524681091309, "learning_rate": 1.7002599318642115e-05, "loss": 1.1576, "step": 8402 }, { "epoch": 27.550819672131148, "grad_norm": 10.2044677734375, "learning_rate": 1.7001841210820994e-05, "loss": 0.9069, "step": 8403 }, { "epoch": 27.554098360655736, "grad_norm": 7.213521957397461, "learning_rate": 1.7001083024046824e-05, "loss": 1.0648, "step": 8404 }, { "epoch": 27.557377049180328, "grad_norm": 15.799574851989746, "learning_rate": 1.700032475832817e-05, "loss": 1.1353, "step": 8405 }, { "epoch": 27.560655737704916, "grad_norm": 7.832505702972412, "learning_rate": 1.699956641367357e-05, "loss": 0.9597, "step": 8406 }, { "epoch": 27.56393442622951, "grad_norm": 9.676798820495605, "learning_rate": 1.6998807990091587e-05, "loss": 0.9962, "step": 8407 }, { "epoch": 27.567213114754097, "grad_norm": 7.542994976043701, "learning_rate": 1.699804948759076e-05, "loss": 1.1915, "step": 8408 }, { "epoch": 27.57049180327869, "grad_norm": 9.05488395690918, "learning_rate": 1.699729090617966e-05, "loss": 1.0803, "step": 8409 }, { "epoch": 27.57377049180328, "grad_norm": 8.692524909973145, "learning_rate": 1.699653224586682e-05, "loss": 1.2156, "step": 8410 }, { "epoch": 27.57704918032787, "grad_norm": 8.823667526245117, "learning_rate": 1.6995773506660805e-05, "loss": 1.0961, "step": 8411 }, { "epoch": 27.58032786885246, "grad_norm": 6.758360385894775, "learning_rate": 1.6995014688570173e-05, "loss": 1.0425, "step": 8412 }, { "epoch": 27.58360655737705, "grad_norm": 11.919278144836426, "learning_rate": 1.6994255791603475e-05, "loss": 1.1393, "step": 8413 }, { "epoch": 27.58688524590164, "grad_norm": 9.817955017089844, "learning_rate": 1.699349681576927e-05, "loss": 1.0117, "step": 8414 }, { "epoch": 27.59016393442623, "grad_norm": 7.960262298583984, "learning_rate": 1.699273776107612e-05, "loss": 1.0265, "step": 8415 }, { "epoch": 27.59344262295082, "grad_norm": 6.6147589683532715, "learning_rate": 1.699197862753258e-05, "loss": 1.0912, "step": 8416 }, { "epoch": 27.59672131147541, "grad_norm": 7.97568416595459, "learning_rate": 1.699121941514721e-05, "loss": 1.0594, "step": 8417 }, { "epoch": 27.6, "grad_norm": 6.957054138183594, "learning_rate": 1.6990460123928577e-05, "loss": 1.1429, "step": 8418 }, { "epoch": 27.60327868852459, "grad_norm": 7.152989387512207, "learning_rate": 1.6989700753885233e-05, "loss": 1.2043, "step": 8419 }, { "epoch": 27.60655737704918, "grad_norm": 5.960642337799072, "learning_rate": 1.6988941305025748e-05, "loss": 1.377, "step": 8420 }, { "epoch": 27.60983606557377, "grad_norm": 7.636151313781738, "learning_rate": 1.6988181777358683e-05, "loss": 1.3702, "step": 8421 }, { "epoch": 27.613114754098362, "grad_norm": 7.341193675994873, "learning_rate": 1.6987422170892596e-05, "loss": 1.0497, "step": 8422 }, { "epoch": 27.61639344262295, "grad_norm": 9.101832389831543, "learning_rate": 1.6986662485636065e-05, "loss": 1.0226, "step": 8423 }, { "epoch": 27.619672131147542, "grad_norm": 9.685389518737793, "learning_rate": 1.6985902721597652e-05, "loss": 1.0701, "step": 8424 }, { "epoch": 27.62295081967213, "grad_norm": 6.785515785217285, "learning_rate": 1.698514287878592e-05, "loss": 1.0945, "step": 8425 }, { "epoch": 27.626229508196722, "grad_norm": 6.827757358551025, "learning_rate": 1.6984382957209437e-05, "loss": 1.2006, "step": 8426 }, { "epoch": 27.62950819672131, "grad_norm": 6.391838550567627, "learning_rate": 1.6983622956876778e-05, "loss": 1.1943, "step": 8427 }, { "epoch": 27.632786885245903, "grad_norm": 8.034502983093262, "learning_rate": 1.6982862877796505e-05, "loss": 1.1031, "step": 8428 }, { "epoch": 27.63606557377049, "grad_norm": 6.230140209197998, "learning_rate": 1.6982102719977195e-05, "loss": 1.2281, "step": 8429 }, { "epoch": 27.639344262295083, "grad_norm": 7.99119234085083, "learning_rate": 1.698134248342742e-05, "loss": 0.9447, "step": 8430 }, { "epoch": 27.64262295081967, "grad_norm": 8.065774917602539, "learning_rate": 1.6980582168155748e-05, "loss": 0.9174, "step": 8431 }, { "epoch": 27.645901639344263, "grad_norm": 6.422056198120117, "learning_rate": 1.697982177417075e-05, "loss": 1.4065, "step": 8432 }, { "epoch": 27.64918032786885, "grad_norm": 7.570176601409912, "learning_rate": 1.697906130148101e-05, "loss": 1.1188, "step": 8433 }, { "epoch": 27.652459016393443, "grad_norm": 8.227483749389648, "learning_rate": 1.697830075009509e-05, "loss": 1.1171, "step": 8434 }, { "epoch": 27.65573770491803, "grad_norm": 8.42851448059082, "learning_rate": 1.6977540120021584e-05, "loss": 1.2151, "step": 8435 }, { "epoch": 27.659016393442624, "grad_norm": 7.653264999389648, "learning_rate": 1.6976779411269047e-05, "loss": 0.8695, "step": 8436 }, { "epoch": 27.662295081967212, "grad_norm": 6.64351224899292, "learning_rate": 1.6976018623846075e-05, "loss": 1.1157, "step": 8437 }, { "epoch": 27.665573770491804, "grad_norm": 12.10474681854248, "learning_rate": 1.697525775776124e-05, "loss": 1.2798, "step": 8438 }, { "epoch": 27.668852459016392, "grad_norm": 8.053057670593262, "learning_rate": 1.6974496813023118e-05, "loss": 1.0214, "step": 8439 }, { "epoch": 27.672131147540984, "grad_norm": 7.2922539710998535, "learning_rate": 1.6973735789640294e-05, "loss": 1.0817, "step": 8440 }, { "epoch": 27.675409836065572, "grad_norm": 7.062229633331299, "learning_rate": 1.6972974687621347e-05, "loss": 1.0295, "step": 8441 }, { "epoch": 27.678688524590164, "grad_norm": 11.053343772888184, "learning_rate": 1.6972213506974866e-05, "loss": 1.127, "step": 8442 }, { "epoch": 27.681967213114753, "grad_norm": 9.309988975524902, "learning_rate": 1.6971452247709422e-05, "loss": 1.3328, "step": 8443 }, { "epoch": 27.685245901639345, "grad_norm": 6.712803840637207, "learning_rate": 1.697069090983361e-05, "loss": 1.0443, "step": 8444 }, { "epoch": 27.688524590163933, "grad_norm": 7.987921237945557, "learning_rate": 1.6969929493356008e-05, "loss": 1.0452, "step": 8445 }, { "epoch": 27.691803278688525, "grad_norm": 9.11001205444336, "learning_rate": 1.6969167998285205e-05, "loss": 1.1416, "step": 8446 }, { "epoch": 27.695081967213113, "grad_norm": 6.758894443511963, "learning_rate": 1.6968406424629785e-05, "loss": 0.9556, "step": 8447 }, { "epoch": 27.698360655737705, "grad_norm": 6.95824670791626, "learning_rate": 1.6967644772398337e-05, "loss": 1.1026, "step": 8448 }, { "epoch": 27.701639344262293, "grad_norm": 9.18260383605957, "learning_rate": 1.696688304159945e-05, "loss": 1.0511, "step": 8449 }, { "epoch": 27.704918032786885, "grad_norm": 10.08653736114502, "learning_rate": 1.6966121232241714e-05, "loss": 1.1495, "step": 8450 }, { "epoch": 27.708196721311474, "grad_norm": 8.428865432739258, "learning_rate": 1.696535934433372e-05, "loss": 1.0504, "step": 8451 }, { "epoch": 27.711475409836066, "grad_norm": 7.097047805786133, "learning_rate": 1.6964597377884053e-05, "loss": 0.9637, "step": 8452 }, { "epoch": 27.714754098360658, "grad_norm": 6.263730525970459, "learning_rate": 1.6963835332901313e-05, "loss": 1.1536, "step": 8453 }, { "epoch": 27.718032786885246, "grad_norm": 7.000203609466553, "learning_rate": 1.6963073209394086e-05, "loss": 1.0288, "step": 8454 }, { "epoch": 27.721311475409838, "grad_norm": 8.627695083618164, "learning_rate": 1.696231100737097e-05, "loss": 0.9404, "step": 8455 }, { "epoch": 27.724590163934426, "grad_norm": 6.936455726623535, "learning_rate": 1.696154872684056e-05, "loss": 0.9377, "step": 8456 }, { "epoch": 27.727868852459018, "grad_norm": 6.8302998542785645, "learning_rate": 1.6960786367811447e-05, "loss": 1.1144, "step": 8457 }, { "epoch": 27.731147540983606, "grad_norm": 7.368603229522705, "learning_rate": 1.696002393029223e-05, "loss": 1.1045, "step": 8458 }, { "epoch": 27.7344262295082, "grad_norm": 8.456063270568848, "learning_rate": 1.695926141429151e-05, "loss": 0.9822, "step": 8459 }, { "epoch": 27.737704918032787, "grad_norm": 8.196215629577637, "learning_rate": 1.695849881981788e-05, "loss": 1.0337, "step": 8460 }, { "epoch": 27.74098360655738, "grad_norm": 7.089298725128174, "learning_rate": 1.695773614687994e-05, "loss": 1.0339, "step": 8461 }, { "epoch": 27.744262295081967, "grad_norm": 7.838130474090576, "learning_rate": 1.695697339548629e-05, "loss": 1.12, "step": 8462 }, { "epoch": 27.74754098360656, "grad_norm": 10.712002754211426, "learning_rate": 1.6956210565645535e-05, "loss": 0.9637, "step": 8463 }, { "epoch": 27.750819672131147, "grad_norm": 11.236594200134277, "learning_rate": 1.695544765736627e-05, "loss": 1.0479, "step": 8464 }, { "epoch": 27.75409836065574, "grad_norm": 8.570807456970215, "learning_rate": 1.6954684670657105e-05, "loss": 1.251, "step": 8465 }, { "epoch": 27.757377049180327, "grad_norm": 8.282486915588379, "learning_rate": 1.6953921605526637e-05, "loss": 0.9564, "step": 8466 }, { "epoch": 27.76065573770492, "grad_norm": 7.732300758361816, "learning_rate": 1.695315846198347e-05, "loss": 1.3062, "step": 8467 }, { "epoch": 27.763934426229508, "grad_norm": 9.116558074951172, "learning_rate": 1.6952395240036215e-05, "loss": 1.062, "step": 8468 }, { "epoch": 27.7672131147541, "grad_norm": 7.732374668121338, "learning_rate": 1.6951631939693473e-05, "loss": 1.2019, "step": 8469 }, { "epoch": 27.770491803278688, "grad_norm": 9.37216567993164, "learning_rate": 1.6950868560963852e-05, "loss": 1.0367, "step": 8470 }, { "epoch": 27.77377049180328, "grad_norm": 27.99886703491211, "learning_rate": 1.6950105103855966e-05, "loss": 1.1157, "step": 8471 }, { "epoch": 27.777049180327868, "grad_norm": 6.479964256286621, "learning_rate": 1.6949341568378416e-05, "loss": 1.1219, "step": 8472 }, { "epoch": 27.78032786885246, "grad_norm": 9.657818794250488, "learning_rate": 1.694857795453981e-05, "loss": 0.9429, "step": 8473 }, { "epoch": 27.78360655737705, "grad_norm": 6.843387603759766, "learning_rate": 1.6947814262348766e-05, "loss": 1.0214, "step": 8474 }, { "epoch": 27.78688524590164, "grad_norm": 7.4053521156311035, "learning_rate": 1.6947050491813894e-05, "loss": 1.3041, "step": 8475 }, { "epoch": 27.79016393442623, "grad_norm": 6.652815818786621, "learning_rate": 1.6946286642943803e-05, "loss": 0.9812, "step": 8476 }, { "epoch": 27.79344262295082, "grad_norm": 6.831553936004639, "learning_rate": 1.6945522715747112e-05, "loss": 0.8874, "step": 8477 }, { "epoch": 27.79672131147541, "grad_norm": 11.972171783447266, "learning_rate": 1.6944758710232423e-05, "loss": 0.908, "step": 8478 }, { "epoch": 27.8, "grad_norm": 8.14301872253418, "learning_rate": 1.6943994626408365e-05, "loss": 1.1849, "step": 8479 }, { "epoch": 27.80327868852459, "grad_norm": 6.278542995452881, "learning_rate": 1.6943230464283545e-05, "loss": 1.1055, "step": 8480 }, { "epoch": 27.80655737704918, "grad_norm": 9.662097930908203, "learning_rate": 1.6942466223866582e-05, "loss": 1.2056, "step": 8481 }, { "epoch": 27.80983606557377, "grad_norm": 7.755866527557373, "learning_rate": 1.6941701905166096e-05, "loss": 1.1099, "step": 8482 }, { "epoch": 27.81311475409836, "grad_norm": 7.803549289703369, "learning_rate": 1.6940937508190702e-05, "loss": 1.033, "step": 8483 }, { "epoch": 27.81639344262295, "grad_norm": 8.233956336975098, "learning_rate": 1.694017303294902e-05, "loss": 1.1881, "step": 8484 }, { "epoch": 27.81967213114754, "grad_norm": 8.578909873962402, "learning_rate": 1.6939408479449672e-05, "loss": 1.1766, "step": 8485 }, { "epoch": 27.82295081967213, "grad_norm": 6.942768573760986, "learning_rate": 1.693864384770128e-05, "loss": 1.0337, "step": 8486 }, { "epoch": 27.82622950819672, "grad_norm": 8.443136215209961, "learning_rate": 1.693787913771246e-05, "loss": 1.1022, "step": 8487 }, { "epoch": 27.82950819672131, "grad_norm": 9.119877815246582, "learning_rate": 1.6937114349491844e-05, "loss": 1.0131, "step": 8488 }, { "epoch": 27.832786885245902, "grad_norm": 7.688457489013672, "learning_rate": 1.693634948304805e-05, "loss": 1.0372, "step": 8489 }, { "epoch": 27.83606557377049, "grad_norm": 7.526200294494629, "learning_rate": 1.69355845383897e-05, "loss": 1.1189, "step": 8490 }, { "epoch": 27.839344262295082, "grad_norm": 10.517020225524902, "learning_rate": 1.6934819515525425e-05, "loss": 1.0963, "step": 8491 }, { "epoch": 27.84262295081967, "grad_norm": 8.516403198242188, "learning_rate": 1.693405441446385e-05, "loss": 0.973, "step": 8492 }, { "epoch": 27.845901639344262, "grad_norm": 6.937134265899658, "learning_rate": 1.69332892352136e-05, "loss": 1.0342, "step": 8493 }, { "epoch": 27.84918032786885, "grad_norm": 8.239405632019043, "learning_rate": 1.693252397778331e-05, "loss": 1.0762, "step": 8494 }, { "epoch": 27.852459016393443, "grad_norm": 6.55388879776001, "learning_rate": 1.6931758642181598e-05, "loss": 1.0553, "step": 8495 }, { "epoch": 27.855737704918035, "grad_norm": 6.2452239990234375, "learning_rate": 1.6930993228417107e-05, "loss": 1.2778, "step": 8496 }, { "epoch": 27.859016393442623, "grad_norm": 6.750268936157227, "learning_rate": 1.6930227736498454e-05, "loss": 1.1815, "step": 8497 }, { "epoch": 27.862295081967215, "grad_norm": 7.466064453125, "learning_rate": 1.6929462166434284e-05, "loss": 1.1252, "step": 8498 }, { "epoch": 27.865573770491803, "grad_norm": 7.956892013549805, "learning_rate": 1.6928696518233217e-05, "loss": 0.8793, "step": 8499 }, { "epoch": 27.868852459016395, "grad_norm": 8.948503494262695, "learning_rate": 1.6927930791903896e-05, "loss": 1.1013, "step": 8500 }, { "epoch": 27.872131147540983, "grad_norm": 6.040266036987305, "learning_rate": 1.692716498745495e-05, "loss": 1.0381, "step": 8501 }, { "epoch": 27.875409836065575, "grad_norm": 7.95503568649292, "learning_rate": 1.692639910489502e-05, "loss": 0.9336, "step": 8502 }, { "epoch": 27.878688524590164, "grad_norm": 7.684663772583008, "learning_rate": 1.692563314423274e-05, "loss": 1.158, "step": 8503 }, { "epoch": 27.881967213114756, "grad_norm": 8.325610160827637, "learning_rate": 1.6924867105476738e-05, "loss": 1.168, "step": 8504 }, { "epoch": 27.885245901639344, "grad_norm": 8.548628807067871, "learning_rate": 1.6924100988635666e-05, "loss": 1.1292, "step": 8505 }, { "epoch": 27.888524590163936, "grad_norm": 11.538487434387207, "learning_rate": 1.692333479371815e-05, "loss": 1.1731, "step": 8506 }, { "epoch": 27.891803278688524, "grad_norm": 7.717864036560059, "learning_rate": 1.692256852073284e-05, "loss": 0.9053, "step": 8507 }, { "epoch": 27.895081967213116, "grad_norm": 9.239570617675781, "learning_rate": 1.6921802169688374e-05, "loss": 0.9369, "step": 8508 }, { "epoch": 27.898360655737704, "grad_norm": 10.076038360595703, "learning_rate": 1.6921035740593386e-05, "loss": 1.1802, "step": 8509 }, { "epoch": 27.901639344262296, "grad_norm": 8.001114845275879, "learning_rate": 1.6920269233456527e-05, "loss": 1.1266, "step": 8510 }, { "epoch": 27.904918032786885, "grad_norm": 8.821672439575195, "learning_rate": 1.691950264828644e-05, "loss": 1.2911, "step": 8511 }, { "epoch": 27.908196721311477, "grad_norm": 6.9615631103515625, "learning_rate": 1.691873598509176e-05, "loss": 1.1776, "step": 8512 }, { "epoch": 27.911475409836065, "grad_norm": 9.371182441711426, "learning_rate": 1.6917969243881143e-05, "loss": 1.0035, "step": 8513 }, { "epoch": 27.914754098360657, "grad_norm": 7.444364547729492, "learning_rate": 1.6917202424663228e-05, "loss": 1.1173, "step": 8514 }, { "epoch": 27.918032786885245, "grad_norm": 7.255202770233154, "learning_rate": 1.6916435527446663e-05, "loss": 1.0303, "step": 8515 }, { "epoch": 27.921311475409837, "grad_norm": 7.2697248458862305, "learning_rate": 1.6915668552240098e-05, "loss": 1.0373, "step": 8516 }, { "epoch": 27.924590163934425, "grad_norm": 8.614296913146973, "learning_rate": 1.6914901499052177e-05, "loss": 0.95, "step": 8517 }, { "epoch": 27.927868852459017, "grad_norm": 7.840150356292725, "learning_rate": 1.6914134367891553e-05, "loss": 1.2365, "step": 8518 }, { "epoch": 27.931147540983606, "grad_norm": 7.76846981048584, "learning_rate": 1.6913367158766876e-05, "loss": 1.2986, "step": 8519 }, { "epoch": 27.934426229508198, "grad_norm": 7.344472408294678, "learning_rate": 1.6912599871686795e-05, "loss": 0.8841, "step": 8520 }, { "epoch": 27.937704918032786, "grad_norm": 27.421916961669922, "learning_rate": 1.6911832506659968e-05, "loss": 1.0054, "step": 8521 }, { "epoch": 27.940983606557378, "grad_norm": 7.204267501831055, "learning_rate": 1.6911065063695037e-05, "loss": 1.0562, "step": 8522 }, { "epoch": 27.944262295081966, "grad_norm": 6.717144966125488, "learning_rate": 1.6910297542800662e-05, "loss": 1.2249, "step": 8523 }, { "epoch": 27.947540983606558, "grad_norm": 6.930518627166748, "learning_rate": 1.6909529943985498e-05, "loss": 1.1219, "step": 8524 }, { "epoch": 27.950819672131146, "grad_norm": 8.026833534240723, "learning_rate": 1.69087622672582e-05, "loss": 1.2096, "step": 8525 }, { "epoch": 27.95409836065574, "grad_norm": 8.121614456176758, "learning_rate": 1.6907994512627425e-05, "loss": 1.0529, "step": 8526 }, { "epoch": 27.957377049180327, "grad_norm": 7.427013874053955, "learning_rate": 1.6907226680101833e-05, "loss": 1.1943, "step": 8527 }, { "epoch": 27.96065573770492, "grad_norm": 5.727593898773193, "learning_rate": 1.6906458769690074e-05, "loss": 1.2327, "step": 8528 }, { "epoch": 27.963934426229507, "grad_norm": 7.973217010498047, "learning_rate": 1.6905690781400812e-05, "loss": 0.9187, "step": 8529 }, { "epoch": 27.9672131147541, "grad_norm": 5.673343181610107, "learning_rate": 1.6904922715242703e-05, "loss": 1.2714, "step": 8530 }, { "epoch": 27.970491803278687, "grad_norm": 7.824804782867432, "learning_rate": 1.6904154571224417e-05, "loss": 1.3179, "step": 8531 }, { "epoch": 27.97377049180328, "grad_norm": 7.244102478027344, "learning_rate": 1.6903386349354605e-05, "loss": 0.9158, "step": 8532 }, { "epoch": 27.977049180327867, "grad_norm": 8.44396686553955, "learning_rate": 1.6902618049641937e-05, "loss": 0.861, "step": 8533 }, { "epoch": 27.98032786885246, "grad_norm": 6.6009416580200195, "learning_rate": 1.6901849672095072e-05, "loss": 1.3088, "step": 8534 }, { "epoch": 27.983606557377048, "grad_norm": 9.064175605773926, "learning_rate": 1.6901081216722678e-05, "loss": 1.2258, "step": 8535 }, { "epoch": 27.98688524590164, "grad_norm": 9.188675880432129, "learning_rate": 1.6900312683533414e-05, "loss": 1.1558, "step": 8536 }, { "epoch": 27.990163934426228, "grad_norm": 8.019044876098633, "learning_rate": 1.6899544072535955e-05, "loss": 0.9846, "step": 8537 }, { "epoch": 27.99344262295082, "grad_norm": 8.406230926513672, "learning_rate": 1.689877538373896e-05, "loss": 1.2136, "step": 8538 }, { "epoch": 27.99672131147541, "grad_norm": 7.421855449676514, "learning_rate": 1.68980066171511e-05, "loss": 1.111, "step": 8539 }, { "epoch": 28.0, "grad_norm": 6.624839782714844, "learning_rate": 1.6897237772781046e-05, "loss": 1.1791, "step": 8540 }, { "epoch": 28.003278688524592, "grad_norm": 7.237847805023193, "learning_rate": 1.689646885063746e-05, "loss": 0.9504, "step": 8541 }, { "epoch": 28.00655737704918, "grad_norm": 7.389903545379639, "learning_rate": 1.689569985072902e-05, "loss": 1.0853, "step": 8542 }, { "epoch": 28.009836065573772, "grad_norm": 6.304675579071045, "learning_rate": 1.6894930773064394e-05, "loss": 0.8958, "step": 8543 }, { "epoch": 28.01311475409836, "grad_norm": 7.5608134269714355, "learning_rate": 1.6894161617652253e-05, "loss": 0.9917, "step": 8544 }, { "epoch": 28.016393442622952, "grad_norm": 19.28096580505371, "learning_rate": 1.6893392384501273e-05, "loss": 0.8403, "step": 8545 }, { "epoch": 28.01967213114754, "grad_norm": 14.717009544372559, "learning_rate": 1.6892623073620127e-05, "loss": 1.0351, "step": 8546 }, { "epoch": 28.022950819672133, "grad_norm": 7.841629981994629, "learning_rate": 1.689185368501749e-05, "loss": 0.9817, "step": 8547 }, { "epoch": 28.02622950819672, "grad_norm": 6.747201919555664, "learning_rate": 1.6891084218702035e-05, "loss": 1.051, "step": 8548 }, { "epoch": 28.029508196721313, "grad_norm": 6.471744060516357, "learning_rate": 1.689031467468244e-05, "loss": 1.1769, "step": 8549 }, { "epoch": 28.0327868852459, "grad_norm": 10.347883224487305, "learning_rate": 1.6889545052967384e-05, "loss": 0.9406, "step": 8550 }, { "epoch": 28.036065573770493, "grad_norm": 7.769740581512451, "learning_rate": 1.6888775353565547e-05, "loss": 1.0549, "step": 8551 }, { "epoch": 28.03934426229508, "grad_norm": 7.298774719238281, "learning_rate": 1.68880055764856e-05, "loss": 1.016, "step": 8552 }, { "epoch": 28.042622950819673, "grad_norm": 7.1740336418151855, "learning_rate": 1.688723572173623e-05, "loss": 1.1853, "step": 8553 }, { "epoch": 28.04590163934426, "grad_norm": 7.812497138977051, "learning_rate": 1.688646578932612e-05, "loss": 0.9294, "step": 8554 }, { "epoch": 28.049180327868854, "grad_norm": 8.076412200927734, "learning_rate": 1.6885695779263945e-05, "loss": 0.9131, "step": 8555 }, { "epoch": 28.052459016393442, "grad_norm": 6.854466438293457, "learning_rate": 1.6884925691558393e-05, "loss": 1.0446, "step": 8556 }, { "epoch": 28.055737704918034, "grad_norm": 11.184415817260742, "learning_rate": 1.688415552621814e-05, "loss": 1.1697, "step": 8557 }, { "epoch": 28.059016393442622, "grad_norm": 6.821655750274658, "learning_rate": 1.688338528325188e-05, "loss": 1.0713, "step": 8558 }, { "epoch": 28.062295081967214, "grad_norm": 9.318230628967285, "learning_rate": 1.6882614962668294e-05, "loss": 0.8666, "step": 8559 }, { "epoch": 28.065573770491802, "grad_norm": 6.424018383026123, "learning_rate": 1.6881844564476067e-05, "loss": 1.2683, "step": 8560 }, { "epoch": 28.068852459016394, "grad_norm": 7.462838649749756, "learning_rate": 1.6881074088683888e-05, "loss": 1.3401, "step": 8561 }, { "epoch": 28.072131147540983, "grad_norm": 9.041421890258789, "learning_rate": 1.6880303535300445e-05, "loss": 0.9895, "step": 8562 }, { "epoch": 28.075409836065575, "grad_norm": 11.449264526367188, "learning_rate": 1.6879532904334422e-05, "loss": 1.0488, "step": 8563 }, { "epoch": 28.078688524590163, "grad_norm": 7.0854034423828125, "learning_rate": 1.687876219579452e-05, "loss": 0.884, "step": 8564 }, { "epoch": 28.081967213114755, "grad_norm": 8.771958351135254, "learning_rate": 1.6877991409689415e-05, "loss": 1.0009, "step": 8565 }, { "epoch": 28.085245901639343, "grad_norm": 10.505305290222168, "learning_rate": 1.687722054602781e-05, "loss": 1.0942, "step": 8566 }, { "epoch": 28.088524590163935, "grad_norm": 12.129514694213867, "learning_rate": 1.6876449604818388e-05, "loss": 0.8833, "step": 8567 }, { "epoch": 28.091803278688523, "grad_norm": 5.275716781616211, "learning_rate": 1.6875678586069853e-05, "loss": 1.126, "step": 8568 }, { "epoch": 28.095081967213115, "grad_norm": 7.3366289138793945, "learning_rate": 1.6874907489790885e-05, "loss": 1.0557, "step": 8569 }, { "epoch": 28.098360655737704, "grad_norm": 7.370420932769775, "learning_rate": 1.6874136315990195e-05, "loss": 1.194, "step": 8570 }, { "epoch": 28.101639344262296, "grad_norm": 10.324785232543945, "learning_rate": 1.687336506467647e-05, "loss": 0.9854, "step": 8571 }, { "epoch": 28.104918032786884, "grad_norm": 9.485949516296387, "learning_rate": 1.68725937358584e-05, "loss": 1.0906, "step": 8572 }, { "epoch": 28.108196721311476, "grad_norm": 8.383527755737305, "learning_rate": 1.6871822329544697e-05, "loss": 1.0701, "step": 8573 }, { "epoch": 28.111475409836064, "grad_norm": 6.897063255310059, "learning_rate": 1.687105084574405e-05, "loss": 0.9692, "step": 8574 }, { "epoch": 28.114754098360656, "grad_norm": 8.012683868408203, "learning_rate": 1.687027928446516e-05, "loss": 0.8774, "step": 8575 }, { "epoch": 28.118032786885244, "grad_norm": 10.065281867980957, "learning_rate": 1.6869507645716727e-05, "loss": 1.0519, "step": 8576 }, { "epoch": 28.121311475409836, "grad_norm": 7.421697616577148, "learning_rate": 1.6868735929507453e-05, "loss": 1.0195, "step": 8577 }, { "epoch": 28.124590163934425, "grad_norm": 10.559404373168945, "learning_rate": 1.6867964135846043e-05, "loss": 0.9369, "step": 8578 }, { "epoch": 28.127868852459017, "grad_norm": 7.587333679199219, "learning_rate": 1.6867192264741196e-05, "loss": 1.0238, "step": 8579 }, { "epoch": 28.131147540983605, "grad_norm": 7.288105487823486, "learning_rate": 1.6866420316201614e-05, "loss": 0.8879, "step": 8580 }, { "epoch": 28.134426229508197, "grad_norm": 7.336978435516357, "learning_rate": 1.6865648290236007e-05, "loss": 0.9307, "step": 8581 }, { "epoch": 28.137704918032785, "grad_norm": 11.74873161315918, "learning_rate": 1.6864876186853072e-05, "loss": 0.9933, "step": 8582 }, { "epoch": 28.140983606557377, "grad_norm": 6.25191593170166, "learning_rate": 1.6864104006061525e-05, "loss": 1.0126, "step": 8583 }, { "epoch": 28.14426229508197, "grad_norm": 6.79330587387085, "learning_rate": 1.686333174787006e-05, "loss": 1.0201, "step": 8584 }, { "epoch": 28.147540983606557, "grad_norm": 6.67326545715332, "learning_rate": 1.6862559412287403e-05, "loss": 0.8978, "step": 8585 }, { "epoch": 28.15081967213115, "grad_norm": 20.70047950744629, "learning_rate": 1.6861786999322248e-05, "loss": 0.8025, "step": 8586 }, { "epoch": 28.154098360655738, "grad_norm": 7.757859230041504, "learning_rate": 1.6861014508983313e-05, "loss": 1.1602, "step": 8587 }, { "epoch": 28.15737704918033, "grad_norm": 7.516115665435791, "learning_rate": 1.6860241941279305e-05, "loss": 0.8762, "step": 8588 }, { "epoch": 28.160655737704918, "grad_norm": 6.801368713378906, "learning_rate": 1.6859469296218937e-05, "loss": 0.8607, "step": 8589 }, { "epoch": 28.16393442622951, "grad_norm": 8.78493595123291, "learning_rate": 1.6858696573810917e-05, "loss": 1.088, "step": 8590 }, { "epoch": 28.167213114754098, "grad_norm": 7.170875072479248, "learning_rate": 1.6857923774063965e-05, "loss": 1.1421, "step": 8591 }, { "epoch": 28.17049180327869, "grad_norm": 8.233372688293457, "learning_rate": 1.6857150896986795e-05, "loss": 1.0823, "step": 8592 }, { "epoch": 28.17377049180328, "grad_norm": 9.090840339660645, "learning_rate": 1.6856377942588114e-05, "loss": 1.1324, "step": 8593 }, { "epoch": 28.17704918032787, "grad_norm": 7.895556926727295, "learning_rate": 1.6855604910876645e-05, "loss": 0.969, "step": 8594 }, { "epoch": 28.18032786885246, "grad_norm": 8.204977035522461, "learning_rate": 1.6854831801861102e-05, "loss": 1.0, "step": 8595 }, { "epoch": 28.18360655737705, "grad_norm": 7.644473552703857, "learning_rate": 1.6854058615550203e-05, "loss": 0.9753, "step": 8596 }, { "epoch": 28.18688524590164, "grad_norm": 8.974413871765137, "learning_rate": 1.6853285351952665e-05, "loss": 0.9703, "step": 8597 }, { "epoch": 28.19016393442623, "grad_norm": 7.09705924987793, "learning_rate": 1.6852512011077213e-05, "loss": 1.0171, "step": 8598 }, { "epoch": 28.19344262295082, "grad_norm": 21.457468032836914, "learning_rate": 1.685173859293256e-05, "loss": 1.0061, "step": 8599 }, { "epoch": 28.19672131147541, "grad_norm": 8.68314266204834, "learning_rate": 1.685096509752743e-05, "loss": 1.0385, "step": 8600 }, { "epoch": 28.2, "grad_norm": 7.29564094543457, "learning_rate": 1.6850191524870548e-05, "loss": 0.8624, "step": 8601 }, { "epoch": 28.20327868852459, "grad_norm": 8.673382759094238, "learning_rate": 1.6849417874970633e-05, "loss": 0.9843, "step": 8602 }, { "epoch": 28.20655737704918, "grad_norm": 9.14428424835205, "learning_rate": 1.684864414783641e-05, "loss": 1.0736, "step": 8603 }, { "epoch": 28.20983606557377, "grad_norm": 8.69699478149414, "learning_rate": 1.6847870343476603e-05, "loss": 1.2509, "step": 8604 }, { "epoch": 28.21311475409836, "grad_norm": 9.360671997070312, "learning_rate": 1.684709646189994e-05, "loss": 0.8455, "step": 8605 }, { "epoch": 28.21639344262295, "grad_norm": 7.548166275024414, "learning_rate": 1.684632250311514e-05, "loss": 1.0403, "step": 8606 }, { "epoch": 28.21967213114754, "grad_norm": 7.282131671905518, "learning_rate": 1.684554846713094e-05, "loss": 1.0894, "step": 8607 }, { "epoch": 28.222950819672132, "grad_norm": 10.77672004699707, "learning_rate": 1.6844774353956062e-05, "loss": 0.8522, "step": 8608 }, { "epoch": 28.22622950819672, "grad_norm": 7.411276340484619, "learning_rate": 1.6844000163599237e-05, "loss": 1.0882, "step": 8609 }, { "epoch": 28.229508196721312, "grad_norm": 15.550032615661621, "learning_rate": 1.6843225896069192e-05, "loss": 0.7862, "step": 8610 }, { "epoch": 28.2327868852459, "grad_norm": 6.691198348999023, "learning_rate": 1.6842451551374663e-05, "loss": 1.1859, "step": 8611 }, { "epoch": 28.236065573770492, "grad_norm": 6.614813804626465, "learning_rate": 1.6841677129524377e-05, "loss": 1.0199, "step": 8612 }, { "epoch": 28.23934426229508, "grad_norm": 8.196917533874512, "learning_rate": 1.6840902630527066e-05, "loss": 0.8668, "step": 8613 }, { "epoch": 28.242622950819673, "grad_norm": 6.262859344482422, "learning_rate": 1.6840128054391467e-05, "loss": 1.0916, "step": 8614 }, { "epoch": 28.24590163934426, "grad_norm": 9.714930534362793, "learning_rate": 1.683935340112631e-05, "loss": 1.1738, "step": 8615 }, { "epoch": 28.249180327868853, "grad_norm": 7.5666303634643555, "learning_rate": 1.6838578670740337e-05, "loss": 1.0374, "step": 8616 }, { "epoch": 28.25245901639344, "grad_norm": 9.702642440795898, "learning_rate": 1.6837803863242276e-05, "loss": 1.2871, "step": 8617 }, { "epoch": 28.255737704918033, "grad_norm": 8.450826644897461, "learning_rate": 1.683702897864087e-05, "loss": 1.1655, "step": 8618 }, { "epoch": 28.25901639344262, "grad_norm": 7.5229902267456055, "learning_rate": 1.6836254016944848e-05, "loss": 0.8344, "step": 8619 }, { "epoch": 28.262295081967213, "grad_norm": 7.1069231033325195, "learning_rate": 1.683547897816296e-05, "loss": 0.9082, "step": 8620 }, { "epoch": 28.2655737704918, "grad_norm": 6.596324920654297, "learning_rate": 1.6834703862303933e-05, "loss": 0.9043, "step": 8621 }, { "epoch": 28.268852459016394, "grad_norm": 6.889361381530762, "learning_rate": 1.683392866937652e-05, "loss": 1.1351, "step": 8622 }, { "epoch": 28.272131147540982, "grad_norm": 8.512450218200684, "learning_rate": 1.683315339938945e-05, "loss": 1.0112, "step": 8623 }, { "epoch": 28.275409836065574, "grad_norm": 7.60504674911499, "learning_rate": 1.6832378052351472e-05, "loss": 0.9592, "step": 8624 }, { "epoch": 28.278688524590162, "grad_norm": 11.379887580871582, "learning_rate": 1.6831602628271332e-05, "loss": 1.1036, "step": 8625 }, { "epoch": 28.281967213114754, "grad_norm": 5.802578449249268, "learning_rate": 1.6830827127157764e-05, "loss": 1.2184, "step": 8626 }, { "epoch": 28.285245901639342, "grad_norm": 7.126239776611328, "learning_rate": 1.683005154901952e-05, "loss": 1.1715, "step": 8627 }, { "epoch": 28.288524590163934, "grad_norm": 7.7443928718566895, "learning_rate": 1.682927589386534e-05, "loss": 1.0377, "step": 8628 }, { "epoch": 28.291803278688526, "grad_norm": 6.74267053604126, "learning_rate": 1.6828500161703983e-05, "loss": 1.3054, "step": 8629 }, { "epoch": 28.295081967213115, "grad_norm": 7.1449503898620605, "learning_rate": 1.682772435254418e-05, "loss": 1.3013, "step": 8630 }, { "epoch": 28.298360655737707, "grad_norm": 6.8919453620910645, "learning_rate": 1.6826948466394685e-05, "loss": 1.1254, "step": 8631 }, { "epoch": 28.301639344262295, "grad_norm": 6.028007507324219, "learning_rate": 1.682617250326425e-05, "loss": 1.1542, "step": 8632 }, { "epoch": 28.304918032786887, "grad_norm": 6.984482765197754, "learning_rate": 1.6825396463161623e-05, "loss": 0.8803, "step": 8633 }, { "epoch": 28.308196721311475, "grad_norm": 64.30311584472656, "learning_rate": 1.682462034609555e-05, "loss": 1.0615, "step": 8634 }, { "epoch": 28.311475409836067, "grad_norm": 9.744955062866211, "learning_rate": 1.6823844152074795e-05, "loss": 1.2375, "step": 8635 }, { "epoch": 28.314754098360655, "grad_norm": 7.206366539001465, "learning_rate": 1.6823067881108095e-05, "loss": 0.972, "step": 8636 }, { "epoch": 28.318032786885247, "grad_norm": 10.89926815032959, "learning_rate": 1.6822291533204213e-05, "loss": 0.9928, "step": 8637 }, { "epoch": 28.321311475409836, "grad_norm": 9.07611083984375, "learning_rate": 1.6821515108371898e-05, "loss": 1.0725, "step": 8638 }, { "epoch": 28.324590163934428, "grad_norm": 8.004317283630371, "learning_rate": 1.682073860661991e-05, "loss": 1.2029, "step": 8639 }, { "epoch": 28.327868852459016, "grad_norm": 8.791322708129883, "learning_rate": 1.6819962027957004e-05, "loss": 1.1913, "step": 8640 }, { "epoch": 28.331147540983608, "grad_norm": 7.204840183258057, "learning_rate": 1.6819185372391932e-05, "loss": 0.9775, "step": 8641 }, { "epoch": 28.334426229508196, "grad_norm": 8.471467971801758, "learning_rate": 1.6818408639933456e-05, "loss": 0.9917, "step": 8642 }, { "epoch": 28.337704918032788, "grad_norm": 6.870920181274414, "learning_rate": 1.6817631830590335e-05, "loss": 1.1586, "step": 8643 }, { "epoch": 28.340983606557376, "grad_norm": 9.982475280761719, "learning_rate": 1.6816854944371326e-05, "loss": 1.0187, "step": 8644 }, { "epoch": 28.34426229508197, "grad_norm": 8.922905921936035, "learning_rate": 1.6816077981285186e-05, "loss": 1.1282, "step": 8645 }, { "epoch": 28.347540983606557, "grad_norm": 15.903501510620117, "learning_rate": 1.6815300941340686e-05, "loss": 1.1573, "step": 8646 }, { "epoch": 28.35081967213115, "grad_norm": 7.044106960296631, "learning_rate": 1.6814523824546577e-05, "loss": 0.9873, "step": 8647 }, { "epoch": 28.354098360655737, "grad_norm": 8.763701438903809, "learning_rate": 1.681374663091163e-05, "loss": 1.0029, "step": 8648 }, { "epoch": 28.35737704918033, "grad_norm": 8.633296966552734, "learning_rate": 1.68129693604446e-05, "loss": 1.0978, "step": 8649 }, { "epoch": 28.360655737704917, "grad_norm": 11.219415664672852, "learning_rate": 1.6812192013154262e-05, "loss": 0.9442, "step": 8650 }, { "epoch": 28.36393442622951, "grad_norm": 10.400541305541992, "learning_rate": 1.681141458904937e-05, "loss": 1.0304, "step": 8651 }, { "epoch": 28.367213114754097, "grad_norm": 6.659030437469482, "learning_rate": 1.6810637088138704e-05, "loss": 1.0719, "step": 8652 }, { "epoch": 28.37049180327869, "grad_norm": 10.71953296661377, "learning_rate": 1.680985951043102e-05, "loss": 1.0814, "step": 8653 }, { "epoch": 28.373770491803278, "grad_norm": 6.594222068786621, "learning_rate": 1.6809081855935087e-05, "loss": 1.3918, "step": 8654 }, { "epoch": 28.37704918032787, "grad_norm": 10.776144027709961, "learning_rate": 1.680830412465968e-05, "loss": 1.1357, "step": 8655 }, { "epoch": 28.380327868852458, "grad_norm": 8.004888534545898, "learning_rate": 1.6807526316613562e-05, "loss": 1.1588, "step": 8656 }, { "epoch": 28.38360655737705, "grad_norm": 8.333483695983887, "learning_rate": 1.6806748431805512e-05, "loss": 1.1152, "step": 8657 }, { "epoch": 28.386885245901638, "grad_norm": 7.737148284912109, "learning_rate": 1.680597047024429e-05, "loss": 1.0952, "step": 8658 }, { "epoch": 28.39016393442623, "grad_norm": 7.5290069580078125, "learning_rate": 1.6805192431938676e-05, "loss": 1.1035, "step": 8659 }, { "epoch": 28.39344262295082, "grad_norm": 10.602743148803711, "learning_rate": 1.6804414316897448e-05, "loss": 1.0737, "step": 8660 }, { "epoch": 28.39672131147541, "grad_norm": 8.281742095947266, "learning_rate": 1.6803636125129366e-05, "loss": 1.025, "step": 8661 }, { "epoch": 28.4, "grad_norm": 23.570880889892578, "learning_rate": 1.6802857856643214e-05, "loss": 1.04, "step": 8662 }, { "epoch": 28.40327868852459, "grad_norm": 9.235420227050781, "learning_rate": 1.6802079511447772e-05, "loss": 0.9424, "step": 8663 }, { "epoch": 28.40655737704918, "grad_norm": 8.104305267333984, "learning_rate": 1.6801301089551803e-05, "loss": 1.1409, "step": 8664 }, { "epoch": 28.40983606557377, "grad_norm": 9.805007934570312, "learning_rate": 1.68005225909641e-05, "loss": 0.8816, "step": 8665 }, { "epoch": 28.41311475409836, "grad_norm": 7.530628681182861, "learning_rate": 1.6799744015693428e-05, "loss": 0.9536, "step": 8666 }, { "epoch": 28.41639344262295, "grad_norm": 6.466772556304932, "learning_rate": 1.679896536374858e-05, "loss": 1.1785, "step": 8667 }, { "epoch": 28.41967213114754, "grad_norm": 8.384756088256836, "learning_rate": 1.679818663513832e-05, "loss": 1.014, "step": 8668 }, { "epoch": 28.42295081967213, "grad_norm": 7.42551326751709, "learning_rate": 1.679740782987144e-05, "loss": 1.2874, "step": 8669 }, { "epoch": 28.42622950819672, "grad_norm": 8.577795028686523, "learning_rate": 1.679662894795672e-05, "loss": 0.9689, "step": 8670 }, { "epoch": 28.42950819672131, "grad_norm": 8.290506362915039, "learning_rate": 1.6795849989402943e-05, "loss": 0.9755, "step": 8671 }, { "epoch": 28.432786885245903, "grad_norm": 6.885207653045654, "learning_rate": 1.6795070954218888e-05, "loss": 1.2251, "step": 8672 }, { "epoch": 28.43606557377049, "grad_norm": 8.595900535583496, "learning_rate": 1.6794291842413346e-05, "loss": 1.1119, "step": 8673 }, { "epoch": 28.439344262295084, "grad_norm": 7.424514293670654, "learning_rate": 1.67935126539951e-05, "loss": 1.119, "step": 8674 }, { "epoch": 28.442622950819672, "grad_norm": 7.349252700805664, "learning_rate": 1.679273338897293e-05, "loss": 1.0123, "step": 8675 }, { "epoch": 28.445901639344264, "grad_norm": 6.923219203948975, "learning_rate": 1.6791954047355635e-05, "loss": 0.8582, "step": 8676 }, { "epoch": 28.449180327868852, "grad_norm": 6.941025733947754, "learning_rate": 1.6791174629151995e-05, "loss": 0.9077, "step": 8677 }, { "epoch": 28.452459016393444, "grad_norm": 6.571949481964111, "learning_rate": 1.6790395134370797e-05, "loss": 1.1673, "step": 8678 }, { "epoch": 28.455737704918032, "grad_norm": 9.386005401611328, "learning_rate": 1.6789615563020832e-05, "loss": 1.2031, "step": 8679 }, { "epoch": 28.459016393442624, "grad_norm": 8.26280689239502, "learning_rate": 1.67888359151109e-05, "loss": 1.0223, "step": 8680 }, { "epoch": 28.462295081967213, "grad_norm": 8.811514854431152, "learning_rate": 1.678805619064978e-05, "loss": 1.0305, "step": 8681 }, { "epoch": 28.465573770491805, "grad_norm": 6.447183609008789, "learning_rate": 1.6787276389646264e-05, "loss": 0.9298, "step": 8682 }, { "epoch": 28.468852459016393, "grad_norm": 8.310578346252441, "learning_rate": 1.6786496512109156e-05, "loss": 1.0012, "step": 8683 }, { "epoch": 28.472131147540985, "grad_norm": 10.167219161987305, "learning_rate": 1.6785716558047242e-05, "loss": 0.9689, "step": 8684 }, { "epoch": 28.475409836065573, "grad_norm": 8.998746871948242, "learning_rate": 1.6784936527469318e-05, "loss": 0.9526, "step": 8685 }, { "epoch": 28.478688524590165, "grad_norm": 6.8303542137146, "learning_rate": 1.678415642038418e-05, "loss": 1.0142, "step": 8686 }, { "epoch": 28.481967213114753, "grad_norm": 6.402760982513428, "learning_rate": 1.6783376236800624e-05, "loss": 1.1238, "step": 8687 }, { "epoch": 28.485245901639345, "grad_norm": 6.6610026359558105, "learning_rate": 1.6782595976727448e-05, "loss": 1.0544, "step": 8688 }, { "epoch": 28.488524590163934, "grad_norm": 11.165426254272461, "learning_rate": 1.6781815640173448e-05, "loss": 0.8732, "step": 8689 }, { "epoch": 28.491803278688526, "grad_norm": 6.089877605438232, "learning_rate": 1.678103522714743e-05, "loss": 1.1277, "step": 8690 }, { "epoch": 28.495081967213114, "grad_norm": 8.741555213928223, "learning_rate": 1.678025473765819e-05, "loss": 1.0326, "step": 8691 }, { "epoch": 28.498360655737706, "grad_norm": 9.371903419494629, "learning_rate": 1.6779474171714524e-05, "loss": 1.09, "step": 8692 }, { "epoch": 28.501639344262294, "grad_norm": 16.355863571166992, "learning_rate": 1.6778693529325237e-05, "loss": 1.0797, "step": 8693 }, { "epoch": 28.504918032786886, "grad_norm": 6.995573043823242, "learning_rate": 1.6777912810499136e-05, "loss": 1.4487, "step": 8694 }, { "epoch": 28.508196721311474, "grad_norm": 6.700090408325195, "learning_rate": 1.6777132015245017e-05, "loss": 1.0681, "step": 8695 }, { "epoch": 28.511475409836066, "grad_norm": 8.049543380737305, "learning_rate": 1.6776351143571695e-05, "loss": 1.0918, "step": 8696 }, { "epoch": 28.514754098360655, "grad_norm": 8.932374000549316, "learning_rate": 1.677557019548796e-05, "loss": 0.9647, "step": 8697 }, { "epoch": 28.518032786885247, "grad_norm": 7.2185211181640625, "learning_rate": 1.6774789171002634e-05, "loss": 1.1362, "step": 8698 }, { "epoch": 28.521311475409835, "grad_norm": 11.514482498168945, "learning_rate": 1.677400807012451e-05, "loss": 1.0565, "step": 8699 }, { "epoch": 28.524590163934427, "grad_norm": 6.85330867767334, "learning_rate": 1.6773226892862405e-05, "loss": 1.0638, "step": 8700 }, { "epoch": 28.527868852459015, "grad_norm": 9.016705513000488, "learning_rate": 1.6772445639225125e-05, "loss": 0.9204, "step": 8701 }, { "epoch": 28.531147540983607, "grad_norm": 7.275710105895996, "learning_rate": 1.6771664309221475e-05, "loss": 1.0297, "step": 8702 }, { "epoch": 28.534426229508195, "grad_norm": 8.331086158752441, "learning_rate": 1.6770882902860272e-05, "loss": 1.1865, "step": 8703 }, { "epoch": 28.537704918032787, "grad_norm": 8.79014778137207, "learning_rate": 1.6770101420150324e-05, "loss": 0.9391, "step": 8704 }, { "epoch": 28.540983606557376, "grad_norm": 7.914542198181152, "learning_rate": 1.676931986110044e-05, "loss": 1.1414, "step": 8705 }, { "epoch": 28.544262295081968, "grad_norm": 7.843631744384766, "learning_rate": 1.676853822571944e-05, "loss": 0.8698, "step": 8706 }, { "epoch": 28.547540983606556, "grad_norm": 9.607110023498535, "learning_rate": 1.6767756514016132e-05, "loss": 1.0424, "step": 8707 }, { "epoch": 28.550819672131148, "grad_norm": 7.706268787384033, "learning_rate": 1.6766974725999338e-05, "loss": 1.0487, "step": 8708 }, { "epoch": 28.554098360655736, "grad_norm": 7.651754856109619, "learning_rate": 1.6766192861677863e-05, "loss": 0.9302, "step": 8709 }, { "epoch": 28.557377049180328, "grad_norm": 10.933037757873535, "learning_rate": 1.676541092106053e-05, "loss": 1.2942, "step": 8710 }, { "epoch": 28.560655737704916, "grad_norm": 9.394599914550781, "learning_rate": 1.6764628904156153e-05, "loss": 0.967, "step": 8711 }, { "epoch": 28.56393442622951, "grad_norm": 9.644620895385742, "learning_rate": 1.676384681097355e-05, "loss": 0.8997, "step": 8712 }, { "epoch": 28.567213114754097, "grad_norm": 7.748284339904785, "learning_rate": 1.6763064641521548e-05, "loss": 0.9462, "step": 8713 }, { "epoch": 28.57049180327869, "grad_norm": 10.674386978149414, "learning_rate": 1.6762282395808956e-05, "loss": 0.8275, "step": 8714 }, { "epoch": 28.57377049180328, "grad_norm": 13.679313659667969, "learning_rate": 1.67615000738446e-05, "loss": 0.8981, "step": 8715 }, { "epoch": 28.57704918032787, "grad_norm": 7.818674087524414, "learning_rate": 1.6760717675637298e-05, "loss": 0.9457, "step": 8716 }, { "epoch": 28.58032786885246, "grad_norm": 7.030300140380859, "learning_rate": 1.675993520119588e-05, "loss": 1.0558, "step": 8717 }, { "epoch": 28.58360655737705, "grad_norm": 7.178831100463867, "learning_rate": 1.675915265052916e-05, "loss": 1.1506, "step": 8718 }, { "epoch": 28.58688524590164, "grad_norm": 33.20570755004883, "learning_rate": 1.6758370023645968e-05, "loss": 0.9676, "step": 8719 }, { "epoch": 28.59016393442623, "grad_norm": 9.594151496887207, "learning_rate": 1.6757587320555124e-05, "loss": 1.0596, "step": 8720 }, { "epoch": 28.59344262295082, "grad_norm": 25.087182998657227, "learning_rate": 1.675680454126546e-05, "loss": 1.4088, "step": 8721 }, { "epoch": 28.59672131147541, "grad_norm": 10.333134651184082, "learning_rate": 1.6756021685785797e-05, "loss": 0.9846, "step": 8722 }, { "epoch": 28.6, "grad_norm": 9.040982246398926, "learning_rate": 1.6755238754124965e-05, "loss": 1.0214, "step": 8723 }, { "epoch": 28.60327868852459, "grad_norm": 9.196134567260742, "learning_rate": 1.6754455746291792e-05, "loss": 0.94, "step": 8724 }, { "epoch": 28.60655737704918, "grad_norm": 7.371367931365967, "learning_rate": 1.675367266229511e-05, "loss": 1.0328, "step": 8725 }, { "epoch": 28.60983606557377, "grad_norm": 7.767712116241455, "learning_rate": 1.6752889502143747e-05, "loss": 0.9234, "step": 8726 }, { "epoch": 28.613114754098362, "grad_norm": 9.777127265930176, "learning_rate": 1.6752106265846533e-05, "loss": 1.3175, "step": 8727 }, { "epoch": 28.61639344262295, "grad_norm": 6.822981357574463, "learning_rate": 1.67513229534123e-05, "loss": 0.994, "step": 8728 }, { "epoch": 28.619672131147542, "grad_norm": 8.949775695800781, "learning_rate": 1.675053956484988e-05, "loss": 1.1515, "step": 8729 }, { "epoch": 28.62295081967213, "grad_norm": 9.247782707214355, "learning_rate": 1.6749756100168107e-05, "loss": 1.1224, "step": 8730 }, { "epoch": 28.626229508196722, "grad_norm": 12.842855453491211, "learning_rate": 1.674897255937582e-05, "loss": 1.1315, "step": 8731 }, { "epoch": 28.62950819672131, "grad_norm": 11.432877540588379, "learning_rate": 1.6748188942481848e-05, "loss": 1.175, "step": 8732 }, { "epoch": 28.632786885245903, "grad_norm": 11.84620189666748, "learning_rate": 1.674740524949503e-05, "loss": 1.1635, "step": 8733 }, { "epoch": 28.63606557377049, "grad_norm": 8.5415620803833, "learning_rate": 1.67466214804242e-05, "loss": 0.9814, "step": 8734 }, { "epoch": 28.639344262295083, "grad_norm": 9.243104934692383, "learning_rate": 1.67458376352782e-05, "loss": 1.252, "step": 8735 }, { "epoch": 28.64262295081967, "grad_norm": 6.308099269866943, "learning_rate": 1.6745053714065866e-05, "loss": 1.1125, "step": 8736 }, { "epoch": 28.645901639344263, "grad_norm": 6.098517894744873, "learning_rate": 1.674426971679604e-05, "loss": 1.1762, "step": 8737 }, { "epoch": 28.64918032786885, "grad_norm": 9.03567123413086, "learning_rate": 1.6743485643477556e-05, "loss": 1.1254, "step": 8738 }, { "epoch": 28.652459016393443, "grad_norm": 8.075315475463867, "learning_rate": 1.6742701494119266e-05, "loss": 1.0868, "step": 8739 }, { "epoch": 28.65573770491803, "grad_norm": 7.544663429260254, "learning_rate": 1.674191726873e-05, "loss": 1.0117, "step": 8740 }, { "epoch": 28.659016393442624, "grad_norm": 6.838205814361572, "learning_rate": 1.674113296731861e-05, "loss": 1.3077, "step": 8741 }, { "epoch": 28.662295081967212, "grad_norm": 6.263871192932129, "learning_rate": 1.674034858989394e-05, "loss": 1.028, "step": 8742 }, { "epoch": 28.665573770491804, "grad_norm": 12.788844108581543, "learning_rate": 1.6739564136464827e-05, "loss": 1.1171, "step": 8743 }, { "epoch": 28.668852459016392, "grad_norm": 7.16091251373291, "learning_rate": 1.673877960704012e-05, "loss": 1.2067, "step": 8744 }, { "epoch": 28.672131147540984, "grad_norm": 9.125649452209473, "learning_rate": 1.673799500162867e-05, "loss": 1.0299, "step": 8745 }, { "epoch": 28.675409836065572, "grad_norm": 8.254965782165527, "learning_rate": 1.6737210320239322e-05, "loss": 1.1654, "step": 8746 }, { "epoch": 28.678688524590164, "grad_norm": 12.593592643737793, "learning_rate": 1.6736425562880918e-05, "loss": 1.1346, "step": 8747 }, { "epoch": 28.681967213114753, "grad_norm": 8.11722469329834, "learning_rate": 1.6735640729562314e-05, "loss": 1.3154, "step": 8748 }, { "epoch": 28.685245901639345, "grad_norm": 8.974175453186035, "learning_rate": 1.6734855820292356e-05, "loss": 0.9952, "step": 8749 }, { "epoch": 28.688524590163933, "grad_norm": 8.646791458129883, "learning_rate": 1.6734070835079897e-05, "loss": 0.989, "step": 8750 }, { "epoch": 28.691803278688525, "grad_norm": 7.630774021148682, "learning_rate": 1.673328577393379e-05, "loss": 1.0381, "step": 8751 }, { "epoch": 28.695081967213113, "grad_norm": 13.864777565002441, "learning_rate": 1.6732500636862883e-05, "loss": 1.0338, "step": 8752 }, { "epoch": 28.698360655737705, "grad_norm": 15.642867088317871, "learning_rate": 1.6731715423876028e-05, "loss": 1.3167, "step": 8753 }, { "epoch": 28.701639344262293, "grad_norm": 8.270283699035645, "learning_rate": 1.6730930134982088e-05, "loss": 0.981, "step": 8754 }, { "epoch": 28.704918032786885, "grad_norm": 7.131494522094727, "learning_rate": 1.673014477018991e-05, "loss": 1.2554, "step": 8755 }, { "epoch": 28.708196721311474, "grad_norm": 12.1078519821167, "learning_rate": 1.6729359329508353e-05, "loss": 0.871, "step": 8756 }, { "epoch": 28.711475409836066, "grad_norm": 8.208989143371582, "learning_rate": 1.672857381294627e-05, "loss": 1.1045, "step": 8757 }, { "epoch": 28.714754098360658, "grad_norm": 10.002843856811523, "learning_rate": 1.6727788220512522e-05, "loss": 1.1373, "step": 8758 }, { "epoch": 28.718032786885246, "grad_norm": 7.592384338378906, "learning_rate": 1.672700255221597e-05, "loss": 0.9839, "step": 8759 }, { "epoch": 28.721311475409838, "grad_norm": 7.941250324249268, "learning_rate": 1.6726216808065467e-05, "loss": 1.3269, "step": 8760 }, { "epoch": 28.724590163934426, "grad_norm": 9.112756729125977, "learning_rate": 1.6725430988069875e-05, "loss": 1.1249, "step": 8761 }, { "epoch": 28.727868852459018, "grad_norm": 10.61374568939209, "learning_rate": 1.6724645092238058e-05, "loss": 1.3715, "step": 8762 }, { "epoch": 28.731147540983606, "grad_norm": 7.449748516082764, "learning_rate": 1.6723859120578873e-05, "loss": 1.0952, "step": 8763 }, { "epoch": 28.7344262295082, "grad_norm": 9.137974739074707, "learning_rate": 1.672307307310119e-05, "loss": 1.1353, "step": 8764 }, { "epoch": 28.737704918032787, "grad_norm": 8.173929214477539, "learning_rate": 1.6722286949813866e-05, "loss": 1.0158, "step": 8765 }, { "epoch": 28.74098360655738, "grad_norm": 7.230685710906982, "learning_rate": 1.6721500750725764e-05, "loss": 1.2019, "step": 8766 }, { "epoch": 28.744262295081967, "grad_norm": 11.089292526245117, "learning_rate": 1.6720714475845755e-05, "loss": 1.1228, "step": 8767 }, { "epoch": 28.74754098360656, "grad_norm": 16.90262222290039, "learning_rate": 1.6719928125182703e-05, "loss": 1.2018, "step": 8768 }, { "epoch": 28.750819672131147, "grad_norm": 8.190685272216797, "learning_rate": 1.6719141698745477e-05, "loss": 1.1672, "step": 8769 }, { "epoch": 28.75409836065574, "grad_norm": 9.866602897644043, "learning_rate": 1.6718355196542936e-05, "loss": 0.7842, "step": 8770 }, { "epoch": 28.757377049180327, "grad_norm": 12.303840637207031, "learning_rate": 1.671756861858396e-05, "loss": 0.9155, "step": 8771 }, { "epoch": 28.76065573770492, "grad_norm": 14.827351570129395, "learning_rate": 1.6716781964877413e-05, "loss": 1.0898, "step": 8772 }, { "epoch": 28.763934426229508, "grad_norm": 7.497868537902832, "learning_rate": 1.671599523543216e-05, "loss": 0.8926, "step": 8773 }, { "epoch": 28.7672131147541, "grad_norm": 8.07261848449707, "learning_rate": 1.6715208430257085e-05, "loss": 1.1569, "step": 8774 }, { "epoch": 28.770491803278688, "grad_norm": 9.795478820800781, "learning_rate": 1.6714421549361048e-05, "loss": 0.8973, "step": 8775 }, { "epoch": 28.77377049180328, "grad_norm": 9.288662910461426, "learning_rate": 1.6713634592752926e-05, "loss": 0.9596, "step": 8776 }, { "epoch": 28.777049180327868, "grad_norm": 9.79969596862793, "learning_rate": 1.6712847560441598e-05, "loss": 1.1261, "step": 8777 }, { "epoch": 28.78032786885246, "grad_norm": 10.112549781799316, "learning_rate": 1.6712060452435933e-05, "loss": 1.1541, "step": 8778 }, { "epoch": 28.78360655737705, "grad_norm": 8.28842544555664, "learning_rate": 1.6711273268744804e-05, "loss": 1.1766, "step": 8779 }, { "epoch": 28.78688524590164, "grad_norm": 8.312104225158691, "learning_rate": 1.6710486009377092e-05, "loss": 1.1409, "step": 8780 }, { "epoch": 28.79016393442623, "grad_norm": 14.479676246643066, "learning_rate": 1.6709698674341677e-05, "loss": 0.9294, "step": 8781 }, { "epoch": 28.79344262295082, "grad_norm": 10.983872413635254, "learning_rate": 1.6708911263647433e-05, "loss": 1.1084, "step": 8782 }, { "epoch": 28.79672131147541, "grad_norm": 7.643303871154785, "learning_rate": 1.6708123777303233e-05, "loss": 1.0203, "step": 8783 }, { "epoch": 28.8, "grad_norm": 8.86036491394043, "learning_rate": 1.6707336215317968e-05, "loss": 0.8549, "step": 8784 }, { "epoch": 28.80327868852459, "grad_norm": 9.43356990814209, "learning_rate": 1.6706548577700514e-05, "loss": 1.293, "step": 8785 }, { "epoch": 28.80655737704918, "grad_norm": 8.32720947265625, "learning_rate": 1.6705760864459748e-05, "loss": 1.1044, "step": 8786 }, { "epoch": 28.80983606557377, "grad_norm": 8.731095314025879, "learning_rate": 1.6704973075604558e-05, "loss": 0.8839, "step": 8787 }, { "epoch": 28.81311475409836, "grad_norm": 7.48629903793335, "learning_rate": 1.6704185211143824e-05, "loss": 0.8409, "step": 8788 }, { "epoch": 28.81639344262295, "grad_norm": 9.313865661621094, "learning_rate": 1.670339727108643e-05, "loss": 1.1226, "step": 8789 }, { "epoch": 28.81967213114754, "grad_norm": 7.914771556854248, "learning_rate": 1.6702609255441267e-05, "loss": 0.934, "step": 8790 }, { "epoch": 28.82295081967213, "grad_norm": 7.171754837036133, "learning_rate": 1.6701821164217212e-05, "loss": 1.1549, "step": 8791 }, { "epoch": 28.82622950819672, "grad_norm": 7.0527849197387695, "learning_rate": 1.6701032997423155e-05, "loss": 1.1959, "step": 8792 }, { "epoch": 28.82950819672131, "grad_norm": 8.608988761901855, "learning_rate": 1.670024475506798e-05, "loss": 0.9454, "step": 8793 }, { "epoch": 28.832786885245902, "grad_norm": 8.153226852416992, "learning_rate": 1.6699456437160587e-05, "loss": 1.0567, "step": 8794 }, { "epoch": 28.83606557377049, "grad_norm": 24.15717315673828, "learning_rate": 1.6698668043709854e-05, "loss": 1.2223, "step": 8795 }, { "epoch": 28.839344262295082, "grad_norm": 7.696462154388428, "learning_rate": 1.6697879574724673e-05, "loss": 1.3596, "step": 8796 }, { "epoch": 28.84262295081967, "grad_norm": 7.403443813323975, "learning_rate": 1.6697091030213935e-05, "loss": 0.8983, "step": 8797 }, { "epoch": 28.845901639344262, "grad_norm": 8.440136909484863, "learning_rate": 1.6696302410186533e-05, "loss": 0.9424, "step": 8798 }, { "epoch": 28.84918032786885, "grad_norm": 7.48068904876709, "learning_rate": 1.669551371465136e-05, "loss": 1.0199, "step": 8799 }, { "epoch": 28.852459016393443, "grad_norm": 11.139698028564453, "learning_rate": 1.6694724943617306e-05, "loss": 1.0806, "step": 8800 }, { "epoch": 28.855737704918035, "grad_norm": 6.853739261627197, "learning_rate": 1.669393609709327e-05, "loss": 1.0007, "step": 8801 }, { "epoch": 28.859016393442623, "grad_norm": 9.28842544555664, "learning_rate": 1.6693147175088144e-05, "loss": 1.211, "step": 8802 }, { "epoch": 28.862295081967215, "grad_norm": 9.049936294555664, "learning_rate": 1.6692358177610823e-05, "loss": 1.1879, "step": 8803 }, { "epoch": 28.865573770491803, "grad_norm": 7.924197196960449, "learning_rate": 1.6691569104670206e-05, "loss": 0.9856, "step": 8804 }, { "epoch": 28.868852459016395, "grad_norm": 8.38231372833252, "learning_rate": 1.669077995627519e-05, "loss": 1.1429, "step": 8805 }, { "epoch": 28.872131147540983, "grad_norm": 7.265028953552246, "learning_rate": 1.6689990732434672e-05, "loss": 0.8822, "step": 8806 }, { "epoch": 28.875409836065575, "grad_norm": 10.005284309387207, "learning_rate": 1.6689201433157554e-05, "loss": 1.2051, "step": 8807 }, { "epoch": 28.878688524590164, "grad_norm": 7.31184720993042, "learning_rate": 1.6688412058452738e-05, "loss": 0.96, "step": 8808 }, { "epoch": 28.881967213114756, "grad_norm": 6.8786115646362305, "learning_rate": 1.6687622608329118e-05, "loss": 0.9821, "step": 8809 }, { "epoch": 28.885245901639344, "grad_norm": 6.700669288635254, "learning_rate": 1.6686833082795598e-05, "loss": 0.9493, "step": 8810 }, { "epoch": 28.888524590163936, "grad_norm": 8.72163200378418, "learning_rate": 1.6686043481861086e-05, "loss": 0.9918, "step": 8811 }, { "epoch": 28.891803278688524, "grad_norm": 8.358879089355469, "learning_rate": 1.668525380553448e-05, "loss": 1.0344, "step": 8812 }, { "epoch": 28.895081967213116, "grad_norm": 7.139632701873779, "learning_rate": 1.668446405382469e-05, "loss": 1.0229, "step": 8813 }, { "epoch": 28.898360655737704, "grad_norm": 6.557991027832031, "learning_rate": 1.6683674226740613e-05, "loss": 1.2108, "step": 8814 }, { "epoch": 28.901639344262296, "grad_norm": 12.616188049316406, "learning_rate": 1.6682884324291164e-05, "loss": 1.1606, "step": 8815 }, { "epoch": 28.904918032786885, "grad_norm": 7.295228004455566, "learning_rate": 1.6682094346485242e-05, "loss": 1.0254, "step": 8816 }, { "epoch": 28.908196721311477, "grad_norm": 6.977039337158203, "learning_rate": 1.668130429333176e-05, "loss": 1.1721, "step": 8817 }, { "epoch": 28.911475409836065, "grad_norm": 6.232088088989258, "learning_rate": 1.6680514164839624e-05, "loss": 1.041, "step": 8818 }, { "epoch": 28.914754098360657, "grad_norm": 6.831552505493164, "learning_rate": 1.667972396101775e-05, "loss": 0.9777, "step": 8819 }, { "epoch": 28.918032786885245, "grad_norm": 6.108039855957031, "learning_rate": 1.6678933681875035e-05, "loss": 1.108, "step": 8820 }, { "epoch": 28.921311475409837, "grad_norm": 7.574512004852295, "learning_rate": 1.6678143327420406e-05, "loss": 0.8223, "step": 8821 }, { "epoch": 28.924590163934425, "grad_norm": 7.461635589599609, "learning_rate": 1.6677352897662762e-05, "loss": 0.8071, "step": 8822 }, { "epoch": 28.927868852459017, "grad_norm": 6.459079742431641, "learning_rate": 1.6676562392611025e-05, "loss": 1.1852, "step": 8823 }, { "epoch": 28.931147540983606, "grad_norm": 7.162657737731934, "learning_rate": 1.6675771812274104e-05, "loss": 1.092, "step": 8824 }, { "epoch": 28.934426229508198, "grad_norm": 6.364645004272461, "learning_rate": 1.6674981156660916e-05, "loss": 0.8817, "step": 8825 }, { "epoch": 28.937704918032786, "grad_norm": 9.437915802001953, "learning_rate": 1.6674190425780372e-05, "loss": 1.0566, "step": 8826 }, { "epoch": 28.940983606557378, "grad_norm": 17.963001251220703, "learning_rate": 1.6673399619641392e-05, "loss": 0.9434, "step": 8827 }, { "epoch": 28.944262295081966, "grad_norm": 7.64657735824585, "learning_rate": 1.6672608738252896e-05, "loss": 1.1565, "step": 8828 }, { "epoch": 28.947540983606558, "grad_norm": 7.091639041900635, "learning_rate": 1.6671817781623794e-05, "loss": 0.9596, "step": 8829 }, { "epoch": 28.950819672131146, "grad_norm": 7.588685512542725, "learning_rate": 1.6671026749763012e-05, "loss": 1.0115, "step": 8830 }, { "epoch": 28.95409836065574, "grad_norm": 12.365408897399902, "learning_rate": 1.667023564267947e-05, "loss": 1.0425, "step": 8831 }, { "epoch": 28.957377049180327, "grad_norm": 9.229327201843262, "learning_rate": 1.6669444460382082e-05, "loss": 1.3794, "step": 8832 }, { "epoch": 28.96065573770492, "grad_norm": 6.008535861968994, "learning_rate": 1.6668653202879773e-05, "loss": 1.2959, "step": 8833 }, { "epoch": 28.963934426229507, "grad_norm": 8.614147186279297, "learning_rate": 1.666786187018147e-05, "loss": 1.0364, "step": 8834 }, { "epoch": 28.9672131147541, "grad_norm": 7.79154634475708, "learning_rate": 1.6667070462296088e-05, "loss": 1.0306, "step": 8835 }, { "epoch": 28.970491803278687, "grad_norm": 9.038490295410156, "learning_rate": 1.6666278979232554e-05, "loss": 1.1743, "step": 8836 }, { "epoch": 28.97377049180328, "grad_norm": 6.67938756942749, "learning_rate": 1.6665487420999796e-05, "loss": 1.1221, "step": 8837 }, { "epoch": 28.977049180327867, "grad_norm": 9.097375869750977, "learning_rate": 1.6664695787606735e-05, "loss": 0.9254, "step": 8838 }, { "epoch": 28.98032786885246, "grad_norm": 7.093966960906982, "learning_rate": 1.6663904079062302e-05, "loss": 1.1847, "step": 8839 }, { "epoch": 28.983606557377048, "grad_norm": 8.785818099975586, "learning_rate": 1.6663112295375418e-05, "loss": 1.1031, "step": 8840 }, { "epoch": 28.98688524590164, "grad_norm": 8.827447891235352, "learning_rate": 1.6662320436555014e-05, "loss": 1.1303, "step": 8841 }, { "epoch": 28.990163934426228, "grad_norm": 8.793669700622559, "learning_rate": 1.6661528502610025e-05, "loss": 1.0464, "step": 8842 }, { "epoch": 28.99344262295082, "grad_norm": 7.282190322875977, "learning_rate": 1.6660736493549374e-05, "loss": 1.2161, "step": 8843 }, { "epoch": 28.99672131147541, "grad_norm": 7.553589344024658, "learning_rate": 1.665994440938199e-05, "loss": 1.3428, "step": 8844 }, { "epoch": 29.0, "grad_norm": 9.34854507446289, "learning_rate": 1.665915225011681e-05, "loss": 0.9351, "step": 8845 }, { "epoch": 29.003278688524592, "grad_norm": 7.905797481536865, "learning_rate": 1.665836001576277e-05, "loss": 0.9486, "step": 8846 }, { "epoch": 29.00655737704918, "grad_norm": 9.012591361999512, "learning_rate": 1.6657567706328792e-05, "loss": 0.9497, "step": 8847 }, { "epoch": 29.009836065573772, "grad_norm": 7.314276218414307, "learning_rate": 1.6656775321823817e-05, "loss": 1.1465, "step": 8848 }, { "epoch": 29.01311475409836, "grad_norm": 6.673678874969482, "learning_rate": 1.665598286225678e-05, "loss": 0.8767, "step": 8849 }, { "epoch": 29.016393442622952, "grad_norm": 7.519339084625244, "learning_rate": 1.6655190327636615e-05, "loss": 1.0364, "step": 8850 }, { "epoch": 29.01967213114754, "grad_norm": 6.950882434844971, "learning_rate": 1.6654397717972258e-05, "loss": 1.1307, "step": 8851 }, { "epoch": 29.022950819672133, "grad_norm": 7.628293514251709, "learning_rate": 1.6653605033272653e-05, "loss": 0.9841, "step": 8852 }, { "epoch": 29.02622950819672, "grad_norm": 8.33553695678711, "learning_rate": 1.665281227354673e-05, "loss": 1.1077, "step": 8853 }, { "epoch": 29.029508196721313, "grad_norm": 8.660566329956055, "learning_rate": 1.665201943880343e-05, "loss": 1.0649, "step": 8854 }, { "epoch": 29.0327868852459, "grad_norm": 16.985002517700195, "learning_rate": 1.6651226529051695e-05, "loss": 0.8937, "step": 8855 }, { "epoch": 29.036065573770493, "grad_norm": 11.230765342712402, "learning_rate": 1.6650433544300468e-05, "loss": 0.822, "step": 8856 }, { "epoch": 29.03934426229508, "grad_norm": 8.096638679504395, "learning_rate": 1.6649640484558686e-05, "loss": 1.0452, "step": 8857 }, { "epoch": 29.042622950819673, "grad_norm": 8.380647659301758, "learning_rate": 1.6648847349835294e-05, "loss": 1.1068, "step": 8858 }, { "epoch": 29.04590163934426, "grad_norm": 8.08707046508789, "learning_rate": 1.6648054140139234e-05, "loss": 0.801, "step": 8859 }, { "epoch": 29.049180327868854, "grad_norm": 8.802886009216309, "learning_rate": 1.664726085547945e-05, "loss": 1.0215, "step": 8860 }, { "epoch": 29.052459016393442, "grad_norm": 8.092275619506836, "learning_rate": 1.6646467495864892e-05, "loss": 1.0956, "step": 8861 }, { "epoch": 29.055737704918034, "grad_norm": 6.8331122398376465, "learning_rate": 1.6645674061304502e-05, "loss": 1.0033, "step": 8862 }, { "epoch": 29.059016393442622, "grad_norm": 8.989145278930664, "learning_rate": 1.664488055180723e-05, "loss": 0.86, "step": 8863 }, { "epoch": 29.062295081967214, "grad_norm": 8.44471263885498, "learning_rate": 1.6644086967382015e-05, "loss": 1.005, "step": 8864 }, { "epoch": 29.065573770491802, "grad_norm": 6.282754898071289, "learning_rate": 1.6643293308037813e-05, "loss": 0.9521, "step": 8865 }, { "epoch": 29.068852459016394, "grad_norm": 8.083381652832031, "learning_rate": 1.664249957378357e-05, "loss": 0.8715, "step": 8866 }, { "epoch": 29.072131147540983, "grad_norm": 6.913165092468262, "learning_rate": 1.6641705764628243e-05, "loss": 1.2181, "step": 8867 }, { "epoch": 29.075409836065575, "grad_norm": 6.6178460121154785, "learning_rate": 1.6640911880580773e-05, "loss": 0.9291, "step": 8868 }, { "epoch": 29.078688524590163, "grad_norm": 12.051746368408203, "learning_rate": 1.664011792165012e-05, "loss": 1.0806, "step": 8869 }, { "epoch": 29.081967213114755, "grad_norm": 7.32055139541626, "learning_rate": 1.663932388784523e-05, "loss": 0.998, "step": 8870 }, { "epoch": 29.085245901639343, "grad_norm": 8.50241756439209, "learning_rate": 1.663852977917506e-05, "loss": 0.7894, "step": 8871 }, { "epoch": 29.088524590163935, "grad_norm": 16.753259658813477, "learning_rate": 1.663773559564857e-05, "loss": 1.2012, "step": 8872 }, { "epoch": 29.091803278688523, "grad_norm": 9.337568283081055, "learning_rate": 1.6636941337274705e-05, "loss": 1.1997, "step": 8873 }, { "epoch": 29.095081967213115, "grad_norm": 8.127442359924316, "learning_rate": 1.6636147004062424e-05, "loss": 1.1696, "step": 8874 }, { "epoch": 29.098360655737704, "grad_norm": 8.04888916015625, "learning_rate": 1.663535259602069e-05, "loss": 1.0862, "step": 8875 }, { "epoch": 29.101639344262296, "grad_norm": 6.876718521118164, "learning_rate": 1.6634558113158455e-05, "loss": 1.2341, "step": 8876 }, { "epoch": 29.104918032786884, "grad_norm": 6.279982089996338, "learning_rate": 1.6633763555484676e-05, "loss": 0.9699, "step": 8877 }, { "epoch": 29.108196721311476, "grad_norm": 6.512790203094482, "learning_rate": 1.663296892300832e-05, "loss": 0.9033, "step": 8878 }, { "epoch": 29.111475409836064, "grad_norm": 8.480949401855469, "learning_rate": 1.663217421573834e-05, "loss": 0.8448, "step": 8879 }, { "epoch": 29.114754098360656, "grad_norm": 12.242301940917969, "learning_rate": 1.6631379433683705e-05, "loss": 0.6985, "step": 8880 }, { "epoch": 29.118032786885244, "grad_norm": 6.572484016418457, "learning_rate": 1.6630584576853367e-05, "loss": 0.9358, "step": 8881 }, { "epoch": 29.121311475409836, "grad_norm": 7.918530464172363, "learning_rate": 1.6629789645256297e-05, "loss": 0.9648, "step": 8882 }, { "epoch": 29.124590163934425, "grad_norm": 9.697514533996582, "learning_rate": 1.662899463890145e-05, "loss": 1.0527, "step": 8883 }, { "epoch": 29.127868852459017, "grad_norm": 6.7675604820251465, "learning_rate": 1.66281995577978e-05, "loss": 1.1748, "step": 8884 }, { "epoch": 29.131147540983605, "grad_norm": 7.316075325012207, "learning_rate": 1.662740440195431e-05, "loss": 1.2279, "step": 8885 }, { "epoch": 29.134426229508197, "grad_norm": 7.588659286499023, "learning_rate": 1.6626609171379938e-05, "loss": 0.8658, "step": 8886 }, { "epoch": 29.137704918032785, "grad_norm": 10.449082374572754, "learning_rate": 1.6625813866083665e-05, "loss": 1.0216, "step": 8887 }, { "epoch": 29.140983606557377, "grad_norm": 13.602425575256348, "learning_rate": 1.6625018486074448e-05, "loss": 0.9148, "step": 8888 }, { "epoch": 29.14426229508197, "grad_norm": 6.926919460296631, "learning_rate": 1.6624223031361258e-05, "loss": 0.8924, "step": 8889 }, { "epoch": 29.147540983606557, "grad_norm": 8.556015968322754, "learning_rate": 1.662342750195307e-05, "loss": 0.9973, "step": 8890 }, { "epoch": 29.15081967213115, "grad_norm": 6.457183837890625, "learning_rate": 1.6622631897858848e-05, "loss": 1.016, "step": 8891 }, { "epoch": 29.154098360655738, "grad_norm": 7.8319549560546875, "learning_rate": 1.6621836219087565e-05, "loss": 0.8949, "step": 8892 }, { "epoch": 29.15737704918033, "grad_norm": 7.235404968261719, "learning_rate": 1.6621040465648196e-05, "loss": 1.0779, "step": 8893 }, { "epoch": 29.160655737704918, "grad_norm": 7.7443928718566895, "learning_rate": 1.6620244637549706e-05, "loss": 1.2109, "step": 8894 }, { "epoch": 29.16393442622951, "grad_norm": 7.205793380737305, "learning_rate": 1.6619448734801082e-05, "loss": 1.0763, "step": 8895 }, { "epoch": 29.167213114754098, "grad_norm": 10.742328643798828, "learning_rate": 1.6618652757411287e-05, "loss": 0.9277, "step": 8896 }, { "epoch": 29.17049180327869, "grad_norm": 8.755599975585938, "learning_rate": 1.66178567053893e-05, "loss": 1.0986, "step": 8897 }, { "epoch": 29.17377049180328, "grad_norm": 9.85329818725586, "learning_rate": 1.66170605787441e-05, "loss": 1.0593, "step": 8898 }, { "epoch": 29.17704918032787, "grad_norm": 7.595874786376953, "learning_rate": 1.6616264377484658e-05, "loss": 0.9694, "step": 8899 }, { "epoch": 29.18032786885246, "grad_norm": 9.919364929199219, "learning_rate": 1.661546810161996e-05, "loss": 1.2599, "step": 8900 }, { "epoch": 29.18360655737705, "grad_norm": 8.235891342163086, "learning_rate": 1.6614671751158978e-05, "loss": 1.0138, "step": 8901 }, { "epoch": 29.18688524590164, "grad_norm": 6.68744421005249, "learning_rate": 1.66138753261107e-05, "loss": 0.8571, "step": 8902 }, { "epoch": 29.19016393442623, "grad_norm": 6.459081649780273, "learning_rate": 1.6613078826484096e-05, "loss": 1.132, "step": 8903 }, { "epoch": 29.19344262295082, "grad_norm": 7.428539276123047, "learning_rate": 1.661228225228815e-05, "loss": 1.063, "step": 8904 }, { "epoch": 29.19672131147541, "grad_norm": 7.9213032722473145, "learning_rate": 1.6611485603531853e-05, "loss": 0.7976, "step": 8905 }, { "epoch": 29.2, "grad_norm": 9.1162109375, "learning_rate": 1.6610688880224178e-05, "loss": 0.9674, "step": 8906 }, { "epoch": 29.20327868852459, "grad_norm": 8.012373924255371, "learning_rate": 1.660989208237411e-05, "loss": 1.0092, "step": 8907 }, { "epoch": 29.20655737704918, "grad_norm": 7.763794422149658, "learning_rate": 1.660909520999064e-05, "loss": 0.97, "step": 8908 }, { "epoch": 29.20983606557377, "grad_norm": 4.833560466766357, "learning_rate": 1.6608298263082748e-05, "loss": 1.1677, "step": 8909 }, { "epoch": 29.21311475409836, "grad_norm": 6.5451154708862305, "learning_rate": 1.6607501241659424e-05, "loss": 1.2752, "step": 8910 }, { "epoch": 29.21639344262295, "grad_norm": 7.4187703132629395, "learning_rate": 1.660670414572965e-05, "loss": 0.8188, "step": 8911 }, { "epoch": 29.21967213114754, "grad_norm": 6.508037090301514, "learning_rate": 1.6605906975302422e-05, "loss": 0.9872, "step": 8912 }, { "epoch": 29.222950819672132, "grad_norm": 8.616531372070312, "learning_rate": 1.6605109730386718e-05, "loss": 1.0192, "step": 8913 }, { "epoch": 29.22622950819672, "grad_norm": 8.899201393127441, "learning_rate": 1.6604312410991542e-05, "loss": 1.1758, "step": 8914 }, { "epoch": 29.229508196721312, "grad_norm": 7.819016456604004, "learning_rate": 1.6603515017125873e-05, "loss": 1.1393, "step": 8915 }, { "epoch": 29.2327868852459, "grad_norm": 7.560910701751709, "learning_rate": 1.6602717548798707e-05, "loss": 1.0489, "step": 8916 }, { "epoch": 29.236065573770492, "grad_norm": 8.05225658416748, "learning_rate": 1.6601920006019036e-05, "loss": 1.0731, "step": 8917 }, { "epoch": 29.23934426229508, "grad_norm": 8.654884338378906, "learning_rate": 1.660112238879585e-05, "loss": 1.0161, "step": 8918 }, { "epoch": 29.242622950819673, "grad_norm": 6.933501243591309, "learning_rate": 1.6600324697138148e-05, "loss": 0.8831, "step": 8919 }, { "epoch": 29.24590163934426, "grad_norm": 6.5752034187316895, "learning_rate": 1.659952693105492e-05, "loss": 1.2416, "step": 8920 }, { "epoch": 29.249180327868853, "grad_norm": 7.815257549285889, "learning_rate": 1.6598729090555168e-05, "loss": 1.0656, "step": 8921 }, { "epoch": 29.25245901639344, "grad_norm": 8.429691314697266, "learning_rate": 1.659793117564788e-05, "loss": 0.9404, "step": 8922 }, { "epoch": 29.255737704918033, "grad_norm": 8.807943344116211, "learning_rate": 1.6597133186342062e-05, "loss": 1.1198, "step": 8923 }, { "epoch": 29.25901639344262, "grad_norm": 7.901695251464844, "learning_rate": 1.6596335122646706e-05, "loss": 1.0703, "step": 8924 }, { "epoch": 29.262295081967213, "grad_norm": 7.8929595947265625, "learning_rate": 1.6595536984570816e-05, "loss": 0.8147, "step": 8925 }, { "epoch": 29.2655737704918, "grad_norm": 7.515659809112549, "learning_rate": 1.6594738772123382e-05, "loss": 1.147, "step": 8926 }, { "epoch": 29.268852459016394, "grad_norm": 6.247848987579346, "learning_rate": 1.6593940485313416e-05, "loss": 0.764, "step": 8927 }, { "epoch": 29.272131147540982, "grad_norm": 6.561486721038818, "learning_rate": 1.6593142124149918e-05, "loss": 1.1796, "step": 8928 }, { "epoch": 29.275409836065574, "grad_norm": 8.019855499267578, "learning_rate": 1.659234368864188e-05, "loss": 0.813, "step": 8929 }, { "epoch": 29.278688524590162, "grad_norm": 7.693089962005615, "learning_rate": 1.659154517879832e-05, "loss": 1.084, "step": 8930 }, { "epoch": 29.281967213114754, "grad_norm": 7.257343292236328, "learning_rate": 1.6590746594628228e-05, "loss": 1.2124, "step": 8931 }, { "epoch": 29.285245901639342, "grad_norm": 9.636839866638184, "learning_rate": 1.658994793614062e-05, "loss": 1.0405, "step": 8932 }, { "epoch": 29.288524590163934, "grad_norm": 8.165958404541016, "learning_rate": 1.6589149203344493e-05, "loss": 1.063, "step": 8933 }, { "epoch": 29.291803278688526, "grad_norm": 10.12884521484375, "learning_rate": 1.6588350396248865e-05, "loss": 1.2288, "step": 8934 }, { "epoch": 29.295081967213115, "grad_norm": 8.618362426757812, "learning_rate": 1.658755151486273e-05, "loss": 0.9525, "step": 8935 }, { "epoch": 29.298360655737707, "grad_norm": 7.883306503295898, "learning_rate": 1.6586752559195106e-05, "loss": 1.0175, "step": 8936 }, { "epoch": 29.301639344262295, "grad_norm": 7.6898369789123535, "learning_rate": 1.6585953529254993e-05, "loss": 1.2484, "step": 8937 }, { "epoch": 29.304918032786887, "grad_norm": 8.13710880279541, "learning_rate": 1.6585154425051412e-05, "loss": 1.0952, "step": 8938 }, { "epoch": 29.308196721311475, "grad_norm": 7.733907699584961, "learning_rate": 1.6584355246593365e-05, "loss": 0.9484, "step": 8939 }, { "epoch": 29.311475409836067, "grad_norm": 5.479319095611572, "learning_rate": 1.6583555993889868e-05, "loss": 1.0604, "step": 8940 }, { "epoch": 29.314754098360655, "grad_norm": 8.561259269714355, "learning_rate": 1.6582756666949934e-05, "loss": 0.9626, "step": 8941 }, { "epoch": 29.318032786885247, "grad_norm": 8.261876106262207, "learning_rate": 1.6581957265782568e-05, "loss": 1.0115, "step": 8942 }, { "epoch": 29.321311475409836, "grad_norm": 12.0236234664917, "learning_rate": 1.6581157790396796e-05, "loss": 0.961, "step": 8943 }, { "epoch": 29.324590163934428, "grad_norm": 6.922274589538574, "learning_rate": 1.6580358240801624e-05, "loss": 0.7436, "step": 8944 }, { "epoch": 29.327868852459016, "grad_norm": 19.740833282470703, "learning_rate": 1.657955861700607e-05, "loss": 1.2888, "step": 8945 }, { "epoch": 29.331147540983608, "grad_norm": 26.10443687438965, "learning_rate": 1.6578758919019157e-05, "loss": 1.4236, "step": 8946 }, { "epoch": 29.334426229508196, "grad_norm": 7.673182010650635, "learning_rate": 1.6577959146849893e-05, "loss": 1.165, "step": 8947 }, { "epoch": 29.337704918032788, "grad_norm": 7.590333938598633, "learning_rate": 1.65771593005073e-05, "loss": 0.995, "step": 8948 }, { "epoch": 29.340983606557376, "grad_norm": 8.388291358947754, "learning_rate": 1.65763593800004e-05, "loss": 0.9622, "step": 8949 }, { "epoch": 29.34426229508197, "grad_norm": 11.545839309692383, "learning_rate": 1.657555938533821e-05, "loss": 1.4268, "step": 8950 }, { "epoch": 29.347540983606557, "grad_norm": 8.064774513244629, "learning_rate": 1.6574759316529748e-05, "loss": 0.9505, "step": 8951 }, { "epoch": 29.35081967213115, "grad_norm": 7.411064147949219, "learning_rate": 1.657395917358404e-05, "loss": 1.1433, "step": 8952 }, { "epoch": 29.354098360655737, "grad_norm": 7.729657173156738, "learning_rate": 1.6573158956510107e-05, "loss": 0.9106, "step": 8953 }, { "epoch": 29.35737704918033, "grad_norm": 8.239086151123047, "learning_rate": 1.6572358665316973e-05, "loss": 0.8323, "step": 8954 }, { "epoch": 29.360655737704917, "grad_norm": 6.646846294403076, "learning_rate": 1.657155830001366e-05, "loss": 0.9339, "step": 8955 }, { "epoch": 29.36393442622951, "grad_norm": 6.787014484405518, "learning_rate": 1.6570757860609198e-05, "loss": 1.1562, "step": 8956 }, { "epoch": 29.367213114754097, "grad_norm": 7.006695747375488, "learning_rate": 1.6569957347112606e-05, "loss": 1.2061, "step": 8957 }, { "epoch": 29.37049180327869, "grad_norm": 10.139253616333008, "learning_rate": 1.6569156759532916e-05, "loss": 1.2261, "step": 8958 }, { "epoch": 29.373770491803278, "grad_norm": 8.31210994720459, "learning_rate": 1.656835609787915e-05, "loss": 0.9344, "step": 8959 }, { "epoch": 29.37704918032787, "grad_norm": 7.607883930206299, "learning_rate": 1.6567555362160345e-05, "loss": 1.0952, "step": 8960 }, { "epoch": 29.380327868852458, "grad_norm": 11.424872398376465, "learning_rate": 1.656675455238552e-05, "loss": 1.1113, "step": 8961 }, { "epoch": 29.38360655737705, "grad_norm": 8.498170852661133, "learning_rate": 1.6565953668563713e-05, "loss": 0.9551, "step": 8962 }, { "epoch": 29.386885245901638, "grad_norm": 6.478883266448975, "learning_rate": 1.656515271070395e-05, "loss": 0.8415, "step": 8963 }, { "epoch": 29.39016393442623, "grad_norm": 6.8572845458984375, "learning_rate": 1.6564351678815263e-05, "loss": 1.1492, "step": 8964 }, { "epoch": 29.39344262295082, "grad_norm": 6.355286121368408, "learning_rate": 1.6563550572906687e-05, "loss": 1.0567, "step": 8965 }, { "epoch": 29.39672131147541, "grad_norm": 8.462289810180664, "learning_rate": 1.6562749392987255e-05, "loss": 0.9178, "step": 8966 }, { "epoch": 29.4, "grad_norm": 7.573614120483398, "learning_rate": 1.6561948139065997e-05, "loss": 1.0198, "step": 8967 }, { "epoch": 29.40327868852459, "grad_norm": 7.506148338317871, "learning_rate": 1.6561146811151953e-05, "loss": 1.1526, "step": 8968 }, { "epoch": 29.40655737704918, "grad_norm": 10.05983829498291, "learning_rate": 1.6560345409254154e-05, "loss": 1.2678, "step": 8969 }, { "epoch": 29.40983606557377, "grad_norm": 7.37281608581543, "learning_rate": 1.6559543933381645e-05, "loss": 1.1386, "step": 8970 }, { "epoch": 29.41311475409836, "grad_norm": 6.731396675109863, "learning_rate": 1.655874238354345e-05, "loss": 0.9673, "step": 8971 }, { "epoch": 29.41639344262295, "grad_norm": 10.817797660827637, "learning_rate": 1.6557940759748623e-05, "loss": 0.8658, "step": 8972 }, { "epoch": 29.41967213114754, "grad_norm": 6.244029521942139, "learning_rate": 1.655713906200619e-05, "loss": 1.0493, "step": 8973 }, { "epoch": 29.42295081967213, "grad_norm": 7.655313968658447, "learning_rate": 1.6556337290325202e-05, "loss": 0.952, "step": 8974 }, { "epoch": 29.42622950819672, "grad_norm": 6.936994552612305, "learning_rate": 1.655553544471469e-05, "loss": 1.3015, "step": 8975 }, { "epoch": 29.42950819672131, "grad_norm": 6.65730094909668, "learning_rate": 1.65547335251837e-05, "loss": 1.0545, "step": 8976 }, { "epoch": 29.432786885245903, "grad_norm": 5.710883140563965, "learning_rate": 1.6553931531741276e-05, "loss": 1.0873, "step": 8977 }, { "epoch": 29.43606557377049, "grad_norm": 7.229382038116455, "learning_rate": 1.6553129464396457e-05, "loss": 1.0057, "step": 8978 }, { "epoch": 29.439344262295084, "grad_norm": 10.711736679077148, "learning_rate": 1.6552327323158294e-05, "loss": 1.0229, "step": 8979 }, { "epoch": 29.442622950819672, "grad_norm": 7.235929489135742, "learning_rate": 1.6551525108035824e-05, "loss": 1.2437, "step": 8980 }, { "epoch": 29.445901639344264, "grad_norm": 6.1961870193481445, "learning_rate": 1.6550722819038096e-05, "loss": 0.999, "step": 8981 }, { "epoch": 29.449180327868852, "grad_norm": 6.983061790466309, "learning_rate": 1.654992045617416e-05, "loss": 1.0637, "step": 8982 }, { "epoch": 29.452459016393444, "grad_norm": 8.77487564086914, "learning_rate": 1.654911801945306e-05, "loss": 1.2078, "step": 8983 }, { "epoch": 29.455737704918032, "grad_norm": 8.667234420776367, "learning_rate": 1.6548315508883845e-05, "loss": 0.8412, "step": 8984 }, { "epoch": 29.459016393442624, "grad_norm": 7.932459354400635, "learning_rate": 1.654751292447556e-05, "loss": 0.9385, "step": 8985 }, { "epoch": 29.462295081967213, "grad_norm": 6.081642150878906, "learning_rate": 1.6546710266237264e-05, "loss": 1.1064, "step": 8986 }, { "epoch": 29.465573770491805, "grad_norm": 9.261052131652832, "learning_rate": 1.6545907534178e-05, "loss": 1.0096, "step": 8987 }, { "epoch": 29.468852459016393, "grad_norm": 7.843503475189209, "learning_rate": 1.6545104728306825e-05, "loss": 1.2211, "step": 8988 }, { "epoch": 29.472131147540985, "grad_norm": 7.897749900817871, "learning_rate": 1.654430184863279e-05, "loss": 0.8459, "step": 8989 }, { "epoch": 29.475409836065573, "grad_norm": 7.752945899963379, "learning_rate": 1.6543498895164944e-05, "loss": 1.1954, "step": 8990 }, { "epoch": 29.478688524590165, "grad_norm": 9.531105041503906, "learning_rate": 1.6542695867912346e-05, "loss": 0.9044, "step": 8991 }, { "epoch": 29.481967213114753, "grad_norm": 6.728018760681152, "learning_rate": 1.654189276688405e-05, "loss": 1.0268, "step": 8992 }, { "epoch": 29.485245901639345, "grad_norm": 5.783438205718994, "learning_rate": 1.654108959208911e-05, "loss": 1.1372, "step": 8993 }, { "epoch": 29.488524590163934, "grad_norm": 7.02293586730957, "learning_rate": 1.6540286343536583e-05, "loss": 1.1102, "step": 8994 }, { "epoch": 29.491803278688526, "grad_norm": 8.756723403930664, "learning_rate": 1.653948302123553e-05, "loss": 0.9514, "step": 8995 }, { "epoch": 29.495081967213114, "grad_norm": 10.646401405334473, "learning_rate": 1.6538679625195002e-05, "loss": 1.1122, "step": 8996 }, { "epoch": 29.498360655737706, "grad_norm": 5.875757694244385, "learning_rate": 1.653787615542407e-05, "loss": 1.1405, "step": 8997 }, { "epoch": 29.501639344262294, "grad_norm": 8.257566452026367, "learning_rate": 1.6537072611931778e-05, "loss": 0.9019, "step": 8998 }, { "epoch": 29.504918032786886, "grad_norm": 6.2572221755981445, "learning_rate": 1.65362689947272e-05, "loss": 0.9515, "step": 8999 }, { "epoch": 29.508196721311474, "grad_norm": 6.61385440826416, "learning_rate": 1.6535465303819394e-05, "loss": 0.9703, "step": 9000 }, { "epoch": 29.511475409836066, "grad_norm": 7.09272575378418, "learning_rate": 1.653466153921742e-05, "loss": 1.019, "step": 9001 }, { "epoch": 29.514754098360655, "grad_norm": 7.454678535461426, "learning_rate": 1.6533857700930345e-05, "loss": 0.7308, "step": 9002 }, { "epoch": 29.518032786885247, "grad_norm": 10.779346466064453, "learning_rate": 1.6533053788967227e-05, "loss": 1.0067, "step": 9003 }, { "epoch": 29.521311475409835, "grad_norm": 13.275521278381348, "learning_rate": 1.6532249803337138e-05, "loss": 1.0121, "step": 9004 }, { "epoch": 29.524590163934427, "grad_norm": 7.020145416259766, "learning_rate": 1.653144574404914e-05, "loss": 0.9367, "step": 9005 }, { "epoch": 29.527868852459015, "grad_norm": 13.660667419433594, "learning_rate": 1.65306416111123e-05, "loss": 0.9269, "step": 9006 }, { "epoch": 29.531147540983607, "grad_norm": 6.980009078979492, "learning_rate": 1.6529837404535685e-05, "loss": 1.1327, "step": 9007 }, { "epoch": 29.534426229508195, "grad_norm": 8.330110549926758, "learning_rate": 1.6529033124328364e-05, "loss": 0.9021, "step": 9008 }, { "epoch": 29.537704918032787, "grad_norm": 6.286548614501953, "learning_rate": 1.6528228770499406e-05, "loss": 1.064, "step": 9009 }, { "epoch": 29.540983606557376, "grad_norm": 5.896543025970459, "learning_rate": 1.6527424343057884e-05, "loss": 1.1375, "step": 9010 }, { "epoch": 29.544262295081968, "grad_norm": 6.971000671386719, "learning_rate": 1.652661984201286e-05, "loss": 0.8701, "step": 9011 }, { "epoch": 29.547540983606556, "grad_norm": 5.884976387023926, "learning_rate": 1.6525815267373415e-05, "loss": 1.1066, "step": 9012 }, { "epoch": 29.550819672131148, "grad_norm": 6.68723201751709, "learning_rate": 1.652501061914862e-05, "loss": 1.3152, "step": 9013 }, { "epoch": 29.554098360655736, "grad_norm": 7.44412899017334, "learning_rate": 1.652420589734754e-05, "loss": 0.9384, "step": 9014 }, { "epoch": 29.557377049180328, "grad_norm": 8.116512298583984, "learning_rate": 1.6523401101979258e-05, "loss": 1.0308, "step": 9015 }, { "epoch": 29.560655737704916, "grad_norm": 9.02014446258545, "learning_rate": 1.6522596233052847e-05, "loss": 1.0427, "step": 9016 }, { "epoch": 29.56393442622951, "grad_norm": 6.504359245300293, "learning_rate": 1.6521791290577384e-05, "loss": 1.0546, "step": 9017 }, { "epoch": 29.567213114754097, "grad_norm": 12.667441368103027, "learning_rate": 1.6520986274561937e-05, "loss": 1.2288, "step": 9018 }, { "epoch": 29.57049180327869, "grad_norm": 12.322954177856445, "learning_rate": 1.652018118501559e-05, "loss": 1.0834, "step": 9019 }, { "epoch": 29.57377049180328, "grad_norm": 8.237569808959961, "learning_rate": 1.6519376021947424e-05, "loss": 0.9958, "step": 9020 }, { "epoch": 29.57704918032787, "grad_norm": 7.390897750854492, "learning_rate": 1.6518570785366515e-05, "loss": 1.1187, "step": 9021 }, { "epoch": 29.58032786885246, "grad_norm": 6.661206245422363, "learning_rate": 1.651776547528194e-05, "loss": 1.12, "step": 9022 }, { "epoch": 29.58360655737705, "grad_norm": 7.984954357147217, "learning_rate": 1.6516960091702787e-05, "loss": 0.9597, "step": 9023 }, { "epoch": 29.58688524590164, "grad_norm": 14.394529342651367, "learning_rate": 1.6516154634638128e-05, "loss": 1.2546, "step": 9024 }, { "epoch": 29.59016393442623, "grad_norm": 9.73770523071289, "learning_rate": 1.6515349104097058e-05, "loss": 1.1094, "step": 9025 }, { "epoch": 29.59344262295082, "grad_norm": 12.255176544189453, "learning_rate": 1.6514543500088645e-05, "loss": 1.1982, "step": 9026 }, { "epoch": 29.59672131147541, "grad_norm": 8.05077075958252, "learning_rate": 1.651373782262198e-05, "loss": 0.8243, "step": 9027 }, { "epoch": 29.6, "grad_norm": 7.919269561767578, "learning_rate": 1.6512932071706153e-05, "loss": 0.9414, "step": 9028 }, { "epoch": 29.60327868852459, "grad_norm": 6.999645709991455, "learning_rate": 1.6512126247350245e-05, "loss": 1.046, "step": 9029 }, { "epoch": 29.60655737704918, "grad_norm": 9.090781211853027, "learning_rate": 1.6511320349563345e-05, "loss": 0.9962, "step": 9030 }, { "epoch": 29.60983606557377, "grad_norm": 7.200259208679199, "learning_rate": 1.6510514378354532e-05, "loss": 1.095, "step": 9031 }, { "epoch": 29.613114754098362, "grad_norm": 8.128661155700684, "learning_rate": 1.65097083337329e-05, "loss": 1.0602, "step": 9032 }, { "epoch": 29.61639344262295, "grad_norm": 8.703454971313477, "learning_rate": 1.6508902215707544e-05, "loss": 0.8309, "step": 9033 }, { "epoch": 29.619672131147542, "grad_norm": 6.4214768409729, "learning_rate": 1.6508096024287543e-05, "loss": 1.0876, "step": 9034 }, { "epoch": 29.62295081967213, "grad_norm": 6.544049263000488, "learning_rate": 1.6507289759481992e-05, "loss": 1.0488, "step": 9035 }, { "epoch": 29.626229508196722, "grad_norm": 91.15095520019531, "learning_rate": 1.6506483421299987e-05, "loss": 1.1969, "step": 9036 }, { "epoch": 29.62950819672131, "grad_norm": 8.274673461914062, "learning_rate": 1.6505677009750614e-05, "loss": 1.0967, "step": 9037 }, { "epoch": 29.632786885245903, "grad_norm": 7.494746685028076, "learning_rate": 1.650487052484297e-05, "loss": 1.039, "step": 9038 }, { "epoch": 29.63606557377049, "grad_norm": 8.768660545349121, "learning_rate": 1.6504063966586148e-05, "loss": 1.1129, "step": 9039 }, { "epoch": 29.639344262295083, "grad_norm": 7.108516216278076, "learning_rate": 1.650325733498924e-05, "loss": 1.0801, "step": 9040 }, { "epoch": 29.64262295081967, "grad_norm": 9.972467422485352, "learning_rate": 1.6502450630061348e-05, "loss": 0.9856, "step": 9041 }, { "epoch": 29.645901639344263, "grad_norm": 9.264782905578613, "learning_rate": 1.6501643851811557e-05, "loss": 0.9558, "step": 9042 }, { "epoch": 29.64918032786885, "grad_norm": 5.36868143081665, "learning_rate": 1.6500837000248978e-05, "loss": 1.4331, "step": 9043 }, { "epoch": 29.652459016393443, "grad_norm": 9.771270751953125, "learning_rate": 1.65000300753827e-05, "loss": 1.0309, "step": 9044 }, { "epoch": 29.65573770491803, "grad_norm": 6.740991592407227, "learning_rate": 1.6499223077221824e-05, "loss": 0.7996, "step": 9045 }, { "epoch": 29.659016393442624, "grad_norm": 16.35560417175293, "learning_rate": 1.649841600577545e-05, "loss": 0.9343, "step": 9046 }, { "epoch": 29.662295081967212, "grad_norm": 8.065964698791504, "learning_rate": 1.6497608861052682e-05, "loss": 0.9861, "step": 9047 }, { "epoch": 29.665573770491804, "grad_norm": 8.270367622375488, "learning_rate": 1.6496801643062616e-05, "loss": 1.0205, "step": 9048 }, { "epoch": 29.668852459016392, "grad_norm": 10.566370010375977, "learning_rate": 1.6495994351814358e-05, "loss": 0.9025, "step": 9049 }, { "epoch": 29.672131147540984, "grad_norm": 24.789051055908203, "learning_rate": 1.649518698731701e-05, "loss": 0.9731, "step": 9050 }, { "epoch": 29.675409836065572, "grad_norm": 6.482335567474365, "learning_rate": 1.649437954957967e-05, "loss": 1.1167, "step": 9051 }, { "epoch": 29.678688524590164, "grad_norm": 7.71373176574707, "learning_rate": 1.6493572038611452e-05, "loss": 1.0476, "step": 9052 }, { "epoch": 29.681967213114753, "grad_norm": 7.717639446258545, "learning_rate": 1.649276445442146e-05, "loss": 0.9037, "step": 9053 }, { "epoch": 29.685245901639345, "grad_norm": 8.803733825683594, "learning_rate": 1.649195679701879e-05, "loss": 1.1035, "step": 9054 }, { "epoch": 29.688524590163933, "grad_norm": 7.226432800292969, "learning_rate": 1.6491149066412566e-05, "loss": 1.0703, "step": 9055 }, { "epoch": 29.691803278688525, "grad_norm": 9.30444049835205, "learning_rate": 1.6490341262611885e-05, "loss": 0.9624, "step": 9056 }, { "epoch": 29.695081967213113, "grad_norm": 11.799521446228027, "learning_rate": 1.6489533385625856e-05, "loss": 1.0709, "step": 9057 }, { "epoch": 29.698360655737705, "grad_norm": 6.630303382873535, "learning_rate": 1.648872543546359e-05, "loss": 1.2627, "step": 9058 }, { "epoch": 29.701639344262293, "grad_norm": 7.906154155731201, "learning_rate": 1.6487917412134202e-05, "loss": 1.0572, "step": 9059 }, { "epoch": 29.704918032786885, "grad_norm": 8.831853866577148, "learning_rate": 1.6487109315646795e-05, "loss": 0.7854, "step": 9060 }, { "epoch": 29.708196721311474, "grad_norm": 8.314544677734375, "learning_rate": 1.6486301146010487e-05, "loss": 1.2, "step": 9061 }, { "epoch": 29.711475409836066, "grad_norm": 7.04927921295166, "learning_rate": 1.648549290323439e-05, "loss": 1.1294, "step": 9062 }, { "epoch": 29.714754098360658, "grad_norm": 7.083399772644043, "learning_rate": 1.648468458732762e-05, "loss": 1.0663, "step": 9063 }, { "epoch": 29.718032786885246, "grad_norm": 7.222980499267578, "learning_rate": 1.6483876198299284e-05, "loss": 0.9583, "step": 9064 }, { "epoch": 29.721311475409838, "grad_norm": 22.108835220336914, "learning_rate": 1.6483067736158504e-05, "loss": 0.9381, "step": 9065 }, { "epoch": 29.724590163934426, "grad_norm": 7.313708305358887, "learning_rate": 1.6482259200914397e-05, "loss": 1.0316, "step": 9066 }, { "epoch": 29.727868852459018, "grad_norm": 10.67381477355957, "learning_rate": 1.6481450592576076e-05, "loss": 1.1497, "step": 9067 }, { "epoch": 29.731147540983606, "grad_norm": 6.653229236602783, "learning_rate": 1.6480641911152662e-05, "loss": 0.9994, "step": 9068 }, { "epoch": 29.7344262295082, "grad_norm": 9.499868392944336, "learning_rate": 1.647983315665327e-05, "loss": 1.1042, "step": 9069 }, { "epoch": 29.737704918032787, "grad_norm": 7.661556720733643, "learning_rate": 1.647902432908702e-05, "loss": 1.1489, "step": 9070 }, { "epoch": 29.74098360655738, "grad_norm": 8.649056434631348, "learning_rate": 1.647821542846304e-05, "loss": 1.0031, "step": 9071 }, { "epoch": 29.744262295081967, "grad_norm": 8.18298625946045, "learning_rate": 1.647740645479044e-05, "loss": 0.8424, "step": 9072 }, { "epoch": 29.74754098360656, "grad_norm": 7.2896599769592285, "learning_rate": 1.6476597408078352e-05, "loss": 1.0026, "step": 9073 }, { "epoch": 29.750819672131147, "grad_norm": 8.993461608886719, "learning_rate": 1.647578828833589e-05, "loss": 1.2394, "step": 9074 }, { "epoch": 29.75409836065574, "grad_norm": 7.459568500518799, "learning_rate": 1.6474979095572184e-05, "loss": 1.0397, "step": 9075 }, { "epoch": 29.757377049180327, "grad_norm": 8.169556617736816, "learning_rate": 1.6474169829796353e-05, "loss": 0.9088, "step": 9076 }, { "epoch": 29.76065573770492, "grad_norm": 7.04005241394043, "learning_rate": 1.6473360491017533e-05, "loss": 1.1443, "step": 9077 }, { "epoch": 29.763934426229508, "grad_norm": 8.261011123657227, "learning_rate": 1.6472551079244836e-05, "loss": 0.9187, "step": 9078 }, { "epoch": 29.7672131147541, "grad_norm": 32.70747756958008, "learning_rate": 1.64717415944874e-05, "loss": 1.1716, "step": 9079 }, { "epoch": 29.770491803278688, "grad_norm": 6.8261823654174805, "learning_rate": 1.6470932036754348e-05, "loss": 1.0626, "step": 9080 }, { "epoch": 29.77377049180328, "grad_norm": 6.493194103240967, "learning_rate": 1.647012240605481e-05, "loss": 1.0725, "step": 9081 }, { "epoch": 29.777049180327868, "grad_norm": 6.6183624267578125, "learning_rate": 1.6469312702397912e-05, "loss": 1.1265, "step": 9082 }, { "epoch": 29.78032786885246, "grad_norm": 7.1482415199279785, "learning_rate": 1.6468502925792787e-05, "loss": 0.9028, "step": 9083 }, { "epoch": 29.78360655737705, "grad_norm": 6.679760456085205, "learning_rate": 1.6467693076248567e-05, "loss": 0.9581, "step": 9084 }, { "epoch": 29.78688524590164, "grad_norm": 6.359525680541992, "learning_rate": 1.6466883153774383e-05, "loss": 1.0413, "step": 9085 }, { "epoch": 29.79016393442623, "grad_norm": 6.841779708862305, "learning_rate": 1.6466073158379367e-05, "loss": 1.0354, "step": 9086 }, { "epoch": 29.79344262295082, "grad_norm": 7.071261405944824, "learning_rate": 1.6465263090072652e-05, "loss": 1.3521, "step": 9087 }, { "epoch": 29.79672131147541, "grad_norm": 7.7605671882629395, "learning_rate": 1.6464452948863377e-05, "loss": 1.0206, "step": 9088 }, { "epoch": 29.8, "grad_norm": 8.046605110168457, "learning_rate": 1.646364273476067e-05, "loss": 0.8153, "step": 9089 }, { "epoch": 29.80327868852459, "grad_norm": 7.274789810180664, "learning_rate": 1.646283244777367e-05, "loss": 0.8323, "step": 9090 }, { "epoch": 29.80655737704918, "grad_norm": 8.359763145446777, "learning_rate": 1.6462022087911516e-05, "loss": 1.1129, "step": 9091 }, { "epoch": 29.80983606557377, "grad_norm": 7.0977959632873535, "learning_rate": 1.6461211655183347e-05, "loss": 1.0691, "step": 9092 }, { "epoch": 29.81311475409836, "grad_norm": 6.852219581604004, "learning_rate": 1.6460401149598296e-05, "loss": 1.2393, "step": 9093 }, { "epoch": 29.81639344262295, "grad_norm": 6.973237037658691, "learning_rate": 1.6459590571165504e-05, "loss": 0.9111, "step": 9094 }, { "epoch": 29.81967213114754, "grad_norm": 6.902731895446777, "learning_rate": 1.6458779919894117e-05, "loss": 1.0135, "step": 9095 }, { "epoch": 29.82295081967213, "grad_norm": 6.449577331542969, "learning_rate": 1.6457969195793264e-05, "loss": 1.0464, "step": 9096 }, { "epoch": 29.82622950819672, "grad_norm": 6.748837947845459, "learning_rate": 1.6457158398872098e-05, "loss": 0.9802, "step": 9097 }, { "epoch": 29.82950819672131, "grad_norm": 9.903390884399414, "learning_rate": 1.6456347529139756e-05, "loss": 0.9741, "step": 9098 }, { "epoch": 29.832786885245902, "grad_norm": 6.522668838500977, "learning_rate": 1.6455536586605384e-05, "loss": 1.0464, "step": 9099 }, { "epoch": 29.83606557377049, "grad_norm": 7.9816813468933105, "learning_rate": 1.6454725571278124e-05, "loss": 0.996, "step": 9100 }, { "epoch": 29.839344262295082, "grad_norm": 7.467655181884766, "learning_rate": 1.645391448316712e-05, "loss": 0.9254, "step": 9101 }, { "epoch": 29.84262295081967, "grad_norm": 9.311360359191895, "learning_rate": 1.645310332228152e-05, "loss": 1.1646, "step": 9102 }, { "epoch": 29.845901639344262, "grad_norm": 6.709935665130615, "learning_rate": 1.6452292088630475e-05, "loss": 1.0338, "step": 9103 }, { "epoch": 29.84918032786885, "grad_norm": 6.419373512268066, "learning_rate": 1.645148078222312e-05, "loss": 0.8905, "step": 9104 }, { "epoch": 29.852459016393443, "grad_norm": 6.691270351409912, "learning_rate": 1.645066940306862e-05, "loss": 0.9675, "step": 9105 }, { "epoch": 29.855737704918035, "grad_norm": 8.124870300292969, "learning_rate": 1.644985795117611e-05, "loss": 1.1571, "step": 9106 }, { "epoch": 29.859016393442623, "grad_norm": 8.854856491088867, "learning_rate": 1.6449046426554747e-05, "loss": 1.0527, "step": 9107 }, { "epoch": 29.862295081967215, "grad_norm": 16.85637092590332, "learning_rate": 1.6448234829213684e-05, "loss": 0.6521, "step": 9108 }, { "epoch": 29.865573770491803, "grad_norm": 6.916195392608643, "learning_rate": 1.6447423159162062e-05, "loss": 1.2073, "step": 9109 }, { "epoch": 29.868852459016395, "grad_norm": 9.419954299926758, "learning_rate": 1.6446611416409047e-05, "loss": 0.8959, "step": 9110 }, { "epoch": 29.872131147540983, "grad_norm": 7.076431751251221, "learning_rate": 1.6445799600963782e-05, "loss": 1.1661, "step": 9111 }, { "epoch": 29.875409836065575, "grad_norm": 5.678464889526367, "learning_rate": 1.6444987712835424e-05, "loss": 1.0056, "step": 9112 }, { "epoch": 29.878688524590164, "grad_norm": 6.46904993057251, "learning_rate": 1.644417575203313e-05, "loss": 0.7913, "step": 9113 }, { "epoch": 29.881967213114756, "grad_norm": 6.519177436828613, "learning_rate": 1.6443363718566053e-05, "loss": 1.0469, "step": 9114 }, { "epoch": 29.885245901639344, "grad_norm": 6.965478897094727, "learning_rate": 1.644255161244335e-05, "loss": 0.9417, "step": 9115 }, { "epoch": 29.888524590163936, "grad_norm": 6.775918483734131, "learning_rate": 1.644173943367418e-05, "loss": 0.8003, "step": 9116 }, { "epoch": 29.891803278688524, "grad_norm": 7.048976421356201, "learning_rate": 1.6440927182267698e-05, "loss": 0.9772, "step": 9117 }, { "epoch": 29.895081967213116, "grad_norm": 46.217079162597656, "learning_rate": 1.6440114858233068e-05, "loss": 1.1781, "step": 9118 }, { "epoch": 29.898360655737704, "grad_norm": 6.998043537139893, "learning_rate": 1.6439302461579447e-05, "loss": 0.942, "step": 9119 }, { "epoch": 29.901639344262296, "grad_norm": 7.766012668609619, "learning_rate": 1.6438489992315993e-05, "loss": 0.9931, "step": 9120 }, { "epoch": 29.904918032786885, "grad_norm": 6.595963954925537, "learning_rate": 1.6437677450451875e-05, "loss": 1.0835, "step": 9121 }, { "epoch": 29.908196721311477, "grad_norm": 8.437572479248047, "learning_rate": 1.6436864835996243e-05, "loss": 0.8517, "step": 9122 }, { "epoch": 29.911475409836065, "grad_norm": 6.580631256103516, "learning_rate": 1.6436052148958274e-05, "loss": 0.8913, "step": 9123 }, { "epoch": 29.914754098360657, "grad_norm": 8.867279052734375, "learning_rate": 1.643523938934712e-05, "loss": 0.9359, "step": 9124 }, { "epoch": 29.918032786885245, "grad_norm": 8.846419334411621, "learning_rate": 1.6434426557171955e-05, "loss": 0.9358, "step": 9125 }, { "epoch": 29.921311475409837, "grad_norm": 7.10053825378418, "learning_rate": 1.6433613652441937e-05, "loss": 0.7933, "step": 9126 }, { "epoch": 29.924590163934425, "grad_norm": 20.5869140625, "learning_rate": 1.6432800675166238e-05, "loss": 0.8732, "step": 9127 }, { "epoch": 29.927868852459017, "grad_norm": 10.05737018585205, "learning_rate": 1.6431987625354022e-05, "loss": 0.9104, "step": 9128 }, { "epoch": 29.931147540983606, "grad_norm": 11.696144104003906, "learning_rate": 1.6431174503014458e-05, "loss": 0.8424, "step": 9129 }, { "epoch": 29.934426229508198, "grad_norm": 8.85375690460205, "learning_rate": 1.6430361308156716e-05, "loss": 1.2305, "step": 9130 }, { "epoch": 29.937704918032786, "grad_norm": 12.263830184936523, "learning_rate": 1.6429548040789963e-05, "loss": 1.12, "step": 9131 }, { "epoch": 29.940983606557378, "grad_norm": 8.665215492248535, "learning_rate": 1.642873470092337e-05, "loss": 0.9316, "step": 9132 }, { "epoch": 29.944262295081966, "grad_norm": 25.961278915405273, "learning_rate": 1.6427921288566114e-05, "loss": 1.3085, "step": 9133 }, { "epoch": 29.947540983606558, "grad_norm": 8.757240295410156, "learning_rate": 1.6427107803727354e-05, "loss": 1.0176, "step": 9134 }, { "epoch": 29.950819672131146, "grad_norm": 7.303307056427002, "learning_rate": 1.6426294246416276e-05, "loss": 0.9694, "step": 9135 }, { "epoch": 29.95409836065574, "grad_norm": 7.631478309631348, "learning_rate": 1.642548061664205e-05, "loss": 1.135, "step": 9136 }, { "epoch": 29.957377049180327, "grad_norm": 10.274513244628906, "learning_rate": 1.6424666914413848e-05, "loss": 1.0662, "step": 9137 }, { "epoch": 29.96065573770492, "grad_norm": 6.684512615203857, "learning_rate": 1.6423853139740845e-05, "loss": 1.1666, "step": 9138 }, { "epoch": 29.963934426229507, "grad_norm": 8.792889595031738, "learning_rate": 1.642303929263222e-05, "loss": 1.1418, "step": 9139 }, { "epoch": 29.9672131147541, "grad_norm": 9.381708145141602, "learning_rate": 1.6422225373097148e-05, "loss": 0.9836, "step": 9140 }, { "epoch": 29.970491803278687, "grad_norm": 7.756201267242432, "learning_rate": 1.6421411381144808e-05, "loss": 1.149, "step": 9141 }, { "epoch": 29.97377049180328, "grad_norm": 8.784553527832031, "learning_rate": 1.6420597316784378e-05, "loss": 0.9085, "step": 9142 }, { "epoch": 29.977049180327867, "grad_norm": 6.756900787353516, "learning_rate": 1.6419783180025034e-05, "loss": 1.1844, "step": 9143 }, { "epoch": 29.98032786885246, "grad_norm": 8.12755298614502, "learning_rate": 1.6418968970875966e-05, "loss": 1.015, "step": 9144 }, { "epoch": 29.983606557377048, "grad_norm": 9.628575325012207, "learning_rate": 1.6418154689346345e-05, "loss": 0.7939, "step": 9145 }, { "epoch": 29.98688524590164, "grad_norm": 7.497766017913818, "learning_rate": 1.6417340335445358e-05, "loss": 0.8049, "step": 9146 }, { "epoch": 29.990163934426228, "grad_norm": 8.067521095275879, "learning_rate": 1.6416525909182187e-05, "loss": 0.8198, "step": 9147 }, { "epoch": 29.99344262295082, "grad_norm": 6.623148441314697, "learning_rate": 1.641571141056601e-05, "loss": 1.1152, "step": 9148 }, { "epoch": 29.99672131147541, "grad_norm": 7.0076494216918945, "learning_rate": 1.6414896839606024e-05, "loss": 0.9618, "step": 9149 }, { "epoch": 30.0, "grad_norm": 8.172779083251953, "learning_rate": 1.6414082196311402e-05, "loss": 0.8301, "step": 9150 }, { "epoch": 30.003278688524592, "grad_norm": 6.153641223907471, "learning_rate": 1.6413267480691334e-05, "loss": 1.1332, "step": 9151 }, { "epoch": 30.00655737704918, "grad_norm": 6.176828384399414, "learning_rate": 1.6412452692755008e-05, "loss": 0.8669, "step": 9152 }, { "epoch": 30.009836065573772, "grad_norm": 7.578522682189941, "learning_rate": 1.6411637832511613e-05, "loss": 1.1262, "step": 9153 }, { "epoch": 30.01311475409836, "grad_norm": 7.17609977722168, "learning_rate": 1.6410822899970327e-05, "loss": 0.9859, "step": 9154 }, { "epoch": 30.016393442622952, "grad_norm": 8.303814888000488, "learning_rate": 1.6410007895140352e-05, "loss": 1.178, "step": 9155 }, { "epoch": 30.01967213114754, "grad_norm": 6.615112781524658, "learning_rate": 1.6409192818030875e-05, "loss": 0.7949, "step": 9156 }, { "epoch": 30.022950819672133, "grad_norm": 7.767205715179443, "learning_rate": 1.6408377668651082e-05, "loss": 0.7085, "step": 9157 }, { "epoch": 30.02622950819672, "grad_norm": 8.88370418548584, "learning_rate": 1.640756244701017e-05, "loss": 0.9869, "step": 9158 }, { "epoch": 30.029508196721313, "grad_norm": 11.558781623840332, "learning_rate": 1.6406747153117328e-05, "loss": 1.0654, "step": 9159 }, { "epoch": 30.0327868852459, "grad_norm": 7.2278618812561035, "learning_rate": 1.6405931786981753e-05, "loss": 0.9456, "step": 9160 }, { "epoch": 30.036065573770493, "grad_norm": 6.918359756469727, "learning_rate": 1.6405116348612636e-05, "loss": 1.0706, "step": 9161 }, { "epoch": 30.03934426229508, "grad_norm": 8.683489799499512, "learning_rate": 1.640430083801917e-05, "loss": 0.9595, "step": 9162 }, { "epoch": 30.042622950819673, "grad_norm": 8.749606132507324, "learning_rate": 1.6403485255210555e-05, "loss": 0.8855, "step": 9163 }, { "epoch": 30.04590163934426, "grad_norm": 8.501211166381836, "learning_rate": 1.6402669600195986e-05, "loss": 0.8528, "step": 9164 }, { "epoch": 30.049180327868854, "grad_norm": 6.889669895172119, "learning_rate": 1.640185387298466e-05, "loss": 0.9993, "step": 9165 }, { "epoch": 30.052459016393442, "grad_norm": 9.675798416137695, "learning_rate": 1.6401038073585772e-05, "loss": 0.9312, "step": 9166 }, { "epoch": 30.055737704918034, "grad_norm": 7.017741680145264, "learning_rate": 1.6400222202008528e-05, "loss": 0.948, "step": 9167 }, { "epoch": 30.059016393442622, "grad_norm": 6.920202732086182, "learning_rate": 1.6399406258262125e-05, "loss": 0.9921, "step": 9168 }, { "epoch": 30.062295081967214, "grad_norm": 7.059083461761475, "learning_rate": 1.639859024235576e-05, "loss": 0.9044, "step": 9169 }, { "epoch": 30.065573770491802, "grad_norm": 7.806171894073486, "learning_rate": 1.639777415429864e-05, "loss": 0.8505, "step": 9170 }, { "epoch": 30.068852459016394, "grad_norm": 8.355069160461426, "learning_rate": 1.6396957994099962e-05, "loss": 1.2001, "step": 9171 }, { "epoch": 30.072131147540983, "grad_norm": 5.780821323394775, "learning_rate": 1.639614176176893e-05, "loss": 0.9952, "step": 9172 }, { "epoch": 30.075409836065575, "grad_norm": 6.766128063201904, "learning_rate": 1.6395325457314752e-05, "loss": 1.1219, "step": 9173 }, { "epoch": 30.078688524590163, "grad_norm": 9.747854232788086, "learning_rate": 1.639450908074663e-05, "loss": 1.0444, "step": 9174 }, { "epoch": 30.081967213114755, "grad_norm": 6.346119403839111, "learning_rate": 1.6393692632073766e-05, "loss": 0.7666, "step": 9175 }, { "epoch": 30.085245901639343, "grad_norm": 9.088396072387695, "learning_rate": 1.6392876111305372e-05, "loss": 0.9956, "step": 9176 }, { "epoch": 30.088524590163935, "grad_norm": 7.2501020431518555, "learning_rate": 1.6392059518450655e-05, "loss": 0.8705, "step": 9177 }, { "epoch": 30.091803278688523, "grad_norm": 8.203847885131836, "learning_rate": 1.6391242853518822e-05, "loss": 0.9505, "step": 9178 }, { "epoch": 30.095081967213115, "grad_norm": 6.642249584197998, "learning_rate": 1.6390426116519075e-05, "loss": 0.9092, "step": 9179 }, { "epoch": 30.098360655737704, "grad_norm": 8.215015411376953, "learning_rate": 1.638960930746063e-05, "loss": 1.1251, "step": 9180 }, { "epoch": 30.101639344262296, "grad_norm": 6.8712873458862305, "learning_rate": 1.6388792426352702e-05, "loss": 0.8278, "step": 9181 }, { "epoch": 30.104918032786884, "grad_norm": 7.568325042724609, "learning_rate": 1.6387975473204495e-05, "loss": 0.9375, "step": 9182 }, { "epoch": 30.108196721311476, "grad_norm": 9.141881942749023, "learning_rate": 1.638715844802522e-05, "loss": 0.9239, "step": 9183 }, { "epoch": 30.111475409836064, "grad_norm": 7.961760520935059, "learning_rate": 1.6386341350824094e-05, "loss": 0.823, "step": 9184 }, { "epoch": 30.114754098360656, "grad_norm": 7.104288101196289, "learning_rate": 1.6385524181610325e-05, "loss": 0.9746, "step": 9185 }, { "epoch": 30.118032786885244, "grad_norm": 6.371534824371338, "learning_rate": 1.6384706940393138e-05, "loss": 1.121, "step": 9186 }, { "epoch": 30.121311475409836, "grad_norm": 6.447908878326416, "learning_rate": 1.6383889627181738e-05, "loss": 1.1564, "step": 9187 }, { "epoch": 30.124590163934425, "grad_norm": 6.158542156219482, "learning_rate": 1.638307224198535e-05, "loss": 0.9284, "step": 9188 }, { "epoch": 30.127868852459017, "grad_norm": 5.969788551330566, "learning_rate": 1.6382254784813175e-05, "loss": 1.0845, "step": 9189 }, { "epoch": 30.131147540983605, "grad_norm": 7.319391250610352, "learning_rate": 1.6381437255674452e-05, "loss": 0.9528, "step": 9190 }, { "epoch": 30.134426229508197, "grad_norm": 7.041470050811768, "learning_rate": 1.6380619654578384e-05, "loss": 0.9429, "step": 9191 }, { "epoch": 30.137704918032785, "grad_norm": 7.579249382019043, "learning_rate": 1.6379801981534198e-05, "loss": 1.1202, "step": 9192 }, { "epoch": 30.140983606557377, "grad_norm": 7.450645923614502, "learning_rate": 1.6378984236551108e-05, "loss": 0.8474, "step": 9193 }, { "epoch": 30.14426229508197, "grad_norm": 9.33621883392334, "learning_rate": 1.6378166419638342e-05, "loss": 0.7986, "step": 9194 }, { "epoch": 30.147540983606557, "grad_norm": 8.352348327636719, "learning_rate": 1.6377348530805114e-05, "loss": 1.24, "step": 9195 }, { "epoch": 30.15081967213115, "grad_norm": 7.081480503082275, "learning_rate": 1.637653057006065e-05, "loss": 1.1163, "step": 9196 }, { "epoch": 30.154098360655738, "grad_norm": 6.438255310058594, "learning_rate": 1.6375712537414178e-05, "loss": 1.1089, "step": 9197 }, { "epoch": 30.15737704918033, "grad_norm": 8.11949348449707, "learning_rate": 1.6374894432874915e-05, "loss": 0.8087, "step": 9198 }, { "epoch": 30.160655737704918, "grad_norm": 6.420104503631592, "learning_rate": 1.637407625645209e-05, "loss": 1.0852, "step": 9199 }, { "epoch": 30.16393442622951, "grad_norm": 9.373772621154785, "learning_rate": 1.6373258008154928e-05, "loss": 1.1179, "step": 9200 }, { "epoch": 30.167213114754098, "grad_norm": 6.766722679138184, "learning_rate": 1.6372439687992658e-05, "loss": 1.3049, "step": 9201 }, { "epoch": 30.17049180327869, "grad_norm": 8.273822784423828, "learning_rate": 1.6371621295974503e-05, "loss": 1.002, "step": 9202 }, { "epoch": 30.17377049180328, "grad_norm": 6.3123979568481445, "learning_rate": 1.6370802832109692e-05, "loss": 1.0558, "step": 9203 }, { "epoch": 30.17704918032787, "grad_norm": 7.3014044761657715, "learning_rate": 1.6369984296407454e-05, "loss": 1.1799, "step": 9204 }, { "epoch": 30.18032786885246, "grad_norm": 7.827673435211182, "learning_rate": 1.6369165688877022e-05, "loss": 0.9236, "step": 9205 }, { "epoch": 30.18360655737705, "grad_norm": 5.838433265686035, "learning_rate": 1.6368347009527626e-05, "loss": 1.1044, "step": 9206 }, { "epoch": 30.18688524590164, "grad_norm": 6.236882209777832, "learning_rate": 1.6367528258368493e-05, "loss": 0.9402, "step": 9207 }, { "epoch": 30.19016393442623, "grad_norm": 7.336239814758301, "learning_rate": 1.636670943540886e-05, "loss": 0.9481, "step": 9208 }, { "epoch": 30.19344262295082, "grad_norm": 9.053478240966797, "learning_rate": 1.6365890540657957e-05, "loss": 1.2732, "step": 9209 }, { "epoch": 30.19672131147541, "grad_norm": 6.87241268157959, "learning_rate": 1.636507157412502e-05, "loss": 0.8889, "step": 9210 }, { "epoch": 30.2, "grad_norm": 9.038525581359863, "learning_rate": 1.6364252535819284e-05, "loss": 1.0041, "step": 9211 }, { "epoch": 30.20327868852459, "grad_norm": 7.931083679199219, "learning_rate": 1.6363433425749984e-05, "loss": 1.1956, "step": 9212 }, { "epoch": 30.20655737704918, "grad_norm": 7.737565040588379, "learning_rate": 1.6362614243926352e-05, "loss": 0.8875, "step": 9213 }, { "epoch": 30.20983606557377, "grad_norm": 8.546980857849121, "learning_rate": 1.6361794990357634e-05, "loss": 0.8385, "step": 9214 }, { "epoch": 30.21311475409836, "grad_norm": 7.633058071136475, "learning_rate": 1.6360975665053058e-05, "loss": 0.8419, "step": 9215 }, { "epoch": 30.21639344262295, "grad_norm": 8.34012222290039, "learning_rate": 1.636015626802187e-05, "loss": 1.106, "step": 9216 }, { "epoch": 30.21967213114754, "grad_norm": 6.675316333770752, "learning_rate": 1.6359336799273306e-05, "loss": 0.8934, "step": 9217 }, { "epoch": 30.222950819672132, "grad_norm": 7.899888515472412, "learning_rate": 1.6358517258816608e-05, "loss": 1.1849, "step": 9218 }, { "epoch": 30.22622950819672, "grad_norm": 9.442893028259277, "learning_rate": 1.6357697646661018e-05, "loss": 1.0315, "step": 9219 }, { "epoch": 30.229508196721312, "grad_norm": 28.8037109375, "learning_rate": 1.6356877962815774e-05, "loss": 1.2543, "step": 9220 }, { "epoch": 30.2327868852459, "grad_norm": 8.95510482788086, "learning_rate": 1.6356058207290127e-05, "loss": 0.8874, "step": 9221 }, { "epoch": 30.236065573770492, "grad_norm": 15.826166152954102, "learning_rate": 1.635523838009331e-05, "loss": 1.1575, "step": 9222 }, { "epoch": 30.23934426229508, "grad_norm": 5.348484516143799, "learning_rate": 1.6354418481234576e-05, "loss": 1.1263, "step": 9223 }, { "epoch": 30.242622950819673, "grad_norm": 5.812054634094238, "learning_rate": 1.6353598510723164e-05, "loss": 1.0013, "step": 9224 }, { "epoch": 30.24590163934426, "grad_norm": 7.287330627441406, "learning_rate": 1.6352778468568323e-05, "loss": 1.0765, "step": 9225 }, { "epoch": 30.249180327868853, "grad_norm": 8.018574714660645, "learning_rate": 1.63519583547793e-05, "loss": 0.6078, "step": 9226 }, { "epoch": 30.25245901639344, "grad_norm": 10.62189769744873, "learning_rate": 1.6351138169365343e-05, "loss": 0.9666, "step": 9227 }, { "epoch": 30.255737704918033, "grad_norm": 7.076178550720215, "learning_rate": 1.6350317912335696e-05, "loss": 0.8491, "step": 9228 }, { "epoch": 30.25901639344262, "grad_norm": 7.5714006423950195, "learning_rate": 1.6349497583699618e-05, "loss": 1.3562, "step": 9229 }, { "epoch": 30.262295081967213, "grad_norm": 9.962309837341309, "learning_rate": 1.6348677183466346e-05, "loss": 0.9315, "step": 9230 }, { "epoch": 30.2655737704918, "grad_norm": 6.814122200012207, "learning_rate": 1.6347856711645142e-05, "loss": 0.8093, "step": 9231 }, { "epoch": 30.268852459016394, "grad_norm": 5.902323246002197, "learning_rate": 1.6347036168245253e-05, "loss": 1.1615, "step": 9232 }, { "epoch": 30.272131147540982, "grad_norm": 8.234557151794434, "learning_rate": 1.634621555327593e-05, "loss": 1.023, "step": 9233 }, { "epoch": 30.275409836065574, "grad_norm": 10.710225105285645, "learning_rate": 1.634539486674643e-05, "loss": 0.9404, "step": 9234 }, { "epoch": 30.278688524590162, "grad_norm": 7.550588130950928, "learning_rate": 1.6344574108666007e-05, "loss": 1.0555, "step": 9235 }, { "epoch": 30.281967213114754, "grad_norm": 6.500617027282715, "learning_rate": 1.634375327904391e-05, "loss": 1.1792, "step": 9236 }, { "epoch": 30.285245901639342, "grad_norm": 7.411678314208984, "learning_rate": 1.6342932377889404e-05, "loss": 0.8766, "step": 9237 }, { "epoch": 30.288524590163934, "grad_norm": 6.826788902282715, "learning_rate": 1.634211140521174e-05, "loss": 0.9966, "step": 9238 }, { "epoch": 30.291803278688526, "grad_norm": 8.574422836303711, "learning_rate": 1.6341290361020172e-05, "loss": 1.1383, "step": 9239 }, { "epoch": 30.295081967213115, "grad_norm": 5.988379001617432, "learning_rate": 1.6340469245323963e-05, "loss": 0.9013, "step": 9240 }, { "epoch": 30.298360655737707, "grad_norm": 7.899716377258301, "learning_rate": 1.6339648058132372e-05, "loss": 0.8866, "step": 9241 }, { "epoch": 30.301639344262295, "grad_norm": 7.405975818634033, "learning_rate": 1.6338826799454657e-05, "loss": 1.0742, "step": 9242 }, { "epoch": 30.304918032786887, "grad_norm": 7.267039775848389, "learning_rate": 1.6338005469300077e-05, "loss": 0.9253, "step": 9243 }, { "epoch": 30.308196721311475, "grad_norm": 5.983361721038818, "learning_rate": 1.6337184067677898e-05, "loss": 0.946, "step": 9244 }, { "epoch": 30.311475409836067, "grad_norm": 6.0389862060546875, "learning_rate": 1.633636259459738e-05, "loss": 1.1713, "step": 9245 }, { "epoch": 30.314754098360655, "grad_norm": 7.964498043060303, "learning_rate": 1.6335541050067784e-05, "loss": 0.868, "step": 9246 }, { "epoch": 30.318032786885247, "grad_norm": 11.108119010925293, "learning_rate": 1.6334719434098375e-05, "loss": 1.0092, "step": 9247 }, { "epoch": 30.321311475409836, "grad_norm": 10.294929504394531, "learning_rate": 1.633389774669842e-05, "loss": 0.991, "step": 9248 }, { "epoch": 30.324590163934428, "grad_norm": 6.998477935791016, "learning_rate": 1.6333075987877182e-05, "loss": 1.0427, "step": 9249 }, { "epoch": 30.327868852459016, "grad_norm": 9.000067710876465, "learning_rate": 1.6332254157643928e-05, "loss": 1.1554, "step": 9250 }, { "epoch": 30.331147540983608, "grad_norm": 6.960764408111572, "learning_rate": 1.633143225600792e-05, "loss": 1.1558, "step": 9251 }, { "epoch": 30.334426229508196, "grad_norm": 14.72045612335205, "learning_rate": 1.6330610282978434e-05, "loss": 0.9419, "step": 9252 }, { "epoch": 30.337704918032788, "grad_norm": 9.449170112609863, "learning_rate": 1.6329788238564734e-05, "loss": 0.7677, "step": 9253 }, { "epoch": 30.340983606557376, "grad_norm": 7.527994155883789, "learning_rate": 1.6328966122776094e-05, "loss": 0.7242, "step": 9254 }, { "epoch": 30.34426229508197, "grad_norm": 6.529874801635742, "learning_rate": 1.6328143935621773e-05, "loss": 0.9101, "step": 9255 }, { "epoch": 30.347540983606557, "grad_norm": 7.227572917938232, "learning_rate": 1.6327321677111053e-05, "loss": 0.9075, "step": 9256 }, { "epoch": 30.35081967213115, "grad_norm": 17.21746253967285, "learning_rate": 1.6326499347253206e-05, "loss": 1.053, "step": 9257 }, { "epoch": 30.354098360655737, "grad_norm": 7.030498504638672, "learning_rate": 1.63256769460575e-05, "loss": 1.095, "step": 9258 }, { "epoch": 30.35737704918033, "grad_norm": 6.665134429931641, "learning_rate": 1.6324854473533204e-05, "loss": 1.1991, "step": 9259 }, { "epoch": 30.360655737704917, "grad_norm": 7.700845718383789, "learning_rate": 1.63240319296896e-05, "loss": 0.937, "step": 9260 }, { "epoch": 30.36393442622951, "grad_norm": 7.8479413986206055, "learning_rate": 1.6323209314535962e-05, "loss": 0.8478, "step": 9261 }, { "epoch": 30.367213114754097, "grad_norm": 19.89590072631836, "learning_rate": 1.6322386628081564e-05, "loss": 1.205, "step": 9262 }, { "epoch": 30.37049180327869, "grad_norm": 7.968702793121338, "learning_rate": 1.6321563870335686e-05, "loss": 1.0159, "step": 9263 }, { "epoch": 30.373770491803278, "grad_norm": 12.18281364440918, "learning_rate": 1.6320741041307598e-05, "loss": 1.0775, "step": 9264 }, { "epoch": 30.37704918032787, "grad_norm": 7.006083965301514, "learning_rate": 1.6319918141006583e-05, "loss": 0.9694, "step": 9265 }, { "epoch": 30.380327868852458, "grad_norm": 6.915237903594971, "learning_rate": 1.631909516944192e-05, "loss": 0.9998, "step": 9266 }, { "epoch": 30.38360655737705, "grad_norm": 8.888355255126953, "learning_rate": 1.631827212662289e-05, "loss": 0.8094, "step": 9267 }, { "epoch": 30.386885245901638, "grad_norm": 7.121788501739502, "learning_rate": 1.6317449012558776e-05, "loss": 0.7532, "step": 9268 }, { "epoch": 30.39016393442623, "grad_norm": 7.5795578956604, "learning_rate": 1.6316625827258852e-05, "loss": 1.1183, "step": 9269 }, { "epoch": 30.39344262295082, "grad_norm": 7.239969253540039, "learning_rate": 1.6315802570732405e-05, "loss": 0.924, "step": 9270 }, { "epoch": 30.39672131147541, "grad_norm": 60.298343658447266, "learning_rate": 1.631497924298872e-05, "loss": 0.8037, "step": 9271 }, { "epoch": 30.4, "grad_norm": 9.49878215789795, "learning_rate": 1.6314155844037074e-05, "loss": 0.9874, "step": 9272 }, { "epoch": 30.40327868852459, "grad_norm": 8.70923900604248, "learning_rate": 1.6313332373886756e-05, "loss": 0.9533, "step": 9273 }, { "epoch": 30.40655737704918, "grad_norm": 7.915031909942627, "learning_rate": 1.6312508832547053e-05, "loss": 0.9581, "step": 9274 }, { "epoch": 30.40983606557377, "grad_norm": 8.095212936401367, "learning_rate": 1.6311685220027248e-05, "loss": 0.9124, "step": 9275 }, { "epoch": 30.41311475409836, "grad_norm": 6.7001543045043945, "learning_rate": 1.631086153633663e-05, "loss": 0.9974, "step": 9276 }, { "epoch": 30.41639344262295, "grad_norm": 7.8096537590026855, "learning_rate": 1.6310037781484485e-05, "loss": 1.0219, "step": 9277 }, { "epoch": 30.41967213114754, "grad_norm": 8.749415397644043, "learning_rate": 1.6309213955480105e-05, "loss": 0.8596, "step": 9278 }, { "epoch": 30.42295081967213, "grad_norm": 9.82463264465332, "learning_rate": 1.6308390058332778e-05, "loss": 0.9476, "step": 9279 }, { "epoch": 30.42622950819672, "grad_norm": 7.798523426055908, "learning_rate": 1.6307566090051793e-05, "loss": 0.9395, "step": 9280 }, { "epoch": 30.42950819672131, "grad_norm": 6.391181468963623, "learning_rate": 1.6306742050646444e-05, "loss": 1.032, "step": 9281 }, { "epoch": 30.432786885245903, "grad_norm": 8.415376663208008, "learning_rate": 1.6305917940126018e-05, "loss": 1.0984, "step": 9282 }, { "epoch": 30.43606557377049, "grad_norm": 7.854973316192627, "learning_rate": 1.6305093758499815e-05, "loss": 0.9391, "step": 9283 }, { "epoch": 30.439344262295084, "grad_norm": 7.600008010864258, "learning_rate": 1.6304269505777123e-05, "loss": 1.1228, "step": 9284 }, { "epoch": 30.442622950819672, "grad_norm": 5.814555644989014, "learning_rate": 1.6303445181967234e-05, "loss": 0.9606, "step": 9285 }, { "epoch": 30.445901639344264, "grad_norm": 7.212067127227783, "learning_rate": 1.6302620787079447e-05, "loss": 0.9354, "step": 9286 }, { "epoch": 30.449180327868852, "grad_norm": 44.19358825683594, "learning_rate": 1.630179632112306e-05, "loss": 0.9828, "step": 9287 }, { "epoch": 30.452459016393444, "grad_norm": 8.668622970581055, "learning_rate": 1.630097178410737e-05, "loss": 0.7911, "step": 9288 }, { "epoch": 30.455737704918032, "grad_norm": 8.965795516967773, "learning_rate": 1.6300147176041668e-05, "loss": 1.1071, "step": 9289 }, { "epoch": 30.459016393442624, "grad_norm": 6.642158508300781, "learning_rate": 1.629932249693526e-05, "loss": 0.9261, "step": 9290 }, { "epoch": 30.462295081967213, "grad_norm": 7.601068496704102, "learning_rate": 1.629849774679743e-05, "loss": 0.7666, "step": 9291 }, { "epoch": 30.465573770491805, "grad_norm": 13.217965126037598, "learning_rate": 1.62976729256375e-05, "loss": 0.948, "step": 9292 }, { "epoch": 30.468852459016393, "grad_norm": 7.5271735191345215, "learning_rate": 1.6296848033464755e-05, "loss": 0.9192, "step": 9293 }, { "epoch": 30.472131147540985, "grad_norm": 7.531671047210693, "learning_rate": 1.6296023070288506e-05, "loss": 1.1619, "step": 9294 }, { "epoch": 30.475409836065573, "grad_norm": 10.48080825805664, "learning_rate": 1.6295198036118045e-05, "loss": 1.1747, "step": 9295 }, { "epoch": 30.478688524590165, "grad_norm": 5.605158805847168, "learning_rate": 1.6294372930962685e-05, "loss": 1.2012, "step": 9296 }, { "epoch": 30.481967213114753, "grad_norm": 9.168269157409668, "learning_rate": 1.629354775483172e-05, "loss": 0.7353, "step": 9297 }, { "epoch": 30.485245901639345, "grad_norm": 18.13176727294922, "learning_rate": 1.629272250773447e-05, "loss": 1.011, "step": 9298 }, { "epoch": 30.488524590163934, "grad_norm": 9.146040916442871, "learning_rate": 1.6291897189680222e-05, "loss": 0.9759, "step": 9299 }, { "epoch": 30.491803278688526, "grad_norm": 6.803792953491211, "learning_rate": 1.6291071800678295e-05, "loss": 0.8999, "step": 9300 }, { "epoch": 30.495081967213114, "grad_norm": 6.441173076629639, "learning_rate": 1.6290246340737995e-05, "loss": 1.1072, "step": 9301 }, { "epoch": 30.498360655737706, "grad_norm": 12.09627628326416, "learning_rate": 1.6289420809868623e-05, "loss": 1.2083, "step": 9302 }, { "epoch": 30.501639344262294, "grad_norm": 7.432031631469727, "learning_rate": 1.6288595208079493e-05, "loss": 0.8397, "step": 9303 }, { "epoch": 30.504918032786886, "grad_norm": 8.579140663146973, "learning_rate": 1.6287769535379916e-05, "loss": 1.0005, "step": 9304 }, { "epoch": 30.508196721311474, "grad_norm": 7.167339324951172, "learning_rate": 1.62869437917792e-05, "loss": 0.9587, "step": 9305 }, { "epoch": 30.511475409836066, "grad_norm": 7.640463829040527, "learning_rate": 1.6286117977286648e-05, "loss": 1.0162, "step": 9306 }, { "epoch": 30.514754098360655, "grad_norm": 8.564327239990234, "learning_rate": 1.6285292091911585e-05, "loss": 1.1298, "step": 9307 }, { "epoch": 30.518032786885247, "grad_norm": 7.24005651473999, "learning_rate": 1.628446613566332e-05, "loss": 1.1016, "step": 9308 }, { "epoch": 30.521311475409835, "grad_norm": 10.681232452392578, "learning_rate": 1.6283640108551166e-05, "loss": 0.8039, "step": 9309 }, { "epoch": 30.524590163934427, "grad_norm": 15.958212852478027, "learning_rate": 1.6282814010584433e-05, "loss": 0.928, "step": 9310 }, { "epoch": 30.527868852459015, "grad_norm": 7.395463943481445, "learning_rate": 1.6281987841772444e-05, "loss": 0.8835, "step": 9311 }, { "epoch": 30.531147540983607, "grad_norm": 10.477158546447754, "learning_rate": 1.6281161602124507e-05, "loss": 1.0767, "step": 9312 }, { "epoch": 30.534426229508195, "grad_norm": 10.044984817504883, "learning_rate": 1.6280335291649944e-05, "loss": 1.1136, "step": 9313 }, { "epoch": 30.537704918032787, "grad_norm": 17.740327835083008, "learning_rate": 1.627950891035807e-05, "loss": 1.2151, "step": 9314 }, { "epoch": 30.540983606557376, "grad_norm": 11.027382850646973, "learning_rate": 1.6278682458258202e-05, "loss": 0.8104, "step": 9315 }, { "epoch": 30.544262295081968, "grad_norm": 7.837738513946533, "learning_rate": 1.627785593535966e-05, "loss": 0.9287, "step": 9316 }, { "epoch": 30.547540983606556, "grad_norm": 7.870811462402344, "learning_rate": 1.6277029341671772e-05, "loss": 0.9417, "step": 9317 }, { "epoch": 30.550819672131148, "grad_norm": 7.77944278717041, "learning_rate": 1.6276202677203845e-05, "loss": 0.76, "step": 9318 }, { "epoch": 30.554098360655736, "grad_norm": 6.990563869476318, "learning_rate": 1.627537594196521e-05, "loss": 0.9619, "step": 9319 }, { "epoch": 30.557377049180328, "grad_norm": 7.205669403076172, "learning_rate": 1.627454913596519e-05, "loss": 0.8827, "step": 9320 }, { "epoch": 30.560655737704916, "grad_norm": 8.864093780517578, "learning_rate": 1.6273722259213095e-05, "loss": 0.8887, "step": 9321 }, { "epoch": 30.56393442622951, "grad_norm": 6.796481132507324, "learning_rate": 1.627289531171827e-05, "loss": 0.8934, "step": 9322 }, { "epoch": 30.567213114754097, "grad_norm": 10.578206062316895, "learning_rate": 1.6272068293490017e-05, "loss": 0.8377, "step": 9323 }, { "epoch": 30.57049180327869, "grad_norm": 10.294682502746582, "learning_rate": 1.627124120453768e-05, "loss": 0.8176, "step": 9324 }, { "epoch": 30.57377049180328, "grad_norm": 7.316860675811768, "learning_rate": 1.6270414044870575e-05, "loss": 0.9742, "step": 9325 }, { "epoch": 30.57704918032787, "grad_norm": 10.304217338562012, "learning_rate": 1.6269586814498035e-05, "loss": 0.8354, "step": 9326 }, { "epoch": 30.58032786885246, "grad_norm": 6.834982872009277, "learning_rate": 1.6268759513429384e-05, "loss": 1.2467, "step": 9327 }, { "epoch": 30.58360655737705, "grad_norm": 7.789874076843262, "learning_rate": 1.6267932141673946e-05, "loss": 0.9536, "step": 9328 }, { "epoch": 30.58688524590164, "grad_norm": 8.109776496887207, "learning_rate": 1.6267104699241066e-05, "loss": 1.0235, "step": 9329 }, { "epoch": 30.59016393442623, "grad_norm": 9.377686500549316, "learning_rate": 1.6266277186140058e-05, "loss": 1.0878, "step": 9330 }, { "epoch": 30.59344262295082, "grad_norm": 7.172817230224609, "learning_rate": 1.626544960238026e-05, "loss": 1.0849, "step": 9331 }, { "epoch": 30.59672131147541, "grad_norm": 6.1501054763793945, "learning_rate": 1.6264621947971e-05, "loss": 1.1877, "step": 9332 }, { "epoch": 30.6, "grad_norm": 6.087054252624512, "learning_rate": 1.626379422292162e-05, "loss": 1.004, "step": 9333 }, { "epoch": 30.60327868852459, "grad_norm": 5.129743576049805, "learning_rate": 1.6262966427241447e-05, "loss": 1.3464, "step": 9334 }, { "epoch": 30.60655737704918, "grad_norm": 7.697075843811035, "learning_rate": 1.6262138560939813e-05, "loss": 0.9883, "step": 9335 }, { "epoch": 30.60983606557377, "grad_norm": 11.03814697265625, "learning_rate": 1.6261310624026056e-05, "loss": 0.7157, "step": 9336 }, { "epoch": 30.613114754098362, "grad_norm": 11.025744438171387, "learning_rate": 1.626048261650951e-05, "loss": 1.0612, "step": 9337 }, { "epoch": 30.61639344262295, "grad_norm": 5.893617153167725, "learning_rate": 1.625965453839952e-05, "loss": 0.9383, "step": 9338 }, { "epoch": 30.619672131147542, "grad_norm": 7.714190483093262, "learning_rate": 1.6258826389705407e-05, "loss": 0.8932, "step": 9339 }, { "epoch": 30.62295081967213, "grad_norm": 6.933393955230713, "learning_rate": 1.6257998170436528e-05, "loss": 1.1893, "step": 9340 }, { "epoch": 30.626229508196722, "grad_norm": 8.200688362121582, "learning_rate": 1.6257169880602207e-05, "loss": 1.0411, "step": 9341 }, { "epoch": 30.62950819672131, "grad_norm": 8.476531028747559, "learning_rate": 1.625634152021179e-05, "loss": 1.0112, "step": 9342 }, { "epoch": 30.632786885245903, "grad_norm": 9.626702308654785, "learning_rate": 1.6255513089274622e-05, "loss": 0.8883, "step": 9343 }, { "epoch": 30.63606557377049, "grad_norm": 8.166207313537598, "learning_rate": 1.6254684587800037e-05, "loss": 0.8181, "step": 9344 }, { "epoch": 30.639344262295083, "grad_norm": 6.013576030731201, "learning_rate": 1.625385601579738e-05, "loss": 1.0643, "step": 9345 }, { "epoch": 30.64262295081967, "grad_norm": 7.995948314666748, "learning_rate": 1.6253027373275994e-05, "loss": 0.7843, "step": 9346 }, { "epoch": 30.645901639344263, "grad_norm": 6.751413822174072, "learning_rate": 1.6252198660245223e-05, "loss": 0.8965, "step": 9347 }, { "epoch": 30.64918032786885, "grad_norm": 5.551761150360107, "learning_rate": 1.625136987671441e-05, "loss": 1.1112, "step": 9348 }, { "epoch": 30.652459016393443, "grad_norm": 7.45015287399292, "learning_rate": 1.6250541022692902e-05, "loss": 0.9895, "step": 9349 }, { "epoch": 30.65573770491803, "grad_norm": 11.818212509155273, "learning_rate": 1.624971209819005e-05, "loss": 0.9281, "step": 9350 }, { "epoch": 30.659016393442624, "grad_norm": 6.392435073852539, "learning_rate": 1.624888310321519e-05, "loss": 0.9376, "step": 9351 }, { "epoch": 30.662295081967212, "grad_norm": 6.602216720581055, "learning_rate": 1.6248054037777678e-05, "loss": 0.9802, "step": 9352 }, { "epoch": 30.665573770491804, "grad_norm": 9.059014320373535, "learning_rate": 1.624722490188686e-05, "loss": 0.8254, "step": 9353 }, { "epoch": 30.668852459016392, "grad_norm": 19.597654342651367, "learning_rate": 1.6246395695552086e-05, "loss": 0.7945, "step": 9354 }, { "epoch": 30.672131147540984, "grad_norm": 7.89304780960083, "learning_rate": 1.6245566418782707e-05, "loss": 0.9799, "step": 9355 }, { "epoch": 30.675409836065572, "grad_norm": 7.197523593902588, "learning_rate": 1.6244737071588072e-05, "loss": 0.9269, "step": 9356 }, { "epoch": 30.678688524590164, "grad_norm": 6.049012660980225, "learning_rate": 1.6243907653977535e-05, "loss": 0.952, "step": 9357 }, { "epoch": 30.681967213114753, "grad_norm": 15.126212120056152, "learning_rate": 1.6243078165960443e-05, "loss": 0.9405, "step": 9358 }, { "epoch": 30.685245901639345, "grad_norm": 6.189218997955322, "learning_rate": 1.6242248607546155e-05, "loss": 1.063, "step": 9359 }, { "epoch": 30.688524590163933, "grad_norm": 5.115277290344238, "learning_rate": 1.6241418978744025e-05, "loss": 1.4194, "step": 9360 }, { "epoch": 30.691803278688525, "grad_norm": 5.527192115783691, "learning_rate": 1.624058927956341e-05, "loss": 1.2585, "step": 9361 }, { "epoch": 30.695081967213113, "grad_norm": 9.627497673034668, "learning_rate": 1.6239759510013657e-05, "loss": 0.9979, "step": 9362 }, { "epoch": 30.698360655737705, "grad_norm": 5.543900966644287, "learning_rate": 1.623892967010413e-05, "loss": 1.1299, "step": 9363 }, { "epoch": 30.701639344262293, "grad_norm": 7.270673751831055, "learning_rate": 1.623809975984418e-05, "loss": 0.9713, "step": 9364 }, { "epoch": 30.704918032786885, "grad_norm": 7.407702922821045, "learning_rate": 1.6237269779243176e-05, "loss": 0.8903, "step": 9365 }, { "epoch": 30.708196721311474, "grad_norm": 6.043283462524414, "learning_rate": 1.6236439728310467e-05, "loss": 1.2588, "step": 9366 }, { "epoch": 30.711475409836066, "grad_norm": 7.36230993270874, "learning_rate": 1.6235609607055414e-05, "loss": 0.8274, "step": 9367 }, { "epoch": 30.714754098360658, "grad_norm": 6.8911590576171875, "learning_rate": 1.6234779415487382e-05, "loss": 1.1315, "step": 9368 }, { "epoch": 30.718032786885246, "grad_norm": 6.315779685974121, "learning_rate": 1.6233949153615728e-05, "loss": 1.0323, "step": 9369 }, { "epoch": 30.721311475409838, "grad_norm": 7.257360458374023, "learning_rate": 1.623311882144982e-05, "loss": 0.8964, "step": 9370 }, { "epoch": 30.724590163934426, "grad_norm": 6.377293586730957, "learning_rate": 1.623228841899901e-05, "loss": 1.1606, "step": 9371 }, { "epoch": 30.727868852459018, "grad_norm": 8.661441802978516, "learning_rate": 1.623145794627267e-05, "loss": 0.8328, "step": 9372 }, { "epoch": 30.731147540983606, "grad_norm": 6.234484672546387, "learning_rate": 1.6230627403280163e-05, "loss": 1.1827, "step": 9373 }, { "epoch": 30.7344262295082, "grad_norm": 6.04746675491333, "learning_rate": 1.6229796790030853e-05, "loss": 1.021, "step": 9374 }, { "epoch": 30.737704918032787, "grad_norm": 6.917830467224121, "learning_rate": 1.622896610653411e-05, "loss": 0.8884, "step": 9375 }, { "epoch": 30.74098360655738, "grad_norm": 6.811099052429199, "learning_rate": 1.6228135352799296e-05, "loss": 1.0363, "step": 9376 }, { "epoch": 30.744262295081967, "grad_norm": 7.415525436401367, "learning_rate": 1.6227304528835775e-05, "loss": 1.0636, "step": 9377 }, { "epoch": 30.74754098360656, "grad_norm": 8.514575958251953, "learning_rate": 1.6226473634652928e-05, "loss": 1.0046, "step": 9378 }, { "epoch": 30.750819672131147, "grad_norm": 7.4415178298950195, "learning_rate": 1.6225642670260116e-05, "loss": 0.9092, "step": 9379 }, { "epoch": 30.75409836065574, "grad_norm": 7.412233829498291, "learning_rate": 1.6224811635666705e-05, "loss": 0.9094, "step": 9380 }, { "epoch": 30.757377049180327, "grad_norm": 6.11525821685791, "learning_rate": 1.622398053088208e-05, "loss": 0.9259, "step": 9381 }, { "epoch": 30.76065573770492, "grad_norm": 7.7668938636779785, "learning_rate": 1.6223149355915592e-05, "loss": 1.1246, "step": 9382 }, { "epoch": 30.763934426229508, "grad_norm": 6.677982807159424, "learning_rate": 1.622231811077663e-05, "loss": 1.0727, "step": 9383 }, { "epoch": 30.7672131147541, "grad_norm": 11.46472454071045, "learning_rate": 1.6221486795474562e-05, "loss": 0.8813, "step": 9384 }, { "epoch": 30.770491803278688, "grad_norm": 6.7152323722839355, "learning_rate": 1.622065541001876e-05, "loss": 1.0724, "step": 9385 }, { "epoch": 30.77377049180328, "grad_norm": 5.987711429595947, "learning_rate": 1.62198239544186e-05, "loss": 1.0604, "step": 9386 }, { "epoch": 30.777049180327868, "grad_norm": 8.292247772216797, "learning_rate": 1.621899242868346e-05, "loss": 1.2916, "step": 9387 }, { "epoch": 30.78032786885246, "grad_norm": 11.312928199768066, "learning_rate": 1.6218160832822713e-05, "loss": 0.937, "step": 9388 }, { "epoch": 30.78360655737705, "grad_norm": 6.353515148162842, "learning_rate": 1.621732916684574e-05, "loss": 1.0494, "step": 9389 }, { "epoch": 30.78688524590164, "grad_norm": 6.649284362792969, "learning_rate": 1.6216497430761914e-05, "loss": 1.0015, "step": 9390 }, { "epoch": 30.79016393442623, "grad_norm": 6.017862319946289, "learning_rate": 1.6215665624580617e-05, "loss": 1.111, "step": 9391 }, { "epoch": 30.79344262295082, "grad_norm": 15.992745399475098, "learning_rate": 1.6214833748311226e-05, "loss": 0.9631, "step": 9392 }, { "epoch": 30.79672131147541, "grad_norm": 6.624209880828857, "learning_rate": 1.621400180196312e-05, "loss": 1.1079, "step": 9393 }, { "epoch": 30.8, "grad_norm": 6.7974019050598145, "learning_rate": 1.6213169785545688e-05, "loss": 1.1344, "step": 9394 }, { "epoch": 30.80327868852459, "grad_norm": 23.309326171875, "learning_rate": 1.6212337699068304e-05, "loss": 0.8052, "step": 9395 }, { "epoch": 30.80655737704918, "grad_norm": 5.220474720001221, "learning_rate": 1.6211505542540353e-05, "loss": 1.1648, "step": 9396 }, { "epoch": 30.80983606557377, "grad_norm": 11.258283615112305, "learning_rate": 1.6210673315971218e-05, "loss": 1.0399, "step": 9397 }, { "epoch": 30.81311475409836, "grad_norm": 7.247075080871582, "learning_rate": 1.6209841019370286e-05, "loss": 0.9882, "step": 9398 }, { "epoch": 30.81639344262295, "grad_norm": 7.977625370025635, "learning_rate": 1.620900865274694e-05, "loss": 1.0586, "step": 9399 }, { "epoch": 30.81967213114754, "grad_norm": 7.368559837341309, "learning_rate": 1.6208176216110566e-05, "loss": 1.2465, "step": 9400 }, { "epoch": 30.82295081967213, "grad_norm": 8.785700798034668, "learning_rate": 1.6207343709470545e-05, "loss": 0.9219, "step": 9401 }, { "epoch": 30.82622950819672, "grad_norm": 5.296710968017578, "learning_rate": 1.6206511132836276e-05, "loss": 1.1407, "step": 9402 }, { "epoch": 30.82950819672131, "grad_norm": 6.278173923492432, "learning_rate": 1.6205678486217142e-05, "loss": 1.0303, "step": 9403 }, { "epoch": 30.832786885245902, "grad_norm": 7.446387767791748, "learning_rate": 1.6204845769622526e-05, "loss": 1.2336, "step": 9404 }, { "epoch": 30.83606557377049, "grad_norm": 8.922503471374512, "learning_rate": 1.6204012983061823e-05, "loss": 1.0636, "step": 9405 }, { "epoch": 30.839344262295082, "grad_norm": 6.818061828613281, "learning_rate": 1.620318012654442e-05, "loss": 1.1322, "step": 9406 }, { "epoch": 30.84262295081967, "grad_norm": 9.609770774841309, "learning_rate": 1.620234720007972e-05, "loss": 0.7498, "step": 9407 }, { "epoch": 30.845901639344262, "grad_norm": 6.021172523498535, "learning_rate": 1.62015142036771e-05, "loss": 1.08, "step": 9408 }, { "epoch": 30.84918032786885, "grad_norm": 8.126900672912598, "learning_rate": 1.6200681137345962e-05, "loss": 1.0144, "step": 9409 }, { "epoch": 30.852459016393443, "grad_norm": 6.652210235595703, "learning_rate": 1.6199848001095696e-05, "loss": 0.8964, "step": 9410 }, { "epoch": 30.855737704918035, "grad_norm": 7.318122863769531, "learning_rate": 1.6199014794935698e-05, "loss": 1.1948, "step": 9411 }, { "epoch": 30.859016393442623, "grad_norm": 9.936141014099121, "learning_rate": 1.6198181518875363e-05, "loss": 1.0533, "step": 9412 }, { "epoch": 30.862295081967215, "grad_norm": 7.623866558074951, "learning_rate": 1.6197348172924086e-05, "loss": 0.9326, "step": 9413 }, { "epoch": 30.865573770491803, "grad_norm": 7.213803291320801, "learning_rate": 1.6196514757091263e-05, "loss": 0.9334, "step": 9414 }, { "epoch": 30.868852459016395, "grad_norm": 7.874694347381592, "learning_rate": 1.6195681271386296e-05, "loss": 0.6929, "step": 9415 }, { "epoch": 30.872131147540983, "grad_norm": 7.471795558929443, "learning_rate": 1.6194847715818584e-05, "loss": 0.8741, "step": 9416 }, { "epoch": 30.875409836065575, "grad_norm": 10.012353897094727, "learning_rate": 1.6194014090397517e-05, "loss": 1.0677, "step": 9417 }, { "epoch": 30.878688524590164, "grad_norm": 6.958166122436523, "learning_rate": 1.6193180395132503e-05, "loss": 0.7592, "step": 9418 }, { "epoch": 30.881967213114756, "grad_norm": 8.658551216125488, "learning_rate": 1.6192346630032942e-05, "loss": 1.012, "step": 9419 }, { "epoch": 30.885245901639344, "grad_norm": 6.933803558349609, "learning_rate": 1.6191512795108234e-05, "loss": 1.002, "step": 9420 }, { "epoch": 30.888524590163936, "grad_norm": 7.897582054138184, "learning_rate": 1.619067889036778e-05, "loss": 1.0333, "step": 9421 }, { "epoch": 30.891803278688524, "grad_norm": 6.176794052124023, "learning_rate": 1.6189844915820987e-05, "loss": 1.049, "step": 9422 }, { "epoch": 30.895081967213116, "grad_norm": 9.466744422912598, "learning_rate": 1.6189010871477252e-05, "loss": 0.9948, "step": 9423 }, { "epoch": 30.898360655737704, "grad_norm": 6.5195512771606445, "learning_rate": 1.618817675734599e-05, "loss": 1.0713, "step": 9424 }, { "epoch": 30.901639344262296, "grad_norm": 7.352208614349365, "learning_rate": 1.61873425734366e-05, "loss": 0.7745, "step": 9425 }, { "epoch": 30.904918032786885, "grad_norm": 9.347522735595703, "learning_rate": 1.618650831975849e-05, "loss": 0.8855, "step": 9426 }, { "epoch": 30.908196721311477, "grad_norm": 6.905290603637695, "learning_rate": 1.6185673996321064e-05, "loss": 1.1357, "step": 9427 }, { "epoch": 30.911475409836065, "grad_norm": 8.802523612976074, "learning_rate": 1.618483960313373e-05, "loss": 1.0652, "step": 9428 }, { "epoch": 30.914754098360657, "grad_norm": 6.327103137969971, "learning_rate": 1.6184005140205904e-05, "loss": 0.9643, "step": 9429 }, { "epoch": 30.918032786885245, "grad_norm": 9.515192031860352, "learning_rate": 1.6183170607546988e-05, "loss": 0.7654, "step": 9430 }, { "epoch": 30.921311475409837, "grad_norm": 10.24642276763916, "learning_rate": 1.6182336005166394e-05, "loss": 1.062, "step": 9431 }, { "epoch": 30.924590163934425, "grad_norm": 8.338976860046387, "learning_rate": 1.618150133307353e-05, "loss": 0.981, "step": 9432 }, { "epoch": 30.927868852459017, "grad_norm": 12.38966178894043, "learning_rate": 1.618066659127782e-05, "loss": 0.9156, "step": 9433 }, { "epoch": 30.931147540983606, "grad_norm": 7.0384087562561035, "learning_rate": 1.617983177978866e-05, "loss": 0.9457, "step": 9434 }, { "epoch": 30.934426229508198, "grad_norm": 7.109312534332275, "learning_rate": 1.6178996898615476e-05, "loss": 0.9445, "step": 9435 }, { "epoch": 30.937704918032786, "grad_norm": 11.250711441040039, "learning_rate": 1.6178161947767676e-05, "loss": 1.088, "step": 9436 }, { "epoch": 30.940983606557378, "grad_norm": 6.880568027496338, "learning_rate": 1.6177326927254678e-05, "loss": 1.1193, "step": 9437 }, { "epoch": 30.944262295081966, "grad_norm": 6.194268703460693, "learning_rate": 1.6176491837085896e-05, "loss": 0.9048, "step": 9438 }, { "epoch": 30.947540983606558, "grad_norm": 15.279450416564941, "learning_rate": 1.617565667727074e-05, "loss": 0.9896, "step": 9439 }, { "epoch": 30.950819672131146, "grad_norm": 7.470844745635986, "learning_rate": 1.6174821447818642e-05, "loss": 1.0515, "step": 9440 }, { "epoch": 30.95409836065574, "grad_norm": 10.56967544555664, "learning_rate": 1.617398614873901e-05, "loss": 1.0349, "step": 9441 }, { "epoch": 30.957377049180327, "grad_norm": 7.87727689743042, "learning_rate": 1.6173150780041263e-05, "loss": 1.1287, "step": 9442 }, { "epoch": 30.96065573770492, "grad_norm": 5.437452793121338, "learning_rate": 1.6172315341734825e-05, "loss": 1.0007, "step": 9443 }, { "epoch": 30.963934426229507, "grad_norm": 6.99072790145874, "learning_rate": 1.6171479833829108e-05, "loss": 1.2749, "step": 9444 }, { "epoch": 30.9672131147541, "grad_norm": 7.623429298400879, "learning_rate": 1.6170644256333547e-05, "loss": 1.1368, "step": 9445 }, { "epoch": 30.970491803278687, "grad_norm": 7.947430610656738, "learning_rate": 1.6169808609257552e-05, "loss": 0.8721, "step": 9446 }, { "epoch": 30.97377049180328, "grad_norm": 8.43157958984375, "learning_rate": 1.6168972892610547e-05, "loss": 1.0979, "step": 9447 }, { "epoch": 30.977049180327867, "grad_norm": 7.370020389556885, "learning_rate": 1.616813710640196e-05, "loss": 1.0011, "step": 9448 }, { "epoch": 30.98032786885246, "grad_norm": 7.432772159576416, "learning_rate": 1.616730125064122e-05, "loss": 1.0329, "step": 9449 }, { "epoch": 30.983606557377048, "grad_norm": 8.243924140930176, "learning_rate": 1.616646532533774e-05, "loss": 1.229, "step": 9450 }, { "epoch": 30.98688524590164, "grad_norm": 7.448024272918701, "learning_rate": 1.6165629330500952e-05, "loss": 0.8799, "step": 9451 }, { "epoch": 30.990163934426228, "grad_norm": 7.8534255027771, "learning_rate": 1.6164793266140285e-05, "loss": 0.6866, "step": 9452 }, { "epoch": 30.99344262295082, "grad_norm": 9.207056045532227, "learning_rate": 1.6163957132265166e-05, "loss": 0.9568, "step": 9453 }, { "epoch": 30.99672131147541, "grad_norm": 7.0639519691467285, "learning_rate": 1.6163120928885016e-05, "loss": 0.9017, "step": 9454 }, { "epoch": 31.0, "grad_norm": 7.593207836151123, "learning_rate": 1.6162284656009276e-05, "loss": 1.0309, "step": 9455 }, { "epoch": 31.003278688524592, "grad_norm": 6.583510875701904, "learning_rate": 1.6161448313647365e-05, "loss": 0.9749, "step": 9456 }, { "epoch": 31.00655737704918, "grad_norm": 6.662770748138428, "learning_rate": 1.6160611901808717e-05, "loss": 0.778, "step": 9457 }, { "epoch": 31.009836065573772, "grad_norm": 7.207027912139893, "learning_rate": 1.6159775420502767e-05, "loss": 0.9316, "step": 9458 }, { "epoch": 31.01311475409836, "grad_norm": 6.305034160614014, "learning_rate": 1.6158938869738942e-05, "loss": 0.9585, "step": 9459 }, { "epoch": 31.016393442622952, "grad_norm": 6.232143402099609, "learning_rate": 1.615810224952668e-05, "loss": 1.0546, "step": 9460 }, { "epoch": 31.01967213114754, "grad_norm": 14.927643775939941, "learning_rate": 1.615726555987541e-05, "loss": 0.7766, "step": 9461 }, { "epoch": 31.022950819672133, "grad_norm": 8.34843921661377, "learning_rate": 1.6156428800794574e-05, "loss": 0.7682, "step": 9462 }, { "epoch": 31.02622950819672, "grad_norm": 6.480865001678467, "learning_rate": 1.61555919722936e-05, "loss": 1.0229, "step": 9463 }, { "epoch": 31.029508196721313, "grad_norm": 6.233292102813721, "learning_rate": 1.615475507438192e-05, "loss": 0.7468, "step": 9464 }, { "epoch": 31.0327868852459, "grad_norm": 6.301235198974609, "learning_rate": 1.6153918107068983e-05, "loss": 0.7849, "step": 9465 }, { "epoch": 31.036065573770493, "grad_norm": 6.792318820953369, "learning_rate": 1.615308107036422e-05, "loss": 1.1579, "step": 9466 }, { "epoch": 31.03934426229508, "grad_norm": 6.619289398193359, "learning_rate": 1.615224396427707e-05, "loss": 1.0227, "step": 9467 }, { "epoch": 31.042622950819673, "grad_norm": 7.76638650894165, "learning_rate": 1.6151406788816975e-05, "loss": 0.7001, "step": 9468 }, { "epoch": 31.04590163934426, "grad_norm": 5.99716854095459, "learning_rate": 1.6150569543993367e-05, "loss": 1.0784, "step": 9469 }, { "epoch": 31.049180327868854, "grad_norm": 9.571444511413574, "learning_rate": 1.6149732229815698e-05, "loss": 1.0071, "step": 9470 }, { "epoch": 31.052459016393442, "grad_norm": 6.587896347045898, "learning_rate": 1.61488948462934e-05, "loss": 0.6113, "step": 9471 }, { "epoch": 31.055737704918034, "grad_norm": 6.872680187225342, "learning_rate": 1.6148057393435922e-05, "loss": 1.0299, "step": 9472 }, { "epoch": 31.059016393442622, "grad_norm": 6.935592174530029, "learning_rate": 1.6147219871252705e-05, "loss": 0.7878, "step": 9473 }, { "epoch": 31.062295081967214, "grad_norm": 5.468573093414307, "learning_rate": 1.614638227975319e-05, "loss": 0.814, "step": 9474 }, { "epoch": 31.065573770491802, "grad_norm": 6.6004557609558105, "learning_rate": 1.6145544618946826e-05, "loss": 1.0177, "step": 9475 }, { "epoch": 31.068852459016394, "grad_norm": 17.318988800048828, "learning_rate": 1.6144706888843057e-05, "loss": 0.884, "step": 9476 }, { "epoch": 31.072131147540983, "grad_norm": 8.451966285705566, "learning_rate": 1.6143869089451326e-05, "loss": 0.5807, "step": 9477 }, { "epoch": 31.075409836065575, "grad_norm": 6.026425838470459, "learning_rate": 1.614303122078109e-05, "loss": 1.0433, "step": 9478 }, { "epoch": 31.078688524590163, "grad_norm": 7.613195419311523, "learning_rate": 1.614219328284178e-05, "loss": 0.7798, "step": 9479 }, { "epoch": 31.081967213114755, "grad_norm": 5.706394195556641, "learning_rate": 1.614135527564286e-05, "loss": 0.757, "step": 9480 }, { "epoch": 31.085245901639343, "grad_norm": 8.732918739318848, "learning_rate": 1.6140517199193776e-05, "loss": 0.6672, "step": 9481 }, { "epoch": 31.088524590163935, "grad_norm": 8.763833999633789, "learning_rate": 1.613967905350397e-05, "loss": 0.9282, "step": 9482 }, { "epoch": 31.091803278688523, "grad_norm": 13.106245040893555, "learning_rate": 1.6138840838582904e-05, "loss": 0.7421, "step": 9483 }, { "epoch": 31.095081967213115, "grad_norm": 6.450795650482178, "learning_rate": 1.613800255444002e-05, "loss": 1.0643, "step": 9484 }, { "epoch": 31.098360655737704, "grad_norm": 7.636554718017578, "learning_rate": 1.613716420108478e-05, "loss": 1.1402, "step": 9485 }, { "epoch": 31.101639344262296, "grad_norm": 6.503547668457031, "learning_rate": 1.613632577852663e-05, "loss": 0.7535, "step": 9486 }, { "epoch": 31.104918032786884, "grad_norm": 8.81037712097168, "learning_rate": 1.6135487286775028e-05, "loss": 1.0007, "step": 9487 }, { "epoch": 31.108196721311476, "grad_norm": 9.544503211975098, "learning_rate": 1.6134648725839427e-05, "loss": 0.965, "step": 9488 }, { "epoch": 31.111475409836064, "grad_norm": 7.220677375793457, "learning_rate": 1.6133810095729284e-05, "loss": 0.9988, "step": 9489 }, { "epoch": 31.114754098360656, "grad_norm": 7.009112358093262, "learning_rate": 1.6132971396454052e-05, "loss": 1.0618, "step": 9490 }, { "epoch": 31.118032786885244, "grad_norm": 7.519686698913574, "learning_rate": 1.6132132628023192e-05, "loss": 0.9308, "step": 9491 }, { "epoch": 31.121311475409836, "grad_norm": 7.515139579772949, "learning_rate": 1.6131293790446162e-05, "loss": 1.0211, "step": 9492 }, { "epoch": 31.124590163934425, "grad_norm": 8.251132011413574, "learning_rate": 1.6130454883732417e-05, "loss": 1.1368, "step": 9493 }, { "epoch": 31.127868852459017, "grad_norm": 6.486266136169434, "learning_rate": 1.6129615907891424e-05, "loss": 0.7414, "step": 9494 }, { "epoch": 31.131147540983605, "grad_norm": 6.121373653411865, "learning_rate": 1.6128776862932635e-05, "loss": 1.1012, "step": 9495 }, { "epoch": 31.134426229508197, "grad_norm": 6.82420015335083, "learning_rate": 1.6127937748865515e-05, "loss": 1.1469, "step": 9496 }, { "epoch": 31.137704918032785, "grad_norm": 8.118711471557617, "learning_rate": 1.6127098565699524e-05, "loss": 0.9765, "step": 9497 }, { "epoch": 31.140983606557377, "grad_norm": 6.7804036140441895, "learning_rate": 1.612625931344413e-05, "loss": 1.1382, "step": 9498 }, { "epoch": 31.14426229508197, "grad_norm": 7.917726993560791, "learning_rate": 1.6125419992108788e-05, "loss": 0.7874, "step": 9499 }, { "epoch": 31.147540983606557, "grad_norm": 10.016959190368652, "learning_rate": 1.612458060170297e-05, "loss": 1.0247, "step": 9500 }, { "epoch": 31.15081967213115, "grad_norm": 6.453327178955078, "learning_rate": 1.6123741142236132e-05, "loss": 1.1333, "step": 9501 }, { "epoch": 31.154098360655738, "grad_norm": 6.692563533782959, "learning_rate": 1.6122901613717753e-05, "loss": 0.9283, "step": 9502 }, { "epoch": 31.15737704918033, "grad_norm": 11.075929641723633, "learning_rate": 1.6122062016157288e-05, "loss": 0.916, "step": 9503 }, { "epoch": 31.160655737704918, "grad_norm": 8.798147201538086, "learning_rate": 1.612122234956421e-05, "loss": 0.8812, "step": 9504 }, { "epoch": 31.16393442622951, "grad_norm": 7.473660469055176, "learning_rate": 1.6120382613947986e-05, "loss": 1.0942, "step": 9505 }, { "epoch": 31.167213114754098, "grad_norm": 7.665369033813477, "learning_rate": 1.6119542809318082e-05, "loss": 0.9998, "step": 9506 }, { "epoch": 31.17049180327869, "grad_norm": 7.203730583190918, "learning_rate": 1.6118702935683975e-05, "loss": 0.9539, "step": 9507 }, { "epoch": 31.17377049180328, "grad_norm": 5.8989081382751465, "learning_rate": 1.6117862993055125e-05, "loss": 1.0676, "step": 9508 }, { "epoch": 31.17704918032787, "grad_norm": 8.278525352478027, "learning_rate": 1.611702298144101e-05, "loss": 0.9072, "step": 9509 }, { "epoch": 31.18032786885246, "grad_norm": 5.6460652351379395, "learning_rate": 1.6116182900851104e-05, "loss": 1.2485, "step": 9510 }, { "epoch": 31.18360655737705, "grad_norm": 5.189024448394775, "learning_rate": 1.6115342751294873e-05, "loss": 1.1581, "step": 9511 }, { "epoch": 31.18688524590164, "grad_norm": 5.079197883605957, "learning_rate": 1.6114502532781794e-05, "loss": 1.0929, "step": 9512 }, { "epoch": 31.19016393442623, "grad_norm": 5.518466472625732, "learning_rate": 1.6113662245321342e-05, "loss": 1.0814, "step": 9513 }, { "epoch": 31.19344262295082, "grad_norm": 8.496190071105957, "learning_rate": 1.6112821888922992e-05, "loss": 0.9138, "step": 9514 }, { "epoch": 31.19672131147541, "grad_norm": 5.751200199127197, "learning_rate": 1.611198146359622e-05, "loss": 0.97, "step": 9515 }, { "epoch": 31.2, "grad_norm": 7.1508684158325195, "learning_rate": 1.6111140969350504e-05, "loss": 0.9849, "step": 9516 }, { "epoch": 31.20327868852459, "grad_norm": 5.619915962219238, "learning_rate": 1.6110300406195318e-05, "loss": 1.0913, "step": 9517 }, { "epoch": 31.20655737704918, "grad_norm": 6.725955009460449, "learning_rate": 1.6109459774140138e-05, "loss": 1.0837, "step": 9518 }, { "epoch": 31.20983606557377, "grad_norm": 5.819117546081543, "learning_rate": 1.6108619073194454e-05, "loss": 0.8509, "step": 9519 }, { "epoch": 31.21311475409836, "grad_norm": 8.475717544555664, "learning_rate": 1.6107778303367735e-05, "loss": 0.9572, "step": 9520 }, { "epoch": 31.21639344262295, "grad_norm": 7.723242282867432, "learning_rate": 1.6106937464669462e-05, "loss": 1.0234, "step": 9521 }, { "epoch": 31.21967213114754, "grad_norm": 7.613770484924316, "learning_rate": 1.6106096557109125e-05, "loss": 1.0819, "step": 9522 }, { "epoch": 31.222950819672132, "grad_norm": 6.78794527053833, "learning_rate": 1.6105255580696197e-05, "loss": 0.9727, "step": 9523 }, { "epoch": 31.22622950819672, "grad_norm": 9.753938674926758, "learning_rate": 1.6104414535440164e-05, "loss": 1.11, "step": 9524 }, { "epoch": 31.229508196721312, "grad_norm": 7.775892734527588, "learning_rate": 1.610357342135051e-05, "loss": 0.7872, "step": 9525 }, { "epoch": 31.2327868852459, "grad_norm": 8.212749481201172, "learning_rate": 1.6102732238436724e-05, "loss": 1.0476, "step": 9526 }, { "epoch": 31.236065573770492, "grad_norm": 5.363457679748535, "learning_rate": 1.6101890986708282e-05, "loss": 0.9572, "step": 9527 }, { "epoch": 31.23934426229508, "grad_norm": 6.9795708656311035, "learning_rate": 1.6101049666174677e-05, "loss": 1.4058, "step": 9528 }, { "epoch": 31.242622950819673, "grad_norm": 10.375450134277344, "learning_rate": 1.6100208276845394e-05, "loss": 0.8231, "step": 9529 }, { "epoch": 31.24590163934426, "grad_norm": 6.284091472625732, "learning_rate": 1.6099366818729918e-05, "loss": 1.1248, "step": 9530 }, { "epoch": 31.249180327868853, "grad_norm": 5.976312160491943, "learning_rate": 1.6098525291837738e-05, "loss": 1.0136, "step": 9531 }, { "epoch": 31.25245901639344, "grad_norm": 7.6934919357299805, "learning_rate": 1.609768369617835e-05, "loss": 0.9505, "step": 9532 }, { "epoch": 31.255737704918033, "grad_norm": 6.3896331787109375, "learning_rate": 1.6096842031761235e-05, "loss": 0.7033, "step": 9533 }, { "epoch": 31.25901639344262, "grad_norm": 6.959906101226807, "learning_rate": 1.6096000298595885e-05, "loss": 0.9414, "step": 9534 }, { "epoch": 31.262295081967213, "grad_norm": 40.720401763916016, "learning_rate": 1.6095158496691795e-05, "loss": 0.8312, "step": 9535 }, { "epoch": 31.2655737704918, "grad_norm": 8.116585731506348, "learning_rate": 1.6094316626058456e-05, "loss": 0.9767, "step": 9536 }, { "epoch": 31.268852459016394, "grad_norm": 6.016963481903076, "learning_rate": 1.6093474686705365e-05, "loss": 0.8159, "step": 9537 }, { "epoch": 31.272131147540982, "grad_norm": 11.290014266967773, "learning_rate": 1.6092632678642004e-05, "loss": 1.0064, "step": 9538 }, { "epoch": 31.275409836065574, "grad_norm": 8.612733840942383, "learning_rate": 1.609179060187788e-05, "loss": 1.0649, "step": 9539 }, { "epoch": 31.278688524590162, "grad_norm": 5.745126247406006, "learning_rate": 1.6090948456422477e-05, "loss": 0.8985, "step": 9540 }, { "epoch": 31.281967213114754, "grad_norm": 7.4009013175964355, "learning_rate": 1.6090106242285304e-05, "loss": 1.0243, "step": 9541 }, { "epoch": 31.285245901639342, "grad_norm": 6.5889763832092285, "learning_rate": 1.6089263959475847e-05, "loss": 1.0175, "step": 9542 }, { "epoch": 31.288524590163934, "grad_norm": 7.623071670532227, "learning_rate": 1.6088421608003608e-05, "loss": 1.1003, "step": 9543 }, { "epoch": 31.291803278688526, "grad_norm": 8.65993595123291, "learning_rate": 1.6087579187878085e-05, "loss": 1.0532, "step": 9544 }, { "epoch": 31.295081967213115, "grad_norm": 8.520206451416016, "learning_rate": 1.6086736699108782e-05, "loss": 1.0588, "step": 9545 }, { "epoch": 31.298360655737707, "grad_norm": 5.824670791625977, "learning_rate": 1.6085894141705188e-05, "loss": 1.3405, "step": 9546 }, { "epoch": 31.301639344262295, "grad_norm": 6.5522565841674805, "learning_rate": 1.608505151567681e-05, "loss": 1.1621, "step": 9547 }, { "epoch": 31.304918032786887, "grad_norm": 6.262831687927246, "learning_rate": 1.6084208821033152e-05, "loss": 1.2341, "step": 9548 }, { "epoch": 31.308196721311475, "grad_norm": 6.477989196777344, "learning_rate": 1.6083366057783713e-05, "loss": 0.9922, "step": 9549 }, { "epoch": 31.311475409836067, "grad_norm": 6.023189544677734, "learning_rate": 1.6082523225937995e-05, "loss": 0.8839, "step": 9550 }, { "epoch": 31.314754098360655, "grad_norm": 7.277851581573486, "learning_rate": 1.6081680325505503e-05, "loss": 0.76, "step": 9551 }, { "epoch": 31.318032786885247, "grad_norm": 6.6693620681762695, "learning_rate": 1.6080837356495745e-05, "loss": 0.9212, "step": 9552 }, { "epoch": 31.321311475409836, "grad_norm": 6.831274509429932, "learning_rate": 1.607999431891822e-05, "loss": 0.8409, "step": 9553 }, { "epoch": 31.324590163934428, "grad_norm": 8.034168243408203, "learning_rate": 1.607915121278244e-05, "loss": 1.15, "step": 9554 }, { "epoch": 31.327868852459016, "grad_norm": 5.8452467918396, "learning_rate": 1.607830803809791e-05, "loss": 1.0781, "step": 9555 }, { "epoch": 31.331147540983608, "grad_norm": 5.838291645050049, "learning_rate": 1.6077464794874137e-05, "loss": 0.7872, "step": 9556 }, { "epoch": 31.334426229508196, "grad_norm": 6.977620601654053, "learning_rate": 1.6076621483120626e-05, "loss": 0.9363, "step": 9557 }, { "epoch": 31.337704918032788, "grad_norm": 5.090821266174316, "learning_rate": 1.6075778102846892e-05, "loss": 0.9017, "step": 9558 }, { "epoch": 31.340983606557376, "grad_norm": 10.82612133026123, "learning_rate": 1.607493465406244e-05, "loss": 0.8295, "step": 9559 }, { "epoch": 31.34426229508197, "grad_norm": 6.053873538970947, "learning_rate": 1.6074091136776788e-05, "loss": 0.9807, "step": 9560 }, { "epoch": 31.347540983606557, "grad_norm": 6.1502156257629395, "learning_rate": 1.607324755099944e-05, "loss": 0.9701, "step": 9561 }, { "epoch": 31.35081967213115, "grad_norm": 6.1030144691467285, "learning_rate": 1.607240389673991e-05, "loss": 0.9789, "step": 9562 }, { "epoch": 31.354098360655737, "grad_norm": 7.278891563415527, "learning_rate": 1.6071560174007717e-05, "loss": 0.9575, "step": 9563 }, { "epoch": 31.35737704918033, "grad_norm": 6.486817836761475, "learning_rate": 1.607071638281237e-05, "loss": 0.9574, "step": 9564 }, { "epoch": 31.360655737704917, "grad_norm": 7.162229537963867, "learning_rate": 1.6069872523163378e-05, "loss": 0.9999, "step": 9565 }, { "epoch": 31.36393442622951, "grad_norm": 6.784564971923828, "learning_rate": 1.6069028595070266e-05, "loss": 0.9285, "step": 9566 }, { "epoch": 31.367213114754097, "grad_norm": 7.661242485046387, "learning_rate": 1.606818459854255e-05, "loss": 0.8783, "step": 9567 }, { "epoch": 31.37049180327869, "grad_norm": 6.983620643615723, "learning_rate": 1.6067340533589737e-05, "loss": 0.965, "step": 9568 }, { "epoch": 31.373770491803278, "grad_norm": 8.913742065429688, "learning_rate": 1.6066496400221355e-05, "loss": 0.8442, "step": 9569 }, { "epoch": 31.37704918032787, "grad_norm": 6.009003162384033, "learning_rate": 1.6065652198446914e-05, "loss": 1.04, "step": 9570 }, { "epoch": 31.380327868852458, "grad_norm": 7.346618175506592, "learning_rate": 1.606480792827594e-05, "loss": 1.0567, "step": 9571 }, { "epoch": 31.38360655737705, "grad_norm": 6.94974422454834, "learning_rate": 1.606396358971795e-05, "loss": 1.0403, "step": 9572 }, { "epoch": 31.386885245901638, "grad_norm": 7.752979278564453, "learning_rate": 1.606311918278247e-05, "loss": 0.9335, "step": 9573 }, { "epoch": 31.39016393442623, "grad_norm": 6.844838619232178, "learning_rate": 1.6062274707479013e-05, "loss": 0.9924, "step": 9574 }, { "epoch": 31.39344262295082, "grad_norm": 6.300386428833008, "learning_rate": 1.6061430163817108e-05, "loss": 1.0258, "step": 9575 }, { "epoch": 31.39672131147541, "grad_norm": 9.469972610473633, "learning_rate": 1.6060585551806274e-05, "loss": 0.9388, "step": 9576 }, { "epoch": 31.4, "grad_norm": 6.886125087738037, "learning_rate": 1.6059740871456035e-05, "loss": 1.0775, "step": 9577 }, { "epoch": 31.40327868852459, "grad_norm": 6.513442516326904, "learning_rate": 1.605889612277592e-05, "loss": 0.903, "step": 9578 }, { "epoch": 31.40655737704918, "grad_norm": 8.065661430358887, "learning_rate": 1.6058051305775452e-05, "loss": 1.0835, "step": 9579 }, { "epoch": 31.40983606557377, "grad_norm": 18.73785400390625, "learning_rate": 1.6057206420464156e-05, "loss": 0.8441, "step": 9580 }, { "epoch": 31.41311475409836, "grad_norm": 6.82932186126709, "learning_rate": 1.6056361466851554e-05, "loss": 1.0259, "step": 9581 }, { "epoch": 31.41639344262295, "grad_norm": 5.822592735290527, "learning_rate": 1.605551644494719e-05, "loss": 1.0323, "step": 9582 }, { "epoch": 31.41967213114754, "grad_norm": 7.998687267303467, "learning_rate": 1.605467135476057e-05, "loss": 1.1166, "step": 9583 }, { "epoch": 31.42295081967213, "grad_norm": 7.061553001403809, "learning_rate": 1.6053826196301244e-05, "loss": 1.111, "step": 9584 }, { "epoch": 31.42622950819672, "grad_norm": 5.615558624267578, "learning_rate": 1.6052980969578732e-05, "loss": 0.9767, "step": 9585 }, { "epoch": 31.42950819672131, "grad_norm": 8.542166709899902, "learning_rate": 1.6052135674602563e-05, "loss": 1.1987, "step": 9586 }, { "epoch": 31.432786885245903, "grad_norm": 10.446949005126953, "learning_rate": 1.6051290311382274e-05, "loss": 0.9183, "step": 9587 }, { "epoch": 31.43606557377049, "grad_norm": 7.132355690002441, "learning_rate": 1.6050444879927392e-05, "loss": 1.1473, "step": 9588 }, { "epoch": 31.439344262295084, "grad_norm": 7.01995849609375, "learning_rate": 1.6049599380247456e-05, "loss": 0.8222, "step": 9589 }, { "epoch": 31.442622950819672, "grad_norm": 6.661880016326904, "learning_rate": 1.6048753812352e-05, "loss": 0.8658, "step": 9590 }, { "epoch": 31.445901639344264, "grad_norm": 7.40562105178833, "learning_rate": 1.604790817625055e-05, "loss": 1.0468, "step": 9591 }, { "epoch": 31.449180327868852, "grad_norm": 8.158885955810547, "learning_rate": 1.6047062471952647e-05, "loss": 0.7485, "step": 9592 }, { "epoch": 31.452459016393444, "grad_norm": 6.033226013183594, "learning_rate": 1.604621669946783e-05, "loss": 1.0237, "step": 9593 }, { "epoch": 31.455737704918032, "grad_norm": 7.340654373168945, "learning_rate": 1.6045370858805633e-05, "loss": 1.0496, "step": 9594 }, { "epoch": 31.459016393442624, "grad_norm": 6.957544803619385, "learning_rate": 1.6044524949975593e-05, "loss": 0.9113, "step": 9595 }, { "epoch": 31.462295081967213, "grad_norm": 7.334590435028076, "learning_rate": 1.604367897298725e-05, "loss": 0.9644, "step": 9596 }, { "epoch": 31.465573770491805, "grad_norm": 7.048424243927002, "learning_rate": 1.6042832927850142e-05, "loss": 1.0233, "step": 9597 }, { "epoch": 31.468852459016393, "grad_norm": 6.160795211791992, "learning_rate": 1.604198681457381e-05, "loss": 0.7995, "step": 9598 }, { "epoch": 31.472131147540985, "grad_norm": 7.283331394195557, "learning_rate": 1.6041140633167795e-05, "loss": 1.0255, "step": 9599 }, { "epoch": 31.475409836065573, "grad_norm": 7.8004961013793945, "learning_rate": 1.604029438364164e-05, "loss": 0.8213, "step": 9600 }, { "epoch": 31.478688524590165, "grad_norm": 7.094579219818115, "learning_rate": 1.6039448066004882e-05, "loss": 0.8802, "step": 9601 }, { "epoch": 31.481967213114753, "grad_norm": 9.7326078414917, "learning_rate": 1.603860168026707e-05, "loss": 0.7637, "step": 9602 }, { "epoch": 31.485245901639345, "grad_norm": 7.7806854248046875, "learning_rate": 1.6037755226437742e-05, "loss": 0.9304, "step": 9603 }, { "epoch": 31.488524590163934, "grad_norm": 8.188004493713379, "learning_rate": 1.603690870452645e-05, "loss": 0.869, "step": 9604 }, { "epoch": 31.491803278688526, "grad_norm": 8.575849533081055, "learning_rate": 1.6036062114542734e-05, "loss": 0.796, "step": 9605 }, { "epoch": 31.495081967213114, "grad_norm": 6.873724460601807, "learning_rate": 1.6035215456496145e-05, "loss": 0.7364, "step": 9606 }, { "epoch": 31.498360655737706, "grad_norm": 6.1224565505981445, "learning_rate": 1.6034368730396225e-05, "loss": 0.9227, "step": 9607 }, { "epoch": 31.501639344262294, "grad_norm": 6.359044075012207, "learning_rate": 1.6033521936252522e-05, "loss": 0.955, "step": 9608 }, { "epoch": 31.504918032786886, "grad_norm": 5.840404510498047, "learning_rate": 1.6032675074074588e-05, "loss": 1.0833, "step": 9609 }, { "epoch": 31.508196721311474, "grad_norm": 7.387471675872803, "learning_rate": 1.6031828143871962e-05, "loss": 1.0935, "step": 9610 }, { "epoch": 31.511475409836066, "grad_norm": 6.763721466064453, "learning_rate": 1.603098114565421e-05, "loss": 0.9558, "step": 9611 }, { "epoch": 31.514754098360655, "grad_norm": 18.015913009643555, "learning_rate": 1.6030134079430874e-05, "loss": 1.017, "step": 9612 }, { "epoch": 31.518032786885247, "grad_norm": 5.89029598236084, "learning_rate": 1.6029286945211507e-05, "loss": 0.9033, "step": 9613 }, { "epoch": 31.521311475409835, "grad_norm": 7.543638706207275, "learning_rate": 1.6028439743005657e-05, "loss": 1.0742, "step": 9614 }, { "epoch": 31.524590163934427, "grad_norm": 8.252734184265137, "learning_rate": 1.6027592472822885e-05, "loss": 0.9081, "step": 9615 }, { "epoch": 31.527868852459015, "grad_norm": 6.302370071411133, "learning_rate": 1.602674513467274e-05, "loss": 0.9741, "step": 9616 }, { "epoch": 31.531147540983607, "grad_norm": 8.349111557006836, "learning_rate": 1.6025897728564775e-05, "loss": 0.8233, "step": 9617 }, { "epoch": 31.534426229508195, "grad_norm": 7.866089820861816, "learning_rate": 1.602505025450855e-05, "loss": 1.0209, "step": 9618 }, { "epoch": 31.537704918032787, "grad_norm": 7.102470397949219, "learning_rate": 1.602420271251362e-05, "loss": 0.9453, "step": 9619 }, { "epoch": 31.540983606557376, "grad_norm": 6.776553630828857, "learning_rate": 1.6023355102589534e-05, "loss": 0.8453, "step": 9620 }, { "epoch": 31.544262295081968, "grad_norm": 6.4155426025390625, "learning_rate": 1.6022507424745864e-05, "loss": 1.1015, "step": 9621 }, { "epoch": 31.547540983606556, "grad_norm": 7.8675150871276855, "learning_rate": 1.6021659678992162e-05, "loss": 0.9535, "step": 9622 }, { "epoch": 31.550819672131148, "grad_norm": 7.400248050689697, "learning_rate": 1.602081186533798e-05, "loss": 0.9374, "step": 9623 }, { "epoch": 31.554098360655736, "grad_norm": 6.944141864776611, "learning_rate": 1.601996398379289e-05, "loss": 0.6849, "step": 9624 }, { "epoch": 31.557377049180328, "grad_norm": 13.55089282989502, "learning_rate": 1.6019116034366442e-05, "loss": 0.7993, "step": 9625 }, { "epoch": 31.560655737704916, "grad_norm": 10.307243347167969, "learning_rate": 1.6018268017068203e-05, "loss": 0.9222, "step": 9626 }, { "epoch": 31.56393442622951, "grad_norm": 7.424251556396484, "learning_rate": 1.6017419931907734e-05, "loss": 0.9142, "step": 9627 }, { "epoch": 31.567213114754097, "grad_norm": 9.884879112243652, "learning_rate": 1.60165717788946e-05, "loss": 1.0197, "step": 9628 }, { "epoch": 31.57049180327869, "grad_norm": 7.901280403137207, "learning_rate": 1.6015723558038366e-05, "loss": 0.8143, "step": 9629 }, { "epoch": 31.57377049180328, "grad_norm": 7.2903666496276855, "learning_rate": 1.601487526934859e-05, "loss": 0.7931, "step": 9630 }, { "epoch": 31.57704918032787, "grad_norm": 8.021200180053711, "learning_rate": 1.6014026912834845e-05, "loss": 1.1721, "step": 9631 }, { "epoch": 31.58032786885246, "grad_norm": 7.435358047485352, "learning_rate": 1.6013178488506694e-05, "loss": 1.1715, "step": 9632 }, { "epoch": 31.58360655737705, "grad_norm": 13.538307189941406, "learning_rate": 1.6012329996373697e-05, "loss": 0.9592, "step": 9633 }, { "epoch": 31.58688524590164, "grad_norm": 7.786563873291016, "learning_rate": 1.6011481436445434e-05, "loss": 0.81, "step": 9634 }, { "epoch": 31.59016393442623, "grad_norm": 9.264138221740723, "learning_rate": 1.601063280873147e-05, "loss": 1.0012, "step": 9635 }, { "epoch": 31.59344262295082, "grad_norm": 7.667067527770996, "learning_rate": 1.6009784113241366e-05, "loss": 0.8217, "step": 9636 }, { "epoch": 31.59672131147541, "grad_norm": 7.28544282913208, "learning_rate": 1.6008935349984697e-05, "loss": 1.0631, "step": 9637 }, { "epoch": 31.6, "grad_norm": 8.66651439666748, "learning_rate": 1.6008086518971037e-05, "loss": 0.7749, "step": 9638 }, { "epoch": 31.60327868852459, "grad_norm": 11.70030403137207, "learning_rate": 1.6007237620209954e-05, "loss": 1.0634, "step": 9639 }, { "epoch": 31.60655737704918, "grad_norm": 8.165590286254883, "learning_rate": 1.600638865371102e-05, "loss": 0.8045, "step": 9640 }, { "epoch": 31.60983606557377, "grad_norm": 7.381747722625732, "learning_rate": 1.6005539619483812e-05, "loss": 1.037, "step": 9641 }, { "epoch": 31.613114754098362, "grad_norm": 9.376568794250488, "learning_rate": 1.60046905175379e-05, "loss": 0.8405, "step": 9642 }, { "epoch": 31.61639344262295, "grad_norm": 5.322335720062256, "learning_rate": 1.6003841347882855e-05, "loss": 1.0703, "step": 9643 }, { "epoch": 31.619672131147542, "grad_norm": 11.223467826843262, "learning_rate": 1.6002992110528256e-05, "loss": 1.0044, "step": 9644 }, { "epoch": 31.62295081967213, "grad_norm": 8.336038589477539, "learning_rate": 1.6002142805483686e-05, "loss": 1.0221, "step": 9645 }, { "epoch": 31.626229508196722, "grad_norm": 6.693852424621582, "learning_rate": 1.6001293432758707e-05, "loss": 0.8651, "step": 9646 }, { "epoch": 31.62950819672131, "grad_norm": 7.845508575439453, "learning_rate": 1.600044399236291e-05, "loss": 0.9573, "step": 9647 }, { "epoch": 31.632786885245903, "grad_norm": 8.219027519226074, "learning_rate": 1.599959448430587e-05, "loss": 1.0994, "step": 9648 }, { "epoch": 31.63606557377049, "grad_norm": 7.975203990936279, "learning_rate": 1.599874490859716e-05, "loss": 0.9886, "step": 9649 }, { "epoch": 31.639344262295083, "grad_norm": 9.698914527893066, "learning_rate": 1.5997895265246366e-05, "loss": 0.8828, "step": 9650 }, { "epoch": 31.64262295081967, "grad_norm": 7.539062976837158, "learning_rate": 1.5997045554263066e-05, "loss": 1.0159, "step": 9651 }, { "epoch": 31.645901639344263, "grad_norm": 6.498290061950684, "learning_rate": 1.5996195775656843e-05, "loss": 1.1377, "step": 9652 }, { "epoch": 31.64918032786885, "grad_norm": 5.963698387145996, "learning_rate": 1.5995345929437275e-05, "loss": 1.397, "step": 9653 }, { "epoch": 31.652459016393443, "grad_norm": 8.440017700195312, "learning_rate": 1.599449601561395e-05, "loss": 1.0852, "step": 9654 }, { "epoch": 31.65573770491803, "grad_norm": 9.969924926757812, "learning_rate": 1.599364603419645e-05, "loss": 0.9271, "step": 9655 }, { "epoch": 31.659016393442624, "grad_norm": 6.880777835845947, "learning_rate": 1.599279598519436e-05, "loss": 0.91, "step": 9656 }, { "epoch": 31.662295081967212, "grad_norm": 6.181086540222168, "learning_rate": 1.5991945868617263e-05, "loss": 0.9917, "step": 9657 }, { "epoch": 31.665573770491804, "grad_norm": 6.537873268127441, "learning_rate": 1.5991095684474748e-05, "loss": 0.9314, "step": 9658 }, { "epoch": 31.668852459016392, "grad_norm": 7.024059295654297, "learning_rate": 1.5990245432776395e-05, "loss": 0.894, "step": 9659 }, { "epoch": 31.672131147540984, "grad_norm": 8.76817798614502, "learning_rate": 1.59893951135318e-05, "loss": 0.7467, "step": 9660 }, { "epoch": 31.675409836065572, "grad_norm": 6.598752021789551, "learning_rate": 1.598854472675055e-05, "loss": 0.8911, "step": 9661 }, { "epoch": 31.678688524590164, "grad_norm": 7.504064559936523, "learning_rate": 1.5987694272442228e-05, "loss": 1.0229, "step": 9662 }, { "epoch": 31.681967213114753, "grad_norm": 6.813388824462891, "learning_rate": 1.5986843750616432e-05, "loss": 1.2087, "step": 9663 }, { "epoch": 31.685245901639345, "grad_norm": 10.400849342346191, "learning_rate": 1.5985993161282744e-05, "loss": 1.1387, "step": 9664 }, { "epoch": 31.688524590163933, "grad_norm": 7.968660354614258, "learning_rate": 1.5985142504450762e-05, "loss": 0.9408, "step": 9665 }, { "epoch": 31.691803278688525, "grad_norm": 7.835018634796143, "learning_rate": 1.598429178013007e-05, "loss": 0.9167, "step": 9666 }, { "epoch": 31.695081967213113, "grad_norm": 8.134323120117188, "learning_rate": 1.598344098833027e-05, "loss": 1.447, "step": 9667 }, { "epoch": 31.698360655737705, "grad_norm": 8.340340614318848, "learning_rate": 1.598259012906095e-05, "loss": 1.0023, "step": 9668 }, { "epoch": 31.701639344262293, "grad_norm": 7.8261284828186035, "learning_rate": 1.598173920233171e-05, "loss": 1.077, "step": 9669 }, { "epoch": 31.704918032786885, "grad_norm": 7.590132713317871, "learning_rate": 1.5980888208152135e-05, "loss": 0.8473, "step": 9670 }, { "epoch": 31.708196721311474, "grad_norm": 7.508799076080322, "learning_rate": 1.5980037146531832e-05, "loss": 1.1044, "step": 9671 }, { "epoch": 31.711475409836066, "grad_norm": 8.83678913116455, "learning_rate": 1.5979186017480388e-05, "loss": 0.791, "step": 9672 }, { "epoch": 31.714754098360658, "grad_norm": 7.47482967376709, "learning_rate": 1.5978334821007408e-05, "loss": 0.9658, "step": 9673 }, { "epoch": 31.718032786885246, "grad_norm": 8.089621543884277, "learning_rate": 1.5977483557122488e-05, "loss": 0.9971, "step": 9674 }, { "epoch": 31.721311475409838, "grad_norm": 9.450437545776367, "learning_rate": 1.5976632225835223e-05, "loss": 0.9219, "step": 9675 }, { "epoch": 31.724590163934426, "grad_norm": 5.632599353790283, "learning_rate": 1.5975780827155218e-05, "loss": 0.9031, "step": 9676 }, { "epoch": 31.727868852459018, "grad_norm": 7.634968280792236, "learning_rate": 1.5974929361092068e-05, "loss": 0.8392, "step": 9677 }, { "epoch": 31.731147540983606, "grad_norm": 6.982405185699463, "learning_rate": 1.597407782765538e-05, "loss": 0.9296, "step": 9678 }, { "epoch": 31.7344262295082, "grad_norm": 6.869833469390869, "learning_rate": 1.597322622685475e-05, "loss": 1.0388, "step": 9679 }, { "epoch": 31.737704918032787, "grad_norm": 7.185217380523682, "learning_rate": 1.5972374558699786e-05, "loss": 1.0018, "step": 9680 }, { "epoch": 31.74098360655738, "grad_norm": 7.566317558288574, "learning_rate": 1.5971522823200088e-05, "loss": 0.7977, "step": 9681 }, { "epoch": 31.744262295081967, "grad_norm": 8.584749221801758, "learning_rate": 1.5970671020365264e-05, "loss": 1.1252, "step": 9682 }, { "epoch": 31.74754098360656, "grad_norm": 8.521985054016113, "learning_rate": 1.596981915020491e-05, "loss": 1.0173, "step": 9683 }, { "epoch": 31.750819672131147, "grad_norm": 11.152631759643555, "learning_rate": 1.5968967212728644e-05, "loss": 0.8227, "step": 9684 }, { "epoch": 31.75409836065574, "grad_norm": 6.631721496582031, "learning_rate": 1.5968115207946065e-05, "loss": 0.7975, "step": 9685 }, { "epoch": 31.757377049180327, "grad_norm": 17.29890251159668, "learning_rate": 1.5967263135866783e-05, "loss": 1.1984, "step": 9686 }, { "epoch": 31.76065573770492, "grad_norm": 7.021842002868652, "learning_rate": 1.5966410996500402e-05, "loss": 0.8172, "step": 9687 }, { "epoch": 31.763934426229508, "grad_norm": 7.65458869934082, "learning_rate": 1.5965558789856533e-05, "loss": 1.064, "step": 9688 }, { "epoch": 31.7672131147541, "grad_norm": 6.7663044929504395, "learning_rate": 1.596470651594479e-05, "loss": 1.1281, "step": 9689 }, { "epoch": 31.770491803278688, "grad_norm": 8.311135292053223, "learning_rate": 1.5963854174774778e-05, "loss": 0.9544, "step": 9690 }, { "epoch": 31.77377049180328, "grad_norm": 7.638526916503906, "learning_rate": 1.5963001766356107e-05, "loss": 0.9003, "step": 9691 }, { "epoch": 31.777049180327868, "grad_norm": 7.505987167358398, "learning_rate": 1.5962149290698392e-05, "loss": 1.0017, "step": 9692 }, { "epoch": 31.78032786885246, "grad_norm": 13.096426010131836, "learning_rate": 1.5961296747811245e-05, "loss": 0.9117, "step": 9693 }, { "epoch": 31.78360655737705, "grad_norm": 7.627955436706543, "learning_rate": 1.5960444137704278e-05, "loss": 1.0093, "step": 9694 }, { "epoch": 31.78688524590164, "grad_norm": 12.028739929199219, "learning_rate": 1.5959591460387107e-05, "loss": 1.0793, "step": 9695 }, { "epoch": 31.79016393442623, "grad_norm": 6.498057842254639, "learning_rate": 1.5958738715869347e-05, "loss": 0.7264, "step": 9696 }, { "epoch": 31.79344262295082, "grad_norm": 6.916172981262207, "learning_rate": 1.5957885904160614e-05, "loss": 1.2285, "step": 9697 }, { "epoch": 31.79672131147541, "grad_norm": 7.623994827270508, "learning_rate": 1.5957033025270517e-05, "loss": 0.7264, "step": 9698 }, { "epoch": 31.8, "grad_norm": 6.960277080535889, "learning_rate": 1.5956180079208684e-05, "loss": 0.9514, "step": 9699 }, { "epoch": 31.80327868852459, "grad_norm": 18.052066802978516, "learning_rate": 1.5955327065984727e-05, "loss": 0.9525, "step": 9700 }, { "epoch": 31.80655737704918, "grad_norm": 6.500767707824707, "learning_rate": 1.5954473985608263e-05, "loss": 1.1788, "step": 9701 }, { "epoch": 31.80983606557377, "grad_norm": 6.539096832275391, "learning_rate": 1.5953620838088913e-05, "loss": 1.042, "step": 9702 }, { "epoch": 31.81311475409836, "grad_norm": 7.763019561767578, "learning_rate": 1.59527676234363e-05, "loss": 0.9344, "step": 9703 }, { "epoch": 31.81639344262295, "grad_norm": 7.387364864349365, "learning_rate": 1.5951914341660044e-05, "loss": 0.9918, "step": 9704 }, { "epoch": 31.81967213114754, "grad_norm": 7.775554656982422, "learning_rate": 1.595106099276976e-05, "loss": 1.0222, "step": 9705 }, { "epoch": 31.82295081967213, "grad_norm": 8.474642753601074, "learning_rate": 1.5950207576775082e-05, "loss": 1.2081, "step": 9706 }, { "epoch": 31.82622950819672, "grad_norm": 9.590840339660645, "learning_rate": 1.5949354093685626e-05, "loss": 0.8099, "step": 9707 }, { "epoch": 31.82950819672131, "grad_norm": 9.230117797851562, "learning_rate": 1.5948500543511015e-05, "loss": 1.0184, "step": 9708 }, { "epoch": 31.832786885245902, "grad_norm": 8.840280532836914, "learning_rate": 1.5947646926260874e-05, "loss": 0.8203, "step": 9709 }, { "epoch": 31.83606557377049, "grad_norm": 13.270390510559082, "learning_rate": 1.594679324194483e-05, "loss": 0.9327, "step": 9710 }, { "epoch": 31.839344262295082, "grad_norm": 7.007944107055664, "learning_rate": 1.5945939490572514e-05, "loss": 0.8511, "step": 9711 }, { "epoch": 31.84262295081967, "grad_norm": 7.168703556060791, "learning_rate": 1.5945085672153546e-05, "loss": 0.7513, "step": 9712 }, { "epoch": 31.845901639344262, "grad_norm": 7.030124187469482, "learning_rate": 1.5944231786697554e-05, "loss": 0.8093, "step": 9713 }, { "epoch": 31.84918032786885, "grad_norm": 8.883010864257812, "learning_rate": 1.5943377834214165e-05, "loss": 0.9658, "step": 9714 }, { "epoch": 31.852459016393443, "grad_norm": 6.578979969024658, "learning_rate": 1.5942523814713018e-05, "loss": 1.2361, "step": 9715 }, { "epoch": 31.855737704918035, "grad_norm": 9.38154125213623, "learning_rate": 1.5941669728203734e-05, "loss": 0.8954, "step": 9716 }, { "epoch": 31.859016393442623, "grad_norm": 8.205504417419434, "learning_rate": 1.5940815574695943e-05, "loss": 1.2202, "step": 9717 }, { "epoch": 31.862295081967215, "grad_norm": 7.326879978179932, "learning_rate": 1.593996135419928e-05, "loss": 1.106, "step": 9718 }, { "epoch": 31.865573770491803, "grad_norm": 8.358909606933594, "learning_rate": 1.5939107066723384e-05, "loss": 0.7939, "step": 9719 }, { "epoch": 31.868852459016395, "grad_norm": 8.297298431396484, "learning_rate": 1.5938252712277874e-05, "loss": 0.8949, "step": 9720 }, { "epoch": 31.872131147540983, "grad_norm": 9.048783302307129, "learning_rate": 1.5937398290872387e-05, "loss": 0.879, "step": 9721 }, { "epoch": 31.875409836065575, "grad_norm": 6.572513580322266, "learning_rate": 1.5936543802516568e-05, "loss": 1.1934, "step": 9722 }, { "epoch": 31.878688524590164, "grad_norm": 8.400985717773438, "learning_rate": 1.5935689247220044e-05, "loss": 0.8555, "step": 9723 }, { "epoch": 31.881967213114756, "grad_norm": 7.592126846313477, "learning_rate": 1.593483462499245e-05, "loss": 1.0142, "step": 9724 }, { "epoch": 31.885245901639344, "grad_norm": 7.368054389953613, "learning_rate": 1.5933979935843423e-05, "loss": 1.1508, "step": 9725 }, { "epoch": 31.888524590163936, "grad_norm": 7.704559326171875, "learning_rate": 1.5933125179782608e-05, "loss": 1.002, "step": 9726 }, { "epoch": 31.891803278688524, "grad_norm": 7.025596618652344, "learning_rate": 1.5932270356819633e-05, "loss": 0.8817, "step": 9727 }, { "epoch": 31.895081967213116, "grad_norm": 7.919111728668213, "learning_rate": 1.5931415466964147e-05, "loss": 0.8927, "step": 9728 }, { "epoch": 31.898360655737704, "grad_norm": 6.684272289276123, "learning_rate": 1.593056051022578e-05, "loss": 0.8667, "step": 9729 }, { "epoch": 31.901639344262296, "grad_norm": 6.522689342498779, "learning_rate": 1.592970548661418e-05, "loss": 1.1592, "step": 9730 }, { "epoch": 31.904918032786885, "grad_norm": 9.675054550170898, "learning_rate": 1.592885039613898e-05, "loss": 0.8955, "step": 9731 }, { "epoch": 31.908196721311477, "grad_norm": 7.306164264678955, "learning_rate": 1.5927995238809833e-05, "loss": 0.8768, "step": 9732 }, { "epoch": 31.911475409836065, "grad_norm": 7.148184299468994, "learning_rate": 1.592714001463637e-05, "loss": 1.027, "step": 9733 }, { "epoch": 31.914754098360657, "grad_norm": 7.617675304412842, "learning_rate": 1.592628472362825e-05, "loss": 0.8505, "step": 9734 }, { "epoch": 31.918032786885245, "grad_norm": 7.499141216278076, "learning_rate": 1.59254293657951e-05, "loss": 0.835, "step": 9735 }, { "epoch": 31.921311475409837, "grad_norm": 6.987870216369629, "learning_rate": 1.5924573941146574e-05, "loss": 0.9688, "step": 9736 }, { "epoch": 31.924590163934425, "grad_norm": 8.930991172790527, "learning_rate": 1.592371844969232e-05, "loss": 1.0276, "step": 9737 }, { "epoch": 31.927868852459017, "grad_norm": 6.3034281730651855, "learning_rate": 1.592286289144198e-05, "loss": 0.8964, "step": 9738 }, { "epoch": 31.931147540983606, "grad_norm": 9.13274097442627, "learning_rate": 1.5922007266405205e-05, "loss": 0.8029, "step": 9739 }, { "epoch": 31.934426229508198, "grad_norm": 8.28998851776123, "learning_rate": 1.5921151574591632e-05, "loss": 0.8292, "step": 9740 }, { "epoch": 31.937704918032786, "grad_norm": 8.767081260681152, "learning_rate": 1.592029581601093e-05, "loss": 0.7977, "step": 9741 }, { "epoch": 31.940983606557378, "grad_norm": 6.325235366821289, "learning_rate": 1.591943999067273e-05, "loss": 1.0309, "step": 9742 }, { "epoch": 31.944262295081966, "grad_norm": 6.771792411804199, "learning_rate": 1.591858409858669e-05, "loss": 1.0566, "step": 9743 }, { "epoch": 31.947540983606558, "grad_norm": 8.405219078063965, "learning_rate": 1.5917728139762464e-05, "loss": 0.7949, "step": 9744 }, { "epoch": 31.950819672131146, "grad_norm": 7.559660911560059, "learning_rate": 1.5916872114209698e-05, "loss": 0.8055, "step": 9745 }, { "epoch": 31.95409836065574, "grad_norm": 7.281687259674072, "learning_rate": 1.5916016021938047e-05, "loss": 0.8148, "step": 9746 }, { "epoch": 31.957377049180327, "grad_norm": 6.867103099822998, "learning_rate": 1.591515986295716e-05, "loss": 0.9146, "step": 9747 }, { "epoch": 31.96065573770492, "grad_norm": 7.381002426147461, "learning_rate": 1.5914303637276703e-05, "loss": 1.1499, "step": 9748 }, { "epoch": 31.963934426229507, "grad_norm": 7.967626094818115, "learning_rate": 1.5913447344906318e-05, "loss": 1.0557, "step": 9749 }, { "epoch": 31.9672131147541, "grad_norm": 7.616036415100098, "learning_rate": 1.5912590985855667e-05, "loss": 0.818, "step": 9750 }, { "epoch": 31.970491803278687, "grad_norm": 9.394414901733398, "learning_rate": 1.5911734560134403e-05, "loss": 0.9193, "step": 9751 }, { "epoch": 31.97377049180328, "grad_norm": 7.741619110107422, "learning_rate": 1.591087806775219e-05, "loss": 0.9092, "step": 9752 }, { "epoch": 31.977049180327867, "grad_norm": 6.9036407470703125, "learning_rate": 1.5910021508718677e-05, "loss": 0.9742, "step": 9753 }, { "epoch": 31.98032786885246, "grad_norm": 8.520195007324219, "learning_rate": 1.590916488304353e-05, "loss": 0.7641, "step": 9754 }, { "epoch": 31.983606557377048, "grad_norm": 7.419729709625244, "learning_rate": 1.5908308190736404e-05, "loss": 1.1389, "step": 9755 }, { "epoch": 31.98688524590164, "grad_norm": 8.483582496643066, "learning_rate": 1.590745143180696e-05, "loss": 0.8465, "step": 9756 }, { "epoch": 31.990163934426228, "grad_norm": 6.986649036407471, "learning_rate": 1.5906594606264857e-05, "loss": 1.0366, "step": 9757 }, { "epoch": 31.99344262295082, "grad_norm": 8.364713668823242, "learning_rate": 1.590573771411976e-05, "loss": 1.1735, "step": 9758 }, { "epoch": 31.99672131147541, "grad_norm": 7.601969242095947, "learning_rate": 1.590488075538133e-05, "loss": 0.9907, "step": 9759 }, { "epoch": 32.0, "grad_norm": 7.017031669616699, "learning_rate": 1.5904023730059227e-05, "loss": 1.0215, "step": 9760 }, { "epoch": 32.00327868852459, "grad_norm": 7.271355628967285, "learning_rate": 1.590316663816312e-05, "loss": 1.0135, "step": 9761 }, { "epoch": 32.006557377049184, "grad_norm": 7.390756130218506, "learning_rate": 1.5902309479702673e-05, "loss": 0.6861, "step": 9762 }, { "epoch": 32.00983606557377, "grad_norm": 8.20052433013916, "learning_rate": 1.590145225468755e-05, "loss": 0.931, "step": 9763 }, { "epoch": 32.01311475409836, "grad_norm": 6.7545599937438965, "learning_rate": 1.5900594963127414e-05, "loss": 0.9704, "step": 9764 }, { "epoch": 32.01639344262295, "grad_norm": 5.814520359039307, "learning_rate": 1.5899737605031935e-05, "loss": 0.9919, "step": 9765 }, { "epoch": 32.019672131147544, "grad_norm": 7.5999274253845215, "learning_rate": 1.589888018041078e-05, "loss": 1.1963, "step": 9766 }, { "epoch": 32.02295081967213, "grad_norm": 8.506434440612793, "learning_rate": 1.589802268927362e-05, "loss": 0.7613, "step": 9767 }, { "epoch": 32.02622950819672, "grad_norm": 9.14710521697998, "learning_rate": 1.589716513163012e-05, "loss": 0.9704, "step": 9768 }, { "epoch": 32.02950819672131, "grad_norm": 6.866007328033447, "learning_rate": 1.5896307507489953e-05, "loss": 0.8138, "step": 9769 }, { "epoch": 32.032786885245905, "grad_norm": 7.347790241241455, "learning_rate": 1.5895449816862787e-05, "loss": 1.02, "step": 9770 }, { "epoch": 32.03606557377049, "grad_norm": 6.20356559753418, "learning_rate": 1.5894592059758296e-05, "loss": 0.8503, "step": 9771 }, { "epoch": 32.03934426229508, "grad_norm": 11.432602882385254, "learning_rate": 1.5893734236186148e-05, "loss": 1.1191, "step": 9772 }, { "epoch": 32.04262295081967, "grad_norm": 5.5535101890563965, "learning_rate": 1.5892876346156022e-05, "loss": 1.1061, "step": 9773 }, { "epoch": 32.045901639344265, "grad_norm": 8.04333209991455, "learning_rate": 1.5892018389677588e-05, "loss": 1.0253, "step": 9774 }, { "epoch": 32.049180327868854, "grad_norm": 9.98076343536377, "learning_rate": 1.5891160366760518e-05, "loss": 0.9486, "step": 9775 }, { "epoch": 32.05245901639344, "grad_norm": 6.255334854125977, "learning_rate": 1.589030227741449e-05, "loss": 0.9465, "step": 9776 }, { "epoch": 32.05573770491803, "grad_norm": 9.223803520202637, "learning_rate": 1.588944412164918e-05, "loss": 0.7898, "step": 9777 }, { "epoch": 32.059016393442626, "grad_norm": 5.388885498046875, "learning_rate": 1.5888585899474266e-05, "loss": 1.001, "step": 9778 }, { "epoch": 32.062295081967214, "grad_norm": 10.09710693359375, "learning_rate": 1.588772761089942e-05, "loss": 0.9319, "step": 9779 }, { "epoch": 32.0655737704918, "grad_norm": 6.156067371368408, "learning_rate": 1.5886869255934326e-05, "loss": 0.6912, "step": 9780 }, { "epoch": 32.06885245901639, "grad_norm": 9.711641311645508, "learning_rate": 1.588601083458866e-05, "loss": 0.9352, "step": 9781 }, { "epoch": 32.072131147540986, "grad_norm": 7.580570220947266, "learning_rate": 1.5885152346872098e-05, "loss": 0.7599, "step": 9782 }, { "epoch": 32.075409836065575, "grad_norm": 5.736883163452148, "learning_rate": 1.5884293792794328e-05, "loss": 0.7122, "step": 9783 }, { "epoch": 32.07868852459016, "grad_norm": 5.924348831176758, "learning_rate": 1.588343517236503e-05, "loss": 0.9888, "step": 9784 }, { "epoch": 32.08196721311475, "grad_norm": 5.445765018463135, "learning_rate": 1.5882576485593875e-05, "loss": 1.0842, "step": 9785 }, { "epoch": 32.08524590163935, "grad_norm": 6.081981182098389, "learning_rate": 1.588171773249056e-05, "loss": 1.0704, "step": 9786 }, { "epoch": 32.088524590163935, "grad_norm": 12.023789405822754, "learning_rate": 1.5880858913064764e-05, "loss": 1.074, "step": 9787 }, { "epoch": 32.09180327868852, "grad_norm": 9.090272903442383, "learning_rate": 1.5880000027326164e-05, "loss": 0.7283, "step": 9788 }, { "epoch": 32.09508196721311, "grad_norm": 6.992852687835693, "learning_rate": 1.587914107528445e-05, "loss": 0.895, "step": 9789 }, { "epoch": 32.09836065573771, "grad_norm": 12.366190910339355, "learning_rate": 1.587828205694931e-05, "loss": 0.8544, "step": 9790 }, { "epoch": 32.101639344262296, "grad_norm": 7.033346176147461, "learning_rate": 1.587742297233043e-05, "loss": 0.7059, "step": 9791 }, { "epoch": 32.104918032786884, "grad_norm": 7.079171180725098, "learning_rate": 1.587656382143749e-05, "loss": 0.648, "step": 9792 }, { "epoch": 32.10819672131147, "grad_norm": 6.714221000671387, "learning_rate": 1.5875704604280188e-05, "loss": 0.8583, "step": 9793 }, { "epoch": 32.11147540983607, "grad_norm": 8.372243881225586, "learning_rate": 1.5874845320868205e-05, "loss": 0.994, "step": 9794 }, { "epoch": 32.114754098360656, "grad_norm": 7.602519989013672, "learning_rate": 1.5873985971211233e-05, "loss": 0.9237, "step": 9795 }, { "epoch": 32.118032786885244, "grad_norm": 8.691741943359375, "learning_rate": 1.5873126555318957e-05, "loss": 1.1587, "step": 9796 }, { "epoch": 32.12131147540983, "grad_norm": 6.2827982902526855, "learning_rate": 1.5872267073201082e-05, "loss": 0.7744, "step": 9797 }, { "epoch": 32.12459016393443, "grad_norm": 6.2465901374816895, "learning_rate": 1.5871407524867284e-05, "loss": 1.0876, "step": 9798 }, { "epoch": 32.12786885245902, "grad_norm": 6.3040852546691895, "learning_rate": 1.5870547910327262e-05, "loss": 0.9706, "step": 9799 }, { "epoch": 32.131147540983605, "grad_norm": 5.943995475769043, "learning_rate": 1.586968822959071e-05, "loss": 0.8393, "step": 9800 }, { "epoch": 32.13442622950819, "grad_norm": 5.782192230224609, "learning_rate": 1.5868828482667318e-05, "loss": 0.9921, "step": 9801 }, { "epoch": 32.13770491803279, "grad_norm": 5.502866744995117, "learning_rate": 1.5867968669566782e-05, "loss": 1.1519, "step": 9802 }, { "epoch": 32.14098360655738, "grad_norm": 6.510622978210449, "learning_rate": 1.5867108790298804e-05, "loss": 0.8196, "step": 9803 }, { "epoch": 32.144262295081965, "grad_norm": 9.516490936279297, "learning_rate": 1.5866248844873066e-05, "loss": 0.902, "step": 9804 }, { "epoch": 32.14754098360656, "grad_norm": 7.302739143371582, "learning_rate": 1.5865388833299276e-05, "loss": 0.9279, "step": 9805 }, { "epoch": 32.15081967213115, "grad_norm": 6.257508754730225, "learning_rate": 1.586452875558713e-05, "loss": 1.0227, "step": 9806 }, { "epoch": 32.15409836065574, "grad_norm": 5.832446098327637, "learning_rate": 1.5863668611746325e-05, "loss": 1.2181, "step": 9807 }, { "epoch": 32.157377049180326, "grad_norm": 7.130652904510498, "learning_rate": 1.586280840178656e-05, "loss": 0.6976, "step": 9808 }, { "epoch": 32.16065573770492, "grad_norm": 11.10889720916748, "learning_rate": 1.5861948125717534e-05, "loss": 0.8182, "step": 9809 }, { "epoch": 32.16393442622951, "grad_norm": 6.925963401794434, "learning_rate": 1.5861087783548947e-05, "loss": 1.0526, "step": 9810 }, { "epoch": 32.1672131147541, "grad_norm": 8.885028839111328, "learning_rate": 1.5860227375290502e-05, "loss": 1.0007, "step": 9811 }, { "epoch": 32.170491803278686, "grad_norm": 5.923007965087891, "learning_rate": 1.58593669009519e-05, "loss": 0.9287, "step": 9812 }, { "epoch": 32.17377049180328, "grad_norm": 5.390425682067871, "learning_rate": 1.5858506360542844e-05, "loss": 1.0681, "step": 9813 }, { "epoch": 32.17704918032787, "grad_norm": 10.209835052490234, "learning_rate": 1.5857645754073038e-05, "loss": 0.7787, "step": 9814 }, { "epoch": 32.18032786885246, "grad_norm": 6.533707618713379, "learning_rate": 1.5856785081552182e-05, "loss": 0.8122, "step": 9815 }, { "epoch": 32.18360655737705, "grad_norm": 8.300171852111816, "learning_rate": 1.585592434298999e-05, "loss": 1.014, "step": 9816 }, { "epoch": 32.18688524590164, "grad_norm": 7.955862045288086, "learning_rate": 1.585506353839616e-05, "loss": 0.7816, "step": 9817 }, { "epoch": 32.19016393442623, "grad_norm": 7.468183994293213, "learning_rate": 1.58542026677804e-05, "loss": 0.9926, "step": 9818 }, { "epoch": 32.19344262295082, "grad_norm": 7.315299987792969, "learning_rate": 1.5853341731152418e-05, "loss": 0.9117, "step": 9819 }, { "epoch": 32.19672131147541, "grad_norm": 5.248154640197754, "learning_rate": 1.5852480728521925e-05, "loss": 0.649, "step": 9820 }, { "epoch": 32.2, "grad_norm": 9.210670471191406, "learning_rate": 1.5851619659898623e-05, "loss": 1.0462, "step": 9821 }, { "epoch": 32.20327868852459, "grad_norm": 8.123260498046875, "learning_rate": 1.5850758525292228e-05, "loss": 0.8195, "step": 9822 }, { "epoch": 32.20655737704918, "grad_norm": 8.16445255279541, "learning_rate": 1.5849897324712446e-05, "loss": 1.0688, "step": 9823 }, { "epoch": 32.20983606557377, "grad_norm": 7.119326591491699, "learning_rate": 1.584903605816899e-05, "loss": 1.0531, "step": 9824 }, { "epoch": 32.21311475409836, "grad_norm": 8.064871788024902, "learning_rate": 1.584817472567157e-05, "loss": 0.8738, "step": 9825 }, { "epoch": 32.21639344262295, "grad_norm": 17.91849708557129, "learning_rate": 1.5847313327229897e-05, "loss": 1.0096, "step": 9826 }, { "epoch": 32.21967213114754, "grad_norm": 6.394158840179443, "learning_rate": 1.5846451862853694e-05, "loss": 0.981, "step": 9827 }, { "epoch": 32.22295081967213, "grad_norm": 6.602652072906494, "learning_rate": 1.5845590332552662e-05, "loss": 0.9226, "step": 9828 }, { "epoch": 32.226229508196724, "grad_norm": 6.893805980682373, "learning_rate": 1.584472873633652e-05, "loss": 1.0281, "step": 9829 }, { "epoch": 32.22950819672131, "grad_norm": 7.84778356552124, "learning_rate": 1.584386707421499e-05, "loss": 1.0619, "step": 9830 }, { "epoch": 32.2327868852459, "grad_norm": 9.443239212036133, "learning_rate": 1.5843005346197776e-05, "loss": 0.7253, "step": 9831 }, { "epoch": 32.23606557377049, "grad_norm": 6.098691940307617, "learning_rate": 1.5842143552294606e-05, "loss": 0.9035, "step": 9832 }, { "epoch": 32.239344262295084, "grad_norm": 6.31490421295166, "learning_rate": 1.5841281692515193e-05, "loss": 1.0812, "step": 9833 }, { "epoch": 32.24262295081967, "grad_norm": 8.306901931762695, "learning_rate": 1.584041976686925e-05, "loss": 1.0723, "step": 9834 }, { "epoch": 32.24590163934426, "grad_norm": 7.540862560272217, "learning_rate": 1.583955777536651e-05, "loss": 0.7913, "step": 9835 }, { "epoch": 32.24918032786885, "grad_norm": 7.182997703552246, "learning_rate": 1.583869571801668e-05, "loss": 0.9878, "step": 9836 }, { "epoch": 32.252459016393445, "grad_norm": 10.377595901489258, "learning_rate": 1.5837833594829487e-05, "loss": 0.754, "step": 9837 }, { "epoch": 32.25573770491803, "grad_norm": 7.819932460784912, "learning_rate": 1.583697140581465e-05, "loss": 1.0383, "step": 9838 }, { "epoch": 32.25901639344262, "grad_norm": 8.27171802520752, "learning_rate": 1.5836109150981885e-05, "loss": 0.7836, "step": 9839 }, { "epoch": 32.26229508196721, "grad_norm": 5.557468414306641, "learning_rate": 1.5835246830340933e-05, "loss": 1.0479, "step": 9840 }, { "epoch": 32.265573770491805, "grad_norm": 6.042588233947754, "learning_rate": 1.58343844439015e-05, "loss": 0.6726, "step": 9841 }, { "epoch": 32.268852459016394, "grad_norm": 7.107233047485352, "learning_rate": 1.5833521991673314e-05, "loss": 0.985, "step": 9842 }, { "epoch": 32.27213114754098, "grad_norm": 8.442493438720703, "learning_rate": 1.5832659473666102e-05, "loss": 0.9691, "step": 9843 }, { "epoch": 32.27540983606557, "grad_norm": 6.999227523803711, "learning_rate": 1.583179688988959e-05, "loss": 1.0891, "step": 9844 }, { "epoch": 32.278688524590166, "grad_norm": 6.752196788787842, "learning_rate": 1.5830934240353508e-05, "loss": 0.7947, "step": 9845 }, { "epoch": 32.281967213114754, "grad_norm": 10.630017280578613, "learning_rate": 1.583007152506758e-05, "loss": 0.7886, "step": 9846 }, { "epoch": 32.28524590163934, "grad_norm": 6.789228439331055, "learning_rate": 1.582920874404153e-05, "loss": 0.9738, "step": 9847 }, { "epoch": 32.28852459016394, "grad_norm": 6.342535018920898, "learning_rate": 1.5828345897285093e-05, "loss": 0.8235, "step": 9848 }, { "epoch": 32.291803278688526, "grad_norm": 11.198073387145996, "learning_rate": 1.5827482984807997e-05, "loss": 1.0307, "step": 9849 }, { "epoch": 32.295081967213115, "grad_norm": 6.213731288909912, "learning_rate": 1.582662000661997e-05, "loss": 1.1022, "step": 9850 }, { "epoch": 32.2983606557377, "grad_norm": 6.478967666625977, "learning_rate": 1.5825756962730743e-05, "loss": 1.1787, "step": 9851 }, { "epoch": 32.3016393442623, "grad_norm": 12.653185844421387, "learning_rate": 1.582489385315005e-05, "loss": 0.9694, "step": 9852 }, { "epoch": 32.30491803278689, "grad_norm": 7.692500591278076, "learning_rate": 1.5824030677887622e-05, "loss": 1.0637, "step": 9853 }, { "epoch": 32.308196721311475, "grad_norm": 5.945916652679443, "learning_rate": 1.5823167436953192e-05, "loss": 1.0894, "step": 9854 }, { "epoch": 32.31147540983606, "grad_norm": 8.804436683654785, "learning_rate": 1.5822304130356497e-05, "loss": 0.9373, "step": 9855 }, { "epoch": 32.31475409836066, "grad_norm": 7.522098541259766, "learning_rate": 1.5821440758107268e-05, "loss": 0.9378, "step": 9856 }, { "epoch": 32.31803278688525, "grad_norm": 9.634803771972656, "learning_rate": 1.5820577320215242e-05, "loss": 1.0912, "step": 9857 }, { "epoch": 32.321311475409836, "grad_norm": 7.49505615234375, "learning_rate": 1.5819713816690153e-05, "loss": 0.7978, "step": 9858 }, { "epoch": 32.324590163934424, "grad_norm": 9.396208763122559, "learning_rate": 1.5818850247541742e-05, "loss": 0.8975, "step": 9859 }, { "epoch": 32.32786885245902, "grad_norm": 11.343151092529297, "learning_rate": 1.5817986612779746e-05, "loss": 0.8314, "step": 9860 }, { "epoch": 32.33114754098361, "grad_norm": 6.108079433441162, "learning_rate": 1.5817122912413897e-05, "loss": 0.8694, "step": 9861 }, { "epoch": 32.334426229508196, "grad_norm": 7.538905143737793, "learning_rate": 1.5816259146453942e-05, "loss": 0.8649, "step": 9862 }, { "epoch": 32.337704918032784, "grad_norm": 7.5665788650512695, "learning_rate": 1.5815395314909615e-05, "loss": 0.9707, "step": 9863 }, { "epoch": 32.34098360655738, "grad_norm": 8.327336311340332, "learning_rate": 1.5814531417790664e-05, "loss": 0.9652, "step": 9864 }, { "epoch": 32.34426229508197, "grad_norm": 10.269634246826172, "learning_rate": 1.5813667455106822e-05, "loss": 0.8012, "step": 9865 }, { "epoch": 32.34754098360656, "grad_norm": 8.534765243530273, "learning_rate": 1.5812803426867834e-05, "loss": 0.8716, "step": 9866 }, { "epoch": 32.350819672131145, "grad_norm": 6.975672721862793, "learning_rate": 1.581193933308345e-05, "loss": 1.0815, "step": 9867 }, { "epoch": 32.35409836065574, "grad_norm": 7.2781195640563965, "learning_rate": 1.58110751737634e-05, "loss": 0.7842, "step": 9868 }, { "epoch": 32.35737704918033, "grad_norm": 7.37092924118042, "learning_rate": 1.581021094891744e-05, "loss": 1.0341, "step": 9869 }, { "epoch": 32.36065573770492, "grad_norm": 9.391735076904297, "learning_rate": 1.5809346658555303e-05, "loss": 1.0946, "step": 9870 }, { "epoch": 32.363934426229505, "grad_norm": 7.642223358154297, "learning_rate": 1.580848230268675e-05, "loss": 0.9155, "step": 9871 }, { "epoch": 32.3672131147541, "grad_norm": 21.009504318237305, "learning_rate": 1.5807617881321516e-05, "loss": 1.0732, "step": 9872 }, { "epoch": 32.37049180327869, "grad_norm": 6.463797092437744, "learning_rate": 1.5806753394469353e-05, "loss": 1.0905, "step": 9873 }, { "epoch": 32.37377049180328, "grad_norm": 6.306790828704834, "learning_rate": 1.580588884214001e-05, "loss": 0.9945, "step": 9874 }, { "epoch": 32.377049180327866, "grad_norm": 8.321728706359863, "learning_rate": 1.5805024224343233e-05, "loss": 0.9583, "step": 9875 }, { "epoch": 32.38032786885246, "grad_norm": 7.0049848556518555, "learning_rate": 1.5804159541088768e-05, "loss": 0.8313, "step": 9876 }, { "epoch": 32.38360655737705, "grad_norm": 10.233567237854004, "learning_rate": 1.5803294792386375e-05, "loss": 1.1058, "step": 9877 }, { "epoch": 32.38688524590164, "grad_norm": 8.018057823181152, "learning_rate": 1.5802429978245797e-05, "loss": 0.845, "step": 9878 }, { "epoch": 32.390163934426226, "grad_norm": 7.1554670333862305, "learning_rate": 1.5801565098676786e-05, "loss": 0.7251, "step": 9879 }, { "epoch": 32.39344262295082, "grad_norm": 6.991986274719238, "learning_rate": 1.58007001536891e-05, "loss": 0.9149, "step": 9880 }, { "epoch": 32.39672131147541, "grad_norm": 7.226223468780518, "learning_rate": 1.5799835143292486e-05, "loss": 0.8452, "step": 9881 }, { "epoch": 32.4, "grad_norm": 8.660140991210938, "learning_rate": 1.57989700674967e-05, "loss": 0.8967, "step": 9882 }, { "epoch": 32.40327868852459, "grad_norm": 9.939908027648926, "learning_rate": 1.57981049263115e-05, "loss": 0.9815, "step": 9883 }, { "epoch": 32.40655737704918, "grad_norm": 7.181910037994385, "learning_rate": 1.5797239719746635e-05, "loss": 0.9092, "step": 9884 }, { "epoch": 32.40983606557377, "grad_norm": 6.227235317230225, "learning_rate": 1.5796374447811868e-05, "loss": 0.8983, "step": 9885 }, { "epoch": 32.41311475409836, "grad_norm": 7.907435894012451, "learning_rate": 1.579550911051695e-05, "loss": 0.8956, "step": 9886 }, { "epoch": 32.41639344262295, "grad_norm": 7.849569320678711, "learning_rate": 1.5794643707871638e-05, "loss": 0.962, "step": 9887 }, { "epoch": 32.41967213114754, "grad_norm": 7.959455966949463, "learning_rate": 1.5793778239885698e-05, "loss": 0.7787, "step": 9888 }, { "epoch": 32.42295081967213, "grad_norm": 9.485424041748047, "learning_rate": 1.5792912706568883e-05, "loss": 0.9863, "step": 9889 }, { "epoch": 32.42622950819672, "grad_norm": 7.898427486419678, "learning_rate": 1.5792047107930953e-05, "loss": 1.0584, "step": 9890 }, { "epoch": 32.429508196721315, "grad_norm": 6.979424476623535, "learning_rate": 1.579118144398167e-05, "loss": 0.8671, "step": 9891 }, { "epoch": 32.4327868852459, "grad_norm": 8.468363761901855, "learning_rate": 1.5790315714730797e-05, "loss": 0.9835, "step": 9892 }, { "epoch": 32.43606557377049, "grad_norm": 7.871659755706787, "learning_rate": 1.5789449920188092e-05, "loss": 0.8502, "step": 9893 }, { "epoch": 32.43934426229508, "grad_norm": 8.155060768127441, "learning_rate": 1.578858406036332e-05, "loss": 0.7357, "step": 9894 }, { "epoch": 32.442622950819676, "grad_norm": 6.893415451049805, "learning_rate": 1.5787718135266246e-05, "loss": 1.1237, "step": 9895 }, { "epoch": 32.445901639344264, "grad_norm": 6.641611099243164, "learning_rate": 1.5786852144906634e-05, "loss": 1.1471, "step": 9896 }, { "epoch": 32.44918032786885, "grad_norm": 9.90955638885498, "learning_rate": 1.578598608929424e-05, "loss": 1.1953, "step": 9897 }, { "epoch": 32.45245901639344, "grad_norm": 7.513571739196777, "learning_rate": 1.578511996843884e-05, "loss": 1.1877, "step": 9898 }, { "epoch": 32.455737704918036, "grad_norm": 7.326745510101318, "learning_rate": 1.57842537823502e-05, "loss": 1.1234, "step": 9899 }, { "epoch": 32.459016393442624, "grad_norm": 6.901158809661865, "learning_rate": 1.578338753103808e-05, "loss": 0.889, "step": 9900 }, { "epoch": 32.46229508196721, "grad_norm": 7.38422155380249, "learning_rate": 1.5782521214512257e-05, "loss": 1.0716, "step": 9901 }, { "epoch": 32.4655737704918, "grad_norm": 6.905821800231934, "learning_rate": 1.5781654832782495e-05, "loss": 0.9711, "step": 9902 }, { "epoch": 32.4688524590164, "grad_norm": 8.735160827636719, "learning_rate": 1.578078838585856e-05, "loss": 0.7242, "step": 9903 }, { "epoch": 32.472131147540985, "grad_norm": 5.936222076416016, "learning_rate": 1.5779921873750225e-05, "loss": 1.0197, "step": 9904 }, { "epoch": 32.47540983606557, "grad_norm": 6.9447126388549805, "learning_rate": 1.5779055296467264e-05, "loss": 0.9003, "step": 9905 }, { "epoch": 32.47868852459016, "grad_norm": 6.809065341949463, "learning_rate": 1.5778188654019446e-05, "loss": 1.0403, "step": 9906 }, { "epoch": 32.48196721311476, "grad_norm": 7.018265247344971, "learning_rate": 1.5777321946416542e-05, "loss": 0.936, "step": 9907 }, { "epoch": 32.485245901639345, "grad_norm": 7.722720146179199, "learning_rate": 1.5776455173668325e-05, "loss": 0.9663, "step": 9908 }, { "epoch": 32.488524590163934, "grad_norm": 8.491552352905273, "learning_rate": 1.5775588335784574e-05, "loss": 1.085, "step": 9909 }, { "epoch": 32.49180327868852, "grad_norm": 7.365603923797607, "learning_rate": 1.5774721432775053e-05, "loss": 0.9349, "step": 9910 }, { "epoch": 32.49508196721312, "grad_norm": 7.9773478507995605, "learning_rate": 1.5773854464649548e-05, "loss": 0.8589, "step": 9911 }, { "epoch": 32.498360655737706, "grad_norm": 6.633171558380127, "learning_rate": 1.577298743141783e-05, "loss": 0.983, "step": 9912 }, { "epoch": 32.501639344262294, "grad_norm": 7.274122714996338, "learning_rate": 1.5772120333089675e-05, "loss": 0.8574, "step": 9913 }, { "epoch": 32.50491803278688, "grad_norm": 7.504805088043213, "learning_rate": 1.5771253169674862e-05, "loss": 0.8472, "step": 9914 }, { "epoch": 32.50819672131148, "grad_norm": 6.60406494140625, "learning_rate": 1.577038594118317e-05, "loss": 0.957, "step": 9915 }, { "epoch": 32.511475409836066, "grad_norm": 9.649795532226562, "learning_rate": 1.5769518647624378e-05, "loss": 0.7635, "step": 9916 }, { "epoch": 32.514754098360655, "grad_norm": 6.6359477043151855, "learning_rate": 1.5768651289008265e-05, "loss": 0.8612, "step": 9917 }, { "epoch": 32.51803278688524, "grad_norm": 7.399683952331543, "learning_rate": 1.5767783865344605e-05, "loss": 1.0548, "step": 9918 }, { "epoch": 32.52131147540984, "grad_norm": 6.18835973739624, "learning_rate": 1.5766916376643192e-05, "loss": 0.9775, "step": 9919 }, { "epoch": 32.52459016393443, "grad_norm": 7.251792907714844, "learning_rate": 1.5766048822913795e-05, "loss": 0.8025, "step": 9920 }, { "epoch": 32.527868852459015, "grad_norm": 7.264168739318848, "learning_rate": 1.5765181204166203e-05, "loss": 0.9081, "step": 9921 }, { "epoch": 32.5311475409836, "grad_norm": 7.121539115905762, "learning_rate": 1.5764313520410205e-05, "loss": 0.8233, "step": 9922 }, { "epoch": 32.5344262295082, "grad_norm": 6.676858901977539, "learning_rate": 1.576344577165557e-05, "loss": 1.1112, "step": 9923 }, { "epoch": 32.53770491803279, "grad_norm": 6.337843894958496, "learning_rate": 1.57625779579121e-05, "loss": 1.0139, "step": 9924 }, { "epoch": 32.540983606557376, "grad_norm": 8.915095329284668, "learning_rate": 1.5761710079189563e-05, "loss": 0.9958, "step": 9925 }, { "epoch": 32.544262295081964, "grad_norm": 6.753687381744385, "learning_rate": 1.576084213549776e-05, "loss": 0.8329, "step": 9926 }, { "epoch": 32.54754098360656, "grad_norm": 7.113697528839111, "learning_rate": 1.575997412684647e-05, "loss": 0.7242, "step": 9927 }, { "epoch": 32.55081967213115, "grad_norm": 7.554469585418701, "learning_rate": 1.5759106053245483e-05, "loss": 0.8005, "step": 9928 }, { "epoch": 32.554098360655736, "grad_norm": 7.470159530639648, "learning_rate": 1.5758237914704587e-05, "loss": 0.7303, "step": 9929 }, { "epoch": 32.557377049180324, "grad_norm": 6.424424648284912, "learning_rate": 1.5757369711233574e-05, "loss": 0.8778, "step": 9930 }, { "epoch": 32.56065573770492, "grad_norm": 7.958928108215332, "learning_rate": 1.575650144284223e-05, "loss": 1.1169, "step": 9931 }, { "epoch": 32.56393442622951, "grad_norm": 7.050161361694336, "learning_rate": 1.575563310954035e-05, "loss": 1.1965, "step": 9932 }, { "epoch": 32.5672131147541, "grad_norm": 9.651663780212402, "learning_rate": 1.575476471133772e-05, "loss": 0.9648, "step": 9933 }, { "epoch": 32.570491803278685, "grad_norm": 7.225399494171143, "learning_rate": 1.575389624824413e-05, "loss": 0.8763, "step": 9934 }, { "epoch": 32.57377049180328, "grad_norm": 7.206669330596924, "learning_rate": 1.575302772026938e-05, "loss": 1.1918, "step": 9935 }, { "epoch": 32.57704918032787, "grad_norm": 6.234837055206299, "learning_rate": 1.5752159127423262e-05, "loss": 0.7581, "step": 9936 }, { "epoch": 32.58032786885246, "grad_norm": 5.53239107131958, "learning_rate": 1.575129046971557e-05, "loss": 0.997, "step": 9937 }, { "epoch": 32.58360655737705, "grad_norm": 6.188277244567871, "learning_rate": 1.5750421747156096e-05, "loss": 0.781, "step": 9938 }, { "epoch": 32.58688524590164, "grad_norm": 8.878988265991211, "learning_rate": 1.574955295975464e-05, "loss": 0.9009, "step": 9939 }, { "epoch": 32.59016393442623, "grad_norm": 11.277852058410645, "learning_rate": 1.5748684107520994e-05, "loss": 0.7746, "step": 9940 }, { "epoch": 32.59344262295082, "grad_norm": 7.368190765380859, "learning_rate": 1.5747815190464956e-05, "loss": 0.9158, "step": 9941 }, { "epoch": 32.59672131147541, "grad_norm": 7.439161777496338, "learning_rate": 1.5746946208596326e-05, "loss": 0.9097, "step": 9942 }, { "epoch": 32.6, "grad_norm": 9.37320613861084, "learning_rate": 1.5746077161924905e-05, "loss": 0.8601, "step": 9943 }, { "epoch": 32.60327868852459, "grad_norm": 6.9832444190979, "learning_rate": 1.5745208050460492e-05, "loss": 0.6924, "step": 9944 }, { "epoch": 32.60655737704918, "grad_norm": 7.313137531280518, "learning_rate": 1.574433887421288e-05, "loss": 0.8949, "step": 9945 }, { "epoch": 32.609836065573774, "grad_norm": 6.544278144836426, "learning_rate": 1.5743469633191878e-05, "loss": 0.9368, "step": 9946 }, { "epoch": 32.61311475409836, "grad_norm": 6.877516269683838, "learning_rate": 1.574260032740728e-05, "loss": 0.8516, "step": 9947 }, { "epoch": 32.61639344262295, "grad_norm": 8.323189735412598, "learning_rate": 1.5741730956868896e-05, "loss": 0.8086, "step": 9948 }, { "epoch": 32.61967213114754, "grad_norm": 6.9262189865112305, "learning_rate": 1.5740861521586525e-05, "loss": 0.9802, "step": 9949 }, { "epoch": 32.622950819672134, "grad_norm": 8.45343017578125, "learning_rate": 1.5739992021569968e-05, "loss": 0.9626, "step": 9950 }, { "epoch": 32.62622950819672, "grad_norm": 13.748355865478516, "learning_rate": 1.5739122456829036e-05, "loss": 1.0497, "step": 9951 }, { "epoch": 32.62950819672131, "grad_norm": 7.6884331703186035, "learning_rate": 1.573825282737353e-05, "loss": 0.6115, "step": 9952 }, { "epoch": 32.6327868852459, "grad_norm": 7.153400421142578, "learning_rate": 1.573738313321326e-05, "loss": 0.9598, "step": 9953 }, { "epoch": 32.636065573770495, "grad_norm": 7.760299205780029, "learning_rate": 1.5736513374358025e-05, "loss": 0.7054, "step": 9954 }, { "epoch": 32.63934426229508, "grad_norm": 6.537341117858887, "learning_rate": 1.573564355081764e-05, "loss": 0.9836, "step": 9955 }, { "epoch": 32.64262295081967, "grad_norm": 9.327704429626465, "learning_rate": 1.573477366260191e-05, "loss": 0.9589, "step": 9956 }, { "epoch": 32.64590163934426, "grad_norm": 8.180306434631348, "learning_rate": 1.5733903709720646e-05, "loss": 1.0181, "step": 9957 }, { "epoch": 32.649180327868855, "grad_norm": 7.540436267852783, "learning_rate": 1.5733033692183656e-05, "loss": 0.8788, "step": 9958 }, { "epoch": 32.65245901639344, "grad_norm": 9.048688888549805, "learning_rate": 1.5732163610000745e-05, "loss": 0.9065, "step": 9959 }, { "epoch": 32.65573770491803, "grad_norm": 7.366155624389648, "learning_rate": 1.5731293463181736e-05, "loss": 0.8077, "step": 9960 }, { "epoch": 32.65901639344262, "grad_norm": 7.044800281524658, "learning_rate": 1.5730423251736427e-05, "loss": 0.9546, "step": 9961 }, { "epoch": 32.662295081967216, "grad_norm": 8.124261856079102, "learning_rate": 1.5729552975674644e-05, "loss": 0.7046, "step": 9962 }, { "epoch": 32.665573770491804, "grad_norm": 10.247136116027832, "learning_rate": 1.572868263500619e-05, "loss": 1.1045, "step": 9963 }, { "epoch": 32.66885245901639, "grad_norm": 7.696816921234131, "learning_rate": 1.5727812229740887e-05, "loss": 0.8932, "step": 9964 }, { "epoch": 32.67213114754098, "grad_norm": 16.10388946533203, "learning_rate": 1.572694175988854e-05, "loss": 0.8404, "step": 9965 }, { "epoch": 32.675409836065576, "grad_norm": 7.611879348754883, "learning_rate": 1.5726071225458977e-05, "loss": 0.7285, "step": 9966 }, { "epoch": 32.678688524590164, "grad_norm": 7.121585369110107, "learning_rate": 1.5725200626462e-05, "loss": 0.9458, "step": 9967 }, { "epoch": 32.68196721311475, "grad_norm": 11.81368637084961, "learning_rate": 1.5724329962907438e-05, "loss": 0.9121, "step": 9968 }, { "epoch": 32.68524590163934, "grad_norm": 7.922931671142578, "learning_rate": 1.5723459234805103e-05, "loss": 1.0607, "step": 9969 }, { "epoch": 32.68852459016394, "grad_norm": 7.162629127502441, "learning_rate": 1.5722588442164813e-05, "loss": 0.7566, "step": 9970 }, { "epoch": 32.691803278688525, "grad_norm": 6.929457664489746, "learning_rate": 1.5721717584996392e-05, "loss": 0.7988, "step": 9971 }, { "epoch": 32.69508196721311, "grad_norm": 8.036656379699707, "learning_rate": 1.5720846663309654e-05, "loss": 0.9442, "step": 9972 }, { "epoch": 32.6983606557377, "grad_norm": 7.3878021240234375, "learning_rate": 1.571997567711442e-05, "loss": 0.7493, "step": 9973 }, { "epoch": 32.7016393442623, "grad_norm": 6.665067672729492, "learning_rate": 1.5719104626420513e-05, "loss": 0.9471, "step": 9974 }, { "epoch": 32.704918032786885, "grad_norm": 6.190530776977539, "learning_rate": 1.571823351123776e-05, "loss": 1.1772, "step": 9975 }, { "epoch": 32.708196721311474, "grad_norm": 6.256853103637695, "learning_rate": 1.5717362331575973e-05, "loss": 0.9647, "step": 9976 }, { "epoch": 32.71147540983607, "grad_norm": 7.94619083404541, "learning_rate": 1.571649108744498e-05, "loss": 0.808, "step": 9977 }, { "epoch": 32.71475409836066, "grad_norm": 7.984313011169434, "learning_rate": 1.5715619778854613e-05, "loss": 0.884, "step": 9978 }, { "epoch": 32.718032786885246, "grad_norm": 6.9203200340271, "learning_rate": 1.5714748405814683e-05, "loss": 0.9049, "step": 9979 }, { "epoch": 32.721311475409834, "grad_norm": 15.06104850769043, "learning_rate": 1.5713876968335028e-05, "loss": 0.9389, "step": 9980 }, { "epoch": 32.72459016393443, "grad_norm": 7.094200134277344, "learning_rate": 1.5713005466425466e-05, "loss": 1.0405, "step": 9981 }, { "epoch": 32.72786885245902, "grad_norm": 10.542082786560059, "learning_rate": 1.5712133900095826e-05, "loss": 0.928, "step": 9982 }, { "epoch": 32.731147540983606, "grad_norm": 6.851940631866455, "learning_rate": 1.5711262269355944e-05, "loss": 0.7397, "step": 9983 }, { "epoch": 32.734426229508195, "grad_norm": 6.878911018371582, "learning_rate": 1.571039057421564e-05, "loss": 0.8932, "step": 9984 }, { "epoch": 32.73770491803279, "grad_norm": 6.6156768798828125, "learning_rate": 1.5709518814684737e-05, "loss": 0.9979, "step": 9985 }, { "epoch": 32.74098360655738, "grad_norm": 8.048110961914062, "learning_rate": 1.5708646990773083e-05, "loss": 0.8002, "step": 9986 }, { "epoch": 32.74426229508197, "grad_norm": 5.990955352783203, "learning_rate": 1.5707775102490493e-05, "loss": 1.057, "step": 9987 }, { "epoch": 32.747540983606555, "grad_norm": 8.611970901489258, "learning_rate": 1.5706903149846805e-05, "loss": 1.0085, "step": 9988 }, { "epoch": 32.75081967213115, "grad_norm": 6.116438388824463, "learning_rate": 1.5706031132851852e-05, "loss": 1.0588, "step": 9989 }, { "epoch": 32.75409836065574, "grad_norm": 9.015082359313965, "learning_rate": 1.5705159051515464e-05, "loss": 1.0017, "step": 9990 }, { "epoch": 32.75737704918033, "grad_norm": 7.415038108825684, "learning_rate": 1.5704286905847476e-05, "loss": 1.0598, "step": 9991 }, { "epoch": 32.760655737704916, "grad_norm": 7.869776248931885, "learning_rate": 1.5703414695857723e-05, "loss": 1.0539, "step": 9992 }, { "epoch": 32.76393442622951, "grad_norm": 7.776920318603516, "learning_rate": 1.5702542421556035e-05, "loss": 0.963, "step": 9993 }, { "epoch": 32.7672131147541, "grad_norm": 9.25119686126709, "learning_rate": 1.5701670082952258e-05, "loss": 1.0428, "step": 9994 }, { "epoch": 32.77049180327869, "grad_norm": 9.796049118041992, "learning_rate": 1.5700797680056216e-05, "loss": 0.7551, "step": 9995 }, { "epoch": 32.773770491803276, "grad_norm": 8.85645580291748, "learning_rate": 1.5699925212877757e-05, "loss": 0.7066, "step": 9996 }, { "epoch": 32.77704918032787, "grad_norm": 8.180240631103516, "learning_rate": 1.5699052681426716e-05, "loss": 0.9275, "step": 9997 }, { "epoch": 32.78032786885246, "grad_norm": 10.955709457397461, "learning_rate": 1.5698180085712928e-05, "loss": 1.3344, "step": 9998 }, { "epoch": 32.78360655737705, "grad_norm": 7.493354320526123, "learning_rate": 1.5697307425746236e-05, "loss": 0.8816, "step": 9999 }, { "epoch": 32.78688524590164, "grad_norm": 7.173989295959473, "learning_rate": 1.569643470153648e-05, "loss": 0.8115, "step": 10000 }, { "epoch": 32.79016393442623, "grad_norm": 6.611912727355957, "learning_rate": 1.5695561913093497e-05, "loss": 0.9689, "step": 10001 }, { "epoch": 32.79344262295082, "grad_norm": 7.569729328155518, "learning_rate": 1.5694689060427135e-05, "loss": 1.1517, "step": 10002 }, { "epoch": 32.79672131147541, "grad_norm": 10.833666801452637, "learning_rate": 1.5693816143547232e-05, "loss": 0.9189, "step": 10003 }, { "epoch": 32.8, "grad_norm": 7.887824058532715, "learning_rate": 1.5692943162463628e-05, "loss": 1.0393, "step": 10004 }, { "epoch": 32.80327868852459, "grad_norm": 7.232028961181641, "learning_rate": 1.5692070117186174e-05, "loss": 0.8937, "step": 10005 }, { "epoch": 32.80655737704918, "grad_norm": 6.446921348571777, "learning_rate": 1.569119700772471e-05, "loss": 1.0612, "step": 10006 }, { "epoch": 32.80983606557377, "grad_norm": 12.427862167358398, "learning_rate": 1.5690323834089085e-05, "loss": 1.2648, "step": 10007 }, { "epoch": 32.81311475409836, "grad_norm": 6.650768756866455, "learning_rate": 1.568945059628914e-05, "loss": 0.9816, "step": 10008 }, { "epoch": 32.81639344262295, "grad_norm": 8.07723617553711, "learning_rate": 1.5688577294334725e-05, "loss": 0.8512, "step": 10009 }, { "epoch": 32.81967213114754, "grad_norm": 11.286343574523926, "learning_rate": 1.5687703928235686e-05, "loss": 0.8655, "step": 10010 }, { "epoch": 32.82295081967213, "grad_norm": 9.060009956359863, "learning_rate": 1.5686830498001873e-05, "loss": 0.9885, "step": 10011 }, { "epoch": 32.82622950819672, "grad_norm": 7.973455429077148, "learning_rate": 1.568595700364313e-05, "loss": 0.893, "step": 10012 }, { "epoch": 32.829508196721314, "grad_norm": 7.563333988189697, "learning_rate": 1.5685083445169313e-05, "loss": 0.8766, "step": 10013 }, { "epoch": 32.8327868852459, "grad_norm": 7.253077507019043, "learning_rate": 1.568420982259027e-05, "loss": 0.7451, "step": 10014 }, { "epoch": 32.83606557377049, "grad_norm": 5.765043258666992, "learning_rate": 1.5683336135915843e-05, "loss": 0.9818, "step": 10015 }, { "epoch": 32.83934426229508, "grad_norm": 9.003902435302734, "learning_rate": 1.56824623851559e-05, "loss": 0.9175, "step": 10016 }, { "epoch": 32.842622950819674, "grad_norm": 7.108349323272705, "learning_rate": 1.5681588570320283e-05, "loss": 1.1646, "step": 10017 }, { "epoch": 32.84590163934426, "grad_norm": 13.529945373535156, "learning_rate": 1.5680714691418848e-05, "loss": 0.8817, "step": 10018 }, { "epoch": 32.84918032786885, "grad_norm": 8.52689266204834, "learning_rate": 1.5679840748461444e-05, "loss": 0.9421, "step": 10019 }, { "epoch": 32.85245901639344, "grad_norm": 7.410256862640381, "learning_rate": 1.5678966741457938e-05, "loss": 0.785, "step": 10020 }, { "epoch": 32.855737704918035, "grad_norm": 8.298973083496094, "learning_rate": 1.567809267041817e-05, "loss": 0.7428, "step": 10021 }, { "epoch": 32.85901639344262, "grad_norm": 7.377444744110107, "learning_rate": 1.567721853535201e-05, "loss": 0.8823, "step": 10022 }, { "epoch": 32.86229508196721, "grad_norm": 8.07552719116211, "learning_rate": 1.5676344336269303e-05, "loss": 0.9647, "step": 10023 }, { "epoch": 32.86557377049181, "grad_norm": 7.907415866851807, "learning_rate": 1.5675470073179913e-05, "loss": 1.0553, "step": 10024 }, { "epoch": 32.868852459016395, "grad_norm": 7.268730640411377, "learning_rate": 1.5674595746093698e-05, "loss": 1.0608, "step": 10025 }, { "epoch": 32.87213114754098, "grad_norm": 7.053991317749023, "learning_rate": 1.5673721355020517e-05, "loss": 0.84, "step": 10026 }, { "epoch": 32.87540983606557, "grad_norm": 8.0274658203125, "learning_rate": 1.5672846899970226e-05, "loss": 0.7983, "step": 10027 }, { "epoch": 32.87868852459017, "grad_norm": 6.213109016418457, "learning_rate": 1.567197238095269e-05, "loss": 1.1449, "step": 10028 }, { "epoch": 32.881967213114756, "grad_norm": 6.032423973083496, "learning_rate": 1.5671097797977764e-05, "loss": 0.7724, "step": 10029 }, { "epoch": 32.885245901639344, "grad_norm": 7.85275411605835, "learning_rate": 1.5670223151055316e-05, "loss": 0.9878, "step": 10030 }, { "epoch": 32.88852459016393, "grad_norm": 7.0569000244140625, "learning_rate": 1.566934844019521e-05, "loss": 0.8908, "step": 10031 }, { "epoch": 32.89180327868853, "grad_norm": 6.891747951507568, "learning_rate": 1.56684736654073e-05, "loss": 0.8575, "step": 10032 }, { "epoch": 32.895081967213116, "grad_norm": 7.659645080566406, "learning_rate": 1.5667598826701463e-05, "loss": 1.1155, "step": 10033 }, { "epoch": 32.898360655737704, "grad_norm": 24.815977096557617, "learning_rate": 1.566672392408755e-05, "loss": 0.7298, "step": 10034 }, { "epoch": 32.90163934426229, "grad_norm": 9.251503944396973, "learning_rate": 1.5665848957575436e-05, "loss": 0.9248, "step": 10035 }, { "epoch": 32.90491803278689, "grad_norm": 9.451050758361816, "learning_rate": 1.5664973927174983e-05, "loss": 1.0293, "step": 10036 }, { "epoch": 32.90819672131148, "grad_norm": 7.9785895347595215, "learning_rate": 1.5664098832896058e-05, "loss": 1.2267, "step": 10037 }, { "epoch": 32.911475409836065, "grad_norm": 7.286596298217773, "learning_rate": 1.566322367474853e-05, "loss": 0.8033, "step": 10038 }, { "epoch": 32.91475409836065, "grad_norm": 7.983547687530518, "learning_rate": 1.5662348452742267e-05, "loss": 0.7481, "step": 10039 }, { "epoch": 32.91803278688525, "grad_norm": 7.562473297119141, "learning_rate": 1.566147316688714e-05, "loss": 1.0748, "step": 10040 }, { "epoch": 32.92131147540984, "grad_norm": 6.530319690704346, "learning_rate": 1.5660597817193012e-05, "loss": 0.8401, "step": 10041 }, { "epoch": 32.924590163934425, "grad_norm": 8.339035034179688, "learning_rate": 1.5659722403669762e-05, "loss": 0.9805, "step": 10042 }, { "epoch": 32.927868852459014, "grad_norm": 7.800432205200195, "learning_rate": 1.5658846926327255e-05, "loss": 0.9389, "step": 10043 }, { "epoch": 32.93114754098361, "grad_norm": 8.827797889709473, "learning_rate": 1.5657971385175367e-05, "loss": 1.0065, "step": 10044 }, { "epoch": 32.9344262295082, "grad_norm": 7.894272804260254, "learning_rate": 1.5657095780223965e-05, "loss": 0.8284, "step": 10045 }, { "epoch": 32.937704918032786, "grad_norm": 6.161922454833984, "learning_rate": 1.5656220111482928e-05, "loss": 0.9531, "step": 10046 }, { "epoch": 32.940983606557374, "grad_norm": 7.7575178146362305, "learning_rate": 1.5655344378962133e-05, "loss": 0.9993, "step": 10047 }, { "epoch": 32.94426229508197, "grad_norm": 8.19900131225586, "learning_rate": 1.5654468582671443e-05, "loss": 0.863, "step": 10048 }, { "epoch": 32.94754098360656, "grad_norm": 10.635334014892578, "learning_rate": 1.565359272262074e-05, "loss": 0.8898, "step": 10049 }, { "epoch": 32.950819672131146, "grad_norm": 8.374717712402344, "learning_rate": 1.565271679881991e-05, "loss": 0.9965, "step": 10050 }, { "epoch": 32.954098360655735, "grad_norm": 13.668925285339355, "learning_rate": 1.5651840811278813e-05, "loss": 0.8107, "step": 10051 }, { "epoch": 32.95737704918033, "grad_norm": 10.99195384979248, "learning_rate": 1.5650964760007337e-05, "loss": 1.1694, "step": 10052 }, { "epoch": 32.96065573770492, "grad_norm": 9.472114562988281, "learning_rate": 1.5650088645015357e-05, "loss": 1.0297, "step": 10053 }, { "epoch": 32.96393442622951, "grad_norm": 7.510865211486816, "learning_rate": 1.5649212466312754e-05, "loss": 0.8418, "step": 10054 }, { "epoch": 32.967213114754095, "grad_norm": 9.859060287475586, "learning_rate": 1.5648336223909405e-05, "loss": 0.7745, "step": 10055 }, { "epoch": 32.97049180327869, "grad_norm": 9.353982925415039, "learning_rate": 1.5647459917815195e-05, "loss": 0.7393, "step": 10056 }, { "epoch": 32.97377049180328, "grad_norm": 8.215753555297852, "learning_rate": 1.564658354804e-05, "loss": 1.0508, "step": 10057 }, { "epoch": 32.97704918032787, "grad_norm": 8.028800964355469, "learning_rate": 1.5645707114593706e-05, "loss": 0.7496, "step": 10058 }, { "epoch": 32.980327868852456, "grad_norm": 9.631141662597656, "learning_rate": 1.5644830617486194e-05, "loss": 0.9525, "step": 10059 }, { "epoch": 32.98360655737705, "grad_norm": 8.77086353302002, "learning_rate": 1.5643954056727347e-05, "loss": 0.7443, "step": 10060 }, { "epoch": 32.98688524590164, "grad_norm": 7.749711036682129, "learning_rate": 1.5643077432327046e-05, "loss": 0.9, "step": 10061 }, { "epoch": 32.99016393442623, "grad_norm": 7.622419357299805, "learning_rate": 1.5642200744295187e-05, "loss": 0.8806, "step": 10062 }, { "epoch": 32.993442622950816, "grad_norm": 7.271721839904785, "learning_rate": 1.5641323992641643e-05, "loss": 0.9312, "step": 10063 }, { "epoch": 32.99672131147541, "grad_norm": 7.763856887817383, "learning_rate": 1.5640447177376308e-05, "loss": 1.1261, "step": 10064 }, { "epoch": 33.0, "grad_norm": 7.81259298324585, "learning_rate": 1.5639570298509067e-05, "loss": 0.8378, "step": 10065 }, { "epoch": 33.00327868852459, "grad_norm": 11.087801933288574, "learning_rate": 1.563869335604981e-05, "loss": 0.9744, "step": 10066 }, { "epoch": 33.006557377049184, "grad_norm": 13.218674659729004, "learning_rate": 1.5637816350008415e-05, "loss": 0.9417, "step": 10067 }, { "epoch": 33.00983606557377, "grad_norm": 7.059430122375488, "learning_rate": 1.563693928039478e-05, "loss": 1.0192, "step": 10068 }, { "epoch": 33.01311475409836, "grad_norm": 8.158641815185547, "learning_rate": 1.5636062147218796e-05, "loss": 0.9938, "step": 10069 }, { "epoch": 33.01639344262295, "grad_norm": 7.928633213043213, "learning_rate": 1.5635184950490353e-05, "loss": 0.8455, "step": 10070 }, { "epoch": 33.019672131147544, "grad_norm": 6.658041477203369, "learning_rate": 1.563430769021934e-05, "loss": 1.0397, "step": 10071 }, { "epoch": 33.02295081967213, "grad_norm": 8.687451362609863, "learning_rate": 1.563343036641565e-05, "loss": 0.9879, "step": 10072 }, { "epoch": 33.02622950819672, "grad_norm": 9.320508003234863, "learning_rate": 1.563255297908917e-05, "loss": 0.9396, "step": 10073 }, { "epoch": 33.02950819672131, "grad_norm": 7.019472122192383, "learning_rate": 1.5631675528249804e-05, "loss": 0.7412, "step": 10074 }, { "epoch": 33.032786885245905, "grad_norm": 8.886676788330078, "learning_rate": 1.563079801390744e-05, "loss": 0.5795, "step": 10075 }, { "epoch": 33.03606557377049, "grad_norm": 8.136214256286621, "learning_rate": 1.5629920436071974e-05, "loss": 0.6756, "step": 10076 }, { "epoch": 33.03934426229508, "grad_norm": 12.708650588989258, "learning_rate": 1.56290427947533e-05, "loss": 0.8498, "step": 10077 }, { "epoch": 33.04262295081967, "grad_norm": 11.97519302368164, "learning_rate": 1.5628165089961314e-05, "loss": 0.9287, "step": 10078 }, { "epoch": 33.045901639344265, "grad_norm": 6.620937347412109, "learning_rate": 1.562728732170592e-05, "loss": 0.8572, "step": 10079 }, { "epoch": 33.049180327868854, "grad_norm": 7.782042503356934, "learning_rate": 1.5626409489997008e-05, "loss": 1.038, "step": 10080 }, { "epoch": 33.05245901639344, "grad_norm": 7.560794830322266, "learning_rate": 1.562553159484448e-05, "loss": 1.0759, "step": 10081 }, { "epoch": 33.05573770491803, "grad_norm": 8.9354829788208, "learning_rate": 1.562465363625823e-05, "loss": 1.0186, "step": 10082 }, { "epoch": 33.059016393442626, "grad_norm": 11.882966995239258, "learning_rate": 1.5623775614248167e-05, "loss": 1.0087, "step": 10083 }, { "epoch": 33.062295081967214, "grad_norm": 7.500890254974365, "learning_rate": 1.5622897528824185e-05, "loss": 1.0553, "step": 10084 }, { "epoch": 33.0655737704918, "grad_norm": 8.675004005432129, "learning_rate": 1.5622019379996184e-05, "loss": 0.9026, "step": 10085 }, { "epoch": 33.06885245901639, "grad_norm": 10.001450538635254, "learning_rate": 1.5621141167774073e-05, "loss": 0.8568, "step": 10086 }, { "epoch": 33.072131147540986, "grad_norm": 13.437783241271973, "learning_rate": 1.562026289216775e-05, "loss": 0.9741, "step": 10087 }, { "epoch": 33.075409836065575, "grad_norm": 13.577411651611328, "learning_rate": 1.561938455318712e-05, "loss": 1.0876, "step": 10088 }, { "epoch": 33.07868852459016, "grad_norm": 7.213393211364746, "learning_rate": 1.5618506150842083e-05, "loss": 0.9109, "step": 10089 }, { "epoch": 33.08196721311475, "grad_norm": 8.853141784667969, "learning_rate": 1.5617627685142554e-05, "loss": 0.9943, "step": 10090 }, { "epoch": 33.08524590163935, "grad_norm": 7.784962177276611, "learning_rate": 1.5616749156098424e-05, "loss": 0.8606, "step": 10091 }, { "epoch": 33.088524590163935, "grad_norm": 9.388197898864746, "learning_rate": 1.5615870563719612e-05, "loss": 0.9342, "step": 10092 }, { "epoch": 33.09180327868852, "grad_norm": 9.016175270080566, "learning_rate": 1.561499190801602e-05, "loss": 0.7089, "step": 10093 }, { "epoch": 33.09508196721311, "grad_norm": 8.322403907775879, "learning_rate": 1.5614113188997556e-05, "loss": 0.778, "step": 10094 }, { "epoch": 33.09836065573771, "grad_norm": 6.407953262329102, "learning_rate": 1.5613234406674126e-05, "loss": 0.7495, "step": 10095 }, { "epoch": 33.101639344262296, "grad_norm": 8.558979034423828, "learning_rate": 1.5612355561055644e-05, "loss": 0.9794, "step": 10096 }, { "epoch": 33.104918032786884, "grad_norm": 7.281223773956299, "learning_rate": 1.5611476652152017e-05, "loss": 1.0082, "step": 10097 }, { "epoch": 33.10819672131147, "grad_norm": 9.007986068725586, "learning_rate": 1.561059767997316e-05, "loss": 0.8458, "step": 10098 }, { "epoch": 33.11147540983607, "grad_norm": 7.221841812133789, "learning_rate": 1.5609718644528976e-05, "loss": 1.0942, "step": 10099 }, { "epoch": 33.114754098360656, "grad_norm": 7.183264255523682, "learning_rate": 1.5608839545829382e-05, "loss": 1.0486, "step": 10100 }, { "epoch": 33.118032786885244, "grad_norm": 7.780261516571045, "learning_rate": 1.5607960383884294e-05, "loss": 1.043, "step": 10101 }, { "epoch": 33.12131147540983, "grad_norm": 7.04877233505249, "learning_rate": 1.560708115870362e-05, "loss": 0.9103, "step": 10102 }, { "epoch": 33.12459016393443, "grad_norm": 10.481917381286621, "learning_rate": 1.5606201870297276e-05, "loss": 0.7, "step": 10103 }, { "epoch": 33.12786885245902, "grad_norm": 7.419874668121338, "learning_rate": 1.5605322518675175e-05, "loss": 0.9094, "step": 10104 }, { "epoch": 33.131147540983605, "grad_norm": 21.67628288269043, "learning_rate": 1.5604443103847236e-05, "loss": 0.8006, "step": 10105 }, { "epoch": 33.13442622950819, "grad_norm": 9.847732543945312, "learning_rate": 1.5603563625823374e-05, "loss": 0.6129, "step": 10106 }, { "epoch": 33.13770491803279, "grad_norm": 6.47713565826416, "learning_rate": 1.5602684084613504e-05, "loss": 0.78, "step": 10107 }, { "epoch": 33.14098360655738, "grad_norm": 7.174961566925049, "learning_rate": 1.5601804480227543e-05, "loss": 0.8127, "step": 10108 }, { "epoch": 33.144262295081965, "grad_norm": 7.8107075691223145, "learning_rate": 1.560092481267542e-05, "loss": 0.9595, "step": 10109 }, { "epoch": 33.14754098360656, "grad_norm": 7.024982929229736, "learning_rate": 1.5600045081967042e-05, "loss": 0.945, "step": 10110 }, { "epoch": 33.15081967213115, "grad_norm": 7.807246685028076, "learning_rate": 1.5599165288112333e-05, "loss": 0.9451, "step": 10111 }, { "epoch": 33.15409836065574, "grad_norm": 13.582152366638184, "learning_rate": 1.5598285431121215e-05, "loss": 0.9825, "step": 10112 }, { "epoch": 33.157377049180326, "grad_norm": 7.371908187866211, "learning_rate": 1.5597405511003608e-05, "loss": 0.8138, "step": 10113 }, { "epoch": 33.16065573770492, "grad_norm": 10.710716247558594, "learning_rate": 1.559652552776943e-05, "loss": 1.0414, "step": 10114 }, { "epoch": 33.16393442622951, "grad_norm": 7.986227989196777, "learning_rate": 1.5595645481428614e-05, "loss": 0.7598, "step": 10115 }, { "epoch": 33.1672131147541, "grad_norm": 7.977749347686768, "learning_rate": 1.5594765371991073e-05, "loss": 0.7836, "step": 10116 }, { "epoch": 33.170491803278686, "grad_norm": 7.243414402008057, "learning_rate": 1.5593885199466737e-05, "loss": 0.85, "step": 10117 }, { "epoch": 33.17377049180328, "grad_norm": 7.661960601806641, "learning_rate": 1.559300496386553e-05, "loss": 1.0649, "step": 10118 }, { "epoch": 33.17704918032787, "grad_norm": 9.121298789978027, "learning_rate": 1.5592124665197374e-05, "loss": 0.8568, "step": 10119 }, { "epoch": 33.18032786885246, "grad_norm": 9.48310375213623, "learning_rate": 1.55912443034722e-05, "loss": 0.9261, "step": 10120 }, { "epoch": 33.18360655737705, "grad_norm": 7.54934024810791, "learning_rate": 1.5590363878699932e-05, "loss": 0.8249, "step": 10121 }, { "epoch": 33.18688524590164, "grad_norm": 7.601181507110596, "learning_rate": 1.55894833908905e-05, "loss": 0.8436, "step": 10122 }, { "epoch": 33.19016393442623, "grad_norm": 8.461892127990723, "learning_rate": 1.558860284005383e-05, "loss": 0.7808, "step": 10123 }, { "epoch": 33.19344262295082, "grad_norm": 8.060990333557129, "learning_rate": 1.5587722226199854e-05, "loss": 0.8698, "step": 10124 }, { "epoch": 33.19672131147541, "grad_norm": 9.372102737426758, "learning_rate": 1.55868415493385e-05, "loss": 1.1082, "step": 10125 }, { "epoch": 33.2, "grad_norm": 7.065751075744629, "learning_rate": 1.5585960809479698e-05, "loss": 0.8661, "step": 10126 }, { "epoch": 33.20327868852459, "grad_norm": 8.748187065124512, "learning_rate": 1.558508000663338e-05, "loss": 1.0077, "step": 10127 }, { "epoch": 33.20655737704918, "grad_norm": 6.004514217376709, "learning_rate": 1.5584199140809476e-05, "loss": 0.8065, "step": 10128 }, { "epoch": 33.20983606557377, "grad_norm": 9.11937427520752, "learning_rate": 1.5583318212017923e-05, "loss": 1.0324, "step": 10129 }, { "epoch": 33.21311475409836, "grad_norm": 7.337594985961914, "learning_rate": 1.5582437220268648e-05, "loss": 0.7707, "step": 10130 }, { "epoch": 33.21639344262295, "grad_norm": 7.183630466461182, "learning_rate": 1.5581556165571593e-05, "loss": 0.8037, "step": 10131 }, { "epoch": 33.21967213114754, "grad_norm": 7.308620929718018, "learning_rate": 1.5580675047936688e-05, "loss": 0.6284, "step": 10132 }, { "epoch": 33.22295081967213, "grad_norm": 7.676589488983154, "learning_rate": 1.5579793867373868e-05, "loss": 0.7957, "step": 10133 }, { "epoch": 33.226229508196724, "grad_norm": 6.154712677001953, "learning_rate": 1.557891262389307e-05, "loss": 0.9388, "step": 10134 }, { "epoch": 33.22950819672131, "grad_norm": 7.856605529785156, "learning_rate": 1.557803131750424e-05, "loss": 1.1684, "step": 10135 }, { "epoch": 33.2327868852459, "grad_norm": 7.9467644691467285, "learning_rate": 1.55771499482173e-05, "loss": 0.9578, "step": 10136 }, { "epoch": 33.23606557377049, "grad_norm": 8.891617774963379, "learning_rate": 1.5576268516042193e-05, "loss": 0.9153, "step": 10137 }, { "epoch": 33.239344262295084, "grad_norm": 9.259994506835938, "learning_rate": 1.5575387020988864e-05, "loss": 0.7729, "step": 10138 }, { "epoch": 33.24262295081967, "grad_norm": 7.137905597686768, "learning_rate": 1.5574505463067252e-05, "loss": 0.9886, "step": 10139 }, { "epoch": 33.24590163934426, "grad_norm": 7.368449687957764, "learning_rate": 1.557362384228729e-05, "loss": 0.8201, "step": 10140 }, { "epoch": 33.24918032786885, "grad_norm": 8.184511184692383, "learning_rate": 1.5572742158658923e-05, "loss": 0.7947, "step": 10141 }, { "epoch": 33.252459016393445, "grad_norm": 10.244637489318848, "learning_rate": 1.55718604121921e-05, "loss": 0.9951, "step": 10142 }, { "epoch": 33.25573770491803, "grad_norm": 16.455078125, "learning_rate": 1.5570978602896754e-05, "loss": 0.7768, "step": 10143 }, { "epoch": 33.25901639344262, "grad_norm": 7.288594722747803, "learning_rate": 1.5570096730782833e-05, "loss": 0.9587, "step": 10144 }, { "epoch": 33.26229508196721, "grad_norm": 8.179366111755371, "learning_rate": 1.556921479586028e-05, "loss": 0.8127, "step": 10145 }, { "epoch": 33.265573770491805, "grad_norm": 13.606159210205078, "learning_rate": 1.556833279813904e-05, "loss": 0.7794, "step": 10146 }, { "epoch": 33.268852459016394, "grad_norm": 11.777856826782227, "learning_rate": 1.5567450737629057e-05, "loss": 0.9528, "step": 10147 }, { "epoch": 33.27213114754098, "grad_norm": 8.184149742126465, "learning_rate": 1.5566568614340278e-05, "loss": 0.8668, "step": 10148 }, { "epoch": 33.27540983606557, "grad_norm": 7.990325927734375, "learning_rate": 1.556568642828265e-05, "loss": 0.9272, "step": 10149 }, { "epoch": 33.278688524590166, "grad_norm": 7.071419715881348, "learning_rate": 1.5564804179466124e-05, "loss": 0.8361, "step": 10150 }, { "epoch": 33.281967213114754, "grad_norm": 7.446767807006836, "learning_rate": 1.556392186790064e-05, "loss": 1.0582, "step": 10151 }, { "epoch": 33.28524590163934, "grad_norm": 15.414690017700195, "learning_rate": 1.556303949359615e-05, "loss": 0.964, "step": 10152 }, { "epoch": 33.28852459016394, "grad_norm": 12.437590599060059, "learning_rate": 1.5562157056562614e-05, "loss": 1.0851, "step": 10153 }, { "epoch": 33.291803278688526, "grad_norm": 7.651742458343506, "learning_rate": 1.5561274556809968e-05, "loss": 0.8728, "step": 10154 }, { "epoch": 33.295081967213115, "grad_norm": 8.952751159667969, "learning_rate": 1.5560391994348172e-05, "loss": 0.9957, "step": 10155 }, { "epoch": 33.2983606557377, "grad_norm": 7.244298458099365, "learning_rate": 1.555950936918717e-05, "loss": 1.0129, "step": 10156 }, { "epoch": 33.3016393442623, "grad_norm": 10.345125198364258, "learning_rate": 1.5558626681336926e-05, "loss": 0.9922, "step": 10157 }, { "epoch": 33.30491803278689, "grad_norm": 6.302105903625488, "learning_rate": 1.555774393080738e-05, "loss": 1.1234, "step": 10158 }, { "epoch": 33.308196721311475, "grad_norm": 8.472949028015137, "learning_rate": 1.55568611176085e-05, "loss": 0.7295, "step": 10159 }, { "epoch": 33.31147540983606, "grad_norm": 8.897068977355957, "learning_rate": 1.5555978241750228e-05, "loss": 0.8781, "step": 10160 }, { "epoch": 33.31475409836066, "grad_norm": 7.51796817779541, "learning_rate": 1.5555095303242528e-05, "loss": 1.1403, "step": 10161 }, { "epoch": 33.31803278688525, "grad_norm": 13.39395523071289, "learning_rate": 1.555421230209535e-05, "loss": 1.0522, "step": 10162 }, { "epoch": 33.321311475409836, "grad_norm": 7.84005069732666, "learning_rate": 1.5553329238318654e-05, "loss": 0.8326, "step": 10163 }, { "epoch": 33.324590163934424, "grad_norm": 8.589150428771973, "learning_rate": 1.5552446111922396e-05, "loss": 0.6855, "step": 10164 }, { "epoch": 33.32786885245902, "grad_norm": 6.716124534606934, "learning_rate": 1.5551562922916537e-05, "loss": 0.9136, "step": 10165 }, { "epoch": 33.33114754098361, "grad_norm": 8.213988304138184, "learning_rate": 1.5550679671311032e-05, "loss": 0.7338, "step": 10166 }, { "epoch": 33.334426229508196, "grad_norm": 7.099685192108154, "learning_rate": 1.5549796357115844e-05, "loss": 0.9032, "step": 10167 }, { "epoch": 33.337704918032784, "grad_norm": 7.839723587036133, "learning_rate": 1.554891298034093e-05, "loss": 0.7379, "step": 10168 }, { "epoch": 33.34098360655738, "grad_norm": 8.832687377929688, "learning_rate": 1.5548029540996254e-05, "loss": 0.679, "step": 10169 }, { "epoch": 33.34426229508197, "grad_norm": 5.961512565612793, "learning_rate": 1.5547146039091775e-05, "loss": 0.9822, "step": 10170 }, { "epoch": 33.34754098360656, "grad_norm": 7.568035125732422, "learning_rate": 1.554626247463746e-05, "loss": 0.8154, "step": 10171 }, { "epoch": 33.350819672131145, "grad_norm": 9.506756782531738, "learning_rate": 1.5545378847643267e-05, "loss": 0.8766, "step": 10172 }, { "epoch": 33.35409836065574, "grad_norm": 8.08466911315918, "learning_rate": 1.554449515811916e-05, "loss": 0.8956, "step": 10173 }, { "epoch": 33.35737704918033, "grad_norm": 9.037723541259766, "learning_rate": 1.5543611406075108e-05, "loss": 0.7867, "step": 10174 }, { "epoch": 33.36065573770492, "grad_norm": 6.905208587646484, "learning_rate": 1.554272759152107e-05, "loss": 0.8258, "step": 10175 }, { "epoch": 33.363934426229505, "grad_norm": 8.676980018615723, "learning_rate": 1.5541843714467018e-05, "loss": 0.8961, "step": 10176 }, { "epoch": 33.3672131147541, "grad_norm": 10.28270435333252, "learning_rate": 1.5540959774922915e-05, "loss": 0.7988, "step": 10177 }, { "epoch": 33.37049180327869, "grad_norm": 8.144938468933105, "learning_rate": 1.5540075772898732e-05, "loss": 0.8973, "step": 10178 }, { "epoch": 33.37377049180328, "grad_norm": 6.9304962158203125, "learning_rate": 1.5539191708404432e-05, "loss": 0.779, "step": 10179 }, { "epoch": 33.377049180327866, "grad_norm": 9.173887252807617, "learning_rate": 1.5538307581449984e-05, "loss": 0.7644, "step": 10180 }, { "epoch": 33.38032786885246, "grad_norm": 8.262948036193848, "learning_rate": 1.5537423392045365e-05, "loss": 0.9075, "step": 10181 }, { "epoch": 33.38360655737705, "grad_norm": 9.081925392150879, "learning_rate": 1.5536539140200537e-05, "loss": 0.8458, "step": 10182 }, { "epoch": 33.38688524590164, "grad_norm": 9.019230842590332, "learning_rate": 1.553565482592547e-05, "loss": 0.7227, "step": 10183 }, { "epoch": 33.390163934426226, "grad_norm": 6.626956939697266, "learning_rate": 1.5534770449230145e-05, "loss": 0.7792, "step": 10184 }, { "epoch": 33.39344262295082, "grad_norm": 6.822695255279541, "learning_rate": 1.553388601012453e-05, "loss": 1.3192, "step": 10185 }, { "epoch": 33.39672131147541, "grad_norm": 10.524805068969727, "learning_rate": 1.553300150861859e-05, "loss": 1.0574, "step": 10186 }, { "epoch": 33.4, "grad_norm": 8.447511672973633, "learning_rate": 1.5532116944722308e-05, "loss": 0.9868, "step": 10187 }, { "epoch": 33.40327868852459, "grad_norm": 6.782492637634277, "learning_rate": 1.5531232318445654e-05, "loss": 0.9846, "step": 10188 }, { "epoch": 33.40655737704918, "grad_norm": 11.16547966003418, "learning_rate": 1.5530347629798606e-05, "loss": 0.9377, "step": 10189 }, { "epoch": 33.40983606557377, "grad_norm": 7.14781379699707, "learning_rate": 1.552946287879114e-05, "loss": 1.0791, "step": 10190 }, { "epoch": 33.41311475409836, "grad_norm": 9.958312034606934, "learning_rate": 1.552857806543323e-05, "loss": 0.7886, "step": 10191 }, { "epoch": 33.41639344262295, "grad_norm": 12.705774307250977, "learning_rate": 1.5527693189734853e-05, "loss": 0.9544, "step": 10192 }, { "epoch": 33.41967213114754, "grad_norm": 10.219173431396484, "learning_rate": 1.5526808251705988e-05, "loss": 0.9344, "step": 10193 }, { "epoch": 33.42295081967213, "grad_norm": 9.803584098815918, "learning_rate": 1.5525923251356613e-05, "loss": 0.8035, "step": 10194 }, { "epoch": 33.42622950819672, "grad_norm": 8.051301956176758, "learning_rate": 1.552503818869671e-05, "loss": 0.9243, "step": 10195 }, { "epoch": 33.429508196721315, "grad_norm": 6.671399116516113, "learning_rate": 1.5524153063736255e-05, "loss": 0.9589, "step": 10196 }, { "epoch": 33.4327868852459, "grad_norm": 6.648656845092773, "learning_rate": 1.552326787648523e-05, "loss": 0.7989, "step": 10197 }, { "epoch": 33.43606557377049, "grad_norm": 7.343740940093994, "learning_rate": 1.5522382626953618e-05, "loss": 0.9642, "step": 10198 }, { "epoch": 33.43934426229508, "grad_norm": 7.595348834991455, "learning_rate": 1.55214973151514e-05, "loss": 1.0936, "step": 10199 }, { "epoch": 33.442622950819676, "grad_norm": 6.616563320159912, "learning_rate": 1.5520611941088558e-05, "loss": 1.0419, "step": 10200 }, { "epoch": 33.445901639344264, "grad_norm": 12.254389762878418, "learning_rate": 1.5519726504775076e-05, "loss": 0.9172, "step": 10201 }, { "epoch": 33.44918032786885, "grad_norm": 9.354141235351562, "learning_rate": 1.5518841006220942e-05, "loss": 0.9941, "step": 10202 }, { "epoch": 33.45245901639344, "grad_norm": 8.850845336914062, "learning_rate": 1.5517955445436138e-05, "loss": 1.1437, "step": 10203 }, { "epoch": 33.455737704918036, "grad_norm": 7.4391560554504395, "learning_rate": 1.551706982243064e-05, "loss": 0.8941, "step": 10204 }, { "epoch": 33.459016393442624, "grad_norm": 6.125725269317627, "learning_rate": 1.5516184137214454e-05, "loss": 0.82, "step": 10205 }, { "epoch": 33.46229508196721, "grad_norm": 13.268325805664062, "learning_rate": 1.551529838979755e-05, "loss": 1.1081, "step": 10206 }, { "epoch": 33.4655737704918, "grad_norm": 10.612605094909668, "learning_rate": 1.5514412580189926e-05, "loss": 0.8885, "step": 10207 }, { "epoch": 33.4688524590164, "grad_norm": 7.062865734100342, "learning_rate": 1.5513526708401566e-05, "loss": 1.1199, "step": 10208 }, { "epoch": 33.472131147540985, "grad_norm": 7.197677135467529, "learning_rate": 1.5512640774442455e-05, "loss": 0.8953, "step": 10209 }, { "epoch": 33.47540983606557, "grad_norm": 13.90084171295166, "learning_rate": 1.551175477832259e-05, "loss": 1.0916, "step": 10210 }, { "epoch": 33.47868852459016, "grad_norm": 10.602289199829102, "learning_rate": 1.5510868720051965e-05, "loss": 0.866, "step": 10211 }, { "epoch": 33.48196721311476, "grad_norm": 8.409305572509766, "learning_rate": 1.5509982599640556e-05, "loss": 0.7191, "step": 10212 }, { "epoch": 33.485245901639345, "grad_norm": 5.956305027008057, "learning_rate": 1.5509096417098372e-05, "loss": 1.116, "step": 10213 }, { "epoch": 33.488524590163934, "grad_norm": 8.803665161132812, "learning_rate": 1.5508210172435392e-05, "loss": 0.7877, "step": 10214 }, { "epoch": 33.49180327868852, "grad_norm": 7.901933670043945, "learning_rate": 1.550732386566162e-05, "loss": 0.8208, "step": 10215 }, { "epoch": 33.49508196721312, "grad_norm": 8.047418594360352, "learning_rate": 1.5506437496787045e-05, "loss": 0.9289, "step": 10216 }, { "epoch": 33.498360655737706, "grad_norm": 9.23779582977295, "learning_rate": 1.550555106582166e-05, "loss": 0.6763, "step": 10217 }, { "epoch": 33.501639344262294, "grad_norm": 5.921931743621826, "learning_rate": 1.5504664572775462e-05, "loss": 0.9361, "step": 10218 }, { "epoch": 33.50491803278688, "grad_norm": 9.167901039123535, "learning_rate": 1.5503778017658447e-05, "loss": 0.9276, "step": 10219 }, { "epoch": 33.50819672131148, "grad_norm": 8.560855865478516, "learning_rate": 1.5502891400480612e-05, "loss": 0.9833, "step": 10220 }, { "epoch": 33.511475409836066, "grad_norm": 7.977176189422607, "learning_rate": 1.550200472125196e-05, "loss": 0.7462, "step": 10221 }, { "epoch": 33.514754098360655, "grad_norm": 7.636713981628418, "learning_rate": 1.550111797998248e-05, "loss": 0.9023, "step": 10222 }, { "epoch": 33.51803278688524, "grad_norm": 10.473703384399414, "learning_rate": 1.5500231176682175e-05, "loss": 0.7709, "step": 10223 }, { "epoch": 33.52131147540984, "grad_norm": 7.784289360046387, "learning_rate": 1.5499344311361044e-05, "loss": 0.8687, "step": 10224 }, { "epoch": 33.52459016393443, "grad_norm": 6.167240142822266, "learning_rate": 1.5498457384029088e-05, "loss": 0.9602, "step": 10225 }, { "epoch": 33.527868852459015, "grad_norm": 7.759610652923584, "learning_rate": 1.549757039469631e-05, "loss": 0.6519, "step": 10226 }, { "epoch": 33.5311475409836, "grad_norm": 11.604219436645508, "learning_rate": 1.549668334337271e-05, "loss": 1.1545, "step": 10227 }, { "epoch": 33.5344262295082, "grad_norm": 12.317927360534668, "learning_rate": 1.549579623006829e-05, "loss": 0.7642, "step": 10228 }, { "epoch": 33.53770491803279, "grad_norm": 7.482689380645752, "learning_rate": 1.549490905479305e-05, "loss": 0.9307, "step": 10229 }, { "epoch": 33.540983606557376, "grad_norm": 8.778984069824219, "learning_rate": 1.5494021817557002e-05, "loss": 0.6922, "step": 10230 }, { "epoch": 33.544262295081964, "grad_norm": 7.158186912536621, "learning_rate": 1.5493134518370142e-05, "loss": 1.026, "step": 10231 }, { "epoch": 33.54754098360656, "grad_norm": 7.059859275817871, "learning_rate": 1.549224715724248e-05, "loss": 1.05, "step": 10232 }, { "epoch": 33.55081967213115, "grad_norm": 8.764963150024414, "learning_rate": 1.549135973418402e-05, "loss": 0.6585, "step": 10233 }, { "epoch": 33.554098360655736, "grad_norm": 7.582760810852051, "learning_rate": 1.549047224920477e-05, "loss": 1.0837, "step": 10234 }, { "epoch": 33.557377049180324, "grad_norm": 7.855360984802246, "learning_rate": 1.5489584702314737e-05, "loss": 0.8948, "step": 10235 }, { "epoch": 33.56065573770492, "grad_norm": 6.737240791320801, "learning_rate": 1.548869709352393e-05, "loss": 1.0156, "step": 10236 }, { "epoch": 33.56393442622951, "grad_norm": 9.40877628326416, "learning_rate": 1.5487809422842356e-05, "loss": 0.7955, "step": 10237 }, { "epoch": 33.5672131147541, "grad_norm": 7.254882335662842, "learning_rate": 1.5486921690280024e-05, "loss": 1.0381, "step": 10238 }, { "epoch": 33.570491803278685, "grad_norm": 14.78516674041748, "learning_rate": 1.5486033895846945e-05, "loss": 0.8834, "step": 10239 }, { "epoch": 33.57377049180328, "grad_norm": 7.383636951446533, "learning_rate": 1.548514603955313e-05, "loss": 1.0671, "step": 10240 }, { "epoch": 33.57704918032787, "grad_norm": 6.464609622955322, "learning_rate": 1.5484258121408592e-05, "loss": 1.0794, "step": 10241 }, { "epoch": 33.58032786885246, "grad_norm": 6.910569190979004, "learning_rate": 1.5483370141423338e-05, "loss": 0.9319, "step": 10242 }, { "epoch": 33.58360655737705, "grad_norm": 9.12902545928955, "learning_rate": 1.5482482099607382e-05, "loss": 0.9395, "step": 10243 }, { "epoch": 33.58688524590164, "grad_norm": 6.928332805633545, "learning_rate": 1.5481593995970747e-05, "loss": 0.8816, "step": 10244 }, { "epoch": 33.59016393442623, "grad_norm": 7.8349761962890625, "learning_rate": 1.5480705830523438e-05, "loss": 0.6972, "step": 10245 }, { "epoch": 33.59344262295082, "grad_norm": 9.258816719055176, "learning_rate": 1.547981760327547e-05, "loss": 1.1594, "step": 10246 }, { "epoch": 33.59672131147541, "grad_norm": 7.725579261779785, "learning_rate": 1.5478929314236865e-05, "loss": 1.0544, "step": 10247 }, { "epoch": 33.6, "grad_norm": 7.439844131469727, "learning_rate": 1.547804096341763e-05, "loss": 0.8479, "step": 10248 }, { "epoch": 33.60327868852459, "grad_norm": 7.380483150482178, "learning_rate": 1.5477152550827792e-05, "loss": 0.9755, "step": 10249 }, { "epoch": 33.60655737704918, "grad_norm": 10.296241760253906, "learning_rate": 1.5476264076477362e-05, "loss": 1.1084, "step": 10250 }, { "epoch": 33.609836065573774, "grad_norm": 7.917989253997803, "learning_rate": 1.547537554037636e-05, "loss": 0.8624, "step": 10251 }, { "epoch": 33.61311475409836, "grad_norm": 12.189139366149902, "learning_rate": 1.5474486942534808e-05, "loss": 0.8228, "step": 10252 }, { "epoch": 33.61639344262295, "grad_norm": 6.319253444671631, "learning_rate": 1.547359828296272e-05, "loss": 1.0247, "step": 10253 }, { "epoch": 33.61967213114754, "grad_norm": 11.875116348266602, "learning_rate": 1.5472709561670125e-05, "loss": 0.8906, "step": 10254 }, { "epoch": 33.622950819672134, "grad_norm": 10.233734130859375, "learning_rate": 1.5471820778667036e-05, "loss": 1.0557, "step": 10255 }, { "epoch": 33.62622950819672, "grad_norm": 6.802708625793457, "learning_rate": 1.547093193396348e-05, "loss": 0.7346, "step": 10256 }, { "epoch": 33.62950819672131, "grad_norm": 9.045613288879395, "learning_rate": 1.547004302756948e-05, "loss": 0.9114, "step": 10257 }, { "epoch": 33.6327868852459, "grad_norm": 7.01102876663208, "learning_rate": 1.5469154059495054e-05, "loss": 1.0825, "step": 10258 }, { "epoch": 33.636065573770495, "grad_norm": 6.13745641708374, "learning_rate": 1.546826502975023e-05, "loss": 0.9837, "step": 10259 }, { "epoch": 33.63934426229508, "grad_norm": 8.307984352111816, "learning_rate": 1.5467375938345032e-05, "loss": 0.7638, "step": 10260 }, { "epoch": 33.64262295081967, "grad_norm": 6.362316608428955, "learning_rate": 1.5466486785289487e-05, "loss": 0.8766, "step": 10261 }, { "epoch": 33.64590163934426, "grad_norm": 7.267005443572998, "learning_rate": 1.546559757059362e-05, "loss": 0.7911, "step": 10262 }, { "epoch": 33.649180327868855, "grad_norm": 7.464776515960693, "learning_rate": 1.546470829426746e-05, "loss": 1.1044, "step": 10263 }, { "epoch": 33.65245901639344, "grad_norm": 7.480789661407471, "learning_rate": 1.5463818956321026e-05, "loss": 0.871, "step": 10264 }, { "epoch": 33.65573770491803, "grad_norm": 11.536123275756836, "learning_rate": 1.5462929556764358e-05, "loss": 1.0051, "step": 10265 }, { "epoch": 33.65901639344262, "grad_norm": 10.86902141571045, "learning_rate": 1.5462040095607473e-05, "loss": 0.7781, "step": 10266 }, { "epoch": 33.662295081967216, "grad_norm": 8.05801010131836, "learning_rate": 1.5461150572860414e-05, "loss": 0.9, "step": 10267 }, { "epoch": 33.665573770491804, "grad_norm": 7.377964019775391, "learning_rate": 1.54602609885332e-05, "loss": 1.1672, "step": 10268 }, { "epoch": 33.66885245901639, "grad_norm": 8.63653564453125, "learning_rate": 1.5459371342635866e-05, "loss": 0.8932, "step": 10269 }, { "epoch": 33.67213114754098, "grad_norm": 8.859281539916992, "learning_rate": 1.5458481635178443e-05, "loss": 1.0792, "step": 10270 }, { "epoch": 33.675409836065576, "grad_norm": 9.666231155395508, "learning_rate": 1.5457591866170963e-05, "loss": 0.7834, "step": 10271 }, { "epoch": 33.678688524590164, "grad_norm": 7.649451732635498, "learning_rate": 1.5456702035623464e-05, "loss": 0.7982, "step": 10272 }, { "epoch": 33.68196721311475, "grad_norm": 8.315619468688965, "learning_rate": 1.5455812143545977e-05, "loss": 0.9752, "step": 10273 }, { "epoch": 33.68524590163934, "grad_norm": 7.563806056976318, "learning_rate": 1.5454922189948535e-05, "loss": 1.0051, "step": 10274 }, { "epoch": 33.68852459016394, "grad_norm": 7.799196243286133, "learning_rate": 1.545403217484117e-05, "loss": 0.8749, "step": 10275 }, { "epoch": 33.691803278688525, "grad_norm": 6.525850772857666, "learning_rate": 1.5453142098233925e-05, "loss": 0.8832, "step": 10276 }, { "epoch": 33.69508196721311, "grad_norm": 9.079116821289062, "learning_rate": 1.545225196013683e-05, "loss": 0.9406, "step": 10277 }, { "epoch": 33.6983606557377, "grad_norm": 8.715860366821289, "learning_rate": 1.5451361760559925e-05, "loss": 0.9166, "step": 10278 }, { "epoch": 33.7016393442623, "grad_norm": 7.111326694488525, "learning_rate": 1.545047149951325e-05, "loss": 0.8785, "step": 10279 }, { "epoch": 33.704918032786885, "grad_norm": 7.365187644958496, "learning_rate": 1.5449581177006843e-05, "loss": 0.6472, "step": 10280 }, { "epoch": 33.708196721311474, "grad_norm": 8.988767623901367, "learning_rate": 1.544869079305074e-05, "loss": 0.9047, "step": 10281 }, { "epoch": 33.71147540983607, "grad_norm": 7.353354454040527, "learning_rate": 1.5447800347654985e-05, "loss": 1.0087, "step": 10282 }, { "epoch": 33.71475409836066, "grad_norm": 6.809160232543945, "learning_rate": 1.5446909840829618e-05, "loss": 0.9965, "step": 10283 }, { "epoch": 33.718032786885246, "grad_norm": 7.539677143096924, "learning_rate": 1.5446019272584675e-05, "loss": 1.0876, "step": 10284 }, { "epoch": 33.721311475409834, "grad_norm": 11.300604820251465, "learning_rate": 1.5445128642930203e-05, "loss": 0.9457, "step": 10285 }, { "epoch": 33.72459016393443, "grad_norm": 8.928122520446777, "learning_rate": 1.5444237951876244e-05, "loss": 0.8882, "step": 10286 }, { "epoch": 33.72786885245902, "grad_norm": 6.836027145385742, "learning_rate": 1.5443347199432844e-05, "loss": 0.8868, "step": 10287 }, { "epoch": 33.731147540983606, "grad_norm": 8.42896842956543, "learning_rate": 1.544245638561004e-05, "loss": 1.1122, "step": 10288 }, { "epoch": 33.734426229508195, "grad_norm": 7.901337623596191, "learning_rate": 1.5441565510417886e-05, "loss": 0.9316, "step": 10289 }, { "epoch": 33.73770491803279, "grad_norm": 29.07809829711914, "learning_rate": 1.5440674573866423e-05, "loss": 1.0004, "step": 10290 }, { "epoch": 33.74098360655738, "grad_norm": 7.261210918426514, "learning_rate": 1.5439783575965695e-05, "loss": 1.0229, "step": 10291 }, { "epoch": 33.74426229508197, "grad_norm": 6.463996887207031, "learning_rate": 1.5438892516725755e-05, "loss": 0.9926, "step": 10292 }, { "epoch": 33.747540983606555, "grad_norm": 7.459066867828369, "learning_rate": 1.543800139615664e-05, "loss": 0.9005, "step": 10293 }, { "epoch": 33.75081967213115, "grad_norm": 7.398886203765869, "learning_rate": 1.543711021426841e-05, "loss": 0.7512, "step": 10294 }, { "epoch": 33.75409836065574, "grad_norm": 9.978055953979492, "learning_rate": 1.543621897107111e-05, "loss": 1.0594, "step": 10295 }, { "epoch": 33.75737704918033, "grad_norm": 7.685618877410889, "learning_rate": 1.543532766657479e-05, "loss": 1.0718, "step": 10296 }, { "epoch": 33.760655737704916, "grad_norm": 7.827953338623047, "learning_rate": 1.543443630078949e-05, "loss": 0.6901, "step": 10297 }, { "epoch": 33.76393442622951, "grad_norm": 12.795205116271973, "learning_rate": 1.543354487372528e-05, "loss": 0.9457, "step": 10298 }, { "epoch": 33.7672131147541, "grad_norm": 7.6997294425964355, "learning_rate": 1.54326533853922e-05, "loss": 0.873, "step": 10299 }, { "epoch": 33.77049180327869, "grad_norm": 7.498570442199707, "learning_rate": 1.5431761835800305e-05, "loss": 0.9216, "step": 10300 }, { "epoch": 33.773770491803276, "grad_norm": 7.465522766113281, "learning_rate": 1.543087022495964e-05, "loss": 0.8643, "step": 10301 }, { "epoch": 33.77704918032787, "grad_norm": 9.194684028625488, "learning_rate": 1.5429978552880275e-05, "loss": 1.0504, "step": 10302 }, { "epoch": 33.78032786885246, "grad_norm": 7.327474594116211, "learning_rate": 1.5429086819572254e-05, "loss": 0.8441, "step": 10303 }, { "epoch": 33.78360655737705, "grad_norm": 8.98211669921875, "learning_rate": 1.5428195025045635e-05, "loss": 1.0497, "step": 10304 }, { "epoch": 33.78688524590164, "grad_norm": 14.334070205688477, "learning_rate": 1.5427303169310474e-05, "loss": 0.7785, "step": 10305 }, { "epoch": 33.79016393442623, "grad_norm": 8.630518913269043, "learning_rate": 1.5426411252376823e-05, "loss": 0.7327, "step": 10306 }, { "epoch": 33.79344262295082, "grad_norm": 7.101185321807861, "learning_rate": 1.5425519274254745e-05, "loss": 1.0809, "step": 10307 }, { "epoch": 33.79672131147541, "grad_norm": 7.741137981414795, "learning_rate": 1.5424627234954294e-05, "loss": 0.9725, "step": 10308 }, { "epoch": 33.8, "grad_norm": 7.414067268371582, "learning_rate": 1.5423735134485537e-05, "loss": 0.8495, "step": 10309 }, { "epoch": 33.80327868852459, "grad_norm": 6.358252048492432, "learning_rate": 1.542284297285852e-05, "loss": 0.9756, "step": 10310 }, { "epoch": 33.80655737704918, "grad_norm": 8.144346237182617, "learning_rate": 1.5421950750083313e-05, "loss": 0.9463, "step": 10311 }, { "epoch": 33.80983606557377, "grad_norm": 9.15522575378418, "learning_rate": 1.5421058466169972e-05, "loss": 0.9514, "step": 10312 }, { "epoch": 33.81311475409836, "grad_norm": 7.771132946014404, "learning_rate": 1.5420166121128566e-05, "loss": 0.7803, "step": 10313 }, { "epoch": 33.81639344262295, "grad_norm": 6.986078262329102, "learning_rate": 1.5419273714969146e-05, "loss": 0.7949, "step": 10314 }, { "epoch": 33.81967213114754, "grad_norm": 6.519791126251221, "learning_rate": 1.5418381247701784e-05, "loss": 1.1996, "step": 10315 }, { "epoch": 33.82295081967213, "grad_norm": 7.310043811798096, "learning_rate": 1.5417488719336537e-05, "loss": 0.8731, "step": 10316 }, { "epoch": 33.82622950819672, "grad_norm": 7.963745594024658, "learning_rate": 1.541659612988347e-05, "loss": 0.9655, "step": 10317 }, { "epoch": 33.829508196721314, "grad_norm": 16.627304077148438, "learning_rate": 1.5415703479352655e-05, "loss": 0.9043, "step": 10318 }, { "epoch": 33.8327868852459, "grad_norm": 8.967028617858887, "learning_rate": 1.5414810767754147e-05, "loss": 0.7219, "step": 10319 }, { "epoch": 33.83606557377049, "grad_norm": 7.447226047515869, "learning_rate": 1.541391799509802e-05, "loss": 0.8564, "step": 10320 }, { "epoch": 33.83934426229508, "grad_norm": 10.913171768188477, "learning_rate": 1.541302516139434e-05, "loss": 0.912, "step": 10321 }, { "epoch": 33.842622950819674, "grad_norm": 7.321383476257324, "learning_rate": 1.5412132266653174e-05, "loss": 0.9526, "step": 10322 }, { "epoch": 33.84590163934426, "grad_norm": 6.743868350982666, "learning_rate": 1.5411239310884587e-05, "loss": 0.8838, "step": 10323 }, { "epoch": 33.84918032786885, "grad_norm": 6.563151836395264, "learning_rate": 1.541034629409865e-05, "loss": 1.2333, "step": 10324 }, { "epoch": 33.85245901639344, "grad_norm": 12.095970153808594, "learning_rate": 1.5409453216305435e-05, "loss": 0.903, "step": 10325 }, { "epoch": 33.855737704918035, "grad_norm": 6.893081188201904, "learning_rate": 1.5408560077515008e-05, "loss": 0.9281, "step": 10326 }, { "epoch": 33.85901639344262, "grad_norm": 7.572143077850342, "learning_rate": 1.5407666877737443e-05, "loss": 0.9016, "step": 10327 }, { "epoch": 33.86229508196721, "grad_norm": 5.275658130645752, "learning_rate": 1.5406773616982816e-05, "loss": 1.0692, "step": 10328 }, { "epoch": 33.86557377049181, "grad_norm": 9.389575004577637, "learning_rate": 1.540588029526119e-05, "loss": 0.818, "step": 10329 }, { "epoch": 33.868852459016395, "grad_norm": 7.193947792053223, "learning_rate": 1.5404986912582646e-05, "loss": 0.7388, "step": 10330 }, { "epoch": 33.87213114754098, "grad_norm": 6.985089302062988, "learning_rate": 1.540409346895725e-05, "loss": 1.0964, "step": 10331 }, { "epoch": 33.87540983606557, "grad_norm": 8.47607707977295, "learning_rate": 1.5403199964395087e-05, "loss": 1.0076, "step": 10332 }, { "epoch": 33.87868852459017, "grad_norm": 7.618077278137207, "learning_rate": 1.5402306398906222e-05, "loss": 0.8204, "step": 10333 }, { "epoch": 33.881967213114756, "grad_norm": 8.12131118774414, "learning_rate": 1.540141277250074e-05, "loss": 0.9911, "step": 10334 }, { "epoch": 33.885245901639344, "grad_norm": 6.257384777069092, "learning_rate": 1.540051908518871e-05, "loss": 1.0454, "step": 10335 }, { "epoch": 33.88852459016393, "grad_norm": 8.179566383361816, "learning_rate": 1.5399625336980212e-05, "loss": 1.0464, "step": 10336 }, { "epoch": 33.89180327868853, "grad_norm": 6.545378684997559, "learning_rate": 1.5398731527885326e-05, "loss": 0.9847, "step": 10337 }, { "epoch": 33.895081967213116, "grad_norm": 8.806989669799805, "learning_rate": 1.5397837657914124e-05, "loss": 0.9326, "step": 10338 }, { "epoch": 33.898360655737704, "grad_norm": 8.246038436889648, "learning_rate": 1.5396943727076696e-05, "loss": 1.1744, "step": 10339 }, { "epoch": 33.90163934426229, "grad_norm": 10.810149192810059, "learning_rate": 1.5396049735383112e-05, "loss": 0.8848, "step": 10340 }, { "epoch": 33.90491803278689, "grad_norm": 8.877704620361328, "learning_rate": 1.5395155682843462e-05, "loss": 1.0787, "step": 10341 }, { "epoch": 33.90819672131148, "grad_norm": 10.27530574798584, "learning_rate": 1.5394261569467815e-05, "loss": 0.8389, "step": 10342 }, { "epoch": 33.911475409836065, "grad_norm": 8.243295669555664, "learning_rate": 1.5393367395266262e-05, "loss": 0.995, "step": 10343 }, { "epoch": 33.91475409836065, "grad_norm": 9.12496566772461, "learning_rate": 1.539247316024889e-05, "loss": 0.8381, "step": 10344 }, { "epoch": 33.91803278688525, "grad_norm": 7.389695167541504, "learning_rate": 1.5391578864425773e-05, "loss": 0.7613, "step": 10345 }, { "epoch": 33.92131147540984, "grad_norm": 8.433003425598145, "learning_rate": 1.5390684507806993e-05, "loss": 0.8476, "step": 10346 }, { "epoch": 33.924590163934425, "grad_norm": 9.901344299316406, "learning_rate": 1.5389790090402646e-05, "loss": 0.8286, "step": 10347 }, { "epoch": 33.927868852459014, "grad_norm": 8.274396896362305, "learning_rate": 1.538889561222281e-05, "loss": 0.9464, "step": 10348 }, { "epoch": 33.93114754098361, "grad_norm": 7.308130741119385, "learning_rate": 1.5388001073277574e-05, "loss": 0.6667, "step": 10349 }, { "epoch": 33.9344262295082, "grad_norm": 6.616505146026611, "learning_rate": 1.5387106473577022e-05, "loss": 0.609, "step": 10350 }, { "epoch": 33.937704918032786, "grad_norm": 6.558348178863525, "learning_rate": 1.5386211813131245e-05, "loss": 0.6374, "step": 10351 }, { "epoch": 33.940983606557374, "grad_norm": 8.811355590820312, "learning_rate": 1.538531709195033e-05, "loss": 0.8312, "step": 10352 }, { "epoch": 33.94426229508197, "grad_norm": 7.349697113037109, "learning_rate": 1.538442231004436e-05, "loss": 1.163, "step": 10353 }, { "epoch": 33.94754098360656, "grad_norm": 6.952800273895264, "learning_rate": 1.538352746742344e-05, "loss": 0.9977, "step": 10354 }, { "epoch": 33.950819672131146, "grad_norm": 9.948197364807129, "learning_rate": 1.538263256409764e-05, "loss": 0.9447, "step": 10355 }, { "epoch": 33.954098360655735, "grad_norm": 7.422934532165527, "learning_rate": 1.5381737600077066e-05, "loss": 1.1312, "step": 10356 }, { "epoch": 33.95737704918033, "grad_norm": 9.242303848266602, "learning_rate": 1.5380842575371807e-05, "loss": 1.0137, "step": 10357 }, { "epoch": 33.96065573770492, "grad_norm": 8.962157249450684, "learning_rate": 1.537994748999195e-05, "loss": 0.9321, "step": 10358 }, { "epoch": 33.96393442622951, "grad_norm": 8.003495216369629, "learning_rate": 1.5379052343947596e-05, "loss": 0.8031, "step": 10359 }, { "epoch": 33.967213114754095, "grad_norm": 7.858402729034424, "learning_rate": 1.5378157137248828e-05, "loss": 1.184, "step": 10360 }, { "epoch": 33.97049180327869, "grad_norm": 40.07749938964844, "learning_rate": 1.537726186990575e-05, "loss": 1.0967, "step": 10361 }, { "epoch": 33.97377049180328, "grad_norm": 7.595590114593506, "learning_rate": 1.5376366541928455e-05, "loss": 0.9315, "step": 10362 }, { "epoch": 33.97704918032787, "grad_norm": 8.426904678344727, "learning_rate": 1.5375471153327034e-05, "loss": 0.6724, "step": 10363 }, { "epoch": 33.980327868852456, "grad_norm": 13.991842269897461, "learning_rate": 1.5374575704111586e-05, "loss": 1.1708, "step": 10364 }, { "epoch": 33.98360655737705, "grad_norm": 16.255462646484375, "learning_rate": 1.5373680194292208e-05, "loss": 0.9302, "step": 10365 }, { "epoch": 33.98688524590164, "grad_norm": 8.105960845947266, "learning_rate": 1.5372784623879003e-05, "loss": 0.7103, "step": 10366 }, { "epoch": 33.99016393442623, "grad_norm": 16.01172637939453, "learning_rate": 1.537188899288206e-05, "loss": 0.8884, "step": 10367 }, { "epoch": 33.993442622950816, "grad_norm": 7.542498588562012, "learning_rate": 1.5370993301311486e-05, "loss": 0.9595, "step": 10368 }, { "epoch": 33.99672131147541, "grad_norm": 6.973277568817139, "learning_rate": 1.537009754917738e-05, "loss": 0.9807, "step": 10369 }, { "epoch": 34.0, "grad_norm": 5.986689567565918, "learning_rate": 1.536920173648984e-05, "loss": 0.7835, "step": 10370 }, { "epoch": 34.00327868852459, "grad_norm": 7.664077281951904, "learning_rate": 1.5368305863258965e-05, "loss": 0.7259, "step": 10371 }, { "epoch": 34.006557377049184, "grad_norm": 8.3778657913208, "learning_rate": 1.5367409929494863e-05, "loss": 0.9555, "step": 10372 }, { "epoch": 34.00983606557377, "grad_norm": 8.015094757080078, "learning_rate": 1.5366513935207632e-05, "loss": 0.9622, "step": 10373 }, { "epoch": 34.01311475409836, "grad_norm": 6.09590482711792, "learning_rate": 1.5365617880407377e-05, "loss": 0.947, "step": 10374 }, { "epoch": 34.01639344262295, "grad_norm": 8.65226936340332, "learning_rate": 1.53647217651042e-05, "loss": 0.7141, "step": 10375 }, { "epoch": 34.019672131147544, "grad_norm": 6.7325215339660645, "learning_rate": 1.5363825589308206e-05, "loss": 0.9454, "step": 10376 }, { "epoch": 34.02295081967213, "grad_norm": 8.107091903686523, "learning_rate": 1.5362929353029506e-05, "loss": 0.6721, "step": 10377 }, { "epoch": 34.02622950819672, "grad_norm": 6.006111145019531, "learning_rate": 1.5362033056278197e-05, "loss": 0.9094, "step": 10378 }, { "epoch": 34.02950819672131, "grad_norm": 6.875494003295898, "learning_rate": 1.5361136699064392e-05, "loss": 1.1261, "step": 10379 }, { "epoch": 34.032786885245905, "grad_norm": 6.527244567871094, "learning_rate": 1.5360240281398198e-05, "loss": 0.7676, "step": 10380 }, { "epoch": 34.03606557377049, "grad_norm": 6.511804103851318, "learning_rate": 1.5359343803289718e-05, "loss": 0.7347, "step": 10381 }, { "epoch": 34.03934426229508, "grad_norm": 6.373693466186523, "learning_rate": 1.535844726474907e-05, "loss": 0.8496, "step": 10382 }, { "epoch": 34.04262295081967, "grad_norm": 9.486621856689453, "learning_rate": 1.5357550665786355e-05, "loss": 0.8238, "step": 10383 }, { "epoch": 34.045901639344265, "grad_norm": 7.279975891113281, "learning_rate": 1.5356654006411683e-05, "loss": 0.9576, "step": 10384 }, { "epoch": 34.049180327868854, "grad_norm": 6.015689849853516, "learning_rate": 1.5355757286635172e-05, "loss": 0.8393, "step": 10385 }, { "epoch": 34.05245901639344, "grad_norm": 8.999958038330078, "learning_rate": 1.5354860506466923e-05, "loss": 0.7885, "step": 10386 }, { "epoch": 34.05573770491803, "grad_norm": 8.067852020263672, "learning_rate": 1.535396366591706e-05, "loss": 0.9941, "step": 10387 }, { "epoch": 34.059016393442626, "grad_norm": 6.596363067626953, "learning_rate": 1.5353066764995686e-05, "loss": 1.0745, "step": 10388 }, { "epoch": 34.062295081967214, "grad_norm": 7.210446357727051, "learning_rate": 1.535216980371292e-05, "loss": 0.7332, "step": 10389 }, { "epoch": 34.0655737704918, "grad_norm": 6.1221089363098145, "learning_rate": 1.5351272782078876e-05, "loss": 1.128, "step": 10390 }, { "epoch": 34.06885245901639, "grad_norm": 9.366657257080078, "learning_rate": 1.5350375700103664e-05, "loss": 0.7575, "step": 10391 }, { "epoch": 34.072131147540986, "grad_norm": 6.220074653625488, "learning_rate": 1.534947855779741e-05, "loss": 0.7646, "step": 10392 }, { "epoch": 34.075409836065575, "grad_norm": 10.630179405212402, "learning_rate": 1.5348581355170217e-05, "loss": 0.7538, "step": 10393 }, { "epoch": 34.07868852459016, "grad_norm": 8.59875774383545, "learning_rate": 1.534768409223221e-05, "loss": 0.8248, "step": 10394 }, { "epoch": 34.08196721311475, "grad_norm": 6.6050004959106445, "learning_rate": 1.5346786768993503e-05, "loss": 0.942, "step": 10395 }, { "epoch": 34.08524590163935, "grad_norm": 7.277878761291504, "learning_rate": 1.5345889385464218e-05, "loss": 1.0073, "step": 10396 }, { "epoch": 34.088524590163935, "grad_norm": 7.149755001068115, "learning_rate": 1.534499194165447e-05, "loss": 0.7662, "step": 10397 }, { "epoch": 34.09180327868852, "grad_norm": 9.53137493133545, "learning_rate": 1.5344094437574383e-05, "loss": 0.8018, "step": 10398 }, { "epoch": 34.09508196721311, "grad_norm": 7.379647731781006, "learning_rate": 1.5343196873234073e-05, "loss": 0.8163, "step": 10399 }, { "epoch": 34.09836065573771, "grad_norm": 7.499905109405518, "learning_rate": 1.5342299248643663e-05, "loss": 0.8539, "step": 10400 }, { "epoch": 34.101639344262296, "grad_norm": 8.35842227935791, "learning_rate": 1.5341401563813273e-05, "loss": 0.6679, "step": 10401 }, { "epoch": 34.104918032786884, "grad_norm": 8.750001907348633, "learning_rate": 1.534050381875303e-05, "loss": 0.923, "step": 10402 }, { "epoch": 34.10819672131147, "grad_norm": 7.231203079223633, "learning_rate": 1.533960601347305e-05, "loss": 0.774, "step": 10403 }, { "epoch": 34.11147540983607, "grad_norm": 7.077048301696777, "learning_rate": 1.533870814798346e-05, "loss": 0.7848, "step": 10404 }, { "epoch": 34.114754098360656, "grad_norm": 7.069601058959961, "learning_rate": 1.533781022229439e-05, "loss": 0.7992, "step": 10405 }, { "epoch": 34.118032786885244, "grad_norm": 12.362993240356445, "learning_rate": 1.5336912236415954e-05, "loss": 0.8182, "step": 10406 }, { "epoch": 34.12131147540983, "grad_norm": 6.586558818817139, "learning_rate": 1.5336014190358287e-05, "loss": 1.1334, "step": 10407 }, { "epoch": 34.12459016393443, "grad_norm": 9.125088691711426, "learning_rate": 1.5335116084131512e-05, "loss": 0.8124, "step": 10408 }, { "epoch": 34.12786885245902, "grad_norm": 8.622777938842773, "learning_rate": 1.5334217917745754e-05, "loss": 0.8691, "step": 10409 }, { "epoch": 34.131147540983605, "grad_norm": 6.893795490264893, "learning_rate": 1.5333319691211143e-05, "loss": 0.6991, "step": 10410 }, { "epoch": 34.13442622950819, "grad_norm": 6.382213115692139, "learning_rate": 1.5332421404537813e-05, "loss": 1.1006, "step": 10411 }, { "epoch": 34.13770491803279, "grad_norm": 6.430716037750244, "learning_rate": 1.5331523057735877e-05, "loss": 1.004, "step": 10412 }, { "epoch": 34.14098360655738, "grad_norm": 7.818792343139648, "learning_rate": 1.5330624650815482e-05, "loss": 0.8221, "step": 10413 }, { "epoch": 34.144262295081965, "grad_norm": 9.823236465454102, "learning_rate": 1.532972618378675e-05, "loss": 0.8214, "step": 10414 }, { "epoch": 34.14754098360656, "grad_norm": 7.5910725593566895, "learning_rate": 1.5328827656659818e-05, "loss": 0.6031, "step": 10415 }, { "epoch": 34.15081967213115, "grad_norm": 6.06840181350708, "learning_rate": 1.5327929069444808e-05, "loss": 0.9305, "step": 10416 }, { "epoch": 34.15409836065574, "grad_norm": 7.421895980834961, "learning_rate": 1.532703042215186e-05, "loss": 1.3993, "step": 10417 }, { "epoch": 34.157377049180326, "grad_norm": 7.601039886474609, "learning_rate": 1.53261317147911e-05, "loss": 0.9183, "step": 10418 }, { "epoch": 34.16065573770492, "grad_norm": 8.572137832641602, "learning_rate": 1.5325232947372675e-05, "loss": 0.8743, "step": 10419 }, { "epoch": 34.16393442622951, "grad_norm": 8.19140911102295, "learning_rate": 1.5324334119906707e-05, "loss": 0.7783, "step": 10420 }, { "epoch": 34.1672131147541, "grad_norm": 10.69345760345459, "learning_rate": 1.532343523240334e-05, "loss": 1.0591, "step": 10421 }, { "epoch": 34.170491803278686, "grad_norm": 8.504630088806152, "learning_rate": 1.5322536284872704e-05, "loss": 0.7736, "step": 10422 }, { "epoch": 34.17377049180328, "grad_norm": 6.491239547729492, "learning_rate": 1.5321637277324937e-05, "loss": 1.0907, "step": 10423 }, { "epoch": 34.17704918032787, "grad_norm": 5.611697196960449, "learning_rate": 1.5320738209770177e-05, "loss": 0.7728, "step": 10424 }, { "epoch": 34.18032786885246, "grad_norm": 9.421736717224121, "learning_rate": 1.531983908221856e-05, "loss": 0.751, "step": 10425 }, { "epoch": 34.18360655737705, "grad_norm": 6.709623336791992, "learning_rate": 1.5318939894680228e-05, "loss": 0.9566, "step": 10426 }, { "epoch": 34.18688524590164, "grad_norm": 6.493346214294434, "learning_rate": 1.5318040647165316e-05, "loss": 0.7657, "step": 10427 }, { "epoch": 34.19016393442623, "grad_norm": 8.969630241394043, "learning_rate": 1.5317141339683968e-05, "loss": 0.9248, "step": 10428 }, { "epoch": 34.19344262295082, "grad_norm": 8.689085006713867, "learning_rate": 1.531624197224632e-05, "loss": 0.7182, "step": 10429 }, { "epoch": 34.19672131147541, "grad_norm": 7.227783203125, "learning_rate": 1.531534254486252e-05, "loss": 0.9686, "step": 10430 }, { "epoch": 34.2, "grad_norm": 7.926454067230225, "learning_rate": 1.5314443057542703e-05, "loss": 0.7686, "step": 10431 }, { "epoch": 34.20327868852459, "grad_norm": 8.333476066589355, "learning_rate": 1.5313543510297018e-05, "loss": 0.6904, "step": 10432 }, { "epoch": 34.20655737704918, "grad_norm": 24.9577579498291, "learning_rate": 1.5312643903135606e-05, "loss": 1.085, "step": 10433 }, { "epoch": 34.20983606557377, "grad_norm": 9.35501480102539, "learning_rate": 1.5311744236068606e-05, "loss": 0.7994, "step": 10434 }, { "epoch": 34.21311475409836, "grad_norm": 7.709585189819336, "learning_rate": 1.531084450910617e-05, "loss": 0.8219, "step": 10435 }, { "epoch": 34.21639344262295, "grad_norm": 7.692117214202881, "learning_rate": 1.5309944722258442e-05, "loss": 1.094, "step": 10436 }, { "epoch": 34.21967213114754, "grad_norm": 6.753594875335693, "learning_rate": 1.5309044875535564e-05, "loss": 1.0248, "step": 10437 }, { "epoch": 34.22295081967213, "grad_norm": 7.7751922607421875, "learning_rate": 1.5308144968947684e-05, "loss": 0.8759, "step": 10438 }, { "epoch": 34.226229508196724, "grad_norm": 10.390800476074219, "learning_rate": 1.5307245002504952e-05, "loss": 0.7999, "step": 10439 }, { "epoch": 34.22950819672131, "grad_norm": 7.010153293609619, "learning_rate": 1.530634497621751e-05, "loss": 0.7021, "step": 10440 }, { "epoch": 34.2327868852459, "grad_norm": 9.91808032989502, "learning_rate": 1.5305444890095514e-05, "loss": 0.6468, "step": 10441 }, { "epoch": 34.23606557377049, "grad_norm": 6.30147647857666, "learning_rate": 1.5304544744149113e-05, "loss": 0.9565, "step": 10442 }, { "epoch": 34.239344262295084, "grad_norm": 7.11971378326416, "learning_rate": 1.5303644538388453e-05, "loss": 0.7528, "step": 10443 }, { "epoch": 34.24262295081967, "grad_norm": 8.163481712341309, "learning_rate": 1.5302744272823687e-05, "loss": 0.9863, "step": 10444 }, { "epoch": 34.24590163934426, "grad_norm": 7.519053936004639, "learning_rate": 1.5301843947464967e-05, "loss": 0.8422, "step": 10445 }, { "epoch": 34.24918032786885, "grad_norm": 7.478112697601318, "learning_rate": 1.530094356232244e-05, "loss": 0.7064, "step": 10446 }, { "epoch": 34.252459016393445, "grad_norm": 6.776296615600586, "learning_rate": 1.5300043117406268e-05, "loss": 1.0929, "step": 10447 }, { "epoch": 34.25573770491803, "grad_norm": 6.553199291229248, "learning_rate": 1.5299142612726597e-05, "loss": 0.8085, "step": 10448 }, { "epoch": 34.25901639344262, "grad_norm": 7.482573986053467, "learning_rate": 1.529824204829358e-05, "loss": 0.6196, "step": 10449 }, { "epoch": 34.26229508196721, "grad_norm": 7.09693717956543, "learning_rate": 1.529734142411738e-05, "loss": 0.9794, "step": 10450 }, { "epoch": 34.265573770491805, "grad_norm": 7.97433614730835, "learning_rate": 1.529644074020814e-05, "loss": 0.8448, "step": 10451 }, { "epoch": 34.268852459016394, "grad_norm": 8.519623756408691, "learning_rate": 1.5295539996576034e-05, "loss": 0.7783, "step": 10452 }, { "epoch": 34.27213114754098, "grad_norm": 9.199942588806152, "learning_rate": 1.52946391932312e-05, "loss": 0.9613, "step": 10453 }, { "epoch": 34.27540983606557, "grad_norm": 9.330896377563477, "learning_rate": 1.529373833018381e-05, "loss": 0.8282, "step": 10454 }, { "epoch": 34.278688524590166, "grad_norm": 7.679793834686279, "learning_rate": 1.5292837407444015e-05, "loss": 0.7875, "step": 10455 }, { "epoch": 34.281967213114754, "grad_norm": 9.064118385314941, "learning_rate": 1.5291936425021973e-05, "loss": 0.9371, "step": 10456 }, { "epoch": 34.28524590163934, "grad_norm": 6.673379898071289, "learning_rate": 1.5291035382927846e-05, "loss": 0.8428, "step": 10457 }, { "epoch": 34.28852459016394, "grad_norm": 18.90375518798828, "learning_rate": 1.5290134281171795e-05, "loss": 0.9617, "step": 10458 }, { "epoch": 34.291803278688526, "grad_norm": 8.552647590637207, "learning_rate": 1.5289233119763977e-05, "loss": 1.0531, "step": 10459 }, { "epoch": 34.295081967213115, "grad_norm": 6.7200140953063965, "learning_rate": 1.5288331898714556e-05, "loss": 0.7935, "step": 10460 }, { "epoch": 34.2983606557377, "grad_norm": 24.858320236206055, "learning_rate": 1.52874306180337e-05, "loss": 0.8336, "step": 10461 }, { "epoch": 34.3016393442623, "grad_norm": 6.117551803588867, "learning_rate": 1.5286529277731562e-05, "loss": 0.8329, "step": 10462 }, { "epoch": 34.30491803278689, "grad_norm": 6.153502941131592, "learning_rate": 1.528562787781831e-05, "loss": 0.8619, "step": 10463 }, { "epoch": 34.308196721311475, "grad_norm": 7.1486639976501465, "learning_rate": 1.5284726418304108e-05, "loss": 0.8225, "step": 10464 }, { "epoch": 34.31147540983606, "grad_norm": 7.608231544494629, "learning_rate": 1.528382489919912e-05, "loss": 0.6715, "step": 10465 }, { "epoch": 34.31475409836066, "grad_norm": 6.869194984436035, "learning_rate": 1.5282923320513515e-05, "loss": 1.1788, "step": 10466 }, { "epoch": 34.31803278688525, "grad_norm": 8.768768310546875, "learning_rate": 1.5282021682257457e-05, "loss": 0.832, "step": 10467 }, { "epoch": 34.321311475409836, "grad_norm": 8.204642295837402, "learning_rate": 1.528111998444111e-05, "loss": 0.8165, "step": 10468 }, { "epoch": 34.324590163934424, "grad_norm": 7.181460857391357, "learning_rate": 1.5280218227074645e-05, "loss": 0.7358, "step": 10469 }, { "epoch": 34.32786885245902, "grad_norm": 8.335528373718262, "learning_rate": 1.527931641016823e-05, "loss": 0.9012, "step": 10470 }, { "epoch": 34.33114754098361, "grad_norm": 6.108577251434326, "learning_rate": 1.5278414533732032e-05, "loss": 0.8187, "step": 10471 }, { "epoch": 34.334426229508196, "grad_norm": 9.252873420715332, "learning_rate": 1.5277512597776227e-05, "loss": 0.895, "step": 10472 }, { "epoch": 34.337704918032784, "grad_norm": 11.966028213500977, "learning_rate": 1.5276610602310973e-05, "loss": 1.0359, "step": 10473 }, { "epoch": 34.34098360655738, "grad_norm": 17.48955726623535, "learning_rate": 1.527570854734645e-05, "loss": 0.7748, "step": 10474 }, { "epoch": 34.34426229508197, "grad_norm": 7.254396915435791, "learning_rate": 1.527480643289283e-05, "loss": 0.6936, "step": 10475 }, { "epoch": 34.34754098360656, "grad_norm": 9.31898307800293, "learning_rate": 1.5273904258960283e-05, "loss": 0.903, "step": 10476 }, { "epoch": 34.350819672131145, "grad_norm": 6.732034206390381, "learning_rate": 1.5273002025558982e-05, "loss": 0.9, "step": 10477 }, { "epoch": 34.35409836065574, "grad_norm": 6.350274562835693, "learning_rate": 1.5272099732699098e-05, "loss": 0.8942, "step": 10478 }, { "epoch": 34.35737704918033, "grad_norm": 7.548423767089844, "learning_rate": 1.527119738039081e-05, "loss": 0.9959, "step": 10479 }, { "epoch": 34.36065573770492, "grad_norm": 40.304786682128906, "learning_rate": 1.5270294968644292e-05, "loss": 0.9625, "step": 10480 }, { "epoch": 34.363934426229505, "grad_norm": 6.622154235839844, "learning_rate": 1.526939249746972e-05, "loss": 0.857, "step": 10481 }, { "epoch": 34.3672131147541, "grad_norm": 6.823415756225586, "learning_rate": 1.5268489966877265e-05, "loss": 0.9309, "step": 10482 }, { "epoch": 34.37049180327869, "grad_norm": 7.318732738494873, "learning_rate": 1.5267587376877105e-05, "loss": 0.915, "step": 10483 }, { "epoch": 34.37377049180328, "grad_norm": 9.521536827087402, "learning_rate": 1.5266684727479427e-05, "loss": 0.8512, "step": 10484 }, { "epoch": 34.377049180327866, "grad_norm": 7.23805570602417, "learning_rate": 1.5265782018694396e-05, "loss": 1.0008, "step": 10485 }, { "epoch": 34.38032786885246, "grad_norm": 7.694865703582764, "learning_rate": 1.5264879250532203e-05, "loss": 0.9701, "step": 10486 }, { "epoch": 34.38360655737705, "grad_norm": 6.669267177581787, "learning_rate": 1.5263976423003022e-05, "loss": 1.0349, "step": 10487 }, { "epoch": 34.38688524590164, "grad_norm": 9.018157005310059, "learning_rate": 1.526307353611703e-05, "loss": 0.6697, "step": 10488 }, { "epoch": 34.390163934426226, "grad_norm": 6.465518474578857, "learning_rate": 1.5262170589884416e-05, "loss": 1.0023, "step": 10489 }, { "epoch": 34.39344262295082, "grad_norm": 11.45867919921875, "learning_rate": 1.5261267584315354e-05, "loss": 1.0997, "step": 10490 }, { "epoch": 34.39672131147541, "grad_norm": 7.085529327392578, "learning_rate": 1.526036451942003e-05, "loss": 0.9953, "step": 10491 }, { "epoch": 34.4, "grad_norm": 7.672979354858398, "learning_rate": 1.5259461395208628e-05, "loss": 0.7806, "step": 10492 }, { "epoch": 34.40327868852459, "grad_norm": 6.024761199951172, "learning_rate": 1.5258558211691331e-05, "loss": 0.8896, "step": 10493 }, { "epoch": 34.40655737704918, "grad_norm": 6.207239151000977, "learning_rate": 1.5257654968878322e-05, "loss": 0.7734, "step": 10494 }, { "epoch": 34.40983606557377, "grad_norm": 6.058725833892822, "learning_rate": 1.5256751666779788e-05, "loss": 0.9071, "step": 10495 }, { "epoch": 34.41311475409836, "grad_norm": 7.310093879699707, "learning_rate": 1.525584830540591e-05, "loss": 0.804, "step": 10496 }, { "epoch": 34.41639344262295, "grad_norm": 9.895988464355469, "learning_rate": 1.5254944884766878e-05, "loss": 0.8569, "step": 10497 }, { "epoch": 34.41967213114754, "grad_norm": 10.825451850891113, "learning_rate": 1.525404140487288e-05, "loss": 0.8735, "step": 10498 }, { "epoch": 34.42295081967213, "grad_norm": 6.15325927734375, "learning_rate": 1.5253137865734102e-05, "loss": 0.9565, "step": 10499 }, { "epoch": 34.42622950819672, "grad_norm": 8.111647605895996, "learning_rate": 1.5252234267360735e-05, "loss": 0.9669, "step": 10500 }, { "epoch": 34.429508196721315, "grad_norm": 7.825717449188232, "learning_rate": 1.5251330609762962e-05, "loss": 0.9862, "step": 10501 }, { "epoch": 34.4327868852459, "grad_norm": 6.368621826171875, "learning_rate": 1.525042689295098e-05, "loss": 1.0549, "step": 10502 }, { "epoch": 34.43606557377049, "grad_norm": 10.744138717651367, "learning_rate": 1.5249523116934972e-05, "loss": 1.1174, "step": 10503 }, { "epoch": 34.43934426229508, "grad_norm": 6.622134685516357, "learning_rate": 1.5248619281725137e-05, "loss": 0.814, "step": 10504 }, { "epoch": 34.442622950819676, "grad_norm": 8.357389450073242, "learning_rate": 1.5247715387331657e-05, "loss": 0.8467, "step": 10505 }, { "epoch": 34.445901639344264, "grad_norm": 7.880314350128174, "learning_rate": 1.5246811433764732e-05, "loss": 0.9384, "step": 10506 }, { "epoch": 34.44918032786885, "grad_norm": 8.100407600402832, "learning_rate": 1.5245907421034554e-05, "loss": 0.7942, "step": 10507 }, { "epoch": 34.45245901639344, "grad_norm": 40.41938018798828, "learning_rate": 1.5245003349151315e-05, "loss": 0.7971, "step": 10508 }, { "epoch": 34.455737704918036, "grad_norm": 6.792123317718506, "learning_rate": 1.524409921812521e-05, "loss": 1.1496, "step": 10509 }, { "epoch": 34.459016393442624, "grad_norm": 7.038791656494141, "learning_rate": 1.5243195027966434e-05, "loss": 0.7367, "step": 10510 }, { "epoch": 34.46229508196721, "grad_norm": 9.07946491241455, "learning_rate": 1.524229077868518e-05, "loss": 0.9249, "step": 10511 }, { "epoch": 34.4655737704918, "grad_norm": 18.291715621948242, "learning_rate": 1.5241386470291648e-05, "loss": 0.9264, "step": 10512 }, { "epoch": 34.4688524590164, "grad_norm": 8.890670776367188, "learning_rate": 1.5240482102796037e-05, "loss": 0.9094, "step": 10513 }, { "epoch": 34.472131147540985, "grad_norm": 15.840296745300293, "learning_rate": 1.5239577676208537e-05, "loss": 0.626, "step": 10514 }, { "epoch": 34.47540983606557, "grad_norm": 21.482425689697266, "learning_rate": 1.5238673190539355e-05, "loss": 1.0312, "step": 10515 }, { "epoch": 34.47868852459016, "grad_norm": 7.50541353225708, "learning_rate": 1.523776864579868e-05, "loss": 0.7535, "step": 10516 }, { "epoch": 34.48196721311476, "grad_norm": 6.986146450042725, "learning_rate": 1.5236864041996723e-05, "loss": 0.8495, "step": 10517 }, { "epoch": 34.485245901639345, "grad_norm": 9.983402252197266, "learning_rate": 1.523595937914368e-05, "loss": 0.7819, "step": 10518 }, { "epoch": 34.488524590163934, "grad_norm": 8.732320785522461, "learning_rate": 1.5235054657249749e-05, "loss": 1.04, "step": 10519 }, { "epoch": 34.49180327868852, "grad_norm": 6.706696033477783, "learning_rate": 1.523414987632513e-05, "loss": 1.0132, "step": 10520 }, { "epoch": 34.49508196721312, "grad_norm": 40.215797424316406, "learning_rate": 1.5233245036380033e-05, "loss": 1.0178, "step": 10521 }, { "epoch": 34.498360655737706, "grad_norm": 11.529500961303711, "learning_rate": 1.5232340137424654e-05, "loss": 1.084, "step": 10522 }, { "epoch": 34.501639344262294, "grad_norm": 8.374833106994629, "learning_rate": 1.5231435179469202e-05, "loss": 0.8951, "step": 10523 }, { "epoch": 34.50491803278688, "grad_norm": 8.448509216308594, "learning_rate": 1.5230530162523882e-05, "loss": 0.8983, "step": 10524 }, { "epoch": 34.50819672131148, "grad_norm": 9.376859664916992, "learning_rate": 1.522962508659889e-05, "loss": 0.8877, "step": 10525 }, { "epoch": 34.511475409836066, "grad_norm": 6.350414276123047, "learning_rate": 1.5228719951704443e-05, "loss": 0.7987, "step": 10526 }, { "epoch": 34.514754098360655, "grad_norm": 8.031354904174805, "learning_rate": 1.5227814757850739e-05, "loss": 1.0077, "step": 10527 }, { "epoch": 34.51803278688524, "grad_norm": 7.690023899078369, "learning_rate": 1.5226909505047989e-05, "loss": 0.889, "step": 10528 }, { "epoch": 34.52131147540984, "grad_norm": 7.738251686096191, "learning_rate": 1.5226004193306399e-05, "loss": 0.7863, "step": 10529 }, { "epoch": 34.52459016393443, "grad_norm": 7.6559576988220215, "learning_rate": 1.522509882263618e-05, "loss": 0.7589, "step": 10530 }, { "epoch": 34.527868852459015, "grad_norm": 7.717452526092529, "learning_rate": 1.5224193393047535e-05, "loss": 0.7874, "step": 10531 }, { "epoch": 34.5311475409836, "grad_norm": 10.690328598022461, "learning_rate": 1.522328790455068e-05, "loss": 1.0192, "step": 10532 }, { "epoch": 34.5344262295082, "grad_norm": 7.271048069000244, "learning_rate": 1.5222382357155826e-05, "loss": 0.8634, "step": 10533 }, { "epoch": 34.53770491803279, "grad_norm": 10.018657684326172, "learning_rate": 1.5221476750873177e-05, "loss": 0.8493, "step": 10534 }, { "epoch": 34.540983606557376, "grad_norm": 13.790593147277832, "learning_rate": 1.522057108571295e-05, "loss": 1.0001, "step": 10535 }, { "epoch": 34.544262295081964, "grad_norm": 7.719738960266113, "learning_rate": 1.5219665361685356e-05, "loss": 1.0476, "step": 10536 }, { "epoch": 34.54754098360656, "grad_norm": 72.6010971069336, "learning_rate": 1.5218759578800608e-05, "loss": 0.7913, "step": 10537 }, { "epoch": 34.55081967213115, "grad_norm": 12.224279403686523, "learning_rate": 1.521785373706892e-05, "loss": 0.9921, "step": 10538 }, { "epoch": 34.554098360655736, "grad_norm": 8.125958442687988, "learning_rate": 1.5216947836500504e-05, "loss": 0.6884, "step": 10539 }, { "epoch": 34.557377049180324, "grad_norm": 8.59726619720459, "learning_rate": 1.5216041877105577e-05, "loss": 0.9048, "step": 10540 }, { "epoch": 34.56065573770492, "grad_norm": 13.554454803466797, "learning_rate": 1.5215135858894355e-05, "loss": 0.8672, "step": 10541 }, { "epoch": 34.56393442622951, "grad_norm": 9.300265312194824, "learning_rate": 1.5214229781877054e-05, "loss": 0.9237, "step": 10542 }, { "epoch": 34.5672131147541, "grad_norm": 8.773863792419434, "learning_rate": 1.5213323646063893e-05, "loss": 0.9478, "step": 10543 }, { "epoch": 34.570491803278685, "grad_norm": 7.17376708984375, "learning_rate": 1.5212417451465085e-05, "loss": 0.8551, "step": 10544 }, { "epoch": 34.57377049180328, "grad_norm": 7.736933708190918, "learning_rate": 1.5211511198090852e-05, "loss": 0.9226, "step": 10545 }, { "epoch": 34.57704918032787, "grad_norm": 9.397821426391602, "learning_rate": 1.521060488595141e-05, "loss": 0.7393, "step": 10546 }, { "epoch": 34.58032786885246, "grad_norm": 6.320971965789795, "learning_rate": 1.5209698515056982e-05, "loss": 0.9296, "step": 10547 }, { "epoch": 34.58360655737705, "grad_norm": 10.688486099243164, "learning_rate": 1.5208792085417785e-05, "loss": 0.9327, "step": 10548 }, { "epoch": 34.58688524590164, "grad_norm": 7.331930637359619, "learning_rate": 1.5207885597044044e-05, "loss": 0.944, "step": 10549 }, { "epoch": 34.59016393442623, "grad_norm": 10.369048118591309, "learning_rate": 1.5206979049945977e-05, "loss": 0.7863, "step": 10550 }, { "epoch": 34.59344262295082, "grad_norm": 6.706177711486816, "learning_rate": 1.5206072444133804e-05, "loss": 0.7516, "step": 10551 }, { "epoch": 34.59672131147541, "grad_norm": 6.62064790725708, "learning_rate": 1.5205165779617755e-05, "loss": 1.0314, "step": 10552 }, { "epoch": 34.6, "grad_norm": 15.104925155639648, "learning_rate": 1.5204259056408046e-05, "loss": 0.8928, "step": 10553 }, { "epoch": 34.60327868852459, "grad_norm": 7.469135284423828, "learning_rate": 1.5203352274514909e-05, "loss": 0.8228, "step": 10554 }, { "epoch": 34.60655737704918, "grad_norm": 7.847003936767578, "learning_rate": 1.5202445433948562e-05, "loss": 0.7062, "step": 10555 }, { "epoch": 34.609836065573774, "grad_norm": 8.019586563110352, "learning_rate": 1.5201538534719235e-05, "loss": 1.1816, "step": 10556 }, { "epoch": 34.61311475409836, "grad_norm": 9.62826919555664, "learning_rate": 1.5200631576837155e-05, "loss": 0.89, "step": 10557 }, { "epoch": 34.61639344262295, "grad_norm": 8.316201210021973, "learning_rate": 1.5199724560312544e-05, "loss": 0.9761, "step": 10558 }, { "epoch": 34.61967213114754, "grad_norm": 7.383453845977783, "learning_rate": 1.5198817485155633e-05, "loss": 1.1042, "step": 10559 }, { "epoch": 34.622950819672134, "grad_norm": 9.812905311584473, "learning_rate": 1.519791035137665e-05, "loss": 0.8407, "step": 10560 }, { "epoch": 34.62622950819672, "grad_norm": 6.253387928009033, "learning_rate": 1.5197003158985821e-05, "loss": 1.2089, "step": 10561 }, { "epoch": 34.62950819672131, "grad_norm": 19.93540382385254, "learning_rate": 1.519609590799338e-05, "loss": 0.7677, "step": 10562 }, { "epoch": 34.6327868852459, "grad_norm": 7.083782196044922, "learning_rate": 1.5195188598409553e-05, "loss": 0.8428, "step": 10563 }, { "epoch": 34.636065573770495, "grad_norm": 7.235205173492432, "learning_rate": 1.5194281230244576e-05, "loss": 0.9737, "step": 10564 }, { "epoch": 34.63934426229508, "grad_norm": 7.85178279876709, "learning_rate": 1.5193373803508675e-05, "loss": 0.8411, "step": 10565 }, { "epoch": 34.64262295081967, "grad_norm": 7.5373029708862305, "learning_rate": 1.5192466318212087e-05, "loss": 0.9454, "step": 10566 }, { "epoch": 34.64590163934426, "grad_norm": 11.019864082336426, "learning_rate": 1.519155877436504e-05, "loss": 0.9251, "step": 10567 }, { "epoch": 34.649180327868855, "grad_norm": 14.361922264099121, "learning_rate": 1.5190651171977768e-05, "loss": 1.0968, "step": 10568 }, { "epoch": 34.65245901639344, "grad_norm": 7.228264331817627, "learning_rate": 1.518974351106051e-05, "loss": 0.9852, "step": 10569 }, { "epoch": 34.65573770491803, "grad_norm": 9.333545684814453, "learning_rate": 1.51888357916235e-05, "loss": 1.0567, "step": 10570 }, { "epoch": 34.65901639344262, "grad_norm": 7.4224042892456055, "learning_rate": 1.5187928013676969e-05, "loss": 0.8702, "step": 10571 }, { "epoch": 34.662295081967216, "grad_norm": 8.208603858947754, "learning_rate": 1.5187020177231155e-05, "loss": 0.9146, "step": 10572 }, { "epoch": 34.665573770491804, "grad_norm": 8.47838306427002, "learning_rate": 1.5186112282296298e-05, "loss": 0.9393, "step": 10573 }, { "epoch": 34.66885245901639, "grad_norm": 7.800198554992676, "learning_rate": 1.5185204328882628e-05, "loss": 0.9789, "step": 10574 }, { "epoch": 34.67213114754098, "grad_norm": 12.910009384155273, "learning_rate": 1.5184296317000394e-05, "loss": 1.1994, "step": 10575 }, { "epoch": 34.675409836065576, "grad_norm": 6.820817947387695, "learning_rate": 1.5183388246659828e-05, "loss": 0.9316, "step": 10576 }, { "epoch": 34.678688524590164, "grad_norm": 7.195501804351807, "learning_rate": 1.5182480117871166e-05, "loss": 0.8099, "step": 10577 }, { "epoch": 34.68196721311475, "grad_norm": 7.599247932434082, "learning_rate": 1.5181571930644655e-05, "loss": 0.8158, "step": 10578 }, { "epoch": 34.68524590163934, "grad_norm": 6.902961254119873, "learning_rate": 1.5180663684990529e-05, "loss": 0.9473, "step": 10579 }, { "epoch": 34.68852459016394, "grad_norm": 7.053023338317871, "learning_rate": 1.517975538091904e-05, "loss": 0.86, "step": 10580 }, { "epoch": 34.691803278688525, "grad_norm": 8.36526107788086, "learning_rate": 1.5178847018440417e-05, "loss": 0.8792, "step": 10581 }, { "epoch": 34.69508196721311, "grad_norm": 11.151302337646484, "learning_rate": 1.5177938597564912e-05, "loss": 0.8572, "step": 10582 }, { "epoch": 34.6983606557377, "grad_norm": 7.950519561767578, "learning_rate": 1.5177030118302765e-05, "loss": 1.057, "step": 10583 }, { "epoch": 34.7016393442623, "grad_norm": 6.553191184997559, "learning_rate": 1.5176121580664222e-05, "loss": 0.935, "step": 10584 }, { "epoch": 34.704918032786885, "grad_norm": 49.47998046875, "learning_rate": 1.5175212984659524e-05, "loss": 0.8323, "step": 10585 }, { "epoch": 34.708196721311474, "grad_norm": 6.845775127410889, "learning_rate": 1.5174304330298923e-05, "loss": 0.7906, "step": 10586 }, { "epoch": 34.71147540983607, "grad_norm": 10.107820510864258, "learning_rate": 1.5173395617592656e-05, "loss": 0.9093, "step": 10587 }, { "epoch": 34.71475409836066, "grad_norm": 7.501683712005615, "learning_rate": 1.517248684655098e-05, "loss": 0.8395, "step": 10588 }, { "epoch": 34.718032786885246, "grad_norm": 8.434700965881348, "learning_rate": 1.5171578017184132e-05, "loss": 1.0795, "step": 10589 }, { "epoch": 34.721311475409834, "grad_norm": 8.959270477294922, "learning_rate": 1.5170669129502366e-05, "loss": 1.0481, "step": 10590 }, { "epoch": 34.72459016393443, "grad_norm": 7.247994899749756, "learning_rate": 1.516976018351593e-05, "loss": 0.8661, "step": 10591 }, { "epoch": 34.72786885245902, "grad_norm": 9.174943923950195, "learning_rate": 1.5168851179235074e-05, "loss": 0.8937, "step": 10592 }, { "epoch": 34.731147540983606, "grad_norm": 7.473255157470703, "learning_rate": 1.5167942116670043e-05, "loss": 0.6855, "step": 10593 }, { "epoch": 34.734426229508195, "grad_norm": 8.428119659423828, "learning_rate": 1.5167032995831093e-05, "loss": 0.7495, "step": 10594 }, { "epoch": 34.73770491803279, "grad_norm": 7.6139750480651855, "learning_rate": 1.5166123816728475e-05, "loss": 0.8832, "step": 10595 }, { "epoch": 34.74098360655738, "grad_norm": 7.390782356262207, "learning_rate": 1.5165214579372437e-05, "loss": 0.7262, "step": 10596 }, { "epoch": 34.74426229508197, "grad_norm": 8.082185745239258, "learning_rate": 1.5164305283773236e-05, "loss": 1.0596, "step": 10597 }, { "epoch": 34.747540983606555, "grad_norm": 6.756404876708984, "learning_rate": 1.516339592994112e-05, "loss": 0.7614, "step": 10598 }, { "epoch": 34.75081967213115, "grad_norm": 13.152889251708984, "learning_rate": 1.5162486517886352e-05, "loss": 1.1357, "step": 10599 }, { "epoch": 34.75409836065574, "grad_norm": 9.275542259216309, "learning_rate": 1.5161577047619175e-05, "loss": 0.8407, "step": 10600 }, { "epoch": 34.75737704918033, "grad_norm": 9.448954582214355, "learning_rate": 1.5160667519149856e-05, "loss": 0.8284, "step": 10601 }, { "epoch": 34.760655737704916, "grad_norm": 9.584308624267578, "learning_rate": 1.5159757932488644e-05, "loss": 0.9792, "step": 10602 }, { "epoch": 34.76393442622951, "grad_norm": 7.057735919952393, "learning_rate": 1.5158848287645793e-05, "loss": 0.9096, "step": 10603 }, { "epoch": 34.7672131147541, "grad_norm": 6.9239583015441895, "learning_rate": 1.5157938584631566e-05, "loss": 0.7679, "step": 10604 }, { "epoch": 34.77049180327869, "grad_norm": 8.400036811828613, "learning_rate": 1.5157028823456216e-05, "loss": 0.9525, "step": 10605 }, { "epoch": 34.773770491803276, "grad_norm": 9.518362998962402, "learning_rate": 1.515611900413001e-05, "loss": 0.868, "step": 10606 }, { "epoch": 34.77704918032787, "grad_norm": 12.35802173614502, "learning_rate": 1.5155209126663196e-05, "loss": 0.8882, "step": 10607 }, { "epoch": 34.78032786885246, "grad_norm": 9.339396476745605, "learning_rate": 1.5154299191066039e-05, "loss": 0.7703, "step": 10608 }, { "epoch": 34.78360655737705, "grad_norm": 6.7431135177612305, "learning_rate": 1.5153389197348802e-05, "loss": 0.9546, "step": 10609 }, { "epoch": 34.78688524590164, "grad_norm": 7.6141839027404785, "learning_rate": 1.5152479145521741e-05, "loss": 0.8039, "step": 10610 }, { "epoch": 34.79016393442623, "grad_norm": 8.861649513244629, "learning_rate": 1.5151569035595124e-05, "loss": 0.7416, "step": 10611 }, { "epoch": 34.79344262295082, "grad_norm": 8.425130844116211, "learning_rate": 1.515065886757921e-05, "loss": 1.1876, "step": 10612 }, { "epoch": 34.79672131147541, "grad_norm": 49.03404998779297, "learning_rate": 1.5149748641484254e-05, "loss": 0.8177, "step": 10613 }, { "epoch": 34.8, "grad_norm": 9.774949073791504, "learning_rate": 1.5148838357320537e-05, "loss": 0.6452, "step": 10614 }, { "epoch": 34.80327868852459, "grad_norm": 6.903090000152588, "learning_rate": 1.5147928015098309e-05, "loss": 0.7602, "step": 10615 }, { "epoch": 34.80655737704918, "grad_norm": 7.534725666046143, "learning_rate": 1.514701761482784e-05, "loss": 0.8309, "step": 10616 }, { "epoch": 34.80983606557377, "grad_norm": 9.075851440429688, "learning_rate": 1.5146107156519398e-05, "loss": 1.1329, "step": 10617 }, { "epoch": 34.81311475409836, "grad_norm": 19.116437911987305, "learning_rate": 1.5145196640183249e-05, "loss": 0.8736, "step": 10618 }, { "epoch": 34.81639344262295, "grad_norm": 7.115368843078613, "learning_rate": 1.5144286065829654e-05, "loss": 0.9282, "step": 10619 }, { "epoch": 34.81967213114754, "grad_norm": 5.857088565826416, "learning_rate": 1.5143375433468887e-05, "loss": 0.8911, "step": 10620 }, { "epoch": 34.82295081967213, "grad_norm": 9.004326820373535, "learning_rate": 1.5142464743111211e-05, "loss": 1.0307, "step": 10621 }, { "epoch": 34.82622950819672, "grad_norm": 9.002204895019531, "learning_rate": 1.5141553994766902e-05, "loss": 0.7474, "step": 10622 }, { "epoch": 34.829508196721314, "grad_norm": 8.164790153503418, "learning_rate": 1.5140643188446226e-05, "loss": 0.9691, "step": 10623 }, { "epoch": 34.8327868852459, "grad_norm": 6.266222953796387, "learning_rate": 1.513973232415945e-05, "loss": 0.7235, "step": 10624 }, { "epoch": 34.83606557377049, "grad_norm": 9.480390548706055, "learning_rate": 1.5138821401916853e-05, "loss": 0.8772, "step": 10625 }, { "epoch": 34.83934426229508, "grad_norm": 7.855201721191406, "learning_rate": 1.5137910421728697e-05, "loss": 0.8471, "step": 10626 }, { "epoch": 34.842622950819674, "grad_norm": 6.375432014465332, "learning_rate": 1.5136999383605262e-05, "loss": 0.7455, "step": 10627 }, { "epoch": 34.84590163934426, "grad_norm": 6.840843200683594, "learning_rate": 1.5136088287556816e-05, "loss": 1.1259, "step": 10628 }, { "epoch": 34.84918032786885, "grad_norm": 6.55164909362793, "learning_rate": 1.5135177133593634e-05, "loss": 0.7672, "step": 10629 }, { "epoch": 34.85245901639344, "grad_norm": 8.855192184448242, "learning_rate": 1.5134265921725992e-05, "loss": 0.9243, "step": 10630 }, { "epoch": 34.855737704918035, "grad_norm": 8.364895820617676, "learning_rate": 1.5133354651964162e-05, "loss": 0.6069, "step": 10631 }, { "epoch": 34.85901639344262, "grad_norm": 7.317701816558838, "learning_rate": 1.5132443324318423e-05, "loss": 0.9009, "step": 10632 }, { "epoch": 34.86229508196721, "grad_norm": 7.221648216247559, "learning_rate": 1.5131531938799047e-05, "loss": 1.0381, "step": 10633 }, { "epoch": 34.86557377049181, "grad_norm": 7.761817932128906, "learning_rate": 1.5130620495416315e-05, "loss": 0.9547, "step": 10634 }, { "epoch": 34.868852459016395, "grad_norm": 9.115408897399902, "learning_rate": 1.51297089941805e-05, "loss": 0.8893, "step": 10635 }, { "epoch": 34.87213114754098, "grad_norm": 10.00751781463623, "learning_rate": 1.5128797435101885e-05, "loss": 0.8207, "step": 10636 }, { "epoch": 34.87540983606557, "grad_norm": 7.564979553222656, "learning_rate": 1.5127885818190744e-05, "loss": 0.7903, "step": 10637 }, { "epoch": 34.87868852459017, "grad_norm": 7.923813819885254, "learning_rate": 1.512697414345736e-05, "loss": 1.0629, "step": 10638 }, { "epoch": 34.881967213114756, "grad_norm": 7.916151523590088, "learning_rate": 1.512606241091201e-05, "loss": 0.9506, "step": 10639 }, { "epoch": 34.885245901639344, "grad_norm": 7.052441120147705, "learning_rate": 1.5125150620564979e-05, "loss": 0.8773, "step": 10640 }, { "epoch": 34.88852459016393, "grad_norm": 12.83525276184082, "learning_rate": 1.5124238772426547e-05, "loss": 0.7361, "step": 10641 }, { "epoch": 34.89180327868853, "grad_norm": 8.960039138793945, "learning_rate": 1.5123326866506991e-05, "loss": 0.8334, "step": 10642 }, { "epoch": 34.895081967213116, "grad_norm": 6.985379219055176, "learning_rate": 1.5122414902816602e-05, "loss": 0.8624, "step": 10643 }, { "epoch": 34.898360655737704, "grad_norm": 6.127867221832275, "learning_rate": 1.5121502881365655e-05, "loss": 1.2419, "step": 10644 }, { "epoch": 34.90163934426229, "grad_norm": 9.335399627685547, "learning_rate": 1.512059080216444e-05, "loss": 1.0471, "step": 10645 }, { "epoch": 34.90491803278689, "grad_norm": 7.695171356201172, "learning_rate": 1.511967866522324e-05, "loss": 1.1056, "step": 10646 }, { "epoch": 34.90819672131148, "grad_norm": 6.523059368133545, "learning_rate": 1.5118766470552337e-05, "loss": 0.9816, "step": 10647 }, { "epoch": 34.911475409836065, "grad_norm": 7.4423980712890625, "learning_rate": 1.5117854218162023e-05, "loss": 1.1136, "step": 10648 }, { "epoch": 34.91475409836065, "grad_norm": 8.558871269226074, "learning_rate": 1.5116941908062579e-05, "loss": 0.6903, "step": 10649 }, { "epoch": 34.91803278688525, "grad_norm": 9.08874797821045, "learning_rate": 1.5116029540264294e-05, "loss": 0.8823, "step": 10650 }, { "epoch": 34.92131147540984, "grad_norm": 6.942991733551025, "learning_rate": 1.5115117114777462e-05, "loss": 1.0061, "step": 10651 }, { "epoch": 34.924590163934425, "grad_norm": 7.111298084259033, "learning_rate": 1.5114204631612361e-05, "loss": 0.914, "step": 10652 }, { "epoch": 34.927868852459014, "grad_norm": 10.852619171142578, "learning_rate": 1.5113292090779289e-05, "loss": 0.8418, "step": 10653 }, { "epoch": 34.93114754098361, "grad_norm": 6.722028732299805, "learning_rate": 1.511237949228853e-05, "loss": 0.9825, "step": 10654 }, { "epoch": 34.9344262295082, "grad_norm": 6.7617998123168945, "learning_rate": 1.5111466836150378e-05, "loss": 0.8009, "step": 10655 }, { "epoch": 34.937704918032786, "grad_norm": 7.80488395690918, "learning_rate": 1.511055412237512e-05, "loss": 0.8304, "step": 10656 }, { "epoch": 34.940983606557374, "grad_norm": 7.468938827514648, "learning_rate": 1.5109641350973054e-05, "loss": 0.8331, "step": 10657 }, { "epoch": 34.94426229508197, "grad_norm": 7.17476749420166, "learning_rate": 1.5108728521954468e-05, "loss": 0.997, "step": 10658 }, { "epoch": 34.94754098360656, "grad_norm": 11.628387451171875, "learning_rate": 1.5107815635329655e-05, "loss": 0.8285, "step": 10659 }, { "epoch": 34.950819672131146, "grad_norm": 6.963454723358154, "learning_rate": 1.5106902691108913e-05, "loss": 0.8759, "step": 10660 }, { "epoch": 34.954098360655735, "grad_norm": 8.65896224975586, "learning_rate": 1.510598968930253e-05, "loss": 0.9732, "step": 10661 }, { "epoch": 34.95737704918033, "grad_norm": 7.742009162902832, "learning_rate": 1.5105076629920805e-05, "loss": 0.9616, "step": 10662 }, { "epoch": 34.96065573770492, "grad_norm": 7.858949184417725, "learning_rate": 1.5104163512974033e-05, "loss": 1.0428, "step": 10663 }, { "epoch": 34.96393442622951, "grad_norm": 7.084496021270752, "learning_rate": 1.5103250338472512e-05, "loss": 0.9083, "step": 10664 }, { "epoch": 34.967213114754095, "grad_norm": 7.139747619628906, "learning_rate": 1.5102337106426536e-05, "loss": 0.9034, "step": 10665 }, { "epoch": 34.97049180327869, "grad_norm": 7.960867881774902, "learning_rate": 1.5101423816846406e-05, "loss": 0.8009, "step": 10666 }, { "epoch": 34.97377049180328, "grad_norm": 8.075615882873535, "learning_rate": 1.5100510469742417e-05, "loss": 0.8627, "step": 10667 }, { "epoch": 34.97704918032787, "grad_norm": 9.12692928314209, "learning_rate": 1.5099597065124867e-05, "loss": 0.9544, "step": 10668 }, { "epoch": 34.980327868852456, "grad_norm": 8.267406463623047, "learning_rate": 1.5098683603004061e-05, "loss": 0.8781, "step": 10669 }, { "epoch": 34.98360655737705, "grad_norm": 6.098667621612549, "learning_rate": 1.5097770083390293e-05, "loss": 0.6418, "step": 10670 }, { "epoch": 34.98688524590164, "grad_norm": 7.012650489807129, "learning_rate": 1.509685650629387e-05, "loss": 1.0122, "step": 10671 }, { "epoch": 34.99016393442623, "grad_norm": 6.061832904815674, "learning_rate": 1.5095942871725088e-05, "loss": 1.0502, "step": 10672 }, { "epoch": 34.993442622950816, "grad_norm": 13.541952133178711, "learning_rate": 1.5095029179694251e-05, "loss": 0.7868, "step": 10673 }, { "epoch": 34.99672131147541, "grad_norm": 7.315657615661621, "learning_rate": 1.5094115430211667e-05, "loss": 0.8649, "step": 10674 }, { "epoch": 35.0, "grad_norm": 8.104186058044434, "learning_rate": 1.5093201623287631e-05, "loss": 0.7799, "step": 10675 }, { "epoch": 35.00327868852459, "grad_norm": 9.581107139587402, "learning_rate": 1.5092287758932451e-05, "loss": 0.7016, "step": 10676 }, { "epoch": 35.006557377049184, "grad_norm": 6.289818286895752, "learning_rate": 1.5091373837156433e-05, "loss": 0.7664, "step": 10677 }, { "epoch": 35.00983606557377, "grad_norm": 10.207870483398438, "learning_rate": 1.5090459857969886e-05, "loss": 0.6897, "step": 10678 }, { "epoch": 35.01311475409836, "grad_norm": 10.568093299865723, "learning_rate": 1.5089545821383102e-05, "loss": 0.8091, "step": 10679 }, { "epoch": 35.01639344262295, "grad_norm": 8.676165580749512, "learning_rate": 1.5088631727406404e-05, "loss": 0.9825, "step": 10680 }, { "epoch": 35.019672131147544, "grad_norm": 7.114246845245361, "learning_rate": 1.508771757605009e-05, "loss": 0.8746, "step": 10681 }, { "epoch": 35.02295081967213, "grad_norm": 8.108438491821289, "learning_rate": 1.5086803367324471e-05, "loss": 0.7088, "step": 10682 }, { "epoch": 35.02622950819672, "grad_norm": 6.9407572746276855, "learning_rate": 1.5085889101239857e-05, "loss": 0.9198, "step": 10683 }, { "epoch": 35.02950819672131, "grad_norm": 8.224553108215332, "learning_rate": 1.5084974777806553e-05, "loss": 0.782, "step": 10684 }, { "epoch": 35.032786885245905, "grad_norm": 6.810299396514893, "learning_rate": 1.508406039703487e-05, "loss": 0.7036, "step": 10685 }, { "epoch": 35.03606557377049, "grad_norm": 9.498760223388672, "learning_rate": 1.5083145958935123e-05, "loss": 0.9341, "step": 10686 }, { "epoch": 35.03934426229508, "grad_norm": 4.987001895904541, "learning_rate": 1.5082231463517619e-05, "loss": 0.9744, "step": 10687 }, { "epoch": 35.04262295081967, "grad_norm": 6.858142375946045, "learning_rate": 1.508131691079267e-05, "loss": 0.8516, "step": 10688 }, { "epoch": 35.045901639344265, "grad_norm": 24.931055068969727, "learning_rate": 1.508040230077059e-05, "loss": 0.8803, "step": 10689 }, { "epoch": 35.049180327868854, "grad_norm": 7.545336723327637, "learning_rate": 1.5079487633461694e-05, "loss": 0.8315, "step": 10690 }, { "epoch": 35.05245901639344, "grad_norm": 6.360948085784912, "learning_rate": 1.5078572908876291e-05, "loss": 1.0696, "step": 10691 }, { "epoch": 35.05573770491803, "grad_norm": 8.453666687011719, "learning_rate": 1.50776581270247e-05, "loss": 0.9706, "step": 10692 }, { "epoch": 35.059016393442626, "grad_norm": 7.115556716918945, "learning_rate": 1.5076743287917233e-05, "loss": 0.6527, "step": 10693 }, { "epoch": 35.062295081967214, "grad_norm": 10.509991645812988, "learning_rate": 1.5075828391564206e-05, "loss": 0.9113, "step": 10694 }, { "epoch": 35.0655737704918, "grad_norm": 7.765040397644043, "learning_rate": 1.5074913437975938e-05, "loss": 0.8253, "step": 10695 }, { "epoch": 35.06885245901639, "grad_norm": 7.7965593338012695, "learning_rate": 1.5073998427162742e-05, "loss": 0.8276, "step": 10696 }, { "epoch": 35.072131147540986, "grad_norm": 6.86298131942749, "learning_rate": 1.5073083359134941e-05, "loss": 0.8094, "step": 10697 }, { "epoch": 35.075409836065575, "grad_norm": 5.608509540557861, "learning_rate": 1.5072168233902848e-05, "loss": 0.9317, "step": 10698 }, { "epoch": 35.07868852459016, "grad_norm": 6.112654685974121, "learning_rate": 1.5071253051476786e-05, "loss": 0.9196, "step": 10699 }, { "epoch": 35.08196721311475, "grad_norm": 8.433201789855957, "learning_rate": 1.5070337811867073e-05, "loss": 0.6496, "step": 10700 }, { "epoch": 35.08524590163935, "grad_norm": 7.948946952819824, "learning_rate": 1.5069422515084027e-05, "loss": 1.1093, "step": 10701 }, { "epoch": 35.088524590163935, "grad_norm": 5.416849613189697, "learning_rate": 1.5068507161137975e-05, "loss": 0.8577, "step": 10702 }, { "epoch": 35.09180327868852, "grad_norm": 7.09600305557251, "learning_rate": 1.5067591750039231e-05, "loss": 0.8546, "step": 10703 }, { "epoch": 35.09508196721311, "grad_norm": 7.55151891708374, "learning_rate": 1.5066676281798124e-05, "loss": 0.8232, "step": 10704 }, { "epoch": 35.09836065573771, "grad_norm": 7.04819917678833, "learning_rate": 1.5065760756424972e-05, "loss": 0.8619, "step": 10705 }, { "epoch": 35.101639344262296, "grad_norm": 6.301621437072754, "learning_rate": 1.5064845173930103e-05, "loss": 0.8245, "step": 10706 }, { "epoch": 35.104918032786884, "grad_norm": 7.361254692077637, "learning_rate": 1.5063929534323837e-05, "loss": 0.5878, "step": 10707 }, { "epoch": 35.10819672131147, "grad_norm": 9.724480628967285, "learning_rate": 1.5063013837616502e-05, "loss": 0.8069, "step": 10708 }, { "epoch": 35.11147540983607, "grad_norm": 7.991261005401611, "learning_rate": 1.5062098083818417e-05, "loss": 1.0812, "step": 10709 }, { "epoch": 35.114754098360656, "grad_norm": 21.434616088867188, "learning_rate": 1.5061182272939918e-05, "loss": 0.7593, "step": 10710 }, { "epoch": 35.118032786885244, "grad_norm": 5.992819309234619, "learning_rate": 1.5060266404991323e-05, "loss": 0.8654, "step": 10711 }, { "epoch": 35.12131147540983, "grad_norm": 7.789949417114258, "learning_rate": 1.5059350479982966e-05, "loss": 0.7433, "step": 10712 }, { "epoch": 35.12459016393443, "grad_norm": 6.649012088775635, "learning_rate": 1.505843449792517e-05, "loss": 0.6476, "step": 10713 }, { "epoch": 35.12786885245902, "grad_norm": 6.584887981414795, "learning_rate": 1.5057518458828266e-05, "loss": 1.0457, "step": 10714 }, { "epoch": 35.131147540983605, "grad_norm": 7.462019920349121, "learning_rate": 1.5056602362702584e-05, "loss": 0.6432, "step": 10715 }, { "epoch": 35.13442622950819, "grad_norm": 6.503612995147705, "learning_rate": 1.505568620955845e-05, "loss": 0.866, "step": 10716 }, { "epoch": 35.13770491803279, "grad_norm": 9.4231538772583, "learning_rate": 1.5054769999406201e-05, "loss": 0.8241, "step": 10717 }, { "epoch": 35.14098360655738, "grad_norm": 7.822826862335205, "learning_rate": 1.5053853732256163e-05, "loss": 0.8036, "step": 10718 }, { "epoch": 35.144262295081965, "grad_norm": 13.096497535705566, "learning_rate": 1.5052937408118669e-05, "loss": 0.8555, "step": 10719 }, { "epoch": 35.14754098360656, "grad_norm": 7.617149829864502, "learning_rate": 1.505202102700405e-05, "loss": 0.6302, "step": 10720 }, { "epoch": 35.15081967213115, "grad_norm": 6.04088020324707, "learning_rate": 1.5051104588922645e-05, "loss": 1.0054, "step": 10721 }, { "epoch": 35.15409836065574, "grad_norm": 8.207319259643555, "learning_rate": 1.505018809388478e-05, "loss": 0.7257, "step": 10722 }, { "epoch": 35.157377049180326, "grad_norm": 6.04086446762085, "learning_rate": 1.5049271541900798e-05, "loss": 0.8251, "step": 10723 }, { "epoch": 35.16065573770492, "grad_norm": 5.810701370239258, "learning_rate": 1.5048354932981027e-05, "loss": 0.9815, "step": 10724 }, { "epoch": 35.16393442622951, "grad_norm": 6.370295524597168, "learning_rate": 1.5047438267135806e-05, "loss": 0.9944, "step": 10725 }, { "epoch": 35.1672131147541, "grad_norm": 7.5384650230407715, "learning_rate": 1.5046521544375468e-05, "loss": 0.8187, "step": 10726 }, { "epoch": 35.170491803278686, "grad_norm": 17.200580596923828, "learning_rate": 1.5045604764710354e-05, "loss": 0.9657, "step": 10727 }, { "epoch": 35.17377049180328, "grad_norm": 7.178004264831543, "learning_rate": 1.50446879281508e-05, "loss": 0.7839, "step": 10728 }, { "epoch": 35.17704918032787, "grad_norm": 11.310693740844727, "learning_rate": 1.5043771034707143e-05, "loss": 0.8453, "step": 10729 }, { "epoch": 35.18032786885246, "grad_norm": 9.780014991760254, "learning_rate": 1.5042854084389729e-05, "loss": 0.8023, "step": 10730 }, { "epoch": 35.18360655737705, "grad_norm": 6.3421630859375, "learning_rate": 1.5041937077208886e-05, "loss": 0.8848, "step": 10731 }, { "epoch": 35.18688524590164, "grad_norm": 8.613791465759277, "learning_rate": 1.5041020013174962e-05, "loss": 0.9678, "step": 10732 }, { "epoch": 35.19016393442623, "grad_norm": 6.8558502197265625, "learning_rate": 1.5040102892298295e-05, "loss": 0.9376, "step": 10733 }, { "epoch": 35.19344262295082, "grad_norm": 7.708837509155273, "learning_rate": 1.5039185714589231e-05, "loss": 0.6547, "step": 10734 }, { "epoch": 35.19672131147541, "grad_norm": 7.649785041809082, "learning_rate": 1.5038268480058107e-05, "loss": 0.8156, "step": 10735 }, { "epoch": 35.2, "grad_norm": 7.238796710968018, "learning_rate": 1.5037351188715265e-05, "loss": 1.0526, "step": 10736 }, { "epoch": 35.20327868852459, "grad_norm": 9.544243812561035, "learning_rate": 1.5036433840571052e-05, "loss": 0.8018, "step": 10737 }, { "epoch": 35.20655737704918, "grad_norm": 6.70762825012207, "learning_rate": 1.5035516435635813e-05, "loss": 1.2164, "step": 10738 }, { "epoch": 35.20983606557377, "grad_norm": 7.564562797546387, "learning_rate": 1.5034598973919887e-05, "loss": 0.8981, "step": 10739 }, { "epoch": 35.21311475409836, "grad_norm": 11.74967098236084, "learning_rate": 1.5033681455433628e-05, "loss": 0.8664, "step": 10740 }, { "epoch": 35.21639344262295, "grad_norm": 6.584531784057617, "learning_rate": 1.5032763880187374e-05, "loss": 1.1293, "step": 10741 }, { "epoch": 35.21967213114754, "grad_norm": 11.09970474243164, "learning_rate": 1.5031846248191473e-05, "loss": 1.208, "step": 10742 }, { "epoch": 35.22295081967213, "grad_norm": 8.375617027282715, "learning_rate": 1.5030928559456275e-05, "loss": 0.9034, "step": 10743 }, { "epoch": 35.226229508196724, "grad_norm": 8.737051010131836, "learning_rate": 1.5030010813992126e-05, "loss": 0.6887, "step": 10744 }, { "epoch": 35.22950819672131, "grad_norm": 8.724390983581543, "learning_rate": 1.502909301180938e-05, "loss": 0.8285, "step": 10745 }, { "epoch": 35.2327868852459, "grad_norm": 6.635159969329834, "learning_rate": 1.5028175152918374e-05, "loss": 0.9016, "step": 10746 }, { "epoch": 35.23606557377049, "grad_norm": 6.8495869636535645, "learning_rate": 1.5027257237329469e-05, "loss": 0.7945, "step": 10747 }, { "epoch": 35.239344262295084, "grad_norm": 8.154391288757324, "learning_rate": 1.502633926505301e-05, "loss": 0.8536, "step": 10748 }, { "epoch": 35.24262295081967, "grad_norm": 11.70448112487793, "learning_rate": 1.5025421236099354e-05, "loss": 1.0159, "step": 10749 }, { "epoch": 35.24590163934426, "grad_norm": 8.33907413482666, "learning_rate": 1.5024503150478842e-05, "loss": 0.8082, "step": 10750 }, { "epoch": 35.24918032786885, "grad_norm": 9.852396011352539, "learning_rate": 1.5023585008201837e-05, "loss": 0.9071, "step": 10751 }, { "epoch": 35.252459016393445, "grad_norm": 9.73062801361084, "learning_rate": 1.5022666809278686e-05, "loss": 0.6977, "step": 10752 }, { "epoch": 35.25573770491803, "grad_norm": 7.6697587966918945, "learning_rate": 1.5021748553719746e-05, "loss": 0.6338, "step": 10753 }, { "epoch": 35.25901639344262, "grad_norm": 6.7769012451171875, "learning_rate": 1.5020830241535369e-05, "loss": 0.8073, "step": 10754 }, { "epoch": 35.26229508196721, "grad_norm": 9.239274024963379, "learning_rate": 1.5019911872735908e-05, "loss": 1.0311, "step": 10755 }, { "epoch": 35.265573770491805, "grad_norm": 6.2195305824279785, "learning_rate": 1.5018993447331727e-05, "loss": 1.1643, "step": 10756 }, { "epoch": 35.268852459016394, "grad_norm": 7.241485595703125, "learning_rate": 1.5018074965333172e-05, "loss": 0.8615, "step": 10757 }, { "epoch": 35.27213114754098, "grad_norm": 6.682156085968018, "learning_rate": 1.5017156426750605e-05, "loss": 0.6307, "step": 10758 }, { "epoch": 35.27540983606557, "grad_norm": 12.835750579833984, "learning_rate": 1.5016237831594383e-05, "loss": 0.9962, "step": 10759 }, { "epoch": 35.278688524590166, "grad_norm": 6.8987345695495605, "learning_rate": 1.5015319179874865e-05, "loss": 0.8768, "step": 10760 }, { "epoch": 35.281967213114754, "grad_norm": 5.6383442878723145, "learning_rate": 1.5014400471602408e-05, "loss": 0.7309, "step": 10761 }, { "epoch": 35.28524590163934, "grad_norm": 7.717905521392822, "learning_rate": 1.5013481706787372e-05, "loss": 0.9807, "step": 10762 }, { "epoch": 35.28852459016394, "grad_norm": 7.549165725708008, "learning_rate": 1.5012562885440117e-05, "loss": 0.8289, "step": 10763 }, { "epoch": 35.291803278688526, "grad_norm": 7.798788547515869, "learning_rate": 1.5011644007571003e-05, "loss": 0.8911, "step": 10764 }, { "epoch": 35.295081967213115, "grad_norm": 7.639687538146973, "learning_rate": 1.5010725073190391e-05, "loss": 0.6115, "step": 10765 }, { "epoch": 35.2983606557377, "grad_norm": 14.899227142333984, "learning_rate": 1.5009806082308647e-05, "loss": 0.9224, "step": 10766 }, { "epoch": 35.3016393442623, "grad_norm": 6.703104019165039, "learning_rate": 1.5008887034936129e-05, "loss": 0.8696, "step": 10767 }, { "epoch": 35.30491803278689, "grad_norm": 6.615783214569092, "learning_rate": 1.5007967931083202e-05, "loss": 0.8203, "step": 10768 }, { "epoch": 35.308196721311475, "grad_norm": 18.159000396728516, "learning_rate": 1.5007048770760231e-05, "loss": 0.8269, "step": 10769 }, { "epoch": 35.31147540983606, "grad_norm": 8.397737503051758, "learning_rate": 1.5006129553977577e-05, "loss": 0.7625, "step": 10770 }, { "epoch": 35.31475409836066, "grad_norm": 8.081217765808105, "learning_rate": 1.5005210280745608e-05, "loss": 0.6494, "step": 10771 }, { "epoch": 35.31803278688525, "grad_norm": 8.924436569213867, "learning_rate": 1.5004290951074688e-05, "loss": 0.5876, "step": 10772 }, { "epoch": 35.321311475409836, "grad_norm": 10.261865615844727, "learning_rate": 1.5003371564975187e-05, "loss": 0.9518, "step": 10773 }, { "epoch": 35.324590163934424, "grad_norm": 6.356977462768555, "learning_rate": 1.5002452122457465e-05, "loss": 1.0387, "step": 10774 }, { "epoch": 35.32786885245902, "grad_norm": 9.746615409851074, "learning_rate": 1.5001532623531897e-05, "loss": 0.7683, "step": 10775 }, { "epoch": 35.33114754098361, "grad_norm": 6.941715240478516, "learning_rate": 1.500061306820885e-05, "loss": 0.9397, "step": 10776 }, { "epoch": 35.334426229508196, "grad_norm": 6.777562618255615, "learning_rate": 1.4999693456498687e-05, "loss": 0.9934, "step": 10777 }, { "epoch": 35.337704918032784, "grad_norm": 6.6928863525390625, "learning_rate": 1.4998773788411785e-05, "loss": 0.8536, "step": 10778 }, { "epoch": 35.34098360655738, "grad_norm": 8.921420097351074, "learning_rate": 1.4997854063958511e-05, "loss": 0.7441, "step": 10779 }, { "epoch": 35.34426229508197, "grad_norm": 8.163256645202637, "learning_rate": 1.4996934283149233e-05, "loss": 0.7473, "step": 10780 }, { "epoch": 35.34754098360656, "grad_norm": 16.578182220458984, "learning_rate": 1.499601444599433e-05, "loss": 0.8852, "step": 10781 }, { "epoch": 35.350819672131145, "grad_norm": 6.794586181640625, "learning_rate": 1.4995094552504165e-05, "loss": 0.8282, "step": 10782 }, { "epoch": 35.35409836065574, "grad_norm": 13.374130249023438, "learning_rate": 1.4994174602689118e-05, "loss": 1.0031, "step": 10783 }, { "epoch": 35.35737704918033, "grad_norm": 7.613502025604248, "learning_rate": 1.4993254596559559e-05, "loss": 0.925, "step": 10784 }, { "epoch": 35.36065573770492, "grad_norm": 6.549971103668213, "learning_rate": 1.4992334534125861e-05, "loss": 0.9919, "step": 10785 }, { "epoch": 35.363934426229505, "grad_norm": 14.355766296386719, "learning_rate": 1.4991414415398405e-05, "loss": 0.8354, "step": 10786 }, { "epoch": 35.3672131147541, "grad_norm": 7.353321552276611, "learning_rate": 1.4990494240387556e-05, "loss": 1.1588, "step": 10787 }, { "epoch": 35.37049180327869, "grad_norm": 8.236878395080566, "learning_rate": 1.4989574009103702e-05, "loss": 0.9631, "step": 10788 }, { "epoch": 35.37377049180328, "grad_norm": 7.460803985595703, "learning_rate": 1.4988653721557209e-05, "loss": 0.7175, "step": 10789 }, { "epoch": 35.377049180327866, "grad_norm": 7.295990943908691, "learning_rate": 1.498773337775846e-05, "loss": 0.9361, "step": 10790 }, { "epoch": 35.38032786885246, "grad_norm": 12.319061279296875, "learning_rate": 1.4986812977717828e-05, "loss": 0.8232, "step": 10791 }, { "epoch": 35.38360655737705, "grad_norm": 6.547412395477295, "learning_rate": 1.49858925214457e-05, "loss": 0.9906, "step": 10792 }, { "epoch": 35.38688524590164, "grad_norm": 6.516564846038818, "learning_rate": 1.4984972008952445e-05, "loss": 1.0089, "step": 10793 }, { "epoch": 35.390163934426226, "grad_norm": 7.11379861831665, "learning_rate": 1.4984051440248451e-05, "loss": 0.7185, "step": 10794 }, { "epoch": 35.39344262295082, "grad_norm": 64.30835723876953, "learning_rate": 1.4983130815344094e-05, "loss": 0.875, "step": 10795 }, { "epoch": 35.39672131147541, "grad_norm": 6.894282341003418, "learning_rate": 1.4982210134249754e-05, "loss": 0.8928, "step": 10796 }, { "epoch": 35.4, "grad_norm": 11.655572891235352, "learning_rate": 1.4981289396975818e-05, "loss": 0.7023, "step": 10797 }, { "epoch": 35.40327868852459, "grad_norm": 9.064963340759277, "learning_rate": 1.498036860353266e-05, "loss": 0.784, "step": 10798 }, { "epoch": 35.40655737704918, "grad_norm": 10.180761337280273, "learning_rate": 1.4979447753930672e-05, "loss": 0.6436, "step": 10799 }, { "epoch": 35.40983606557377, "grad_norm": 9.046157836914062, "learning_rate": 1.497852684818023e-05, "loss": 0.9689, "step": 10800 }, { "epoch": 35.41311475409836, "grad_norm": 7.441510200500488, "learning_rate": 1.4977605886291726e-05, "loss": 0.6924, "step": 10801 }, { "epoch": 35.41639344262295, "grad_norm": 9.056931495666504, "learning_rate": 1.4976684868275535e-05, "loss": 0.7552, "step": 10802 }, { "epoch": 35.41967213114754, "grad_norm": 9.886680603027344, "learning_rate": 1.497576379414205e-05, "loss": 0.7836, "step": 10803 }, { "epoch": 35.42295081967213, "grad_norm": 10.58165454864502, "learning_rate": 1.4974842663901657e-05, "loss": 1.1036, "step": 10804 }, { "epoch": 35.42622950819672, "grad_norm": 8.931188583374023, "learning_rate": 1.4973921477564736e-05, "loss": 0.9193, "step": 10805 }, { "epoch": 35.429508196721315, "grad_norm": 7.1735053062438965, "learning_rate": 1.4973000235141681e-05, "loss": 0.8831, "step": 10806 }, { "epoch": 35.4327868852459, "grad_norm": 8.405659675598145, "learning_rate": 1.497207893664288e-05, "loss": 0.7172, "step": 10807 }, { "epoch": 35.43606557377049, "grad_norm": 9.334334373474121, "learning_rate": 1.4971157582078714e-05, "loss": 1.0314, "step": 10808 }, { "epoch": 35.43934426229508, "grad_norm": 15.513956069946289, "learning_rate": 1.497023617145958e-05, "loss": 0.7758, "step": 10809 }, { "epoch": 35.442622950819676, "grad_norm": 8.509373664855957, "learning_rate": 1.4969314704795867e-05, "loss": 0.8203, "step": 10810 }, { "epoch": 35.445901639344264, "grad_norm": 11.01240348815918, "learning_rate": 1.4968393182097962e-05, "loss": 0.6573, "step": 10811 }, { "epoch": 35.44918032786885, "grad_norm": 8.00651741027832, "learning_rate": 1.496747160337626e-05, "loss": 0.9738, "step": 10812 }, { "epoch": 35.45245901639344, "grad_norm": 6.443550109863281, "learning_rate": 1.4966549968641148e-05, "loss": 0.8558, "step": 10813 }, { "epoch": 35.455737704918036, "grad_norm": 6.953665256500244, "learning_rate": 1.4965628277903024e-05, "loss": 0.6484, "step": 10814 }, { "epoch": 35.459016393442624, "grad_norm": 8.640220642089844, "learning_rate": 1.4964706531172275e-05, "loss": 0.8129, "step": 10815 }, { "epoch": 35.46229508196721, "grad_norm": 15.611010551452637, "learning_rate": 1.49637847284593e-05, "loss": 0.8766, "step": 10816 }, { "epoch": 35.4655737704918, "grad_norm": 8.645088195800781, "learning_rate": 1.496286286977449e-05, "loss": 1.0442, "step": 10817 }, { "epoch": 35.4688524590164, "grad_norm": 19.15540885925293, "learning_rate": 1.4961940955128245e-05, "loss": 0.8167, "step": 10818 }, { "epoch": 35.472131147540985, "grad_norm": 15.601645469665527, "learning_rate": 1.4961018984530952e-05, "loss": 1.0067, "step": 10819 }, { "epoch": 35.47540983606557, "grad_norm": 8.71355152130127, "learning_rate": 1.4960096957993015e-05, "loss": 0.7451, "step": 10820 }, { "epoch": 35.47868852459016, "grad_norm": 11.790806770324707, "learning_rate": 1.4959174875524826e-05, "loss": 0.951, "step": 10821 }, { "epoch": 35.48196721311476, "grad_norm": 10.981053352355957, "learning_rate": 1.4958252737136784e-05, "loss": 0.8786, "step": 10822 }, { "epoch": 35.485245901639345, "grad_norm": 7.020688056945801, "learning_rate": 1.4957330542839288e-05, "loss": 0.8805, "step": 10823 }, { "epoch": 35.488524590163934, "grad_norm": 29.253564834594727, "learning_rate": 1.4956408292642734e-05, "loss": 0.912, "step": 10824 }, { "epoch": 35.49180327868852, "grad_norm": 11.08642292022705, "learning_rate": 1.4955485986557526e-05, "loss": 0.8813, "step": 10825 }, { "epoch": 35.49508196721312, "grad_norm": 7.444724082946777, "learning_rate": 1.4954563624594057e-05, "loss": 0.9941, "step": 10826 }, { "epoch": 35.498360655737706, "grad_norm": 15.387557983398438, "learning_rate": 1.4953641206762734e-05, "loss": 0.8185, "step": 10827 }, { "epoch": 35.501639344262294, "grad_norm": 8.641366004943848, "learning_rate": 1.4952718733073957e-05, "loss": 0.8651, "step": 10828 }, { "epoch": 35.50491803278688, "grad_norm": 10.07059383392334, "learning_rate": 1.4951796203538125e-05, "loss": 0.7617, "step": 10829 }, { "epoch": 35.50819672131148, "grad_norm": 10.198823928833008, "learning_rate": 1.4950873618165642e-05, "loss": 1.0604, "step": 10830 }, { "epoch": 35.511475409836066, "grad_norm": 9.266151428222656, "learning_rate": 1.4949950976966914e-05, "loss": 0.795, "step": 10831 }, { "epoch": 35.514754098360655, "grad_norm": 7.908354759216309, "learning_rate": 1.4949028279952338e-05, "loss": 0.8549, "step": 10832 }, { "epoch": 35.51803278688524, "grad_norm": 10.63176155090332, "learning_rate": 1.4948105527132326e-05, "loss": 0.8165, "step": 10833 }, { "epoch": 35.52131147540984, "grad_norm": 15.091792106628418, "learning_rate": 1.4947182718517277e-05, "loss": 1.0904, "step": 10834 }, { "epoch": 35.52459016393443, "grad_norm": 11.17287826538086, "learning_rate": 1.4946259854117601e-05, "loss": 1.0914, "step": 10835 }, { "epoch": 35.527868852459015, "grad_norm": 9.211493492126465, "learning_rate": 1.4945336933943702e-05, "loss": 0.9764, "step": 10836 }, { "epoch": 35.5311475409836, "grad_norm": 10.733503341674805, "learning_rate": 1.4944413958005984e-05, "loss": 1.076, "step": 10837 }, { "epoch": 35.5344262295082, "grad_norm": 23.612184524536133, "learning_rate": 1.4943490926314861e-05, "loss": 0.8922, "step": 10838 }, { "epoch": 35.53770491803279, "grad_norm": 13.437956809997559, "learning_rate": 1.4942567838880738e-05, "loss": 0.8107, "step": 10839 }, { "epoch": 35.540983606557376, "grad_norm": 8.214734077453613, "learning_rate": 1.4941644695714024e-05, "loss": 0.8276, "step": 10840 }, { "epoch": 35.544262295081964, "grad_norm": 7.272380828857422, "learning_rate": 1.4940721496825124e-05, "loss": 1.0701, "step": 10841 }, { "epoch": 35.54754098360656, "grad_norm": 8.451269149780273, "learning_rate": 1.4939798242224457e-05, "loss": 0.8935, "step": 10842 }, { "epoch": 35.55081967213115, "grad_norm": 6.379870891571045, "learning_rate": 1.4938874931922424e-05, "loss": 1.1486, "step": 10843 }, { "epoch": 35.554098360655736, "grad_norm": 8.902338027954102, "learning_rate": 1.4937951565929445e-05, "loss": 0.9009, "step": 10844 }, { "epoch": 35.557377049180324, "grad_norm": 9.282188415527344, "learning_rate": 1.4937028144255928e-05, "loss": 0.9579, "step": 10845 }, { "epoch": 35.56065573770492, "grad_norm": 7.947326183319092, "learning_rate": 1.4936104666912284e-05, "loss": 0.6753, "step": 10846 }, { "epoch": 35.56393442622951, "grad_norm": 6.4832258224487305, "learning_rate": 1.4935181133908926e-05, "loss": 0.7694, "step": 10847 }, { "epoch": 35.5672131147541, "grad_norm": 8.758113861083984, "learning_rate": 1.4934257545256271e-05, "loss": 0.736, "step": 10848 }, { "epoch": 35.570491803278685, "grad_norm": 9.073334693908691, "learning_rate": 1.4933333900964733e-05, "loss": 0.7918, "step": 10849 }, { "epoch": 35.57377049180328, "grad_norm": 28.593137741088867, "learning_rate": 1.4932410201044725e-05, "loss": 0.8549, "step": 10850 }, { "epoch": 35.57704918032787, "grad_norm": 10.134014129638672, "learning_rate": 1.4931486445506665e-05, "loss": 0.9915, "step": 10851 }, { "epoch": 35.58032786885246, "grad_norm": 8.4055814743042, "learning_rate": 1.4930562634360964e-05, "loss": 0.8824, "step": 10852 }, { "epoch": 35.58360655737705, "grad_norm": 10.363537788391113, "learning_rate": 1.4929638767618046e-05, "loss": 0.9969, "step": 10853 }, { "epoch": 35.58688524590164, "grad_norm": 11.704667091369629, "learning_rate": 1.4928714845288324e-05, "loss": 0.93, "step": 10854 }, { "epoch": 35.59016393442623, "grad_norm": 6.568280220031738, "learning_rate": 1.492779086738222e-05, "loss": 0.9076, "step": 10855 }, { "epoch": 35.59344262295082, "grad_norm": 11.70034408569336, "learning_rate": 1.4926866833910145e-05, "loss": 0.9331, "step": 10856 }, { "epoch": 35.59672131147541, "grad_norm": 7.8319315910339355, "learning_rate": 1.492594274488253e-05, "loss": 0.8194, "step": 10857 }, { "epoch": 35.6, "grad_norm": 8.319720268249512, "learning_rate": 1.4925018600309784e-05, "loss": 0.9846, "step": 10858 }, { "epoch": 35.60327868852459, "grad_norm": 6.121215343475342, "learning_rate": 1.4924094400202336e-05, "loss": 1.1058, "step": 10859 }, { "epoch": 35.60655737704918, "grad_norm": 11.192384719848633, "learning_rate": 1.49231701445706e-05, "loss": 0.7613, "step": 10860 }, { "epoch": 35.609836065573774, "grad_norm": 7.902768611907959, "learning_rate": 1.4922245833425006e-05, "loss": 0.9706, "step": 10861 }, { "epoch": 35.61311475409836, "grad_norm": 11.609637260437012, "learning_rate": 1.4921321466775969e-05, "loss": 0.9269, "step": 10862 }, { "epoch": 35.61639344262295, "grad_norm": 10.453474044799805, "learning_rate": 1.4920397044633919e-05, "loss": 0.7907, "step": 10863 }, { "epoch": 35.61967213114754, "grad_norm": 11.461111068725586, "learning_rate": 1.4919472567009273e-05, "loss": 1.006, "step": 10864 }, { "epoch": 35.622950819672134, "grad_norm": 8.793447494506836, "learning_rate": 1.4918548033912459e-05, "loss": 0.7994, "step": 10865 }, { "epoch": 35.62622950819672, "grad_norm": 13.15074348449707, "learning_rate": 1.4917623445353903e-05, "loss": 0.7976, "step": 10866 }, { "epoch": 35.62950819672131, "grad_norm": 8.037496566772461, "learning_rate": 1.491669880134403e-05, "loss": 0.8394, "step": 10867 }, { "epoch": 35.6327868852459, "grad_norm": 9.137192726135254, "learning_rate": 1.4915774101893265e-05, "loss": 0.8655, "step": 10868 }, { "epoch": 35.636065573770495, "grad_norm": 8.148653030395508, "learning_rate": 1.4914849347012037e-05, "loss": 1.094, "step": 10869 }, { "epoch": 35.63934426229508, "grad_norm": 11.581517219543457, "learning_rate": 1.4913924536710774e-05, "loss": 0.9432, "step": 10870 }, { "epoch": 35.64262295081967, "grad_norm": 25.55525016784668, "learning_rate": 1.4912999670999899e-05, "loss": 0.6312, "step": 10871 }, { "epoch": 35.64590163934426, "grad_norm": 4.9765944480896, "learning_rate": 1.4912074749889848e-05, "loss": 1.1664, "step": 10872 }, { "epoch": 35.649180327868855, "grad_norm": 8.693802833557129, "learning_rate": 1.4911149773391045e-05, "loss": 0.864, "step": 10873 }, { "epoch": 35.65245901639344, "grad_norm": 13.715498924255371, "learning_rate": 1.4910224741513922e-05, "loss": 0.7834, "step": 10874 }, { "epoch": 35.65573770491803, "grad_norm": 7.776614665985107, "learning_rate": 1.490929965426891e-05, "loss": 0.9044, "step": 10875 }, { "epoch": 35.65901639344262, "grad_norm": 10.529220581054688, "learning_rate": 1.4908374511666439e-05, "loss": 0.7095, "step": 10876 }, { "epoch": 35.662295081967216, "grad_norm": 8.995821952819824, "learning_rate": 1.4907449313716944e-05, "loss": 0.7498, "step": 10877 }, { "epoch": 35.665573770491804, "grad_norm": 8.250969886779785, "learning_rate": 1.4906524060430853e-05, "loss": 1.0038, "step": 10878 }, { "epoch": 35.66885245901639, "grad_norm": 10.246365547180176, "learning_rate": 1.4905598751818603e-05, "loss": 0.7911, "step": 10879 }, { "epoch": 35.67213114754098, "grad_norm": 8.766908645629883, "learning_rate": 1.4904673387890626e-05, "loss": 0.8116, "step": 10880 }, { "epoch": 35.675409836065576, "grad_norm": 6.835337162017822, "learning_rate": 1.4903747968657359e-05, "loss": 0.9084, "step": 10881 }, { "epoch": 35.678688524590164, "grad_norm": 10.428211212158203, "learning_rate": 1.4902822494129232e-05, "loss": 0.8304, "step": 10882 }, { "epoch": 35.68196721311475, "grad_norm": 6.92568302154541, "learning_rate": 1.4901896964316686e-05, "loss": 0.614, "step": 10883 }, { "epoch": 35.68524590163934, "grad_norm": 15.367003440856934, "learning_rate": 1.4900971379230156e-05, "loss": 0.7807, "step": 10884 }, { "epoch": 35.68852459016394, "grad_norm": 15.469870567321777, "learning_rate": 1.4900045738880075e-05, "loss": 0.7498, "step": 10885 }, { "epoch": 35.691803278688525, "grad_norm": 10.967727661132812, "learning_rate": 1.4899120043276886e-05, "loss": 0.8079, "step": 10886 }, { "epoch": 35.69508196721311, "grad_norm": 7.556370258331299, "learning_rate": 1.4898194292431023e-05, "loss": 0.9528, "step": 10887 }, { "epoch": 35.6983606557377, "grad_norm": 7.30964469909668, "learning_rate": 1.4897268486352925e-05, "loss": 0.9779, "step": 10888 }, { "epoch": 35.7016393442623, "grad_norm": 8.318267822265625, "learning_rate": 1.4896342625053035e-05, "loss": 0.8738, "step": 10889 }, { "epoch": 35.704918032786885, "grad_norm": 7.681332588195801, "learning_rate": 1.4895416708541792e-05, "loss": 0.5821, "step": 10890 }, { "epoch": 35.708196721311474, "grad_norm": 9.535375595092773, "learning_rate": 1.4894490736829635e-05, "loss": 1.0272, "step": 10891 }, { "epoch": 35.71147540983607, "grad_norm": 29.72068214416504, "learning_rate": 1.4893564709927005e-05, "loss": 0.8042, "step": 10892 }, { "epoch": 35.71475409836066, "grad_norm": 7.184913158416748, "learning_rate": 1.4892638627844345e-05, "loss": 0.9448, "step": 10893 }, { "epoch": 35.718032786885246, "grad_norm": 9.245256423950195, "learning_rate": 1.4891712490592096e-05, "loss": 0.9284, "step": 10894 }, { "epoch": 35.721311475409834, "grad_norm": 7.2122344970703125, "learning_rate": 1.4890786298180703e-05, "loss": 0.9542, "step": 10895 }, { "epoch": 35.72459016393443, "grad_norm": 8.002124786376953, "learning_rate": 1.4889860050620612e-05, "loss": 0.8741, "step": 10896 }, { "epoch": 35.72786885245902, "grad_norm": 5.961296558380127, "learning_rate": 1.4888933747922265e-05, "loss": 0.9324, "step": 10897 }, { "epoch": 35.731147540983606, "grad_norm": 8.58568286895752, "learning_rate": 1.4888007390096104e-05, "loss": 1.0079, "step": 10898 }, { "epoch": 35.734426229508195, "grad_norm": 8.47433090209961, "learning_rate": 1.4887080977152582e-05, "loss": 0.9548, "step": 10899 }, { "epoch": 35.73770491803279, "grad_norm": 96.2400894165039, "learning_rate": 1.4886154509102135e-05, "loss": 1.0454, "step": 10900 }, { "epoch": 35.74098360655738, "grad_norm": 8.100200653076172, "learning_rate": 1.4885227985955219e-05, "loss": 0.8072, "step": 10901 }, { "epoch": 35.74426229508197, "grad_norm": 7.775091648101807, "learning_rate": 1.4884301407722274e-05, "loss": 0.8936, "step": 10902 }, { "epoch": 35.747540983606555, "grad_norm": 7.798235893249512, "learning_rate": 1.488337477441376e-05, "loss": 0.7326, "step": 10903 }, { "epoch": 35.75081967213115, "grad_norm": 8.495076179504395, "learning_rate": 1.488244808604011e-05, "loss": 0.9424, "step": 10904 }, { "epoch": 35.75409836065574, "grad_norm": 8.47374153137207, "learning_rate": 1.4881521342611787e-05, "loss": 0.872, "step": 10905 }, { "epoch": 35.75737704918033, "grad_norm": 15.168984413146973, "learning_rate": 1.488059454413923e-05, "loss": 0.8157, "step": 10906 }, { "epoch": 35.760655737704916, "grad_norm": 8.294319152832031, "learning_rate": 1.48796676906329e-05, "loss": 0.8566, "step": 10907 }, { "epoch": 35.76393442622951, "grad_norm": 7.389718532562256, "learning_rate": 1.487874078210324e-05, "loss": 1.0275, "step": 10908 }, { "epoch": 35.7672131147541, "grad_norm": 11.54651927947998, "learning_rate": 1.4877813818560706e-05, "loss": 0.9956, "step": 10909 }, { "epoch": 35.77049180327869, "grad_norm": 46.292415618896484, "learning_rate": 1.487688680001575e-05, "loss": 0.8783, "step": 10910 }, { "epoch": 35.773770491803276, "grad_norm": 8.54928970336914, "learning_rate": 1.4875959726478826e-05, "loss": 0.9503, "step": 10911 }, { "epoch": 35.77704918032787, "grad_norm": 9.96810531616211, "learning_rate": 1.4875032597960383e-05, "loss": 0.8749, "step": 10912 }, { "epoch": 35.78032786885246, "grad_norm": 9.986212730407715, "learning_rate": 1.487410541447088e-05, "loss": 1.0484, "step": 10913 }, { "epoch": 35.78360655737705, "grad_norm": 8.687788009643555, "learning_rate": 1.4873178176020773e-05, "loss": 1.015, "step": 10914 }, { "epoch": 35.78688524590164, "grad_norm": 8.0433988571167, "learning_rate": 1.4872250882620511e-05, "loss": 0.8568, "step": 10915 }, { "epoch": 35.79016393442623, "grad_norm": 16.931621551513672, "learning_rate": 1.4871323534280557e-05, "loss": 0.9105, "step": 10916 }, { "epoch": 35.79344262295082, "grad_norm": 10.759347915649414, "learning_rate": 1.4870396131011365e-05, "loss": 0.8684, "step": 10917 }, { "epoch": 35.79672131147541, "grad_norm": 12.793553352355957, "learning_rate": 1.4869468672823393e-05, "loss": 1.0842, "step": 10918 }, { "epoch": 35.8, "grad_norm": 13.169939994812012, "learning_rate": 1.4868541159727097e-05, "loss": 0.9317, "step": 10919 }, { "epoch": 35.80327868852459, "grad_norm": 12.73569393157959, "learning_rate": 1.486761359173294e-05, "loss": 0.6254, "step": 10920 }, { "epoch": 35.80655737704918, "grad_norm": 18.259014129638672, "learning_rate": 1.4866685968851376e-05, "loss": 0.7262, "step": 10921 }, { "epoch": 35.80983606557377, "grad_norm": 10.552119255065918, "learning_rate": 1.4865758291092868e-05, "loss": 0.806, "step": 10922 }, { "epoch": 35.81311475409836, "grad_norm": 9.548450469970703, "learning_rate": 1.486483055846788e-05, "loss": 0.9353, "step": 10923 }, { "epoch": 35.81639344262295, "grad_norm": 12.261750221252441, "learning_rate": 1.4863902770986865e-05, "loss": 1.0374, "step": 10924 }, { "epoch": 35.81967213114754, "grad_norm": 8.671281814575195, "learning_rate": 1.4862974928660291e-05, "loss": 0.8577, "step": 10925 }, { "epoch": 35.82295081967213, "grad_norm": 9.073110580444336, "learning_rate": 1.4862047031498619e-05, "loss": 0.8858, "step": 10926 }, { "epoch": 35.82622950819672, "grad_norm": 9.40864372253418, "learning_rate": 1.486111907951231e-05, "loss": 0.7783, "step": 10927 }, { "epoch": 35.829508196721314, "grad_norm": 22.033788681030273, "learning_rate": 1.486019107271183e-05, "loss": 0.8071, "step": 10928 }, { "epoch": 35.8327868852459, "grad_norm": 16.9000244140625, "learning_rate": 1.485926301110764e-05, "loss": 0.9285, "step": 10929 }, { "epoch": 35.83606557377049, "grad_norm": 11.431852340698242, "learning_rate": 1.4858334894710212e-05, "loss": 0.9873, "step": 10930 }, { "epoch": 35.83934426229508, "grad_norm": 6.480532646179199, "learning_rate": 1.4857406723530003e-05, "loss": 1.0105, "step": 10931 }, { "epoch": 35.842622950819674, "grad_norm": 9.651009559631348, "learning_rate": 1.4856478497577483e-05, "loss": 0.8148, "step": 10932 }, { "epoch": 35.84590163934426, "grad_norm": 16.120298385620117, "learning_rate": 1.485555021686312e-05, "loss": 0.8867, "step": 10933 }, { "epoch": 35.84918032786885, "grad_norm": 8.197410583496094, "learning_rate": 1.4854621881397378e-05, "loss": 0.9064, "step": 10934 }, { "epoch": 35.85245901639344, "grad_norm": 11.808714866638184, "learning_rate": 1.4853693491190729e-05, "loss": 0.8856, "step": 10935 }, { "epoch": 35.855737704918035, "grad_norm": 14.776992797851562, "learning_rate": 1.4852765046253637e-05, "loss": 0.774, "step": 10936 }, { "epoch": 35.85901639344262, "grad_norm": 10.949447631835938, "learning_rate": 1.4851836546596574e-05, "loss": 0.8058, "step": 10937 }, { "epoch": 35.86229508196721, "grad_norm": 11.088028907775879, "learning_rate": 1.485090799223001e-05, "loss": 0.9082, "step": 10938 }, { "epoch": 35.86557377049181, "grad_norm": 11.459501266479492, "learning_rate": 1.4849979383164415e-05, "loss": 0.7409, "step": 10939 }, { "epoch": 35.868852459016395, "grad_norm": 10.56877613067627, "learning_rate": 1.4849050719410259e-05, "loss": 0.9634, "step": 10940 }, { "epoch": 35.87213114754098, "grad_norm": 10.975580215454102, "learning_rate": 1.4848122000978014e-05, "loss": 0.6311, "step": 10941 }, { "epoch": 35.87540983606557, "grad_norm": 9.268508911132812, "learning_rate": 1.4847193227878151e-05, "loss": 1.101, "step": 10942 }, { "epoch": 35.87868852459017, "grad_norm": 10.765588760375977, "learning_rate": 1.4846264400121148e-05, "loss": 0.8583, "step": 10943 }, { "epoch": 35.881967213114756, "grad_norm": 27.32767105102539, "learning_rate": 1.4845335517717472e-05, "loss": 0.8495, "step": 10944 }, { "epoch": 35.885245901639344, "grad_norm": 13.000617027282715, "learning_rate": 1.4844406580677604e-05, "loss": 1.0594, "step": 10945 }, { "epoch": 35.88852459016393, "grad_norm": 14.835369110107422, "learning_rate": 1.4843477589012012e-05, "loss": 0.8882, "step": 10946 }, { "epoch": 35.89180327868853, "grad_norm": 8.056471824645996, "learning_rate": 1.4842548542731172e-05, "loss": 1.0205, "step": 10947 }, { "epoch": 35.895081967213116, "grad_norm": 13.338518142700195, "learning_rate": 1.4841619441845568e-05, "loss": 0.7819, "step": 10948 }, { "epoch": 35.898360655737704, "grad_norm": 10.997776985168457, "learning_rate": 1.4840690286365669e-05, "loss": 0.8781, "step": 10949 }, { "epoch": 35.90163934426229, "grad_norm": 9.145400047302246, "learning_rate": 1.483976107630195e-05, "loss": 1.0782, "step": 10950 }, { "epoch": 35.90491803278689, "grad_norm": 8.945199966430664, "learning_rate": 1.4838831811664898e-05, "loss": 0.9849, "step": 10951 }, { "epoch": 35.90819672131148, "grad_norm": 12.799918174743652, "learning_rate": 1.4837902492464982e-05, "loss": 0.9537, "step": 10952 }, { "epoch": 35.911475409836065, "grad_norm": 8.13363265991211, "learning_rate": 1.4836973118712687e-05, "loss": 1.121, "step": 10953 }, { "epoch": 35.91475409836065, "grad_norm": 10.071988105773926, "learning_rate": 1.483604369041849e-05, "loss": 0.8032, "step": 10954 }, { "epoch": 35.91803278688525, "grad_norm": 31.12865447998047, "learning_rate": 1.4835114207592876e-05, "loss": 0.9634, "step": 10955 }, { "epoch": 35.92131147540984, "grad_norm": 16.163501739501953, "learning_rate": 1.483418467024632e-05, "loss": 1.0474, "step": 10956 }, { "epoch": 35.924590163934425, "grad_norm": 8.979888916015625, "learning_rate": 1.4833255078389304e-05, "loss": 0.8781, "step": 10957 }, { "epoch": 35.927868852459014, "grad_norm": 26.751989364624023, "learning_rate": 1.4832325432032311e-05, "loss": 0.8606, "step": 10958 }, { "epoch": 35.93114754098361, "grad_norm": 12.277077674865723, "learning_rate": 1.4831395731185826e-05, "loss": 0.9731, "step": 10959 }, { "epoch": 35.9344262295082, "grad_norm": 11.376344680786133, "learning_rate": 1.483046597586033e-05, "loss": 0.9017, "step": 10960 }, { "epoch": 35.937704918032786, "grad_norm": 7.531456470489502, "learning_rate": 1.482953616606631e-05, "loss": 0.7939, "step": 10961 }, { "epoch": 35.940983606557374, "grad_norm": 7.216392993927002, "learning_rate": 1.4828606301814245e-05, "loss": 0.9971, "step": 10962 }, { "epoch": 35.94426229508197, "grad_norm": 10.944976806640625, "learning_rate": 1.4827676383114626e-05, "loss": 1.01, "step": 10963 }, { "epoch": 35.94754098360656, "grad_norm": 10.096362113952637, "learning_rate": 1.4826746409977937e-05, "loss": 0.7919, "step": 10964 }, { "epoch": 35.950819672131146, "grad_norm": 21.39102554321289, "learning_rate": 1.4825816382414659e-05, "loss": 0.7004, "step": 10965 }, { "epoch": 35.954098360655735, "grad_norm": 10.751073837280273, "learning_rate": 1.4824886300435288e-05, "loss": 0.8345, "step": 10966 }, { "epoch": 35.95737704918033, "grad_norm": 8.343439102172852, "learning_rate": 1.4823956164050306e-05, "loss": 1.0585, "step": 10967 }, { "epoch": 35.96065573770492, "grad_norm": 6.080460548400879, "learning_rate": 1.4823025973270201e-05, "loss": 1.1737, "step": 10968 }, { "epoch": 35.96393442622951, "grad_norm": 13.926871299743652, "learning_rate": 1.4822095728105465e-05, "loss": 0.7127, "step": 10969 }, { "epoch": 35.967213114754095, "grad_norm": 10.355496406555176, "learning_rate": 1.4821165428566586e-05, "loss": 1.037, "step": 10970 }, { "epoch": 35.97049180327869, "grad_norm": 9.75857925415039, "learning_rate": 1.4820235074664055e-05, "loss": 1.0544, "step": 10971 }, { "epoch": 35.97377049180328, "grad_norm": 10.806561470031738, "learning_rate": 1.481930466640836e-05, "loss": 0.9672, "step": 10972 }, { "epoch": 35.97704918032787, "grad_norm": 12.79990005493164, "learning_rate": 1.4818374203809994e-05, "loss": 0.9778, "step": 10973 }, { "epoch": 35.980327868852456, "grad_norm": 7.772571086883545, "learning_rate": 1.4817443686879452e-05, "loss": 0.8181, "step": 10974 }, { "epoch": 35.98360655737705, "grad_norm": 21.368318557739258, "learning_rate": 1.4816513115627221e-05, "loss": 0.7445, "step": 10975 }, { "epoch": 35.98688524590164, "grad_norm": 8.013764381408691, "learning_rate": 1.4815582490063795e-05, "loss": 1.0432, "step": 10976 }, { "epoch": 35.99016393442623, "grad_norm": 8.577543258666992, "learning_rate": 1.4814651810199672e-05, "loss": 0.8796, "step": 10977 }, { "epoch": 35.993442622950816, "grad_norm": 10.568032264709473, "learning_rate": 1.4813721076045342e-05, "loss": 0.8662, "step": 10978 }, { "epoch": 35.99672131147541, "grad_norm": 7.403471946716309, "learning_rate": 1.4812790287611305e-05, "loss": 0.8676, "step": 10979 }, { "epoch": 36.0, "grad_norm": 6.906257152557373, "learning_rate": 1.4811859444908053e-05, "loss": 0.7159, "step": 10980 }, { "epoch": 36.00327868852459, "grad_norm": 11.560450553894043, "learning_rate": 1.481092854794608e-05, "loss": 0.8046, "step": 10981 }, { "epoch": 36.006557377049184, "grad_norm": 9.75936222076416, "learning_rate": 1.4809997596735888e-05, "loss": 0.8344, "step": 10982 }, { "epoch": 36.00983606557377, "grad_norm": 8.663511276245117, "learning_rate": 1.480906659128797e-05, "loss": 1.0333, "step": 10983 }, { "epoch": 36.01311475409836, "grad_norm": 6.672460079193115, "learning_rate": 1.4808135531612827e-05, "loss": 0.7857, "step": 10984 }, { "epoch": 36.01639344262295, "grad_norm": 13.049689292907715, "learning_rate": 1.4807204417720958e-05, "loss": 0.6631, "step": 10985 }, { "epoch": 36.019672131147544, "grad_norm": 8.117757797241211, "learning_rate": 1.4806273249622858e-05, "loss": 0.9098, "step": 10986 }, { "epoch": 36.02295081967213, "grad_norm": 8.619518280029297, "learning_rate": 1.480534202732903e-05, "loss": 0.8532, "step": 10987 }, { "epoch": 36.02622950819672, "grad_norm": 8.908915519714355, "learning_rate": 1.4804410750849978e-05, "loss": 0.9529, "step": 10988 }, { "epoch": 36.02950819672131, "grad_norm": 11.642589569091797, "learning_rate": 1.4803479420196197e-05, "loss": 0.6556, "step": 10989 }, { "epoch": 36.032786885245905, "grad_norm": 9.224726676940918, "learning_rate": 1.480254803537819e-05, "loss": 0.9628, "step": 10990 }, { "epoch": 36.03606557377049, "grad_norm": 13.973600387573242, "learning_rate": 1.480161659640646e-05, "loss": 0.7535, "step": 10991 }, { "epoch": 36.03934426229508, "grad_norm": 11.741273880004883, "learning_rate": 1.4800685103291515e-05, "loss": 0.8238, "step": 10992 }, { "epoch": 36.04262295081967, "grad_norm": 11.120110511779785, "learning_rate": 1.479975355604385e-05, "loss": 0.7624, "step": 10993 }, { "epoch": 36.045901639344265, "grad_norm": 14.964362144470215, "learning_rate": 1.4798821954673974e-05, "loss": 0.8925, "step": 10994 }, { "epoch": 36.049180327868854, "grad_norm": 7.644021511077881, "learning_rate": 1.4797890299192392e-05, "loss": 1.0205, "step": 10995 }, { "epoch": 36.05245901639344, "grad_norm": 22.6623477935791, "learning_rate": 1.4796958589609608e-05, "loss": 1.0212, "step": 10996 }, { "epoch": 36.05573770491803, "grad_norm": 7.263619422912598, "learning_rate": 1.4796026825936128e-05, "loss": 1.1481, "step": 10997 }, { "epoch": 36.059016393442626, "grad_norm": 8.971256256103516, "learning_rate": 1.4795095008182458e-05, "loss": 0.6677, "step": 10998 }, { "epoch": 36.062295081967214, "grad_norm": 6.914122104644775, "learning_rate": 1.4794163136359108e-05, "loss": 0.8207, "step": 10999 }, { "epoch": 36.0655737704918, "grad_norm": 12.133673667907715, "learning_rate": 1.4793231210476582e-05, "loss": 0.6313, "step": 11000 }, { "epoch": 36.06885245901639, "grad_norm": 8.847900390625, "learning_rate": 1.4792299230545393e-05, "loss": 0.6686, "step": 11001 }, { "epoch": 36.072131147540986, "grad_norm": 9.674348831176758, "learning_rate": 1.4791367196576044e-05, "loss": 0.8216, "step": 11002 }, { "epoch": 36.075409836065575, "grad_norm": 10.094075202941895, "learning_rate": 1.4790435108579048e-05, "loss": 0.5389, "step": 11003 }, { "epoch": 36.07868852459016, "grad_norm": 29.63893699645996, "learning_rate": 1.4789502966564917e-05, "loss": 0.7394, "step": 11004 }, { "epoch": 36.08196721311475, "grad_norm": 15.19424819946289, "learning_rate": 1.4788570770544163e-05, "loss": 0.9119, "step": 11005 }, { "epoch": 36.08524590163935, "grad_norm": 7.361155986785889, "learning_rate": 1.4787638520527292e-05, "loss": 0.8258, "step": 11006 }, { "epoch": 36.088524590163935, "grad_norm": 13.16805362701416, "learning_rate": 1.4786706216524818e-05, "loss": 0.7815, "step": 11007 }, { "epoch": 36.09180327868852, "grad_norm": 15.960262298583984, "learning_rate": 1.4785773858547255e-05, "loss": 0.8595, "step": 11008 }, { "epoch": 36.09508196721311, "grad_norm": 8.867819786071777, "learning_rate": 1.4784841446605116e-05, "loss": 0.7318, "step": 11009 }, { "epoch": 36.09836065573771, "grad_norm": 8.940120697021484, "learning_rate": 1.4783908980708917e-05, "loss": 0.8959, "step": 11010 }, { "epoch": 36.101639344262296, "grad_norm": 6.899765491485596, "learning_rate": 1.4782976460869168e-05, "loss": 0.9908, "step": 11011 }, { "epoch": 36.104918032786884, "grad_norm": 10.195365905761719, "learning_rate": 1.4782043887096385e-05, "loss": 1.0306, "step": 11012 }, { "epoch": 36.10819672131147, "grad_norm": 6.127986431121826, "learning_rate": 1.4781111259401087e-05, "loss": 0.9034, "step": 11013 }, { "epoch": 36.11147540983607, "grad_norm": 7.873058795928955, "learning_rate": 1.4780178577793789e-05, "loss": 0.8838, "step": 11014 }, { "epoch": 36.114754098360656, "grad_norm": 7.9043755531311035, "learning_rate": 1.4779245842285006e-05, "loss": 0.9318, "step": 11015 }, { "epoch": 36.118032786885244, "grad_norm": 7.4268598556518555, "learning_rate": 1.4778313052885259e-05, "loss": 0.8241, "step": 11016 }, { "epoch": 36.12131147540983, "grad_norm": 14.177947044372559, "learning_rate": 1.477738020960506e-05, "loss": 0.7672, "step": 11017 }, { "epoch": 36.12459016393443, "grad_norm": 9.867593765258789, "learning_rate": 1.4776447312454937e-05, "loss": 1.063, "step": 11018 }, { "epoch": 36.12786885245902, "grad_norm": 35.977535247802734, "learning_rate": 1.47755143614454e-05, "loss": 0.9579, "step": 11019 }, { "epoch": 36.131147540983605, "grad_norm": 11.799793243408203, "learning_rate": 1.4774581356586975e-05, "loss": 0.9659, "step": 11020 }, { "epoch": 36.13442622950819, "grad_norm": 6.532219886779785, "learning_rate": 1.477364829789018e-05, "loss": 0.7694, "step": 11021 }, { "epoch": 36.13770491803279, "grad_norm": 7.225185871124268, "learning_rate": 1.4772715185365538e-05, "loss": 0.8969, "step": 11022 }, { "epoch": 36.14098360655738, "grad_norm": 9.540557861328125, "learning_rate": 1.4771782019023571e-05, "loss": 0.7682, "step": 11023 }, { "epoch": 36.144262295081965, "grad_norm": 21.29964828491211, "learning_rate": 1.4770848798874796e-05, "loss": 0.7127, "step": 11024 }, { "epoch": 36.14754098360656, "grad_norm": 8.317611694335938, "learning_rate": 1.4769915524929745e-05, "loss": 0.6006, "step": 11025 }, { "epoch": 36.15081967213115, "grad_norm": 8.179642677307129, "learning_rate": 1.4768982197198936e-05, "loss": 0.8462, "step": 11026 }, { "epoch": 36.15409836065574, "grad_norm": 7.972043514251709, "learning_rate": 1.4768048815692892e-05, "loss": 0.7213, "step": 11027 }, { "epoch": 36.157377049180326, "grad_norm": 9.394268989562988, "learning_rate": 1.4767115380422143e-05, "loss": 0.8939, "step": 11028 }, { "epoch": 36.16065573770492, "grad_norm": 46.7251091003418, "learning_rate": 1.4766181891397212e-05, "loss": 0.9882, "step": 11029 }, { "epoch": 36.16393442622951, "grad_norm": 6.713399410247803, "learning_rate": 1.476524834862862e-05, "loss": 0.8503, "step": 11030 }, { "epoch": 36.1672131147541, "grad_norm": 6.797513961791992, "learning_rate": 1.4764314752126902e-05, "loss": 0.8355, "step": 11031 }, { "epoch": 36.170491803278686, "grad_norm": 8.117683410644531, "learning_rate": 1.4763381101902581e-05, "loss": 0.8423, "step": 11032 }, { "epoch": 36.17377049180328, "grad_norm": 11.200994491577148, "learning_rate": 1.4762447397966187e-05, "loss": 0.8062, "step": 11033 }, { "epoch": 36.17704918032787, "grad_norm": 5.770920753479004, "learning_rate": 1.4761513640328243e-05, "loss": 0.6164, "step": 11034 }, { "epoch": 36.18032786885246, "grad_norm": 19.992206573486328, "learning_rate": 1.4760579828999284e-05, "loss": 0.9001, "step": 11035 }, { "epoch": 36.18360655737705, "grad_norm": 7.952439308166504, "learning_rate": 1.4759645963989837e-05, "loss": 0.5131, "step": 11036 }, { "epoch": 36.18688524590164, "grad_norm": 10.55283260345459, "learning_rate": 1.4758712045310434e-05, "loss": 0.78, "step": 11037 }, { "epoch": 36.19016393442623, "grad_norm": 8.556529998779297, "learning_rate": 1.4757778072971605e-05, "loss": 0.8365, "step": 11038 }, { "epoch": 36.19344262295082, "grad_norm": 13.374958992004395, "learning_rate": 1.475684404698388e-05, "loss": 0.8012, "step": 11039 }, { "epoch": 36.19672131147541, "grad_norm": 7.63870906829834, "learning_rate": 1.4755909967357796e-05, "loss": 1.0079, "step": 11040 }, { "epoch": 36.2, "grad_norm": 8.760440826416016, "learning_rate": 1.4754975834103877e-05, "loss": 1.0363, "step": 11041 }, { "epoch": 36.20327868852459, "grad_norm": 9.561836242675781, "learning_rate": 1.4754041647232666e-05, "loss": 0.9948, "step": 11042 }, { "epoch": 36.20655737704918, "grad_norm": 9.694660186767578, "learning_rate": 1.475310740675469e-05, "loss": 0.8683, "step": 11043 }, { "epoch": 36.20983606557377, "grad_norm": 20.584867477416992, "learning_rate": 1.4752173112680485e-05, "loss": 0.7791, "step": 11044 }, { "epoch": 36.21311475409836, "grad_norm": 7.63016939163208, "learning_rate": 1.475123876502059e-05, "loss": 0.8649, "step": 11045 }, { "epoch": 36.21639344262295, "grad_norm": 12.693042755126953, "learning_rate": 1.4750304363785537e-05, "loss": 0.7604, "step": 11046 }, { "epoch": 36.21967213114754, "grad_norm": 7.8670454025268555, "learning_rate": 1.4749369908985862e-05, "loss": 0.8189, "step": 11047 }, { "epoch": 36.22295081967213, "grad_norm": 7.712030410766602, "learning_rate": 1.4748435400632107e-05, "loss": 0.5734, "step": 11048 }, { "epoch": 36.226229508196724, "grad_norm": 6.716281890869141, "learning_rate": 1.47475008387348e-05, "loss": 1.1084, "step": 11049 }, { "epoch": 36.22950819672131, "grad_norm": 6.8328166007995605, "learning_rate": 1.4746566223304488e-05, "loss": 0.7474, "step": 11050 }, { "epoch": 36.2327868852459, "grad_norm": 13.780585289001465, "learning_rate": 1.4745631554351705e-05, "loss": 0.813, "step": 11051 }, { "epoch": 36.23606557377049, "grad_norm": 9.25317668914795, "learning_rate": 1.4744696831886994e-05, "loss": 1.041, "step": 11052 }, { "epoch": 36.239344262295084, "grad_norm": 9.192596435546875, "learning_rate": 1.4743762055920892e-05, "loss": 0.6786, "step": 11053 }, { "epoch": 36.24262295081967, "grad_norm": 14.508624076843262, "learning_rate": 1.4742827226463941e-05, "loss": 0.9908, "step": 11054 }, { "epoch": 36.24590163934426, "grad_norm": 8.55797004699707, "learning_rate": 1.474189234352668e-05, "loss": 0.5531, "step": 11055 }, { "epoch": 36.24918032786885, "grad_norm": 7.333369255065918, "learning_rate": 1.4740957407119653e-05, "loss": 0.7754, "step": 11056 }, { "epoch": 36.252459016393445, "grad_norm": 6.551155090332031, "learning_rate": 1.4740022417253403e-05, "loss": 1.0103, "step": 11057 }, { "epoch": 36.25573770491803, "grad_norm": 8.337284088134766, "learning_rate": 1.4739087373938472e-05, "loss": 1.0917, "step": 11058 }, { "epoch": 36.25901639344262, "grad_norm": 7.024548530578613, "learning_rate": 1.4738152277185404e-05, "loss": 0.8361, "step": 11059 }, { "epoch": 36.26229508196721, "grad_norm": 6.232656955718994, "learning_rate": 1.4737217127004742e-05, "loss": 0.7154, "step": 11060 }, { "epoch": 36.265573770491805, "grad_norm": 10.179680824279785, "learning_rate": 1.4736281923407033e-05, "loss": 0.7538, "step": 11061 }, { "epoch": 36.268852459016394, "grad_norm": 9.116242408752441, "learning_rate": 1.4735346666402817e-05, "loss": 0.8223, "step": 11062 }, { "epoch": 36.27213114754098, "grad_norm": 11.112245559692383, "learning_rate": 1.4734411356002644e-05, "loss": 0.962, "step": 11063 }, { "epoch": 36.27540983606557, "grad_norm": 12.351571083068848, "learning_rate": 1.4733475992217063e-05, "loss": 0.8607, "step": 11064 }, { "epoch": 36.278688524590166, "grad_norm": 7.699501991271973, "learning_rate": 1.473254057505662e-05, "loss": 0.7787, "step": 11065 }, { "epoch": 36.281967213114754, "grad_norm": 8.034956932067871, "learning_rate": 1.4731605104531858e-05, "loss": 0.8451, "step": 11066 }, { "epoch": 36.28524590163934, "grad_norm": 9.241700172424316, "learning_rate": 1.473066958065333e-05, "loss": 0.9658, "step": 11067 }, { "epoch": 36.28852459016394, "grad_norm": 7.4369587898254395, "learning_rate": 1.4729734003431583e-05, "loss": 0.8235, "step": 11068 }, { "epoch": 36.291803278688526, "grad_norm": 7.355963706970215, "learning_rate": 1.472879837287717e-05, "loss": 0.6597, "step": 11069 }, { "epoch": 36.295081967213115, "grad_norm": 6.5923004150390625, "learning_rate": 1.4727862689000636e-05, "loss": 0.7144, "step": 11070 }, { "epoch": 36.2983606557377, "grad_norm": 7.339977264404297, "learning_rate": 1.4726926951812535e-05, "loss": 0.771, "step": 11071 }, { "epoch": 36.3016393442623, "grad_norm": 9.046211242675781, "learning_rate": 1.4725991161323418e-05, "loss": 0.8442, "step": 11072 }, { "epoch": 36.30491803278689, "grad_norm": 9.345569610595703, "learning_rate": 1.4725055317543837e-05, "loss": 0.5818, "step": 11073 }, { "epoch": 36.308196721311475, "grad_norm": 8.509129524230957, "learning_rate": 1.4724119420484347e-05, "loss": 0.7122, "step": 11074 }, { "epoch": 36.31147540983606, "grad_norm": 11.375345230102539, "learning_rate": 1.4723183470155494e-05, "loss": 0.7276, "step": 11075 }, { "epoch": 36.31475409836066, "grad_norm": 16.11749839782715, "learning_rate": 1.4722247466567838e-05, "loss": 0.7535, "step": 11076 }, { "epoch": 36.31803278688525, "grad_norm": 7.9690752029418945, "learning_rate": 1.4721311409731933e-05, "loss": 0.8395, "step": 11077 }, { "epoch": 36.321311475409836, "grad_norm": 8.940004348754883, "learning_rate": 1.4720375299658334e-05, "loss": 0.6176, "step": 11078 }, { "epoch": 36.324590163934424, "grad_norm": 8.700011253356934, "learning_rate": 1.4719439136357592e-05, "loss": 0.9906, "step": 11079 }, { "epoch": 36.32786885245902, "grad_norm": 8.196529388427734, "learning_rate": 1.4718502919840268e-05, "loss": 0.8654, "step": 11080 }, { "epoch": 36.33114754098361, "grad_norm": 8.153800010681152, "learning_rate": 1.471756665011692e-05, "loss": 0.6059, "step": 11081 }, { "epoch": 36.334426229508196, "grad_norm": 7.5700788497924805, "learning_rate": 1.47166303271981e-05, "loss": 1.1078, "step": 11082 }, { "epoch": 36.337704918032784, "grad_norm": 6.846557140350342, "learning_rate": 1.471569395109437e-05, "loss": 0.7643, "step": 11083 }, { "epoch": 36.34098360655738, "grad_norm": 6.3260345458984375, "learning_rate": 1.4714757521816288e-05, "loss": 0.5137, "step": 11084 }, { "epoch": 36.34426229508197, "grad_norm": 8.17413330078125, "learning_rate": 1.4713821039374413e-05, "loss": 0.865, "step": 11085 }, { "epoch": 36.34754098360656, "grad_norm": 7.868664741516113, "learning_rate": 1.4712884503779304e-05, "loss": 0.8044, "step": 11086 }, { "epoch": 36.350819672131145, "grad_norm": 7.9400200843811035, "learning_rate": 1.4711947915041522e-05, "loss": 0.7134, "step": 11087 }, { "epoch": 36.35409836065574, "grad_norm": 13.753103256225586, "learning_rate": 1.471101127317163e-05, "loss": 0.7412, "step": 11088 }, { "epoch": 36.35737704918033, "grad_norm": 7.808506011962891, "learning_rate": 1.4710074578180184e-05, "loss": 0.8484, "step": 11089 }, { "epoch": 36.36065573770492, "grad_norm": 6.762600898742676, "learning_rate": 1.4709137830077754e-05, "loss": 0.7333, "step": 11090 }, { "epoch": 36.363934426229505, "grad_norm": 14.634796142578125, "learning_rate": 1.4708201028874896e-05, "loss": 1.0069, "step": 11091 }, { "epoch": 36.3672131147541, "grad_norm": 8.055277824401855, "learning_rate": 1.4707264174582178e-05, "loss": 0.7104, "step": 11092 }, { "epoch": 36.37049180327869, "grad_norm": 7.005551338195801, "learning_rate": 1.470632726721016e-05, "loss": 0.8102, "step": 11093 }, { "epoch": 36.37377049180328, "grad_norm": 7.554540634155273, "learning_rate": 1.4705390306769412e-05, "loss": 0.6317, "step": 11094 }, { "epoch": 36.377049180327866, "grad_norm": 8.910754203796387, "learning_rate": 1.470445329327049e-05, "loss": 0.9198, "step": 11095 }, { "epoch": 36.38032786885246, "grad_norm": 8.539412498474121, "learning_rate": 1.4703516226723974e-05, "loss": 1.0379, "step": 11096 }, { "epoch": 36.38360655737705, "grad_norm": 8.564618110656738, "learning_rate": 1.4702579107140413e-05, "loss": 1.0428, "step": 11097 }, { "epoch": 36.38688524590164, "grad_norm": 6.433891773223877, "learning_rate": 1.470164193453039e-05, "loss": 1.0171, "step": 11098 }, { "epoch": 36.390163934426226, "grad_norm": 8.78714370727539, "learning_rate": 1.4700704708904461e-05, "loss": 0.8225, "step": 11099 }, { "epoch": 36.39344262295082, "grad_norm": 6.763310432434082, "learning_rate": 1.4699767430273202e-05, "loss": 0.9227, "step": 11100 }, { "epoch": 36.39672131147541, "grad_norm": 8.928878784179688, "learning_rate": 1.469883009864718e-05, "loss": 0.6593, "step": 11101 }, { "epoch": 36.4, "grad_norm": 6.4724040031433105, "learning_rate": 1.4697892714036959e-05, "loss": 0.8313, "step": 11102 }, { "epoch": 36.40327868852459, "grad_norm": 7.365674018859863, "learning_rate": 1.4696955276453113e-05, "loss": 0.9788, "step": 11103 }, { "epoch": 36.40655737704918, "grad_norm": 8.611135482788086, "learning_rate": 1.4696017785906218e-05, "loss": 0.8913, "step": 11104 }, { "epoch": 36.40983606557377, "grad_norm": 8.005705833435059, "learning_rate": 1.4695080242406834e-05, "loss": 0.6605, "step": 11105 }, { "epoch": 36.41311475409836, "grad_norm": 7.226456642150879, "learning_rate": 1.469414264596554e-05, "loss": 1.0629, "step": 11106 }, { "epoch": 36.41639344262295, "grad_norm": 13.794827461242676, "learning_rate": 1.4693204996592909e-05, "loss": 0.8004, "step": 11107 }, { "epoch": 36.41967213114754, "grad_norm": 11.08092212677002, "learning_rate": 1.4692267294299512e-05, "loss": 0.8859, "step": 11108 }, { "epoch": 36.42295081967213, "grad_norm": 7.09329080581665, "learning_rate": 1.469132953909592e-05, "loss": 0.6706, "step": 11109 }, { "epoch": 36.42622950819672, "grad_norm": 8.08116626739502, "learning_rate": 1.4690391730992711e-05, "loss": 0.749, "step": 11110 }, { "epoch": 36.429508196721315, "grad_norm": 8.464945793151855, "learning_rate": 1.4689453870000461e-05, "loss": 0.9644, "step": 11111 }, { "epoch": 36.4327868852459, "grad_norm": 13.468232154846191, "learning_rate": 1.4688515956129738e-05, "loss": 0.8106, "step": 11112 }, { "epoch": 36.43606557377049, "grad_norm": 7.716624736785889, "learning_rate": 1.4687577989391127e-05, "loss": 0.7916, "step": 11113 }, { "epoch": 36.43934426229508, "grad_norm": 9.640729904174805, "learning_rate": 1.4686639969795199e-05, "loss": 0.9156, "step": 11114 }, { "epoch": 36.442622950819676, "grad_norm": 6.874207973480225, "learning_rate": 1.4685701897352532e-05, "loss": 0.8965, "step": 11115 }, { "epoch": 36.445901639344264, "grad_norm": 7.5640997886657715, "learning_rate": 1.4684763772073702e-05, "loss": 0.7761, "step": 11116 }, { "epoch": 36.44918032786885, "grad_norm": 7.851397514343262, "learning_rate": 1.4683825593969293e-05, "loss": 0.8918, "step": 11117 }, { "epoch": 36.45245901639344, "grad_norm": 8.201029777526855, "learning_rate": 1.468288736304988e-05, "loss": 0.6638, "step": 11118 }, { "epoch": 36.455737704918036, "grad_norm": 6.640456199645996, "learning_rate": 1.4681949079326041e-05, "loss": 0.7233, "step": 11119 }, { "epoch": 36.459016393442624, "grad_norm": 8.058135986328125, "learning_rate": 1.468101074280836e-05, "loss": 0.9104, "step": 11120 }, { "epoch": 36.46229508196721, "grad_norm": 10.067231178283691, "learning_rate": 1.4680072353507414e-05, "loss": 0.8541, "step": 11121 }, { "epoch": 36.4655737704918, "grad_norm": 10.502010345458984, "learning_rate": 1.4679133911433789e-05, "loss": 0.9862, "step": 11122 }, { "epoch": 36.4688524590164, "grad_norm": 7.987493991851807, "learning_rate": 1.467819541659806e-05, "loss": 0.561, "step": 11123 }, { "epoch": 36.472131147540985, "grad_norm": 7.575984001159668, "learning_rate": 1.4677256869010817e-05, "loss": 1.0287, "step": 11124 }, { "epoch": 36.47540983606557, "grad_norm": 6.684974193572998, "learning_rate": 1.4676318268682637e-05, "loss": 0.8384, "step": 11125 }, { "epoch": 36.47868852459016, "grad_norm": 7.10746955871582, "learning_rate": 1.467537961562411e-05, "loss": 0.6835, "step": 11126 }, { "epoch": 36.48196721311476, "grad_norm": 39.654335021972656, "learning_rate": 1.4674440909845813e-05, "loss": 0.7905, "step": 11127 }, { "epoch": 36.485245901639345, "grad_norm": 9.121382713317871, "learning_rate": 1.4673502151358338e-05, "loss": 0.8462, "step": 11128 }, { "epoch": 36.488524590163934, "grad_norm": 7.545872211456299, "learning_rate": 1.4672563340172265e-05, "loss": 0.7911, "step": 11129 }, { "epoch": 36.49180327868852, "grad_norm": 8.068957328796387, "learning_rate": 1.4671624476298182e-05, "loss": 0.8911, "step": 11130 }, { "epoch": 36.49508196721312, "grad_norm": 6.559834957122803, "learning_rate": 1.4670685559746675e-05, "loss": 1.1548, "step": 11131 }, { "epoch": 36.498360655737706, "grad_norm": 8.969884872436523, "learning_rate": 1.4669746590528334e-05, "loss": 0.8988, "step": 11132 }, { "epoch": 36.501639344262294, "grad_norm": 8.482584953308105, "learning_rate": 1.4668807568653743e-05, "loss": 0.6717, "step": 11133 }, { "epoch": 36.50491803278688, "grad_norm": 7.98360013961792, "learning_rate": 1.4667868494133493e-05, "loss": 0.9852, "step": 11134 }, { "epoch": 36.50819672131148, "grad_norm": 9.30077838897705, "learning_rate": 1.4666929366978172e-05, "loss": 0.6254, "step": 11135 }, { "epoch": 36.511475409836066, "grad_norm": 7.296881198883057, "learning_rate": 1.4665990187198372e-05, "loss": 0.946, "step": 11136 }, { "epoch": 36.514754098360655, "grad_norm": 17.847095489501953, "learning_rate": 1.4665050954804679e-05, "loss": 0.909, "step": 11137 }, { "epoch": 36.51803278688524, "grad_norm": 7.775369167327881, "learning_rate": 1.4664111669807686e-05, "loss": 0.5775, "step": 11138 }, { "epoch": 36.52131147540984, "grad_norm": 7.411437511444092, "learning_rate": 1.4663172332217986e-05, "loss": 0.7417, "step": 11139 }, { "epoch": 36.52459016393443, "grad_norm": 10.285333633422852, "learning_rate": 1.4662232942046169e-05, "loss": 0.8914, "step": 11140 }, { "epoch": 36.527868852459015, "grad_norm": 7.391848087310791, "learning_rate": 1.466129349930283e-05, "loss": 0.8909, "step": 11141 }, { "epoch": 36.5311475409836, "grad_norm": 6.111509323120117, "learning_rate": 1.4660354003998558e-05, "loss": 0.8956, "step": 11142 }, { "epoch": 36.5344262295082, "grad_norm": 11.487824440002441, "learning_rate": 1.4659414456143948e-05, "loss": 0.7265, "step": 11143 }, { "epoch": 36.53770491803279, "grad_norm": 7.674001216888428, "learning_rate": 1.4658474855749597e-05, "loss": 0.986, "step": 11144 }, { "epoch": 36.540983606557376, "grad_norm": 8.349464416503906, "learning_rate": 1.4657535202826097e-05, "loss": 0.9642, "step": 11145 }, { "epoch": 36.544262295081964, "grad_norm": 10.021378517150879, "learning_rate": 1.4656595497384048e-05, "loss": 0.7478, "step": 11146 }, { "epoch": 36.54754098360656, "grad_norm": 43.91370391845703, "learning_rate": 1.4655655739434038e-05, "loss": 0.9896, "step": 11147 }, { "epoch": 36.55081967213115, "grad_norm": 5.556975841522217, "learning_rate": 1.4654715928986673e-05, "loss": 0.9383, "step": 11148 }, { "epoch": 36.554098360655736, "grad_norm": 7.0694804191589355, "learning_rate": 1.4653776066052545e-05, "loss": 0.8454, "step": 11149 }, { "epoch": 36.557377049180324, "grad_norm": 8.828359603881836, "learning_rate": 1.4652836150642253e-05, "loss": 0.9028, "step": 11150 }, { "epoch": 36.56065573770492, "grad_norm": 7.815197944641113, "learning_rate": 1.4651896182766396e-05, "loss": 0.8939, "step": 11151 }, { "epoch": 36.56393442622951, "grad_norm": 6.517923831939697, "learning_rate": 1.4650956162435575e-05, "loss": 0.8366, "step": 11152 }, { "epoch": 36.5672131147541, "grad_norm": 7.873880386352539, "learning_rate": 1.4650016089660383e-05, "loss": 0.7425, "step": 11153 }, { "epoch": 36.570491803278685, "grad_norm": 7.341925144195557, "learning_rate": 1.464907596445143e-05, "loss": 0.7897, "step": 11154 }, { "epoch": 36.57377049180328, "grad_norm": 7.0232157707214355, "learning_rate": 1.4648135786819308e-05, "loss": 1.0214, "step": 11155 }, { "epoch": 36.57704918032787, "grad_norm": 8.404939651489258, "learning_rate": 1.4647195556774623e-05, "loss": 0.8422, "step": 11156 }, { "epoch": 36.58032786885246, "grad_norm": 7.199296951293945, "learning_rate": 1.4646255274327974e-05, "loss": 0.746, "step": 11157 }, { "epoch": 36.58360655737705, "grad_norm": 6.983241558074951, "learning_rate": 1.464531493948997e-05, "loss": 0.8807, "step": 11158 }, { "epoch": 36.58688524590164, "grad_norm": 22.579763412475586, "learning_rate": 1.4644374552271208e-05, "loss": 1.0477, "step": 11159 }, { "epoch": 36.59016393442623, "grad_norm": 7.261860370635986, "learning_rate": 1.4643434112682293e-05, "loss": 0.565, "step": 11160 }, { "epoch": 36.59344262295082, "grad_norm": 10.096330642700195, "learning_rate": 1.4642493620733831e-05, "loss": 0.6941, "step": 11161 }, { "epoch": 36.59672131147541, "grad_norm": 5.714062213897705, "learning_rate": 1.4641553076436426e-05, "loss": 1.1205, "step": 11162 }, { "epoch": 36.6, "grad_norm": 6.648802280426025, "learning_rate": 1.4640612479800686e-05, "loss": 0.7216, "step": 11163 }, { "epoch": 36.60327868852459, "grad_norm": 8.715375900268555, "learning_rate": 1.4639671830837214e-05, "loss": 0.8628, "step": 11164 }, { "epoch": 36.60655737704918, "grad_norm": 8.638692855834961, "learning_rate": 1.4638731129556617e-05, "loss": 0.9321, "step": 11165 }, { "epoch": 36.609836065573774, "grad_norm": 6.97047758102417, "learning_rate": 1.4637790375969506e-05, "loss": 0.985, "step": 11166 }, { "epoch": 36.61311475409836, "grad_norm": 7.030257225036621, "learning_rate": 1.4636849570086487e-05, "loss": 0.7707, "step": 11167 }, { "epoch": 36.61639344262295, "grad_norm": 8.655823707580566, "learning_rate": 1.4635908711918165e-05, "loss": 0.7687, "step": 11168 }, { "epoch": 36.61967213114754, "grad_norm": 9.271791458129883, "learning_rate": 1.4634967801475152e-05, "loss": 0.7949, "step": 11169 }, { "epoch": 36.622950819672134, "grad_norm": 7.571730613708496, "learning_rate": 1.463402683876806e-05, "loss": 0.938, "step": 11170 }, { "epoch": 36.62622950819672, "grad_norm": 6.498270034790039, "learning_rate": 1.4633085823807496e-05, "loss": 1.0219, "step": 11171 }, { "epoch": 36.62950819672131, "grad_norm": 7.5297627449035645, "learning_rate": 1.4632144756604074e-05, "loss": 1.0227, "step": 11172 }, { "epoch": 36.6327868852459, "grad_norm": 8.075071334838867, "learning_rate": 1.4631203637168403e-05, "loss": 0.9504, "step": 11173 }, { "epoch": 36.636065573770495, "grad_norm": 7.173560619354248, "learning_rate": 1.4630262465511095e-05, "loss": 0.7921, "step": 11174 }, { "epoch": 36.63934426229508, "grad_norm": 6.828119277954102, "learning_rate": 1.4629321241642762e-05, "loss": 1.0139, "step": 11175 }, { "epoch": 36.64262295081967, "grad_norm": 13.46979808807373, "learning_rate": 1.462837996557402e-05, "loss": 0.7358, "step": 11176 }, { "epoch": 36.64590163934426, "grad_norm": 8.328105926513672, "learning_rate": 1.4627438637315483e-05, "loss": 0.8244, "step": 11177 }, { "epoch": 36.649180327868855, "grad_norm": 6.05449914932251, "learning_rate": 1.4626497256877762e-05, "loss": 1.0184, "step": 11178 }, { "epoch": 36.65245901639344, "grad_norm": 9.444014549255371, "learning_rate": 1.4625555824271474e-05, "loss": 1.0041, "step": 11179 }, { "epoch": 36.65573770491803, "grad_norm": 8.237432479858398, "learning_rate": 1.4624614339507237e-05, "loss": 0.8273, "step": 11180 }, { "epoch": 36.65901639344262, "grad_norm": 8.493433952331543, "learning_rate": 1.4623672802595663e-05, "loss": 0.8627, "step": 11181 }, { "epoch": 36.662295081967216, "grad_norm": 6.177042007446289, "learning_rate": 1.4622731213547372e-05, "loss": 1.06, "step": 11182 }, { "epoch": 36.665573770491804, "grad_norm": 7.594724178314209, "learning_rate": 1.462178957237298e-05, "loss": 0.758, "step": 11183 }, { "epoch": 36.66885245901639, "grad_norm": 11.712461471557617, "learning_rate": 1.4620847879083103e-05, "loss": 0.7335, "step": 11184 }, { "epoch": 36.67213114754098, "grad_norm": 6.512190341949463, "learning_rate": 1.4619906133688362e-05, "loss": 0.8678, "step": 11185 }, { "epoch": 36.675409836065576, "grad_norm": 6.594761848449707, "learning_rate": 1.4618964336199378e-05, "loss": 0.6856, "step": 11186 }, { "epoch": 36.678688524590164, "grad_norm": 9.23537540435791, "learning_rate": 1.4618022486626766e-05, "loss": 0.7161, "step": 11187 }, { "epoch": 36.68196721311475, "grad_norm": 9.021048545837402, "learning_rate": 1.4617080584981151e-05, "loss": 1.0588, "step": 11188 }, { "epoch": 36.68524590163934, "grad_norm": 7.017021179199219, "learning_rate": 1.4616138631273152e-05, "loss": 0.7296, "step": 11189 }, { "epoch": 36.68852459016394, "grad_norm": 7.2556376457214355, "learning_rate": 1.4615196625513389e-05, "loss": 0.7844, "step": 11190 }, { "epoch": 36.691803278688525, "grad_norm": 5.5747575759887695, "learning_rate": 1.4614254567712487e-05, "loss": 1.1283, "step": 11191 }, { "epoch": 36.69508196721311, "grad_norm": 8.12246322631836, "learning_rate": 1.4613312457881063e-05, "loss": 0.7635, "step": 11192 }, { "epoch": 36.6983606557377, "grad_norm": 6.846642971038818, "learning_rate": 1.461237029602975e-05, "loss": 0.9901, "step": 11193 }, { "epoch": 36.7016393442623, "grad_norm": 6.627722263336182, "learning_rate": 1.4611428082169166e-05, "loss": 0.7016, "step": 11194 }, { "epoch": 36.704918032786885, "grad_norm": 32.592472076416016, "learning_rate": 1.4610485816309931e-05, "loss": 0.5965, "step": 11195 }, { "epoch": 36.708196721311474, "grad_norm": 8.861900329589844, "learning_rate": 1.460954349846268e-05, "loss": 0.6252, "step": 11196 }, { "epoch": 36.71147540983607, "grad_norm": 6.460844039916992, "learning_rate": 1.4608601128638027e-05, "loss": 1.1763, "step": 11197 }, { "epoch": 36.71475409836066, "grad_norm": 13.780776023864746, "learning_rate": 1.460765870684661e-05, "loss": 0.9679, "step": 11198 }, { "epoch": 36.718032786885246, "grad_norm": 6.534177780151367, "learning_rate": 1.4606716233099048e-05, "loss": 1.0085, "step": 11199 }, { "epoch": 36.721311475409834, "grad_norm": 8.258406639099121, "learning_rate": 1.4605773707405972e-05, "loss": 0.778, "step": 11200 }, { "epoch": 36.72459016393443, "grad_norm": 8.974994659423828, "learning_rate": 1.4604831129778005e-05, "loss": 0.8395, "step": 11201 }, { "epoch": 36.72786885245902, "grad_norm": 6.039310932159424, "learning_rate": 1.4603888500225785e-05, "loss": 0.7606, "step": 11202 }, { "epoch": 36.731147540983606, "grad_norm": 7.286314010620117, "learning_rate": 1.4602945818759931e-05, "loss": 0.6782, "step": 11203 }, { "epoch": 36.734426229508195, "grad_norm": 8.377528190612793, "learning_rate": 1.4602003085391079e-05, "loss": 0.8308, "step": 11204 }, { "epoch": 36.73770491803279, "grad_norm": 8.64507007598877, "learning_rate": 1.4601060300129857e-05, "loss": 0.8265, "step": 11205 }, { "epoch": 36.74098360655738, "grad_norm": 7.486342430114746, "learning_rate": 1.4600117462986895e-05, "loss": 0.837, "step": 11206 }, { "epoch": 36.74426229508197, "grad_norm": 7.246687889099121, "learning_rate": 1.4599174573972828e-05, "loss": 0.6106, "step": 11207 }, { "epoch": 36.747540983606555, "grad_norm": 8.191529273986816, "learning_rate": 1.4598231633098283e-05, "loss": 0.6396, "step": 11208 }, { "epoch": 36.75081967213115, "grad_norm": 6.764096736907959, "learning_rate": 1.4597288640373898e-05, "loss": 1.0695, "step": 11209 }, { "epoch": 36.75409836065574, "grad_norm": 7.237008094787598, "learning_rate": 1.4596345595810301e-05, "loss": 1.0993, "step": 11210 }, { "epoch": 36.75737704918033, "grad_norm": 6.3693928718566895, "learning_rate": 1.4595402499418132e-05, "loss": 0.9122, "step": 11211 }, { "epoch": 36.760655737704916, "grad_norm": 6.013175964355469, "learning_rate": 1.4594459351208019e-05, "loss": 0.7145, "step": 11212 }, { "epoch": 36.76393442622951, "grad_norm": 7.351370811462402, "learning_rate": 1.45935161511906e-05, "loss": 0.8445, "step": 11213 }, { "epoch": 36.7672131147541, "grad_norm": 9.725768089294434, "learning_rate": 1.4592572899376509e-05, "loss": 0.9059, "step": 11214 }, { "epoch": 36.77049180327869, "grad_norm": 11.489124298095703, "learning_rate": 1.4591629595776384e-05, "loss": 0.9427, "step": 11215 }, { "epoch": 36.773770491803276, "grad_norm": 8.055845260620117, "learning_rate": 1.4590686240400862e-05, "loss": 0.7502, "step": 11216 }, { "epoch": 36.77704918032787, "grad_norm": 7.303882122039795, "learning_rate": 1.4589742833260581e-05, "loss": 0.7109, "step": 11217 }, { "epoch": 36.78032786885246, "grad_norm": 10.160003662109375, "learning_rate": 1.4588799374366174e-05, "loss": 0.9277, "step": 11218 }, { "epoch": 36.78360655737705, "grad_norm": 9.655468940734863, "learning_rate": 1.4587855863728287e-05, "loss": 0.7885, "step": 11219 }, { "epoch": 36.78688524590164, "grad_norm": 7.856849670410156, "learning_rate": 1.4586912301357553e-05, "loss": 0.9571, "step": 11220 }, { "epoch": 36.79016393442623, "grad_norm": 7.876307010650635, "learning_rate": 1.4585968687264613e-05, "loss": 0.9058, "step": 11221 }, { "epoch": 36.79344262295082, "grad_norm": 8.116868019104004, "learning_rate": 1.458502502146011e-05, "loss": 0.9574, "step": 11222 }, { "epoch": 36.79672131147541, "grad_norm": 7.428988456726074, "learning_rate": 1.4584081303954681e-05, "loss": 0.8858, "step": 11223 }, { "epoch": 36.8, "grad_norm": 7.424729824066162, "learning_rate": 1.4583137534758968e-05, "loss": 0.6899, "step": 11224 }, { "epoch": 36.80327868852459, "grad_norm": 8.758708000183105, "learning_rate": 1.4582193713883617e-05, "loss": 1.1002, "step": 11225 }, { "epoch": 36.80655737704918, "grad_norm": 10.506377220153809, "learning_rate": 1.4581249841339267e-05, "loss": 0.8224, "step": 11226 }, { "epoch": 36.80983606557377, "grad_norm": 10.126980781555176, "learning_rate": 1.4580305917136559e-05, "loss": 0.7852, "step": 11227 }, { "epoch": 36.81311475409836, "grad_norm": 9.850624084472656, "learning_rate": 1.4579361941286142e-05, "loss": 0.995, "step": 11228 }, { "epoch": 36.81639344262295, "grad_norm": 9.090499877929688, "learning_rate": 1.4578417913798655e-05, "loss": 0.8592, "step": 11229 }, { "epoch": 36.81967213114754, "grad_norm": 6.730146408081055, "learning_rate": 1.4577473834684749e-05, "loss": 0.8508, "step": 11230 }, { "epoch": 36.82295081967213, "grad_norm": 6.769001483917236, "learning_rate": 1.4576529703955062e-05, "loss": 0.6948, "step": 11231 }, { "epoch": 36.82622950819672, "grad_norm": 6.890468597412109, "learning_rate": 1.4575585521620248e-05, "loss": 0.8067, "step": 11232 }, { "epoch": 36.829508196721314, "grad_norm": 7.8337531089782715, "learning_rate": 1.4574641287690949e-05, "loss": 0.8031, "step": 11233 }, { "epoch": 36.8327868852459, "grad_norm": 24.71352767944336, "learning_rate": 1.4573697002177814e-05, "loss": 0.6479, "step": 11234 }, { "epoch": 36.83606557377049, "grad_norm": 22.284303665161133, "learning_rate": 1.4572752665091487e-05, "loss": 0.8229, "step": 11235 }, { "epoch": 36.83934426229508, "grad_norm": 7.049270153045654, "learning_rate": 1.4571808276442621e-05, "loss": 0.8056, "step": 11236 }, { "epoch": 36.842622950819674, "grad_norm": 8.944954872131348, "learning_rate": 1.4570863836241865e-05, "loss": 1.1245, "step": 11237 }, { "epoch": 36.84590163934426, "grad_norm": 7.091008186340332, "learning_rate": 1.4569919344499863e-05, "loss": 0.9708, "step": 11238 }, { "epoch": 36.84918032786885, "grad_norm": 9.71160888671875, "learning_rate": 1.4568974801227268e-05, "loss": 0.8502, "step": 11239 }, { "epoch": 36.85245901639344, "grad_norm": 10.861650466918945, "learning_rate": 1.4568030206434735e-05, "loss": 0.7535, "step": 11240 }, { "epoch": 36.855737704918035, "grad_norm": 8.17896556854248, "learning_rate": 1.4567085560132912e-05, "loss": 0.8186, "step": 11241 }, { "epoch": 36.85901639344262, "grad_norm": 106.21381378173828, "learning_rate": 1.456614086233245e-05, "loss": 0.8353, "step": 11242 }, { "epoch": 36.86229508196721, "grad_norm": 10.15918254852295, "learning_rate": 1.4565196113044002e-05, "loss": 1.0562, "step": 11243 }, { "epoch": 36.86557377049181, "grad_norm": 6.991917610168457, "learning_rate": 1.456425131227822e-05, "loss": 0.9635, "step": 11244 }, { "epoch": 36.868852459016395, "grad_norm": 7.903894901275635, "learning_rate": 1.456330646004576e-05, "loss": 0.6932, "step": 11245 }, { "epoch": 36.87213114754098, "grad_norm": 7.955077171325684, "learning_rate": 1.4562361556357276e-05, "loss": 0.7223, "step": 11246 }, { "epoch": 36.87540983606557, "grad_norm": 9.307366371154785, "learning_rate": 1.456141660122342e-05, "loss": 0.8064, "step": 11247 }, { "epoch": 36.87868852459017, "grad_norm": 8.46739673614502, "learning_rate": 1.456047159465485e-05, "loss": 0.7927, "step": 11248 }, { "epoch": 36.881967213114756, "grad_norm": 7.715935230255127, "learning_rate": 1.4559526536662221e-05, "loss": 0.9062, "step": 11249 }, { "epoch": 36.885245901639344, "grad_norm": 7.709571838378906, "learning_rate": 1.4558581427256191e-05, "loss": 0.9301, "step": 11250 }, { "epoch": 36.88852459016393, "grad_norm": 12.36152458190918, "learning_rate": 1.4557636266447413e-05, "loss": 1.0535, "step": 11251 }, { "epoch": 36.89180327868853, "grad_norm": 7.844059944152832, "learning_rate": 1.4556691054246547e-05, "loss": 0.9338, "step": 11252 }, { "epoch": 36.895081967213116, "grad_norm": 9.013567924499512, "learning_rate": 1.4555745790664253e-05, "loss": 0.9285, "step": 11253 }, { "epoch": 36.898360655737704, "grad_norm": 7.688643455505371, "learning_rate": 1.4554800475711191e-05, "loss": 0.8589, "step": 11254 }, { "epoch": 36.90163934426229, "grad_norm": 10.668888092041016, "learning_rate": 1.4553855109398013e-05, "loss": 0.7194, "step": 11255 }, { "epoch": 36.90491803278689, "grad_norm": 11.731328010559082, "learning_rate": 1.4552909691735388e-05, "loss": 0.9099, "step": 11256 }, { "epoch": 36.90819672131148, "grad_norm": 9.699204444885254, "learning_rate": 1.455196422273397e-05, "loss": 0.978, "step": 11257 }, { "epoch": 36.911475409836065, "grad_norm": 7.2749786376953125, "learning_rate": 1.4551018702404424e-05, "loss": 1.0987, "step": 11258 }, { "epoch": 36.91475409836065, "grad_norm": 11.444503784179688, "learning_rate": 1.4550073130757409e-05, "loss": 1.1053, "step": 11259 }, { "epoch": 36.91803278688525, "grad_norm": 8.524292945861816, "learning_rate": 1.4549127507803589e-05, "loss": 0.9098, "step": 11260 }, { "epoch": 36.92131147540984, "grad_norm": 7.839303970336914, "learning_rate": 1.4548181833553628e-05, "loss": 0.9404, "step": 11261 }, { "epoch": 36.924590163934425, "grad_norm": 9.784320831298828, "learning_rate": 1.4547236108018185e-05, "loss": 0.8981, "step": 11262 }, { "epoch": 36.927868852459014, "grad_norm": 11.627386093139648, "learning_rate": 1.4546290331207929e-05, "loss": 0.9243, "step": 11263 }, { "epoch": 36.93114754098361, "grad_norm": 8.250932693481445, "learning_rate": 1.454534450313352e-05, "loss": 1.0762, "step": 11264 }, { "epoch": 36.9344262295082, "grad_norm": 9.361166954040527, "learning_rate": 1.4544398623805629e-05, "loss": 0.9679, "step": 11265 }, { "epoch": 36.937704918032786, "grad_norm": 10.494014739990234, "learning_rate": 1.4543452693234916e-05, "loss": 1.0291, "step": 11266 }, { "epoch": 36.940983606557374, "grad_norm": 6.969033718109131, "learning_rate": 1.4542506711432049e-05, "loss": 0.7832, "step": 11267 }, { "epoch": 36.94426229508197, "grad_norm": 9.942000389099121, "learning_rate": 1.45415606784077e-05, "loss": 0.9474, "step": 11268 }, { "epoch": 36.94754098360656, "grad_norm": 13.1555757522583, "learning_rate": 1.4540614594172529e-05, "loss": 0.8065, "step": 11269 }, { "epoch": 36.950819672131146, "grad_norm": 6.068378448486328, "learning_rate": 1.4539668458737206e-05, "loss": 0.9142, "step": 11270 }, { "epoch": 36.954098360655735, "grad_norm": 14.543827056884766, "learning_rate": 1.4538722272112406e-05, "loss": 0.7977, "step": 11271 }, { "epoch": 36.95737704918033, "grad_norm": 8.594503402709961, "learning_rate": 1.4537776034308791e-05, "loss": 0.7015, "step": 11272 }, { "epoch": 36.96065573770492, "grad_norm": 8.294319152832031, "learning_rate": 1.4536829745337034e-05, "loss": 0.9501, "step": 11273 }, { "epoch": 36.96393442622951, "grad_norm": 8.913203239440918, "learning_rate": 1.4535883405207802e-05, "loss": 0.7474, "step": 11274 }, { "epoch": 36.967213114754095, "grad_norm": 9.337092399597168, "learning_rate": 1.4534937013931769e-05, "loss": 0.8557, "step": 11275 }, { "epoch": 36.97049180327869, "grad_norm": 9.072589874267578, "learning_rate": 1.4533990571519607e-05, "loss": 1.0458, "step": 11276 }, { "epoch": 36.97377049180328, "grad_norm": 13.143099784851074, "learning_rate": 1.4533044077981987e-05, "loss": 0.9208, "step": 11277 }, { "epoch": 36.97704918032787, "grad_norm": 10.089821815490723, "learning_rate": 1.4532097533329583e-05, "loss": 0.8355, "step": 11278 }, { "epoch": 36.980327868852456, "grad_norm": 9.32504653930664, "learning_rate": 1.4531150937573064e-05, "loss": 0.7348, "step": 11279 }, { "epoch": 36.98360655737705, "grad_norm": 8.152935981750488, "learning_rate": 1.4530204290723112e-05, "loss": 0.9037, "step": 11280 }, { "epoch": 36.98688524590164, "grad_norm": 8.958888053894043, "learning_rate": 1.4529257592790391e-05, "loss": 0.9687, "step": 11281 }, { "epoch": 36.99016393442623, "grad_norm": 8.726292610168457, "learning_rate": 1.4528310843785584e-05, "loss": 0.9074, "step": 11282 }, { "epoch": 36.993442622950816, "grad_norm": 8.674138069152832, "learning_rate": 1.4527364043719362e-05, "loss": 0.9189, "step": 11283 }, { "epoch": 36.99672131147541, "grad_norm": 10.317041397094727, "learning_rate": 1.4526417192602407e-05, "loss": 0.8906, "step": 11284 }, { "epoch": 37.0, "grad_norm": 8.263612747192383, "learning_rate": 1.4525470290445392e-05, "loss": 0.8177, "step": 11285 }, { "epoch": 37.00327868852459, "grad_norm": 5.647734642028809, "learning_rate": 1.4524523337258991e-05, "loss": 0.7983, "step": 11286 }, { "epoch": 37.006557377049184, "grad_norm": 9.01938247680664, "learning_rate": 1.4523576333053885e-05, "loss": 0.8653, "step": 11287 }, { "epoch": 37.00983606557377, "grad_norm": 7.624495506286621, "learning_rate": 1.4522629277840754e-05, "loss": 0.8976, "step": 11288 }, { "epoch": 37.01311475409836, "grad_norm": 7.008213043212891, "learning_rate": 1.4521682171630276e-05, "loss": 0.6848, "step": 11289 }, { "epoch": 37.01639344262295, "grad_norm": 7.20653772354126, "learning_rate": 1.4520735014433127e-05, "loss": 0.9329, "step": 11290 }, { "epoch": 37.019672131147544, "grad_norm": 9.10506534576416, "learning_rate": 1.4519787806259992e-05, "loss": 0.8859, "step": 11291 }, { "epoch": 37.02295081967213, "grad_norm": 6.437341690063477, "learning_rate": 1.451884054712155e-05, "loss": 1.0383, "step": 11292 }, { "epoch": 37.02622950819672, "grad_norm": 7.859292030334473, "learning_rate": 1.4517893237028483e-05, "loss": 0.7459, "step": 11293 }, { "epoch": 37.02950819672131, "grad_norm": 7.533991813659668, "learning_rate": 1.4516945875991472e-05, "loss": 0.9716, "step": 11294 }, { "epoch": 37.032786885245905, "grad_norm": 8.638507843017578, "learning_rate": 1.4515998464021199e-05, "loss": 0.7615, "step": 11295 }, { "epoch": 37.03606557377049, "grad_norm": 12.288698196411133, "learning_rate": 1.4515051001128352e-05, "loss": 0.712, "step": 11296 }, { "epoch": 37.03934426229508, "grad_norm": 6.800210475921631, "learning_rate": 1.4514103487323605e-05, "loss": 1.022, "step": 11297 }, { "epoch": 37.04262295081967, "grad_norm": 8.606212615966797, "learning_rate": 1.4513155922617652e-05, "loss": 0.8538, "step": 11298 }, { "epoch": 37.045901639344265, "grad_norm": 7.538832664489746, "learning_rate": 1.451220830702117e-05, "loss": 0.8607, "step": 11299 }, { "epoch": 37.049180327868854, "grad_norm": 29.822179794311523, "learning_rate": 1.4511260640544851e-05, "loss": 0.8145, "step": 11300 }, { "epoch": 37.05245901639344, "grad_norm": 8.178572654724121, "learning_rate": 1.4510312923199375e-05, "loss": 0.7095, "step": 11301 }, { "epoch": 37.05573770491803, "grad_norm": 8.005565643310547, "learning_rate": 1.4509365154995431e-05, "loss": 0.8763, "step": 11302 }, { "epoch": 37.059016393442626, "grad_norm": 8.229963302612305, "learning_rate": 1.4508417335943707e-05, "loss": 0.7781, "step": 11303 }, { "epoch": 37.062295081967214, "grad_norm": 16.042510986328125, "learning_rate": 1.4507469466054893e-05, "loss": 0.7766, "step": 11304 }, { "epoch": 37.0655737704918, "grad_norm": 7.329085350036621, "learning_rate": 1.4506521545339671e-05, "loss": 0.8521, "step": 11305 }, { "epoch": 37.06885245901639, "grad_norm": 6.576017379760742, "learning_rate": 1.4505573573808733e-05, "loss": 0.9282, "step": 11306 }, { "epoch": 37.072131147540986, "grad_norm": 6.818398475646973, "learning_rate": 1.450462555147277e-05, "loss": 0.8113, "step": 11307 }, { "epoch": 37.075409836065575, "grad_norm": 19.877395629882812, "learning_rate": 1.450367747834247e-05, "loss": 0.9474, "step": 11308 }, { "epoch": 37.07868852459016, "grad_norm": 8.208544731140137, "learning_rate": 1.4502729354428521e-05, "loss": 0.5701, "step": 11309 }, { "epoch": 37.08196721311475, "grad_norm": 7.368229866027832, "learning_rate": 1.4501781179741622e-05, "loss": 0.5973, "step": 11310 }, { "epoch": 37.08524590163935, "grad_norm": 8.26459789276123, "learning_rate": 1.4500832954292456e-05, "loss": 0.7037, "step": 11311 }, { "epoch": 37.088524590163935, "grad_norm": 8.334277153015137, "learning_rate": 1.4499884678091717e-05, "loss": 0.7106, "step": 11312 }, { "epoch": 37.09180327868852, "grad_norm": 7.829067707061768, "learning_rate": 1.4498936351150103e-05, "loss": 0.7235, "step": 11313 }, { "epoch": 37.09508196721311, "grad_norm": 10.196702003479004, "learning_rate": 1.4497987973478305e-05, "loss": 0.8182, "step": 11314 }, { "epoch": 37.09836065573771, "grad_norm": 8.550482749938965, "learning_rate": 1.4497039545087013e-05, "loss": 1.1227, "step": 11315 }, { "epoch": 37.101639344262296, "grad_norm": 8.77608585357666, "learning_rate": 1.4496091065986923e-05, "loss": 0.7875, "step": 11316 }, { "epoch": 37.104918032786884, "grad_norm": 6.948475360870361, "learning_rate": 1.4495142536188735e-05, "loss": 0.7576, "step": 11317 }, { "epoch": 37.10819672131147, "grad_norm": 7.5533671379089355, "learning_rate": 1.4494193955703138e-05, "loss": 0.6866, "step": 11318 }, { "epoch": 37.11147540983607, "grad_norm": 6.981894493103027, "learning_rate": 1.4493245324540835e-05, "loss": 0.8714, "step": 11319 }, { "epoch": 37.114754098360656, "grad_norm": 6.084556579589844, "learning_rate": 1.4492296642712516e-05, "loss": 0.5594, "step": 11320 }, { "epoch": 37.118032786885244, "grad_norm": 6.047963619232178, "learning_rate": 1.4491347910228881e-05, "loss": 0.7656, "step": 11321 }, { "epoch": 37.12131147540983, "grad_norm": 6.782155513763428, "learning_rate": 1.4490399127100631e-05, "loss": 0.7286, "step": 11322 }, { "epoch": 37.12459016393443, "grad_norm": 6.514803886413574, "learning_rate": 1.448945029333846e-05, "loss": 0.9355, "step": 11323 }, { "epoch": 37.12786885245902, "grad_norm": 10.255057334899902, "learning_rate": 1.4488501408953073e-05, "loss": 0.6816, "step": 11324 }, { "epoch": 37.131147540983605, "grad_norm": 7.971828460693359, "learning_rate": 1.448755247395516e-05, "loss": 0.8251, "step": 11325 }, { "epoch": 37.13442622950819, "grad_norm": 6.5793776512146, "learning_rate": 1.448660348835543e-05, "loss": 0.8751, "step": 11326 }, { "epoch": 37.13770491803279, "grad_norm": 6.831593036651611, "learning_rate": 1.4485654452164581e-05, "loss": 0.6518, "step": 11327 }, { "epoch": 37.14098360655738, "grad_norm": 6.833871841430664, "learning_rate": 1.4484705365393314e-05, "loss": 1.111, "step": 11328 }, { "epoch": 37.144262295081965, "grad_norm": 9.011520385742188, "learning_rate": 1.448375622805233e-05, "loss": 0.8048, "step": 11329 }, { "epoch": 37.14754098360656, "grad_norm": 10.114457130432129, "learning_rate": 1.4482807040152335e-05, "loss": 0.6623, "step": 11330 }, { "epoch": 37.15081967213115, "grad_norm": 10.841802597045898, "learning_rate": 1.4481857801704027e-05, "loss": 0.595, "step": 11331 }, { "epoch": 37.15409836065574, "grad_norm": 10.683982849121094, "learning_rate": 1.4480908512718115e-05, "loss": 0.7372, "step": 11332 }, { "epoch": 37.157377049180326, "grad_norm": 7.681792736053467, "learning_rate": 1.4479959173205298e-05, "loss": 0.6476, "step": 11333 }, { "epoch": 37.16065573770492, "grad_norm": 7.893265724182129, "learning_rate": 1.4479009783176285e-05, "loss": 0.8127, "step": 11334 }, { "epoch": 37.16393442622951, "grad_norm": 14.435247421264648, "learning_rate": 1.447806034264178e-05, "loss": 0.6878, "step": 11335 }, { "epoch": 37.1672131147541, "grad_norm": 5.133462429046631, "learning_rate": 1.4477110851612488e-05, "loss": 1.0194, "step": 11336 }, { "epoch": 37.170491803278686, "grad_norm": 15.746506690979004, "learning_rate": 1.4476161310099115e-05, "loss": 0.7999, "step": 11337 }, { "epoch": 37.17377049180328, "grad_norm": 6.546868801116943, "learning_rate": 1.447521171811237e-05, "loss": 0.9932, "step": 11338 }, { "epoch": 37.17704918032787, "grad_norm": 8.688690185546875, "learning_rate": 1.4474262075662962e-05, "loss": 0.8679, "step": 11339 }, { "epoch": 37.18032786885246, "grad_norm": 7.761402606964111, "learning_rate": 1.4473312382761592e-05, "loss": 0.664, "step": 11340 }, { "epoch": 37.18360655737705, "grad_norm": 7.892730712890625, "learning_rate": 1.4472362639418978e-05, "loss": 0.7646, "step": 11341 }, { "epoch": 37.18688524590164, "grad_norm": 6.608625411987305, "learning_rate": 1.4471412845645822e-05, "loss": 0.8681, "step": 11342 }, { "epoch": 37.19016393442623, "grad_norm": 8.404888153076172, "learning_rate": 1.4470463001452841e-05, "loss": 0.752, "step": 11343 }, { "epoch": 37.19344262295082, "grad_norm": 11.404685020446777, "learning_rate": 1.446951310685074e-05, "loss": 0.7826, "step": 11344 }, { "epoch": 37.19672131147541, "grad_norm": 7.061469078063965, "learning_rate": 1.4468563161850232e-05, "loss": 0.8831, "step": 11345 }, { "epoch": 37.2, "grad_norm": 7.691799640655518, "learning_rate": 1.4467613166462024e-05, "loss": 0.8317, "step": 11346 }, { "epoch": 37.20327868852459, "grad_norm": 7.369356155395508, "learning_rate": 1.4466663120696837e-05, "loss": 0.9503, "step": 11347 }, { "epoch": 37.20655737704918, "grad_norm": 6.561379432678223, "learning_rate": 1.4465713024565378e-05, "loss": 0.627, "step": 11348 }, { "epoch": 37.20983606557377, "grad_norm": 8.465331077575684, "learning_rate": 1.446476287807836e-05, "loss": 1.0505, "step": 11349 }, { "epoch": 37.21311475409836, "grad_norm": 7.681771278381348, "learning_rate": 1.4463812681246499e-05, "loss": 0.7319, "step": 11350 }, { "epoch": 37.21639344262295, "grad_norm": 6.193016052246094, "learning_rate": 1.4462862434080509e-05, "loss": 0.8245, "step": 11351 }, { "epoch": 37.21967213114754, "grad_norm": 6.339562892913818, "learning_rate": 1.4461912136591106e-05, "loss": 0.7286, "step": 11352 }, { "epoch": 37.22295081967213, "grad_norm": 7.014449119567871, "learning_rate": 1.4460961788789003e-05, "loss": 0.881, "step": 11353 }, { "epoch": 37.226229508196724, "grad_norm": 7.792283535003662, "learning_rate": 1.4460011390684918e-05, "loss": 0.76, "step": 11354 }, { "epoch": 37.22950819672131, "grad_norm": 7.247941493988037, "learning_rate": 1.4459060942289567e-05, "loss": 0.6551, "step": 11355 }, { "epoch": 37.2327868852459, "grad_norm": 7.790025234222412, "learning_rate": 1.445811044361367e-05, "loss": 0.8611, "step": 11356 }, { "epoch": 37.23606557377049, "grad_norm": 8.978407859802246, "learning_rate": 1.4457159894667938e-05, "loss": 1.0511, "step": 11357 }, { "epoch": 37.239344262295084, "grad_norm": 10.41557502746582, "learning_rate": 1.44562092954631e-05, "loss": 0.8423, "step": 11358 }, { "epoch": 37.24262295081967, "grad_norm": 6.916234016418457, "learning_rate": 1.4455258646009864e-05, "loss": 0.701, "step": 11359 }, { "epoch": 37.24590163934426, "grad_norm": 6.777075290679932, "learning_rate": 1.4454307946318955e-05, "loss": 0.667, "step": 11360 }, { "epoch": 37.24918032786885, "grad_norm": 6.481141567230225, "learning_rate": 1.4453357196401096e-05, "loss": 0.7739, "step": 11361 }, { "epoch": 37.252459016393445, "grad_norm": 15.806417465209961, "learning_rate": 1.4452406396267e-05, "loss": 1.0876, "step": 11362 }, { "epoch": 37.25573770491803, "grad_norm": 6.488063812255859, "learning_rate": 1.4451455545927396e-05, "loss": 1.0332, "step": 11363 }, { "epoch": 37.25901639344262, "grad_norm": 8.514113426208496, "learning_rate": 1.4450504645393e-05, "loss": 0.9468, "step": 11364 }, { "epoch": 37.26229508196721, "grad_norm": 7.346636772155762, "learning_rate": 1.4449553694674539e-05, "loss": 0.57, "step": 11365 }, { "epoch": 37.265573770491805, "grad_norm": 15.466022491455078, "learning_rate": 1.444860269378273e-05, "loss": 0.7101, "step": 11366 }, { "epoch": 37.268852459016394, "grad_norm": 6.997800350189209, "learning_rate": 1.4447651642728304e-05, "loss": 1.0104, "step": 11367 }, { "epoch": 37.27213114754098, "grad_norm": 7.243351459503174, "learning_rate": 1.4446700541521978e-05, "loss": 0.6919, "step": 11368 }, { "epoch": 37.27540983606557, "grad_norm": 6.956790447235107, "learning_rate": 1.4445749390174484e-05, "loss": 0.748, "step": 11369 }, { "epoch": 37.278688524590166, "grad_norm": 33.622928619384766, "learning_rate": 1.4444798188696539e-05, "loss": 0.7373, "step": 11370 }, { "epoch": 37.281967213114754, "grad_norm": 6.843276500701904, "learning_rate": 1.4443846937098875e-05, "loss": 0.9693, "step": 11371 }, { "epoch": 37.28524590163934, "grad_norm": 7.286499977111816, "learning_rate": 1.4442895635392216e-05, "loss": 0.5905, "step": 11372 }, { "epoch": 37.28852459016394, "grad_norm": 14.154009819030762, "learning_rate": 1.444194428358729e-05, "loss": 0.7749, "step": 11373 }, { "epoch": 37.291803278688526, "grad_norm": 7.208313941955566, "learning_rate": 1.4440992881694822e-05, "loss": 0.6327, "step": 11374 }, { "epoch": 37.295081967213115, "grad_norm": 7.635448455810547, "learning_rate": 1.4440041429725545e-05, "loss": 0.8353, "step": 11375 }, { "epoch": 37.2983606557377, "grad_norm": 8.258026123046875, "learning_rate": 1.4439089927690183e-05, "loss": 0.822, "step": 11376 }, { "epoch": 37.3016393442623, "grad_norm": 7.451219081878662, "learning_rate": 1.4438138375599465e-05, "loss": 0.7667, "step": 11377 }, { "epoch": 37.30491803278689, "grad_norm": 12.959503173828125, "learning_rate": 1.4437186773464122e-05, "loss": 0.9152, "step": 11378 }, { "epoch": 37.308196721311475, "grad_norm": 9.128085136413574, "learning_rate": 1.4436235121294886e-05, "loss": 1.1851, "step": 11379 }, { "epoch": 37.31147540983606, "grad_norm": 8.01034164428711, "learning_rate": 1.4435283419102487e-05, "loss": 0.8729, "step": 11380 }, { "epoch": 37.31475409836066, "grad_norm": 6.643662929534912, "learning_rate": 1.4434331666897654e-05, "loss": 0.8413, "step": 11381 }, { "epoch": 37.31803278688525, "grad_norm": 12.827290534973145, "learning_rate": 1.4433379864691123e-05, "loss": 0.7041, "step": 11382 }, { "epoch": 37.321311475409836, "grad_norm": 6.816972255706787, "learning_rate": 1.4432428012493622e-05, "loss": 0.7699, "step": 11383 }, { "epoch": 37.324590163934424, "grad_norm": 6.3607072830200195, "learning_rate": 1.4431476110315892e-05, "loss": 0.7164, "step": 11384 }, { "epoch": 37.32786885245902, "grad_norm": 20.0880126953125, "learning_rate": 1.4430524158168655e-05, "loss": 0.8303, "step": 11385 }, { "epoch": 37.33114754098361, "grad_norm": 6.817434787750244, "learning_rate": 1.4429572156062654e-05, "loss": 0.8486, "step": 11386 }, { "epoch": 37.334426229508196, "grad_norm": 7.305306911468506, "learning_rate": 1.4428620104008623e-05, "loss": 1.0426, "step": 11387 }, { "epoch": 37.337704918032784, "grad_norm": 8.348531723022461, "learning_rate": 1.4427668002017294e-05, "loss": 0.7325, "step": 11388 }, { "epoch": 37.34098360655738, "grad_norm": 8.31180477142334, "learning_rate": 1.4426715850099404e-05, "loss": 0.8306, "step": 11389 }, { "epoch": 37.34426229508197, "grad_norm": 10.766221046447754, "learning_rate": 1.442576364826569e-05, "loss": 0.6904, "step": 11390 }, { "epoch": 37.34754098360656, "grad_norm": 6.875072479248047, "learning_rate": 1.4424811396526892e-05, "loss": 0.9, "step": 11391 }, { "epoch": 37.350819672131145, "grad_norm": 7.101551532745361, "learning_rate": 1.4423859094893744e-05, "loss": 0.8912, "step": 11392 }, { "epoch": 37.35409836065574, "grad_norm": 5.767651081085205, "learning_rate": 1.4422906743376984e-05, "loss": 0.8076, "step": 11393 }, { "epoch": 37.35737704918033, "grad_norm": 7.554713726043701, "learning_rate": 1.4421954341987351e-05, "loss": 0.816, "step": 11394 }, { "epoch": 37.36065573770492, "grad_norm": 7.055295944213867, "learning_rate": 1.4421001890735586e-05, "loss": 1.1332, "step": 11395 }, { "epoch": 37.363934426229505, "grad_norm": 7.136486053466797, "learning_rate": 1.4420049389632426e-05, "loss": 0.7314, "step": 11396 }, { "epoch": 37.3672131147541, "grad_norm": 8.26211166381836, "learning_rate": 1.4419096838688617e-05, "loss": 0.5917, "step": 11397 }, { "epoch": 37.37049180327869, "grad_norm": 8.83993148803711, "learning_rate": 1.4418144237914896e-05, "loss": 1.0638, "step": 11398 }, { "epoch": 37.37377049180328, "grad_norm": 8.24224853515625, "learning_rate": 1.4417191587322004e-05, "loss": 0.8905, "step": 11399 }, { "epoch": 37.377049180327866, "grad_norm": 7.242853164672852, "learning_rate": 1.4416238886920684e-05, "loss": 0.6655, "step": 11400 }, { "epoch": 37.38032786885246, "grad_norm": 7.0886054039001465, "learning_rate": 1.4415286136721678e-05, "loss": 0.6135, "step": 11401 }, { "epoch": 37.38360655737705, "grad_norm": 9.95383358001709, "learning_rate": 1.4414333336735731e-05, "loss": 0.9071, "step": 11402 }, { "epoch": 37.38688524590164, "grad_norm": 5.394582748413086, "learning_rate": 1.4413380486973585e-05, "loss": 0.738, "step": 11403 }, { "epoch": 37.390163934426226, "grad_norm": 6.740528583526611, "learning_rate": 1.4412427587445987e-05, "loss": 0.7982, "step": 11404 }, { "epoch": 37.39344262295082, "grad_norm": 7.364223957061768, "learning_rate": 1.4411474638163675e-05, "loss": 0.6712, "step": 11405 }, { "epoch": 37.39672131147541, "grad_norm": 6.055296897888184, "learning_rate": 1.4410521639137407e-05, "loss": 1.0311, "step": 11406 }, { "epoch": 37.4, "grad_norm": 7.225000858306885, "learning_rate": 1.4409568590377918e-05, "loss": 0.8245, "step": 11407 }, { "epoch": 37.40327868852459, "grad_norm": 15.487188339233398, "learning_rate": 1.4408615491895958e-05, "loss": 0.7465, "step": 11408 }, { "epoch": 37.40655737704918, "grad_norm": 11.439262390136719, "learning_rate": 1.4407662343702275e-05, "loss": 0.9218, "step": 11409 }, { "epoch": 37.40983606557377, "grad_norm": 10.903196334838867, "learning_rate": 1.4406709145807618e-05, "loss": 0.7818, "step": 11410 }, { "epoch": 37.41311475409836, "grad_norm": 60.505741119384766, "learning_rate": 1.4405755898222733e-05, "loss": 0.7492, "step": 11411 }, { "epoch": 37.41639344262295, "grad_norm": 21.062700271606445, "learning_rate": 1.440480260095837e-05, "loss": 0.9792, "step": 11412 }, { "epoch": 37.41967213114754, "grad_norm": 8.31350326538086, "learning_rate": 1.4403849254025276e-05, "loss": 1.1265, "step": 11413 }, { "epoch": 37.42295081967213, "grad_norm": 7.423435688018799, "learning_rate": 1.4402895857434202e-05, "loss": 0.8214, "step": 11414 }, { "epoch": 37.42622950819672, "grad_norm": 7.849367618560791, "learning_rate": 1.4401942411195902e-05, "loss": 1.0116, "step": 11415 }, { "epoch": 37.429508196721315, "grad_norm": 9.745944023132324, "learning_rate": 1.4400988915321122e-05, "loss": 0.8525, "step": 11416 }, { "epoch": 37.4327868852459, "grad_norm": 8.470050811767578, "learning_rate": 1.4400035369820617e-05, "loss": 0.9924, "step": 11417 }, { "epoch": 37.43606557377049, "grad_norm": 9.008520126342773, "learning_rate": 1.439908177470514e-05, "loss": 0.9011, "step": 11418 }, { "epoch": 37.43934426229508, "grad_norm": 11.120476722717285, "learning_rate": 1.439812812998544e-05, "loss": 0.7885, "step": 11419 }, { "epoch": 37.442622950819676, "grad_norm": 7.696539878845215, "learning_rate": 1.4397174435672273e-05, "loss": 1.0347, "step": 11420 }, { "epoch": 37.445901639344264, "grad_norm": 8.771928787231445, "learning_rate": 1.4396220691776393e-05, "loss": 0.6478, "step": 11421 }, { "epoch": 37.44918032786885, "grad_norm": 7.568858623504639, "learning_rate": 1.439526689830855e-05, "loss": 0.7834, "step": 11422 }, { "epoch": 37.45245901639344, "grad_norm": 8.888193130493164, "learning_rate": 1.4394313055279507e-05, "loss": 1.0109, "step": 11423 }, { "epoch": 37.455737704918036, "grad_norm": 8.425475120544434, "learning_rate": 1.4393359162700014e-05, "loss": 0.7303, "step": 11424 }, { "epoch": 37.459016393442624, "grad_norm": 16.30586814880371, "learning_rate": 1.439240522058083e-05, "loss": 0.9484, "step": 11425 }, { "epoch": 37.46229508196721, "grad_norm": 7.465442180633545, "learning_rate": 1.439145122893271e-05, "loss": 0.6173, "step": 11426 }, { "epoch": 37.4655737704918, "grad_norm": 7.889735698699951, "learning_rate": 1.4390497187766409e-05, "loss": 0.9139, "step": 11427 }, { "epoch": 37.4688524590164, "grad_norm": 5.928305625915527, "learning_rate": 1.4389543097092689e-05, "loss": 0.6938, "step": 11428 }, { "epoch": 37.472131147540985, "grad_norm": 7.620213031768799, "learning_rate": 1.4388588956922304e-05, "loss": 0.7158, "step": 11429 }, { "epoch": 37.47540983606557, "grad_norm": 8.252348899841309, "learning_rate": 1.4387634767266019e-05, "loss": 0.8898, "step": 11430 }, { "epoch": 37.47868852459016, "grad_norm": 22.67323875427246, "learning_rate": 1.4386680528134586e-05, "loss": 0.7619, "step": 11431 }, { "epoch": 37.48196721311476, "grad_norm": 12.51258659362793, "learning_rate": 1.4385726239538772e-05, "loss": 0.8992, "step": 11432 }, { "epoch": 37.485245901639345, "grad_norm": 6.704048156738281, "learning_rate": 1.4384771901489336e-05, "loss": 0.9188, "step": 11433 }, { "epoch": 37.488524590163934, "grad_norm": 7.273578643798828, "learning_rate": 1.4383817513997035e-05, "loss": 0.7336, "step": 11434 }, { "epoch": 37.49180327868852, "grad_norm": 8.176192283630371, "learning_rate": 1.4382863077072635e-05, "loss": 0.6945, "step": 11435 }, { "epoch": 37.49508196721312, "grad_norm": 12.968870162963867, "learning_rate": 1.4381908590726897e-05, "loss": 0.6871, "step": 11436 }, { "epoch": 37.498360655737706, "grad_norm": 9.351551055908203, "learning_rate": 1.4380954054970584e-05, "loss": 0.8217, "step": 11437 }, { "epoch": 37.501639344262294, "grad_norm": 8.897361755371094, "learning_rate": 1.4379999469814458e-05, "loss": 0.973, "step": 11438 }, { "epoch": 37.50491803278688, "grad_norm": 13.113380432128906, "learning_rate": 1.4379044835269285e-05, "loss": 0.945, "step": 11439 }, { "epoch": 37.50819672131148, "grad_norm": 7.258513450622559, "learning_rate": 1.4378090151345827e-05, "loss": 1.1492, "step": 11440 }, { "epoch": 37.511475409836066, "grad_norm": 16.42208480834961, "learning_rate": 1.437713541805485e-05, "loss": 0.7045, "step": 11441 }, { "epoch": 37.514754098360655, "grad_norm": 7.230286121368408, "learning_rate": 1.437618063540712e-05, "loss": 0.8632, "step": 11442 }, { "epoch": 37.51803278688524, "grad_norm": 8.9188232421875, "learning_rate": 1.4375225803413406e-05, "loss": 0.8647, "step": 11443 }, { "epoch": 37.52131147540984, "grad_norm": 8.044244766235352, "learning_rate": 1.4374270922084468e-05, "loss": 0.6383, "step": 11444 }, { "epoch": 37.52459016393443, "grad_norm": 7.3296918869018555, "learning_rate": 1.437331599143108e-05, "loss": 1.007, "step": 11445 }, { "epoch": 37.527868852459015, "grad_norm": 12.001615524291992, "learning_rate": 1.4372361011464006e-05, "loss": 0.8072, "step": 11446 }, { "epoch": 37.5311475409836, "grad_norm": 12.459370613098145, "learning_rate": 1.4371405982194018e-05, "loss": 0.6353, "step": 11447 }, { "epoch": 37.5344262295082, "grad_norm": 8.089088439941406, "learning_rate": 1.4370450903631879e-05, "loss": 0.9366, "step": 11448 }, { "epoch": 37.53770491803279, "grad_norm": 9.547127723693848, "learning_rate": 1.4369495775788364e-05, "loss": 1.0071, "step": 11449 }, { "epoch": 37.540983606557376, "grad_norm": 6.677706718444824, "learning_rate": 1.4368540598674238e-05, "loss": 1.0706, "step": 11450 }, { "epoch": 37.544262295081964, "grad_norm": 8.666656494140625, "learning_rate": 1.436758537230028e-05, "loss": 1.0204, "step": 11451 }, { "epoch": 37.54754098360656, "grad_norm": 7.06594705581665, "learning_rate": 1.4366630096677252e-05, "loss": 0.8854, "step": 11452 }, { "epoch": 37.55081967213115, "grad_norm": 10.889479637145996, "learning_rate": 1.436567477181593e-05, "loss": 0.7169, "step": 11453 }, { "epoch": 37.554098360655736, "grad_norm": 7.3339524269104, "learning_rate": 1.4364719397727085e-05, "loss": 0.774, "step": 11454 }, { "epoch": 37.557377049180324, "grad_norm": 11.270181655883789, "learning_rate": 1.4363763974421492e-05, "loss": 0.9061, "step": 11455 }, { "epoch": 37.56065573770492, "grad_norm": 10.239025115966797, "learning_rate": 1.4362808501909921e-05, "loss": 0.8774, "step": 11456 }, { "epoch": 37.56393442622951, "grad_norm": 6.193477630615234, "learning_rate": 1.4361852980203153e-05, "loss": 0.775, "step": 11457 }, { "epoch": 37.5672131147541, "grad_norm": 7.37501335144043, "learning_rate": 1.4360897409311951e-05, "loss": 0.8335, "step": 11458 }, { "epoch": 37.570491803278685, "grad_norm": 6.965292453765869, "learning_rate": 1.4359941789247098e-05, "loss": 0.5495, "step": 11459 }, { "epoch": 37.57377049180328, "grad_norm": 7.051629066467285, "learning_rate": 1.4358986120019372e-05, "loss": 0.8693, "step": 11460 }, { "epoch": 37.57704918032787, "grad_norm": 8.245072364807129, "learning_rate": 1.4358030401639543e-05, "loss": 0.8596, "step": 11461 }, { "epoch": 37.58032786885246, "grad_norm": 8.208232879638672, "learning_rate": 1.4357074634118391e-05, "loss": 0.9582, "step": 11462 }, { "epoch": 37.58360655737705, "grad_norm": 6.973901748657227, "learning_rate": 1.4356118817466689e-05, "loss": 0.947, "step": 11463 }, { "epoch": 37.58688524590164, "grad_norm": 21.585590362548828, "learning_rate": 1.435516295169522e-05, "loss": 0.8947, "step": 11464 }, { "epoch": 37.59016393442623, "grad_norm": 6.649355411529541, "learning_rate": 1.4354207036814764e-05, "loss": 0.9136, "step": 11465 }, { "epoch": 37.59344262295082, "grad_norm": 8.66114330291748, "learning_rate": 1.4353251072836092e-05, "loss": 0.6823, "step": 11466 }, { "epoch": 37.59672131147541, "grad_norm": 8.199350357055664, "learning_rate": 1.435229505976999e-05, "loss": 0.8029, "step": 11467 }, { "epoch": 37.6, "grad_norm": 6.8508148193359375, "learning_rate": 1.4351338997627233e-05, "loss": 0.9061, "step": 11468 }, { "epoch": 37.60327868852459, "grad_norm": 11.817927360534668, "learning_rate": 1.4350382886418609e-05, "loss": 0.8475, "step": 11469 }, { "epoch": 37.60655737704918, "grad_norm": 5.858245849609375, "learning_rate": 1.4349426726154891e-05, "loss": 1.0487, "step": 11470 }, { "epoch": 37.609836065573774, "grad_norm": 9.666229248046875, "learning_rate": 1.4348470516846866e-05, "loss": 0.8938, "step": 11471 }, { "epoch": 37.61311475409836, "grad_norm": 8.127416610717773, "learning_rate": 1.4347514258505314e-05, "loss": 0.9267, "step": 11472 }, { "epoch": 37.61639344262295, "grad_norm": 8.305288314819336, "learning_rate": 1.434655795114102e-05, "loss": 0.8864, "step": 11473 }, { "epoch": 37.61967213114754, "grad_norm": 8.366690635681152, "learning_rate": 1.4345601594764765e-05, "loss": 0.7498, "step": 11474 }, { "epoch": 37.622950819672134, "grad_norm": 6.974803924560547, "learning_rate": 1.4344645189387332e-05, "loss": 0.8796, "step": 11475 }, { "epoch": 37.62622950819672, "grad_norm": 7.491985321044922, "learning_rate": 1.4343688735019508e-05, "loss": 0.8614, "step": 11476 }, { "epoch": 37.62950819672131, "grad_norm": 8.326180458068848, "learning_rate": 1.4342732231672078e-05, "loss": 0.9043, "step": 11477 }, { "epoch": 37.6327868852459, "grad_norm": 7.89654541015625, "learning_rate": 1.4341775679355827e-05, "loss": 0.7048, "step": 11478 }, { "epoch": 37.636065573770495, "grad_norm": 6.156245231628418, "learning_rate": 1.434081907808154e-05, "loss": 0.8434, "step": 11479 }, { "epoch": 37.63934426229508, "grad_norm": 9.013118743896484, "learning_rate": 1.4339862427860007e-05, "loss": 0.8608, "step": 11480 }, { "epoch": 37.64262295081967, "grad_norm": 6.74770975112915, "learning_rate": 1.433890572870201e-05, "loss": 0.9791, "step": 11481 }, { "epoch": 37.64590163934426, "grad_norm": 6.435152053833008, "learning_rate": 1.4337948980618339e-05, "loss": 0.7557, "step": 11482 }, { "epoch": 37.649180327868855, "grad_norm": 8.107579231262207, "learning_rate": 1.4336992183619785e-05, "loss": 0.7078, "step": 11483 }, { "epoch": 37.65245901639344, "grad_norm": 7.001088619232178, "learning_rate": 1.4336035337717134e-05, "loss": 0.7073, "step": 11484 }, { "epoch": 37.65573770491803, "grad_norm": 7.721796989440918, "learning_rate": 1.4335078442921176e-05, "loss": 1.0042, "step": 11485 }, { "epoch": 37.65901639344262, "grad_norm": 9.14535903930664, "learning_rate": 1.4334121499242701e-05, "loss": 1.0067, "step": 11486 }, { "epoch": 37.662295081967216, "grad_norm": 10.90992546081543, "learning_rate": 1.4333164506692499e-05, "loss": 0.7225, "step": 11487 }, { "epoch": 37.665573770491804, "grad_norm": 7.180770397186279, "learning_rate": 1.4332207465281365e-05, "loss": 0.7421, "step": 11488 }, { "epoch": 37.66885245901639, "grad_norm": 6.924752235412598, "learning_rate": 1.4331250375020083e-05, "loss": 0.7248, "step": 11489 }, { "epoch": 37.67213114754098, "grad_norm": 6.091207981109619, "learning_rate": 1.4330293235919455e-05, "loss": 0.9383, "step": 11490 }, { "epoch": 37.675409836065576, "grad_norm": 7.981006622314453, "learning_rate": 1.4329336047990265e-05, "loss": 0.7162, "step": 11491 }, { "epoch": 37.678688524590164, "grad_norm": 9.094135284423828, "learning_rate": 1.4328378811243311e-05, "loss": 0.9957, "step": 11492 }, { "epoch": 37.68196721311475, "grad_norm": 6.640871047973633, "learning_rate": 1.4327421525689384e-05, "loss": 0.798, "step": 11493 }, { "epoch": 37.68524590163934, "grad_norm": 6.861595153808594, "learning_rate": 1.432646419133928e-05, "loss": 0.7468, "step": 11494 }, { "epoch": 37.68852459016394, "grad_norm": 8.74648380279541, "learning_rate": 1.4325506808203795e-05, "loss": 0.6647, "step": 11495 }, { "epoch": 37.691803278688525, "grad_norm": 7.222250938415527, "learning_rate": 1.4324549376293723e-05, "loss": 0.9387, "step": 11496 }, { "epoch": 37.69508196721311, "grad_norm": 6.8185505867004395, "learning_rate": 1.4323591895619862e-05, "loss": 1.0298, "step": 11497 }, { "epoch": 37.6983606557377, "grad_norm": 10.222079277038574, "learning_rate": 1.4322634366193003e-05, "loss": 1.0202, "step": 11498 }, { "epoch": 37.7016393442623, "grad_norm": 9.85888385772705, "learning_rate": 1.4321676788023951e-05, "loss": 0.6658, "step": 11499 }, { "epoch": 37.704918032786885, "grad_norm": 8.330544471740723, "learning_rate": 1.4320719161123497e-05, "loss": 0.7737, "step": 11500 }, { "epoch": 37.708196721311474, "grad_norm": 14.934789657592773, "learning_rate": 1.4319761485502444e-05, "loss": 0.8156, "step": 11501 }, { "epoch": 37.71147540983607, "grad_norm": 8.890684127807617, "learning_rate": 1.4318803761171586e-05, "loss": 0.7252, "step": 11502 }, { "epoch": 37.71475409836066, "grad_norm": 9.835841178894043, "learning_rate": 1.431784598814173e-05, "loss": 0.7429, "step": 11503 }, { "epoch": 37.718032786885246, "grad_norm": 7.196121692657471, "learning_rate": 1.4316888166423669e-05, "loss": 0.7731, "step": 11504 }, { "epoch": 37.721311475409834, "grad_norm": 6.821866512298584, "learning_rate": 1.4315930296028201e-05, "loss": 0.7631, "step": 11505 }, { "epoch": 37.72459016393443, "grad_norm": 8.596697807312012, "learning_rate": 1.431497237696614e-05, "loss": 1.1386, "step": 11506 }, { "epoch": 37.72786885245902, "grad_norm": 9.779561996459961, "learning_rate": 1.4314014409248272e-05, "loss": 0.6287, "step": 11507 }, { "epoch": 37.731147540983606, "grad_norm": 6.360631465911865, "learning_rate": 1.4313056392885409e-05, "loss": 0.8331, "step": 11508 }, { "epoch": 37.734426229508195, "grad_norm": 9.327547073364258, "learning_rate": 1.4312098327888349e-05, "loss": 1.0757, "step": 11509 }, { "epoch": 37.73770491803279, "grad_norm": 11.055573463439941, "learning_rate": 1.4311140214267896e-05, "loss": 0.8022, "step": 11510 }, { "epoch": 37.74098360655738, "grad_norm": 10.85173511505127, "learning_rate": 1.4310182052034858e-05, "loss": 0.739, "step": 11511 }, { "epoch": 37.74426229508197, "grad_norm": 8.276712417602539, "learning_rate": 1.4309223841200032e-05, "loss": 0.8062, "step": 11512 }, { "epoch": 37.747540983606555, "grad_norm": 8.51102352142334, "learning_rate": 1.4308265581774228e-05, "loss": 0.89, "step": 11513 }, { "epoch": 37.75081967213115, "grad_norm": 9.129740715026855, "learning_rate": 1.4307307273768252e-05, "loss": 0.7727, "step": 11514 }, { "epoch": 37.75409836065574, "grad_norm": 7.7123589515686035, "learning_rate": 1.4306348917192907e-05, "loss": 0.755, "step": 11515 }, { "epoch": 37.75737704918033, "grad_norm": 14.306599617004395, "learning_rate": 1.4305390512059e-05, "loss": 1.0256, "step": 11516 }, { "epoch": 37.760655737704916, "grad_norm": 8.190909385681152, "learning_rate": 1.4304432058377342e-05, "loss": 0.8365, "step": 11517 }, { "epoch": 37.76393442622951, "grad_norm": 12.418959617614746, "learning_rate": 1.4303473556158734e-05, "loss": 0.6972, "step": 11518 }, { "epoch": 37.7672131147541, "grad_norm": 9.899160385131836, "learning_rate": 1.4302515005413989e-05, "loss": 0.7673, "step": 11519 }, { "epoch": 37.77049180327869, "grad_norm": 10.239486694335938, "learning_rate": 1.430155640615391e-05, "loss": 0.8498, "step": 11520 }, { "epoch": 37.773770491803276, "grad_norm": 8.646688461303711, "learning_rate": 1.4300597758389315e-05, "loss": 0.8008, "step": 11521 }, { "epoch": 37.77704918032787, "grad_norm": 11.630107879638672, "learning_rate": 1.4299639062131003e-05, "loss": 0.8209, "step": 11522 }, { "epoch": 37.78032786885246, "grad_norm": 7.481750965118408, "learning_rate": 1.4298680317389794e-05, "loss": 0.7206, "step": 11523 }, { "epoch": 37.78360655737705, "grad_norm": 7.9633331298828125, "learning_rate": 1.4297721524176493e-05, "loss": 1.0544, "step": 11524 }, { "epoch": 37.78688524590164, "grad_norm": 7.297097206115723, "learning_rate": 1.4296762682501914e-05, "loss": 0.9004, "step": 11525 }, { "epoch": 37.79016393442623, "grad_norm": 8.167828559875488, "learning_rate": 1.4295803792376868e-05, "loss": 0.8999, "step": 11526 }, { "epoch": 37.79344262295082, "grad_norm": 8.750019073486328, "learning_rate": 1.4294844853812171e-05, "loss": 1.1661, "step": 11527 }, { "epoch": 37.79672131147541, "grad_norm": 8.74988079071045, "learning_rate": 1.429388586681863e-05, "loss": 0.7334, "step": 11528 }, { "epoch": 37.8, "grad_norm": 7.830422401428223, "learning_rate": 1.429292683140706e-05, "loss": 1.0002, "step": 11529 }, { "epoch": 37.80327868852459, "grad_norm": 7.602424144744873, "learning_rate": 1.429196774758828e-05, "loss": 0.7491, "step": 11530 }, { "epoch": 37.80655737704918, "grad_norm": 6.017706871032715, "learning_rate": 1.4291008615373096e-05, "loss": 1.0074, "step": 11531 }, { "epoch": 37.80983606557377, "grad_norm": 6.174261569976807, "learning_rate": 1.4290049434772333e-05, "loss": 0.8655, "step": 11532 }, { "epoch": 37.81311475409836, "grad_norm": 7.015204429626465, "learning_rate": 1.4289090205796798e-05, "loss": 0.6329, "step": 11533 }, { "epoch": 37.81639344262295, "grad_norm": 7.259850025177002, "learning_rate": 1.4288130928457315e-05, "loss": 0.9081, "step": 11534 }, { "epoch": 37.81967213114754, "grad_norm": 7.589026927947998, "learning_rate": 1.4287171602764695e-05, "loss": 0.6911, "step": 11535 }, { "epoch": 37.82295081967213, "grad_norm": 6.346966743469238, "learning_rate": 1.4286212228729758e-05, "loss": 0.6768, "step": 11536 }, { "epoch": 37.82622950819672, "grad_norm": 8.592220306396484, "learning_rate": 1.4285252806363323e-05, "loss": 0.9398, "step": 11537 }, { "epoch": 37.829508196721314, "grad_norm": 7.249444007873535, "learning_rate": 1.4284293335676207e-05, "loss": 0.8088, "step": 11538 }, { "epoch": 37.8327868852459, "grad_norm": 6.789982318878174, "learning_rate": 1.4283333816679228e-05, "loss": 0.9652, "step": 11539 }, { "epoch": 37.83606557377049, "grad_norm": 7.514962673187256, "learning_rate": 1.4282374249383206e-05, "loss": 0.8325, "step": 11540 }, { "epoch": 37.83934426229508, "grad_norm": 14.75890064239502, "learning_rate": 1.4281414633798964e-05, "loss": 0.7555, "step": 11541 }, { "epoch": 37.842622950819674, "grad_norm": 6.541046142578125, "learning_rate": 1.4280454969937319e-05, "loss": 0.8649, "step": 11542 }, { "epoch": 37.84590163934426, "grad_norm": 11.137590408325195, "learning_rate": 1.4279495257809095e-05, "loss": 0.7778, "step": 11543 }, { "epoch": 37.84918032786885, "grad_norm": 14.647848129272461, "learning_rate": 1.427853549742511e-05, "loss": 1.0955, "step": 11544 }, { "epoch": 37.85245901639344, "grad_norm": 8.309566497802734, "learning_rate": 1.427757568879619e-05, "loss": 0.6889, "step": 11545 }, { "epoch": 37.855737704918035, "grad_norm": 7.6768670082092285, "learning_rate": 1.4276615831933156e-05, "loss": 0.6989, "step": 11546 }, { "epoch": 37.85901639344262, "grad_norm": 5.636090278625488, "learning_rate": 1.4275655926846834e-05, "loss": 0.9174, "step": 11547 }, { "epoch": 37.86229508196721, "grad_norm": 8.62482738494873, "learning_rate": 1.4274695973548043e-05, "loss": 0.6565, "step": 11548 }, { "epoch": 37.86557377049181, "grad_norm": 7.047860622406006, "learning_rate": 1.4273735972047613e-05, "loss": 0.6273, "step": 11549 }, { "epoch": 37.868852459016395, "grad_norm": 7.554318428039551, "learning_rate": 1.4272775922356363e-05, "loss": 0.6521, "step": 11550 }, { "epoch": 37.87213114754098, "grad_norm": 11.594684600830078, "learning_rate": 1.4271815824485127e-05, "loss": 0.809, "step": 11551 }, { "epoch": 37.87540983606557, "grad_norm": 7.664538383483887, "learning_rate": 1.427085567844472e-05, "loss": 1.0139, "step": 11552 }, { "epoch": 37.87868852459017, "grad_norm": 7.235647678375244, "learning_rate": 1.426989548424598e-05, "loss": 0.8432, "step": 11553 }, { "epoch": 37.881967213114756, "grad_norm": 7.843686103820801, "learning_rate": 1.4268935241899727e-05, "loss": 1.0439, "step": 11554 }, { "epoch": 37.885245901639344, "grad_norm": 10.393720626831055, "learning_rate": 1.426797495141679e-05, "loss": 0.8191, "step": 11555 }, { "epoch": 37.88852459016393, "grad_norm": 7.812302589416504, "learning_rate": 1.4267014612807997e-05, "loss": 0.627, "step": 11556 }, { "epoch": 37.89180327868853, "grad_norm": 8.188437461853027, "learning_rate": 1.4266054226084178e-05, "loss": 0.7773, "step": 11557 }, { "epoch": 37.895081967213116, "grad_norm": 6.7284698486328125, "learning_rate": 1.4265093791256164e-05, "loss": 0.9041, "step": 11558 }, { "epoch": 37.898360655737704, "grad_norm": 9.446938514709473, "learning_rate": 1.426413330833478e-05, "loss": 0.761, "step": 11559 }, { "epoch": 37.90163934426229, "grad_norm": 10.799469947814941, "learning_rate": 1.4263172777330861e-05, "loss": 0.5163, "step": 11560 }, { "epoch": 37.90491803278689, "grad_norm": 103.20819854736328, "learning_rate": 1.4262212198255236e-05, "loss": 0.8928, "step": 11561 }, { "epoch": 37.90819672131148, "grad_norm": 6.709503650665283, "learning_rate": 1.4261251571118737e-05, "loss": 0.9265, "step": 11562 }, { "epoch": 37.911475409836065, "grad_norm": 7.995664596557617, "learning_rate": 1.4260290895932196e-05, "loss": 0.8612, "step": 11563 }, { "epoch": 37.91475409836065, "grad_norm": 6.727102279663086, "learning_rate": 1.4259330172706443e-05, "loss": 0.8475, "step": 11564 }, { "epoch": 37.91803278688525, "grad_norm": 7.562857151031494, "learning_rate": 1.4258369401452318e-05, "loss": 0.7755, "step": 11565 }, { "epoch": 37.92131147540984, "grad_norm": 8.4026460647583, "learning_rate": 1.4257408582180648e-05, "loss": 0.9612, "step": 11566 }, { "epoch": 37.924590163934425, "grad_norm": 10.770997047424316, "learning_rate": 1.425644771490227e-05, "loss": 0.5909, "step": 11567 }, { "epoch": 37.927868852459014, "grad_norm": 7.752292633056641, "learning_rate": 1.4255486799628018e-05, "loss": 0.8413, "step": 11568 }, { "epoch": 37.93114754098361, "grad_norm": 13.735268592834473, "learning_rate": 1.4254525836368732e-05, "loss": 1.0382, "step": 11569 }, { "epoch": 37.9344262295082, "grad_norm": 7.873308181762695, "learning_rate": 1.4253564825135238e-05, "loss": 0.7949, "step": 11570 }, { "epoch": 37.937704918032786, "grad_norm": 7.846304416656494, "learning_rate": 1.425260376593838e-05, "loss": 0.6327, "step": 11571 }, { "epoch": 37.940983606557374, "grad_norm": 7.071199417114258, "learning_rate": 1.4251642658788991e-05, "loss": 1.0128, "step": 11572 }, { "epoch": 37.94426229508197, "grad_norm": 8.381333351135254, "learning_rate": 1.4250681503697914e-05, "loss": 1.0472, "step": 11573 }, { "epoch": 37.94754098360656, "grad_norm": 9.342321395874023, "learning_rate": 1.4249720300675983e-05, "loss": 0.7682, "step": 11574 }, { "epoch": 37.950819672131146, "grad_norm": 11.227930068969727, "learning_rate": 1.4248759049734035e-05, "loss": 0.9159, "step": 11575 }, { "epoch": 37.954098360655735, "grad_norm": 11.672530174255371, "learning_rate": 1.424779775088291e-05, "loss": 0.7158, "step": 11576 }, { "epoch": 37.95737704918033, "grad_norm": 20.394840240478516, "learning_rate": 1.4246836404133453e-05, "loss": 0.9781, "step": 11577 }, { "epoch": 37.96065573770492, "grad_norm": 9.879027366638184, "learning_rate": 1.4245875009496497e-05, "loss": 0.711, "step": 11578 }, { "epoch": 37.96393442622951, "grad_norm": 8.460948944091797, "learning_rate": 1.4244913566982886e-05, "loss": 0.8465, "step": 11579 }, { "epoch": 37.967213114754095, "grad_norm": 7.918420791625977, "learning_rate": 1.4243952076603462e-05, "loss": 0.8407, "step": 11580 }, { "epoch": 37.97049180327869, "grad_norm": 8.4445219039917, "learning_rate": 1.4242990538369066e-05, "loss": 0.729, "step": 11581 }, { "epoch": 37.97377049180328, "grad_norm": 8.470481872558594, "learning_rate": 1.4242028952290538e-05, "loss": 0.8105, "step": 11582 }, { "epoch": 37.97704918032787, "grad_norm": 7.123018264770508, "learning_rate": 1.4241067318378722e-05, "loss": 0.8472, "step": 11583 }, { "epoch": 37.980327868852456, "grad_norm": 7.538913249969482, "learning_rate": 1.4240105636644468e-05, "loss": 0.7455, "step": 11584 }, { "epoch": 37.98360655737705, "grad_norm": 8.561468124389648, "learning_rate": 1.423914390709861e-05, "loss": 0.6171, "step": 11585 }, { "epoch": 37.98688524590164, "grad_norm": 46.572357177734375, "learning_rate": 1.4238182129751996e-05, "loss": 1.0495, "step": 11586 }, { "epoch": 37.99016393442623, "grad_norm": 7.897739410400391, "learning_rate": 1.4237220304615473e-05, "loss": 0.7292, "step": 11587 }, { "epoch": 37.993442622950816, "grad_norm": 11.929489135742188, "learning_rate": 1.4236258431699887e-05, "loss": 0.8356, "step": 11588 }, { "epoch": 37.99672131147541, "grad_norm": 10.375622749328613, "learning_rate": 1.4235296511016081e-05, "loss": 0.8088, "step": 11589 }, { "epoch": 38.0, "grad_norm": 8.9079008102417, "learning_rate": 1.4234334542574906e-05, "loss": 0.8636, "step": 11590 }, { "epoch": 38.00327868852459, "grad_norm": 6.3844475746154785, "learning_rate": 1.4233372526387202e-05, "loss": 0.9505, "step": 11591 }, { "epoch": 38.006557377049184, "grad_norm": 9.629406929016113, "learning_rate": 1.4232410462463821e-05, "loss": 0.8122, "step": 11592 }, { "epoch": 38.00983606557377, "grad_norm": 6.757080554962158, "learning_rate": 1.4231448350815616e-05, "loss": 1.066, "step": 11593 }, { "epoch": 38.01311475409836, "grad_norm": 11.976222038269043, "learning_rate": 1.4230486191453428e-05, "loss": 0.657, "step": 11594 }, { "epoch": 38.01639344262295, "grad_norm": 10.971471786499023, "learning_rate": 1.4229523984388111e-05, "loss": 0.7823, "step": 11595 }, { "epoch": 38.019672131147544, "grad_norm": 7.511500358581543, "learning_rate": 1.4228561729630513e-05, "loss": 0.718, "step": 11596 }, { "epoch": 38.02295081967213, "grad_norm": 7.607735633850098, "learning_rate": 1.4227599427191485e-05, "loss": 0.8097, "step": 11597 }, { "epoch": 38.02622950819672, "grad_norm": 8.564778327941895, "learning_rate": 1.4226637077081877e-05, "loss": 0.8533, "step": 11598 }, { "epoch": 38.02950819672131, "grad_norm": 9.337423324584961, "learning_rate": 1.4225674679312542e-05, "loss": 0.658, "step": 11599 }, { "epoch": 38.032786885245905, "grad_norm": 7.478079795837402, "learning_rate": 1.422471223389433e-05, "loss": 0.7166, "step": 11600 }, { "epoch": 38.03606557377049, "grad_norm": 6.777475833892822, "learning_rate": 1.42237497408381e-05, "loss": 1.0151, "step": 11601 }, { "epoch": 38.03934426229508, "grad_norm": 7.867922782897949, "learning_rate": 1.4222787200154695e-05, "loss": 0.7609, "step": 11602 }, { "epoch": 38.04262295081967, "grad_norm": 8.01198959350586, "learning_rate": 1.4221824611854975e-05, "loss": 0.7302, "step": 11603 }, { "epoch": 38.045901639344265, "grad_norm": 8.344669342041016, "learning_rate": 1.4220861975949793e-05, "loss": 0.9062, "step": 11604 }, { "epoch": 38.049180327868854, "grad_norm": 8.321647644042969, "learning_rate": 1.4219899292450005e-05, "loss": 0.6511, "step": 11605 }, { "epoch": 38.05245901639344, "grad_norm": 16.12581443786621, "learning_rate": 1.4218936561366465e-05, "loss": 0.6234, "step": 11606 }, { "epoch": 38.05573770491803, "grad_norm": 18.966196060180664, "learning_rate": 1.4217973782710025e-05, "loss": 1.273, "step": 11607 }, { "epoch": 38.059016393442626, "grad_norm": 19.41593360900879, "learning_rate": 1.4217010956491551e-05, "loss": 0.9304, "step": 11608 }, { "epoch": 38.062295081967214, "grad_norm": 7.443686008453369, "learning_rate": 1.4216048082721887e-05, "loss": 0.7429, "step": 11609 }, { "epoch": 38.0655737704918, "grad_norm": 8.437095642089844, "learning_rate": 1.42150851614119e-05, "loss": 0.5919, "step": 11610 }, { "epoch": 38.06885245901639, "grad_norm": 6.826629161834717, "learning_rate": 1.4214122192572446e-05, "loss": 1.0955, "step": 11611 }, { "epoch": 38.072131147540986, "grad_norm": 12.532500267028809, "learning_rate": 1.4213159176214384e-05, "loss": 0.5947, "step": 11612 }, { "epoch": 38.075409836065575, "grad_norm": 10.040616989135742, "learning_rate": 1.4212196112348568e-05, "loss": 0.5976, "step": 11613 }, { "epoch": 38.07868852459016, "grad_norm": 7.161106586456299, "learning_rate": 1.4211233000985867e-05, "loss": 0.9674, "step": 11614 }, { "epoch": 38.08196721311475, "grad_norm": 8.772198677062988, "learning_rate": 1.421026984213713e-05, "loss": 0.6034, "step": 11615 }, { "epoch": 38.08524590163935, "grad_norm": 11.912940979003906, "learning_rate": 1.4209306635813225e-05, "loss": 0.7299, "step": 11616 }, { "epoch": 38.088524590163935, "grad_norm": 8.250792503356934, "learning_rate": 1.4208343382025009e-05, "loss": 0.7322, "step": 11617 }, { "epoch": 38.09180327868852, "grad_norm": 6.3888373374938965, "learning_rate": 1.4207380080783346e-05, "loss": 0.809, "step": 11618 }, { "epoch": 38.09508196721311, "grad_norm": 5.948307514190674, "learning_rate": 1.4206416732099101e-05, "loss": 0.7498, "step": 11619 }, { "epoch": 38.09836065573771, "grad_norm": 6.832138538360596, "learning_rate": 1.4205453335983133e-05, "loss": 1.0076, "step": 11620 }, { "epoch": 38.101639344262296, "grad_norm": 7.360279560089111, "learning_rate": 1.4204489892446308e-05, "loss": 0.5851, "step": 11621 }, { "epoch": 38.104918032786884, "grad_norm": 7.679520130157471, "learning_rate": 1.4203526401499482e-05, "loss": 0.6742, "step": 11622 }, { "epoch": 38.10819672131147, "grad_norm": 7.461571216583252, "learning_rate": 1.420256286315353e-05, "loss": 0.5905, "step": 11623 }, { "epoch": 38.11147540983607, "grad_norm": 7.143040180206299, "learning_rate": 1.420159927741931e-05, "loss": 0.7981, "step": 11624 }, { "epoch": 38.114754098360656, "grad_norm": 8.67618179321289, "learning_rate": 1.420063564430769e-05, "loss": 0.7942, "step": 11625 }, { "epoch": 38.118032786885244, "grad_norm": 11.448131561279297, "learning_rate": 1.4199671963829536e-05, "loss": 1.0229, "step": 11626 }, { "epoch": 38.12131147540983, "grad_norm": 9.767276763916016, "learning_rate": 1.4198708235995712e-05, "loss": 0.8122, "step": 11627 }, { "epoch": 38.12459016393443, "grad_norm": 7.685113430023193, "learning_rate": 1.4197744460817089e-05, "loss": 0.8361, "step": 11628 }, { "epoch": 38.12786885245902, "grad_norm": 6.6397480964660645, "learning_rate": 1.4196780638304535e-05, "loss": 0.7995, "step": 11629 }, { "epoch": 38.131147540983605, "grad_norm": 6.686416149139404, "learning_rate": 1.4195816768468911e-05, "loss": 0.702, "step": 11630 }, { "epoch": 38.13442622950819, "grad_norm": 6.154995918273926, "learning_rate": 1.4194852851321095e-05, "loss": 0.8387, "step": 11631 }, { "epoch": 38.13770491803279, "grad_norm": 7.2314229011535645, "learning_rate": 1.419388888687195e-05, "loss": 0.638, "step": 11632 }, { "epoch": 38.14098360655738, "grad_norm": 7.470821380615234, "learning_rate": 1.4192924875132345e-05, "loss": 0.6777, "step": 11633 }, { "epoch": 38.144262295081965, "grad_norm": 6.464827060699463, "learning_rate": 1.4191960816113154e-05, "loss": 0.8172, "step": 11634 }, { "epoch": 38.14754098360656, "grad_norm": 8.57394790649414, "learning_rate": 1.4190996709825247e-05, "loss": 0.6574, "step": 11635 }, { "epoch": 38.15081967213115, "grad_norm": 10.476968765258789, "learning_rate": 1.4190032556279493e-05, "loss": 0.8392, "step": 11636 }, { "epoch": 38.15409836065574, "grad_norm": 6.442867279052734, "learning_rate": 1.4189068355486765e-05, "loss": 0.8351, "step": 11637 }, { "epoch": 38.157377049180326, "grad_norm": 7.404082298278809, "learning_rate": 1.4188104107457939e-05, "loss": 0.6729, "step": 11638 }, { "epoch": 38.16065573770492, "grad_norm": 8.51107406616211, "learning_rate": 1.4187139812203881e-05, "loss": 0.6417, "step": 11639 }, { "epoch": 38.16393442622951, "grad_norm": 7.5694990158081055, "learning_rate": 1.4186175469735473e-05, "loss": 1.009, "step": 11640 }, { "epoch": 38.1672131147541, "grad_norm": 6.09955358505249, "learning_rate": 1.418521108006358e-05, "loss": 0.8531, "step": 11641 }, { "epoch": 38.170491803278686, "grad_norm": 7.825710773468018, "learning_rate": 1.418424664319908e-05, "loss": 0.9279, "step": 11642 }, { "epoch": 38.17377049180328, "grad_norm": 8.415191650390625, "learning_rate": 1.4183282159152848e-05, "loss": 0.9214, "step": 11643 }, { "epoch": 38.17704918032787, "grad_norm": 8.606860160827637, "learning_rate": 1.4182317627935764e-05, "loss": 0.6481, "step": 11644 }, { "epoch": 38.18032786885246, "grad_norm": 6.2218546867370605, "learning_rate": 1.4181353049558698e-05, "loss": 0.7802, "step": 11645 }, { "epoch": 38.18360655737705, "grad_norm": 5.997751712799072, "learning_rate": 1.4180388424032528e-05, "loss": 0.7538, "step": 11646 }, { "epoch": 38.18688524590164, "grad_norm": 7.551873207092285, "learning_rate": 1.4179423751368133e-05, "loss": 0.7922, "step": 11647 }, { "epoch": 38.19016393442623, "grad_norm": 7.032852649688721, "learning_rate": 1.4178459031576388e-05, "loss": 0.9354, "step": 11648 }, { "epoch": 38.19344262295082, "grad_norm": 13.172036170959473, "learning_rate": 1.4177494264668177e-05, "loss": 0.5763, "step": 11649 }, { "epoch": 38.19672131147541, "grad_norm": 8.542132377624512, "learning_rate": 1.417652945065437e-05, "loss": 0.9532, "step": 11650 }, { "epoch": 38.2, "grad_norm": 6.2803239822387695, "learning_rate": 1.4175564589545853e-05, "loss": 0.8366, "step": 11651 }, { "epoch": 38.20327868852459, "grad_norm": 7.138023853302002, "learning_rate": 1.4174599681353505e-05, "loss": 0.6995, "step": 11652 }, { "epoch": 38.20655737704918, "grad_norm": 7.190837383270264, "learning_rate": 1.4173634726088205e-05, "loss": 0.6974, "step": 11653 }, { "epoch": 38.20983606557377, "grad_norm": 6.546214580535889, "learning_rate": 1.4172669723760833e-05, "loss": 0.7025, "step": 11654 }, { "epoch": 38.21311475409836, "grad_norm": 13.27840805053711, "learning_rate": 1.4171704674382272e-05, "loss": 0.6608, "step": 11655 }, { "epoch": 38.21639344262295, "grad_norm": 6.392624378204346, "learning_rate": 1.4170739577963402e-05, "loss": 0.9428, "step": 11656 }, { "epoch": 38.21967213114754, "grad_norm": 7.171998977661133, "learning_rate": 1.4169774434515108e-05, "loss": 0.7863, "step": 11657 }, { "epoch": 38.22295081967213, "grad_norm": 8.32845687866211, "learning_rate": 1.4168809244048271e-05, "loss": 0.8541, "step": 11658 }, { "epoch": 38.226229508196724, "grad_norm": 6.26371431350708, "learning_rate": 1.4167844006573776e-05, "loss": 0.9741, "step": 11659 }, { "epoch": 38.22950819672131, "grad_norm": 9.414528846740723, "learning_rate": 1.4166878722102506e-05, "loss": 0.7018, "step": 11660 }, { "epoch": 38.2327868852459, "grad_norm": 11.805916786193848, "learning_rate": 1.4165913390645346e-05, "loss": 0.9814, "step": 11661 }, { "epoch": 38.23606557377049, "grad_norm": 8.207345962524414, "learning_rate": 1.4164948012213182e-05, "loss": 0.5503, "step": 11662 }, { "epoch": 38.239344262295084, "grad_norm": 5.981873989105225, "learning_rate": 1.4163982586816895e-05, "loss": 0.9511, "step": 11663 }, { "epoch": 38.24262295081967, "grad_norm": 6.11446475982666, "learning_rate": 1.4163017114467378e-05, "loss": 0.7103, "step": 11664 }, { "epoch": 38.24590163934426, "grad_norm": 8.313767433166504, "learning_rate": 1.4162051595175514e-05, "loss": 0.7146, "step": 11665 }, { "epoch": 38.24918032786885, "grad_norm": 6.277345180511475, "learning_rate": 1.4161086028952193e-05, "loss": 1.0465, "step": 11666 }, { "epoch": 38.252459016393445, "grad_norm": 5.467422962188721, "learning_rate": 1.4160120415808298e-05, "loss": 0.7173, "step": 11667 }, { "epoch": 38.25573770491803, "grad_norm": 6.833220958709717, "learning_rate": 1.4159154755754721e-05, "loss": 0.8863, "step": 11668 }, { "epoch": 38.25901639344262, "grad_norm": 5.516292572021484, "learning_rate": 1.4158189048802346e-05, "loss": 0.7594, "step": 11669 }, { "epoch": 38.26229508196721, "grad_norm": 6.61763334274292, "learning_rate": 1.4157223294962073e-05, "loss": 0.9588, "step": 11670 }, { "epoch": 38.265573770491805, "grad_norm": 6.847316741943359, "learning_rate": 1.4156257494244778e-05, "loss": 0.576, "step": 11671 }, { "epoch": 38.268852459016394, "grad_norm": 7.332890033721924, "learning_rate": 1.4155291646661363e-05, "loss": 0.6834, "step": 11672 }, { "epoch": 38.27213114754098, "grad_norm": 8.557778358459473, "learning_rate": 1.4154325752222712e-05, "loss": 0.9929, "step": 11673 }, { "epoch": 38.27540983606557, "grad_norm": 6.936802864074707, "learning_rate": 1.4153359810939718e-05, "loss": 0.8207, "step": 11674 }, { "epoch": 38.278688524590166, "grad_norm": 7.229499816894531, "learning_rate": 1.4152393822823276e-05, "loss": 0.8247, "step": 11675 }, { "epoch": 38.281967213114754, "grad_norm": 5.853603839874268, "learning_rate": 1.4151427787884276e-05, "loss": 0.7501, "step": 11676 }, { "epoch": 38.28524590163934, "grad_norm": 6.008711338043213, "learning_rate": 1.4150461706133612e-05, "loss": 0.8243, "step": 11677 }, { "epoch": 38.28852459016394, "grad_norm": 7.985689640045166, "learning_rate": 1.4149495577582174e-05, "loss": 0.6591, "step": 11678 }, { "epoch": 38.291803278688526, "grad_norm": 6.647415637969971, "learning_rate": 1.414852940224086e-05, "loss": 0.7779, "step": 11679 }, { "epoch": 38.295081967213115, "grad_norm": 6.643155097961426, "learning_rate": 1.4147563180120562e-05, "loss": 0.7516, "step": 11680 }, { "epoch": 38.2983606557377, "grad_norm": 9.378643035888672, "learning_rate": 1.414659691123218e-05, "loss": 0.8122, "step": 11681 }, { "epoch": 38.3016393442623, "grad_norm": 9.726426124572754, "learning_rate": 1.4145630595586607e-05, "loss": 0.8464, "step": 11682 }, { "epoch": 38.30491803278689, "grad_norm": 6.754894256591797, "learning_rate": 1.4144664233194737e-05, "loss": 0.696, "step": 11683 }, { "epoch": 38.308196721311475, "grad_norm": 7.353553295135498, "learning_rate": 1.4143697824067468e-05, "loss": 0.7133, "step": 11684 }, { "epoch": 38.31147540983606, "grad_norm": 6.435446739196777, "learning_rate": 1.4142731368215696e-05, "loss": 0.8742, "step": 11685 }, { "epoch": 38.31475409836066, "grad_norm": 7.538177490234375, "learning_rate": 1.4141764865650325e-05, "loss": 0.8098, "step": 11686 }, { "epoch": 38.31803278688525, "grad_norm": 7.711347579956055, "learning_rate": 1.4140798316382245e-05, "loss": 0.6827, "step": 11687 }, { "epoch": 38.321311475409836, "grad_norm": 6.041223526000977, "learning_rate": 1.4139831720422362e-05, "loss": 0.5961, "step": 11688 }, { "epoch": 38.324590163934424, "grad_norm": 6.374856472015381, "learning_rate": 1.4138865077781567e-05, "loss": 0.7133, "step": 11689 }, { "epoch": 38.32786885245902, "grad_norm": 7.5117597579956055, "learning_rate": 1.413789838847077e-05, "loss": 0.4942, "step": 11690 }, { "epoch": 38.33114754098361, "grad_norm": 5.410007953643799, "learning_rate": 1.4136931652500863e-05, "loss": 0.7441, "step": 11691 }, { "epoch": 38.334426229508196, "grad_norm": 6.796708106994629, "learning_rate": 1.4135964869882752e-05, "loss": 0.73, "step": 11692 }, { "epoch": 38.337704918032784, "grad_norm": 6.222961902618408, "learning_rate": 1.4134998040627334e-05, "loss": 0.7833, "step": 11693 }, { "epoch": 38.34098360655738, "grad_norm": 7.9855780601501465, "learning_rate": 1.4134031164745517e-05, "loss": 0.8223, "step": 11694 }, { "epoch": 38.34426229508197, "grad_norm": 6.565030574798584, "learning_rate": 1.4133064242248198e-05, "loss": 0.9865, "step": 11695 }, { "epoch": 38.34754098360656, "grad_norm": 13.776899337768555, "learning_rate": 1.4132097273146285e-05, "loss": 0.7101, "step": 11696 }, { "epoch": 38.350819672131145, "grad_norm": 10.115275382995605, "learning_rate": 1.4131130257450677e-05, "loss": 0.7235, "step": 11697 }, { "epoch": 38.35409836065574, "grad_norm": 7.919510841369629, "learning_rate": 1.4130163195172283e-05, "loss": 0.7092, "step": 11698 }, { "epoch": 38.35737704918033, "grad_norm": 7.337751388549805, "learning_rate": 1.4129196086322e-05, "loss": 0.9102, "step": 11699 }, { "epoch": 38.36065573770492, "grad_norm": 9.582131385803223, "learning_rate": 1.4128228930910739e-05, "loss": 0.7673, "step": 11700 }, { "epoch": 38.363934426229505, "grad_norm": 17.762819290161133, "learning_rate": 1.4127261728949407e-05, "loss": 0.7561, "step": 11701 }, { "epoch": 38.3672131147541, "grad_norm": 10.826415061950684, "learning_rate": 1.4126294480448906e-05, "loss": 0.6137, "step": 11702 }, { "epoch": 38.37049180327869, "grad_norm": 6.955933570861816, "learning_rate": 1.4125327185420146e-05, "loss": 0.6797, "step": 11703 }, { "epoch": 38.37377049180328, "grad_norm": 11.43163776397705, "learning_rate": 1.4124359843874029e-05, "loss": 0.8131, "step": 11704 }, { "epoch": 38.377049180327866, "grad_norm": 9.33780288696289, "learning_rate": 1.4123392455821469e-05, "loss": 0.7084, "step": 11705 }, { "epoch": 38.38032786885246, "grad_norm": 16.96792984008789, "learning_rate": 1.4122425021273372e-05, "loss": 0.5914, "step": 11706 }, { "epoch": 38.38360655737705, "grad_norm": 6.974432468414307, "learning_rate": 1.4121457540240649e-05, "loss": 0.9771, "step": 11707 }, { "epoch": 38.38688524590164, "grad_norm": 7.641016006469727, "learning_rate": 1.4120490012734203e-05, "loss": 0.6131, "step": 11708 }, { "epoch": 38.390163934426226, "grad_norm": 6.541407108306885, "learning_rate": 1.411952243876495e-05, "loss": 0.8119, "step": 11709 }, { "epoch": 38.39344262295082, "grad_norm": 6.890824794769287, "learning_rate": 1.4118554818343797e-05, "loss": 0.6581, "step": 11710 }, { "epoch": 38.39672131147541, "grad_norm": 8.969005584716797, "learning_rate": 1.4117587151481656e-05, "loss": 0.7141, "step": 11711 }, { "epoch": 38.4, "grad_norm": 7.744085788726807, "learning_rate": 1.411661943818944e-05, "loss": 0.6608, "step": 11712 }, { "epoch": 38.40327868852459, "grad_norm": 6.935240268707275, "learning_rate": 1.411565167847806e-05, "loss": 0.7518, "step": 11713 }, { "epoch": 38.40655737704918, "grad_norm": 6.683928966522217, "learning_rate": 1.4114683872358428e-05, "loss": 0.7978, "step": 11714 }, { "epoch": 38.40983606557377, "grad_norm": 6.89018440246582, "learning_rate": 1.4113716019841454e-05, "loss": 0.675, "step": 11715 }, { "epoch": 38.41311475409836, "grad_norm": 6.22205114364624, "learning_rate": 1.4112748120938057e-05, "loss": 1.0139, "step": 11716 }, { "epoch": 38.41639344262295, "grad_norm": 8.531909942626953, "learning_rate": 1.4111780175659149e-05, "loss": 0.8206, "step": 11717 }, { "epoch": 38.41967213114754, "grad_norm": 6.70676326751709, "learning_rate": 1.4110812184015646e-05, "loss": 0.6986, "step": 11718 }, { "epoch": 38.42295081967213, "grad_norm": 6.893270969390869, "learning_rate": 1.4109844146018458e-05, "loss": 0.847, "step": 11719 }, { "epoch": 38.42622950819672, "grad_norm": 6.147275447845459, "learning_rate": 1.4108876061678507e-05, "loss": 0.9557, "step": 11720 }, { "epoch": 38.429508196721315, "grad_norm": 7.209970951080322, "learning_rate": 1.4107907931006703e-05, "loss": 0.8411, "step": 11721 }, { "epoch": 38.4327868852459, "grad_norm": 6.3424553871154785, "learning_rate": 1.4106939754013968e-05, "loss": 1.0472, "step": 11722 }, { "epoch": 38.43606557377049, "grad_norm": 6.253347396850586, "learning_rate": 1.410597153071122e-05, "loss": 0.7778, "step": 11723 }, { "epoch": 38.43934426229508, "grad_norm": 7.778670787811279, "learning_rate": 1.410500326110937e-05, "loss": 0.8057, "step": 11724 }, { "epoch": 38.442622950819676, "grad_norm": 9.797432899475098, "learning_rate": 1.4104034945219338e-05, "loss": 0.8897, "step": 11725 }, { "epoch": 38.445901639344264, "grad_norm": 6.985130786895752, "learning_rate": 1.410306658305205e-05, "loss": 0.5594, "step": 11726 }, { "epoch": 38.44918032786885, "grad_norm": 8.66653060913086, "learning_rate": 1.4102098174618417e-05, "loss": 0.8423, "step": 11727 }, { "epoch": 38.45245901639344, "grad_norm": 5.756346702575684, "learning_rate": 1.410112971992936e-05, "loss": 0.7887, "step": 11728 }, { "epoch": 38.455737704918036, "grad_norm": 9.935402870178223, "learning_rate": 1.4100161218995807e-05, "loss": 0.7568, "step": 11729 }, { "epoch": 38.459016393442624, "grad_norm": 6.465209484100342, "learning_rate": 1.409919267182867e-05, "loss": 0.7812, "step": 11730 }, { "epoch": 38.46229508196721, "grad_norm": 7.28457498550415, "learning_rate": 1.4098224078438873e-05, "loss": 0.998, "step": 11731 }, { "epoch": 38.4655737704918, "grad_norm": 7.278117656707764, "learning_rate": 1.4097255438837338e-05, "loss": 0.7431, "step": 11732 }, { "epoch": 38.4688524590164, "grad_norm": 6.25927209854126, "learning_rate": 1.4096286753034992e-05, "loss": 0.8352, "step": 11733 }, { "epoch": 38.472131147540985, "grad_norm": 6.896855354309082, "learning_rate": 1.409531802104275e-05, "loss": 0.6623, "step": 11734 }, { "epoch": 38.47540983606557, "grad_norm": 5.803415775299072, "learning_rate": 1.4094349242871541e-05, "loss": 0.873, "step": 11735 }, { "epoch": 38.47868852459016, "grad_norm": 7.905999660491943, "learning_rate": 1.4093380418532288e-05, "loss": 0.6991, "step": 11736 }, { "epoch": 38.48196721311476, "grad_norm": 6.159153461456299, "learning_rate": 1.4092411548035912e-05, "loss": 0.9181, "step": 11737 }, { "epoch": 38.485245901639345, "grad_norm": 7.358333110809326, "learning_rate": 1.4091442631393341e-05, "loss": 0.9222, "step": 11738 }, { "epoch": 38.488524590163934, "grad_norm": 6.516833305358887, "learning_rate": 1.40904736686155e-05, "loss": 0.8575, "step": 11739 }, { "epoch": 38.49180327868852, "grad_norm": 10.271613121032715, "learning_rate": 1.4089504659713317e-05, "loss": 0.841, "step": 11740 }, { "epoch": 38.49508196721312, "grad_norm": 6.2791876792907715, "learning_rate": 1.4088535604697717e-05, "loss": 0.8606, "step": 11741 }, { "epoch": 38.498360655737706, "grad_norm": 7.909946441650391, "learning_rate": 1.4087566503579628e-05, "loss": 0.8253, "step": 11742 }, { "epoch": 38.501639344262294, "grad_norm": 6.641858100891113, "learning_rate": 1.4086597356369973e-05, "loss": 0.9384, "step": 11743 }, { "epoch": 38.50491803278688, "grad_norm": 7.2064619064331055, "learning_rate": 1.4085628163079688e-05, "loss": 0.8672, "step": 11744 }, { "epoch": 38.50819672131148, "grad_norm": 8.029994010925293, "learning_rate": 1.4084658923719694e-05, "loss": 0.9598, "step": 11745 }, { "epoch": 38.511475409836066, "grad_norm": 8.775003433227539, "learning_rate": 1.4083689638300926e-05, "loss": 0.8438, "step": 11746 }, { "epoch": 38.514754098360655, "grad_norm": 14.49664306640625, "learning_rate": 1.4082720306834312e-05, "loss": 0.9756, "step": 11747 }, { "epoch": 38.51803278688524, "grad_norm": 9.746071815490723, "learning_rate": 1.408175092933078e-05, "loss": 0.8764, "step": 11748 }, { "epoch": 38.52131147540984, "grad_norm": 6.890741348266602, "learning_rate": 1.4080781505801263e-05, "loss": 0.922, "step": 11749 }, { "epoch": 38.52459016393443, "grad_norm": 7.274062156677246, "learning_rate": 1.4079812036256691e-05, "loss": 0.8567, "step": 11750 }, { "epoch": 38.527868852459015, "grad_norm": 13.8351411819458, "learning_rate": 1.4078842520707993e-05, "loss": 0.9679, "step": 11751 }, { "epoch": 38.5311475409836, "grad_norm": 7.7383952140808105, "learning_rate": 1.407787295916611e-05, "loss": 0.7789, "step": 11752 }, { "epoch": 38.5344262295082, "grad_norm": 6.71243953704834, "learning_rate": 1.4076903351641966e-05, "loss": 0.8496, "step": 11753 }, { "epoch": 38.53770491803279, "grad_norm": 9.639703750610352, "learning_rate": 1.40759336981465e-05, "loss": 0.6252, "step": 11754 }, { "epoch": 38.540983606557376, "grad_norm": 7.033107280731201, "learning_rate": 1.4074963998690643e-05, "loss": 0.7359, "step": 11755 }, { "epoch": 38.544262295081964, "grad_norm": 8.268169403076172, "learning_rate": 1.4073994253285328e-05, "loss": 0.7906, "step": 11756 }, { "epoch": 38.54754098360656, "grad_norm": 6.798574924468994, "learning_rate": 1.4073024461941496e-05, "loss": 0.8473, "step": 11757 }, { "epoch": 38.55081967213115, "grad_norm": 8.256195068359375, "learning_rate": 1.4072054624670074e-05, "loss": 0.7904, "step": 11758 }, { "epoch": 38.554098360655736, "grad_norm": 7.868882179260254, "learning_rate": 1.4071084741482006e-05, "loss": 0.9554, "step": 11759 }, { "epoch": 38.557377049180324, "grad_norm": 8.256680488586426, "learning_rate": 1.4070114812388225e-05, "loss": 0.8853, "step": 11760 }, { "epoch": 38.56065573770492, "grad_norm": 8.961671829223633, "learning_rate": 1.4069144837399664e-05, "loss": 0.8081, "step": 11761 }, { "epoch": 38.56393442622951, "grad_norm": 36.10249328613281, "learning_rate": 1.4068174816527267e-05, "loss": 0.8864, "step": 11762 }, { "epoch": 38.5672131147541, "grad_norm": 8.52207088470459, "learning_rate": 1.4067204749781966e-05, "loss": 0.6875, "step": 11763 }, { "epoch": 38.570491803278685, "grad_norm": 17.65182113647461, "learning_rate": 1.4066234637174704e-05, "loss": 0.7281, "step": 11764 }, { "epoch": 38.57377049180328, "grad_norm": 7.217077255249023, "learning_rate": 1.406526447871642e-05, "loss": 0.6655, "step": 11765 }, { "epoch": 38.57704918032787, "grad_norm": 7.533520698547363, "learning_rate": 1.4064294274418053e-05, "loss": 0.697, "step": 11766 }, { "epoch": 38.58032786885246, "grad_norm": 6.274287700653076, "learning_rate": 1.4063324024290539e-05, "loss": 0.9203, "step": 11767 }, { "epoch": 38.58360655737705, "grad_norm": 7.218018054962158, "learning_rate": 1.4062353728344825e-05, "loss": 0.7109, "step": 11768 }, { "epoch": 38.58688524590164, "grad_norm": 6.434396743774414, "learning_rate": 1.4061383386591845e-05, "loss": 0.7917, "step": 11769 }, { "epoch": 38.59016393442623, "grad_norm": 6.03989839553833, "learning_rate": 1.4060412999042547e-05, "loss": 0.929, "step": 11770 }, { "epoch": 38.59344262295082, "grad_norm": 6.980258464813232, "learning_rate": 1.405944256570787e-05, "loss": 0.7786, "step": 11771 }, { "epoch": 38.59672131147541, "grad_norm": 6.103774547576904, "learning_rate": 1.405847208659876e-05, "loss": 0.6929, "step": 11772 }, { "epoch": 38.6, "grad_norm": 6.472476959228516, "learning_rate": 1.4057501561726157e-05, "loss": 0.7551, "step": 11773 }, { "epoch": 38.60327868852459, "grad_norm": 9.809215545654297, "learning_rate": 1.4056530991101e-05, "loss": 0.783, "step": 11774 }, { "epoch": 38.60655737704918, "grad_norm": 6.883854866027832, "learning_rate": 1.4055560374734242e-05, "loss": 0.8101, "step": 11775 }, { "epoch": 38.609836065573774, "grad_norm": 7.440694808959961, "learning_rate": 1.4054589712636824e-05, "loss": 0.8919, "step": 11776 }, { "epoch": 38.61311475409836, "grad_norm": 15.19448471069336, "learning_rate": 1.4053619004819691e-05, "loss": 0.8593, "step": 11777 }, { "epoch": 38.61639344262295, "grad_norm": 10.447669982910156, "learning_rate": 1.405264825129379e-05, "loss": 0.661, "step": 11778 }, { "epoch": 38.61967213114754, "grad_norm": 7.101494312286377, "learning_rate": 1.4051677452070064e-05, "loss": 0.6822, "step": 11779 }, { "epoch": 38.622950819672134, "grad_norm": 7.598463535308838, "learning_rate": 1.4050706607159463e-05, "loss": 0.6248, "step": 11780 }, { "epoch": 38.62622950819672, "grad_norm": 8.478802680969238, "learning_rate": 1.4049735716572934e-05, "loss": 0.8484, "step": 11781 }, { "epoch": 38.62950819672131, "grad_norm": 11.393181800842285, "learning_rate": 1.4048764780321425e-05, "loss": 1.0117, "step": 11782 }, { "epoch": 38.6327868852459, "grad_norm": 6.189411163330078, "learning_rate": 1.404779379841588e-05, "loss": 0.7709, "step": 11783 }, { "epoch": 38.636065573770495, "grad_norm": 6.6503071784973145, "learning_rate": 1.4046822770867254e-05, "loss": 0.6607, "step": 11784 }, { "epoch": 38.63934426229508, "grad_norm": 7.75649356842041, "learning_rate": 1.4045851697686495e-05, "loss": 0.9603, "step": 11785 }, { "epoch": 38.64262295081967, "grad_norm": 7.469099998474121, "learning_rate": 1.404488057888455e-05, "loss": 0.9413, "step": 11786 }, { "epoch": 38.64590163934426, "grad_norm": 5.955715656280518, "learning_rate": 1.404390941447237e-05, "loss": 1.0015, "step": 11787 }, { "epoch": 38.649180327868855, "grad_norm": 6.994668960571289, "learning_rate": 1.4042938204460907e-05, "loss": 0.7461, "step": 11788 }, { "epoch": 38.65245901639344, "grad_norm": 4.924715995788574, "learning_rate": 1.4041966948861113e-05, "loss": 0.8964, "step": 11789 }, { "epoch": 38.65573770491803, "grad_norm": 6.580183029174805, "learning_rate": 1.404099564768394e-05, "loss": 0.6154, "step": 11790 }, { "epoch": 38.65901639344262, "grad_norm": 7.333226680755615, "learning_rate": 1.4040024300940335e-05, "loss": 0.8508, "step": 11791 }, { "epoch": 38.662295081967216, "grad_norm": 10.37045955657959, "learning_rate": 1.403905290864126e-05, "loss": 0.9774, "step": 11792 }, { "epoch": 38.665573770491804, "grad_norm": 7.097982883453369, "learning_rate": 1.403808147079766e-05, "loss": 0.5902, "step": 11793 }, { "epoch": 38.66885245901639, "grad_norm": 9.590324401855469, "learning_rate": 1.4037109987420499e-05, "loss": 1.0741, "step": 11794 }, { "epoch": 38.67213114754098, "grad_norm": 6.173377513885498, "learning_rate": 1.4036138458520718e-05, "loss": 0.7552, "step": 11795 }, { "epoch": 38.675409836065576, "grad_norm": 9.739545822143555, "learning_rate": 1.4035166884109286e-05, "loss": 0.9397, "step": 11796 }, { "epoch": 38.678688524590164, "grad_norm": 5.6909074783325195, "learning_rate": 1.4034195264197145e-05, "loss": 0.8477, "step": 11797 }, { "epoch": 38.68196721311475, "grad_norm": 6.584863185882568, "learning_rate": 1.4033223598795264e-05, "loss": 0.8164, "step": 11798 }, { "epoch": 38.68524590163934, "grad_norm": 9.39228343963623, "learning_rate": 1.4032251887914591e-05, "loss": 0.8131, "step": 11799 }, { "epoch": 38.68852459016394, "grad_norm": 6.754323482513428, "learning_rate": 1.4031280131566085e-05, "loss": 0.805, "step": 11800 }, { "epoch": 38.691803278688525, "grad_norm": 6.592113971710205, "learning_rate": 1.4030308329760706e-05, "loss": 0.9815, "step": 11801 }, { "epoch": 38.69508196721311, "grad_norm": 7.215481281280518, "learning_rate": 1.4029336482509408e-05, "loss": 0.7039, "step": 11802 }, { "epoch": 38.6983606557377, "grad_norm": 7.109494209289551, "learning_rate": 1.4028364589823154e-05, "loss": 0.7147, "step": 11803 }, { "epoch": 38.7016393442623, "grad_norm": 16.672548294067383, "learning_rate": 1.4027392651712896e-05, "loss": 0.8803, "step": 11804 }, { "epoch": 38.704918032786885, "grad_norm": 7.987303733825684, "learning_rate": 1.4026420668189604e-05, "loss": 0.6744, "step": 11805 }, { "epoch": 38.708196721311474, "grad_norm": 6.339761257171631, "learning_rate": 1.4025448639264228e-05, "loss": 0.7635, "step": 11806 }, { "epoch": 38.71147540983607, "grad_norm": 6.242509365081787, "learning_rate": 1.4024476564947735e-05, "loss": 0.7829, "step": 11807 }, { "epoch": 38.71475409836066, "grad_norm": 6.765748023986816, "learning_rate": 1.4023504445251086e-05, "loss": 0.8406, "step": 11808 }, { "epoch": 38.718032786885246, "grad_norm": 5.561892032623291, "learning_rate": 1.4022532280185237e-05, "loss": 0.8828, "step": 11809 }, { "epoch": 38.721311475409834, "grad_norm": 6.309205532073975, "learning_rate": 1.4021560069761158e-05, "loss": 0.8309, "step": 11810 }, { "epoch": 38.72459016393443, "grad_norm": 6.482660293579102, "learning_rate": 1.4020587813989806e-05, "loss": 1.0021, "step": 11811 }, { "epoch": 38.72786885245902, "grad_norm": 8.738975524902344, "learning_rate": 1.4019615512882147e-05, "loss": 0.9786, "step": 11812 }, { "epoch": 38.731147540983606, "grad_norm": 5.50824499130249, "learning_rate": 1.4018643166449144e-05, "loss": 1.0754, "step": 11813 }, { "epoch": 38.734426229508195, "grad_norm": 5.816103458404541, "learning_rate": 1.4017670774701762e-05, "loss": 0.5513, "step": 11814 }, { "epoch": 38.73770491803279, "grad_norm": 5.066158771514893, "learning_rate": 1.401669833765096e-05, "loss": 0.5771, "step": 11815 }, { "epoch": 38.74098360655738, "grad_norm": 6.432920932769775, "learning_rate": 1.4015725855307714e-05, "loss": 0.61, "step": 11816 }, { "epoch": 38.74426229508197, "grad_norm": 6.587920188903809, "learning_rate": 1.401475332768298e-05, "loss": 0.9865, "step": 11817 }, { "epoch": 38.747540983606555, "grad_norm": 6.816427707672119, "learning_rate": 1.4013780754787727e-05, "loss": 1.0004, "step": 11818 }, { "epoch": 38.75081967213115, "grad_norm": 6.41752815246582, "learning_rate": 1.4012808136632922e-05, "loss": 0.9701, "step": 11819 }, { "epoch": 38.75409836065574, "grad_norm": 6.5093889236450195, "learning_rate": 1.4011835473229538e-05, "loss": 0.893, "step": 11820 }, { "epoch": 38.75737704918033, "grad_norm": 7.406366348266602, "learning_rate": 1.4010862764588535e-05, "loss": 0.6996, "step": 11821 }, { "epoch": 38.760655737704916, "grad_norm": 7.635592937469482, "learning_rate": 1.4009890010720883e-05, "loss": 0.8692, "step": 11822 }, { "epoch": 38.76393442622951, "grad_norm": 6.6534423828125, "learning_rate": 1.4008917211637554e-05, "loss": 0.7794, "step": 11823 }, { "epoch": 38.7672131147541, "grad_norm": 8.457961082458496, "learning_rate": 1.4007944367349513e-05, "loss": 0.8521, "step": 11824 }, { "epoch": 38.77049180327869, "grad_norm": 6.275430679321289, "learning_rate": 1.4006971477867737e-05, "loss": 1.0567, "step": 11825 }, { "epoch": 38.773770491803276, "grad_norm": 7.494436264038086, "learning_rate": 1.4005998543203185e-05, "loss": 0.6848, "step": 11826 }, { "epoch": 38.77704918032787, "grad_norm": 6.545753479003906, "learning_rate": 1.4005025563366838e-05, "loss": 0.8852, "step": 11827 }, { "epoch": 38.78032786885246, "grad_norm": 8.998950004577637, "learning_rate": 1.4004052538369661e-05, "loss": 0.8477, "step": 11828 }, { "epoch": 38.78360655737705, "grad_norm": 6.4787068367004395, "learning_rate": 1.4003079468222631e-05, "loss": 0.8167, "step": 11829 }, { "epoch": 38.78688524590164, "grad_norm": 6.5633955001831055, "learning_rate": 1.4002106352936715e-05, "loss": 0.7601, "step": 11830 }, { "epoch": 38.79016393442623, "grad_norm": 6.551499843597412, "learning_rate": 1.4001133192522894e-05, "loss": 0.9461, "step": 11831 }, { "epoch": 38.79344262295082, "grad_norm": 6.278939723968506, "learning_rate": 1.400015998699213e-05, "loss": 1.0356, "step": 11832 }, { "epoch": 38.79672131147541, "grad_norm": 5.221374988555908, "learning_rate": 1.3999186736355404e-05, "loss": 0.7414, "step": 11833 }, { "epoch": 38.8, "grad_norm": 8.96237564086914, "learning_rate": 1.3998213440623691e-05, "loss": 0.7767, "step": 11834 }, { "epoch": 38.80327868852459, "grad_norm": 7.992954254150391, "learning_rate": 1.3997240099807966e-05, "loss": 0.6938, "step": 11835 }, { "epoch": 38.80655737704918, "grad_norm": 6.409029006958008, "learning_rate": 1.39962667139192e-05, "loss": 0.9313, "step": 11836 }, { "epoch": 38.80983606557377, "grad_norm": 6.946361064910889, "learning_rate": 1.3995293282968372e-05, "loss": 0.7838, "step": 11837 }, { "epoch": 38.81311475409836, "grad_norm": 24.76949691772461, "learning_rate": 1.399431980696646e-05, "loss": 0.6174, "step": 11838 }, { "epoch": 38.81639344262295, "grad_norm": 11.377471923828125, "learning_rate": 1.3993346285924438e-05, "loss": 0.9998, "step": 11839 }, { "epoch": 38.81967213114754, "grad_norm": 8.964943885803223, "learning_rate": 1.3992372719853285e-05, "loss": 0.8496, "step": 11840 }, { "epoch": 38.82295081967213, "grad_norm": 10.166143417358398, "learning_rate": 1.3991399108763979e-05, "loss": 0.722, "step": 11841 }, { "epoch": 38.82622950819672, "grad_norm": 9.600552558898926, "learning_rate": 1.3990425452667498e-05, "loss": 0.7157, "step": 11842 }, { "epoch": 38.829508196721314, "grad_norm": 7.114921569824219, "learning_rate": 1.3989451751574819e-05, "loss": 0.8849, "step": 11843 }, { "epoch": 38.8327868852459, "grad_norm": 8.565250396728516, "learning_rate": 1.3988478005496927e-05, "loss": 0.7718, "step": 11844 }, { "epoch": 38.83606557377049, "grad_norm": 8.938582420349121, "learning_rate": 1.3987504214444794e-05, "loss": 0.8026, "step": 11845 }, { "epoch": 38.83934426229508, "grad_norm": 23.388816833496094, "learning_rate": 1.3986530378429409e-05, "loss": 1.0667, "step": 11846 }, { "epoch": 38.842622950819674, "grad_norm": 7.0561418533325195, "learning_rate": 1.3985556497461745e-05, "loss": 1.0109, "step": 11847 }, { "epoch": 38.84590163934426, "grad_norm": 8.75629997253418, "learning_rate": 1.3984582571552792e-05, "loss": 0.8398, "step": 11848 }, { "epoch": 38.84918032786885, "grad_norm": 7.675825119018555, "learning_rate": 1.3983608600713523e-05, "loss": 0.9407, "step": 11849 }, { "epoch": 38.85245901639344, "grad_norm": 12.2157621383667, "learning_rate": 1.3982634584954931e-05, "loss": 0.6842, "step": 11850 }, { "epoch": 38.855737704918035, "grad_norm": 7.146827220916748, "learning_rate": 1.398166052428799e-05, "loss": 0.842, "step": 11851 }, { "epoch": 38.85901639344262, "grad_norm": 9.917769432067871, "learning_rate": 1.3980686418723685e-05, "loss": 0.7229, "step": 11852 }, { "epoch": 38.86229508196721, "grad_norm": 6.6616740226745605, "learning_rate": 1.3979712268273004e-05, "loss": 0.8794, "step": 11853 }, { "epoch": 38.86557377049181, "grad_norm": 7.397886753082275, "learning_rate": 1.3978738072946927e-05, "loss": 0.7662, "step": 11854 }, { "epoch": 38.868852459016395, "grad_norm": 5.831602573394775, "learning_rate": 1.3977763832756446e-05, "loss": 0.9508, "step": 11855 }, { "epoch": 38.87213114754098, "grad_norm": 7.084209442138672, "learning_rate": 1.3976789547712537e-05, "loss": 0.7015, "step": 11856 }, { "epoch": 38.87540983606557, "grad_norm": 6.987911224365234, "learning_rate": 1.3975815217826195e-05, "loss": 0.9591, "step": 11857 }, { "epoch": 38.87868852459017, "grad_norm": 8.819568634033203, "learning_rate": 1.39748408431084e-05, "loss": 0.9703, "step": 11858 }, { "epoch": 38.881967213114756, "grad_norm": 9.144683837890625, "learning_rate": 1.3973866423570142e-05, "loss": 0.9236, "step": 11859 }, { "epoch": 38.885245901639344, "grad_norm": 13.584622383117676, "learning_rate": 1.3972891959222407e-05, "loss": 0.69, "step": 11860 }, { "epoch": 38.88852459016393, "grad_norm": 7.060985565185547, "learning_rate": 1.3971917450076185e-05, "loss": 0.7856, "step": 11861 }, { "epoch": 38.89180327868853, "grad_norm": 5.965367794036865, "learning_rate": 1.3970942896142466e-05, "loss": 0.9375, "step": 11862 }, { "epoch": 38.895081967213116, "grad_norm": 9.113286018371582, "learning_rate": 1.3969968297432236e-05, "loss": 0.7903, "step": 11863 }, { "epoch": 38.898360655737704, "grad_norm": 7.306721210479736, "learning_rate": 1.3968993653956487e-05, "loss": 0.6457, "step": 11864 }, { "epoch": 38.90163934426229, "grad_norm": 7.782294273376465, "learning_rate": 1.3968018965726204e-05, "loss": 0.567, "step": 11865 }, { "epoch": 38.90491803278689, "grad_norm": 7.752462863922119, "learning_rate": 1.3967044232752384e-05, "loss": 0.9606, "step": 11866 }, { "epoch": 38.90819672131148, "grad_norm": 6.119112491607666, "learning_rate": 1.3966069455046017e-05, "loss": 0.7293, "step": 11867 }, { "epoch": 38.911475409836065, "grad_norm": 9.529172897338867, "learning_rate": 1.3965094632618093e-05, "loss": 0.7185, "step": 11868 }, { "epoch": 38.91475409836065, "grad_norm": 5.695375919342041, "learning_rate": 1.39641197654796e-05, "loss": 0.6942, "step": 11869 }, { "epoch": 38.91803278688525, "grad_norm": 12.5607328414917, "learning_rate": 1.3963144853641541e-05, "loss": 0.7882, "step": 11870 }, { "epoch": 38.92131147540984, "grad_norm": 7.732707977294922, "learning_rate": 1.3962169897114899e-05, "loss": 0.8479, "step": 11871 }, { "epoch": 38.924590163934425, "grad_norm": 7.678771495819092, "learning_rate": 1.3961194895910675e-05, "loss": 0.6754, "step": 11872 }, { "epoch": 38.927868852459014, "grad_norm": 7.031034469604492, "learning_rate": 1.3960219850039856e-05, "loss": 0.9341, "step": 11873 }, { "epoch": 38.93114754098361, "grad_norm": 5.3556227684021, "learning_rate": 1.3959244759513445e-05, "loss": 0.7564, "step": 11874 }, { "epoch": 38.9344262295082, "grad_norm": 14.07992935180664, "learning_rate": 1.3958269624342427e-05, "loss": 0.7242, "step": 11875 }, { "epoch": 38.937704918032786, "grad_norm": 8.741772651672363, "learning_rate": 1.3957294444537808e-05, "loss": 0.5864, "step": 11876 }, { "epoch": 38.940983606557374, "grad_norm": 6.928326606750488, "learning_rate": 1.3956319220110579e-05, "loss": 0.63, "step": 11877 }, { "epoch": 38.94426229508197, "grad_norm": 7.37368106842041, "learning_rate": 1.3955343951071735e-05, "loss": 0.8584, "step": 11878 }, { "epoch": 38.94754098360656, "grad_norm": 7.36400032043457, "learning_rate": 1.3954368637432278e-05, "loss": 0.8049, "step": 11879 }, { "epoch": 38.950819672131146, "grad_norm": 7.655116081237793, "learning_rate": 1.3953393279203201e-05, "loss": 0.8594, "step": 11880 }, { "epoch": 38.954098360655735, "grad_norm": 7.745476722717285, "learning_rate": 1.3952417876395507e-05, "loss": 0.8002, "step": 11881 }, { "epoch": 38.95737704918033, "grad_norm": 7.8996453285217285, "learning_rate": 1.395144242902019e-05, "loss": 0.7753, "step": 11882 }, { "epoch": 38.96065573770492, "grad_norm": 5.89921760559082, "learning_rate": 1.3950466937088254e-05, "loss": 0.6214, "step": 11883 }, { "epoch": 38.96393442622951, "grad_norm": 7.9498066902160645, "learning_rate": 1.3949491400610691e-05, "loss": 0.7426, "step": 11884 }, { "epoch": 38.967213114754095, "grad_norm": 7.744011402130127, "learning_rate": 1.3948515819598509e-05, "loss": 0.7401, "step": 11885 }, { "epoch": 38.97049180327869, "grad_norm": 7.464309215545654, "learning_rate": 1.3947540194062705e-05, "loss": 0.9022, "step": 11886 }, { "epoch": 38.97377049180328, "grad_norm": 7.839404582977295, "learning_rate": 1.3946564524014281e-05, "loss": 0.7413, "step": 11887 }, { "epoch": 38.97704918032787, "grad_norm": 6.583295822143555, "learning_rate": 1.3945588809464239e-05, "loss": 1.084, "step": 11888 }, { "epoch": 38.980327868852456, "grad_norm": 8.36205768585205, "learning_rate": 1.3944613050423579e-05, "loss": 0.8942, "step": 11889 }, { "epoch": 38.98360655737705, "grad_norm": 10.444433212280273, "learning_rate": 1.3943637246903307e-05, "loss": 0.5707, "step": 11890 }, { "epoch": 38.98688524590164, "grad_norm": 11.797687530517578, "learning_rate": 1.3942661398914423e-05, "loss": 0.6418, "step": 11891 }, { "epoch": 38.99016393442623, "grad_norm": 9.298844337463379, "learning_rate": 1.3941685506467936e-05, "loss": 0.7245, "step": 11892 }, { "epoch": 38.993442622950816, "grad_norm": 6.832991600036621, "learning_rate": 1.3940709569574844e-05, "loss": 0.7788, "step": 11893 }, { "epoch": 38.99672131147541, "grad_norm": 6.560680389404297, "learning_rate": 1.3939733588246155e-05, "loss": 0.9255, "step": 11894 }, { "epoch": 39.0, "grad_norm": 5.062660217285156, "learning_rate": 1.3938757562492873e-05, "loss": 1.0215, "step": 11895 }, { "epoch": 39.00327868852459, "grad_norm": 6.72236442565918, "learning_rate": 1.3937781492326005e-05, "loss": 0.59, "step": 11896 }, { "epoch": 39.006557377049184, "grad_norm": 7.273407936096191, "learning_rate": 1.3936805377756554e-05, "loss": 0.7063, "step": 11897 }, { "epoch": 39.00983606557377, "grad_norm": 6.039341926574707, "learning_rate": 1.3935829218795533e-05, "loss": 0.6359, "step": 11898 }, { "epoch": 39.01311475409836, "grad_norm": 9.717732429504395, "learning_rate": 1.3934853015453942e-05, "loss": 0.6909, "step": 11899 }, { "epoch": 39.01639344262295, "grad_norm": 10.490508079528809, "learning_rate": 1.3933876767742796e-05, "loss": 0.8116, "step": 11900 }, { "epoch": 39.019672131147544, "grad_norm": 20.462541580200195, "learning_rate": 1.3932900475673095e-05, "loss": 0.741, "step": 11901 }, { "epoch": 39.02295081967213, "grad_norm": 10.193926811218262, "learning_rate": 1.3931924139255854e-05, "loss": 0.6268, "step": 11902 }, { "epoch": 39.02622950819672, "grad_norm": 8.118648529052734, "learning_rate": 1.3930947758502081e-05, "loss": 1.013, "step": 11903 }, { "epoch": 39.02950819672131, "grad_norm": 8.868206024169922, "learning_rate": 1.3929971333422783e-05, "loss": 0.7295, "step": 11904 }, { "epoch": 39.032786885245905, "grad_norm": 7.0718536376953125, "learning_rate": 1.3928994864028974e-05, "loss": 0.9295, "step": 11905 }, { "epoch": 39.03606557377049, "grad_norm": 7.566700458526611, "learning_rate": 1.3928018350331658e-05, "loss": 1.0031, "step": 11906 }, { "epoch": 39.03934426229508, "grad_norm": 6.560904026031494, "learning_rate": 1.3927041792341856e-05, "loss": 0.4747, "step": 11907 }, { "epoch": 39.04262295081967, "grad_norm": 6.678253173828125, "learning_rate": 1.3926065190070573e-05, "loss": 1.0091, "step": 11908 }, { "epoch": 39.045901639344265, "grad_norm": 5.947327613830566, "learning_rate": 1.3925088543528823e-05, "loss": 0.8322, "step": 11909 }, { "epoch": 39.049180327868854, "grad_norm": 10.547920227050781, "learning_rate": 1.3924111852727617e-05, "loss": 0.9188, "step": 11910 }, { "epoch": 39.05245901639344, "grad_norm": 6.3913984298706055, "learning_rate": 1.3923135117677974e-05, "loss": 0.9765, "step": 11911 }, { "epoch": 39.05573770491803, "grad_norm": 6.8037638664245605, "learning_rate": 1.3922158338390901e-05, "loss": 0.8501, "step": 11912 }, { "epoch": 39.059016393442626, "grad_norm": 8.074983596801758, "learning_rate": 1.3921181514877415e-05, "loss": 0.7626, "step": 11913 }, { "epoch": 39.062295081967214, "grad_norm": 5.703777313232422, "learning_rate": 1.392020464714853e-05, "loss": 1.0378, "step": 11914 }, { "epoch": 39.0655737704918, "grad_norm": 7.842596530914307, "learning_rate": 1.3919227735215262e-05, "loss": 0.8755, "step": 11915 }, { "epoch": 39.06885245901639, "grad_norm": 7.139363765716553, "learning_rate": 1.3918250779088625e-05, "loss": 0.7903, "step": 11916 }, { "epoch": 39.072131147540986, "grad_norm": 18.836502075195312, "learning_rate": 1.3917273778779636e-05, "loss": 0.609, "step": 11917 }, { "epoch": 39.075409836065575, "grad_norm": 6.425036907196045, "learning_rate": 1.3916296734299315e-05, "loss": 0.6371, "step": 11918 }, { "epoch": 39.07868852459016, "grad_norm": 7.941464900970459, "learning_rate": 1.3915319645658674e-05, "loss": 0.4968, "step": 11919 }, { "epoch": 39.08196721311475, "grad_norm": 7.170867443084717, "learning_rate": 1.3914342512868736e-05, "loss": 0.7365, "step": 11920 }, { "epoch": 39.08524590163935, "grad_norm": 5.650522708892822, "learning_rate": 1.3913365335940511e-05, "loss": 0.8826, "step": 11921 }, { "epoch": 39.088524590163935, "grad_norm": 7.481775760650635, "learning_rate": 1.3912388114885026e-05, "loss": 0.8852, "step": 11922 }, { "epoch": 39.09180327868852, "grad_norm": 6.846269607543945, "learning_rate": 1.3911410849713298e-05, "loss": 0.5974, "step": 11923 }, { "epoch": 39.09508196721311, "grad_norm": 10.790955543518066, "learning_rate": 1.3910433540436348e-05, "loss": 0.605, "step": 11924 }, { "epoch": 39.09836065573771, "grad_norm": 11.855162620544434, "learning_rate": 1.390945618706519e-05, "loss": 0.8083, "step": 11925 }, { "epoch": 39.101639344262296, "grad_norm": 7.379953384399414, "learning_rate": 1.3908478789610851e-05, "loss": 0.8499, "step": 11926 }, { "epoch": 39.104918032786884, "grad_norm": 7.085536479949951, "learning_rate": 1.3907501348084347e-05, "loss": 0.723, "step": 11927 }, { "epoch": 39.10819672131147, "grad_norm": 5.641020774841309, "learning_rate": 1.3906523862496705e-05, "loss": 0.8567, "step": 11928 }, { "epoch": 39.11147540983607, "grad_norm": 6.245382308959961, "learning_rate": 1.3905546332858946e-05, "loss": 0.8724, "step": 11929 }, { "epoch": 39.114754098360656, "grad_norm": 6.576574802398682, "learning_rate": 1.3904568759182088e-05, "loss": 0.757, "step": 11930 }, { "epoch": 39.118032786885244, "grad_norm": 7.626788139343262, "learning_rate": 1.390359114147716e-05, "loss": 0.9081, "step": 11931 }, { "epoch": 39.12131147540983, "grad_norm": 15.054801940917969, "learning_rate": 1.390261347975518e-05, "loss": 0.8403, "step": 11932 }, { "epoch": 39.12459016393443, "grad_norm": 7.159558296203613, "learning_rate": 1.390163577402718e-05, "loss": 0.9768, "step": 11933 }, { "epoch": 39.12786885245902, "grad_norm": 26.648530960083008, "learning_rate": 1.3900658024304176e-05, "loss": 0.8434, "step": 11934 }, { "epoch": 39.131147540983605, "grad_norm": 10.06126594543457, "learning_rate": 1.3899680230597199e-05, "loss": 0.8677, "step": 11935 }, { "epoch": 39.13442622950819, "grad_norm": 5.949366092681885, "learning_rate": 1.3898702392917271e-05, "loss": 0.4898, "step": 11936 }, { "epoch": 39.13770491803279, "grad_norm": 9.52975845336914, "learning_rate": 1.3897724511275423e-05, "loss": 0.8819, "step": 11937 }, { "epoch": 39.14098360655738, "grad_norm": 6.347777843475342, "learning_rate": 1.3896746585682674e-05, "loss": 0.9877, "step": 11938 }, { "epoch": 39.144262295081965, "grad_norm": 7.439732551574707, "learning_rate": 1.3895768616150061e-05, "loss": 0.8116, "step": 11939 }, { "epoch": 39.14754098360656, "grad_norm": 6.254082679748535, "learning_rate": 1.3894790602688601e-05, "loss": 0.7609, "step": 11940 }, { "epoch": 39.15081967213115, "grad_norm": 8.97293758392334, "learning_rate": 1.389381254530933e-05, "loss": 0.758, "step": 11941 }, { "epoch": 39.15409836065574, "grad_norm": 6.584181308746338, "learning_rate": 1.3892834444023275e-05, "loss": 0.7484, "step": 11942 }, { "epoch": 39.157377049180326, "grad_norm": 7.756588935852051, "learning_rate": 1.3891856298841462e-05, "loss": 0.6351, "step": 11943 }, { "epoch": 39.16065573770492, "grad_norm": 6.289510250091553, "learning_rate": 1.3890878109774924e-05, "loss": 0.5917, "step": 11944 }, { "epoch": 39.16393442622951, "grad_norm": 10.896533012390137, "learning_rate": 1.388989987683469e-05, "loss": 0.6159, "step": 11945 }, { "epoch": 39.1672131147541, "grad_norm": 5.943666934967041, "learning_rate": 1.3888921600031791e-05, "loss": 0.5731, "step": 11946 }, { "epoch": 39.170491803278686, "grad_norm": 7.02172327041626, "learning_rate": 1.3887943279377254e-05, "loss": 0.7618, "step": 11947 }, { "epoch": 39.17377049180328, "grad_norm": 7.135829448699951, "learning_rate": 1.388696491488212e-05, "loss": 0.7108, "step": 11948 }, { "epoch": 39.17704918032787, "grad_norm": 6.527624130249023, "learning_rate": 1.388598650655741e-05, "loss": 0.7277, "step": 11949 }, { "epoch": 39.18032786885246, "grad_norm": 5.95576286315918, "learning_rate": 1.3885008054414164e-05, "loss": 0.6595, "step": 11950 }, { "epoch": 39.18360655737705, "grad_norm": 6.518865585327148, "learning_rate": 1.388402955846341e-05, "loss": 0.9449, "step": 11951 }, { "epoch": 39.18688524590164, "grad_norm": 9.424179077148438, "learning_rate": 1.3883051018716188e-05, "loss": 0.9865, "step": 11952 }, { "epoch": 39.19016393442623, "grad_norm": 5.854234218597412, "learning_rate": 1.3882072435183526e-05, "loss": 1.0887, "step": 11953 }, { "epoch": 39.19344262295082, "grad_norm": 6.551154136657715, "learning_rate": 1.3881093807876465e-05, "loss": 0.5385, "step": 11954 }, { "epoch": 39.19672131147541, "grad_norm": 6.838539123535156, "learning_rate": 1.3880115136806032e-05, "loss": 1.0779, "step": 11955 }, { "epoch": 39.2, "grad_norm": 6.22255802154541, "learning_rate": 1.3879136421983265e-05, "loss": 0.7958, "step": 11956 }, { "epoch": 39.20327868852459, "grad_norm": 7.234159469604492, "learning_rate": 1.3878157663419207e-05, "loss": 0.7422, "step": 11957 }, { "epoch": 39.20655737704918, "grad_norm": 6.6661272048950195, "learning_rate": 1.3877178861124885e-05, "loss": 0.6597, "step": 11958 }, { "epoch": 39.20983606557377, "grad_norm": 6.6998090744018555, "learning_rate": 1.387620001511134e-05, "loss": 0.645, "step": 11959 }, { "epoch": 39.21311475409836, "grad_norm": 6.308470726013184, "learning_rate": 1.387522112538961e-05, "loss": 0.5095, "step": 11960 }, { "epoch": 39.21639344262295, "grad_norm": 7.369472980499268, "learning_rate": 1.3874242191970737e-05, "loss": 0.8969, "step": 11961 }, { "epoch": 39.21967213114754, "grad_norm": 7.191126346588135, "learning_rate": 1.3873263214865749e-05, "loss": 0.5311, "step": 11962 }, { "epoch": 39.22295081967213, "grad_norm": 6.418568134307861, "learning_rate": 1.3872284194085695e-05, "loss": 0.8853, "step": 11963 }, { "epoch": 39.226229508196724, "grad_norm": 8.469228744506836, "learning_rate": 1.3871305129641607e-05, "loss": 0.8781, "step": 11964 }, { "epoch": 39.22950819672131, "grad_norm": 6.905625343322754, "learning_rate": 1.3870326021544531e-05, "loss": 0.8732, "step": 11965 }, { "epoch": 39.2327868852459, "grad_norm": 20.842924118041992, "learning_rate": 1.3869346869805506e-05, "loss": 0.8971, "step": 11966 }, { "epoch": 39.23606557377049, "grad_norm": 6.029550075531006, "learning_rate": 1.386836767443557e-05, "loss": 0.8025, "step": 11967 }, { "epoch": 39.239344262295084, "grad_norm": 7.2836432456970215, "learning_rate": 1.3867388435445765e-05, "loss": 0.6793, "step": 11968 }, { "epoch": 39.24262295081967, "grad_norm": 5.433499813079834, "learning_rate": 1.3866409152847137e-05, "loss": 0.8864, "step": 11969 }, { "epoch": 39.24590163934426, "grad_norm": 5.638219356536865, "learning_rate": 1.3865429826650724e-05, "loss": 0.7162, "step": 11970 }, { "epoch": 39.24918032786885, "grad_norm": 7.038004398345947, "learning_rate": 1.3864450456867572e-05, "loss": 0.6769, "step": 11971 }, { "epoch": 39.252459016393445, "grad_norm": 5.717422962188721, "learning_rate": 1.3863471043508722e-05, "loss": 0.9311, "step": 11972 }, { "epoch": 39.25573770491803, "grad_norm": 8.85159969329834, "learning_rate": 1.386249158658522e-05, "loss": 1.0182, "step": 11973 }, { "epoch": 39.25901639344262, "grad_norm": 6.3065266609191895, "learning_rate": 1.386151208610811e-05, "loss": 0.7456, "step": 11974 }, { "epoch": 39.26229508196721, "grad_norm": 7.094346523284912, "learning_rate": 1.3860532542088435e-05, "loss": 0.865, "step": 11975 }, { "epoch": 39.265573770491805, "grad_norm": 6.65016508102417, "learning_rate": 1.3859552954537243e-05, "loss": 0.6986, "step": 11976 }, { "epoch": 39.268852459016394, "grad_norm": 7.602504253387451, "learning_rate": 1.3858573323465576e-05, "loss": 0.8298, "step": 11977 }, { "epoch": 39.27213114754098, "grad_norm": 6.872299671173096, "learning_rate": 1.3857593648884484e-05, "loss": 0.8633, "step": 11978 }, { "epoch": 39.27540983606557, "grad_norm": 5.3218584060668945, "learning_rate": 1.3856613930805013e-05, "loss": 0.7603, "step": 11979 }, { "epoch": 39.278688524590166, "grad_norm": 8.690678596496582, "learning_rate": 1.385563416923821e-05, "loss": 0.8132, "step": 11980 }, { "epoch": 39.281967213114754, "grad_norm": 9.39780330657959, "learning_rate": 1.3854654364195126e-05, "loss": 0.458, "step": 11981 }, { "epoch": 39.28524590163934, "grad_norm": 7.85296630859375, "learning_rate": 1.38536745156868e-05, "loss": 0.69, "step": 11982 }, { "epoch": 39.28852459016394, "grad_norm": 7.951887130737305, "learning_rate": 1.3852694623724292e-05, "loss": 0.522, "step": 11983 }, { "epoch": 39.291803278688526, "grad_norm": 10.980724334716797, "learning_rate": 1.3851714688318643e-05, "loss": 0.6377, "step": 11984 }, { "epoch": 39.295081967213115, "grad_norm": 5.95806360244751, "learning_rate": 1.3850734709480908e-05, "loss": 0.6581, "step": 11985 }, { "epoch": 39.2983606557377, "grad_norm": 5.468589782714844, "learning_rate": 1.3849754687222135e-05, "loss": 0.713, "step": 11986 }, { "epoch": 39.3016393442623, "grad_norm": 5.467265605926514, "learning_rate": 1.3848774621553376e-05, "loss": 0.5324, "step": 11987 }, { "epoch": 39.30491803278689, "grad_norm": 5.971362113952637, "learning_rate": 1.3847794512485679e-05, "loss": 0.851, "step": 11988 }, { "epoch": 39.308196721311475, "grad_norm": 8.897218704223633, "learning_rate": 1.3846814360030102e-05, "loss": 0.6333, "step": 11989 }, { "epoch": 39.31147540983606, "grad_norm": 6.064599514007568, "learning_rate": 1.384583416419769e-05, "loss": 0.5592, "step": 11990 }, { "epoch": 39.31475409836066, "grad_norm": 6.174157619476318, "learning_rate": 1.3844853924999501e-05, "loss": 0.7881, "step": 11991 }, { "epoch": 39.31803278688525, "grad_norm": 7.301353454589844, "learning_rate": 1.3843873642446585e-05, "loss": 1.0468, "step": 11992 }, { "epoch": 39.321311475409836, "grad_norm": 5.076292991638184, "learning_rate": 1.3842893316549999e-05, "loss": 1.038, "step": 11993 }, { "epoch": 39.324590163934424, "grad_norm": 5.383114814758301, "learning_rate": 1.3841912947320793e-05, "loss": 1.2032, "step": 11994 }, { "epoch": 39.32786885245902, "grad_norm": 5.641293048858643, "learning_rate": 1.3840932534770024e-05, "loss": 0.8329, "step": 11995 }, { "epoch": 39.33114754098361, "grad_norm": 6.434061050415039, "learning_rate": 1.3839952078908747e-05, "loss": 0.8854, "step": 11996 }, { "epoch": 39.334426229508196, "grad_norm": 6.401751518249512, "learning_rate": 1.383897157974802e-05, "loss": 0.6688, "step": 11997 }, { "epoch": 39.337704918032784, "grad_norm": 6.5351338386535645, "learning_rate": 1.3837991037298895e-05, "loss": 0.5518, "step": 11998 }, { "epoch": 39.34098360655738, "grad_norm": 9.001128196716309, "learning_rate": 1.383701045157243e-05, "loss": 0.8904, "step": 11999 }, { "epoch": 39.34426229508197, "grad_norm": 5.214675426483154, "learning_rate": 1.3836029822579686e-05, "loss": 0.7969, "step": 12000 }, { "epoch": 39.34754098360656, "grad_norm": 5.6400251388549805, "learning_rate": 1.3835049150331714e-05, "loss": 1.0209, "step": 12001 }, { "epoch": 39.350819672131145, "grad_norm": 6.892267227172852, "learning_rate": 1.3834068434839576e-05, "loss": 0.9001, "step": 12002 }, { "epoch": 39.35409836065574, "grad_norm": 7.1078033447265625, "learning_rate": 1.383308767611433e-05, "loss": 0.8263, "step": 12003 }, { "epoch": 39.35737704918033, "grad_norm": 6.3201518058776855, "learning_rate": 1.3832106874167037e-05, "loss": 0.6353, "step": 12004 }, { "epoch": 39.36065573770492, "grad_norm": 9.456876754760742, "learning_rate": 1.3831126029008754e-05, "loss": 0.7343, "step": 12005 }, { "epoch": 39.363934426229505, "grad_norm": 5.376890659332275, "learning_rate": 1.3830145140650539e-05, "loss": 0.9675, "step": 12006 }, { "epoch": 39.3672131147541, "grad_norm": 6.568940162658691, "learning_rate": 1.3829164209103459e-05, "loss": 0.8744, "step": 12007 }, { "epoch": 39.37049180327869, "grad_norm": 7.985073566436768, "learning_rate": 1.3828183234378568e-05, "loss": 0.8413, "step": 12008 }, { "epoch": 39.37377049180328, "grad_norm": 6.249086380004883, "learning_rate": 1.3827202216486933e-05, "loss": 0.7429, "step": 12009 }, { "epoch": 39.377049180327866, "grad_norm": 6.203634738922119, "learning_rate": 1.3826221155439611e-05, "loss": 0.8682, "step": 12010 }, { "epoch": 39.38032786885246, "grad_norm": 5.960893154144287, "learning_rate": 1.382524005124767e-05, "loss": 0.7551, "step": 12011 }, { "epoch": 39.38360655737705, "grad_norm": 11.63634204864502, "learning_rate": 1.3824258903922168e-05, "loss": 0.7122, "step": 12012 }, { "epoch": 39.38688524590164, "grad_norm": 7.021653652191162, "learning_rate": 1.3823277713474172e-05, "loss": 0.7281, "step": 12013 }, { "epoch": 39.390163934426226, "grad_norm": 6.054247856140137, "learning_rate": 1.3822296479914743e-05, "loss": 0.9956, "step": 12014 }, { "epoch": 39.39344262295082, "grad_norm": 6.732401371002197, "learning_rate": 1.382131520325495e-05, "loss": 0.8472, "step": 12015 }, { "epoch": 39.39672131147541, "grad_norm": 6.45262336730957, "learning_rate": 1.3820333883505851e-05, "loss": 0.7303, "step": 12016 }, { "epoch": 39.4, "grad_norm": 7.543644905090332, "learning_rate": 1.3819352520678519e-05, "loss": 0.8687, "step": 12017 }, { "epoch": 39.40327868852459, "grad_norm": 7.6536078453063965, "learning_rate": 1.3818371114784015e-05, "loss": 0.6946, "step": 12018 }, { "epoch": 39.40655737704918, "grad_norm": 6.133620262145996, "learning_rate": 1.3817389665833405e-05, "loss": 0.6196, "step": 12019 }, { "epoch": 39.40983606557377, "grad_norm": 6.980945587158203, "learning_rate": 1.3816408173837762e-05, "loss": 0.7682, "step": 12020 }, { "epoch": 39.41311475409836, "grad_norm": 6.527194499969482, "learning_rate": 1.3815426638808143e-05, "loss": 0.5404, "step": 12021 }, { "epoch": 39.41639344262295, "grad_norm": 6.39341926574707, "learning_rate": 1.3814445060755621e-05, "loss": 0.7196, "step": 12022 }, { "epoch": 39.41967213114754, "grad_norm": 10.983896255493164, "learning_rate": 1.381346343969127e-05, "loss": 0.7343, "step": 12023 }, { "epoch": 39.42295081967213, "grad_norm": 6.923686504364014, "learning_rate": 1.3812481775626152e-05, "loss": 0.855, "step": 12024 }, { "epoch": 39.42622950819672, "grad_norm": 6.205013275146484, "learning_rate": 1.3811500068571335e-05, "loss": 0.8522, "step": 12025 }, { "epoch": 39.429508196721315, "grad_norm": 6.345317840576172, "learning_rate": 1.3810518318537895e-05, "loss": 1.0024, "step": 12026 }, { "epoch": 39.4327868852459, "grad_norm": 5.5767412185668945, "learning_rate": 1.3809536525536897e-05, "loss": 0.7001, "step": 12027 }, { "epoch": 39.43606557377049, "grad_norm": 7.639876365661621, "learning_rate": 1.3808554689579417e-05, "loss": 0.9061, "step": 12028 }, { "epoch": 39.43934426229508, "grad_norm": 6.725412368774414, "learning_rate": 1.380757281067652e-05, "loss": 0.8055, "step": 12029 }, { "epoch": 39.442622950819676, "grad_norm": 7.444146633148193, "learning_rate": 1.3806590888839284e-05, "loss": 0.7214, "step": 12030 }, { "epoch": 39.445901639344264, "grad_norm": 6.7025580406188965, "learning_rate": 1.3805608924078775e-05, "loss": 0.875, "step": 12031 }, { "epoch": 39.44918032786885, "grad_norm": 7.930349826812744, "learning_rate": 1.3804626916406068e-05, "loss": 0.8225, "step": 12032 }, { "epoch": 39.45245901639344, "grad_norm": 5.786984920501709, "learning_rate": 1.380364486583224e-05, "loss": 0.7482, "step": 12033 }, { "epoch": 39.455737704918036, "grad_norm": 8.353476524353027, "learning_rate": 1.3802662772368359e-05, "loss": 0.7757, "step": 12034 }, { "epoch": 39.459016393442624, "grad_norm": 6.322610855102539, "learning_rate": 1.38016806360255e-05, "loss": 0.7281, "step": 12035 }, { "epoch": 39.46229508196721, "grad_norm": 6.072986602783203, "learning_rate": 1.380069845681474e-05, "loss": 0.5378, "step": 12036 }, { "epoch": 39.4655737704918, "grad_norm": 6.266613483428955, "learning_rate": 1.3799716234747156e-05, "loss": 0.7873, "step": 12037 }, { "epoch": 39.4688524590164, "grad_norm": 5.784861087799072, "learning_rate": 1.3798733969833818e-05, "loss": 0.7933, "step": 12038 }, { "epoch": 39.472131147540985, "grad_norm": 6.044412136077881, "learning_rate": 1.3797751662085805e-05, "loss": 0.9511, "step": 12039 }, { "epoch": 39.47540983606557, "grad_norm": 6.083718776702881, "learning_rate": 1.3796769311514193e-05, "loss": 0.7805, "step": 12040 }, { "epoch": 39.47868852459016, "grad_norm": 6.163713455200195, "learning_rate": 1.3795786918130061e-05, "loss": 0.7926, "step": 12041 }, { "epoch": 39.48196721311476, "grad_norm": 6.706792831420898, "learning_rate": 1.3794804481944484e-05, "loss": 0.8811, "step": 12042 }, { "epoch": 39.485245901639345, "grad_norm": 7.091008186340332, "learning_rate": 1.3793822002968542e-05, "loss": 0.9978, "step": 12043 }, { "epoch": 39.488524590163934, "grad_norm": 19.03205680847168, "learning_rate": 1.3792839481213312e-05, "loss": 0.5758, "step": 12044 }, { "epoch": 39.49180327868852, "grad_norm": 6.0994873046875, "learning_rate": 1.3791856916689871e-05, "loss": 0.9457, "step": 12045 }, { "epoch": 39.49508196721312, "grad_norm": 7.433351516723633, "learning_rate": 1.3790874309409304e-05, "loss": 0.8095, "step": 12046 }, { "epoch": 39.498360655737706, "grad_norm": 6.506320953369141, "learning_rate": 1.3789891659382683e-05, "loss": 0.802, "step": 12047 }, { "epoch": 39.501639344262294, "grad_norm": 6.499443054199219, "learning_rate": 1.3788908966621097e-05, "loss": 0.6032, "step": 12048 }, { "epoch": 39.50491803278688, "grad_norm": 5.549999237060547, "learning_rate": 1.3787926231135622e-05, "loss": 0.8535, "step": 12049 }, { "epoch": 39.50819672131148, "grad_norm": 7.389978885650635, "learning_rate": 1.3786943452937337e-05, "loss": 0.6147, "step": 12050 }, { "epoch": 39.511475409836066, "grad_norm": 6.600887775421143, "learning_rate": 1.3785960632037334e-05, "loss": 0.6231, "step": 12051 }, { "epoch": 39.514754098360655, "grad_norm": 6.305276870727539, "learning_rate": 1.3784977768446682e-05, "loss": 1.0184, "step": 12052 }, { "epoch": 39.51803278688524, "grad_norm": 8.881120681762695, "learning_rate": 1.3783994862176472e-05, "loss": 0.861, "step": 12053 }, { "epoch": 39.52131147540984, "grad_norm": 8.171233177185059, "learning_rate": 1.3783011913237788e-05, "loss": 0.7808, "step": 12054 }, { "epoch": 39.52459016393443, "grad_norm": 7.921905040740967, "learning_rate": 1.3782028921641708e-05, "loss": 0.8401, "step": 12055 }, { "epoch": 39.527868852459015, "grad_norm": 7.835484027862549, "learning_rate": 1.3781045887399323e-05, "loss": 0.6974, "step": 12056 }, { "epoch": 39.5311475409836, "grad_norm": 6.971430778503418, "learning_rate": 1.3780062810521716e-05, "loss": 0.8949, "step": 12057 }, { "epoch": 39.5344262295082, "grad_norm": 6.578360557556152, "learning_rate": 1.3779079691019965e-05, "loss": 0.8802, "step": 12058 }, { "epoch": 39.53770491803279, "grad_norm": 7.279572010040283, "learning_rate": 1.3778096528905164e-05, "loss": 0.745, "step": 12059 }, { "epoch": 39.540983606557376, "grad_norm": 6.934145450592041, "learning_rate": 1.3777113324188394e-05, "loss": 0.7597, "step": 12060 }, { "epoch": 39.544262295081964, "grad_norm": 8.110054969787598, "learning_rate": 1.3776130076880748e-05, "loss": 0.6952, "step": 12061 }, { "epoch": 39.54754098360656, "grad_norm": 9.146888732910156, "learning_rate": 1.3775146786993306e-05, "loss": 0.5334, "step": 12062 }, { "epoch": 39.55081967213115, "grad_norm": 7.822032451629639, "learning_rate": 1.3774163454537161e-05, "loss": 0.8113, "step": 12063 }, { "epoch": 39.554098360655736, "grad_norm": 8.249215126037598, "learning_rate": 1.3773180079523396e-05, "loss": 0.6917, "step": 12064 }, { "epoch": 39.557377049180324, "grad_norm": 7.916392803192139, "learning_rate": 1.3772196661963105e-05, "loss": 1.0798, "step": 12065 }, { "epoch": 39.56065573770492, "grad_norm": 6.877768516540527, "learning_rate": 1.3771213201867372e-05, "loss": 0.8825, "step": 12066 }, { "epoch": 39.56393442622951, "grad_norm": 8.242460250854492, "learning_rate": 1.3770229699247292e-05, "loss": 0.7689, "step": 12067 }, { "epoch": 39.5672131147541, "grad_norm": 8.671026229858398, "learning_rate": 1.3769246154113951e-05, "loss": 0.7616, "step": 12068 }, { "epoch": 39.570491803278685, "grad_norm": 5.8182783126831055, "learning_rate": 1.3768262566478443e-05, "loss": 0.8539, "step": 12069 }, { "epoch": 39.57377049180328, "grad_norm": 6.791013717651367, "learning_rate": 1.3767278936351853e-05, "loss": 1.0403, "step": 12070 }, { "epoch": 39.57704918032787, "grad_norm": 5.618821144104004, "learning_rate": 1.3766295263745277e-05, "loss": 0.8025, "step": 12071 }, { "epoch": 39.58032786885246, "grad_norm": 13.76158332824707, "learning_rate": 1.3765311548669807e-05, "loss": 0.733, "step": 12072 }, { "epoch": 39.58360655737705, "grad_norm": 8.553535461425781, "learning_rate": 1.3764327791136532e-05, "loss": 0.799, "step": 12073 }, { "epoch": 39.58688524590164, "grad_norm": 7.043129920959473, "learning_rate": 1.376334399115655e-05, "loss": 0.9348, "step": 12074 }, { "epoch": 39.59016393442623, "grad_norm": 7.987471580505371, "learning_rate": 1.376236014874095e-05, "loss": 0.5825, "step": 12075 }, { "epoch": 39.59344262295082, "grad_norm": 6.672502517700195, "learning_rate": 1.3761376263900826e-05, "loss": 0.9352, "step": 12076 }, { "epoch": 39.59672131147541, "grad_norm": 7.08888053894043, "learning_rate": 1.3760392336647278e-05, "loss": 0.8992, "step": 12077 }, { "epoch": 39.6, "grad_norm": 6.518970012664795, "learning_rate": 1.3759408366991391e-05, "loss": 0.6529, "step": 12078 }, { "epoch": 39.60327868852459, "grad_norm": 8.558029174804688, "learning_rate": 1.375842435494427e-05, "loss": 0.7008, "step": 12079 }, { "epoch": 39.60655737704918, "grad_norm": 7.4590325355529785, "learning_rate": 1.3757440300517004e-05, "loss": 0.8008, "step": 12080 }, { "epoch": 39.609836065573774, "grad_norm": 14.249942779541016, "learning_rate": 1.3756456203720695e-05, "loss": 0.8739, "step": 12081 }, { "epoch": 39.61311475409836, "grad_norm": 6.656235218048096, "learning_rate": 1.3755472064566436e-05, "loss": 0.6605, "step": 12082 }, { "epoch": 39.61639344262295, "grad_norm": 6.623065948486328, "learning_rate": 1.3754487883065324e-05, "loss": 1.015, "step": 12083 }, { "epoch": 39.61967213114754, "grad_norm": 7.469566822052002, "learning_rate": 1.3753503659228456e-05, "loss": 1.0594, "step": 12084 }, { "epoch": 39.622950819672134, "grad_norm": 5.884705543518066, "learning_rate": 1.3752519393066935e-05, "loss": 0.593, "step": 12085 }, { "epoch": 39.62622950819672, "grad_norm": 6.64385986328125, "learning_rate": 1.3751535084591852e-05, "loss": 0.6857, "step": 12086 }, { "epoch": 39.62950819672131, "grad_norm": 7.1430487632751465, "learning_rate": 1.3750550733814312e-05, "loss": 0.8514, "step": 12087 }, { "epoch": 39.6327868852459, "grad_norm": 7.055837154388428, "learning_rate": 1.3749566340745413e-05, "loss": 0.7363, "step": 12088 }, { "epoch": 39.636065573770495, "grad_norm": 7.874588966369629, "learning_rate": 1.3748581905396257e-05, "loss": 0.7647, "step": 12089 }, { "epoch": 39.63934426229508, "grad_norm": 6.254632472991943, "learning_rate": 1.3747597427777937e-05, "loss": 0.9391, "step": 12090 }, { "epoch": 39.64262295081967, "grad_norm": 6.994525909423828, "learning_rate": 1.3746612907901564e-05, "loss": 0.7801, "step": 12091 }, { "epoch": 39.64590163934426, "grad_norm": 7.5679450035095215, "learning_rate": 1.3745628345778235e-05, "loss": 0.9094, "step": 12092 }, { "epoch": 39.649180327868855, "grad_norm": 6.463565349578857, "learning_rate": 1.374464374141905e-05, "loss": 0.611, "step": 12093 }, { "epoch": 39.65245901639344, "grad_norm": 7.193966388702393, "learning_rate": 1.3743659094835113e-05, "loss": 0.6631, "step": 12094 }, { "epoch": 39.65573770491803, "grad_norm": 7.127033233642578, "learning_rate": 1.374267440603753e-05, "loss": 0.9858, "step": 12095 }, { "epoch": 39.65901639344262, "grad_norm": 6.983008861541748, "learning_rate": 1.3741689675037402e-05, "loss": 0.59, "step": 12096 }, { "epoch": 39.662295081967216, "grad_norm": 10.056299209594727, "learning_rate": 1.374070490184583e-05, "loss": 0.8279, "step": 12097 }, { "epoch": 39.665573770491804, "grad_norm": 6.366668701171875, "learning_rate": 1.3739720086473922e-05, "loss": 0.8839, "step": 12098 }, { "epoch": 39.66885245901639, "grad_norm": 7.5024285316467285, "learning_rate": 1.373873522893278e-05, "loss": 0.6995, "step": 12099 }, { "epoch": 39.67213114754098, "grad_norm": 6.047818183898926, "learning_rate": 1.3737750329233515e-05, "loss": 0.5997, "step": 12100 }, { "epoch": 39.675409836065576, "grad_norm": 12.347843170166016, "learning_rate": 1.3736765387387223e-05, "loss": 0.8461, "step": 12101 }, { "epoch": 39.678688524590164, "grad_norm": 10.62542724609375, "learning_rate": 1.3735780403405022e-05, "loss": 0.7791, "step": 12102 }, { "epoch": 39.68196721311475, "grad_norm": 5.8037004470825195, "learning_rate": 1.3734795377298006e-05, "loss": 0.8411, "step": 12103 }, { "epoch": 39.68524590163934, "grad_norm": 6.684274196624756, "learning_rate": 1.3733810309077294e-05, "loss": 0.6159, "step": 12104 }, { "epoch": 39.68852459016394, "grad_norm": 8.318302154541016, "learning_rate": 1.3732825198753987e-05, "loss": 0.8309, "step": 12105 }, { "epoch": 39.691803278688525, "grad_norm": 6.612960338592529, "learning_rate": 1.3731840046339193e-05, "loss": 0.9543, "step": 12106 }, { "epoch": 39.69508196721311, "grad_norm": 7.516500473022461, "learning_rate": 1.3730854851844025e-05, "loss": 0.5814, "step": 12107 }, { "epoch": 39.6983606557377, "grad_norm": 12.865793228149414, "learning_rate": 1.372986961527959e-05, "loss": 0.8361, "step": 12108 }, { "epoch": 39.7016393442623, "grad_norm": 19.599529266357422, "learning_rate": 1.3728884336656995e-05, "loss": 0.7946, "step": 12109 }, { "epoch": 39.704918032786885, "grad_norm": 6.947085857391357, "learning_rate": 1.3727899015987352e-05, "loss": 0.8416, "step": 12110 }, { "epoch": 39.708196721311474, "grad_norm": 5.550378322601318, "learning_rate": 1.3726913653281773e-05, "loss": 0.7452, "step": 12111 }, { "epoch": 39.71147540983607, "grad_norm": 7.756575584411621, "learning_rate": 1.3725928248551366e-05, "loss": 0.8433, "step": 12112 }, { "epoch": 39.71475409836066, "grad_norm": 7.104249477386475, "learning_rate": 1.3724942801807246e-05, "loss": 0.6981, "step": 12113 }, { "epoch": 39.718032786885246, "grad_norm": 6.252318859100342, "learning_rate": 1.3723957313060521e-05, "loss": 0.7292, "step": 12114 }, { "epoch": 39.721311475409834, "grad_norm": 15.311223030090332, "learning_rate": 1.3722971782322308e-05, "loss": 0.7082, "step": 12115 }, { "epoch": 39.72459016393443, "grad_norm": 7.678683757781982, "learning_rate": 1.3721986209603712e-05, "loss": 0.802, "step": 12116 }, { "epoch": 39.72786885245902, "grad_norm": 210.17196655273438, "learning_rate": 1.3721000594915857e-05, "loss": 0.7714, "step": 12117 }, { "epoch": 39.731147540983606, "grad_norm": 8.290760040283203, "learning_rate": 1.3720014938269848e-05, "loss": 0.801, "step": 12118 }, { "epoch": 39.734426229508195, "grad_norm": 6.752884864807129, "learning_rate": 1.3719029239676807e-05, "loss": 0.9364, "step": 12119 }, { "epoch": 39.73770491803279, "grad_norm": 7.483573913574219, "learning_rate": 1.371804349914784e-05, "loss": 0.9483, "step": 12120 }, { "epoch": 39.74098360655738, "grad_norm": 8.262362480163574, "learning_rate": 1.3717057716694069e-05, "loss": 0.9059, "step": 12121 }, { "epoch": 39.74426229508197, "grad_norm": 9.008947372436523, "learning_rate": 1.3716071892326607e-05, "loss": 0.6411, "step": 12122 }, { "epoch": 39.747540983606555, "grad_norm": 7.377036094665527, "learning_rate": 1.371508602605657e-05, "loss": 0.61, "step": 12123 }, { "epoch": 39.75081967213115, "grad_norm": 7.8012375831604, "learning_rate": 1.3714100117895077e-05, "loss": 0.6148, "step": 12124 }, { "epoch": 39.75409836065574, "grad_norm": 8.528889656066895, "learning_rate": 1.371311416785324e-05, "loss": 0.8303, "step": 12125 }, { "epoch": 39.75737704918033, "grad_norm": 8.548812866210938, "learning_rate": 1.3712128175942186e-05, "loss": 0.7377, "step": 12126 }, { "epoch": 39.760655737704916, "grad_norm": 14.023489952087402, "learning_rate": 1.3711142142173021e-05, "loss": 0.8129, "step": 12127 }, { "epoch": 39.76393442622951, "grad_norm": 64.12960815429688, "learning_rate": 1.3710156066556875e-05, "loss": 0.6577, "step": 12128 }, { "epoch": 39.7672131147541, "grad_norm": 9.128954887390137, "learning_rate": 1.3709169949104857e-05, "loss": 0.8859, "step": 12129 }, { "epoch": 39.77049180327869, "grad_norm": 8.374517440795898, "learning_rate": 1.3708183789828093e-05, "loss": 0.9918, "step": 12130 }, { "epoch": 39.773770491803276, "grad_norm": 17.16236686706543, "learning_rate": 1.3707197588737702e-05, "loss": 0.6349, "step": 12131 }, { "epoch": 39.77704918032787, "grad_norm": 8.032076835632324, "learning_rate": 1.3706211345844802e-05, "loss": 0.8375, "step": 12132 }, { "epoch": 39.78032786885246, "grad_norm": 9.16000747680664, "learning_rate": 1.3705225061160516e-05, "loss": 0.8017, "step": 12133 }, { "epoch": 39.78360655737705, "grad_norm": 7.382822513580322, "learning_rate": 1.3704238734695966e-05, "loss": 0.8905, "step": 12134 }, { "epoch": 39.78688524590164, "grad_norm": 13.40578556060791, "learning_rate": 1.3703252366462274e-05, "loss": 0.6885, "step": 12135 }, { "epoch": 39.79016393442623, "grad_norm": 10.040979385375977, "learning_rate": 1.3702265956470558e-05, "loss": 0.8306, "step": 12136 }, { "epoch": 39.79344262295082, "grad_norm": 17.962400436401367, "learning_rate": 1.3701279504731946e-05, "loss": 0.8304, "step": 12137 }, { "epoch": 39.79672131147541, "grad_norm": 31.971729278564453, "learning_rate": 1.3700293011257557e-05, "loss": 1.1182, "step": 12138 }, { "epoch": 39.8, "grad_norm": 13.367081642150879, "learning_rate": 1.3699306476058523e-05, "loss": 0.7748, "step": 12139 }, { "epoch": 39.80327868852459, "grad_norm": 9.017455101013184, "learning_rate": 1.3698319899145956e-05, "loss": 0.9126, "step": 12140 }, { "epoch": 39.80655737704918, "grad_norm": 9.231613159179688, "learning_rate": 1.3697333280530989e-05, "loss": 0.8512, "step": 12141 }, { "epoch": 39.80983606557377, "grad_norm": 9.57387638092041, "learning_rate": 1.3696346620224743e-05, "loss": 0.847, "step": 12142 }, { "epoch": 39.81311475409836, "grad_norm": 13.879741668701172, "learning_rate": 1.3695359918238349e-05, "loss": 0.9581, "step": 12143 }, { "epoch": 39.81639344262295, "grad_norm": 8.82685375213623, "learning_rate": 1.3694373174582926e-05, "loss": 0.7845, "step": 12144 }, { "epoch": 39.81967213114754, "grad_norm": 8.141766548156738, "learning_rate": 1.369338638926961e-05, "loss": 0.925, "step": 12145 }, { "epoch": 39.82295081967213, "grad_norm": 8.765380859375, "learning_rate": 1.3692399562309517e-05, "loss": 0.6672, "step": 12146 }, { "epoch": 39.82622950819672, "grad_norm": 7.9454345703125, "learning_rate": 1.3691412693713782e-05, "loss": 0.4978, "step": 12147 }, { "epoch": 39.829508196721314, "grad_norm": 8.752057075500488, "learning_rate": 1.3690425783493533e-05, "loss": 0.7069, "step": 12148 }, { "epoch": 39.8327868852459, "grad_norm": 17.654064178466797, "learning_rate": 1.3689438831659891e-05, "loss": 0.7972, "step": 12149 }, { "epoch": 39.83606557377049, "grad_norm": 7.156238555908203, "learning_rate": 1.3688451838223995e-05, "loss": 0.6028, "step": 12150 }, { "epoch": 39.83934426229508, "grad_norm": 7.719756603240967, "learning_rate": 1.3687464803196963e-05, "loss": 0.9556, "step": 12151 }, { "epoch": 39.842622950819674, "grad_norm": 7.003945350646973, "learning_rate": 1.368647772658994e-05, "loss": 0.8926, "step": 12152 }, { "epoch": 39.84590163934426, "grad_norm": 7.573827743530273, "learning_rate": 1.3685490608414041e-05, "loss": 0.791, "step": 12153 }, { "epoch": 39.84918032786885, "grad_norm": 8.409744262695312, "learning_rate": 1.3684503448680407e-05, "loss": 0.6404, "step": 12154 }, { "epoch": 39.85245901639344, "grad_norm": 7.103931903839111, "learning_rate": 1.3683516247400164e-05, "loss": 0.6927, "step": 12155 }, { "epoch": 39.855737704918035, "grad_norm": 5.724786281585693, "learning_rate": 1.3682529004584445e-05, "loss": 0.8078, "step": 12156 }, { "epoch": 39.85901639344262, "grad_norm": 25.668039321899414, "learning_rate": 1.3681541720244382e-05, "loss": 0.6374, "step": 12157 }, { "epoch": 39.86229508196721, "grad_norm": 35.7377815246582, "learning_rate": 1.368055439439111e-05, "loss": 0.7567, "step": 12158 }, { "epoch": 39.86557377049181, "grad_norm": 8.178922653198242, "learning_rate": 1.367956702703576e-05, "loss": 0.7829, "step": 12159 }, { "epoch": 39.868852459016395, "grad_norm": 8.546087265014648, "learning_rate": 1.3678579618189465e-05, "loss": 0.8339, "step": 12160 }, { "epoch": 39.87213114754098, "grad_norm": 12.730707168579102, "learning_rate": 1.367759216786336e-05, "loss": 0.5296, "step": 12161 }, { "epoch": 39.87540983606557, "grad_norm": 8.677217483520508, "learning_rate": 1.3676604676068581e-05, "loss": 0.6277, "step": 12162 }, { "epoch": 39.87868852459017, "grad_norm": 6.627484321594238, "learning_rate": 1.3675617142816262e-05, "loss": 0.711, "step": 12163 }, { "epoch": 39.881967213114756, "grad_norm": 8.432112693786621, "learning_rate": 1.3674629568117536e-05, "loss": 0.814, "step": 12164 }, { "epoch": 39.885245901639344, "grad_norm": 13.196627616882324, "learning_rate": 1.3673641951983543e-05, "loss": 0.8099, "step": 12165 }, { "epoch": 39.88852459016393, "grad_norm": 6.971263885498047, "learning_rate": 1.3672654294425416e-05, "loss": 0.7123, "step": 12166 }, { "epoch": 39.89180327868853, "grad_norm": 9.926105499267578, "learning_rate": 1.3671666595454296e-05, "loss": 0.91, "step": 12167 }, { "epoch": 39.895081967213116, "grad_norm": 8.943208694458008, "learning_rate": 1.3670678855081315e-05, "loss": 0.6171, "step": 12168 }, { "epoch": 39.898360655737704, "grad_norm": 9.127135276794434, "learning_rate": 1.3669691073317613e-05, "loss": 0.7244, "step": 12169 }, { "epoch": 39.90163934426229, "grad_norm": 7.934724807739258, "learning_rate": 1.3668703250174327e-05, "loss": 0.8468, "step": 12170 }, { "epoch": 39.90491803278689, "grad_norm": 6.648648738861084, "learning_rate": 1.36677153856626e-05, "loss": 0.8312, "step": 12171 }, { "epoch": 39.90819672131148, "grad_norm": 10.519739151000977, "learning_rate": 1.3666727479793571e-05, "loss": 0.808, "step": 12172 }, { "epoch": 39.911475409836065, "grad_norm": 7.42850923538208, "learning_rate": 1.3665739532578373e-05, "loss": 0.7995, "step": 12173 }, { "epoch": 39.91475409836065, "grad_norm": 6.31751823425293, "learning_rate": 1.3664751544028152e-05, "loss": 0.7403, "step": 12174 }, { "epoch": 39.91803278688525, "grad_norm": 8.661209106445312, "learning_rate": 1.3663763514154047e-05, "loss": 0.8965, "step": 12175 }, { "epoch": 39.92131147540984, "grad_norm": 9.245556831359863, "learning_rate": 1.36627754429672e-05, "loss": 0.812, "step": 12176 }, { "epoch": 39.924590163934425, "grad_norm": 6.286188125610352, "learning_rate": 1.3661787330478749e-05, "loss": 0.8825, "step": 12177 }, { "epoch": 39.927868852459014, "grad_norm": 8.791657447814941, "learning_rate": 1.366079917669984e-05, "loss": 0.7599, "step": 12178 }, { "epoch": 39.93114754098361, "grad_norm": 7.269491195678711, "learning_rate": 1.3659810981641612e-05, "loss": 0.8538, "step": 12179 }, { "epoch": 39.9344262295082, "grad_norm": 6.668618679046631, "learning_rate": 1.3658822745315213e-05, "loss": 0.7962, "step": 12180 }, { "epoch": 39.937704918032786, "grad_norm": 6.806713104248047, "learning_rate": 1.3657834467731781e-05, "loss": 0.8572, "step": 12181 }, { "epoch": 39.940983606557374, "grad_norm": 12.273054122924805, "learning_rate": 1.3656846148902465e-05, "loss": 0.7991, "step": 12182 }, { "epoch": 39.94426229508197, "grad_norm": 11.190539360046387, "learning_rate": 1.3655857788838404e-05, "loss": 0.7507, "step": 12183 }, { "epoch": 39.94754098360656, "grad_norm": 17.27566909790039, "learning_rate": 1.3654869387550747e-05, "loss": 0.9811, "step": 12184 }, { "epoch": 39.950819672131146, "grad_norm": 6.995475769042969, "learning_rate": 1.3653880945050634e-05, "loss": 0.9117, "step": 12185 }, { "epoch": 39.954098360655735, "grad_norm": 10.438260078430176, "learning_rate": 1.3652892461349216e-05, "loss": 0.6278, "step": 12186 }, { "epoch": 39.95737704918033, "grad_norm": 10.437211036682129, "learning_rate": 1.365190393645764e-05, "loss": 0.7213, "step": 12187 }, { "epoch": 39.96065573770492, "grad_norm": 7.447690963745117, "learning_rate": 1.3650915370387048e-05, "loss": 0.7986, "step": 12188 }, { "epoch": 39.96393442622951, "grad_norm": 12.690519332885742, "learning_rate": 1.3649926763148588e-05, "loss": 0.9177, "step": 12189 }, { "epoch": 39.967213114754095, "grad_norm": 7.455809593200684, "learning_rate": 1.3648938114753411e-05, "loss": 0.8132, "step": 12190 }, { "epoch": 39.97049180327869, "grad_norm": 8.200352668762207, "learning_rate": 1.3647949425212663e-05, "loss": 0.6756, "step": 12191 }, { "epoch": 39.97377049180328, "grad_norm": 28.09374237060547, "learning_rate": 1.364696069453749e-05, "loss": 0.5462, "step": 12192 }, { "epoch": 39.97704918032787, "grad_norm": 7.588699817657471, "learning_rate": 1.3645971922739048e-05, "loss": 0.8899, "step": 12193 }, { "epoch": 39.980327868852456, "grad_norm": 7.3318328857421875, "learning_rate": 1.3644983109828476e-05, "loss": 0.6163, "step": 12194 }, { "epoch": 39.98360655737705, "grad_norm": 8.697731018066406, "learning_rate": 1.3643994255816932e-05, "loss": 0.6934, "step": 12195 }, { "epoch": 39.98688524590164, "grad_norm": 9.416088104248047, "learning_rate": 1.3643005360715564e-05, "loss": 0.8383, "step": 12196 }, { "epoch": 39.99016393442623, "grad_norm": 7.2023396492004395, "learning_rate": 1.3642016424535525e-05, "loss": 0.859, "step": 12197 }, { "epoch": 39.993442622950816, "grad_norm": 7.353969573974609, "learning_rate": 1.3641027447287963e-05, "loss": 0.4719, "step": 12198 }, { "epoch": 39.99672131147541, "grad_norm": 8.508024215698242, "learning_rate": 1.3640038428984028e-05, "loss": 0.7255, "step": 12199 }, { "epoch": 40.0, "grad_norm": 6.497402667999268, "learning_rate": 1.3639049369634878e-05, "loss": 0.5323, "step": 12200 }, { "epoch": 40.00327868852459, "grad_norm": 8.60344123840332, "learning_rate": 1.363806026925166e-05, "loss": 0.7267, "step": 12201 }, { "epoch": 40.006557377049184, "grad_norm": 7.138844966888428, "learning_rate": 1.3637071127845531e-05, "loss": 0.7945, "step": 12202 }, { "epoch": 40.00983606557377, "grad_norm": 7.086264610290527, "learning_rate": 1.3636081945427643e-05, "loss": 0.9341, "step": 12203 }, { "epoch": 40.01311475409836, "grad_norm": 7.699326992034912, "learning_rate": 1.3635092722009154e-05, "loss": 0.6402, "step": 12204 }, { "epoch": 40.01639344262295, "grad_norm": 6.548423767089844, "learning_rate": 1.363410345760121e-05, "loss": 0.6507, "step": 12205 }, { "epoch": 40.019672131147544, "grad_norm": 7.24653959274292, "learning_rate": 1.3633114152214975e-05, "loss": 0.6231, "step": 12206 }, { "epoch": 40.02295081967213, "grad_norm": 5.984201431274414, "learning_rate": 1.3632124805861598e-05, "loss": 0.7618, "step": 12207 }, { "epoch": 40.02622950819672, "grad_norm": 6.157212734222412, "learning_rate": 1.3631135418552237e-05, "loss": 0.8006, "step": 12208 }, { "epoch": 40.02950819672131, "grad_norm": 6.0841217041015625, "learning_rate": 1.363014599029805e-05, "loss": 0.5262, "step": 12209 }, { "epoch": 40.032786885245905, "grad_norm": 7.646154403686523, "learning_rate": 1.3629156521110191e-05, "loss": 0.6475, "step": 12210 }, { "epoch": 40.03606557377049, "grad_norm": 5.887955188751221, "learning_rate": 1.362816701099982e-05, "loss": 0.8866, "step": 12211 }, { "epoch": 40.03934426229508, "grad_norm": 8.153398513793945, "learning_rate": 1.3627177459978095e-05, "loss": 0.8197, "step": 12212 }, { "epoch": 40.04262295081967, "grad_norm": 9.072991371154785, "learning_rate": 1.362618786805617e-05, "loss": 0.9576, "step": 12213 }, { "epoch": 40.045901639344265, "grad_norm": 13.27306079864502, "learning_rate": 1.3625198235245208e-05, "loss": 0.6611, "step": 12214 }, { "epoch": 40.049180327868854, "grad_norm": 7.833564758300781, "learning_rate": 1.3624208561556367e-05, "loss": 0.7429, "step": 12215 }, { "epoch": 40.05245901639344, "grad_norm": 7.374127388000488, "learning_rate": 1.3623218847000805e-05, "loss": 0.983, "step": 12216 }, { "epoch": 40.05573770491803, "grad_norm": 6.350316047668457, "learning_rate": 1.3622229091589685e-05, "loss": 0.4366, "step": 12217 }, { "epoch": 40.059016393442626, "grad_norm": 7.6677350997924805, "learning_rate": 1.3621239295334166e-05, "loss": 0.7829, "step": 12218 }, { "epoch": 40.062295081967214, "grad_norm": 6.794818878173828, "learning_rate": 1.3620249458245407e-05, "loss": 0.7689, "step": 12219 }, { "epoch": 40.0655737704918, "grad_norm": 6.372815132141113, "learning_rate": 1.3619259580334571e-05, "loss": 0.5911, "step": 12220 }, { "epoch": 40.06885245901639, "grad_norm": 7.562721252441406, "learning_rate": 1.3618269661612822e-05, "loss": 0.7487, "step": 12221 }, { "epoch": 40.072131147540986, "grad_norm": 9.780732154846191, "learning_rate": 1.3617279702091319e-05, "loss": 0.8729, "step": 12222 }, { "epoch": 40.075409836065575, "grad_norm": 6.572467803955078, "learning_rate": 1.3616289701781229e-05, "loss": 0.748, "step": 12223 }, { "epoch": 40.07868852459016, "grad_norm": 7.782295227050781, "learning_rate": 1.361529966069371e-05, "loss": 0.6338, "step": 12224 }, { "epoch": 40.08196721311475, "grad_norm": 7.019989490509033, "learning_rate": 1.3614309578839928e-05, "loss": 0.6751, "step": 12225 }, { "epoch": 40.08524590163935, "grad_norm": 6.886468887329102, "learning_rate": 1.361331945623105e-05, "loss": 0.6284, "step": 12226 }, { "epoch": 40.088524590163935, "grad_norm": 6.387851238250732, "learning_rate": 1.3612329292878234e-05, "loss": 0.83, "step": 12227 }, { "epoch": 40.09180327868852, "grad_norm": 7.6995930671691895, "learning_rate": 1.3611339088792654e-05, "loss": 0.6957, "step": 12228 }, { "epoch": 40.09508196721311, "grad_norm": 6.932914733886719, "learning_rate": 1.361034884398547e-05, "loss": 0.6145, "step": 12229 }, { "epoch": 40.09836065573771, "grad_norm": 7.5584845542907715, "learning_rate": 1.3609358558467847e-05, "loss": 0.8309, "step": 12230 }, { "epoch": 40.101639344262296, "grad_norm": 8.86400032043457, "learning_rate": 1.3608368232250955e-05, "loss": 0.8607, "step": 12231 }, { "epoch": 40.104918032786884, "grad_norm": 7.156610488891602, "learning_rate": 1.3607377865345959e-05, "loss": 0.9009, "step": 12232 }, { "epoch": 40.10819672131147, "grad_norm": 6.525928497314453, "learning_rate": 1.3606387457764025e-05, "loss": 0.7208, "step": 12233 }, { "epoch": 40.11147540983607, "grad_norm": 6.54276180267334, "learning_rate": 1.3605397009516326e-05, "loss": 0.7133, "step": 12234 }, { "epoch": 40.114754098360656, "grad_norm": 6.846290111541748, "learning_rate": 1.3604406520614025e-05, "loss": 0.6141, "step": 12235 }, { "epoch": 40.118032786885244, "grad_norm": 5.048425674438477, "learning_rate": 1.3603415991068295e-05, "loss": 0.8429, "step": 12236 }, { "epoch": 40.12131147540983, "grad_norm": 5.492076396942139, "learning_rate": 1.36024254208903e-05, "loss": 0.3674, "step": 12237 }, { "epoch": 40.12459016393443, "grad_norm": 13.495416641235352, "learning_rate": 1.3601434810091214e-05, "loss": 0.6307, "step": 12238 }, { "epoch": 40.12786885245902, "grad_norm": 14.738722801208496, "learning_rate": 1.3600444158682207e-05, "loss": 0.5884, "step": 12239 }, { "epoch": 40.131147540983605, "grad_norm": 5.997570037841797, "learning_rate": 1.3599453466674446e-05, "loss": 0.622, "step": 12240 }, { "epoch": 40.13442622950819, "grad_norm": 6.639709949493408, "learning_rate": 1.3598462734079108e-05, "loss": 0.7668, "step": 12241 }, { "epoch": 40.13770491803279, "grad_norm": 9.050456047058105, "learning_rate": 1.3597471960907358e-05, "loss": 0.6952, "step": 12242 }, { "epoch": 40.14098360655738, "grad_norm": 6.518143653869629, "learning_rate": 1.3596481147170373e-05, "loss": 0.7819, "step": 12243 }, { "epoch": 40.144262295081965, "grad_norm": 5.7742109298706055, "learning_rate": 1.3595490292879322e-05, "loss": 0.6495, "step": 12244 }, { "epoch": 40.14754098360656, "grad_norm": 7.805915832519531, "learning_rate": 1.3594499398045382e-05, "loss": 0.7303, "step": 12245 }, { "epoch": 40.15081967213115, "grad_norm": 10.685829162597656, "learning_rate": 1.3593508462679723e-05, "loss": 0.7232, "step": 12246 }, { "epoch": 40.15409836065574, "grad_norm": 7.265591621398926, "learning_rate": 1.359251748679352e-05, "loss": 1.1517, "step": 12247 }, { "epoch": 40.157377049180326, "grad_norm": 9.420047760009766, "learning_rate": 1.3591526470397946e-05, "loss": 1.1083, "step": 12248 }, { "epoch": 40.16065573770492, "grad_norm": 8.344549179077148, "learning_rate": 1.3590535413504177e-05, "loss": 0.7467, "step": 12249 }, { "epoch": 40.16393442622951, "grad_norm": 12.613602638244629, "learning_rate": 1.358954431612339e-05, "loss": 0.7432, "step": 12250 }, { "epoch": 40.1672131147541, "grad_norm": 7.104030609130859, "learning_rate": 1.358855317826676e-05, "loss": 0.8181, "step": 12251 }, { "epoch": 40.170491803278686, "grad_norm": 20.548301696777344, "learning_rate": 1.3587561999945457e-05, "loss": 0.7383, "step": 12252 }, { "epoch": 40.17377049180328, "grad_norm": 6.584007740020752, "learning_rate": 1.3586570781170665e-05, "loss": 0.7037, "step": 12253 }, { "epoch": 40.17704918032787, "grad_norm": 6.916869640350342, "learning_rate": 1.3585579521953557e-05, "loss": 0.8489, "step": 12254 }, { "epoch": 40.18032786885246, "grad_norm": 7.352086544036865, "learning_rate": 1.3584588222305312e-05, "loss": 0.6082, "step": 12255 }, { "epoch": 40.18360655737705, "grad_norm": 5.720729827880859, "learning_rate": 1.358359688223711e-05, "loss": 0.8253, "step": 12256 }, { "epoch": 40.18688524590164, "grad_norm": 6.694274425506592, "learning_rate": 1.3582605501760124e-05, "loss": 0.6475, "step": 12257 }, { "epoch": 40.19016393442623, "grad_norm": 6.239319324493408, "learning_rate": 1.3581614080885538e-05, "loss": 1.0048, "step": 12258 }, { "epoch": 40.19344262295082, "grad_norm": 6.0781707763671875, "learning_rate": 1.3580622619624528e-05, "loss": 0.7963, "step": 12259 }, { "epoch": 40.19672131147541, "grad_norm": 6.643462657928467, "learning_rate": 1.3579631117988277e-05, "loss": 0.7521, "step": 12260 }, { "epoch": 40.2, "grad_norm": 6.845733165740967, "learning_rate": 1.357863957598796e-05, "loss": 0.7482, "step": 12261 }, { "epoch": 40.20327868852459, "grad_norm": 10.156227111816406, "learning_rate": 1.3577647993634764e-05, "loss": 0.7912, "step": 12262 }, { "epoch": 40.20655737704918, "grad_norm": 6.61570930480957, "learning_rate": 1.357665637093987e-05, "loss": 0.8233, "step": 12263 }, { "epoch": 40.20983606557377, "grad_norm": 6.970962047576904, "learning_rate": 1.357566470791445e-05, "loss": 0.6187, "step": 12264 }, { "epoch": 40.21311475409836, "grad_norm": 9.720771789550781, "learning_rate": 1.3574673004569694e-05, "loss": 0.684, "step": 12265 }, { "epoch": 40.21639344262295, "grad_norm": 10.191377639770508, "learning_rate": 1.3573681260916785e-05, "loss": 0.8162, "step": 12266 }, { "epoch": 40.21967213114754, "grad_norm": 8.385127067565918, "learning_rate": 1.3572689476966903e-05, "loss": 0.5807, "step": 12267 }, { "epoch": 40.22295081967213, "grad_norm": 5.498740196228027, "learning_rate": 1.357169765273123e-05, "loss": 0.6622, "step": 12268 }, { "epoch": 40.226229508196724, "grad_norm": 7.873338222503662, "learning_rate": 1.3570705788220957e-05, "loss": 0.8098, "step": 12269 }, { "epoch": 40.22950819672131, "grad_norm": 6.268111705780029, "learning_rate": 1.3569713883447262e-05, "loss": 0.7532, "step": 12270 }, { "epoch": 40.2327868852459, "grad_norm": 6.034352779388428, "learning_rate": 1.3568721938421333e-05, "loss": 0.7714, "step": 12271 }, { "epoch": 40.23606557377049, "grad_norm": 5.276493072509766, "learning_rate": 1.3567729953154349e-05, "loss": 0.7699, "step": 12272 }, { "epoch": 40.239344262295084, "grad_norm": 6.286479949951172, "learning_rate": 1.3566737927657505e-05, "loss": 0.7803, "step": 12273 }, { "epoch": 40.24262295081967, "grad_norm": 10.673735618591309, "learning_rate": 1.3565745861941978e-05, "loss": 0.7176, "step": 12274 }, { "epoch": 40.24590163934426, "grad_norm": 7.692169189453125, "learning_rate": 1.3564753756018965e-05, "loss": 0.8478, "step": 12275 }, { "epoch": 40.24918032786885, "grad_norm": 10.945898056030273, "learning_rate": 1.3563761609899643e-05, "loss": 0.7545, "step": 12276 }, { "epoch": 40.252459016393445, "grad_norm": 10.343452453613281, "learning_rate": 1.3562769423595204e-05, "loss": 0.6362, "step": 12277 }, { "epoch": 40.25573770491803, "grad_norm": 7.42725944519043, "learning_rate": 1.3561777197116837e-05, "loss": 0.759, "step": 12278 }, { "epoch": 40.25901639344262, "grad_norm": 8.626775741577148, "learning_rate": 1.3560784930475725e-05, "loss": 0.8822, "step": 12279 }, { "epoch": 40.26229508196721, "grad_norm": 7.065799236297607, "learning_rate": 1.3559792623683063e-05, "loss": 0.8143, "step": 12280 }, { "epoch": 40.265573770491805, "grad_norm": 6.600005149841309, "learning_rate": 1.3558800276750038e-05, "loss": 0.7463, "step": 12281 }, { "epoch": 40.268852459016394, "grad_norm": 6.490273952484131, "learning_rate": 1.355780788968784e-05, "loss": 0.803, "step": 12282 }, { "epoch": 40.27213114754098, "grad_norm": 7.324841499328613, "learning_rate": 1.3556815462507658e-05, "loss": 0.6776, "step": 12283 }, { "epoch": 40.27540983606557, "grad_norm": 9.11096477508545, "learning_rate": 1.3555822995220684e-05, "loss": 0.8801, "step": 12284 }, { "epoch": 40.278688524590166, "grad_norm": 7.689518928527832, "learning_rate": 1.3554830487838109e-05, "loss": 0.5908, "step": 12285 }, { "epoch": 40.281967213114754, "grad_norm": 9.028133392333984, "learning_rate": 1.3553837940371125e-05, "loss": 0.7095, "step": 12286 }, { "epoch": 40.28524590163934, "grad_norm": 7.40793514251709, "learning_rate": 1.3552845352830918e-05, "loss": 0.7726, "step": 12287 }, { "epoch": 40.28852459016394, "grad_norm": 12.2131929397583, "learning_rate": 1.3551852725228691e-05, "loss": 0.8046, "step": 12288 }, { "epoch": 40.291803278688526, "grad_norm": 5.98703670501709, "learning_rate": 1.355086005757563e-05, "loss": 0.7706, "step": 12289 }, { "epoch": 40.295081967213115, "grad_norm": 8.5346040725708, "learning_rate": 1.3549867349882927e-05, "loss": 0.9731, "step": 12290 }, { "epoch": 40.2983606557377, "grad_norm": 7.317689895629883, "learning_rate": 1.3548874602161784e-05, "loss": 0.9444, "step": 12291 }, { "epoch": 40.3016393442623, "grad_norm": 7.747157573699951, "learning_rate": 1.3547881814423388e-05, "loss": 0.6115, "step": 12292 }, { "epoch": 40.30491803278689, "grad_norm": 6.801743507385254, "learning_rate": 1.3546888986678932e-05, "loss": 0.9412, "step": 12293 }, { "epoch": 40.308196721311475, "grad_norm": 8.352203369140625, "learning_rate": 1.354589611893962e-05, "loss": 0.6536, "step": 12294 }, { "epoch": 40.31147540983606, "grad_norm": 7.560935974121094, "learning_rate": 1.3544903211216638e-05, "loss": 0.9203, "step": 12295 }, { "epoch": 40.31475409836066, "grad_norm": 7.571980953216553, "learning_rate": 1.354391026352119e-05, "loss": 0.6401, "step": 12296 }, { "epoch": 40.31803278688525, "grad_norm": 8.487058639526367, "learning_rate": 1.3542917275864467e-05, "loss": 0.6387, "step": 12297 }, { "epoch": 40.321311475409836, "grad_norm": 7.587336540222168, "learning_rate": 1.3541924248257668e-05, "loss": 0.6176, "step": 12298 }, { "epoch": 40.324590163934424, "grad_norm": 6.6305975914001465, "learning_rate": 1.3540931180711993e-05, "loss": 0.6919, "step": 12299 }, { "epoch": 40.32786885245902, "grad_norm": 9.103447914123535, "learning_rate": 1.3539938073238634e-05, "loss": 0.8806, "step": 12300 }, { "epoch": 40.33114754098361, "grad_norm": 8.358625411987305, "learning_rate": 1.3538944925848796e-05, "loss": 0.7143, "step": 12301 }, { "epoch": 40.334426229508196, "grad_norm": 18.151914596557617, "learning_rate": 1.3537951738553674e-05, "loss": 0.7612, "step": 12302 }, { "epoch": 40.337704918032784, "grad_norm": 11.24295425415039, "learning_rate": 1.3536958511364464e-05, "loss": 0.7865, "step": 12303 }, { "epoch": 40.34098360655738, "grad_norm": 7.142697811126709, "learning_rate": 1.3535965244292372e-05, "loss": 0.4553, "step": 12304 }, { "epoch": 40.34426229508197, "grad_norm": 6.78442907333374, "learning_rate": 1.3534971937348593e-05, "loss": 0.7129, "step": 12305 }, { "epoch": 40.34754098360656, "grad_norm": 5.911091327667236, "learning_rate": 1.3533978590544335e-05, "loss": 0.7856, "step": 12306 }, { "epoch": 40.350819672131145, "grad_norm": 7.727376937866211, "learning_rate": 1.3532985203890788e-05, "loss": 0.5658, "step": 12307 }, { "epoch": 40.35409836065574, "grad_norm": 6.708475589752197, "learning_rate": 1.3531991777399162e-05, "loss": 0.4829, "step": 12308 }, { "epoch": 40.35737704918033, "grad_norm": 7.514475345611572, "learning_rate": 1.3530998311080657e-05, "loss": 0.7476, "step": 12309 }, { "epoch": 40.36065573770492, "grad_norm": 6.46337366104126, "learning_rate": 1.3530004804946476e-05, "loss": 0.5815, "step": 12310 }, { "epoch": 40.363934426229505, "grad_norm": 10.14471435546875, "learning_rate": 1.3529011259007815e-05, "loss": 0.681, "step": 12311 }, { "epoch": 40.3672131147541, "grad_norm": 5.704432487487793, "learning_rate": 1.3528017673275891e-05, "loss": 0.8544, "step": 12312 }, { "epoch": 40.37049180327869, "grad_norm": 6.829358100891113, "learning_rate": 1.3527024047761893e-05, "loss": 0.8795, "step": 12313 }, { "epoch": 40.37377049180328, "grad_norm": 6.547038555145264, "learning_rate": 1.3526030382477035e-05, "loss": 0.9116, "step": 12314 }, { "epoch": 40.377049180327866, "grad_norm": 6.312441349029541, "learning_rate": 1.3525036677432516e-05, "loss": 0.8757, "step": 12315 }, { "epoch": 40.38032786885246, "grad_norm": 7.812562465667725, "learning_rate": 1.3524042932639545e-05, "loss": 0.6401, "step": 12316 }, { "epoch": 40.38360655737705, "grad_norm": 6.413692474365234, "learning_rate": 1.3523049148109326e-05, "loss": 0.6673, "step": 12317 }, { "epoch": 40.38688524590164, "grad_norm": 5.691187858581543, "learning_rate": 1.3522055323853063e-05, "loss": 0.6487, "step": 12318 }, { "epoch": 40.390163934426226, "grad_norm": 6.650147914886475, "learning_rate": 1.3521061459881963e-05, "loss": 0.9495, "step": 12319 }, { "epoch": 40.39344262295082, "grad_norm": 7.25719690322876, "learning_rate": 1.3520067556207238e-05, "loss": 0.9789, "step": 12320 }, { "epoch": 40.39672131147541, "grad_norm": 7.751839637756348, "learning_rate": 1.3519073612840091e-05, "loss": 0.8654, "step": 12321 }, { "epoch": 40.4, "grad_norm": 6.088550567626953, "learning_rate": 1.3518079629791725e-05, "loss": 0.8001, "step": 12322 }, { "epoch": 40.40327868852459, "grad_norm": 9.24310302734375, "learning_rate": 1.3517085607073359e-05, "loss": 0.6404, "step": 12323 }, { "epoch": 40.40655737704918, "grad_norm": 7.2172651290893555, "learning_rate": 1.3516091544696193e-05, "loss": 0.9481, "step": 12324 }, { "epoch": 40.40983606557377, "grad_norm": 6.2151103019714355, "learning_rate": 1.3515097442671442e-05, "loss": 0.5851, "step": 12325 }, { "epoch": 40.41311475409836, "grad_norm": 7.055150985717773, "learning_rate": 1.3514103301010308e-05, "loss": 0.8011, "step": 12326 }, { "epoch": 40.41639344262295, "grad_norm": 7.839053630828857, "learning_rate": 1.351310911972401e-05, "loss": 0.5755, "step": 12327 }, { "epoch": 40.41967213114754, "grad_norm": 10.87124252319336, "learning_rate": 1.3512114898823754e-05, "loss": 0.6817, "step": 12328 }, { "epoch": 40.42295081967213, "grad_norm": 6.80279541015625, "learning_rate": 1.3511120638320747e-05, "loss": 0.6326, "step": 12329 }, { "epoch": 40.42622950819672, "grad_norm": 7.085321426391602, "learning_rate": 1.3510126338226206e-05, "loss": 1.0616, "step": 12330 }, { "epoch": 40.429508196721315, "grad_norm": 8.504044532775879, "learning_rate": 1.3509131998551342e-05, "loss": 0.7461, "step": 12331 }, { "epoch": 40.4327868852459, "grad_norm": 7.861621379852295, "learning_rate": 1.3508137619307365e-05, "loss": 0.8211, "step": 12332 }, { "epoch": 40.43606557377049, "grad_norm": 6.905022144317627, "learning_rate": 1.3507143200505488e-05, "loss": 0.8274, "step": 12333 }, { "epoch": 40.43934426229508, "grad_norm": 7.239614009857178, "learning_rate": 1.3506148742156927e-05, "loss": 0.4488, "step": 12334 }, { "epoch": 40.442622950819676, "grad_norm": 6.373598575592041, "learning_rate": 1.350515424427289e-05, "loss": 0.8208, "step": 12335 }, { "epoch": 40.445901639344264, "grad_norm": 27.718040466308594, "learning_rate": 1.3504159706864597e-05, "loss": 0.6219, "step": 12336 }, { "epoch": 40.44918032786885, "grad_norm": 6.07092809677124, "learning_rate": 1.350316512994326e-05, "loss": 0.8755, "step": 12337 }, { "epoch": 40.45245901639344, "grad_norm": 5.220706939697266, "learning_rate": 1.3502170513520094e-05, "loss": 0.7918, "step": 12338 }, { "epoch": 40.455737704918036, "grad_norm": 6.534107208251953, "learning_rate": 1.3501175857606313e-05, "loss": 0.8381, "step": 12339 }, { "epoch": 40.459016393442624, "grad_norm": 8.404561042785645, "learning_rate": 1.3500181162213135e-05, "loss": 0.9147, "step": 12340 }, { "epoch": 40.46229508196721, "grad_norm": 6.536882400512695, "learning_rate": 1.3499186427351776e-05, "loss": 0.7545, "step": 12341 }, { "epoch": 40.4655737704918, "grad_norm": 6.317025661468506, "learning_rate": 1.3498191653033448e-05, "loss": 1.0527, "step": 12342 }, { "epoch": 40.4688524590164, "grad_norm": 7.192734241485596, "learning_rate": 1.3497196839269374e-05, "loss": 0.7847, "step": 12343 }, { "epoch": 40.472131147540985, "grad_norm": 5.617842674255371, "learning_rate": 1.3496201986070769e-05, "loss": 0.8491, "step": 12344 }, { "epoch": 40.47540983606557, "grad_norm": 8.058084487915039, "learning_rate": 1.3495207093448854e-05, "loss": 0.8127, "step": 12345 }, { "epoch": 40.47868852459016, "grad_norm": 6.793260097503662, "learning_rate": 1.3494212161414844e-05, "loss": 0.7749, "step": 12346 }, { "epoch": 40.48196721311476, "grad_norm": 6.386213302612305, "learning_rate": 1.3493217189979954e-05, "loss": 0.7019, "step": 12347 }, { "epoch": 40.485245901639345, "grad_norm": 7.981024265289307, "learning_rate": 1.3492222179155415e-05, "loss": 0.6197, "step": 12348 }, { "epoch": 40.488524590163934, "grad_norm": 7.672698974609375, "learning_rate": 1.3491227128952436e-05, "loss": 0.9479, "step": 12349 }, { "epoch": 40.49180327868852, "grad_norm": 7.538677215576172, "learning_rate": 1.349023203938224e-05, "loss": 0.7643, "step": 12350 }, { "epoch": 40.49508196721312, "grad_norm": 6.587961196899414, "learning_rate": 1.3489236910456052e-05, "loss": 0.7658, "step": 12351 }, { "epoch": 40.498360655737706, "grad_norm": 7.09282922744751, "learning_rate": 1.3488241742185086e-05, "loss": 0.5997, "step": 12352 }, { "epoch": 40.501639344262294, "grad_norm": 9.557889938354492, "learning_rate": 1.3487246534580573e-05, "loss": 0.6903, "step": 12353 }, { "epoch": 40.50491803278688, "grad_norm": 8.471711158752441, "learning_rate": 1.3486251287653728e-05, "loss": 0.4143, "step": 12354 }, { "epoch": 40.50819672131148, "grad_norm": 7.485469341278076, "learning_rate": 1.3485256001415772e-05, "loss": 0.6378, "step": 12355 }, { "epoch": 40.511475409836066, "grad_norm": 4.956235408782959, "learning_rate": 1.3484260675877934e-05, "loss": 0.894, "step": 12356 }, { "epoch": 40.514754098360655, "grad_norm": 6.578891754150391, "learning_rate": 1.3483265311051433e-05, "loss": 0.8342, "step": 12357 }, { "epoch": 40.51803278688524, "grad_norm": 5.539565086364746, "learning_rate": 1.3482269906947494e-05, "loss": 0.9429, "step": 12358 }, { "epoch": 40.52131147540984, "grad_norm": 5.150231838226318, "learning_rate": 1.3481274463577339e-05, "loss": 0.6079, "step": 12359 }, { "epoch": 40.52459016393443, "grad_norm": 6.79200553894043, "learning_rate": 1.3480278980952197e-05, "loss": 0.6601, "step": 12360 }, { "epoch": 40.527868852459015, "grad_norm": 6.740339756011963, "learning_rate": 1.347928345908329e-05, "loss": 0.7479, "step": 12361 }, { "epoch": 40.5311475409836, "grad_norm": 8.259753227233887, "learning_rate": 1.3478287897981846e-05, "loss": 0.7835, "step": 12362 }, { "epoch": 40.5344262295082, "grad_norm": 7.192842960357666, "learning_rate": 1.3477292297659087e-05, "loss": 0.9636, "step": 12363 }, { "epoch": 40.53770491803279, "grad_norm": 7.362115383148193, "learning_rate": 1.3476296658126244e-05, "loss": 0.9094, "step": 12364 }, { "epoch": 40.540983606557376, "grad_norm": 5.927029132843018, "learning_rate": 1.3475300979394542e-05, "loss": 0.8373, "step": 12365 }, { "epoch": 40.544262295081964, "grad_norm": 7.660556316375732, "learning_rate": 1.3474305261475206e-05, "loss": 0.5782, "step": 12366 }, { "epoch": 40.54754098360656, "grad_norm": 6.416204929351807, "learning_rate": 1.347330950437947e-05, "loss": 0.8015, "step": 12367 }, { "epoch": 40.55081967213115, "grad_norm": 6.923553466796875, "learning_rate": 1.3472313708118553e-05, "loss": 0.4666, "step": 12368 }, { "epoch": 40.554098360655736, "grad_norm": 7.0764665603637695, "learning_rate": 1.3471317872703691e-05, "loss": 0.7084, "step": 12369 }, { "epoch": 40.557377049180324, "grad_norm": 9.838562965393066, "learning_rate": 1.347032199814611e-05, "loss": 0.6853, "step": 12370 }, { "epoch": 40.56065573770492, "grad_norm": 8.795564651489258, "learning_rate": 1.3469326084457041e-05, "loss": 0.6744, "step": 12371 }, { "epoch": 40.56393442622951, "grad_norm": 6.970638275146484, "learning_rate": 1.346833013164771e-05, "loss": 0.7037, "step": 12372 }, { "epoch": 40.5672131147541, "grad_norm": 6.404440879821777, "learning_rate": 1.3467334139729354e-05, "loss": 0.9899, "step": 12373 }, { "epoch": 40.570491803278685, "grad_norm": 8.238582611083984, "learning_rate": 1.3466338108713202e-05, "loss": 0.7837, "step": 12374 }, { "epoch": 40.57377049180328, "grad_norm": 5.406695365905762, "learning_rate": 1.3465342038610479e-05, "loss": 0.7532, "step": 12375 }, { "epoch": 40.57704918032787, "grad_norm": 8.00943660736084, "learning_rate": 1.3464345929432425e-05, "loss": 0.5964, "step": 12376 }, { "epoch": 40.58032786885246, "grad_norm": 13.926122665405273, "learning_rate": 1.3463349781190267e-05, "loss": 0.8774, "step": 12377 }, { "epoch": 40.58360655737705, "grad_norm": 6.817537307739258, "learning_rate": 1.3462353593895238e-05, "loss": 1.0476, "step": 12378 }, { "epoch": 40.58688524590164, "grad_norm": 7.611715316772461, "learning_rate": 1.3461357367558575e-05, "loss": 0.6817, "step": 12379 }, { "epoch": 40.59016393442623, "grad_norm": 6.497992515563965, "learning_rate": 1.346036110219151e-05, "loss": 0.8684, "step": 12380 }, { "epoch": 40.59344262295082, "grad_norm": 6.7169952392578125, "learning_rate": 1.3459364797805273e-05, "loss": 0.7596, "step": 12381 }, { "epoch": 40.59672131147541, "grad_norm": 9.313982963562012, "learning_rate": 1.3458368454411101e-05, "loss": 0.7739, "step": 12382 }, { "epoch": 40.6, "grad_norm": 6.241846561431885, "learning_rate": 1.345737207202023e-05, "loss": 1.0336, "step": 12383 }, { "epoch": 40.60327868852459, "grad_norm": 5.963932514190674, "learning_rate": 1.3456375650643893e-05, "loss": 0.8207, "step": 12384 }, { "epoch": 40.60655737704918, "grad_norm": 5.576345443725586, "learning_rate": 1.3455379190293327e-05, "loss": 0.5785, "step": 12385 }, { "epoch": 40.609836065573774, "grad_norm": 6.6194963455200195, "learning_rate": 1.345438269097977e-05, "loss": 0.9708, "step": 12386 }, { "epoch": 40.61311475409836, "grad_norm": 7.105234622955322, "learning_rate": 1.3453386152714454e-05, "loss": 0.9898, "step": 12387 }, { "epoch": 40.61639344262295, "grad_norm": 5.794884204864502, "learning_rate": 1.345238957550862e-05, "loss": 0.7916, "step": 12388 }, { "epoch": 40.61967213114754, "grad_norm": 8.040778160095215, "learning_rate": 1.3451392959373502e-05, "loss": 0.6363, "step": 12389 }, { "epoch": 40.622950819672134, "grad_norm": 5.513176441192627, "learning_rate": 1.345039630432034e-05, "loss": 0.7309, "step": 12390 }, { "epoch": 40.62622950819672, "grad_norm": 6.872064113616943, "learning_rate": 1.3449399610360376e-05, "loss": 0.7953, "step": 12391 }, { "epoch": 40.62950819672131, "grad_norm": 5.967870712280273, "learning_rate": 1.3448402877504841e-05, "loss": 0.546, "step": 12392 }, { "epoch": 40.6327868852459, "grad_norm": 6.066037654876709, "learning_rate": 1.3447406105764982e-05, "loss": 0.7444, "step": 12393 }, { "epoch": 40.636065573770495, "grad_norm": 6.7416672706604, "learning_rate": 1.3446409295152029e-05, "loss": 0.5053, "step": 12394 }, { "epoch": 40.63934426229508, "grad_norm": 8.459108352661133, "learning_rate": 1.3445412445677234e-05, "loss": 0.6725, "step": 12395 }, { "epoch": 40.64262295081967, "grad_norm": 7.104532241821289, "learning_rate": 1.3444415557351827e-05, "loss": 0.6573, "step": 12396 }, { "epoch": 40.64590163934426, "grad_norm": 6.902348518371582, "learning_rate": 1.3443418630187054e-05, "loss": 0.7327, "step": 12397 }, { "epoch": 40.649180327868855, "grad_norm": 6.428092002868652, "learning_rate": 1.3442421664194156e-05, "loss": 1.0235, "step": 12398 }, { "epoch": 40.65245901639344, "grad_norm": 8.001379013061523, "learning_rate": 1.3441424659384374e-05, "loss": 0.7007, "step": 12399 }, { "epoch": 40.65573770491803, "grad_norm": 7.14898681640625, "learning_rate": 1.3440427615768951e-05, "loss": 0.8558, "step": 12400 }, { "epoch": 40.65901639344262, "grad_norm": 7.874419212341309, "learning_rate": 1.343943053335913e-05, "loss": 0.717, "step": 12401 }, { "epoch": 40.662295081967216, "grad_norm": 7.997844219207764, "learning_rate": 1.3438433412166154e-05, "loss": 0.7298, "step": 12402 }, { "epoch": 40.665573770491804, "grad_norm": 6.9530744552612305, "learning_rate": 1.3437436252201266e-05, "loss": 0.6887, "step": 12403 }, { "epoch": 40.66885245901639, "grad_norm": 13.012727737426758, "learning_rate": 1.343643905347571e-05, "loss": 0.6408, "step": 12404 }, { "epoch": 40.67213114754098, "grad_norm": 8.187252044677734, "learning_rate": 1.3435441816000729e-05, "loss": 0.8894, "step": 12405 }, { "epoch": 40.675409836065576, "grad_norm": 8.526824951171875, "learning_rate": 1.3434444539787575e-05, "loss": 0.6499, "step": 12406 }, { "epoch": 40.678688524590164, "grad_norm": 9.400930404663086, "learning_rate": 1.3433447224847482e-05, "loss": 0.7894, "step": 12407 }, { "epoch": 40.68196721311475, "grad_norm": 7.3734259605407715, "learning_rate": 1.3432449871191704e-05, "loss": 0.7773, "step": 12408 }, { "epoch": 40.68524590163934, "grad_norm": 6.715418815612793, "learning_rate": 1.3431452478831483e-05, "loss": 0.658, "step": 12409 }, { "epoch": 40.68852459016394, "grad_norm": 8.369282722473145, "learning_rate": 1.3430455047778072e-05, "loss": 0.5966, "step": 12410 }, { "epoch": 40.691803278688525, "grad_norm": 5.824760913848877, "learning_rate": 1.3429457578042708e-05, "loss": 0.6756, "step": 12411 }, { "epoch": 40.69508196721311, "grad_norm": 8.544333457946777, "learning_rate": 1.3428460069636648e-05, "loss": 0.7113, "step": 12412 }, { "epoch": 40.6983606557377, "grad_norm": 8.42284107208252, "learning_rate": 1.3427462522571135e-05, "loss": 0.8625, "step": 12413 }, { "epoch": 40.7016393442623, "grad_norm": 8.269091606140137, "learning_rate": 1.342646493685742e-05, "loss": 0.6873, "step": 12414 }, { "epoch": 40.704918032786885, "grad_norm": 8.976609230041504, "learning_rate": 1.3425467312506746e-05, "loss": 0.6741, "step": 12415 }, { "epoch": 40.708196721311474, "grad_norm": 9.992155075073242, "learning_rate": 1.342446964953037e-05, "loss": 0.9765, "step": 12416 }, { "epoch": 40.71147540983607, "grad_norm": 7.125435829162598, "learning_rate": 1.3423471947939539e-05, "loss": 0.791, "step": 12417 }, { "epoch": 40.71475409836066, "grad_norm": 8.466032981872559, "learning_rate": 1.3422474207745499e-05, "loss": 0.6501, "step": 12418 }, { "epoch": 40.718032786885246, "grad_norm": 8.582844734191895, "learning_rate": 1.3421476428959505e-05, "loss": 1.0196, "step": 12419 }, { "epoch": 40.721311475409834, "grad_norm": 7.828038215637207, "learning_rate": 1.3420478611592807e-05, "loss": 0.5951, "step": 12420 }, { "epoch": 40.72459016393443, "grad_norm": 5.823336124420166, "learning_rate": 1.3419480755656657e-05, "loss": 0.8949, "step": 12421 }, { "epoch": 40.72786885245902, "grad_norm": 9.063163757324219, "learning_rate": 1.3418482861162305e-05, "loss": 0.7714, "step": 12422 }, { "epoch": 40.731147540983606, "grad_norm": 14.395147323608398, "learning_rate": 1.3417484928121005e-05, "loss": 0.9146, "step": 12423 }, { "epoch": 40.734426229508195, "grad_norm": 9.850935935974121, "learning_rate": 1.3416486956544009e-05, "loss": 0.5494, "step": 12424 }, { "epoch": 40.73770491803279, "grad_norm": 8.13437271118164, "learning_rate": 1.341548894644257e-05, "loss": 0.7795, "step": 12425 }, { "epoch": 40.74098360655738, "grad_norm": 11.202980041503906, "learning_rate": 1.341449089782794e-05, "loss": 0.8287, "step": 12426 }, { "epoch": 40.74426229508197, "grad_norm": 6.892708778381348, "learning_rate": 1.341349281071138e-05, "loss": 0.7666, "step": 12427 }, { "epoch": 40.747540983606555, "grad_norm": 4.662358283996582, "learning_rate": 1.3412494685104133e-05, "loss": 0.9285, "step": 12428 }, { "epoch": 40.75081967213115, "grad_norm": 8.614603996276855, "learning_rate": 1.3411496521017465e-05, "loss": 0.7434, "step": 12429 }, { "epoch": 40.75409836065574, "grad_norm": 6.491398811340332, "learning_rate": 1.3410498318462625e-05, "loss": 0.7962, "step": 12430 }, { "epoch": 40.75737704918033, "grad_norm": 7.8467607498168945, "learning_rate": 1.3409500077450869e-05, "loss": 0.8813, "step": 12431 }, { "epoch": 40.760655737704916, "grad_norm": 8.16181755065918, "learning_rate": 1.3408501797993458e-05, "loss": 0.6477, "step": 12432 }, { "epoch": 40.76393442622951, "grad_norm": 9.153674125671387, "learning_rate": 1.3407503480101642e-05, "loss": 0.8618, "step": 12433 }, { "epoch": 40.7672131147541, "grad_norm": 6.260248184204102, "learning_rate": 1.3406505123786686e-05, "loss": 0.9779, "step": 12434 }, { "epoch": 40.77049180327869, "grad_norm": 6.025625705718994, "learning_rate": 1.3405506729059839e-05, "loss": 0.8563, "step": 12435 }, { "epoch": 40.773770491803276, "grad_norm": 11.869047164916992, "learning_rate": 1.3404508295932363e-05, "loss": 0.7045, "step": 12436 }, { "epoch": 40.77704918032787, "grad_norm": 8.949578285217285, "learning_rate": 1.3403509824415517e-05, "loss": 0.9703, "step": 12437 }, { "epoch": 40.78032786885246, "grad_norm": 6.317122936248779, "learning_rate": 1.340251131452056e-05, "loss": 0.6381, "step": 12438 }, { "epoch": 40.78360655737705, "grad_norm": 6.624623775482178, "learning_rate": 1.3401512766258749e-05, "loss": 0.7584, "step": 12439 }, { "epoch": 40.78688524590164, "grad_norm": 9.727290153503418, "learning_rate": 1.3400514179641344e-05, "loss": 0.8577, "step": 12440 }, { "epoch": 40.79016393442623, "grad_norm": 30.928340911865234, "learning_rate": 1.3399515554679607e-05, "loss": 0.6632, "step": 12441 }, { "epoch": 40.79344262295082, "grad_norm": 6.958768844604492, "learning_rate": 1.3398516891384798e-05, "loss": 0.5471, "step": 12442 }, { "epoch": 40.79672131147541, "grad_norm": 7.893246173858643, "learning_rate": 1.3397518189768177e-05, "loss": 0.721, "step": 12443 }, { "epoch": 40.8, "grad_norm": 12.14602279663086, "learning_rate": 1.3396519449841006e-05, "loss": 1.0118, "step": 12444 }, { "epoch": 40.80327868852459, "grad_norm": 6.560255527496338, "learning_rate": 1.3395520671614549e-05, "loss": 0.7652, "step": 12445 }, { "epoch": 40.80655737704918, "grad_norm": 9.265108108520508, "learning_rate": 1.339452185510006e-05, "loss": 0.69, "step": 12446 }, { "epoch": 40.80983606557377, "grad_norm": 7.04487419128418, "learning_rate": 1.3393523000308812e-05, "loss": 0.6123, "step": 12447 }, { "epoch": 40.81311475409836, "grad_norm": 6.069937229156494, "learning_rate": 1.3392524107252062e-05, "loss": 0.6707, "step": 12448 }, { "epoch": 40.81639344262295, "grad_norm": 7.265008926391602, "learning_rate": 1.3391525175941078e-05, "loss": 0.8066, "step": 12449 }, { "epoch": 40.81967213114754, "grad_norm": 11.291196823120117, "learning_rate": 1.3390526206387117e-05, "loss": 0.6835, "step": 12450 }, { "epoch": 40.82295081967213, "grad_norm": 7.251643657684326, "learning_rate": 1.3389527198601454e-05, "loss": 0.7592, "step": 12451 }, { "epoch": 40.82622950819672, "grad_norm": 10.932731628417969, "learning_rate": 1.338852815259534e-05, "loss": 0.9749, "step": 12452 }, { "epoch": 40.829508196721314, "grad_norm": 7.046280860900879, "learning_rate": 1.3387529068380056e-05, "loss": 0.8188, "step": 12453 }, { "epoch": 40.8327868852459, "grad_norm": 14.100638389587402, "learning_rate": 1.3386529945966854e-05, "loss": 0.9191, "step": 12454 }, { "epoch": 40.83606557377049, "grad_norm": 8.262117385864258, "learning_rate": 1.3385530785367005e-05, "loss": 0.9933, "step": 12455 }, { "epoch": 40.83934426229508, "grad_norm": 7.79600191116333, "learning_rate": 1.338453158659178e-05, "loss": 0.6422, "step": 12456 }, { "epoch": 40.842622950819674, "grad_norm": 6.7155256271362305, "learning_rate": 1.338353234965244e-05, "loss": 0.6828, "step": 12457 }, { "epoch": 40.84590163934426, "grad_norm": 6.952385902404785, "learning_rate": 1.3382533074560256e-05, "loss": 0.7355, "step": 12458 }, { "epoch": 40.84918032786885, "grad_norm": 6.526601791381836, "learning_rate": 1.3381533761326494e-05, "loss": 0.5842, "step": 12459 }, { "epoch": 40.85245901639344, "grad_norm": 8.102806091308594, "learning_rate": 1.3380534409962425e-05, "loss": 0.9324, "step": 12460 }, { "epoch": 40.855737704918035, "grad_norm": 6.535938262939453, "learning_rate": 1.337953502047931e-05, "loss": 0.874, "step": 12461 }, { "epoch": 40.85901639344262, "grad_norm": 7.8044352531433105, "learning_rate": 1.3378535592888431e-05, "loss": 0.6538, "step": 12462 }, { "epoch": 40.86229508196721, "grad_norm": 6.686742782592773, "learning_rate": 1.3377536127201045e-05, "loss": 0.5776, "step": 12463 }, { "epoch": 40.86557377049181, "grad_norm": 6.125966548919678, "learning_rate": 1.3376536623428431e-05, "loss": 0.8419, "step": 12464 }, { "epoch": 40.868852459016395, "grad_norm": 9.887679100036621, "learning_rate": 1.3375537081581853e-05, "loss": 0.7878, "step": 12465 }, { "epoch": 40.87213114754098, "grad_norm": 6.394730567932129, "learning_rate": 1.3374537501672587e-05, "loss": 0.9501, "step": 12466 }, { "epoch": 40.87540983606557, "grad_norm": 6.659977912902832, "learning_rate": 1.33735378837119e-05, "loss": 0.7811, "step": 12467 }, { "epoch": 40.87868852459017, "grad_norm": 6.815410137176514, "learning_rate": 1.3372538227711069e-05, "loss": 0.6754, "step": 12468 }, { "epoch": 40.881967213114756, "grad_norm": 7.287814140319824, "learning_rate": 1.3371538533681361e-05, "loss": 0.7628, "step": 12469 }, { "epoch": 40.885245901639344, "grad_norm": 7.843199729919434, "learning_rate": 1.337053880163405e-05, "loss": 0.6628, "step": 12470 }, { "epoch": 40.88852459016393, "grad_norm": 12.420967102050781, "learning_rate": 1.336953903158041e-05, "loss": 0.84, "step": 12471 }, { "epoch": 40.89180327868853, "grad_norm": 7.590868949890137, "learning_rate": 1.3368539223531713e-05, "loss": 0.6929, "step": 12472 }, { "epoch": 40.895081967213116, "grad_norm": 9.579608917236328, "learning_rate": 1.3367539377499234e-05, "loss": 0.5554, "step": 12473 }, { "epoch": 40.898360655737704, "grad_norm": 22.141185760498047, "learning_rate": 1.3366539493494248e-05, "loss": 1.0055, "step": 12474 }, { "epoch": 40.90163934426229, "grad_norm": 7.133476734161377, "learning_rate": 1.336553957152803e-05, "loss": 0.6375, "step": 12475 }, { "epoch": 40.90491803278689, "grad_norm": 8.30863094329834, "learning_rate": 1.3364539611611851e-05, "loss": 0.8286, "step": 12476 }, { "epoch": 40.90819672131148, "grad_norm": 5.879636287689209, "learning_rate": 1.3363539613756992e-05, "loss": 0.8403, "step": 12477 }, { "epoch": 40.911475409836065, "grad_norm": 7.465361595153809, "learning_rate": 1.3362539577974727e-05, "loss": 0.7083, "step": 12478 }, { "epoch": 40.91475409836065, "grad_norm": 8.364503860473633, "learning_rate": 1.3361539504276331e-05, "loss": 0.5593, "step": 12479 }, { "epoch": 40.91803278688525, "grad_norm": 11.914952278137207, "learning_rate": 1.3360539392673082e-05, "loss": 0.6156, "step": 12480 }, { "epoch": 40.92131147540984, "grad_norm": 7.14321756362915, "learning_rate": 1.3359539243176259e-05, "loss": 0.888, "step": 12481 }, { "epoch": 40.924590163934425, "grad_norm": 6.243953227996826, "learning_rate": 1.3358539055797133e-05, "loss": 0.7088, "step": 12482 }, { "epoch": 40.927868852459014, "grad_norm": 15.630765914916992, "learning_rate": 1.335753883054699e-05, "loss": 0.7503, "step": 12483 }, { "epoch": 40.93114754098361, "grad_norm": 5.503742218017578, "learning_rate": 1.3356538567437108e-05, "loss": 1.102, "step": 12484 }, { "epoch": 40.9344262295082, "grad_norm": 7.957124710083008, "learning_rate": 1.335553826647876e-05, "loss": 0.8249, "step": 12485 }, { "epoch": 40.937704918032786, "grad_norm": 13.538032531738281, "learning_rate": 1.3354537927683234e-05, "loss": 0.6329, "step": 12486 }, { "epoch": 40.940983606557374, "grad_norm": 6.626559257507324, "learning_rate": 1.33535375510618e-05, "loss": 0.9073, "step": 12487 }, { "epoch": 40.94426229508197, "grad_norm": 5.981478214263916, "learning_rate": 1.3352537136625748e-05, "loss": 1.1024, "step": 12488 }, { "epoch": 40.94754098360656, "grad_norm": 10.868133544921875, "learning_rate": 1.3351536684386347e-05, "loss": 0.6082, "step": 12489 }, { "epoch": 40.950819672131146, "grad_norm": 8.090205192565918, "learning_rate": 1.3350536194354893e-05, "loss": 0.7737, "step": 12490 }, { "epoch": 40.954098360655735, "grad_norm": 10.38841724395752, "learning_rate": 1.3349535666542654e-05, "loss": 0.7212, "step": 12491 }, { "epoch": 40.95737704918033, "grad_norm": 11.424325942993164, "learning_rate": 1.3348535100960922e-05, "loss": 0.5291, "step": 12492 }, { "epoch": 40.96065573770492, "grad_norm": 8.03466510772705, "learning_rate": 1.334753449762097e-05, "loss": 0.8061, "step": 12493 }, { "epoch": 40.96393442622951, "grad_norm": 12.649380683898926, "learning_rate": 1.3346533856534092e-05, "loss": 0.8381, "step": 12494 }, { "epoch": 40.967213114754095, "grad_norm": 6.305080413818359, "learning_rate": 1.3345533177711563e-05, "loss": 0.9043, "step": 12495 }, { "epoch": 40.97049180327869, "grad_norm": 7.871833324432373, "learning_rate": 1.3344532461164667e-05, "loss": 0.6578, "step": 12496 }, { "epoch": 40.97377049180328, "grad_norm": 8.289070129394531, "learning_rate": 1.3343531706904694e-05, "loss": 0.7102, "step": 12497 }, { "epoch": 40.97704918032787, "grad_norm": 8.573221206665039, "learning_rate": 1.334253091494292e-05, "loss": 0.7548, "step": 12498 }, { "epoch": 40.980327868852456, "grad_norm": 8.970769882202148, "learning_rate": 1.3341530085290637e-05, "loss": 0.8081, "step": 12499 }, { "epoch": 40.98360655737705, "grad_norm": 7.410013198852539, "learning_rate": 1.3340529217959126e-05, "loss": 0.8758, "step": 12500 }, { "epoch": 40.98688524590164, "grad_norm": 8.74470043182373, "learning_rate": 1.3339528312959678e-05, "loss": 0.5724, "step": 12501 }, { "epoch": 40.99016393442623, "grad_norm": 8.724284172058105, "learning_rate": 1.3338527370303573e-05, "loss": 0.9193, "step": 12502 }, { "epoch": 40.993442622950816, "grad_norm": 5.283675670623779, "learning_rate": 1.3337526390002102e-05, "loss": 0.5415, "step": 12503 }, { "epoch": 40.99672131147541, "grad_norm": 35.76531982421875, "learning_rate": 1.333652537206655e-05, "loss": 0.6703, "step": 12504 }, { "epoch": 41.0, "grad_norm": 8.085567474365234, "learning_rate": 1.3335524316508208e-05, "loss": 0.7884, "step": 12505 }, { "epoch": 41.00327868852459, "grad_norm": 7.835958003997803, "learning_rate": 1.3334523223338358e-05, "loss": 0.8795, "step": 12506 }, { "epoch": 41.006557377049184, "grad_norm": 12.281420707702637, "learning_rate": 1.3333522092568294e-05, "loss": 0.8085, "step": 12507 }, { "epoch": 41.00983606557377, "grad_norm": 5.565911769866943, "learning_rate": 1.33325209242093e-05, "loss": 0.4867, "step": 12508 }, { "epoch": 41.01311475409836, "grad_norm": 7.281698226928711, "learning_rate": 1.3331519718272672e-05, "loss": 0.8271, "step": 12509 }, { "epoch": 41.01639344262295, "grad_norm": 8.194411277770996, "learning_rate": 1.333051847476969e-05, "loss": 0.6669, "step": 12510 }, { "epoch": 41.019672131147544, "grad_norm": 8.144156455993652, "learning_rate": 1.3329517193711653e-05, "loss": 0.7509, "step": 12511 }, { "epoch": 41.02295081967213, "grad_norm": 6.435244083404541, "learning_rate": 1.3328515875109847e-05, "loss": 0.6131, "step": 12512 }, { "epoch": 41.02622950819672, "grad_norm": 6.766719818115234, "learning_rate": 1.332751451897556e-05, "loss": 0.6277, "step": 12513 }, { "epoch": 41.02950819672131, "grad_norm": 7.651451587677002, "learning_rate": 1.332651312532009e-05, "loss": 0.7649, "step": 12514 }, { "epoch": 41.032786885245905, "grad_norm": 9.303668975830078, "learning_rate": 1.3325511694154727e-05, "loss": 0.8644, "step": 12515 }, { "epoch": 41.03606557377049, "grad_norm": 7.200976371765137, "learning_rate": 1.3324510225490763e-05, "loss": 0.6363, "step": 12516 }, { "epoch": 41.03934426229508, "grad_norm": 8.10496997833252, "learning_rate": 1.3323508719339487e-05, "loss": 0.6033, "step": 12517 }, { "epoch": 41.04262295081967, "grad_norm": 7.6675591468811035, "learning_rate": 1.3322507175712197e-05, "loss": 0.8554, "step": 12518 }, { "epoch": 41.045901639344265, "grad_norm": 7.225252151489258, "learning_rate": 1.3321505594620178e-05, "loss": 0.7287, "step": 12519 }, { "epoch": 41.049180327868854, "grad_norm": 8.860506057739258, "learning_rate": 1.3320503976074736e-05, "loss": 0.9105, "step": 12520 }, { "epoch": 41.05245901639344, "grad_norm": 7.663388729095459, "learning_rate": 1.3319502320087158e-05, "loss": 0.3983, "step": 12521 }, { "epoch": 41.05573770491803, "grad_norm": 11.337303161621094, "learning_rate": 1.3318500626668738e-05, "loss": 0.6422, "step": 12522 }, { "epoch": 41.059016393442626, "grad_norm": 7.314824104309082, "learning_rate": 1.3317498895830777e-05, "loss": 0.6354, "step": 12523 }, { "epoch": 41.062295081967214, "grad_norm": 7.093935012817383, "learning_rate": 1.3316497127584562e-05, "loss": 0.5439, "step": 12524 }, { "epoch": 41.0655737704918, "grad_norm": 6.880271911621094, "learning_rate": 1.3315495321941397e-05, "loss": 0.8019, "step": 12525 }, { "epoch": 41.06885245901639, "grad_norm": 7.1114277839660645, "learning_rate": 1.3314493478912574e-05, "loss": 0.7819, "step": 12526 }, { "epoch": 41.072131147540986, "grad_norm": 6.091897487640381, "learning_rate": 1.3313491598509389e-05, "loss": 0.5799, "step": 12527 }, { "epoch": 41.075409836065575, "grad_norm": 8.096471786499023, "learning_rate": 1.331248968074314e-05, "loss": 0.8495, "step": 12528 }, { "epoch": 41.07868852459016, "grad_norm": 7.08358907699585, "learning_rate": 1.3311487725625132e-05, "loss": 0.6251, "step": 12529 }, { "epoch": 41.08196721311475, "grad_norm": 16.963947296142578, "learning_rate": 1.3310485733166652e-05, "loss": 0.8438, "step": 12530 }, { "epoch": 41.08524590163935, "grad_norm": 9.741580963134766, "learning_rate": 1.3309483703379004e-05, "loss": 0.8152, "step": 12531 }, { "epoch": 41.088524590163935, "grad_norm": 6.88072395324707, "learning_rate": 1.3308481636273487e-05, "loss": 0.6686, "step": 12532 }, { "epoch": 41.09180327868852, "grad_norm": 8.098076820373535, "learning_rate": 1.33074795318614e-05, "loss": 0.6982, "step": 12533 }, { "epoch": 41.09508196721311, "grad_norm": 9.727994918823242, "learning_rate": 1.330647739015404e-05, "loss": 0.8862, "step": 12534 }, { "epoch": 41.09836065573771, "grad_norm": 5.82159948348999, "learning_rate": 1.3305475211162713e-05, "loss": 0.8865, "step": 12535 }, { "epoch": 41.101639344262296, "grad_norm": 7.3877081871032715, "learning_rate": 1.3304472994898712e-05, "loss": 0.6984, "step": 12536 }, { "epoch": 41.104918032786884, "grad_norm": 6.5510663986206055, "learning_rate": 1.3303470741373343e-05, "loss": 0.8516, "step": 12537 }, { "epoch": 41.10819672131147, "grad_norm": 6.494876384735107, "learning_rate": 1.330246845059791e-05, "loss": 0.5545, "step": 12538 }, { "epoch": 41.11147540983607, "grad_norm": 18.955862045288086, "learning_rate": 1.3301466122583708e-05, "loss": 0.8254, "step": 12539 }, { "epoch": 41.114754098360656, "grad_norm": 7.493495464324951, "learning_rate": 1.3300463757342046e-05, "loss": 0.8133, "step": 12540 }, { "epoch": 41.118032786885244, "grad_norm": 11.551005363464355, "learning_rate": 1.329946135488422e-05, "loss": 0.7401, "step": 12541 }, { "epoch": 41.12131147540983, "grad_norm": 7.297074317932129, "learning_rate": 1.3298458915221539e-05, "loss": 0.7478, "step": 12542 }, { "epoch": 41.12459016393443, "grad_norm": 6.130612850189209, "learning_rate": 1.3297456438365304e-05, "loss": 0.9065, "step": 12543 }, { "epoch": 41.12786885245902, "grad_norm": 15.39603042602539, "learning_rate": 1.329645392432682e-05, "loss": 0.6926, "step": 12544 }, { "epoch": 41.131147540983605, "grad_norm": 11.408501625061035, "learning_rate": 1.3295451373117387e-05, "loss": 0.928, "step": 12545 }, { "epoch": 41.13442622950819, "grad_norm": 6.297382831573486, "learning_rate": 1.3294448784748316e-05, "loss": 0.7616, "step": 12546 }, { "epoch": 41.13770491803279, "grad_norm": 5.535819053649902, "learning_rate": 1.3293446159230911e-05, "loss": 0.6419, "step": 12547 }, { "epoch": 41.14098360655738, "grad_norm": 7.929716110229492, "learning_rate": 1.3292443496576475e-05, "loss": 0.8426, "step": 12548 }, { "epoch": 41.144262295081965, "grad_norm": 6.712207794189453, "learning_rate": 1.3291440796796315e-05, "loss": 0.7799, "step": 12549 }, { "epoch": 41.14754098360656, "grad_norm": 7.207492828369141, "learning_rate": 1.3290438059901738e-05, "loss": 1.0215, "step": 12550 }, { "epoch": 41.15081967213115, "grad_norm": 7.136857032775879, "learning_rate": 1.3289435285904051e-05, "loss": 0.6964, "step": 12551 }, { "epoch": 41.15409836065574, "grad_norm": 20.1102237701416, "learning_rate": 1.3288432474814558e-05, "loss": 0.8624, "step": 12552 }, { "epoch": 41.157377049180326, "grad_norm": 9.782776832580566, "learning_rate": 1.3287429626644575e-05, "loss": 0.8321, "step": 12553 }, { "epoch": 41.16065573770492, "grad_norm": 71.18834686279297, "learning_rate": 1.3286426741405401e-05, "loss": 0.7069, "step": 12554 }, { "epoch": 41.16393442622951, "grad_norm": 5.359735488891602, "learning_rate": 1.3285423819108349e-05, "loss": 0.8192, "step": 12555 }, { "epoch": 41.1672131147541, "grad_norm": 7.171512603759766, "learning_rate": 1.3284420859764726e-05, "loss": 0.7631, "step": 12556 }, { "epoch": 41.170491803278686, "grad_norm": 8.134406089782715, "learning_rate": 1.3283417863385849e-05, "loss": 0.8353, "step": 12557 }, { "epoch": 41.17377049180328, "grad_norm": 7.150765895843506, "learning_rate": 1.3282414829983014e-05, "loss": 0.6776, "step": 12558 }, { "epoch": 41.17704918032787, "grad_norm": 6.8435187339782715, "learning_rate": 1.3281411759567544e-05, "loss": 0.4735, "step": 12559 }, { "epoch": 41.18032786885246, "grad_norm": 12.16314697265625, "learning_rate": 1.3280408652150745e-05, "loss": 0.8469, "step": 12560 }, { "epoch": 41.18360655737705, "grad_norm": 6.453035354614258, "learning_rate": 1.3279405507743923e-05, "loss": 0.7465, "step": 12561 }, { "epoch": 41.18688524590164, "grad_norm": 7.2301506996154785, "learning_rate": 1.3278402326358397e-05, "loss": 0.7082, "step": 12562 }, { "epoch": 41.19016393442623, "grad_norm": 7.126986980438232, "learning_rate": 1.3277399108005478e-05, "loss": 0.6878, "step": 12563 }, { "epoch": 41.19344262295082, "grad_norm": 8.247785568237305, "learning_rate": 1.327639585269647e-05, "loss": 0.8511, "step": 12564 }, { "epoch": 41.19672131147541, "grad_norm": 21.63768196105957, "learning_rate": 1.3275392560442697e-05, "loss": 0.5854, "step": 12565 }, { "epoch": 41.2, "grad_norm": 6.1580119132995605, "learning_rate": 1.3274389231255466e-05, "loss": 0.6486, "step": 12566 }, { "epoch": 41.20327868852459, "grad_norm": 6.741732120513916, "learning_rate": 1.3273385865146093e-05, "loss": 0.7785, "step": 12567 }, { "epoch": 41.20655737704918, "grad_norm": 7.668515205383301, "learning_rate": 1.3272382462125893e-05, "loss": 0.4996, "step": 12568 }, { "epoch": 41.20983606557377, "grad_norm": 9.982364654541016, "learning_rate": 1.3271379022206174e-05, "loss": 0.7769, "step": 12569 }, { "epoch": 41.21311475409836, "grad_norm": 6.317191123962402, "learning_rate": 1.327037554539826e-05, "loss": 0.9966, "step": 12570 }, { "epoch": 41.21639344262295, "grad_norm": 5.888591289520264, "learning_rate": 1.3269372031713456e-05, "loss": 0.8409, "step": 12571 }, { "epoch": 41.21967213114754, "grad_norm": 7.023979187011719, "learning_rate": 1.3268368481163085e-05, "loss": 0.8834, "step": 12572 }, { "epoch": 41.22295081967213, "grad_norm": 8.337352752685547, "learning_rate": 1.3267364893758465e-05, "loss": 0.7488, "step": 12573 }, { "epoch": 41.226229508196724, "grad_norm": 10.730910301208496, "learning_rate": 1.3266361269510904e-05, "loss": 0.8373, "step": 12574 }, { "epoch": 41.22950819672131, "grad_norm": 8.686038970947266, "learning_rate": 1.3265357608431726e-05, "loss": 0.5819, "step": 12575 }, { "epoch": 41.2327868852459, "grad_norm": 8.460734367370605, "learning_rate": 1.3264353910532242e-05, "loss": 0.7184, "step": 12576 }, { "epoch": 41.23606557377049, "grad_norm": 6.505263328552246, "learning_rate": 1.3263350175823778e-05, "loss": 0.7452, "step": 12577 }, { "epoch": 41.239344262295084, "grad_norm": 10.137653350830078, "learning_rate": 1.3262346404317646e-05, "loss": 0.7869, "step": 12578 }, { "epoch": 41.24262295081967, "grad_norm": 8.579291343688965, "learning_rate": 1.3261342596025167e-05, "loss": 0.7578, "step": 12579 }, { "epoch": 41.24590163934426, "grad_norm": 17.075048446655273, "learning_rate": 1.3260338750957656e-05, "loss": 0.7374, "step": 12580 }, { "epoch": 41.24918032786885, "grad_norm": 9.625164031982422, "learning_rate": 1.3259334869126441e-05, "loss": 0.4922, "step": 12581 }, { "epoch": 41.252459016393445, "grad_norm": 7.245504379272461, "learning_rate": 1.325833095054283e-05, "loss": 0.6808, "step": 12582 }, { "epoch": 41.25573770491803, "grad_norm": 6.747288703918457, "learning_rate": 1.3257326995218157e-05, "loss": 0.7397, "step": 12583 }, { "epoch": 41.25901639344262, "grad_norm": 10.260188102722168, "learning_rate": 1.3256323003163729e-05, "loss": 0.5505, "step": 12584 }, { "epoch": 41.26229508196721, "grad_norm": 8.427057266235352, "learning_rate": 1.3255318974390879e-05, "loss": 1.0265, "step": 12585 }, { "epoch": 41.265573770491805, "grad_norm": 9.542734146118164, "learning_rate": 1.325431490891092e-05, "loss": 0.5475, "step": 12586 }, { "epoch": 41.268852459016394, "grad_norm": 9.112378120422363, "learning_rate": 1.3253310806735175e-05, "loss": 0.8091, "step": 12587 }, { "epoch": 41.27213114754098, "grad_norm": 6.403030872344971, "learning_rate": 1.325230666787497e-05, "loss": 0.5217, "step": 12588 }, { "epoch": 41.27540983606557, "grad_norm": 6.371132850646973, "learning_rate": 1.3251302492341626e-05, "loss": 1.1429, "step": 12589 }, { "epoch": 41.278688524590166, "grad_norm": 6.599191188812256, "learning_rate": 1.3250298280146464e-05, "loss": 0.7127, "step": 12590 }, { "epoch": 41.281967213114754, "grad_norm": 6.104619026184082, "learning_rate": 1.3249294031300812e-05, "loss": 0.6437, "step": 12591 }, { "epoch": 41.28524590163934, "grad_norm": 6.410402774810791, "learning_rate": 1.324828974581599e-05, "loss": 0.7237, "step": 12592 }, { "epoch": 41.28852459016394, "grad_norm": 9.028908729553223, "learning_rate": 1.3247285423703322e-05, "loss": 0.6902, "step": 12593 }, { "epoch": 41.291803278688526, "grad_norm": 8.450810432434082, "learning_rate": 1.3246281064974137e-05, "loss": 0.6231, "step": 12594 }, { "epoch": 41.295081967213115, "grad_norm": 8.011194229125977, "learning_rate": 1.324527666963976e-05, "loss": 0.947, "step": 12595 }, { "epoch": 41.2983606557377, "grad_norm": 7.4547600746154785, "learning_rate": 1.324427223771151e-05, "loss": 0.7338, "step": 12596 }, { "epoch": 41.3016393442623, "grad_norm": 6.771643161773682, "learning_rate": 1.3243267769200718e-05, "loss": 0.7861, "step": 12597 }, { "epoch": 41.30491803278689, "grad_norm": 7.766796588897705, "learning_rate": 1.3242263264118712e-05, "loss": 0.9241, "step": 12598 }, { "epoch": 41.308196721311475, "grad_norm": 6.643062591552734, "learning_rate": 1.3241258722476817e-05, "loss": 0.7402, "step": 12599 }, { "epoch": 41.31147540983606, "grad_norm": 6.125054359436035, "learning_rate": 1.3240254144286357e-05, "loss": 0.7209, "step": 12600 }, { "epoch": 41.31475409836066, "grad_norm": 7.307758331298828, "learning_rate": 1.3239249529558664e-05, "loss": 0.8142, "step": 12601 }, { "epoch": 41.31803278688525, "grad_norm": 6.447131156921387, "learning_rate": 1.3238244878305065e-05, "loss": 0.7098, "step": 12602 }, { "epoch": 41.321311475409836, "grad_norm": 6.961703777313232, "learning_rate": 1.3237240190536887e-05, "loss": 0.6436, "step": 12603 }, { "epoch": 41.324590163934424, "grad_norm": 17.8508243560791, "learning_rate": 1.3236235466265459e-05, "loss": 0.937, "step": 12604 }, { "epoch": 41.32786885245902, "grad_norm": 7.116600036621094, "learning_rate": 1.3235230705502114e-05, "loss": 0.8375, "step": 12605 }, { "epoch": 41.33114754098361, "grad_norm": 5.705142021179199, "learning_rate": 1.3234225908258175e-05, "loss": 0.6352, "step": 12606 }, { "epoch": 41.334426229508196, "grad_norm": 7.864116191864014, "learning_rate": 1.3233221074544982e-05, "loss": 0.7206, "step": 12607 }, { "epoch": 41.337704918032784, "grad_norm": 7.195532321929932, "learning_rate": 1.3232216204373853e-05, "loss": 0.9366, "step": 12608 }, { "epoch": 41.34098360655738, "grad_norm": 7.960840225219727, "learning_rate": 1.323121129775613e-05, "loss": 0.8374, "step": 12609 }, { "epoch": 41.34426229508197, "grad_norm": 15.15457534790039, "learning_rate": 1.3230206354703141e-05, "loss": 0.8624, "step": 12610 }, { "epoch": 41.34754098360656, "grad_norm": 6.775305271148682, "learning_rate": 1.3229201375226212e-05, "loss": 0.5609, "step": 12611 }, { "epoch": 41.350819672131145, "grad_norm": 13.736207008361816, "learning_rate": 1.3228196359336684e-05, "loss": 0.7229, "step": 12612 }, { "epoch": 41.35409836065574, "grad_norm": 9.396380424499512, "learning_rate": 1.3227191307045886e-05, "loss": 0.7229, "step": 12613 }, { "epoch": 41.35737704918033, "grad_norm": 7.433881759643555, "learning_rate": 1.3226186218365149e-05, "loss": 0.6237, "step": 12614 }, { "epoch": 41.36065573770492, "grad_norm": 7.367398738861084, "learning_rate": 1.3225181093305808e-05, "loss": 0.6505, "step": 12615 }, { "epoch": 41.363934426229505, "grad_norm": 6.515584945678711, "learning_rate": 1.3224175931879195e-05, "loss": 0.6709, "step": 12616 }, { "epoch": 41.3672131147541, "grad_norm": 6.889213562011719, "learning_rate": 1.3223170734096649e-05, "loss": 0.6259, "step": 12617 }, { "epoch": 41.37049180327869, "grad_norm": 7.3969407081604, "learning_rate": 1.32221654999695e-05, "loss": 0.6267, "step": 12618 }, { "epoch": 41.37377049180328, "grad_norm": 7.174098014831543, "learning_rate": 1.3221160229509083e-05, "loss": 0.8499, "step": 12619 }, { "epoch": 41.377049180327866, "grad_norm": 5.966920852661133, "learning_rate": 1.3220154922726737e-05, "loss": 0.8605, "step": 12620 }, { "epoch": 41.38032786885246, "grad_norm": 12.496769905090332, "learning_rate": 1.3219149579633796e-05, "loss": 0.6991, "step": 12621 }, { "epoch": 41.38360655737705, "grad_norm": 5.886530876159668, "learning_rate": 1.3218144200241597e-05, "loss": 0.8124, "step": 12622 }, { "epoch": 41.38688524590164, "grad_norm": 7.100975513458252, "learning_rate": 1.3217138784561479e-05, "loss": 0.7112, "step": 12623 }, { "epoch": 41.390163934426226, "grad_norm": 7.919388294219971, "learning_rate": 1.3216133332604772e-05, "loss": 0.4651, "step": 12624 }, { "epoch": 41.39344262295082, "grad_norm": 16.88629913330078, "learning_rate": 1.321512784438282e-05, "loss": 0.9686, "step": 12625 }, { "epoch": 41.39672131147541, "grad_norm": 7.261074066162109, "learning_rate": 1.3214122319906958e-05, "loss": 0.7159, "step": 12626 }, { "epoch": 41.4, "grad_norm": 7.465419769287109, "learning_rate": 1.3213116759188525e-05, "loss": 0.7805, "step": 12627 }, { "epoch": 41.40327868852459, "grad_norm": 5.602601051330566, "learning_rate": 1.3212111162238858e-05, "loss": 0.7742, "step": 12628 }, { "epoch": 41.40655737704918, "grad_norm": 11.64616870880127, "learning_rate": 1.3211105529069301e-05, "loss": 0.7275, "step": 12629 }, { "epoch": 41.40983606557377, "grad_norm": 7.181148529052734, "learning_rate": 1.3210099859691189e-05, "loss": 1.0414, "step": 12630 }, { "epoch": 41.41311475409836, "grad_norm": 6.383806228637695, "learning_rate": 1.3209094154115863e-05, "loss": 0.7548, "step": 12631 }, { "epoch": 41.41639344262295, "grad_norm": 6.277932643890381, "learning_rate": 1.3208088412354663e-05, "loss": 0.7933, "step": 12632 }, { "epoch": 41.41967213114754, "grad_norm": 7.992922306060791, "learning_rate": 1.3207082634418933e-05, "loss": 0.8228, "step": 12633 }, { "epoch": 41.42295081967213, "grad_norm": 10.250818252563477, "learning_rate": 1.3206076820320008e-05, "loss": 0.7317, "step": 12634 }, { "epoch": 41.42622950819672, "grad_norm": 8.663224220275879, "learning_rate": 1.3205070970069238e-05, "loss": 0.6687, "step": 12635 }, { "epoch": 41.429508196721315, "grad_norm": 6.373883247375488, "learning_rate": 1.320406508367796e-05, "loss": 0.7872, "step": 12636 }, { "epoch": 41.4327868852459, "grad_norm": 7.785717010498047, "learning_rate": 1.3203059161157512e-05, "loss": 0.5951, "step": 12637 }, { "epoch": 41.43606557377049, "grad_norm": 6.646958351135254, "learning_rate": 1.3202053202519247e-05, "loss": 0.7603, "step": 12638 }, { "epoch": 41.43934426229508, "grad_norm": 6.185369968414307, "learning_rate": 1.3201047207774498e-05, "loss": 0.6534, "step": 12639 }, { "epoch": 41.442622950819676, "grad_norm": 6.457903861999512, "learning_rate": 1.3200041176934616e-05, "loss": 0.5983, "step": 12640 }, { "epoch": 41.445901639344264, "grad_norm": 7.203600883483887, "learning_rate": 1.3199035110010943e-05, "loss": 0.6992, "step": 12641 }, { "epoch": 41.44918032786885, "grad_norm": 7.838335990905762, "learning_rate": 1.3198029007014823e-05, "loss": 0.6773, "step": 12642 }, { "epoch": 41.45245901639344, "grad_norm": 9.871983528137207, "learning_rate": 1.3197022867957598e-05, "loss": 0.9353, "step": 12643 }, { "epoch": 41.455737704918036, "grad_norm": 16.910676956176758, "learning_rate": 1.3196016692850617e-05, "loss": 0.5386, "step": 12644 }, { "epoch": 41.459016393442624, "grad_norm": 6.678652286529541, "learning_rate": 1.3195010481705229e-05, "loss": 0.8089, "step": 12645 }, { "epoch": 41.46229508196721, "grad_norm": 6.149143218994141, "learning_rate": 1.3194004234532771e-05, "loss": 0.7354, "step": 12646 }, { "epoch": 41.4655737704918, "grad_norm": 5.965269088745117, "learning_rate": 1.3192997951344595e-05, "loss": 0.9282, "step": 12647 }, { "epoch": 41.4688524590164, "grad_norm": 7.927684783935547, "learning_rate": 1.3191991632152048e-05, "loss": 0.8791, "step": 12648 }, { "epoch": 41.472131147540985, "grad_norm": 6.143893718719482, "learning_rate": 1.3190985276966479e-05, "loss": 0.8441, "step": 12649 }, { "epoch": 41.47540983606557, "grad_norm": 9.84931468963623, "learning_rate": 1.318997888579923e-05, "loss": 0.7188, "step": 12650 }, { "epoch": 41.47868852459016, "grad_norm": 6.014158248901367, "learning_rate": 1.3188972458661655e-05, "loss": 0.6735, "step": 12651 }, { "epoch": 41.48196721311476, "grad_norm": 14.063128471374512, "learning_rate": 1.3187965995565098e-05, "loss": 0.6362, "step": 12652 }, { "epoch": 41.485245901639345, "grad_norm": 9.067590713500977, "learning_rate": 1.318695949652091e-05, "loss": 0.6722, "step": 12653 }, { "epoch": 41.488524590163934, "grad_norm": 6.3859124183654785, "learning_rate": 1.3185952961540439e-05, "loss": 0.9355, "step": 12654 }, { "epoch": 41.49180327868852, "grad_norm": 8.629100799560547, "learning_rate": 1.3184946390635038e-05, "loss": 0.839, "step": 12655 }, { "epoch": 41.49508196721312, "grad_norm": 7.135865211486816, "learning_rate": 1.3183939783816054e-05, "loss": 0.7644, "step": 12656 }, { "epoch": 41.498360655737706, "grad_norm": 6.19851016998291, "learning_rate": 1.3182933141094837e-05, "loss": 0.8287, "step": 12657 }, { "epoch": 41.501639344262294, "grad_norm": 9.255269050598145, "learning_rate": 1.318192646248274e-05, "loss": 0.5338, "step": 12658 }, { "epoch": 41.50491803278688, "grad_norm": 6.917901039123535, "learning_rate": 1.3180919747991116e-05, "loss": 0.5222, "step": 12659 }, { "epoch": 41.50819672131148, "grad_norm": 10.146490097045898, "learning_rate": 1.3179912997631313e-05, "loss": 0.6284, "step": 12660 }, { "epoch": 41.511475409836066, "grad_norm": 7.367798328399658, "learning_rate": 1.3178906211414684e-05, "loss": 0.5694, "step": 12661 }, { "epoch": 41.514754098360655, "grad_norm": 7.011794567108154, "learning_rate": 1.3177899389352584e-05, "loss": 0.4238, "step": 12662 }, { "epoch": 41.51803278688524, "grad_norm": 5.613997459411621, "learning_rate": 1.3176892531456363e-05, "loss": 0.5287, "step": 12663 }, { "epoch": 41.52131147540984, "grad_norm": 8.289380073547363, "learning_rate": 1.3175885637737375e-05, "loss": 0.8183, "step": 12664 }, { "epoch": 41.52459016393443, "grad_norm": 5.627397537231445, "learning_rate": 1.3174878708206974e-05, "loss": 0.9069, "step": 12665 }, { "epoch": 41.527868852459015, "grad_norm": 8.093181610107422, "learning_rate": 1.3173871742876516e-05, "loss": 0.7103, "step": 12666 }, { "epoch": 41.5311475409836, "grad_norm": 7.636239051818848, "learning_rate": 1.3172864741757354e-05, "loss": 0.7382, "step": 12667 }, { "epoch": 41.5344262295082, "grad_norm": 8.136150360107422, "learning_rate": 1.3171857704860845e-05, "loss": 0.7711, "step": 12668 }, { "epoch": 41.53770491803279, "grad_norm": 5.783015251159668, "learning_rate": 1.3170850632198337e-05, "loss": 0.92, "step": 12669 }, { "epoch": 41.540983606557376, "grad_norm": 6.6424407958984375, "learning_rate": 1.3169843523781193e-05, "loss": 0.6497, "step": 12670 }, { "epoch": 41.544262295081964, "grad_norm": 8.045389175415039, "learning_rate": 1.316883637962077e-05, "loss": 0.7705, "step": 12671 }, { "epoch": 41.54754098360656, "grad_norm": 6.382166862487793, "learning_rate": 1.3167829199728421e-05, "loss": 0.5572, "step": 12672 }, { "epoch": 41.55081967213115, "grad_norm": 5.327461242675781, "learning_rate": 1.3166821984115506e-05, "loss": 0.7077, "step": 12673 }, { "epoch": 41.554098360655736, "grad_norm": 10.54458236694336, "learning_rate": 1.3165814732793377e-05, "loss": 0.8914, "step": 12674 }, { "epoch": 41.557377049180324, "grad_norm": 6.266876220703125, "learning_rate": 1.3164807445773398e-05, "loss": 0.8419, "step": 12675 }, { "epoch": 41.56065573770492, "grad_norm": 15.915081977844238, "learning_rate": 1.316380012306692e-05, "loss": 0.9835, "step": 12676 }, { "epoch": 41.56393442622951, "grad_norm": 8.167153358459473, "learning_rate": 1.316279276468531e-05, "loss": 0.7629, "step": 12677 }, { "epoch": 41.5672131147541, "grad_norm": 7.204090595245361, "learning_rate": 1.3161785370639924e-05, "loss": 0.7108, "step": 12678 }, { "epoch": 41.570491803278685, "grad_norm": 7.266298294067383, "learning_rate": 1.3160777940942118e-05, "loss": 0.6046, "step": 12679 }, { "epoch": 41.57377049180328, "grad_norm": 7.532045841217041, "learning_rate": 1.3159770475603256e-05, "loss": 0.7178, "step": 12680 }, { "epoch": 41.57704918032787, "grad_norm": 6.628849506378174, "learning_rate": 1.3158762974634696e-05, "loss": 0.6245, "step": 12681 }, { "epoch": 41.58032786885246, "grad_norm": 6.620700359344482, "learning_rate": 1.3157755438047796e-05, "loss": 0.7457, "step": 12682 }, { "epoch": 41.58360655737705, "grad_norm": 6.877124309539795, "learning_rate": 1.3156747865853925e-05, "loss": 0.5958, "step": 12683 }, { "epoch": 41.58688524590164, "grad_norm": 6.682162284851074, "learning_rate": 1.3155740258064438e-05, "loss": 0.8448, "step": 12684 }, { "epoch": 41.59016393442623, "grad_norm": 5.914279937744141, "learning_rate": 1.3154732614690699e-05, "loss": 0.5872, "step": 12685 }, { "epoch": 41.59344262295082, "grad_norm": 6.301435470581055, "learning_rate": 1.3153724935744068e-05, "loss": 0.9001, "step": 12686 }, { "epoch": 41.59672131147541, "grad_norm": 6.415953159332275, "learning_rate": 1.3152717221235909e-05, "loss": 0.7066, "step": 12687 }, { "epoch": 41.6, "grad_norm": 5.758964538574219, "learning_rate": 1.3151709471177589e-05, "loss": 1.0114, "step": 12688 }, { "epoch": 41.60327868852459, "grad_norm": 6.519903659820557, "learning_rate": 1.3150701685580462e-05, "loss": 0.6033, "step": 12689 }, { "epoch": 41.60655737704918, "grad_norm": 30.378795623779297, "learning_rate": 1.3149693864455903e-05, "loss": 0.8935, "step": 12690 }, { "epoch": 41.609836065573774, "grad_norm": 6.307392597198486, "learning_rate": 1.3148686007815268e-05, "loss": 0.5062, "step": 12691 }, { "epoch": 41.61311475409836, "grad_norm": 9.06342601776123, "learning_rate": 1.3147678115669926e-05, "loss": 0.6995, "step": 12692 }, { "epoch": 41.61639344262295, "grad_norm": 12.54092788696289, "learning_rate": 1.3146670188031238e-05, "loss": 0.6112, "step": 12693 }, { "epoch": 41.61967213114754, "grad_norm": 6.4837493896484375, "learning_rate": 1.3145662224910576e-05, "loss": 0.6094, "step": 12694 }, { "epoch": 41.622950819672134, "grad_norm": 8.016136169433594, "learning_rate": 1.3144654226319298e-05, "loss": 0.7288, "step": 12695 }, { "epoch": 41.62622950819672, "grad_norm": 9.99728012084961, "learning_rate": 1.3143646192268776e-05, "loss": 0.5686, "step": 12696 }, { "epoch": 41.62950819672131, "grad_norm": 6.290631294250488, "learning_rate": 1.3142638122770372e-05, "loss": 0.589, "step": 12697 }, { "epoch": 41.6327868852459, "grad_norm": 6.378855228424072, "learning_rate": 1.3141630017835456e-05, "loss": 0.8401, "step": 12698 }, { "epoch": 41.636065573770495, "grad_norm": 6.644272327423096, "learning_rate": 1.31406218774754e-05, "loss": 0.7343, "step": 12699 }, { "epoch": 41.63934426229508, "grad_norm": 6.899677753448486, "learning_rate": 1.3139613701701561e-05, "loss": 0.6145, "step": 12700 }, { "epoch": 41.64262295081967, "grad_norm": 5.4177045822143555, "learning_rate": 1.3138605490525316e-05, "loss": 0.6022, "step": 12701 }, { "epoch": 41.64590163934426, "grad_norm": 6.5828657150268555, "learning_rate": 1.313759724395803e-05, "loss": 0.3257, "step": 12702 }, { "epoch": 41.649180327868855, "grad_norm": 8.136817932128906, "learning_rate": 1.3136588962011074e-05, "loss": 0.9265, "step": 12703 }, { "epoch": 41.65245901639344, "grad_norm": 8.059564590454102, "learning_rate": 1.3135580644695813e-05, "loss": 0.8688, "step": 12704 }, { "epoch": 41.65573770491803, "grad_norm": 13.12625503540039, "learning_rate": 1.3134572292023624e-05, "loss": 0.8551, "step": 12705 }, { "epoch": 41.65901639344262, "grad_norm": 8.088926315307617, "learning_rate": 1.313356390400587e-05, "loss": 0.6719, "step": 12706 }, { "epoch": 41.662295081967216, "grad_norm": 6.89192008972168, "learning_rate": 1.3132555480653929e-05, "loss": 0.8087, "step": 12707 }, { "epoch": 41.665573770491804, "grad_norm": 6.693421363830566, "learning_rate": 1.3131547021979163e-05, "loss": 0.8831, "step": 12708 }, { "epoch": 41.66885245901639, "grad_norm": 6.293246269226074, "learning_rate": 1.3130538527992953e-05, "loss": 0.6877, "step": 12709 }, { "epoch": 41.67213114754098, "grad_norm": 6.850815773010254, "learning_rate": 1.3129529998706663e-05, "loss": 1.0045, "step": 12710 }, { "epoch": 41.675409836065576, "grad_norm": 6.122580051422119, "learning_rate": 1.312852143413167e-05, "loss": 0.6245, "step": 12711 }, { "epoch": 41.678688524590164, "grad_norm": 23.27798080444336, "learning_rate": 1.3127512834279344e-05, "loss": 0.9332, "step": 12712 }, { "epoch": 41.68196721311475, "grad_norm": 12.467641830444336, "learning_rate": 1.3126504199161061e-05, "loss": 0.4597, "step": 12713 }, { "epoch": 41.68524590163934, "grad_norm": 12.059324264526367, "learning_rate": 1.312549552878819e-05, "loss": 1.104, "step": 12714 }, { "epoch": 41.68852459016394, "grad_norm": 6.7647504806518555, "learning_rate": 1.3124486823172107e-05, "loss": 0.7042, "step": 12715 }, { "epoch": 41.691803278688525, "grad_norm": 10.421001434326172, "learning_rate": 1.3123478082324188e-05, "loss": 0.611, "step": 12716 }, { "epoch": 41.69508196721311, "grad_norm": 6.441600322723389, "learning_rate": 1.3122469306255804e-05, "loss": 0.8813, "step": 12717 }, { "epoch": 41.6983606557377, "grad_norm": 6.7069549560546875, "learning_rate": 1.3121460494978335e-05, "loss": 0.792, "step": 12718 }, { "epoch": 41.7016393442623, "grad_norm": 5.668835639953613, "learning_rate": 1.3120451648503151e-05, "loss": 0.8062, "step": 12719 }, { "epoch": 41.704918032786885, "grad_norm": 7.213037014007568, "learning_rate": 1.3119442766841633e-05, "loss": 0.8205, "step": 12720 }, { "epoch": 41.708196721311474, "grad_norm": 8.250679969787598, "learning_rate": 1.311843385000515e-05, "loss": 0.6657, "step": 12721 }, { "epoch": 41.71147540983607, "grad_norm": 6.3237714767456055, "learning_rate": 1.3117424898005086e-05, "loss": 0.6255, "step": 12722 }, { "epoch": 41.71475409836066, "grad_norm": 5.476111888885498, "learning_rate": 1.3116415910852814e-05, "loss": 0.722, "step": 12723 }, { "epoch": 41.718032786885246, "grad_norm": 6.962310791015625, "learning_rate": 1.3115406888559715e-05, "loss": 0.8509, "step": 12724 }, { "epoch": 41.721311475409834, "grad_norm": 6.680850028991699, "learning_rate": 1.311439783113716e-05, "loss": 0.7975, "step": 12725 }, { "epoch": 41.72459016393443, "grad_norm": 9.894285202026367, "learning_rate": 1.311338873859653e-05, "loss": 0.868, "step": 12726 }, { "epoch": 41.72786885245902, "grad_norm": 6.215753555297852, "learning_rate": 1.3112379610949211e-05, "loss": 0.7788, "step": 12727 }, { "epoch": 41.731147540983606, "grad_norm": 8.122842788696289, "learning_rate": 1.311137044820657e-05, "loss": 0.8427, "step": 12728 }, { "epoch": 41.734426229508195, "grad_norm": 15.148086547851562, "learning_rate": 1.3110361250379997e-05, "loss": 0.6992, "step": 12729 }, { "epoch": 41.73770491803279, "grad_norm": 5.456527233123779, "learning_rate": 1.3109352017480863e-05, "loss": 0.6869, "step": 12730 }, { "epoch": 41.74098360655738, "grad_norm": 7.160598278045654, "learning_rate": 1.3108342749520555e-05, "loss": 0.7859, "step": 12731 }, { "epoch": 41.74426229508197, "grad_norm": 7.684780120849609, "learning_rate": 1.3107333446510448e-05, "loss": 0.6104, "step": 12732 }, { "epoch": 41.747540983606555, "grad_norm": 9.501614570617676, "learning_rate": 1.310632410846193e-05, "loss": 0.7095, "step": 12733 }, { "epoch": 41.75081967213115, "grad_norm": 7.63187313079834, "learning_rate": 1.3105314735386374e-05, "loss": 0.6103, "step": 12734 }, { "epoch": 41.75409836065574, "grad_norm": 9.513092994689941, "learning_rate": 1.3104305327295169e-05, "loss": 0.4821, "step": 12735 }, { "epoch": 41.75737704918033, "grad_norm": 8.465237617492676, "learning_rate": 1.3103295884199689e-05, "loss": 0.7742, "step": 12736 }, { "epoch": 41.760655737704916, "grad_norm": 8.070051193237305, "learning_rate": 1.3102286406111324e-05, "loss": 0.8231, "step": 12737 }, { "epoch": 41.76393442622951, "grad_norm": 10.020684242248535, "learning_rate": 1.3101276893041455e-05, "loss": 0.6216, "step": 12738 }, { "epoch": 41.7672131147541, "grad_norm": 7.371585845947266, "learning_rate": 1.3100267345001463e-05, "loss": 0.8538, "step": 12739 }, { "epoch": 41.77049180327869, "grad_norm": 7.287532329559326, "learning_rate": 1.3099257762002734e-05, "loss": 0.6177, "step": 12740 }, { "epoch": 41.773770491803276, "grad_norm": 7.140990734100342, "learning_rate": 1.309824814405665e-05, "loss": 0.6154, "step": 12741 }, { "epoch": 41.77704918032787, "grad_norm": 5.74653959274292, "learning_rate": 1.30972384911746e-05, "loss": 1.0453, "step": 12742 }, { "epoch": 41.78032786885246, "grad_norm": 8.25743293762207, "learning_rate": 1.309622880336796e-05, "loss": 0.5858, "step": 12743 }, { "epoch": 41.78360655737705, "grad_norm": 5.696859836578369, "learning_rate": 1.3095219080648128e-05, "loss": 0.5848, "step": 12744 }, { "epoch": 41.78688524590164, "grad_norm": 6.842925071716309, "learning_rate": 1.3094209323026479e-05, "loss": 0.7969, "step": 12745 }, { "epoch": 41.79016393442623, "grad_norm": 6.446621417999268, "learning_rate": 1.3093199530514401e-05, "loss": 1.0789, "step": 12746 }, { "epoch": 41.79344262295082, "grad_norm": 7.2998857498168945, "learning_rate": 1.3092189703123284e-05, "loss": 0.818, "step": 12747 }, { "epoch": 41.79672131147541, "grad_norm": 5.6644110679626465, "learning_rate": 1.3091179840864515e-05, "loss": 0.6984, "step": 12748 }, { "epoch": 41.8, "grad_norm": 6.228221893310547, "learning_rate": 1.3090169943749475e-05, "loss": 0.759, "step": 12749 }, { "epoch": 41.80327868852459, "grad_norm": 9.073100090026855, "learning_rate": 1.308916001178956e-05, "loss": 0.9575, "step": 12750 }, { "epoch": 41.80655737704918, "grad_norm": 8.41801643371582, "learning_rate": 1.3088150044996151e-05, "loss": 0.6684, "step": 12751 }, { "epoch": 41.80983606557377, "grad_norm": 6.634823322296143, "learning_rate": 1.308714004338064e-05, "loss": 0.872, "step": 12752 }, { "epoch": 41.81311475409836, "grad_norm": 10.55289363861084, "learning_rate": 1.3086130006954417e-05, "loss": 0.6442, "step": 12753 }, { "epoch": 41.81639344262295, "grad_norm": 5.4796953201293945, "learning_rate": 1.3085119935728867e-05, "loss": 0.6759, "step": 12754 }, { "epoch": 41.81967213114754, "grad_norm": 7.142111778259277, "learning_rate": 1.3084109829715383e-05, "loss": 0.7209, "step": 12755 }, { "epoch": 41.82295081967213, "grad_norm": 5.483480453491211, "learning_rate": 1.3083099688925353e-05, "loss": 0.7963, "step": 12756 }, { "epoch": 41.82622950819672, "grad_norm": 6.648690700531006, "learning_rate": 1.308208951337017e-05, "loss": 0.6722, "step": 12757 }, { "epoch": 41.829508196721314, "grad_norm": 7.046885013580322, "learning_rate": 1.3081079303061224e-05, "loss": 0.6319, "step": 12758 }, { "epoch": 41.8327868852459, "grad_norm": 5.973771095275879, "learning_rate": 1.3080069058009904e-05, "loss": 0.5921, "step": 12759 }, { "epoch": 41.83606557377049, "grad_norm": 6.976320266723633, "learning_rate": 1.3079058778227602e-05, "loss": 0.9697, "step": 12760 }, { "epoch": 41.83934426229508, "grad_norm": 7.888815402984619, "learning_rate": 1.3078048463725713e-05, "loss": 0.7845, "step": 12761 }, { "epoch": 41.842622950819674, "grad_norm": 6.4346394538879395, "learning_rate": 1.3077038114515625e-05, "loss": 0.8538, "step": 12762 }, { "epoch": 41.84590163934426, "grad_norm": 6.983926773071289, "learning_rate": 1.3076027730608733e-05, "loss": 0.7627, "step": 12763 }, { "epoch": 41.84918032786885, "grad_norm": 5.615272045135498, "learning_rate": 1.3075017312016432e-05, "loss": 0.5254, "step": 12764 }, { "epoch": 41.85245901639344, "grad_norm": 6.288975238800049, "learning_rate": 1.3074006858750115e-05, "loss": 0.5507, "step": 12765 }, { "epoch": 41.855737704918035, "grad_norm": 6.172274589538574, "learning_rate": 1.3072996370821172e-05, "loss": 0.6677, "step": 12766 }, { "epoch": 41.85901639344262, "grad_norm": 6.597602844238281, "learning_rate": 1.3071985848240998e-05, "loss": 0.6712, "step": 12767 }, { "epoch": 41.86229508196721, "grad_norm": 6.47252082824707, "learning_rate": 1.3070975291020994e-05, "loss": 0.8158, "step": 12768 }, { "epoch": 41.86557377049181, "grad_norm": 6.002832412719727, "learning_rate": 1.3069964699172547e-05, "loss": 0.6985, "step": 12769 }, { "epoch": 41.868852459016395, "grad_norm": 5.981487274169922, "learning_rate": 1.3068954072707056e-05, "loss": 0.6197, "step": 12770 }, { "epoch": 41.87213114754098, "grad_norm": 7.512269496917725, "learning_rate": 1.3067943411635917e-05, "loss": 0.6876, "step": 12771 }, { "epoch": 41.87540983606557, "grad_norm": 6.413262367248535, "learning_rate": 1.3066932715970529e-05, "loss": 0.8078, "step": 12772 }, { "epoch": 41.87868852459017, "grad_norm": 10.101892471313477, "learning_rate": 1.3065921985722284e-05, "loss": 0.5455, "step": 12773 }, { "epoch": 41.881967213114756, "grad_norm": 9.292350769042969, "learning_rate": 1.306491122090258e-05, "loss": 0.716, "step": 12774 }, { "epoch": 41.885245901639344, "grad_norm": 5.700480937957764, "learning_rate": 1.3063900421522814e-05, "loss": 0.7958, "step": 12775 }, { "epoch": 41.88852459016393, "grad_norm": 6.747375965118408, "learning_rate": 1.3062889587594389e-05, "loss": 0.7382, "step": 12776 }, { "epoch": 41.89180327868853, "grad_norm": 6.717631816864014, "learning_rate": 1.3061878719128698e-05, "loss": 0.9288, "step": 12777 }, { "epoch": 41.895081967213116, "grad_norm": 7.874168395996094, "learning_rate": 1.306086781613714e-05, "loss": 0.631, "step": 12778 }, { "epoch": 41.898360655737704, "grad_norm": 6.073225975036621, "learning_rate": 1.3059856878631115e-05, "loss": 0.6398, "step": 12779 }, { "epoch": 41.90163934426229, "grad_norm": 7.144956588745117, "learning_rate": 1.3058845906622023e-05, "loss": 0.642, "step": 12780 }, { "epoch": 41.90491803278689, "grad_norm": 7.030219078063965, "learning_rate": 1.3057834900121261e-05, "loss": 0.6921, "step": 12781 }, { "epoch": 41.90819672131148, "grad_norm": 11.530272483825684, "learning_rate": 1.3056823859140231e-05, "loss": 0.6053, "step": 12782 }, { "epoch": 41.911475409836065, "grad_norm": 5.521697998046875, "learning_rate": 1.3055812783690339e-05, "loss": 0.6763, "step": 12783 }, { "epoch": 41.91475409836065, "grad_norm": 5.686891555786133, "learning_rate": 1.3054801673782973e-05, "loss": 0.7193, "step": 12784 }, { "epoch": 41.91803278688525, "grad_norm": 5.90354585647583, "learning_rate": 1.305379052942955e-05, "loss": 0.6136, "step": 12785 }, { "epoch": 41.92131147540984, "grad_norm": 8.613659858703613, "learning_rate": 1.3052779350641456e-05, "loss": 0.8282, "step": 12786 }, { "epoch": 41.924590163934425, "grad_norm": 8.881139755249023, "learning_rate": 1.3051768137430105e-05, "loss": 0.644, "step": 12787 }, { "epoch": 41.927868852459014, "grad_norm": 6.692333221435547, "learning_rate": 1.3050756889806895e-05, "loss": 0.7636, "step": 12788 }, { "epoch": 41.93114754098361, "grad_norm": 8.584470748901367, "learning_rate": 1.3049745607783229e-05, "loss": 0.6146, "step": 12789 }, { "epoch": 41.9344262295082, "grad_norm": 5.890355110168457, "learning_rate": 1.3048734291370508e-05, "loss": 0.7661, "step": 12790 }, { "epoch": 41.937704918032786, "grad_norm": 13.148192405700684, "learning_rate": 1.304772294058014e-05, "loss": 0.6695, "step": 12791 }, { "epoch": 41.940983606557374, "grad_norm": 7.11041784286499, "learning_rate": 1.3046711555423529e-05, "loss": 0.8229, "step": 12792 }, { "epoch": 41.94426229508197, "grad_norm": 6.711983680725098, "learning_rate": 1.3045700135912074e-05, "loss": 0.4153, "step": 12793 }, { "epoch": 41.94754098360656, "grad_norm": 6.883385181427002, "learning_rate": 1.3044688682057185e-05, "loss": 0.711, "step": 12794 }, { "epoch": 41.950819672131146, "grad_norm": 7.318783283233643, "learning_rate": 1.3043677193870265e-05, "loss": 0.8704, "step": 12795 }, { "epoch": 41.954098360655735, "grad_norm": 6.2055768966674805, "learning_rate": 1.304266567136272e-05, "loss": 0.7891, "step": 12796 }, { "epoch": 41.95737704918033, "grad_norm": 7.7047295570373535, "learning_rate": 1.3041654114545956e-05, "loss": 0.5242, "step": 12797 }, { "epoch": 41.96065573770492, "grad_norm": 4.689451694488525, "learning_rate": 1.3040642523431379e-05, "loss": 0.768, "step": 12798 }, { "epoch": 41.96393442622951, "grad_norm": 9.472658157348633, "learning_rate": 1.3039630898030395e-05, "loss": 0.6259, "step": 12799 }, { "epoch": 41.967213114754095, "grad_norm": 7.857870101928711, "learning_rate": 1.3038619238354415e-05, "loss": 0.7225, "step": 12800 }, { "epoch": 41.97049180327869, "grad_norm": 8.457698822021484, "learning_rate": 1.3037607544414841e-05, "loss": 1.0157, "step": 12801 }, { "epoch": 41.97377049180328, "grad_norm": 6.65294075012207, "learning_rate": 1.3036595816223086e-05, "loss": 0.8081, "step": 12802 }, { "epoch": 41.97704918032787, "grad_norm": 6.4410786628723145, "learning_rate": 1.3035584053790553e-05, "loss": 0.5909, "step": 12803 }, { "epoch": 41.980327868852456, "grad_norm": 6.035661220550537, "learning_rate": 1.3034572257128658e-05, "loss": 0.7983, "step": 12804 }, { "epoch": 41.98360655737705, "grad_norm": 6.251911163330078, "learning_rate": 1.3033560426248801e-05, "loss": 0.8803, "step": 12805 }, { "epoch": 41.98688524590164, "grad_norm": 6.833539009094238, "learning_rate": 1.30325485611624e-05, "loss": 0.5193, "step": 12806 }, { "epoch": 41.99016393442623, "grad_norm": 6.226401329040527, "learning_rate": 1.3031536661880856e-05, "loss": 0.7756, "step": 12807 }, { "epoch": 41.993442622950816, "grad_norm": 7.550477981567383, "learning_rate": 1.3030524728415586e-05, "loss": 0.6839, "step": 12808 }, { "epoch": 41.99672131147541, "grad_norm": 7.047519207000732, "learning_rate": 1.3029512760778e-05, "loss": 0.8199, "step": 12809 }, { "epoch": 42.0, "grad_norm": 6.68721342086792, "learning_rate": 1.3028500758979507e-05, "loss": 0.7695, "step": 12810 }, { "epoch": 42.00327868852459, "grad_norm": 11.423229217529297, "learning_rate": 1.3027488723031522e-05, "loss": 0.5698, "step": 12811 }, { "epoch": 42.006557377049184, "grad_norm": 15.603191375732422, "learning_rate": 1.302647665294545e-05, "loss": 0.6881, "step": 12812 }, { "epoch": 42.00983606557377, "grad_norm": 7.055757522583008, "learning_rate": 1.302546454873271e-05, "loss": 0.7959, "step": 12813 }, { "epoch": 42.01311475409836, "grad_norm": 6.3273725509643555, "learning_rate": 1.302445241040471e-05, "loss": 0.6535, "step": 12814 }, { "epoch": 42.01639344262295, "grad_norm": 6.455973148345947, "learning_rate": 1.3023440237972864e-05, "loss": 0.809, "step": 12815 }, { "epoch": 42.019672131147544, "grad_norm": 5.797685146331787, "learning_rate": 1.3022428031448586e-05, "loss": 0.8726, "step": 12816 }, { "epoch": 42.02295081967213, "grad_norm": 5.952233791351318, "learning_rate": 1.3021415790843288e-05, "loss": 0.8542, "step": 12817 }, { "epoch": 42.02622950819672, "grad_norm": 5.988656044006348, "learning_rate": 1.302040351616839e-05, "loss": 0.7211, "step": 12818 }, { "epoch": 42.02950819672131, "grad_norm": 6.590559959411621, "learning_rate": 1.3019391207435297e-05, "loss": 0.7682, "step": 12819 }, { "epoch": 42.032786885245905, "grad_norm": 6.432886123657227, "learning_rate": 1.3018378864655433e-05, "loss": 0.5463, "step": 12820 }, { "epoch": 42.03606557377049, "grad_norm": 6.414907932281494, "learning_rate": 1.3017366487840203e-05, "loss": 0.5532, "step": 12821 }, { "epoch": 42.03934426229508, "grad_norm": 6.874351501464844, "learning_rate": 1.3016354077001035e-05, "loss": 0.6795, "step": 12822 }, { "epoch": 42.04262295081967, "grad_norm": 6.94756555557251, "learning_rate": 1.3015341632149334e-05, "loss": 0.6151, "step": 12823 }, { "epoch": 42.045901639344265, "grad_norm": 8.305549621582031, "learning_rate": 1.3014329153296524e-05, "loss": 0.9404, "step": 12824 }, { "epoch": 42.049180327868854, "grad_norm": 5.584094524383545, "learning_rate": 1.3013316640454017e-05, "loss": 0.4806, "step": 12825 }, { "epoch": 42.05245901639344, "grad_norm": 6.226016998291016, "learning_rate": 1.3012304093633233e-05, "loss": 0.6302, "step": 12826 }, { "epoch": 42.05573770491803, "grad_norm": 7.470178127288818, "learning_rate": 1.3011291512845587e-05, "loss": 0.8834, "step": 12827 }, { "epoch": 42.059016393442626, "grad_norm": 18.36968421936035, "learning_rate": 1.30102788981025e-05, "loss": 0.931, "step": 12828 }, { "epoch": 42.062295081967214, "grad_norm": 36.478431701660156, "learning_rate": 1.3009266249415387e-05, "loss": 0.8318, "step": 12829 }, { "epoch": 42.0655737704918, "grad_norm": 7.046248912811279, "learning_rate": 1.3008253566795668e-05, "loss": 0.6401, "step": 12830 }, { "epoch": 42.06885245901639, "grad_norm": 5.686350345611572, "learning_rate": 1.3007240850254762e-05, "loss": 0.6496, "step": 12831 }, { "epoch": 42.072131147540986, "grad_norm": 8.289499282836914, "learning_rate": 1.300622809980409e-05, "loss": 0.7231, "step": 12832 }, { "epoch": 42.075409836065575, "grad_norm": 5.810878276824951, "learning_rate": 1.300521531545507e-05, "loss": 0.7694, "step": 12833 }, { "epoch": 42.07868852459016, "grad_norm": 6.5807576179504395, "learning_rate": 1.300420249721912e-05, "loss": 0.5357, "step": 12834 }, { "epoch": 42.08196721311475, "grad_norm": 5.177429676055908, "learning_rate": 1.3003189645107668e-05, "loss": 0.7811, "step": 12835 }, { "epoch": 42.08524590163935, "grad_norm": 7.381596565246582, "learning_rate": 1.3002176759132125e-05, "loss": 0.5709, "step": 12836 }, { "epoch": 42.088524590163935, "grad_norm": 12.88396167755127, "learning_rate": 1.3001163839303923e-05, "loss": 0.7785, "step": 12837 }, { "epoch": 42.09180327868852, "grad_norm": 9.480999946594238, "learning_rate": 1.3000150885634474e-05, "loss": 0.5718, "step": 12838 }, { "epoch": 42.09508196721311, "grad_norm": 6.378766059875488, "learning_rate": 1.2999137898135207e-05, "loss": 0.5601, "step": 12839 }, { "epoch": 42.09836065573771, "grad_norm": 7.588125228881836, "learning_rate": 1.299812487681754e-05, "loss": 0.8535, "step": 12840 }, { "epoch": 42.101639344262296, "grad_norm": 6.042849540710449, "learning_rate": 1.2997111821692901e-05, "loss": 0.4698, "step": 12841 }, { "epoch": 42.104918032786884, "grad_norm": 8.4302396774292, "learning_rate": 1.299609873277271e-05, "loss": 0.5545, "step": 12842 }, { "epoch": 42.10819672131147, "grad_norm": 7.011269569396973, "learning_rate": 1.2995085610068385e-05, "loss": 0.6266, "step": 12843 }, { "epoch": 42.11147540983607, "grad_norm": 6.090213298797607, "learning_rate": 1.2994072453591363e-05, "loss": 0.7577, "step": 12844 }, { "epoch": 42.114754098360656, "grad_norm": 8.699468612670898, "learning_rate": 1.2993059263353054e-05, "loss": 0.5315, "step": 12845 }, { "epoch": 42.118032786885244, "grad_norm": 7.574873447418213, "learning_rate": 1.2992046039364893e-05, "loss": 0.5883, "step": 12846 }, { "epoch": 42.12131147540983, "grad_norm": 10.514225006103516, "learning_rate": 1.2991032781638301e-05, "loss": 0.8734, "step": 12847 }, { "epoch": 42.12459016393443, "grad_norm": 48.2840576171875, "learning_rate": 1.2990019490184707e-05, "loss": 0.9242, "step": 12848 }, { "epoch": 42.12786885245902, "grad_norm": 5.923463344573975, "learning_rate": 1.2989006165015532e-05, "loss": 0.9048, "step": 12849 }, { "epoch": 42.131147540983605, "grad_norm": 6.511079788208008, "learning_rate": 1.2987992806142206e-05, "loss": 0.8344, "step": 12850 }, { "epoch": 42.13442622950819, "grad_norm": 7.310028076171875, "learning_rate": 1.2986979413576153e-05, "loss": 0.637, "step": 12851 }, { "epoch": 42.13770491803279, "grad_norm": 6.618677616119385, "learning_rate": 1.2985965987328803e-05, "loss": 0.7031, "step": 12852 }, { "epoch": 42.14098360655738, "grad_norm": 8.03079891204834, "learning_rate": 1.298495252741158e-05, "loss": 0.8408, "step": 12853 }, { "epoch": 42.144262295081965, "grad_norm": 8.023857116699219, "learning_rate": 1.2983939033835916e-05, "loss": 0.8882, "step": 12854 }, { "epoch": 42.14754098360656, "grad_norm": 6.915862083435059, "learning_rate": 1.2982925506613237e-05, "loss": 0.7678, "step": 12855 }, { "epoch": 42.15081967213115, "grad_norm": 10.212035179138184, "learning_rate": 1.298191194575497e-05, "loss": 0.5828, "step": 12856 }, { "epoch": 42.15409836065574, "grad_norm": 5.561195373535156, "learning_rate": 1.2980898351272547e-05, "loss": 0.7212, "step": 12857 }, { "epoch": 42.157377049180326, "grad_norm": 6.6244707107543945, "learning_rate": 1.2979884723177391e-05, "loss": 0.8133, "step": 12858 }, { "epoch": 42.16065573770492, "grad_norm": 8.584066390991211, "learning_rate": 1.297887106148094e-05, "loss": 0.75, "step": 12859 }, { "epoch": 42.16393442622951, "grad_norm": 7.14933967590332, "learning_rate": 1.2977857366194622e-05, "loss": 0.7609, "step": 12860 }, { "epoch": 42.1672131147541, "grad_norm": 8.164958000183105, "learning_rate": 1.2976843637329863e-05, "loss": 0.499, "step": 12861 }, { "epoch": 42.170491803278686, "grad_norm": 6.47918176651001, "learning_rate": 1.2975829874898096e-05, "loss": 0.8337, "step": 12862 }, { "epoch": 42.17377049180328, "grad_norm": 6.383769512176514, "learning_rate": 1.2974816078910757e-05, "loss": 0.7413, "step": 12863 }, { "epoch": 42.17704918032787, "grad_norm": 6.793506145477295, "learning_rate": 1.2973802249379271e-05, "loss": 0.5845, "step": 12864 }, { "epoch": 42.18032786885246, "grad_norm": 9.50711727142334, "learning_rate": 1.2972788386315074e-05, "loss": 0.7657, "step": 12865 }, { "epoch": 42.18360655737705, "grad_norm": 9.39004898071289, "learning_rate": 1.2971774489729595e-05, "loss": 0.6732, "step": 12866 }, { "epoch": 42.18688524590164, "grad_norm": 7.119709491729736, "learning_rate": 1.2970760559634272e-05, "loss": 0.7812, "step": 12867 }, { "epoch": 42.19016393442623, "grad_norm": 6.501291751861572, "learning_rate": 1.2969746596040534e-05, "loss": 0.8332, "step": 12868 }, { "epoch": 42.19344262295082, "grad_norm": 12.503328323364258, "learning_rate": 1.2968732598959815e-05, "loss": 0.6529, "step": 12869 }, { "epoch": 42.19672131147541, "grad_norm": 5.663197040557861, "learning_rate": 1.2967718568403548e-05, "loss": 0.9053, "step": 12870 }, { "epoch": 42.2, "grad_norm": 9.293498039245605, "learning_rate": 1.296670450438317e-05, "loss": 0.8319, "step": 12871 }, { "epoch": 42.20327868852459, "grad_norm": 6.622499942779541, "learning_rate": 1.2965690406910114e-05, "loss": 0.5363, "step": 12872 }, { "epoch": 42.20655737704918, "grad_norm": 6.349853515625, "learning_rate": 1.2964676275995814e-05, "loss": 0.8468, "step": 12873 }, { "epoch": 42.20983606557377, "grad_norm": 7.03707218170166, "learning_rate": 1.2963662111651708e-05, "loss": 0.8208, "step": 12874 }, { "epoch": 42.21311475409836, "grad_norm": 8.010137557983398, "learning_rate": 1.2962647913889228e-05, "loss": 0.7204, "step": 12875 }, { "epoch": 42.21639344262295, "grad_norm": 6.348579406738281, "learning_rate": 1.2961633682719814e-05, "loss": 0.4811, "step": 12876 }, { "epoch": 42.21967213114754, "grad_norm": 8.608875274658203, "learning_rate": 1.29606194181549e-05, "loss": 0.5635, "step": 12877 }, { "epoch": 42.22295081967213, "grad_norm": 5.515144348144531, "learning_rate": 1.2959605120205924e-05, "loss": 0.8751, "step": 12878 }, { "epoch": 42.226229508196724, "grad_norm": 7.699432849884033, "learning_rate": 1.2958590788884324e-05, "loss": 0.7426, "step": 12879 }, { "epoch": 42.22950819672131, "grad_norm": 10.096257209777832, "learning_rate": 1.2957576424201537e-05, "loss": 0.8501, "step": 12880 }, { "epoch": 42.2327868852459, "grad_norm": 5.840721130371094, "learning_rate": 1.2956562026169e-05, "loss": 0.4237, "step": 12881 }, { "epoch": 42.23606557377049, "grad_norm": 6.858120441436768, "learning_rate": 1.2955547594798152e-05, "loss": 0.9446, "step": 12882 }, { "epoch": 42.239344262295084, "grad_norm": 7.464710712432861, "learning_rate": 1.2954533130100433e-05, "loss": 0.6822, "step": 12883 }, { "epoch": 42.24262295081967, "grad_norm": 6.5248260498046875, "learning_rate": 1.2953518632087278e-05, "loss": 0.6748, "step": 12884 }, { "epoch": 42.24590163934426, "grad_norm": 7.241003513336182, "learning_rate": 1.2952504100770132e-05, "loss": 0.6793, "step": 12885 }, { "epoch": 42.24918032786885, "grad_norm": 5.927154064178467, "learning_rate": 1.295148953616043e-05, "loss": 0.529, "step": 12886 }, { "epoch": 42.252459016393445, "grad_norm": 6.9107666015625, "learning_rate": 1.2950474938269615e-05, "loss": 0.7446, "step": 12887 }, { "epoch": 42.25573770491803, "grad_norm": 8.681983947753906, "learning_rate": 1.2949460307109131e-05, "loss": 0.7765, "step": 12888 }, { "epoch": 42.25901639344262, "grad_norm": 6.287587642669678, "learning_rate": 1.2948445642690412e-05, "loss": 0.561, "step": 12889 }, { "epoch": 42.26229508196721, "grad_norm": 6.740078926086426, "learning_rate": 1.2947430945024904e-05, "loss": 0.6028, "step": 12890 }, { "epoch": 42.265573770491805, "grad_norm": 6.019047260284424, "learning_rate": 1.2946416214124046e-05, "loss": 0.9175, "step": 12891 }, { "epoch": 42.268852459016394, "grad_norm": 5.6704254150390625, "learning_rate": 1.2945401449999285e-05, "loss": 0.7861, "step": 12892 }, { "epoch": 42.27213114754098, "grad_norm": 6.252636432647705, "learning_rate": 1.2944386652662058e-05, "loss": 0.8692, "step": 12893 }, { "epoch": 42.27540983606557, "grad_norm": 17.686805725097656, "learning_rate": 1.2943371822123812e-05, "loss": 0.8341, "step": 12894 }, { "epoch": 42.278688524590166, "grad_norm": 7.854821681976318, "learning_rate": 1.2942356958395985e-05, "loss": 0.7554, "step": 12895 }, { "epoch": 42.281967213114754, "grad_norm": 5.466119766235352, "learning_rate": 1.2941342061490027e-05, "loss": 0.7761, "step": 12896 }, { "epoch": 42.28524590163934, "grad_norm": 7.747655868530273, "learning_rate": 1.2940327131417378e-05, "loss": 0.7227, "step": 12897 }, { "epoch": 42.28852459016394, "grad_norm": 7.250037670135498, "learning_rate": 1.2939312168189484e-05, "loss": 0.6251, "step": 12898 }, { "epoch": 42.291803278688526, "grad_norm": 7.335936069488525, "learning_rate": 1.2938297171817787e-05, "loss": 0.63, "step": 12899 }, { "epoch": 42.295081967213115, "grad_norm": 8.163517951965332, "learning_rate": 1.2937282142313736e-05, "loss": 0.7287, "step": 12900 }, { "epoch": 42.2983606557377, "grad_norm": 8.295082092285156, "learning_rate": 1.2936267079688773e-05, "loss": 0.5737, "step": 12901 }, { "epoch": 42.3016393442623, "grad_norm": 10.973278045654297, "learning_rate": 1.2935251983954348e-05, "loss": 1.0757, "step": 12902 }, { "epoch": 42.30491803278689, "grad_norm": 6.957396030426025, "learning_rate": 1.2934236855121904e-05, "loss": 0.4595, "step": 12903 }, { "epoch": 42.308196721311475, "grad_norm": 5.572709083557129, "learning_rate": 1.2933221693202891e-05, "loss": 0.5833, "step": 12904 }, { "epoch": 42.31147540983606, "grad_norm": 8.491318702697754, "learning_rate": 1.293220649820875e-05, "loss": 0.7393, "step": 12905 }, { "epoch": 42.31475409836066, "grad_norm": 5.098992347717285, "learning_rate": 1.2931191270150936e-05, "loss": 0.8445, "step": 12906 }, { "epoch": 42.31803278688525, "grad_norm": 5.44808292388916, "learning_rate": 1.293017600904089e-05, "loss": 0.7081, "step": 12907 }, { "epoch": 42.321311475409836, "grad_norm": 6.052974224090576, "learning_rate": 1.2929160714890063e-05, "loss": 0.5438, "step": 12908 }, { "epoch": 42.324590163934424, "grad_norm": 6.563992023468018, "learning_rate": 1.2928145387709905e-05, "loss": 0.5864, "step": 12909 }, { "epoch": 42.32786885245902, "grad_norm": 8.034812927246094, "learning_rate": 1.2927130027511861e-05, "loss": 0.7105, "step": 12910 }, { "epoch": 42.33114754098361, "grad_norm": 7.344184398651123, "learning_rate": 1.2926114634307383e-05, "loss": 0.5544, "step": 12911 }, { "epoch": 42.334426229508196, "grad_norm": 6.481594085693359, "learning_rate": 1.2925099208107921e-05, "loss": 0.6656, "step": 12912 }, { "epoch": 42.337704918032784, "grad_norm": 8.44786262512207, "learning_rate": 1.292408374892492e-05, "loss": 0.4516, "step": 12913 }, { "epoch": 42.34098360655738, "grad_norm": 7.3392014503479, "learning_rate": 1.292306825676984e-05, "loss": 0.8428, "step": 12914 }, { "epoch": 42.34426229508197, "grad_norm": 6.6909565925598145, "learning_rate": 1.2922052731654122e-05, "loss": 0.4967, "step": 12915 }, { "epoch": 42.34754098360656, "grad_norm": 7.2544474601745605, "learning_rate": 1.2921037173589223e-05, "loss": 0.6506, "step": 12916 }, { "epoch": 42.350819672131145, "grad_norm": 11.21451473236084, "learning_rate": 1.2920021582586595e-05, "loss": 0.65, "step": 12917 }, { "epoch": 42.35409836065574, "grad_norm": 8.489761352539062, "learning_rate": 1.2919005958657685e-05, "loss": 0.6501, "step": 12918 }, { "epoch": 42.35737704918033, "grad_norm": 9.588883399963379, "learning_rate": 1.291799030181395e-05, "loss": 0.7674, "step": 12919 }, { "epoch": 42.36065573770492, "grad_norm": 6.055847644805908, "learning_rate": 1.2916974612066838e-05, "loss": 0.9053, "step": 12920 }, { "epoch": 42.363934426229505, "grad_norm": 5.847592353820801, "learning_rate": 1.2915958889427805e-05, "loss": 0.7327, "step": 12921 }, { "epoch": 42.3672131147541, "grad_norm": 6.27869987487793, "learning_rate": 1.2914943133908304e-05, "loss": 0.8211, "step": 12922 }, { "epoch": 42.37049180327869, "grad_norm": 6.515879154205322, "learning_rate": 1.2913927345519788e-05, "loss": 0.5129, "step": 12923 }, { "epoch": 42.37377049180328, "grad_norm": 9.413688659667969, "learning_rate": 1.2912911524273714e-05, "loss": 0.7507, "step": 12924 }, { "epoch": 42.377049180327866, "grad_norm": 6.991098880767822, "learning_rate": 1.2911895670181532e-05, "loss": 0.5873, "step": 12925 }, { "epoch": 42.38032786885246, "grad_norm": 5.872345447540283, "learning_rate": 1.2910879783254698e-05, "loss": 0.6902, "step": 12926 }, { "epoch": 42.38360655737705, "grad_norm": 6.508946418762207, "learning_rate": 1.2909863863504665e-05, "loss": 0.6514, "step": 12927 }, { "epoch": 42.38688524590164, "grad_norm": 7.704999923706055, "learning_rate": 1.2908847910942899e-05, "loss": 0.8907, "step": 12928 }, { "epoch": 42.390163934426226, "grad_norm": 8.248600959777832, "learning_rate": 1.290783192558084e-05, "loss": 0.5742, "step": 12929 }, { "epoch": 42.39344262295082, "grad_norm": 7.092855930328369, "learning_rate": 1.290681590742996e-05, "loss": 0.8188, "step": 12930 }, { "epoch": 42.39672131147541, "grad_norm": 7.527866840362549, "learning_rate": 1.2905799856501703e-05, "loss": 0.6682, "step": 12931 }, { "epoch": 42.4, "grad_norm": 7.775332927703857, "learning_rate": 1.2904783772807534e-05, "loss": 0.8531, "step": 12932 }, { "epoch": 42.40327868852459, "grad_norm": 7.276278495788574, "learning_rate": 1.290376765635891e-05, "loss": 0.4455, "step": 12933 }, { "epoch": 42.40655737704918, "grad_norm": 6.406558036804199, "learning_rate": 1.2902751507167281e-05, "loss": 0.5763, "step": 12934 }, { "epoch": 42.40983606557377, "grad_norm": 6.2018961906433105, "learning_rate": 1.2901735325244116e-05, "loss": 0.6146, "step": 12935 }, { "epoch": 42.41311475409836, "grad_norm": 8.437413215637207, "learning_rate": 1.2900719110600863e-05, "loss": 0.7399, "step": 12936 }, { "epoch": 42.41639344262295, "grad_norm": 8.380687713623047, "learning_rate": 1.2899702863248992e-05, "loss": 0.728, "step": 12937 }, { "epoch": 42.41967213114754, "grad_norm": 6.208614349365234, "learning_rate": 1.289868658319995e-05, "loss": 0.6302, "step": 12938 }, { "epoch": 42.42295081967213, "grad_norm": 5.633131980895996, "learning_rate": 1.2897670270465206e-05, "loss": 0.8652, "step": 12939 }, { "epoch": 42.42622950819672, "grad_norm": 6.002651691436768, "learning_rate": 1.2896653925056218e-05, "loss": 0.615, "step": 12940 }, { "epoch": 42.429508196721315, "grad_norm": 7.274869441986084, "learning_rate": 1.2895637546984441e-05, "loss": 0.6592, "step": 12941 }, { "epoch": 42.4327868852459, "grad_norm": 6.390692710876465, "learning_rate": 1.2894621136261342e-05, "loss": 0.6432, "step": 12942 }, { "epoch": 42.43606557377049, "grad_norm": 5.616554260253906, "learning_rate": 1.2893604692898381e-05, "loss": 0.6879, "step": 12943 }, { "epoch": 42.43934426229508, "grad_norm": 7.375906467437744, "learning_rate": 1.2892588216907018e-05, "loss": 0.5726, "step": 12944 }, { "epoch": 42.442622950819676, "grad_norm": 7.124731540679932, "learning_rate": 1.2891571708298716e-05, "loss": 0.7892, "step": 12945 }, { "epoch": 42.445901639344264, "grad_norm": 4.982125282287598, "learning_rate": 1.2890555167084937e-05, "loss": 0.5936, "step": 12946 }, { "epoch": 42.44918032786885, "grad_norm": 5.780651092529297, "learning_rate": 1.2889538593277143e-05, "loss": 0.8201, "step": 12947 }, { "epoch": 42.45245901639344, "grad_norm": 8.132279396057129, "learning_rate": 1.2888521986886797e-05, "loss": 0.775, "step": 12948 }, { "epoch": 42.455737704918036, "grad_norm": 6.670762538909912, "learning_rate": 1.288750534792536e-05, "loss": 0.7178, "step": 12949 }, { "epoch": 42.459016393442624, "grad_norm": 6.529726982116699, "learning_rate": 1.2886488676404301e-05, "loss": 0.6697, "step": 12950 }, { "epoch": 42.46229508196721, "grad_norm": 6.435230731964111, "learning_rate": 1.2885471972335078e-05, "loss": 0.5635, "step": 12951 }, { "epoch": 42.4655737704918, "grad_norm": 6.100646495819092, "learning_rate": 1.2884455235729161e-05, "loss": 0.7869, "step": 12952 }, { "epoch": 42.4688524590164, "grad_norm": 7.107685565948486, "learning_rate": 1.2883438466598008e-05, "loss": 0.7102, "step": 12953 }, { "epoch": 42.472131147540985, "grad_norm": 6.825368881225586, "learning_rate": 1.2882421664953093e-05, "loss": 0.9647, "step": 12954 }, { "epoch": 42.47540983606557, "grad_norm": 5.278468132019043, "learning_rate": 1.2881404830805876e-05, "loss": 0.9077, "step": 12955 }, { "epoch": 42.47868852459016, "grad_norm": 5.713191509246826, "learning_rate": 1.2880387964167821e-05, "loss": 0.7441, "step": 12956 }, { "epoch": 42.48196721311476, "grad_norm": 10.393773078918457, "learning_rate": 1.2879371065050399e-05, "loss": 0.7354, "step": 12957 }, { "epoch": 42.485245901639345, "grad_norm": 6.1803483963012695, "learning_rate": 1.2878354133465073e-05, "loss": 0.8315, "step": 12958 }, { "epoch": 42.488524590163934, "grad_norm": 12.044877052307129, "learning_rate": 1.2877337169423314e-05, "loss": 0.7975, "step": 12959 }, { "epoch": 42.49180327868852, "grad_norm": 8.0352783203125, "learning_rate": 1.2876320172936584e-05, "loss": 0.904, "step": 12960 }, { "epoch": 42.49508196721312, "grad_norm": 7.026576995849609, "learning_rate": 1.2875303144016355e-05, "loss": 0.7707, "step": 12961 }, { "epoch": 42.498360655737706, "grad_norm": 5.990832805633545, "learning_rate": 1.2874286082674092e-05, "loss": 0.4855, "step": 12962 }, { "epoch": 42.501639344262294, "grad_norm": 6.3662943840026855, "learning_rate": 1.2873268988921268e-05, "loss": 0.7763, "step": 12963 }, { "epoch": 42.50491803278688, "grad_norm": 6.495965003967285, "learning_rate": 1.2872251862769345e-05, "loss": 0.6568, "step": 12964 }, { "epoch": 42.50819672131148, "grad_norm": 7.846060276031494, "learning_rate": 1.2871234704229799e-05, "loss": 0.7146, "step": 12965 }, { "epoch": 42.511475409836066, "grad_norm": 6.854307651519775, "learning_rate": 1.2870217513314095e-05, "loss": 0.8913, "step": 12966 }, { "epoch": 42.514754098360655, "grad_norm": 11.141021728515625, "learning_rate": 1.28692002900337e-05, "loss": 0.9546, "step": 12967 }, { "epoch": 42.51803278688524, "grad_norm": 6.023065090179443, "learning_rate": 1.2868183034400095e-05, "loss": 0.7416, "step": 12968 }, { "epoch": 42.52131147540984, "grad_norm": 6.64578104019165, "learning_rate": 1.2867165746424739e-05, "loss": 0.7995, "step": 12969 }, { "epoch": 42.52459016393443, "grad_norm": 8.930736541748047, "learning_rate": 1.2866148426119114e-05, "loss": 0.6947, "step": 12970 }, { "epoch": 42.527868852459015, "grad_norm": 10.098678588867188, "learning_rate": 1.2865131073494678e-05, "loss": 0.7798, "step": 12971 }, { "epoch": 42.5311475409836, "grad_norm": 6.504623889923096, "learning_rate": 1.2864113688562919e-05, "loss": 0.8472, "step": 12972 }, { "epoch": 42.5344262295082, "grad_norm": 10.88247013092041, "learning_rate": 1.2863096271335293e-05, "loss": 0.8179, "step": 12973 }, { "epoch": 42.53770491803279, "grad_norm": 6.841804027557373, "learning_rate": 1.2862078821823283e-05, "loss": 0.8189, "step": 12974 }, { "epoch": 42.540983606557376, "grad_norm": 6.7338385581970215, "learning_rate": 1.2861061340038357e-05, "loss": 0.463, "step": 12975 }, { "epoch": 42.544262295081964, "grad_norm": 5.723842144012451, "learning_rate": 1.2860043825991993e-05, "loss": 0.5958, "step": 12976 }, { "epoch": 42.54754098360656, "grad_norm": 8.267388343811035, "learning_rate": 1.2859026279695657e-05, "loss": 0.9645, "step": 12977 }, { "epoch": 42.55081967213115, "grad_norm": 5.618808746337891, "learning_rate": 1.285800870116083e-05, "loss": 0.7452, "step": 12978 }, { "epoch": 42.554098360655736, "grad_norm": 6.282522678375244, "learning_rate": 1.2856991090398977e-05, "loss": 0.8958, "step": 12979 }, { "epoch": 42.557377049180324, "grad_norm": 7.275294303894043, "learning_rate": 1.2855973447421588e-05, "loss": 0.7844, "step": 12980 }, { "epoch": 42.56065573770492, "grad_norm": 5.475503921508789, "learning_rate": 1.2854955772240123e-05, "loss": 0.6454, "step": 12981 }, { "epoch": 42.56393442622951, "grad_norm": 6.775280952453613, "learning_rate": 1.2853938064866066e-05, "loss": 0.6466, "step": 12982 }, { "epoch": 42.5672131147541, "grad_norm": 6.739169120788574, "learning_rate": 1.2852920325310887e-05, "loss": 0.7699, "step": 12983 }, { "epoch": 42.570491803278685, "grad_norm": 5.683871746063232, "learning_rate": 1.2851902553586069e-05, "loss": 0.8167, "step": 12984 }, { "epoch": 42.57377049180328, "grad_norm": 8.913093566894531, "learning_rate": 1.2850884749703084e-05, "loss": 0.7228, "step": 12985 }, { "epoch": 42.57704918032787, "grad_norm": 9.16761302947998, "learning_rate": 1.2849866913673406e-05, "loss": 1.0149, "step": 12986 }, { "epoch": 42.58032786885246, "grad_norm": 9.215904235839844, "learning_rate": 1.2848849045508518e-05, "loss": 0.6068, "step": 12987 }, { "epoch": 42.58360655737705, "grad_norm": 21.78432273864746, "learning_rate": 1.2847831145219893e-05, "loss": 0.8109, "step": 12988 }, { "epoch": 42.58688524590164, "grad_norm": 6.282674789428711, "learning_rate": 1.2846813212819014e-05, "loss": 0.774, "step": 12989 }, { "epoch": 42.59016393442623, "grad_norm": 6.008711338043213, "learning_rate": 1.2845795248317352e-05, "loss": 0.4269, "step": 12990 }, { "epoch": 42.59344262295082, "grad_norm": 7.447181701660156, "learning_rate": 1.2844777251726393e-05, "loss": 0.8905, "step": 12991 }, { "epoch": 42.59672131147541, "grad_norm": 6.023548603057861, "learning_rate": 1.284375922305761e-05, "loss": 0.8323, "step": 12992 }, { "epoch": 42.6, "grad_norm": 8.758856773376465, "learning_rate": 1.2842741162322487e-05, "loss": 0.6228, "step": 12993 }, { "epoch": 42.60327868852459, "grad_norm": 11.163351058959961, "learning_rate": 1.2841723069532502e-05, "loss": 0.5597, "step": 12994 }, { "epoch": 42.60655737704918, "grad_norm": 8.091005325317383, "learning_rate": 1.2840704944699132e-05, "loss": 0.5923, "step": 12995 }, { "epoch": 42.609836065573774, "grad_norm": 25.37787437438965, "learning_rate": 1.2839686787833862e-05, "loss": 0.9275, "step": 12996 }, { "epoch": 42.61311475409836, "grad_norm": 5.416574001312256, "learning_rate": 1.283866859894817e-05, "loss": 0.596, "step": 12997 }, { "epoch": 42.61639344262295, "grad_norm": 7.946795463562012, "learning_rate": 1.2837650378053541e-05, "loss": 0.7305, "step": 12998 }, { "epoch": 42.61967213114754, "grad_norm": 5.3911452293396, "learning_rate": 1.283663212516145e-05, "loss": 0.7385, "step": 12999 }, { "epoch": 42.622950819672134, "grad_norm": 6.019237518310547, "learning_rate": 1.2835613840283386e-05, "loss": 0.7707, "step": 13000 }, { "epoch": 42.62622950819672, "grad_norm": 6.631789207458496, "learning_rate": 1.2834595523430824e-05, "loss": 0.5763, "step": 13001 }, { "epoch": 42.62950819672131, "grad_norm": 8.961909294128418, "learning_rate": 1.2833577174615253e-05, "loss": 0.4194, "step": 13002 }, { "epoch": 42.6327868852459, "grad_norm": 5.356602668762207, "learning_rate": 1.2832558793848148e-05, "loss": 0.7859, "step": 13003 }, { "epoch": 42.636065573770495, "grad_norm": 7.516244888305664, "learning_rate": 1.2831540381141006e-05, "loss": 0.8327, "step": 13004 }, { "epoch": 42.63934426229508, "grad_norm": 6.546135425567627, "learning_rate": 1.2830521936505296e-05, "loss": 0.8434, "step": 13005 }, { "epoch": 42.64262295081967, "grad_norm": 25.71258544921875, "learning_rate": 1.282950345995251e-05, "loss": 0.5509, "step": 13006 }, { "epoch": 42.64590163934426, "grad_norm": 6.741132736206055, "learning_rate": 1.282848495149413e-05, "loss": 0.7026, "step": 13007 }, { "epoch": 42.649180327868855, "grad_norm": 6.747297286987305, "learning_rate": 1.2827466411141643e-05, "loss": 0.5644, "step": 13008 }, { "epoch": 42.65245901639344, "grad_norm": 7.2929863929748535, "learning_rate": 1.2826447838906528e-05, "loss": 0.5393, "step": 13009 }, { "epoch": 42.65573770491803, "grad_norm": 9.032392501831055, "learning_rate": 1.282542923480028e-05, "loss": 0.9344, "step": 13010 }, { "epoch": 42.65901639344262, "grad_norm": 7.844969272613525, "learning_rate": 1.2824410598834378e-05, "loss": 0.7487, "step": 13011 }, { "epoch": 42.662295081967216, "grad_norm": 9.054651260375977, "learning_rate": 1.2823391931020308e-05, "loss": 0.7772, "step": 13012 }, { "epoch": 42.665573770491804, "grad_norm": 8.64837646484375, "learning_rate": 1.2822373231369562e-05, "loss": 0.7095, "step": 13013 }, { "epoch": 42.66885245901639, "grad_norm": 5.995030879974365, "learning_rate": 1.2821354499893619e-05, "loss": 0.8147, "step": 13014 }, { "epoch": 42.67213114754098, "grad_norm": 10.53969955444336, "learning_rate": 1.2820335736603975e-05, "loss": 0.9453, "step": 13015 }, { "epoch": 42.675409836065576, "grad_norm": 7.423030853271484, "learning_rate": 1.281931694151211e-05, "loss": 0.622, "step": 13016 }, { "epoch": 42.678688524590164, "grad_norm": 6.036779403686523, "learning_rate": 1.2818298114629518e-05, "loss": 0.7614, "step": 13017 }, { "epoch": 42.68196721311475, "grad_norm": 5.962153434753418, "learning_rate": 1.2817279255967679e-05, "loss": 0.6403, "step": 13018 }, { "epoch": 42.68524590163934, "grad_norm": 7.61128568649292, "learning_rate": 1.2816260365538092e-05, "loss": 0.763, "step": 13019 }, { "epoch": 42.68852459016394, "grad_norm": 6.890455722808838, "learning_rate": 1.2815241443352238e-05, "loss": 0.8373, "step": 13020 }, { "epoch": 42.691803278688525, "grad_norm": 7.949923038482666, "learning_rate": 1.2814222489421612e-05, "loss": 0.8741, "step": 13021 }, { "epoch": 42.69508196721311, "grad_norm": 6.756128787994385, "learning_rate": 1.2813203503757702e-05, "loss": 0.7576, "step": 13022 }, { "epoch": 42.6983606557377, "grad_norm": 5.73433780670166, "learning_rate": 1.2812184486371995e-05, "loss": 0.5397, "step": 13023 }, { "epoch": 42.7016393442623, "grad_norm": 6.627987384796143, "learning_rate": 1.2811165437275985e-05, "loss": 0.706, "step": 13024 }, { "epoch": 42.704918032786885, "grad_norm": 10.662535667419434, "learning_rate": 1.2810146356481158e-05, "loss": 0.5796, "step": 13025 }, { "epoch": 42.708196721311474, "grad_norm": 6.437988758087158, "learning_rate": 1.2809127243999017e-05, "loss": 0.662, "step": 13026 }, { "epoch": 42.71147540983607, "grad_norm": 6.117427349090576, "learning_rate": 1.2808108099841041e-05, "loss": 0.8578, "step": 13027 }, { "epoch": 42.71475409836066, "grad_norm": 7.23578405380249, "learning_rate": 1.2807088924018727e-05, "loss": 0.7413, "step": 13028 }, { "epoch": 42.718032786885246, "grad_norm": 19.542734146118164, "learning_rate": 1.2806069716543566e-05, "loss": 0.9078, "step": 13029 }, { "epoch": 42.721311475409834, "grad_norm": 5.792455196380615, "learning_rate": 1.2805050477427053e-05, "loss": 0.7426, "step": 13030 }, { "epoch": 42.72459016393443, "grad_norm": 6.632336616516113, "learning_rate": 1.2804031206680679e-05, "loss": 0.8424, "step": 13031 }, { "epoch": 42.72786885245902, "grad_norm": 6.069246768951416, "learning_rate": 1.2803011904315937e-05, "loss": 0.8362, "step": 13032 }, { "epoch": 42.731147540983606, "grad_norm": 6.257509231567383, "learning_rate": 1.280199257034432e-05, "loss": 0.5889, "step": 13033 }, { "epoch": 42.734426229508195, "grad_norm": 6.510041236877441, "learning_rate": 1.2800973204777324e-05, "loss": 0.7683, "step": 13034 }, { "epoch": 42.73770491803279, "grad_norm": 18.27693748474121, "learning_rate": 1.2799953807626446e-05, "loss": 1.0308, "step": 13035 }, { "epoch": 42.74098360655738, "grad_norm": 9.452291488647461, "learning_rate": 1.2798934378903174e-05, "loss": 0.6911, "step": 13036 }, { "epoch": 42.74426229508197, "grad_norm": 5.667867660522461, "learning_rate": 1.279791491861901e-05, "loss": 0.7153, "step": 13037 }, { "epoch": 42.747540983606555, "grad_norm": 9.103915214538574, "learning_rate": 1.2796895426785442e-05, "loss": 0.5312, "step": 13038 }, { "epoch": 42.75081967213115, "grad_norm": 6.697160720825195, "learning_rate": 1.2795875903413975e-05, "loss": 0.5281, "step": 13039 }, { "epoch": 42.75409836065574, "grad_norm": 6.293798923492432, "learning_rate": 1.2794856348516095e-05, "loss": 0.679, "step": 13040 }, { "epoch": 42.75737704918033, "grad_norm": 7.34146785736084, "learning_rate": 1.279383676210331e-05, "loss": 0.7114, "step": 13041 }, { "epoch": 42.760655737704916, "grad_norm": 8.580094337463379, "learning_rate": 1.2792817144187104e-05, "loss": 0.9122, "step": 13042 }, { "epoch": 42.76393442622951, "grad_norm": 6.814224720001221, "learning_rate": 1.2791797494778985e-05, "loss": 0.6721, "step": 13043 }, { "epoch": 42.7672131147541, "grad_norm": 6.656826019287109, "learning_rate": 1.2790777813890445e-05, "loss": 0.609, "step": 13044 }, { "epoch": 42.77049180327869, "grad_norm": 8.268540382385254, "learning_rate": 1.2789758101532983e-05, "loss": 0.8206, "step": 13045 }, { "epoch": 42.773770491803276, "grad_norm": 7.4696526527404785, "learning_rate": 1.2788738357718098e-05, "loss": 0.6306, "step": 13046 }, { "epoch": 42.77704918032787, "grad_norm": 6.521905422210693, "learning_rate": 1.2787718582457292e-05, "loss": 0.5775, "step": 13047 }, { "epoch": 42.78032786885246, "grad_norm": 8.412530899047852, "learning_rate": 1.2786698775762054e-05, "loss": 0.6647, "step": 13048 }, { "epoch": 42.78360655737705, "grad_norm": 6.434238910675049, "learning_rate": 1.2785678937643892e-05, "loss": 0.7217, "step": 13049 }, { "epoch": 42.78688524590164, "grad_norm": 5.799352645874023, "learning_rate": 1.2784659068114306e-05, "loss": 0.6518, "step": 13050 }, { "epoch": 42.79016393442623, "grad_norm": 9.408262252807617, "learning_rate": 1.2783639167184794e-05, "loss": 0.7035, "step": 13051 }, { "epoch": 42.79344262295082, "grad_norm": 7.243824481964111, "learning_rate": 1.2782619234866855e-05, "loss": 0.7527, "step": 13052 }, { "epoch": 42.79672131147541, "grad_norm": 6.759266376495361, "learning_rate": 1.278159927117199e-05, "loss": 0.911, "step": 13053 }, { "epoch": 42.8, "grad_norm": 6.074750900268555, "learning_rate": 1.2780579276111702e-05, "loss": 0.8483, "step": 13054 }, { "epoch": 42.80327868852459, "grad_norm": 8.99831771850586, "learning_rate": 1.2779559249697491e-05, "loss": 0.6651, "step": 13055 }, { "epoch": 42.80655737704918, "grad_norm": 5.898564338684082, "learning_rate": 1.2778539191940859e-05, "loss": 0.9923, "step": 13056 }, { "epoch": 42.80983606557377, "grad_norm": 5.480362892150879, "learning_rate": 1.2777519102853311e-05, "loss": 0.668, "step": 13057 }, { "epoch": 42.81311475409836, "grad_norm": 5.263017177581787, "learning_rate": 1.2776498982446346e-05, "loss": 0.8088, "step": 13058 }, { "epoch": 42.81639344262295, "grad_norm": 6.389814853668213, "learning_rate": 1.2775478830731468e-05, "loss": 1.0082, "step": 13059 }, { "epoch": 42.81967213114754, "grad_norm": 6.839870452880859, "learning_rate": 1.277445864772018e-05, "loss": 0.8025, "step": 13060 }, { "epoch": 42.82295081967213, "grad_norm": 9.814803123474121, "learning_rate": 1.2773438433423989e-05, "loss": 0.6397, "step": 13061 }, { "epoch": 42.82622950819672, "grad_norm": 6.779411792755127, "learning_rate": 1.277241818785439e-05, "loss": 0.9011, "step": 13062 }, { "epoch": 42.829508196721314, "grad_norm": 7.816442966461182, "learning_rate": 1.2771397911022898e-05, "loss": 0.5831, "step": 13063 }, { "epoch": 42.8327868852459, "grad_norm": 6.339003562927246, "learning_rate": 1.277037760294101e-05, "loss": 0.7165, "step": 13064 }, { "epoch": 42.83606557377049, "grad_norm": 8.81121826171875, "learning_rate": 1.2769357263620237e-05, "loss": 0.7184, "step": 13065 }, { "epoch": 42.83934426229508, "grad_norm": 8.239829063415527, "learning_rate": 1.276833689307208e-05, "loss": 0.6674, "step": 13066 }, { "epoch": 42.842622950819674, "grad_norm": 5.371394157409668, "learning_rate": 1.2767316491308047e-05, "loss": 1.0062, "step": 13067 }, { "epoch": 42.84590163934426, "grad_norm": 7.583030700683594, "learning_rate": 1.2766296058339642e-05, "loss": 0.8081, "step": 13068 }, { "epoch": 42.84918032786885, "grad_norm": 6.375234127044678, "learning_rate": 1.2765275594178372e-05, "loss": 0.651, "step": 13069 }, { "epoch": 42.85245901639344, "grad_norm": 7.87590217590332, "learning_rate": 1.2764255098835747e-05, "loss": 0.8915, "step": 13070 }, { "epoch": 42.855737704918035, "grad_norm": 8.847772598266602, "learning_rate": 1.276323457232327e-05, "loss": 0.6724, "step": 13071 }, { "epoch": 42.85901639344262, "grad_norm": 5.367013931274414, "learning_rate": 1.276221401465245e-05, "loss": 0.7204, "step": 13072 }, { "epoch": 42.86229508196721, "grad_norm": 7.809916973114014, "learning_rate": 1.2761193425834798e-05, "loss": 0.8654, "step": 13073 }, { "epoch": 42.86557377049181, "grad_norm": 6.561684608459473, "learning_rate": 1.2760172805881814e-05, "loss": 0.8799, "step": 13074 }, { "epoch": 42.868852459016395, "grad_norm": 9.652324676513672, "learning_rate": 1.2759152154805017e-05, "loss": 0.7084, "step": 13075 }, { "epoch": 42.87213114754098, "grad_norm": 7.211964130401611, "learning_rate": 1.2758131472615906e-05, "loss": 0.7383, "step": 13076 }, { "epoch": 42.87540983606557, "grad_norm": 7.555404186248779, "learning_rate": 1.2757110759325997e-05, "loss": 0.655, "step": 13077 }, { "epoch": 42.87868852459017, "grad_norm": 6.381028652191162, "learning_rate": 1.2756090014946798e-05, "loss": 0.8158, "step": 13078 }, { "epoch": 42.881967213114756, "grad_norm": 6.858593940734863, "learning_rate": 1.2755069239489817e-05, "loss": 0.6953, "step": 13079 }, { "epoch": 42.885245901639344, "grad_norm": 7.36426305770874, "learning_rate": 1.2754048432966568e-05, "loss": 0.6737, "step": 13080 }, { "epoch": 42.88852459016393, "grad_norm": 12.517045021057129, "learning_rate": 1.2753027595388558e-05, "loss": 0.7007, "step": 13081 }, { "epoch": 42.89180327868853, "grad_norm": 13.747758865356445, "learning_rate": 1.27520067267673e-05, "loss": 0.717, "step": 13082 }, { "epoch": 42.895081967213116, "grad_norm": 8.027409553527832, "learning_rate": 1.2750985827114304e-05, "loss": 0.4451, "step": 13083 }, { "epoch": 42.898360655737704, "grad_norm": 25.889162063598633, "learning_rate": 1.2749964896441084e-05, "loss": 0.829, "step": 13084 }, { "epoch": 42.90163934426229, "grad_norm": 6.827164173126221, "learning_rate": 1.2748943934759149e-05, "loss": 0.5324, "step": 13085 }, { "epoch": 42.90491803278689, "grad_norm": 22.837615966796875, "learning_rate": 1.2747922942080014e-05, "loss": 0.6242, "step": 13086 }, { "epoch": 42.90819672131148, "grad_norm": 6.219278335571289, "learning_rate": 1.2746901918415191e-05, "loss": 0.5677, "step": 13087 }, { "epoch": 42.911475409836065, "grad_norm": 10.458203315734863, "learning_rate": 1.274588086377619e-05, "loss": 0.7751, "step": 13088 }, { "epoch": 42.91475409836065, "grad_norm": 7.69795560836792, "learning_rate": 1.2744859778174532e-05, "loss": 0.8113, "step": 13089 }, { "epoch": 42.91803278688525, "grad_norm": 6.758213996887207, "learning_rate": 1.2743838661621722e-05, "loss": 0.8137, "step": 13090 }, { "epoch": 42.92131147540984, "grad_norm": 5.777834415435791, "learning_rate": 1.274281751412928e-05, "loss": 0.8246, "step": 13091 }, { "epoch": 42.924590163934425, "grad_norm": 6.115323066711426, "learning_rate": 1.2741796335708718e-05, "loss": 0.8751, "step": 13092 }, { "epoch": 42.927868852459014, "grad_norm": 7.681859016418457, "learning_rate": 1.2740775126371553e-05, "loss": 0.7575, "step": 13093 }, { "epoch": 42.93114754098361, "grad_norm": 6.9963250160217285, "learning_rate": 1.2739753886129296e-05, "loss": 0.5578, "step": 13094 }, { "epoch": 42.9344262295082, "grad_norm": 8.674314498901367, "learning_rate": 1.2738732614993467e-05, "loss": 0.6913, "step": 13095 }, { "epoch": 42.937704918032786, "grad_norm": 6.210935115814209, "learning_rate": 1.273771131297558e-05, "loss": 0.6088, "step": 13096 }, { "epoch": 42.940983606557374, "grad_norm": 9.102789878845215, "learning_rate": 1.2736689980087154e-05, "loss": 0.8405, "step": 13097 }, { "epoch": 42.94426229508197, "grad_norm": 7.68086576461792, "learning_rate": 1.2735668616339699e-05, "loss": 0.8179, "step": 13098 }, { "epoch": 42.94754098360656, "grad_norm": 6.4392991065979, "learning_rate": 1.2734647221744739e-05, "loss": 0.8055, "step": 13099 }, { "epoch": 42.950819672131146, "grad_norm": 5.007757663726807, "learning_rate": 1.2733625796313785e-05, "loss": 0.823, "step": 13100 }, { "epoch": 42.954098360655735, "grad_norm": 7.241195201873779, "learning_rate": 1.2732604340058361e-05, "loss": 0.5613, "step": 13101 }, { "epoch": 42.95737704918033, "grad_norm": 6.662076950073242, "learning_rate": 1.2731582852989978e-05, "loss": 0.5342, "step": 13102 }, { "epoch": 42.96065573770492, "grad_norm": 5.949502944946289, "learning_rate": 1.2730561335120162e-05, "loss": 0.8306, "step": 13103 }, { "epoch": 42.96393442622951, "grad_norm": 25.9007568359375, "learning_rate": 1.2729539786460428e-05, "loss": 0.6741, "step": 13104 }, { "epoch": 42.967213114754095, "grad_norm": 7.068081378936768, "learning_rate": 1.2728518207022292e-05, "loss": 0.7975, "step": 13105 }, { "epoch": 42.97049180327869, "grad_norm": 5.638195037841797, "learning_rate": 1.2727496596817277e-05, "loss": 0.6794, "step": 13106 }, { "epoch": 42.97377049180328, "grad_norm": 6.43511962890625, "learning_rate": 1.2726474955856903e-05, "loss": 0.7505, "step": 13107 }, { "epoch": 42.97704918032787, "grad_norm": 9.393447875976562, "learning_rate": 1.272545328415269e-05, "loss": 0.7253, "step": 13108 }, { "epoch": 42.980327868852456, "grad_norm": 5.559689521789551, "learning_rate": 1.2724431581716154e-05, "loss": 1.009, "step": 13109 }, { "epoch": 42.98360655737705, "grad_norm": 6.162020683288574, "learning_rate": 1.2723409848558823e-05, "loss": 0.6613, "step": 13110 }, { "epoch": 42.98688524590164, "grad_norm": 32.80390548706055, "learning_rate": 1.2722388084692211e-05, "loss": 0.4532, "step": 13111 }, { "epoch": 42.99016393442623, "grad_norm": 5.779294967651367, "learning_rate": 1.2721366290127848e-05, "loss": 0.5175, "step": 13112 }, { "epoch": 42.993442622950816, "grad_norm": 5.937351703643799, "learning_rate": 1.2720344464877248e-05, "loss": 0.6341, "step": 13113 }, { "epoch": 42.99672131147541, "grad_norm": 8.732985496520996, "learning_rate": 1.2719322608951932e-05, "loss": 0.7523, "step": 13114 }, { "epoch": 43.0, "grad_norm": 21.304866790771484, "learning_rate": 1.2718300722363431e-05, "loss": 0.5359, "step": 13115 }, { "epoch": 43.00327868852459, "grad_norm": 6.348755836486816, "learning_rate": 1.271727880512326e-05, "loss": 0.6044, "step": 13116 }, { "epoch": 43.006557377049184, "grad_norm": 8.620586395263672, "learning_rate": 1.2716256857242947e-05, "loss": 0.7808, "step": 13117 }, { "epoch": 43.00983606557377, "grad_norm": 6.984116554260254, "learning_rate": 1.271523487873401e-05, "loss": 0.6226, "step": 13118 }, { "epoch": 43.01311475409836, "grad_norm": 8.166244506835938, "learning_rate": 1.2714212869607982e-05, "loss": 0.5674, "step": 13119 }, { "epoch": 43.01639344262295, "grad_norm": 7.707512855529785, "learning_rate": 1.2713190829876378e-05, "loss": 0.8062, "step": 13120 }, { "epoch": 43.019672131147544, "grad_norm": 7.494457721710205, "learning_rate": 1.271216875955073e-05, "loss": 0.6994, "step": 13121 }, { "epoch": 43.02295081967213, "grad_norm": 6.915075778961182, "learning_rate": 1.2711146658642557e-05, "loss": 0.584, "step": 13122 }, { "epoch": 43.02622950819672, "grad_norm": 10.08214282989502, "learning_rate": 1.2710124527163387e-05, "loss": 0.6496, "step": 13123 }, { "epoch": 43.02950819672131, "grad_norm": 8.435379981994629, "learning_rate": 1.2709102365124743e-05, "loss": 0.5297, "step": 13124 }, { "epoch": 43.032786885245905, "grad_norm": 6.259830474853516, "learning_rate": 1.2708080172538158e-05, "loss": 0.6306, "step": 13125 }, { "epoch": 43.03606557377049, "grad_norm": 8.802112579345703, "learning_rate": 1.270705794941515e-05, "loss": 0.8125, "step": 13126 }, { "epoch": 43.03934426229508, "grad_norm": 8.35471248626709, "learning_rate": 1.270603569576725e-05, "loss": 0.6475, "step": 13127 }, { "epoch": 43.04262295081967, "grad_norm": 6.31114387512207, "learning_rate": 1.270501341160598e-05, "loss": 0.6628, "step": 13128 }, { "epoch": 43.045901639344265, "grad_norm": 8.479994773864746, "learning_rate": 1.2703991096942876e-05, "loss": 0.771, "step": 13129 }, { "epoch": 43.049180327868854, "grad_norm": 6.680384635925293, "learning_rate": 1.270296875178946e-05, "loss": 0.7791, "step": 13130 }, { "epoch": 43.05245901639344, "grad_norm": 18.036954879760742, "learning_rate": 1.2701946376157258e-05, "loss": 0.8239, "step": 13131 }, { "epoch": 43.05573770491803, "grad_norm": 6.736188888549805, "learning_rate": 1.2700923970057803e-05, "loss": 0.7243, "step": 13132 }, { "epoch": 43.059016393442626, "grad_norm": 9.536314010620117, "learning_rate": 1.2699901533502624e-05, "loss": 0.8759, "step": 13133 }, { "epoch": 43.062295081967214, "grad_norm": 5.646450042724609, "learning_rate": 1.2698879066503247e-05, "loss": 0.7521, "step": 13134 }, { "epoch": 43.0655737704918, "grad_norm": 12.620141983032227, "learning_rate": 1.26978565690712e-05, "loss": 0.4171, "step": 13135 }, { "epoch": 43.06885245901639, "grad_norm": 6.511333465576172, "learning_rate": 1.2696834041218017e-05, "loss": 0.5239, "step": 13136 }, { "epoch": 43.072131147540986, "grad_norm": 14.8787260055542, "learning_rate": 1.2695811482955227e-05, "loss": 0.6882, "step": 13137 }, { "epoch": 43.075409836065575, "grad_norm": 6.058736801147461, "learning_rate": 1.2694788894294358e-05, "loss": 0.6431, "step": 13138 }, { "epoch": 43.07868852459016, "grad_norm": 12.443083763122559, "learning_rate": 1.2693766275246947e-05, "loss": 0.5689, "step": 13139 }, { "epoch": 43.08196721311475, "grad_norm": 5.279326915740967, "learning_rate": 1.2692743625824515e-05, "loss": 0.6062, "step": 13140 }, { "epoch": 43.08524590163935, "grad_norm": 8.680734634399414, "learning_rate": 1.2691720946038602e-05, "loss": 0.5464, "step": 13141 }, { "epoch": 43.088524590163935, "grad_norm": 6.5769124031066895, "learning_rate": 1.2690698235900734e-05, "loss": 0.7327, "step": 13142 }, { "epoch": 43.09180327868852, "grad_norm": 7.842638969421387, "learning_rate": 1.2689675495422447e-05, "loss": 0.5437, "step": 13143 }, { "epoch": 43.09508196721311, "grad_norm": 7.597887992858887, "learning_rate": 1.2688652724615271e-05, "loss": 0.6642, "step": 13144 }, { "epoch": 43.09836065573771, "grad_norm": 5.499435901641846, "learning_rate": 1.2687629923490743e-05, "loss": 0.7497, "step": 13145 }, { "epoch": 43.101639344262296, "grad_norm": 8.783194541931152, "learning_rate": 1.268660709206039e-05, "loss": 0.7319, "step": 13146 }, { "epoch": 43.104918032786884, "grad_norm": 11.508561134338379, "learning_rate": 1.268558423033575e-05, "loss": 0.7795, "step": 13147 }, { "epoch": 43.10819672131147, "grad_norm": 7.225283145904541, "learning_rate": 1.2684561338328356e-05, "loss": 0.5264, "step": 13148 }, { "epoch": 43.11147540983607, "grad_norm": 8.778206825256348, "learning_rate": 1.2683538416049741e-05, "loss": 0.6514, "step": 13149 }, { "epoch": 43.114754098360656, "grad_norm": 6.59124755859375, "learning_rate": 1.268251546351144e-05, "loss": 0.7664, "step": 13150 }, { "epoch": 43.118032786885244, "grad_norm": 7.172652244567871, "learning_rate": 1.2681492480724991e-05, "loss": 0.4729, "step": 13151 }, { "epoch": 43.12131147540983, "grad_norm": 6.932473659515381, "learning_rate": 1.2680469467701924e-05, "loss": 0.8076, "step": 13152 }, { "epoch": 43.12459016393443, "grad_norm": 11.035197257995605, "learning_rate": 1.2679446424453773e-05, "loss": 0.6445, "step": 13153 }, { "epoch": 43.12786885245902, "grad_norm": 6.290109157562256, "learning_rate": 1.2678423350992085e-05, "loss": 0.8571, "step": 13154 }, { "epoch": 43.131147540983605, "grad_norm": 7.003852367401123, "learning_rate": 1.2677400247328384e-05, "loss": 0.6676, "step": 13155 }, { "epoch": 43.13442622950819, "grad_norm": 17.157794952392578, "learning_rate": 1.267637711347421e-05, "loss": 0.7717, "step": 13156 }, { "epoch": 43.13770491803279, "grad_norm": 8.833161354064941, "learning_rate": 1.2675353949441105e-05, "loss": 0.7578, "step": 13157 }, { "epoch": 43.14098360655738, "grad_norm": 6.739609241485596, "learning_rate": 1.26743307552406e-05, "loss": 0.4129, "step": 13158 }, { "epoch": 43.144262295081965, "grad_norm": 8.436367988586426, "learning_rate": 1.2673307530884236e-05, "loss": 0.8444, "step": 13159 }, { "epoch": 43.14754098360656, "grad_norm": 6.926057815551758, "learning_rate": 1.267228427638355e-05, "loss": 0.5755, "step": 13160 }, { "epoch": 43.15081967213115, "grad_norm": 6.021864891052246, "learning_rate": 1.267126099175008e-05, "loss": 0.8544, "step": 13161 }, { "epoch": 43.15409836065574, "grad_norm": 7.262709617614746, "learning_rate": 1.2670237676995368e-05, "loss": 0.7965, "step": 13162 }, { "epoch": 43.157377049180326, "grad_norm": 6.2261199951171875, "learning_rate": 1.2669214332130945e-05, "loss": 0.8198, "step": 13163 }, { "epoch": 43.16065573770492, "grad_norm": 6.894772529602051, "learning_rate": 1.2668190957168358e-05, "loss": 0.787, "step": 13164 }, { "epoch": 43.16393442622951, "grad_norm": 8.625454902648926, "learning_rate": 1.2667167552119146e-05, "loss": 0.7872, "step": 13165 }, { "epoch": 43.1672131147541, "grad_norm": 8.762826919555664, "learning_rate": 1.2666144116994843e-05, "loss": 1.1326, "step": 13166 }, { "epoch": 43.170491803278686, "grad_norm": 5.780481815338135, "learning_rate": 1.2665120651806994e-05, "loss": 0.6723, "step": 13167 }, { "epoch": 43.17377049180328, "grad_norm": 7.36769962310791, "learning_rate": 1.2664097156567138e-05, "loss": 0.7054, "step": 13168 }, { "epoch": 43.17704918032787, "grad_norm": 6.126039505004883, "learning_rate": 1.2663073631286817e-05, "loss": 0.5963, "step": 13169 }, { "epoch": 43.18032786885246, "grad_norm": 9.25143814086914, "learning_rate": 1.2662050075977571e-05, "loss": 0.6268, "step": 13170 }, { "epoch": 43.18360655737705, "grad_norm": 6.381728172302246, "learning_rate": 1.2661026490650945e-05, "loss": 1.0476, "step": 13171 }, { "epoch": 43.18688524590164, "grad_norm": 10.778106689453125, "learning_rate": 1.2660002875318477e-05, "loss": 0.5576, "step": 13172 }, { "epoch": 43.19016393442623, "grad_norm": 12.174882888793945, "learning_rate": 1.2658979229991713e-05, "loss": 0.9007, "step": 13173 }, { "epoch": 43.19344262295082, "grad_norm": 8.036275863647461, "learning_rate": 1.2657955554682189e-05, "loss": 0.6802, "step": 13174 }, { "epoch": 43.19672131147541, "grad_norm": 7.397181510925293, "learning_rate": 1.2656931849401457e-05, "loss": 0.6875, "step": 13175 }, { "epoch": 43.2, "grad_norm": 6.310253143310547, "learning_rate": 1.2655908114161053e-05, "loss": 0.6926, "step": 13176 }, { "epoch": 43.20327868852459, "grad_norm": 5.782046318054199, "learning_rate": 1.2654884348972525e-05, "loss": 0.8181, "step": 13177 }, { "epoch": 43.20655737704918, "grad_norm": 7.887100696563721, "learning_rate": 1.2653860553847417e-05, "loss": 0.5412, "step": 13178 }, { "epoch": 43.20983606557377, "grad_norm": 7.417466640472412, "learning_rate": 1.2652836728797269e-05, "loss": 0.5382, "step": 13179 }, { "epoch": 43.21311475409836, "grad_norm": 8.589509963989258, "learning_rate": 1.2651812873833631e-05, "loss": 0.6761, "step": 13180 }, { "epoch": 43.21639344262295, "grad_norm": 7.561518669128418, "learning_rate": 1.2650788988968042e-05, "loss": 0.8345, "step": 13181 }, { "epoch": 43.21967213114754, "grad_norm": 7.207070350646973, "learning_rate": 1.2649765074212053e-05, "loss": 1.0331, "step": 13182 }, { "epoch": 43.22295081967213, "grad_norm": 7.646053314208984, "learning_rate": 1.2648741129577208e-05, "loss": 0.8874, "step": 13183 }, { "epoch": 43.226229508196724, "grad_norm": 6.212592124938965, "learning_rate": 1.2647717155075052e-05, "loss": 0.802, "step": 13184 }, { "epoch": 43.22950819672131, "grad_norm": 8.821964263916016, "learning_rate": 1.2646693150717136e-05, "loss": 0.7646, "step": 13185 }, { "epoch": 43.2327868852459, "grad_norm": 6.5697245597839355, "learning_rate": 1.2645669116514998e-05, "loss": 0.595, "step": 13186 }, { "epoch": 43.23606557377049, "grad_norm": 11.309167861938477, "learning_rate": 1.2644645052480188e-05, "loss": 0.6523, "step": 13187 }, { "epoch": 43.239344262295084, "grad_norm": 7.15547513961792, "learning_rate": 1.2643620958624263e-05, "loss": 0.5427, "step": 13188 }, { "epoch": 43.24262295081967, "grad_norm": 6.826924800872803, "learning_rate": 1.2642596834958757e-05, "loss": 0.4492, "step": 13189 }, { "epoch": 43.24590163934426, "grad_norm": 7.3738017082214355, "learning_rate": 1.2641572681495226e-05, "loss": 1.0472, "step": 13190 }, { "epoch": 43.24918032786885, "grad_norm": 13.844751358032227, "learning_rate": 1.264054849824522e-05, "loss": 0.577, "step": 13191 }, { "epoch": 43.252459016393445, "grad_norm": 6.672473430633545, "learning_rate": 1.263952428522028e-05, "loss": 0.4116, "step": 13192 }, { "epoch": 43.25573770491803, "grad_norm": 7.672874927520752, "learning_rate": 1.263850004243196e-05, "loss": 0.494, "step": 13193 }, { "epoch": 43.25901639344262, "grad_norm": 10.12606430053711, "learning_rate": 1.2637475769891807e-05, "loss": 0.8036, "step": 13194 }, { "epoch": 43.26229508196721, "grad_norm": 5.762200355529785, "learning_rate": 1.2636451467611375e-05, "loss": 0.6971, "step": 13195 }, { "epoch": 43.265573770491805, "grad_norm": 8.345038414001465, "learning_rate": 1.2635427135602209e-05, "loss": 0.6289, "step": 13196 }, { "epoch": 43.268852459016394, "grad_norm": 9.280878067016602, "learning_rate": 1.2634402773875866e-05, "loss": 0.5178, "step": 13197 }, { "epoch": 43.27213114754098, "grad_norm": 8.138218879699707, "learning_rate": 1.2633378382443888e-05, "loss": 0.5277, "step": 13198 }, { "epoch": 43.27540983606557, "grad_norm": 7.798349380493164, "learning_rate": 1.2632353961317834e-05, "loss": 0.6429, "step": 13199 }, { "epoch": 43.278688524590166, "grad_norm": 6.991128444671631, "learning_rate": 1.263132951050925e-05, "loss": 0.7291, "step": 13200 }, { "epoch": 43.281967213114754, "grad_norm": 7.316357612609863, "learning_rate": 1.2630305030029692e-05, "loss": 0.7017, "step": 13201 }, { "epoch": 43.28524590163934, "grad_norm": 12.935757637023926, "learning_rate": 1.262928051989071e-05, "loss": 0.6146, "step": 13202 }, { "epoch": 43.28852459016394, "grad_norm": 5.68044900894165, "learning_rate": 1.2628255980103854e-05, "loss": 0.779, "step": 13203 }, { "epoch": 43.291803278688526, "grad_norm": 12.00387191772461, "learning_rate": 1.2627231410680682e-05, "loss": 0.7056, "step": 13204 }, { "epoch": 43.295081967213115, "grad_norm": 6.482786178588867, "learning_rate": 1.2626206811632743e-05, "loss": 0.4328, "step": 13205 }, { "epoch": 43.2983606557377, "grad_norm": 5.841290473937988, "learning_rate": 1.2625182182971591e-05, "loss": 0.6666, "step": 13206 }, { "epoch": 43.3016393442623, "grad_norm": 46.845035552978516, "learning_rate": 1.262415752470878e-05, "loss": 0.5723, "step": 13207 }, { "epoch": 43.30491803278689, "grad_norm": 6.448000907897949, "learning_rate": 1.2623132836855865e-05, "loss": 0.5477, "step": 13208 }, { "epoch": 43.308196721311475, "grad_norm": 6.058466911315918, "learning_rate": 1.26221081194244e-05, "loss": 0.8299, "step": 13209 }, { "epoch": 43.31147540983606, "grad_norm": 22.379316329956055, "learning_rate": 1.2621083372425937e-05, "loss": 0.9355, "step": 13210 }, { "epoch": 43.31475409836066, "grad_norm": 5.896424770355225, "learning_rate": 1.2620058595872039e-05, "loss": 0.9554, "step": 13211 }, { "epoch": 43.31803278688525, "grad_norm": 5.460204601287842, "learning_rate": 1.2619033789774251e-05, "loss": 0.9947, "step": 13212 }, { "epoch": 43.321311475409836, "grad_norm": 7.125367164611816, "learning_rate": 1.2618008954144135e-05, "loss": 0.8203, "step": 13213 }, { "epoch": 43.324590163934424, "grad_norm": 10.242843627929688, "learning_rate": 1.261698408899325e-05, "loss": 0.7809, "step": 13214 }, { "epoch": 43.32786885245902, "grad_norm": 9.331868171691895, "learning_rate": 1.2615959194333143e-05, "loss": 0.6064, "step": 13215 }, { "epoch": 43.33114754098361, "grad_norm": 7.574275970458984, "learning_rate": 1.261493427017538e-05, "loss": 0.8154, "step": 13216 }, { "epoch": 43.334426229508196, "grad_norm": 7.459763526916504, "learning_rate": 1.2613909316531514e-05, "loss": 0.5935, "step": 13217 }, { "epoch": 43.337704918032784, "grad_norm": 7.732729434967041, "learning_rate": 1.2612884333413102e-05, "loss": 0.5941, "step": 13218 }, { "epoch": 43.34098360655738, "grad_norm": 10.020735740661621, "learning_rate": 1.2611859320831703e-05, "loss": 0.8972, "step": 13219 }, { "epoch": 43.34426229508197, "grad_norm": 9.940263748168945, "learning_rate": 1.2610834278798873e-05, "loss": 0.6772, "step": 13220 }, { "epoch": 43.34754098360656, "grad_norm": 8.513751029968262, "learning_rate": 1.2609809207326173e-05, "loss": 0.6556, "step": 13221 }, { "epoch": 43.350819672131145, "grad_norm": 9.675219535827637, "learning_rate": 1.260878410642516e-05, "loss": 0.6735, "step": 13222 }, { "epoch": 43.35409836065574, "grad_norm": 11.828601837158203, "learning_rate": 1.2607758976107394e-05, "loss": 0.7815, "step": 13223 }, { "epoch": 43.35737704918033, "grad_norm": 9.2481050491333, "learning_rate": 1.2606733816384433e-05, "loss": 0.8544, "step": 13224 }, { "epoch": 43.36065573770492, "grad_norm": 6.807620048522949, "learning_rate": 1.260570862726784e-05, "loss": 0.6768, "step": 13225 }, { "epoch": 43.363934426229505, "grad_norm": 6.533982276916504, "learning_rate": 1.2604683408769171e-05, "loss": 0.6273, "step": 13226 }, { "epoch": 43.3672131147541, "grad_norm": 6.787465572357178, "learning_rate": 1.2603658160899991e-05, "loss": 0.4932, "step": 13227 }, { "epoch": 43.37049180327869, "grad_norm": 8.256022453308105, "learning_rate": 1.2602632883671855e-05, "loss": 0.7544, "step": 13228 }, { "epoch": 43.37377049180328, "grad_norm": 7.565066814422607, "learning_rate": 1.2601607577096331e-05, "loss": 0.6456, "step": 13229 }, { "epoch": 43.377049180327866, "grad_norm": 7.519180774688721, "learning_rate": 1.2600582241184975e-05, "loss": 0.8706, "step": 13230 }, { "epoch": 43.38032786885246, "grad_norm": 7.247434616088867, "learning_rate": 1.2599556875949351e-05, "loss": 0.9327, "step": 13231 }, { "epoch": 43.38360655737705, "grad_norm": 6.12073278427124, "learning_rate": 1.2598531481401017e-05, "loss": 0.5805, "step": 13232 }, { "epoch": 43.38688524590164, "grad_norm": 8.408380508422852, "learning_rate": 1.259750605755154e-05, "loss": 0.6732, "step": 13233 }, { "epoch": 43.390163934426226, "grad_norm": 8.233945846557617, "learning_rate": 1.2596480604412485e-05, "loss": 0.7139, "step": 13234 }, { "epoch": 43.39344262295082, "grad_norm": 6.718369483947754, "learning_rate": 1.2595455121995408e-05, "loss": 0.798, "step": 13235 }, { "epoch": 43.39672131147541, "grad_norm": 10.112702369689941, "learning_rate": 1.2594429610311876e-05, "loss": 0.6438, "step": 13236 }, { "epoch": 43.4, "grad_norm": 8.12997055053711, "learning_rate": 1.2593404069373452e-05, "loss": 0.6482, "step": 13237 }, { "epoch": 43.40327868852459, "grad_norm": 13.027345657348633, "learning_rate": 1.2592378499191701e-05, "loss": 0.6635, "step": 13238 }, { "epoch": 43.40655737704918, "grad_norm": 7.487415313720703, "learning_rate": 1.2591352899778188e-05, "loss": 0.8745, "step": 13239 }, { "epoch": 43.40983606557377, "grad_norm": 7.021340370178223, "learning_rate": 1.2590327271144478e-05, "loss": 0.6084, "step": 13240 }, { "epoch": 43.41311475409836, "grad_norm": 10.492396354675293, "learning_rate": 1.2589301613302131e-05, "loss": 0.709, "step": 13241 }, { "epoch": 43.41639344262295, "grad_norm": 7.982426643371582, "learning_rate": 1.2588275926262721e-05, "loss": 0.7583, "step": 13242 }, { "epoch": 43.41967213114754, "grad_norm": 6.880666255950928, "learning_rate": 1.2587250210037807e-05, "loss": 0.4333, "step": 13243 }, { "epoch": 43.42295081967213, "grad_norm": 6.5279860496521, "learning_rate": 1.2586224464638955e-05, "loss": 0.5285, "step": 13244 }, { "epoch": 43.42622950819672, "grad_norm": 5.526214599609375, "learning_rate": 1.2585198690077736e-05, "loss": 0.6985, "step": 13245 }, { "epoch": 43.429508196721315, "grad_norm": 6.6788330078125, "learning_rate": 1.2584172886365709e-05, "loss": 0.7365, "step": 13246 }, { "epoch": 43.4327868852459, "grad_norm": 7.257148265838623, "learning_rate": 1.258314705351445e-05, "loss": 0.6273, "step": 13247 }, { "epoch": 43.43606557377049, "grad_norm": 7.2490363121032715, "learning_rate": 1.2582121191535522e-05, "loss": 0.7171, "step": 13248 }, { "epoch": 43.43934426229508, "grad_norm": 7.413153648376465, "learning_rate": 1.2581095300440493e-05, "loss": 0.9225, "step": 13249 }, { "epoch": 43.442622950819676, "grad_norm": 13.437106132507324, "learning_rate": 1.2580069380240927e-05, "loss": 0.7385, "step": 13250 }, { "epoch": 43.445901639344264, "grad_norm": 5.907153606414795, "learning_rate": 1.25790434309484e-05, "loss": 0.9145, "step": 13251 }, { "epoch": 43.44918032786885, "grad_norm": 8.441707611083984, "learning_rate": 1.2578017452574476e-05, "loss": 0.4032, "step": 13252 }, { "epoch": 43.45245901639344, "grad_norm": 6.230010032653809, "learning_rate": 1.2576991445130724e-05, "loss": 0.9161, "step": 13253 }, { "epoch": 43.455737704918036, "grad_norm": 6.182676792144775, "learning_rate": 1.2575965408628716e-05, "loss": 0.7598, "step": 13254 }, { "epoch": 43.459016393442624, "grad_norm": 8.118892669677734, "learning_rate": 1.257493934308002e-05, "loss": 0.7664, "step": 13255 }, { "epoch": 43.46229508196721, "grad_norm": 5.557079792022705, "learning_rate": 1.2573913248496203e-05, "loss": 0.6414, "step": 13256 }, { "epoch": 43.4655737704918, "grad_norm": 8.475250244140625, "learning_rate": 1.2572887124888837e-05, "loss": 0.7754, "step": 13257 }, { "epoch": 43.4688524590164, "grad_norm": 7.3437347412109375, "learning_rate": 1.2571860972269496e-05, "loss": 0.5651, "step": 13258 }, { "epoch": 43.472131147540985, "grad_norm": 8.846553802490234, "learning_rate": 1.2570834790649748e-05, "loss": 0.8072, "step": 13259 }, { "epoch": 43.47540983606557, "grad_norm": 7.1932692527771, "learning_rate": 1.2569808580041165e-05, "loss": 0.6561, "step": 13260 }, { "epoch": 43.47868852459016, "grad_norm": 6.834001541137695, "learning_rate": 1.2568782340455316e-05, "loss": 0.6692, "step": 13261 }, { "epoch": 43.48196721311476, "grad_norm": 7.05276346206665, "learning_rate": 1.2567756071903778e-05, "loss": 0.958, "step": 13262 }, { "epoch": 43.485245901639345, "grad_norm": 7.6644511222839355, "learning_rate": 1.2566729774398119e-05, "loss": 0.8431, "step": 13263 }, { "epoch": 43.488524590163934, "grad_norm": 8.75557804107666, "learning_rate": 1.2565703447949914e-05, "loss": 0.568, "step": 13264 }, { "epoch": 43.49180327868852, "grad_norm": 6.590178489685059, "learning_rate": 1.2564677092570734e-05, "loss": 0.5319, "step": 13265 }, { "epoch": 43.49508196721312, "grad_norm": 7.643472194671631, "learning_rate": 1.2563650708272155e-05, "loss": 0.8217, "step": 13266 }, { "epoch": 43.498360655737706, "grad_norm": 5.805504322052002, "learning_rate": 1.2562624295065748e-05, "loss": 0.574, "step": 13267 }, { "epoch": 43.501639344262294, "grad_norm": 11.789652824401855, "learning_rate": 1.2561597852963086e-05, "loss": 0.6432, "step": 13268 }, { "epoch": 43.50491803278688, "grad_norm": 6.314768314361572, "learning_rate": 1.2560571381975745e-05, "loss": 0.8286, "step": 13269 }, { "epoch": 43.50819672131148, "grad_norm": 6.196229457855225, "learning_rate": 1.25595448821153e-05, "loss": 0.499, "step": 13270 }, { "epoch": 43.511475409836066, "grad_norm": 12.596099853515625, "learning_rate": 1.2558518353393327e-05, "loss": 0.5923, "step": 13271 }, { "epoch": 43.514754098360655, "grad_norm": 6.864739418029785, "learning_rate": 1.2557491795821396e-05, "loss": 0.5017, "step": 13272 }, { "epoch": 43.51803278688524, "grad_norm": 8.17983627319336, "learning_rate": 1.255646520941109e-05, "loss": 0.6897, "step": 13273 }, { "epoch": 43.52131147540984, "grad_norm": 7.812204837799072, "learning_rate": 1.2555438594173977e-05, "loss": 0.7365, "step": 13274 }, { "epoch": 43.52459016393443, "grad_norm": 6.120307445526123, "learning_rate": 1.255441195012164e-05, "loss": 0.5068, "step": 13275 }, { "epoch": 43.527868852459015, "grad_norm": 8.317536354064941, "learning_rate": 1.2553385277265649e-05, "loss": 0.7607, "step": 13276 }, { "epoch": 43.5311475409836, "grad_norm": 18.254005432128906, "learning_rate": 1.2552358575617587e-05, "loss": 0.6773, "step": 13277 }, { "epoch": 43.5344262295082, "grad_norm": 6.649528503417969, "learning_rate": 1.2551331845189027e-05, "loss": 0.7415, "step": 13278 }, { "epoch": 43.53770491803279, "grad_norm": 9.873821258544922, "learning_rate": 1.2550305085991548e-05, "loss": 0.7657, "step": 13279 }, { "epoch": 43.540983606557376, "grad_norm": 6.519954204559326, "learning_rate": 1.2549278298036728e-05, "loss": 0.6112, "step": 13280 }, { "epoch": 43.544262295081964, "grad_norm": 6.827812194824219, "learning_rate": 1.2548251481336144e-05, "loss": 0.6155, "step": 13281 }, { "epoch": 43.54754098360656, "grad_norm": 8.091174125671387, "learning_rate": 1.2547224635901376e-05, "loss": 0.6198, "step": 13282 }, { "epoch": 43.55081967213115, "grad_norm": 7.021613597869873, "learning_rate": 1.2546197761743999e-05, "loss": 0.5703, "step": 13283 }, { "epoch": 43.554098360655736, "grad_norm": 7.935778617858887, "learning_rate": 1.2545170858875597e-05, "loss": 0.8416, "step": 13284 }, { "epoch": 43.557377049180324, "grad_norm": 7.3287577629089355, "learning_rate": 1.2544143927307749e-05, "loss": 0.7198, "step": 13285 }, { "epoch": 43.56065573770492, "grad_norm": 7.776947975158691, "learning_rate": 1.254311696705203e-05, "loss": 0.5017, "step": 13286 }, { "epoch": 43.56393442622951, "grad_norm": 7.086766242980957, "learning_rate": 1.2542089978120022e-05, "loss": 0.7613, "step": 13287 }, { "epoch": 43.5672131147541, "grad_norm": 8.603793144226074, "learning_rate": 1.254106296052331e-05, "loss": 0.6755, "step": 13288 }, { "epoch": 43.570491803278685, "grad_norm": 5.892855644226074, "learning_rate": 1.2540035914273468e-05, "loss": 0.7773, "step": 13289 }, { "epoch": 43.57377049180328, "grad_norm": 7.856745719909668, "learning_rate": 1.2539008839382083e-05, "loss": 0.6013, "step": 13290 }, { "epoch": 43.57704918032787, "grad_norm": 7.020966053009033, "learning_rate": 1.2537981735860729e-05, "loss": 0.6621, "step": 13291 }, { "epoch": 43.58032786885246, "grad_norm": 8.307225227355957, "learning_rate": 1.2536954603720995e-05, "loss": 0.7887, "step": 13292 }, { "epoch": 43.58360655737705, "grad_norm": 6.937822341918945, "learning_rate": 1.253592744297446e-05, "loss": 0.6165, "step": 13293 }, { "epoch": 43.58688524590164, "grad_norm": 6.938227653503418, "learning_rate": 1.2534900253632707e-05, "loss": 0.8871, "step": 13294 }, { "epoch": 43.59016393442623, "grad_norm": 6.837037086486816, "learning_rate": 1.2533873035707317e-05, "loss": 0.758, "step": 13295 }, { "epoch": 43.59344262295082, "grad_norm": 5.794003009796143, "learning_rate": 1.2532845789209875e-05, "loss": 0.5759, "step": 13296 }, { "epoch": 43.59672131147541, "grad_norm": 6.969367980957031, "learning_rate": 1.2531818514151964e-05, "loss": 0.6394, "step": 13297 }, { "epoch": 43.6, "grad_norm": 5.845217704772949, "learning_rate": 1.2530791210545163e-05, "loss": 0.5753, "step": 13298 }, { "epoch": 43.60327868852459, "grad_norm": 8.242355346679688, "learning_rate": 1.2529763878401063e-05, "loss": 0.7173, "step": 13299 }, { "epoch": 43.60655737704918, "grad_norm": 6.959094524383545, "learning_rate": 1.2528736517731243e-05, "loss": 0.6845, "step": 13300 }, { "epoch": 43.609836065573774, "grad_norm": 7.682035446166992, "learning_rate": 1.2527709128547292e-05, "loss": 0.8033, "step": 13301 }, { "epoch": 43.61311475409836, "grad_norm": 6.6594648361206055, "learning_rate": 1.2526681710860791e-05, "loss": 0.7131, "step": 13302 }, { "epoch": 43.61639344262295, "grad_norm": 6.279094219207764, "learning_rate": 1.2525654264683327e-05, "loss": 0.6683, "step": 13303 }, { "epoch": 43.61967213114754, "grad_norm": 6.719636917114258, "learning_rate": 1.2524626790026484e-05, "loss": 0.8036, "step": 13304 }, { "epoch": 43.622950819672134, "grad_norm": 6.932938575744629, "learning_rate": 1.252359928690185e-05, "loss": 0.6842, "step": 13305 }, { "epoch": 43.62622950819672, "grad_norm": 6.20286750793457, "learning_rate": 1.252257175532101e-05, "loss": 0.6636, "step": 13306 }, { "epoch": 43.62950819672131, "grad_norm": 12.512378692626953, "learning_rate": 1.2521544195295552e-05, "loss": 0.8354, "step": 13307 }, { "epoch": 43.6327868852459, "grad_norm": 7.485294342041016, "learning_rate": 1.2520516606837058e-05, "loss": 0.7782, "step": 13308 }, { "epoch": 43.636065573770495, "grad_norm": 6.324283123016357, "learning_rate": 1.2519488989957123e-05, "loss": 0.8769, "step": 13309 }, { "epoch": 43.63934426229508, "grad_norm": 6.58060359954834, "learning_rate": 1.2518461344667327e-05, "loss": 0.6806, "step": 13310 }, { "epoch": 43.64262295081967, "grad_norm": 7.878608226776123, "learning_rate": 1.251743367097926e-05, "loss": 0.4867, "step": 13311 }, { "epoch": 43.64590163934426, "grad_norm": 7.361553192138672, "learning_rate": 1.2516405968904515e-05, "loss": 0.5378, "step": 13312 }, { "epoch": 43.649180327868855, "grad_norm": 6.592843532562256, "learning_rate": 1.2515378238454673e-05, "loss": 0.6316, "step": 13313 }, { "epoch": 43.65245901639344, "grad_norm": 5.47165584564209, "learning_rate": 1.2514350479641326e-05, "loss": 0.4619, "step": 13314 }, { "epoch": 43.65573770491803, "grad_norm": 6.719738006591797, "learning_rate": 1.2513322692476063e-05, "loss": 0.7849, "step": 13315 }, { "epoch": 43.65901639344262, "grad_norm": 7.912967205047607, "learning_rate": 1.2512294876970474e-05, "loss": 0.968, "step": 13316 }, { "epoch": 43.662295081967216, "grad_norm": 6.043729782104492, "learning_rate": 1.2511267033136147e-05, "loss": 0.7526, "step": 13317 }, { "epoch": 43.665573770491804, "grad_norm": 6.624820232391357, "learning_rate": 1.2510239160984676e-05, "loss": 0.6872, "step": 13318 }, { "epoch": 43.66885245901639, "grad_norm": 6.408120155334473, "learning_rate": 1.2509211260527647e-05, "loss": 0.822, "step": 13319 }, { "epoch": 43.67213114754098, "grad_norm": 5.698254585266113, "learning_rate": 1.2508183331776651e-05, "loss": 0.5239, "step": 13320 }, { "epoch": 43.675409836065576, "grad_norm": 8.037960052490234, "learning_rate": 1.2507155374743281e-05, "loss": 0.5512, "step": 13321 }, { "epoch": 43.678688524590164, "grad_norm": 5.533054351806641, "learning_rate": 1.2506127389439126e-05, "loss": 0.7863, "step": 13322 }, { "epoch": 43.68196721311475, "grad_norm": 6.969295978546143, "learning_rate": 1.2505099375875782e-05, "loss": 0.7844, "step": 13323 }, { "epoch": 43.68524590163934, "grad_norm": 8.800853729248047, "learning_rate": 1.2504071334064836e-05, "loss": 0.7495, "step": 13324 }, { "epoch": 43.68852459016394, "grad_norm": 7.4540252685546875, "learning_rate": 1.2503043264017882e-05, "loss": 0.6078, "step": 13325 }, { "epoch": 43.691803278688525, "grad_norm": 6.70091438293457, "learning_rate": 1.2502015165746512e-05, "loss": 0.6289, "step": 13326 }, { "epoch": 43.69508196721311, "grad_norm": 10.716791152954102, "learning_rate": 1.2500987039262322e-05, "loss": 0.6957, "step": 13327 }, { "epoch": 43.6983606557377, "grad_norm": 6.329911231994629, "learning_rate": 1.2499958884576902e-05, "loss": 0.7155, "step": 13328 }, { "epoch": 43.7016393442623, "grad_norm": 6.644753932952881, "learning_rate": 1.2498930701701845e-05, "loss": 0.8879, "step": 13329 }, { "epoch": 43.704918032786885, "grad_norm": 6.911326885223389, "learning_rate": 1.2497902490648746e-05, "loss": 0.703, "step": 13330 }, { "epoch": 43.708196721311474, "grad_norm": 6.639866352081299, "learning_rate": 1.24968742514292e-05, "loss": 0.9403, "step": 13331 }, { "epoch": 43.71147540983607, "grad_norm": 10.011512756347656, "learning_rate": 1.2495845984054804e-05, "loss": 0.6035, "step": 13332 }, { "epoch": 43.71475409836066, "grad_norm": 6.501643180847168, "learning_rate": 1.2494817688537144e-05, "loss": 0.6526, "step": 13333 }, { "epoch": 43.718032786885246, "grad_norm": 8.139985084533691, "learning_rate": 1.2493789364887825e-05, "loss": 0.7685, "step": 13334 }, { "epoch": 43.721311475409834, "grad_norm": 6.919559001922607, "learning_rate": 1.2492761013118435e-05, "loss": 0.6804, "step": 13335 }, { "epoch": 43.72459016393443, "grad_norm": 7.839818954467773, "learning_rate": 1.2491732633240575e-05, "loss": 0.5829, "step": 13336 }, { "epoch": 43.72786885245902, "grad_norm": 6.3866119384765625, "learning_rate": 1.2490704225265835e-05, "loss": 0.7154, "step": 13337 }, { "epoch": 43.731147540983606, "grad_norm": 8.17812442779541, "learning_rate": 1.248967578920582e-05, "loss": 0.9497, "step": 13338 }, { "epoch": 43.734426229508195, "grad_norm": 5.370506763458252, "learning_rate": 1.2488647325072117e-05, "loss": 0.7241, "step": 13339 }, { "epoch": 43.73770491803279, "grad_norm": 7.798325061798096, "learning_rate": 1.2487618832876331e-05, "loss": 0.7497, "step": 13340 }, { "epoch": 43.74098360655738, "grad_norm": 13.97204303741455, "learning_rate": 1.2486590312630057e-05, "loss": 0.5096, "step": 13341 }, { "epoch": 43.74426229508197, "grad_norm": 13.234609603881836, "learning_rate": 1.2485561764344889e-05, "loss": 0.7089, "step": 13342 }, { "epoch": 43.747540983606555, "grad_norm": 6.9063239097595215, "learning_rate": 1.2484533188032428e-05, "loss": 0.7774, "step": 13343 }, { "epoch": 43.75081967213115, "grad_norm": 6.056461811065674, "learning_rate": 1.2483504583704276e-05, "loss": 0.5749, "step": 13344 }, { "epoch": 43.75409836065574, "grad_norm": 9.010692596435547, "learning_rate": 1.2482475951372024e-05, "loss": 0.8986, "step": 13345 }, { "epoch": 43.75737704918033, "grad_norm": 6.130058288574219, "learning_rate": 1.2481447291047272e-05, "loss": 0.709, "step": 13346 }, { "epoch": 43.760655737704916, "grad_norm": 6.319964408874512, "learning_rate": 1.2480418602741626e-05, "loss": 0.6373, "step": 13347 }, { "epoch": 43.76393442622951, "grad_norm": 5.5644917488098145, "learning_rate": 1.2479389886466679e-05, "loss": 0.6806, "step": 13348 }, { "epoch": 43.7672131147541, "grad_norm": 6.46353816986084, "learning_rate": 1.2478361142234037e-05, "loss": 0.812, "step": 13349 }, { "epoch": 43.77049180327869, "grad_norm": 5.897275447845459, "learning_rate": 1.2477332370055292e-05, "loss": 0.4631, "step": 13350 }, { "epoch": 43.773770491803276, "grad_norm": 6.212409973144531, "learning_rate": 1.2476303569942052e-05, "loss": 0.7109, "step": 13351 }, { "epoch": 43.77704918032787, "grad_norm": 8.219386100769043, "learning_rate": 1.2475274741905912e-05, "loss": 0.5652, "step": 13352 }, { "epoch": 43.78032786885246, "grad_norm": 5.790966033935547, "learning_rate": 1.2474245885958478e-05, "loss": 0.8527, "step": 13353 }, { "epoch": 43.78360655737705, "grad_norm": 6.49249267578125, "learning_rate": 1.2473217002111346e-05, "loss": 0.8716, "step": 13354 }, { "epoch": 43.78688524590164, "grad_norm": 5.948297500610352, "learning_rate": 1.2472188090376123e-05, "loss": 0.7531, "step": 13355 }, { "epoch": 43.79016393442623, "grad_norm": 6.1034674644470215, "learning_rate": 1.2471159150764409e-05, "loss": 0.5792, "step": 13356 }, { "epoch": 43.79344262295082, "grad_norm": 7.243927001953125, "learning_rate": 1.2470130183287806e-05, "loss": 0.7692, "step": 13357 }, { "epoch": 43.79672131147541, "grad_norm": 6.083242893218994, "learning_rate": 1.2469101187957917e-05, "loss": 1.0672, "step": 13358 }, { "epoch": 43.8, "grad_norm": 6.4946088790893555, "learning_rate": 1.2468072164786342e-05, "loss": 0.5598, "step": 13359 }, { "epoch": 43.80327868852459, "grad_norm": 5.662287712097168, "learning_rate": 1.246704311378469e-05, "loss": 0.9728, "step": 13360 }, { "epoch": 43.80655737704918, "grad_norm": 11.396892547607422, "learning_rate": 1.2466014034964562e-05, "loss": 1.019, "step": 13361 }, { "epoch": 43.80983606557377, "grad_norm": 6.193069934844971, "learning_rate": 1.2464984928337563e-05, "loss": 0.7895, "step": 13362 }, { "epoch": 43.81311475409836, "grad_norm": 6.166454315185547, "learning_rate": 1.2463955793915292e-05, "loss": 0.7351, "step": 13363 }, { "epoch": 43.81639344262295, "grad_norm": 5.203486442565918, "learning_rate": 1.246292663170936e-05, "loss": 0.7918, "step": 13364 }, { "epoch": 43.81967213114754, "grad_norm": 7.088256359100342, "learning_rate": 1.246189744173137e-05, "loss": 0.8131, "step": 13365 }, { "epoch": 43.82295081967213, "grad_norm": 5.536586761474609, "learning_rate": 1.2460868223992925e-05, "loss": 0.6174, "step": 13366 }, { "epoch": 43.82622950819672, "grad_norm": 5.868101119995117, "learning_rate": 1.2459838978505632e-05, "loss": 0.6214, "step": 13367 }, { "epoch": 43.829508196721314, "grad_norm": 6.670469284057617, "learning_rate": 1.2458809705281099e-05, "loss": 0.5315, "step": 13368 }, { "epoch": 43.8327868852459, "grad_norm": 5.803178787231445, "learning_rate": 1.2457780404330928e-05, "loss": 0.7729, "step": 13369 }, { "epoch": 43.83606557377049, "grad_norm": 5.510312080383301, "learning_rate": 1.2456751075666729e-05, "loss": 0.5726, "step": 13370 }, { "epoch": 43.83934426229508, "grad_norm": 6.537578582763672, "learning_rate": 1.2455721719300105e-05, "loss": 0.7971, "step": 13371 }, { "epoch": 43.842622950819674, "grad_norm": 7.4695258140563965, "learning_rate": 1.2454692335242668e-05, "loss": 0.7877, "step": 13372 }, { "epoch": 43.84590163934426, "grad_norm": 14.02165699005127, "learning_rate": 1.2453662923506021e-05, "loss": 0.5714, "step": 13373 }, { "epoch": 43.84918032786885, "grad_norm": 8.360270500183105, "learning_rate": 1.2452633484101773e-05, "loss": 0.7754, "step": 13374 }, { "epoch": 43.85245901639344, "grad_norm": 6.121203422546387, "learning_rate": 1.2451604017041534e-05, "loss": 0.9025, "step": 13375 }, { "epoch": 43.855737704918035, "grad_norm": 9.049583435058594, "learning_rate": 1.2450574522336909e-05, "loss": 0.716, "step": 13376 }, { "epoch": 43.85901639344262, "grad_norm": 7.008096218109131, "learning_rate": 1.244954499999951e-05, "loss": 0.6819, "step": 13377 }, { "epoch": 43.86229508196721, "grad_norm": 6.251030921936035, "learning_rate": 1.2448515450040942e-05, "loss": 0.7074, "step": 13378 }, { "epoch": 43.86557377049181, "grad_norm": 6.9049506187438965, "learning_rate": 1.2447485872472819e-05, "loss": 0.6205, "step": 13379 }, { "epoch": 43.868852459016395, "grad_norm": 9.434188842773438, "learning_rate": 1.2446456267306745e-05, "loss": 0.6298, "step": 13380 }, { "epoch": 43.87213114754098, "grad_norm": 14.601737022399902, "learning_rate": 1.2445426634554337e-05, "loss": 0.7879, "step": 13381 }, { "epoch": 43.87540983606557, "grad_norm": 7.147428512573242, "learning_rate": 1.2444396974227197e-05, "loss": 0.6641, "step": 13382 }, { "epoch": 43.87868852459017, "grad_norm": 8.800165176391602, "learning_rate": 1.2443367286336943e-05, "loss": 0.6404, "step": 13383 }, { "epoch": 43.881967213114756, "grad_norm": 8.27088737487793, "learning_rate": 1.2442337570895183e-05, "loss": 0.896, "step": 13384 }, { "epoch": 43.885245901639344, "grad_norm": 6.542057037353516, "learning_rate": 1.2441307827913525e-05, "loss": 0.4589, "step": 13385 }, { "epoch": 43.88852459016393, "grad_norm": 14.202253341674805, "learning_rate": 1.2440278057403584e-05, "loss": 0.8493, "step": 13386 }, { "epoch": 43.89180327868853, "grad_norm": 6.096277713775635, "learning_rate": 1.2439248259376968e-05, "loss": 0.6837, "step": 13387 }, { "epoch": 43.895081967213116, "grad_norm": 9.395062446594238, "learning_rate": 1.2438218433845295e-05, "loss": 0.8418, "step": 13388 }, { "epoch": 43.898360655737704, "grad_norm": 9.5091552734375, "learning_rate": 1.2437188580820172e-05, "loss": 0.7454, "step": 13389 }, { "epoch": 43.90163934426229, "grad_norm": 8.359721183776855, "learning_rate": 1.2436158700313215e-05, "loss": 0.6674, "step": 13390 }, { "epoch": 43.90491803278689, "grad_norm": 5.847011089324951, "learning_rate": 1.2435128792336032e-05, "loss": 0.6865, "step": 13391 }, { "epoch": 43.90819672131148, "grad_norm": 6.066157341003418, "learning_rate": 1.2434098856900245e-05, "loss": 0.6635, "step": 13392 }, { "epoch": 43.911475409836065, "grad_norm": 10.577010154724121, "learning_rate": 1.2433068894017462e-05, "loss": 0.5486, "step": 13393 }, { "epoch": 43.91475409836065, "grad_norm": 7.316690444946289, "learning_rate": 1.2432038903699295e-05, "loss": 0.6427, "step": 13394 }, { "epoch": 43.91803278688525, "grad_norm": 8.826340675354004, "learning_rate": 1.243100888595736e-05, "loss": 0.5311, "step": 13395 }, { "epoch": 43.92131147540984, "grad_norm": 8.29491901397705, "learning_rate": 1.2429978840803277e-05, "loss": 0.6642, "step": 13396 }, { "epoch": 43.924590163934425, "grad_norm": 8.375900268554688, "learning_rate": 1.2428948768248653e-05, "loss": 0.7208, "step": 13397 }, { "epoch": 43.927868852459014, "grad_norm": 7.882202625274658, "learning_rate": 1.2427918668305105e-05, "loss": 0.5959, "step": 13398 }, { "epoch": 43.93114754098361, "grad_norm": 6.5092854499816895, "learning_rate": 1.242688854098425e-05, "loss": 0.9277, "step": 13399 }, { "epoch": 43.9344262295082, "grad_norm": 7.518605709075928, "learning_rate": 1.2425858386297704e-05, "loss": 0.999, "step": 13400 }, { "epoch": 43.937704918032786, "grad_norm": 6.063810348510742, "learning_rate": 1.2424828204257082e-05, "loss": 0.8187, "step": 13401 }, { "epoch": 43.940983606557374, "grad_norm": 8.099640846252441, "learning_rate": 1.2423797994874001e-05, "loss": 0.9227, "step": 13402 }, { "epoch": 43.94426229508197, "grad_norm": 6.3032331466674805, "learning_rate": 1.2422767758160079e-05, "loss": 0.8786, "step": 13403 }, { "epoch": 43.94754098360656, "grad_norm": 6.850149631500244, "learning_rate": 1.2421737494126929e-05, "loss": 0.6903, "step": 13404 }, { "epoch": 43.950819672131146, "grad_norm": 6.464613437652588, "learning_rate": 1.2420707202786173e-05, "loss": 0.7971, "step": 13405 }, { "epoch": 43.954098360655735, "grad_norm": 7.6052045822143555, "learning_rate": 1.2419676884149425e-05, "loss": 0.7802, "step": 13406 }, { "epoch": 43.95737704918033, "grad_norm": 16.323863983154297, "learning_rate": 1.2418646538228305e-05, "loss": 0.6919, "step": 13407 }, { "epoch": 43.96065573770492, "grad_norm": 6.124459743499756, "learning_rate": 1.2417616165034429e-05, "loss": 0.5505, "step": 13408 }, { "epoch": 43.96393442622951, "grad_norm": 5.697957515716553, "learning_rate": 1.241658576457942e-05, "loss": 0.81, "step": 13409 }, { "epoch": 43.967213114754095, "grad_norm": 7.9700822830200195, "learning_rate": 1.2415555336874894e-05, "loss": 0.7192, "step": 13410 }, { "epoch": 43.97049180327869, "grad_norm": 5.479928970336914, "learning_rate": 1.2414524881932467e-05, "loss": 0.8553, "step": 13411 }, { "epoch": 43.97377049180328, "grad_norm": 6.272089958190918, "learning_rate": 1.2413494399763763e-05, "loss": 0.6368, "step": 13412 }, { "epoch": 43.97704918032787, "grad_norm": 6.10414457321167, "learning_rate": 1.24124638903804e-05, "loss": 0.8047, "step": 13413 }, { "epoch": 43.980327868852456, "grad_norm": 5.7866644859313965, "learning_rate": 1.2411433353793998e-05, "loss": 0.6156, "step": 13414 }, { "epoch": 43.98360655737705, "grad_norm": 8.187914848327637, "learning_rate": 1.2410402790016179e-05, "loss": 0.6358, "step": 13415 }, { "epoch": 43.98688524590164, "grad_norm": 6.781529903411865, "learning_rate": 1.240937219905856e-05, "loss": 0.8293, "step": 13416 }, { "epoch": 43.99016393442623, "grad_norm": 8.098577499389648, "learning_rate": 1.2408341580932766e-05, "loss": 0.5313, "step": 13417 }, { "epoch": 43.993442622950816, "grad_norm": 6.819585800170898, "learning_rate": 1.2407310935650416e-05, "loss": 0.6588, "step": 13418 }, { "epoch": 43.99672131147541, "grad_norm": 7.0740556716918945, "learning_rate": 1.2406280263223132e-05, "loss": 0.7018, "step": 13419 }, { "epoch": 44.0, "grad_norm": 9.338296890258789, "learning_rate": 1.2405249563662539e-05, "loss": 0.6313, "step": 13420 }, { "epoch": 44.00327868852459, "grad_norm": 7.785154819488525, "learning_rate": 1.2404218836980253e-05, "loss": 0.5723, "step": 13421 }, { "epoch": 44.006557377049184, "grad_norm": 6.385659217834473, "learning_rate": 1.2403188083187901e-05, "loss": 0.7346, "step": 13422 }, { "epoch": 44.00983606557377, "grad_norm": 5.039982318878174, "learning_rate": 1.2402157302297106e-05, "loss": 0.7456, "step": 13423 }, { "epoch": 44.01311475409836, "grad_norm": 8.721189498901367, "learning_rate": 1.2401126494319487e-05, "loss": 0.4289, "step": 13424 }, { "epoch": 44.01639344262295, "grad_norm": 10.301374435424805, "learning_rate": 1.2400095659266671e-05, "loss": 0.5902, "step": 13425 }, { "epoch": 44.019672131147544, "grad_norm": 8.829245567321777, "learning_rate": 1.2399064797150282e-05, "loss": 0.6434, "step": 13426 }, { "epoch": 44.02295081967213, "grad_norm": 6.035318851470947, "learning_rate": 1.239803390798194e-05, "loss": 0.6626, "step": 13427 }, { "epoch": 44.02622950819672, "grad_norm": 7.131776332855225, "learning_rate": 1.2397002991773277e-05, "loss": 0.7594, "step": 13428 }, { "epoch": 44.02950819672131, "grad_norm": 6.166780948638916, "learning_rate": 1.2395972048535909e-05, "loss": 0.5413, "step": 13429 }, { "epoch": 44.032786885245905, "grad_norm": 6.731770038604736, "learning_rate": 1.2394941078281466e-05, "loss": 0.746, "step": 13430 }, { "epoch": 44.03606557377049, "grad_norm": 8.830713272094727, "learning_rate": 1.2393910081021574e-05, "loss": 0.7187, "step": 13431 }, { "epoch": 44.03934426229508, "grad_norm": 6.965868949890137, "learning_rate": 1.2392879056767855e-05, "loss": 0.5442, "step": 13432 }, { "epoch": 44.04262295081967, "grad_norm": 8.922689437866211, "learning_rate": 1.2391848005531938e-05, "loss": 0.5892, "step": 13433 }, { "epoch": 44.045901639344265, "grad_norm": 6.409641265869141, "learning_rate": 1.2390816927325449e-05, "loss": 0.855, "step": 13434 }, { "epoch": 44.049180327868854, "grad_norm": 7.395653247833252, "learning_rate": 1.2389785822160011e-05, "loss": 0.8236, "step": 13435 }, { "epoch": 44.05245901639344, "grad_norm": 6.893357753753662, "learning_rate": 1.2388754690047256e-05, "loss": 0.728, "step": 13436 }, { "epoch": 44.05573770491803, "grad_norm": 6.1247968673706055, "learning_rate": 1.2387723530998805e-05, "loss": 0.6428, "step": 13437 }, { "epoch": 44.059016393442626, "grad_norm": 7.257850170135498, "learning_rate": 1.238669234502629e-05, "loss": 0.6595, "step": 13438 }, { "epoch": 44.062295081967214, "grad_norm": 6.570251941680908, "learning_rate": 1.2385661132141335e-05, "loss": 0.5401, "step": 13439 }, { "epoch": 44.0655737704918, "grad_norm": 6.700113296508789, "learning_rate": 1.2384629892355574e-05, "loss": 0.4801, "step": 13440 }, { "epoch": 44.06885245901639, "grad_norm": 9.516968727111816, "learning_rate": 1.2383598625680628e-05, "loss": 0.9204, "step": 13441 }, { "epoch": 44.072131147540986, "grad_norm": 5.508144378662109, "learning_rate": 1.2382567332128133e-05, "loss": 0.5106, "step": 13442 }, { "epoch": 44.075409836065575, "grad_norm": 5.511137008666992, "learning_rate": 1.2381536011709709e-05, "loss": 0.4755, "step": 13443 }, { "epoch": 44.07868852459016, "grad_norm": 20.708717346191406, "learning_rate": 1.2380504664436996e-05, "loss": 0.5808, "step": 13444 }, { "epoch": 44.08196721311475, "grad_norm": 8.02158260345459, "learning_rate": 1.2379473290321613e-05, "loss": 0.8077, "step": 13445 }, { "epoch": 44.08524590163935, "grad_norm": 7.994925022125244, "learning_rate": 1.23784418893752e-05, "loss": 0.6696, "step": 13446 }, { "epoch": 44.088524590163935, "grad_norm": 5.7241621017456055, "learning_rate": 1.2377410461609377e-05, "loss": 0.6396, "step": 13447 }, { "epoch": 44.09180327868852, "grad_norm": 7.687220096588135, "learning_rate": 1.2376379007035779e-05, "loss": 0.5872, "step": 13448 }, { "epoch": 44.09508196721311, "grad_norm": 6.6560139656066895, "learning_rate": 1.237534752566604e-05, "loss": 0.6411, "step": 13449 }, { "epoch": 44.09836065573771, "grad_norm": 5.73159646987915, "learning_rate": 1.2374316017511784e-05, "loss": 0.8145, "step": 13450 }, { "epoch": 44.101639344262296, "grad_norm": 5.914182662963867, "learning_rate": 1.2373284482584652e-05, "loss": 0.6681, "step": 13451 }, { "epoch": 44.104918032786884, "grad_norm": 7.554740905761719, "learning_rate": 1.2372252920896264e-05, "loss": 0.4597, "step": 13452 }, { "epoch": 44.10819672131147, "grad_norm": 6.086056709289551, "learning_rate": 1.2371221332458258e-05, "loss": 0.5277, "step": 13453 }, { "epoch": 44.11147540983607, "grad_norm": 5.746881484985352, "learning_rate": 1.237018971728227e-05, "loss": 0.795, "step": 13454 }, { "epoch": 44.114754098360656, "grad_norm": 9.496354103088379, "learning_rate": 1.2369158075379925e-05, "loss": 0.5442, "step": 13455 }, { "epoch": 44.118032786885244, "grad_norm": 7.745061874389648, "learning_rate": 1.2368126406762862e-05, "loss": 0.7584, "step": 13456 }, { "epoch": 44.12131147540983, "grad_norm": 5.9799723625183105, "learning_rate": 1.236709471144271e-05, "loss": 0.7534, "step": 13457 }, { "epoch": 44.12459016393443, "grad_norm": 15.058724403381348, "learning_rate": 1.2366062989431105e-05, "loss": 0.6416, "step": 13458 }, { "epoch": 44.12786885245902, "grad_norm": 6.807914733886719, "learning_rate": 1.236503124073968e-05, "loss": 0.7067, "step": 13459 }, { "epoch": 44.131147540983605, "grad_norm": 6.373671531677246, "learning_rate": 1.236399946538007e-05, "loss": 0.5635, "step": 13460 }, { "epoch": 44.13442622950819, "grad_norm": 6.85888147354126, "learning_rate": 1.2362967663363905e-05, "loss": 0.7656, "step": 13461 }, { "epoch": 44.13770491803279, "grad_norm": 6.117863655090332, "learning_rate": 1.2361935834702826e-05, "loss": 0.6717, "step": 13462 }, { "epoch": 44.14098360655738, "grad_norm": 6.104964733123779, "learning_rate": 1.2360903979408461e-05, "loss": 0.8983, "step": 13463 }, { "epoch": 44.144262295081965, "grad_norm": 9.56892204284668, "learning_rate": 1.235987209749245e-05, "loss": 0.524, "step": 13464 }, { "epoch": 44.14754098360656, "grad_norm": 8.460991859436035, "learning_rate": 1.2358840188966427e-05, "loss": 0.7607, "step": 13465 }, { "epoch": 44.15081967213115, "grad_norm": 9.902984619140625, "learning_rate": 1.2357808253842031e-05, "loss": 0.707, "step": 13466 }, { "epoch": 44.15409836065574, "grad_norm": 7.313135623931885, "learning_rate": 1.2356776292130892e-05, "loss": 0.479, "step": 13467 }, { "epoch": 44.157377049180326, "grad_norm": 9.391947746276855, "learning_rate": 1.2355744303844652e-05, "loss": 0.5665, "step": 13468 }, { "epoch": 44.16065573770492, "grad_norm": 6.422666549682617, "learning_rate": 1.2354712288994946e-05, "loss": 0.5621, "step": 13469 }, { "epoch": 44.16393442622951, "grad_norm": 6.513156414031982, "learning_rate": 1.2353680247593411e-05, "loss": 0.4891, "step": 13470 }, { "epoch": 44.1672131147541, "grad_norm": 6.494028091430664, "learning_rate": 1.235264817965168e-05, "loss": 0.7324, "step": 13471 }, { "epoch": 44.170491803278686, "grad_norm": 7.168301582336426, "learning_rate": 1.23516160851814e-05, "loss": 0.7247, "step": 13472 }, { "epoch": 44.17377049180328, "grad_norm": 6.852575302124023, "learning_rate": 1.2350583964194202e-05, "loss": 0.4115, "step": 13473 }, { "epoch": 44.17704918032787, "grad_norm": 8.589534759521484, "learning_rate": 1.2349551816701724e-05, "loss": 0.9824, "step": 13474 }, { "epoch": 44.18032786885246, "grad_norm": 6.293148040771484, "learning_rate": 1.2348519642715608e-05, "loss": 0.652, "step": 13475 }, { "epoch": 44.18360655737705, "grad_norm": 6.1451897621154785, "learning_rate": 1.234748744224749e-05, "loss": 0.7722, "step": 13476 }, { "epoch": 44.18688524590164, "grad_norm": 7.242432117462158, "learning_rate": 1.234645521530901e-05, "loss": 0.8234, "step": 13477 }, { "epoch": 44.19016393442623, "grad_norm": 8.976360321044922, "learning_rate": 1.2345422961911808e-05, "loss": 0.6994, "step": 13478 }, { "epoch": 44.19344262295082, "grad_norm": 7.161147594451904, "learning_rate": 1.2344390682067524e-05, "loss": 0.7667, "step": 13479 }, { "epoch": 44.19672131147541, "grad_norm": 8.783374786376953, "learning_rate": 1.2343358375787798e-05, "loss": 0.5651, "step": 13480 }, { "epoch": 44.2, "grad_norm": 8.566656112670898, "learning_rate": 1.2342326043084268e-05, "loss": 0.8622, "step": 13481 }, { "epoch": 44.20327868852459, "grad_norm": 8.497947692871094, "learning_rate": 1.2341293683968579e-05, "loss": 0.6515, "step": 13482 }, { "epoch": 44.20655737704918, "grad_norm": 7.17061185836792, "learning_rate": 1.2340261298452365e-05, "loss": 0.5906, "step": 13483 }, { "epoch": 44.20983606557377, "grad_norm": 7.539074897766113, "learning_rate": 1.2339228886547273e-05, "loss": 0.8515, "step": 13484 }, { "epoch": 44.21311475409836, "grad_norm": 6.417182445526123, "learning_rate": 1.2338196448264947e-05, "loss": 0.6338, "step": 13485 }, { "epoch": 44.21639344262295, "grad_norm": 5.909548759460449, "learning_rate": 1.2337163983617025e-05, "loss": 0.735, "step": 13486 }, { "epoch": 44.21967213114754, "grad_norm": 6.04482889175415, "learning_rate": 1.2336131492615145e-05, "loss": 0.6494, "step": 13487 }, { "epoch": 44.22295081967213, "grad_norm": 9.890331268310547, "learning_rate": 1.2335098975270957e-05, "loss": 0.6618, "step": 13488 }, { "epoch": 44.226229508196724, "grad_norm": 7.448037147521973, "learning_rate": 1.2334066431596097e-05, "loss": 0.5917, "step": 13489 }, { "epoch": 44.22950819672131, "grad_norm": 5.089932441711426, "learning_rate": 1.2333033861602213e-05, "loss": 0.8339, "step": 13490 }, { "epoch": 44.2327868852459, "grad_norm": 6.3124237060546875, "learning_rate": 1.2332001265300945e-05, "loss": 0.6515, "step": 13491 }, { "epoch": 44.23606557377049, "grad_norm": 6.691074848175049, "learning_rate": 1.233096864270394e-05, "loss": 0.7626, "step": 13492 }, { "epoch": 44.239344262295084, "grad_norm": 5.427692890167236, "learning_rate": 1.232993599382284e-05, "loss": 0.7171, "step": 13493 }, { "epoch": 44.24262295081967, "grad_norm": 12.743362426757812, "learning_rate": 1.2328903318669287e-05, "loss": 0.6612, "step": 13494 }, { "epoch": 44.24590163934426, "grad_norm": 7.983310222625732, "learning_rate": 1.2327870617254929e-05, "loss": 0.4357, "step": 13495 }, { "epoch": 44.24918032786885, "grad_norm": 8.217155456542969, "learning_rate": 1.232683788959141e-05, "loss": 0.4718, "step": 13496 }, { "epoch": 44.252459016393445, "grad_norm": 10.912760734558105, "learning_rate": 1.2325805135690372e-05, "loss": 0.6523, "step": 13497 }, { "epoch": 44.25573770491803, "grad_norm": 9.5678071975708, "learning_rate": 1.2324772355563467e-05, "loss": 0.6021, "step": 13498 }, { "epoch": 44.25901639344262, "grad_norm": 5.481099605560303, "learning_rate": 1.2323739549222333e-05, "loss": 0.9916, "step": 13499 }, { "epoch": 44.26229508196721, "grad_norm": 9.661591529846191, "learning_rate": 1.232270671667862e-05, "loss": 0.8928, "step": 13500 }, { "epoch": 44.265573770491805, "grad_norm": 7.729679584503174, "learning_rate": 1.2321673857943977e-05, "loss": 0.8363, "step": 13501 }, { "epoch": 44.268852459016394, "grad_norm": 5.512687683105469, "learning_rate": 1.2320640973030043e-05, "loss": 0.6097, "step": 13502 }, { "epoch": 44.27213114754098, "grad_norm": 6.330310821533203, "learning_rate": 1.2319608061948471e-05, "loss": 0.8249, "step": 13503 }, { "epoch": 44.27540983606557, "grad_norm": 6.307584762573242, "learning_rate": 1.2318575124710905e-05, "loss": 0.7095, "step": 13504 }, { "epoch": 44.278688524590166, "grad_norm": 7.100154399871826, "learning_rate": 1.2317542161328997e-05, "loss": 0.4937, "step": 13505 }, { "epoch": 44.281967213114754, "grad_norm": 5.6552958488464355, "learning_rate": 1.2316509171814388e-05, "loss": 0.7409, "step": 13506 }, { "epoch": 44.28524590163934, "grad_norm": 6.817533016204834, "learning_rate": 1.231547615617873e-05, "loss": 0.7729, "step": 13507 }, { "epoch": 44.28852459016394, "grad_norm": 6.525726318359375, "learning_rate": 1.2314443114433671e-05, "loss": 0.6189, "step": 13508 }, { "epoch": 44.291803278688526, "grad_norm": 5.655951023101807, "learning_rate": 1.2313410046590861e-05, "loss": 0.8309, "step": 13509 }, { "epoch": 44.295081967213115, "grad_norm": 7.634216785430908, "learning_rate": 1.2312376952661946e-05, "loss": 0.7234, "step": 13510 }, { "epoch": 44.2983606557377, "grad_norm": 8.008371353149414, "learning_rate": 1.2311343832658577e-05, "loss": 0.8579, "step": 13511 }, { "epoch": 44.3016393442623, "grad_norm": 6.0241169929504395, "learning_rate": 1.2310310686592404e-05, "loss": 0.8665, "step": 13512 }, { "epoch": 44.30491803278689, "grad_norm": 9.750202178955078, "learning_rate": 1.2309277514475076e-05, "loss": 0.6494, "step": 13513 }, { "epoch": 44.308196721311475, "grad_norm": 8.503998756408691, "learning_rate": 1.2308244316318243e-05, "loss": 0.7982, "step": 13514 }, { "epoch": 44.31147540983606, "grad_norm": 6.989342212677002, "learning_rate": 1.2307211092133552e-05, "loss": 0.7684, "step": 13515 }, { "epoch": 44.31475409836066, "grad_norm": 5.1150288581848145, "learning_rate": 1.230617784193266e-05, "loss": 0.5442, "step": 13516 }, { "epoch": 44.31803278688525, "grad_norm": 5.42686128616333, "learning_rate": 1.2305144565727214e-05, "loss": 0.6813, "step": 13517 }, { "epoch": 44.321311475409836, "grad_norm": 6.276243209838867, "learning_rate": 1.2304111263528868e-05, "loss": 0.8726, "step": 13518 }, { "epoch": 44.324590163934424, "grad_norm": 5.814010143280029, "learning_rate": 1.230307793534927e-05, "loss": 0.5, "step": 13519 }, { "epoch": 44.32786885245902, "grad_norm": 6.071951389312744, "learning_rate": 1.2302044581200075e-05, "loss": 0.7482, "step": 13520 }, { "epoch": 44.33114754098361, "grad_norm": 10.091235160827637, "learning_rate": 1.2301011201092931e-05, "loss": 0.8217, "step": 13521 }, { "epoch": 44.334426229508196, "grad_norm": 7.029980659484863, "learning_rate": 1.2299977795039498e-05, "loss": 0.8909, "step": 13522 }, { "epoch": 44.337704918032784, "grad_norm": 6.2308669090271, "learning_rate": 1.229894436305142e-05, "loss": 0.6698, "step": 13523 }, { "epoch": 44.34098360655738, "grad_norm": 6.939520835876465, "learning_rate": 1.2297910905140356e-05, "loss": 0.7068, "step": 13524 }, { "epoch": 44.34426229508197, "grad_norm": 6.599099159240723, "learning_rate": 1.2296877421317958e-05, "loss": 0.6842, "step": 13525 }, { "epoch": 44.34754098360656, "grad_norm": 6.546462535858154, "learning_rate": 1.2295843911595876e-05, "loss": 0.5934, "step": 13526 }, { "epoch": 44.350819672131145, "grad_norm": 6.7987494468688965, "learning_rate": 1.229481037598577e-05, "loss": 0.4273, "step": 13527 }, { "epoch": 44.35409836065574, "grad_norm": 6.469584941864014, "learning_rate": 1.229377681449929e-05, "loss": 0.4139, "step": 13528 }, { "epoch": 44.35737704918033, "grad_norm": 12.043046951293945, "learning_rate": 1.2292743227148091e-05, "loss": 0.595, "step": 13529 }, { "epoch": 44.36065573770492, "grad_norm": 6.036343574523926, "learning_rate": 1.2291709613943828e-05, "loss": 0.6122, "step": 13530 }, { "epoch": 44.363934426229505, "grad_norm": 6.3413801193237305, "learning_rate": 1.2290675974898157e-05, "loss": 0.7737, "step": 13531 }, { "epoch": 44.3672131147541, "grad_norm": 6.669993877410889, "learning_rate": 1.228964231002273e-05, "loss": 0.6916, "step": 13532 }, { "epoch": 44.37049180327869, "grad_norm": 7.802298069000244, "learning_rate": 1.228860861932921e-05, "loss": 0.7187, "step": 13533 }, { "epoch": 44.37377049180328, "grad_norm": 12.703046798706055, "learning_rate": 1.2287574902829245e-05, "loss": 0.656, "step": 13534 }, { "epoch": 44.377049180327866, "grad_norm": 7.599445819854736, "learning_rate": 1.2286541160534492e-05, "loss": 0.697, "step": 13535 }, { "epoch": 44.38032786885246, "grad_norm": 6.033142566680908, "learning_rate": 1.2285507392456614e-05, "loss": 0.6587, "step": 13536 }, { "epoch": 44.38360655737705, "grad_norm": 6.838961601257324, "learning_rate": 1.2284473598607263e-05, "loss": 0.7105, "step": 13537 }, { "epoch": 44.38688524590164, "grad_norm": 9.274346351623535, "learning_rate": 1.2283439778998099e-05, "loss": 0.372, "step": 13538 }, { "epoch": 44.390163934426226, "grad_norm": 7.410439491271973, "learning_rate": 1.2282405933640773e-05, "loss": 0.5154, "step": 13539 }, { "epoch": 44.39344262295082, "grad_norm": 11.763394355773926, "learning_rate": 1.228137206254695e-05, "loss": 0.6792, "step": 13540 }, { "epoch": 44.39672131147541, "grad_norm": 7.711185932159424, "learning_rate": 1.2280338165728285e-05, "loss": 0.6565, "step": 13541 }, { "epoch": 44.4, "grad_norm": 5.488805770874023, "learning_rate": 1.2279304243196438e-05, "loss": 0.4944, "step": 13542 }, { "epoch": 44.40327868852459, "grad_norm": 6.5070013999938965, "learning_rate": 1.227827029496306e-05, "loss": 0.7275, "step": 13543 }, { "epoch": 44.40655737704918, "grad_norm": 6.571943283081055, "learning_rate": 1.227723632103982e-05, "loss": 0.5655, "step": 13544 }, { "epoch": 44.40983606557377, "grad_norm": 7.4482102394104, "learning_rate": 1.2276202321438371e-05, "loss": 0.8759, "step": 13545 }, { "epoch": 44.41311475409836, "grad_norm": 5.693851947784424, "learning_rate": 1.2275168296170377e-05, "loss": 0.597, "step": 13546 }, { "epoch": 44.41639344262295, "grad_norm": 6.672719955444336, "learning_rate": 1.2274134245247492e-05, "loss": 0.7786, "step": 13547 }, { "epoch": 44.41967213114754, "grad_norm": 5.868122577667236, "learning_rate": 1.2273100168681378e-05, "loss": 0.635, "step": 13548 }, { "epoch": 44.42295081967213, "grad_norm": 12.728158950805664, "learning_rate": 1.2272066066483697e-05, "loss": 0.5506, "step": 13549 }, { "epoch": 44.42622950819672, "grad_norm": 7.7084221839904785, "learning_rate": 1.2271031938666111e-05, "loss": 0.819, "step": 13550 }, { "epoch": 44.429508196721315, "grad_norm": 4.966594219207764, "learning_rate": 1.2269997785240277e-05, "loss": 0.5999, "step": 13551 }, { "epoch": 44.4327868852459, "grad_norm": 7.9047417640686035, "learning_rate": 1.2268963606217858e-05, "loss": 0.5839, "step": 13552 }, { "epoch": 44.43606557377049, "grad_norm": 8.105927467346191, "learning_rate": 1.2267929401610517e-05, "loss": 0.6665, "step": 13553 }, { "epoch": 44.43934426229508, "grad_norm": 8.12702465057373, "learning_rate": 1.2266895171429911e-05, "loss": 0.9937, "step": 13554 }, { "epoch": 44.442622950819676, "grad_norm": 6.070274353027344, "learning_rate": 1.2265860915687706e-05, "loss": 0.4925, "step": 13555 }, { "epoch": 44.445901639344264, "grad_norm": 6.115999221801758, "learning_rate": 1.226482663439556e-05, "loss": 0.6768, "step": 13556 }, { "epoch": 44.44918032786885, "grad_norm": 6.447794437408447, "learning_rate": 1.2263792327565145e-05, "loss": 0.8873, "step": 13557 }, { "epoch": 44.45245901639344, "grad_norm": 10.584880828857422, "learning_rate": 1.2262757995208115e-05, "loss": 0.6922, "step": 13558 }, { "epoch": 44.455737704918036, "grad_norm": 5.9531049728393555, "learning_rate": 1.2261723637336136e-05, "loss": 0.6486, "step": 13559 }, { "epoch": 44.459016393442624, "grad_norm": 6.763786315917969, "learning_rate": 1.2260689253960872e-05, "loss": 0.7032, "step": 13560 }, { "epoch": 44.46229508196721, "grad_norm": 6.627378940582275, "learning_rate": 1.2259654845093985e-05, "loss": 0.7487, "step": 13561 }, { "epoch": 44.4655737704918, "grad_norm": 6.221643924713135, "learning_rate": 1.225862041074714e-05, "loss": 0.5971, "step": 13562 }, { "epoch": 44.4688524590164, "grad_norm": 7.995494365692139, "learning_rate": 1.2257585950932e-05, "loss": 0.6919, "step": 13563 }, { "epoch": 44.472131147540985, "grad_norm": 8.266844749450684, "learning_rate": 1.2256551465660234e-05, "loss": 0.6783, "step": 13564 }, { "epoch": 44.47540983606557, "grad_norm": 7.645579814910889, "learning_rate": 1.2255516954943503e-05, "loss": 0.7291, "step": 13565 }, { "epoch": 44.47868852459016, "grad_norm": 9.768335342407227, "learning_rate": 1.2254482418793474e-05, "loss": 0.7868, "step": 13566 }, { "epoch": 44.48196721311476, "grad_norm": 5.344858169555664, "learning_rate": 1.2253447857221809e-05, "loss": 0.5941, "step": 13567 }, { "epoch": 44.485245901639345, "grad_norm": 7.791526794433594, "learning_rate": 1.225241327024018e-05, "loss": 0.5432, "step": 13568 }, { "epoch": 44.488524590163934, "grad_norm": 6.8490376472473145, "learning_rate": 1.2251378657860247e-05, "loss": 0.7792, "step": 13569 }, { "epoch": 44.49180327868852, "grad_norm": 5.6733622550964355, "learning_rate": 1.225034402009368e-05, "loss": 0.9046, "step": 13570 }, { "epoch": 44.49508196721312, "grad_norm": 7.355715274810791, "learning_rate": 1.2249309356952143e-05, "loss": 0.502, "step": 13571 }, { "epoch": 44.498360655737706, "grad_norm": 8.64738941192627, "learning_rate": 1.2248274668447307e-05, "loss": 0.5188, "step": 13572 }, { "epoch": 44.501639344262294, "grad_norm": 6.389908790588379, "learning_rate": 1.2247239954590833e-05, "loss": 0.6353, "step": 13573 }, { "epoch": 44.50491803278688, "grad_norm": 11.887003898620605, "learning_rate": 1.2246205215394395e-05, "loss": 0.7497, "step": 13574 }, { "epoch": 44.50819672131148, "grad_norm": 6.02133846282959, "learning_rate": 1.2245170450869655e-05, "loss": 0.636, "step": 13575 }, { "epoch": 44.511475409836066, "grad_norm": 6.603867053985596, "learning_rate": 1.2244135661028287e-05, "loss": 0.6087, "step": 13576 }, { "epoch": 44.514754098360655, "grad_norm": 5.964959621429443, "learning_rate": 1.2243100845881955e-05, "loss": 0.7401, "step": 13577 }, { "epoch": 44.51803278688524, "grad_norm": 5.791136741638184, "learning_rate": 1.2242066005442329e-05, "loss": 0.5891, "step": 13578 }, { "epoch": 44.52131147540984, "grad_norm": 6.667850971221924, "learning_rate": 1.2241031139721076e-05, "loss": 0.6786, "step": 13579 }, { "epoch": 44.52459016393443, "grad_norm": 13.305939674377441, "learning_rate": 1.2239996248729867e-05, "loss": 0.7803, "step": 13580 }, { "epoch": 44.527868852459015, "grad_norm": 7.174389839172363, "learning_rate": 1.2238961332480372e-05, "loss": 0.6207, "step": 13581 }, { "epoch": 44.5311475409836, "grad_norm": 6.478039741516113, "learning_rate": 1.2237926390984259e-05, "loss": 0.5656, "step": 13582 }, { "epoch": 44.5344262295082, "grad_norm": 5.882485866546631, "learning_rate": 1.2236891424253201e-05, "loss": 0.7417, "step": 13583 }, { "epoch": 44.53770491803279, "grad_norm": 6.701595306396484, "learning_rate": 1.2235856432298864e-05, "loss": 0.6083, "step": 13584 }, { "epoch": 44.540983606557376, "grad_norm": 6.365326881408691, "learning_rate": 1.2234821415132922e-05, "loss": 0.7601, "step": 13585 }, { "epoch": 44.544262295081964, "grad_norm": 8.60671615600586, "learning_rate": 1.2233786372767045e-05, "loss": 0.5758, "step": 13586 }, { "epoch": 44.54754098360656, "grad_norm": 7.337775707244873, "learning_rate": 1.2232751305212907e-05, "loss": 0.4299, "step": 13587 }, { "epoch": 44.55081967213115, "grad_norm": 7.7429914474487305, "learning_rate": 1.2231716212482173e-05, "loss": 0.9689, "step": 13588 }, { "epoch": 44.554098360655736, "grad_norm": 7.349781513214111, "learning_rate": 1.2230681094586517e-05, "loss": 0.6919, "step": 13589 }, { "epoch": 44.557377049180324, "grad_norm": 6.139439105987549, "learning_rate": 1.2229645951537616e-05, "loss": 0.6814, "step": 13590 }, { "epoch": 44.56065573770492, "grad_norm": 5.959263324737549, "learning_rate": 1.2228610783347134e-05, "loss": 0.6743, "step": 13591 }, { "epoch": 44.56393442622951, "grad_norm": 5.948053359985352, "learning_rate": 1.2227575590026754e-05, "loss": 0.7029, "step": 13592 }, { "epoch": 44.5672131147541, "grad_norm": 12.33366584777832, "learning_rate": 1.222654037158814e-05, "loss": 0.7073, "step": 13593 }, { "epoch": 44.570491803278685, "grad_norm": 8.352535247802734, "learning_rate": 1.222550512804297e-05, "loss": 0.5731, "step": 13594 }, { "epoch": 44.57377049180328, "grad_norm": 6.384799480438232, "learning_rate": 1.2224469859402913e-05, "loss": 0.7971, "step": 13595 }, { "epoch": 44.57704918032787, "grad_norm": 7.094561576843262, "learning_rate": 1.2223434565679648e-05, "loss": 0.6887, "step": 13596 }, { "epoch": 44.58032786885246, "grad_norm": 7.029449462890625, "learning_rate": 1.2222399246884845e-05, "loss": 0.6555, "step": 13597 }, { "epoch": 44.58360655737705, "grad_norm": 6.686728000640869, "learning_rate": 1.2221363903030182e-05, "loss": 0.713, "step": 13598 }, { "epoch": 44.58688524590164, "grad_norm": 7.097387313842773, "learning_rate": 1.2220328534127329e-05, "loss": 0.5566, "step": 13599 }, { "epoch": 44.59016393442623, "grad_norm": 6.75651216506958, "learning_rate": 1.2219293140187962e-05, "loss": 1.0104, "step": 13600 }, { "epoch": 44.59344262295082, "grad_norm": 6.619221210479736, "learning_rate": 1.2218257721223759e-05, "loss": 0.8361, "step": 13601 }, { "epoch": 44.59672131147541, "grad_norm": 5.774677276611328, "learning_rate": 1.2217222277246395e-05, "loss": 0.7811, "step": 13602 }, { "epoch": 44.6, "grad_norm": 6.299915313720703, "learning_rate": 1.2216186808267544e-05, "loss": 0.6364, "step": 13603 }, { "epoch": 44.60327868852459, "grad_norm": 6.845921039581299, "learning_rate": 1.221515131429888e-05, "loss": 0.6858, "step": 13604 }, { "epoch": 44.60655737704918, "grad_norm": 8.141722679138184, "learning_rate": 1.2214115795352086e-05, "loss": 0.4049, "step": 13605 }, { "epoch": 44.609836065573774, "grad_norm": 6.0994486808776855, "learning_rate": 1.2213080251438832e-05, "loss": 0.8507, "step": 13606 }, { "epoch": 44.61311475409836, "grad_norm": 5.899051666259766, "learning_rate": 1.2212044682570799e-05, "loss": 0.7771, "step": 13607 }, { "epoch": 44.61639344262295, "grad_norm": 8.734251976013184, "learning_rate": 1.2211009088759658e-05, "loss": 0.5666, "step": 13608 }, { "epoch": 44.61967213114754, "grad_norm": 6.78230619430542, "learning_rate": 1.2209973470017093e-05, "loss": 0.4868, "step": 13609 }, { "epoch": 44.622950819672134, "grad_norm": 7.097991943359375, "learning_rate": 1.2208937826354781e-05, "loss": 0.6224, "step": 13610 }, { "epoch": 44.62622950819672, "grad_norm": 7.668011665344238, "learning_rate": 1.2207902157784397e-05, "loss": 0.7653, "step": 13611 }, { "epoch": 44.62950819672131, "grad_norm": 14.814737319946289, "learning_rate": 1.2206866464317619e-05, "loss": 0.7382, "step": 13612 }, { "epoch": 44.6327868852459, "grad_norm": 7.428074359893799, "learning_rate": 1.220583074596613e-05, "loss": 0.8011, "step": 13613 }, { "epoch": 44.636065573770495, "grad_norm": 6.578111171722412, "learning_rate": 1.2204795002741603e-05, "loss": 0.6886, "step": 13614 }, { "epoch": 44.63934426229508, "grad_norm": 7.581916809082031, "learning_rate": 1.220375923465572e-05, "loss": 0.5927, "step": 13615 }, { "epoch": 44.64262295081967, "grad_norm": 6.240387916564941, "learning_rate": 1.220272344172016e-05, "loss": 0.4947, "step": 13616 }, { "epoch": 44.64590163934426, "grad_norm": 6.752715587615967, "learning_rate": 1.2201687623946605e-05, "loss": 0.7794, "step": 13617 }, { "epoch": 44.649180327868855, "grad_norm": 8.86851978302002, "learning_rate": 1.2200651781346732e-05, "loss": 0.4962, "step": 13618 }, { "epoch": 44.65245901639344, "grad_norm": 6.18311882019043, "learning_rate": 1.2199615913932221e-05, "loss": 0.9566, "step": 13619 }, { "epoch": 44.65573770491803, "grad_norm": 6.759428977966309, "learning_rate": 1.2198580021714756e-05, "loss": 0.7478, "step": 13620 }, { "epoch": 44.65901639344262, "grad_norm": 7.699070930480957, "learning_rate": 1.2197544104706013e-05, "loss": 0.6403, "step": 13621 }, { "epoch": 44.662295081967216, "grad_norm": 21.732892990112305, "learning_rate": 1.2196508162917678e-05, "loss": 0.6593, "step": 13622 }, { "epoch": 44.665573770491804, "grad_norm": 6.28718376159668, "learning_rate": 1.2195472196361426e-05, "loss": 0.6134, "step": 13623 }, { "epoch": 44.66885245901639, "grad_norm": 5.7981390953063965, "learning_rate": 1.2194436205048948e-05, "loss": 0.8574, "step": 13624 }, { "epoch": 44.67213114754098, "grad_norm": 9.787039756774902, "learning_rate": 1.2193400188991913e-05, "loss": 0.6188, "step": 13625 }, { "epoch": 44.675409836065576, "grad_norm": 7.4134111404418945, "learning_rate": 1.2192364148202016e-05, "loss": 0.6732, "step": 13626 }, { "epoch": 44.678688524590164, "grad_norm": 6.450430393218994, "learning_rate": 1.219132808269093e-05, "loss": 0.8504, "step": 13627 }, { "epoch": 44.68196721311475, "grad_norm": 6.080451488494873, "learning_rate": 1.2190291992470345e-05, "loss": 0.8505, "step": 13628 }, { "epoch": 44.68524590163934, "grad_norm": 6.947671890258789, "learning_rate": 1.218925587755194e-05, "loss": 0.721, "step": 13629 }, { "epoch": 44.68852459016394, "grad_norm": 6.356510162353516, "learning_rate": 1.2188219737947396e-05, "loss": 0.6016, "step": 13630 }, { "epoch": 44.691803278688525, "grad_norm": 6.350411891937256, "learning_rate": 1.21871835736684e-05, "loss": 0.9236, "step": 13631 }, { "epoch": 44.69508196721311, "grad_norm": 7.591343879699707, "learning_rate": 1.2186147384726634e-05, "loss": 0.4265, "step": 13632 }, { "epoch": 44.6983606557377, "grad_norm": 6.890514373779297, "learning_rate": 1.2185111171133786e-05, "loss": 0.9075, "step": 13633 }, { "epoch": 44.7016393442623, "grad_norm": 6.750080108642578, "learning_rate": 1.2184074932901535e-05, "loss": 0.8264, "step": 13634 }, { "epoch": 44.704918032786885, "grad_norm": 6.282925128936768, "learning_rate": 1.2183038670041571e-05, "loss": 0.7296, "step": 13635 }, { "epoch": 44.708196721311474, "grad_norm": 7.3833394050598145, "learning_rate": 1.2182002382565575e-05, "loss": 0.9927, "step": 13636 }, { "epoch": 44.71147540983607, "grad_norm": 7.161409378051758, "learning_rate": 1.2180966070485235e-05, "loss": 0.6639, "step": 13637 }, { "epoch": 44.71475409836066, "grad_norm": 6.642004013061523, "learning_rate": 1.217992973381223e-05, "loss": 0.7306, "step": 13638 }, { "epoch": 44.718032786885246, "grad_norm": 8.878411293029785, "learning_rate": 1.2178893372558255e-05, "loss": 0.8125, "step": 13639 }, { "epoch": 44.721311475409834, "grad_norm": 6.056924819946289, "learning_rate": 1.2177856986734991e-05, "loss": 0.6645, "step": 13640 }, { "epoch": 44.72459016393443, "grad_norm": 6.5341620445251465, "learning_rate": 1.2176820576354124e-05, "loss": 0.6485, "step": 13641 }, { "epoch": 44.72786885245902, "grad_norm": 6.4195356369018555, "learning_rate": 1.2175784141427342e-05, "loss": 0.6709, "step": 13642 }, { "epoch": 44.731147540983606, "grad_norm": 7.665170669555664, "learning_rate": 1.2174747681966332e-05, "loss": 0.3911, "step": 13643 }, { "epoch": 44.734426229508195, "grad_norm": 10.02538013458252, "learning_rate": 1.2173711197982781e-05, "loss": 0.6419, "step": 13644 }, { "epoch": 44.73770491803279, "grad_norm": 6.0438313484191895, "learning_rate": 1.2172674689488375e-05, "loss": 0.566, "step": 13645 }, { "epoch": 44.74098360655738, "grad_norm": 6.555770397186279, "learning_rate": 1.2171638156494803e-05, "loss": 0.538, "step": 13646 }, { "epoch": 44.74426229508197, "grad_norm": 8.851892471313477, "learning_rate": 1.2170601599013755e-05, "loss": 0.7775, "step": 13647 }, { "epoch": 44.747540983606555, "grad_norm": 22.750612258911133, "learning_rate": 1.2169565017056915e-05, "loss": 0.5291, "step": 13648 }, { "epoch": 44.75081967213115, "grad_norm": 7.566888332366943, "learning_rate": 1.2168528410635974e-05, "loss": 0.6975, "step": 13649 }, { "epoch": 44.75409836065574, "grad_norm": 6.047909736633301, "learning_rate": 1.2167491779762621e-05, "loss": 0.5691, "step": 13650 }, { "epoch": 44.75737704918033, "grad_norm": 6.634340286254883, "learning_rate": 1.2166455124448543e-05, "loss": 0.7216, "step": 13651 }, { "epoch": 44.760655737704916, "grad_norm": 6.535261154174805, "learning_rate": 1.2165418444705433e-05, "loss": 0.5057, "step": 13652 }, { "epoch": 44.76393442622951, "grad_norm": 7.890451431274414, "learning_rate": 1.2164381740544978e-05, "loss": 0.7645, "step": 13653 }, { "epoch": 44.7672131147541, "grad_norm": 5.616978168487549, "learning_rate": 1.216334501197887e-05, "loss": 0.5686, "step": 13654 }, { "epoch": 44.77049180327869, "grad_norm": 6.563719272613525, "learning_rate": 1.2162308259018797e-05, "loss": 0.4964, "step": 13655 }, { "epoch": 44.773770491803276, "grad_norm": 6.727896690368652, "learning_rate": 1.2161271481676447e-05, "loss": 0.6233, "step": 13656 }, { "epoch": 44.77704918032787, "grad_norm": 8.508773803710938, "learning_rate": 1.2160234679963517e-05, "loss": 0.887, "step": 13657 }, { "epoch": 44.78032786885246, "grad_norm": 7.20041561126709, "learning_rate": 1.215919785389169e-05, "loss": 0.9287, "step": 13658 }, { "epoch": 44.78360655737705, "grad_norm": 6.035815715789795, "learning_rate": 1.2158161003472669e-05, "loss": 0.6981, "step": 13659 }, { "epoch": 44.78688524590164, "grad_norm": 10.211908340454102, "learning_rate": 1.2157124128718133e-05, "loss": 0.5828, "step": 13660 }, { "epoch": 44.79016393442623, "grad_norm": 6.176314830780029, "learning_rate": 1.2156087229639787e-05, "loss": 0.6125, "step": 13661 }, { "epoch": 44.79344262295082, "grad_norm": 5.8503828048706055, "learning_rate": 1.2155050306249307e-05, "loss": 0.7989, "step": 13662 }, { "epoch": 44.79672131147541, "grad_norm": 15.685676574707031, "learning_rate": 1.2154013358558401e-05, "loss": 0.7525, "step": 13663 }, { "epoch": 44.8, "grad_norm": 5.891763210296631, "learning_rate": 1.215297638657875e-05, "loss": 0.9016, "step": 13664 }, { "epoch": 44.80327868852459, "grad_norm": 6.273739814758301, "learning_rate": 1.2151939390322057e-05, "loss": 0.9832, "step": 13665 }, { "epoch": 44.80655737704918, "grad_norm": 5.8835248947143555, "learning_rate": 1.2150902369800004e-05, "loss": 0.6755, "step": 13666 }, { "epoch": 44.80983606557377, "grad_norm": 5.993711948394775, "learning_rate": 1.2149865325024295e-05, "loss": 0.627, "step": 13667 }, { "epoch": 44.81311475409836, "grad_norm": 7.156312465667725, "learning_rate": 1.2148828256006616e-05, "loss": 0.7255, "step": 13668 }, { "epoch": 44.81639344262295, "grad_norm": 6.854434013366699, "learning_rate": 1.2147791162758664e-05, "loss": 0.7454, "step": 13669 }, { "epoch": 44.81967213114754, "grad_norm": 6.32757043838501, "learning_rate": 1.2146754045292135e-05, "loss": 0.5163, "step": 13670 }, { "epoch": 44.82295081967213, "grad_norm": 7.6032538414001465, "learning_rate": 1.214571690361872e-05, "loss": 0.7469, "step": 13671 }, { "epoch": 44.82622950819672, "grad_norm": 7.732081413269043, "learning_rate": 1.214467973775012e-05, "loss": 0.5309, "step": 13672 }, { "epoch": 44.829508196721314, "grad_norm": 6.087118148803711, "learning_rate": 1.2143642547698022e-05, "loss": 0.709, "step": 13673 }, { "epoch": 44.8327868852459, "grad_norm": 6.566056728363037, "learning_rate": 1.2142605333474129e-05, "loss": 0.7837, "step": 13674 }, { "epoch": 44.83606557377049, "grad_norm": 12.586938858032227, "learning_rate": 1.2141568095090131e-05, "loss": 0.8254, "step": 13675 }, { "epoch": 44.83934426229508, "grad_norm": 7.932088375091553, "learning_rate": 1.2140530832557727e-05, "loss": 0.6227, "step": 13676 }, { "epoch": 44.842622950819674, "grad_norm": 6.903958797454834, "learning_rate": 1.213949354588861e-05, "loss": 1.0068, "step": 13677 }, { "epoch": 44.84590163934426, "grad_norm": 8.08675479888916, "learning_rate": 1.2138456235094483e-05, "loss": 0.7198, "step": 13678 }, { "epoch": 44.84918032786885, "grad_norm": 6.723871231079102, "learning_rate": 1.2137418900187037e-05, "loss": 0.7294, "step": 13679 }, { "epoch": 44.85245901639344, "grad_norm": 7.547519207000732, "learning_rate": 1.2136381541177969e-05, "loss": 0.6004, "step": 13680 }, { "epoch": 44.855737704918035, "grad_norm": 7.9544291496276855, "learning_rate": 1.2135344158078978e-05, "loss": 0.5801, "step": 13681 }, { "epoch": 44.85901639344262, "grad_norm": 6.947579860687256, "learning_rate": 1.2134306750901762e-05, "loss": 0.7451, "step": 13682 }, { "epoch": 44.86229508196721, "grad_norm": 5.427744388580322, "learning_rate": 1.2133269319658018e-05, "loss": 0.8664, "step": 13683 }, { "epoch": 44.86557377049181, "grad_norm": 6.78148078918457, "learning_rate": 1.2132231864359445e-05, "loss": 0.672, "step": 13684 }, { "epoch": 44.868852459016395, "grad_norm": 8.0216646194458, "learning_rate": 1.2131194385017739e-05, "loss": 0.632, "step": 13685 }, { "epoch": 44.87213114754098, "grad_norm": 7.199121952056885, "learning_rate": 1.2130156881644601e-05, "loss": 0.6498, "step": 13686 }, { "epoch": 44.87540983606557, "grad_norm": 23.880157470703125, "learning_rate": 1.2129119354251732e-05, "loss": 0.6424, "step": 13687 }, { "epoch": 44.87868852459017, "grad_norm": 7.482081890106201, "learning_rate": 1.2128081802850826e-05, "loss": 0.6641, "step": 13688 }, { "epoch": 44.881967213114756, "grad_norm": 8.99301815032959, "learning_rate": 1.2127044227453586e-05, "loss": 0.7167, "step": 13689 }, { "epoch": 44.885245901639344, "grad_norm": 7.641873359680176, "learning_rate": 1.2126006628071708e-05, "loss": 0.6565, "step": 13690 }, { "epoch": 44.88852459016393, "grad_norm": 6.999391078948975, "learning_rate": 1.21249690047169e-05, "loss": 0.727, "step": 13691 }, { "epoch": 44.89180327868853, "grad_norm": 16.682767868041992, "learning_rate": 1.2123931357400855e-05, "loss": 0.7874, "step": 13692 }, { "epoch": 44.895081967213116, "grad_norm": 6.746870517730713, "learning_rate": 1.2122893686135273e-05, "loss": 0.7901, "step": 13693 }, { "epoch": 44.898360655737704, "grad_norm": 9.50780200958252, "learning_rate": 1.2121855990931861e-05, "loss": 0.6063, "step": 13694 }, { "epoch": 44.90163934426229, "grad_norm": 9.67125415802002, "learning_rate": 1.2120818271802315e-05, "loss": 0.7934, "step": 13695 }, { "epoch": 44.90491803278689, "grad_norm": 7.677447319030762, "learning_rate": 1.2119780528758336e-05, "loss": 0.7127, "step": 13696 }, { "epoch": 44.90819672131148, "grad_norm": 10.075992584228516, "learning_rate": 1.211874276181163e-05, "loss": 0.7249, "step": 13697 }, { "epoch": 44.911475409836065, "grad_norm": 6.953293800354004, "learning_rate": 1.2117704970973893e-05, "loss": 0.6317, "step": 13698 }, { "epoch": 44.91475409836065, "grad_norm": 7.429379463195801, "learning_rate": 1.2116667156256833e-05, "loss": 0.5959, "step": 13699 }, { "epoch": 44.91803278688525, "grad_norm": 6.322168350219727, "learning_rate": 1.2115629317672154e-05, "loss": 0.6152, "step": 13700 }, { "epoch": 44.92131147540984, "grad_norm": 11.29621696472168, "learning_rate": 1.2114591455231548e-05, "loss": 0.7508, "step": 13701 }, { "epoch": 44.924590163934425, "grad_norm": 9.490548133850098, "learning_rate": 1.2113553568946728e-05, "loss": 0.7307, "step": 13702 }, { "epoch": 44.927868852459014, "grad_norm": 13.732097625732422, "learning_rate": 1.2112515658829393e-05, "loss": 0.4633, "step": 13703 }, { "epoch": 44.93114754098361, "grad_norm": 6.204080104827881, "learning_rate": 1.211147772489125e-05, "loss": 0.6393, "step": 13704 }, { "epoch": 44.9344262295082, "grad_norm": 9.684873580932617, "learning_rate": 1.2110439767143998e-05, "loss": 0.5657, "step": 13705 }, { "epoch": 44.937704918032786, "grad_norm": 10.353528022766113, "learning_rate": 1.2109401785599344e-05, "loss": 0.686, "step": 13706 }, { "epoch": 44.940983606557374, "grad_norm": 7.227951526641846, "learning_rate": 1.2108363780268992e-05, "loss": 0.6397, "step": 13707 }, { "epoch": 44.94426229508197, "grad_norm": 7.62162446975708, "learning_rate": 1.2107325751164645e-05, "loss": 0.7192, "step": 13708 }, { "epoch": 44.94754098360656, "grad_norm": 6.016881465911865, "learning_rate": 1.210628769829801e-05, "loss": 0.6615, "step": 13709 }, { "epoch": 44.950819672131146, "grad_norm": 6.309826374053955, "learning_rate": 1.210524962168079e-05, "loss": 0.8946, "step": 13710 }, { "epoch": 44.954098360655735, "grad_norm": 8.551984786987305, "learning_rate": 1.2104211521324693e-05, "loss": 0.9071, "step": 13711 }, { "epoch": 44.95737704918033, "grad_norm": 7.022805213928223, "learning_rate": 1.2103173397241421e-05, "loss": 0.6765, "step": 13712 }, { "epoch": 44.96065573770492, "grad_norm": 8.229565620422363, "learning_rate": 1.2102135249442687e-05, "loss": 0.6969, "step": 13713 }, { "epoch": 44.96393442622951, "grad_norm": 9.439021110534668, "learning_rate": 1.2101097077940187e-05, "loss": 0.6174, "step": 13714 }, { "epoch": 44.967213114754095, "grad_norm": 7.087189674377441, "learning_rate": 1.2100058882745635e-05, "loss": 0.5693, "step": 13715 }, { "epoch": 44.97049180327869, "grad_norm": 7.2653422355651855, "learning_rate": 1.2099020663870734e-05, "loss": 0.6877, "step": 13716 }, { "epoch": 44.97377049180328, "grad_norm": 6.818160057067871, "learning_rate": 1.2097982421327198e-05, "loss": 0.7253, "step": 13717 }, { "epoch": 44.97704918032787, "grad_norm": 8.4537992477417, "learning_rate": 1.2096944155126727e-05, "loss": 0.4617, "step": 13718 }, { "epoch": 44.980327868852456, "grad_norm": 6.694338321685791, "learning_rate": 1.2095905865281026e-05, "loss": 0.8221, "step": 13719 }, { "epoch": 44.98360655737705, "grad_norm": 7.30985689163208, "learning_rate": 1.2094867551801812e-05, "loss": 0.6501, "step": 13720 }, { "epoch": 44.98688524590164, "grad_norm": 7.252175807952881, "learning_rate": 1.2093829214700782e-05, "loss": 0.7567, "step": 13721 }, { "epoch": 44.99016393442623, "grad_norm": 12.205493927001953, "learning_rate": 1.2092790853989656e-05, "loss": 0.6945, "step": 13722 }, { "epoch": 44.993442622950816, "grad_norm": 6.53126859664917, "learning_rate": 1.2091752469680136e-05, "loss": 0.9243, "step": 13723 }, { "epoch": 44.99672131147541, "grad_norm": 8.828011512756348, "learning_rate": 1.2090714061783931e-05, "loss": 0.9991, "step": 13724 }, { "epoch": 45.0, "grad_norm": 7.643192291259766, "learning_rate": 1.2089675630312755e-05, "loss": 0.4173, "step": 13725 }, { "epoch": 45.00327868852459, "grad_norm": 7.222321033477783, "learning_rate": 1.2088637175278308e-05, "loss": 0.8662, "step": 13726 }, { "epoch": 45.006557377049184, "grad_norm": 49.692596435546875, "learning_rate": 1.2087598696692306e-05, "loss": 0.7109, "step": 13727 }, { "epoch": 45.00983606557377, "grad_norm": 6.740454196929932, "learning_rate": 1.208656019456646e-05, "loss": 0.5635, "step": 13728 }, { "epoch": 45.01311475409836, "grad_norm": 7.840219497680664, "learning_rate": 1.2085521668912476e-05, "loss": 0.4688, "step": 13729 }, { "epoch": 45.01639344262295, "grad_norm": 11.189818382263184, "learning_rate": 1.2084483119742072e-05, "loss": 0.7943, "step": 13730 }, { "epoch": 45.019672131147544, "grad_norm": 6.918399333953857, "learning_rate": 1.2083444547066951e-05, "loss": 0.6437, "step": 13731 }, { "epoch": 45.02295081967213, "grad_norm": 5.808445453643799, "learning_rate": 1.2082405950898826e-05, "loss": 0.9238, "step": 13732 }, { "epoch": 45.02622950819672, "grad_norm": 6.144369125366211, "learning_rate": 1.2081367331249409e-05, "loss": 0.7802, "step": 13733 }, { "epoch": 45.02950819672131, "grad_norm": 8.653483390808105, "learning_rate": 1.208032868813041e-05, "loss": 0.6421, "step": 13734 }, { "epoch": 45.032786885245905, "grad_norm": 7.82164192199707, "learning_rate": 1.2079290021553542e-05, "loss": 0.4935, "step": 13735 }, { "epoch": 45.03606557377049, "grad_norm": 6.716062545776367, "learning_rate": 1.207825133153052e-05, "loss": 0.5065, "step": 13736 }, { "epoch": 45.03934426229508, "grad_norm": 7.3249101638793945, "learning_rate": 1.2077212618073052e-05, "loss": 0.4894, "step": 13737 }, { "epoch": 45.04262295081967, "grad_norm": 7.01027250289917, "learning_rate": 1.207617388119285e-05, "loss": 1.1551, "step": 13738 }, { "epoch": 45.045901639344265, "grad_norm": 16.833206176757812, "learning_rate": 1.207513512090163e-05, "loss": 0.6965, "step": 13739 }, { "epoch": 45.049180327868854, "grad_norm": 10.48754596710205, "learning_rate": 1.2074096337211103e-05, "loss": 0.8226, "step": 13740 }, { "epoch": 45.05245901639344, "grad_norm": 6.236688613891602, "learning_rate": 1.2073057530132985e-05, "loss": 0.4259, "step": 13741 }, { "epoch": 45.05573770491803, "grad_norm": 8.47504711151123, "learning_rate": 1.2072018699678987e-05, "loss": 0.6387, "step": 13742 }, { "epoch": 45.059016393442626, "grad_norm": 8.338211059570312, "learning_rate": 1.2070979845860826e-05, "loss": 0.7326, "step": 13743 }, { "epoch": 45.062295081967214, "grad_norm": 6.697504043579102, "learning_rate": 1.2069940968690213e-05, "loss": 0.6393, "step": 13744 }, { "epoch": 45.0655737704918, "grad_norm": 7.353867530822754, "learning_rate": 1.206890206817886e-05, "loss": 0.6684, "step": 13745 }, { "epoch": 45.06885245901639, "grad_norm": 7.141124248504639, "learning_rate": 1.2067863144338489e-05, "loss": 0.5173, "step": 13746 }, { "epoch": 45.072131147540986, "grad_norm": 6.868338108062744, "learning_rate": 1.2066824197180808e-05, "loss": 0.6821, "step": 13747 }, { "epoch": 45.075409836065575, "grad_norm": 7.109879970550537, "learning_rate": 1.206578522671754e-05, "loss": 0.6966, "step": 13748 }, { "epoch": 45.07868852459016, "grad_norm": 6.717137813568115, "learning_rate": 1.2064746232960392e-05, "loss": 0.7959, "step": 13749 }, { "epoch": 45.08196721311475, "grad_norm": 8.511841773986816, "learning_rate": 1.206370721592108e-05, "loss": 0.5432, "step": 13750 }, { "epoch": 45.08524590163935, "grad_norm": 8.236675262451172, "learning_rate": 1.2062668175611329e-05, "loss": 0.4993, "step": 13751 }, { "epoch": 45.088524590163935, "grad_norm": 8.084646224975586, "learning_rate": 1.2061629112042848e-05, "loss": 0.5406, "step": 13752 }, { "epoch": 45.09180327868852, "grad_norm": 6.289858341217041, "learning_rate": 1.2060590025227354e-05, "loss": 0.8799, "step": 13753 }, { "epoch": 45.09508196721311, "grad_norm": 6.62746000289917, "learning_rate": 1.2059550915176567e-05, "loss": 0.5767, "step": 13754 }, { "epoch": 45.09836065573771, "grad_norm": 8.877226829528809, "learning_rate": 1.20585117819022e-05, "loss": 0.4692, "step": 13755 }, { "epoch": 45.101639344262296, "grad_norm": 8.98908519744873, "learning_rate": 1.2057472625415976e-05, "loss": 0.611, "step": 13756 }, { "epoch": 45.104918032786884, "grad_norm": 7.5157270431518555, "learning_rate": 1.2056433445729607e-05, "loss": 0.6868, "step": 13757 }, { "epoch": 45.10819672131147, "grad_norm": 7.8672776222229, "learning_rate": 1.205539424285481e-05, "loss": 0.7221, "step": 13758 }, { "epoch": 45.11147540983607, "grad_norm": 7.694485187530518, "learning_rate": 1.205435501680331e-05, "loss": 0.8304, "step": 13759 }, { "epoch": 45.114754098360656, "grad_norm": 7.263636589050293, "learning_rate": 1.2053315767586818e-05, "loss": 0.4418, "step": 13760 }, { "epoch": 45.118032786885244, "grad_norm": 7.98360538482666, "learning_rate": 1.2052276495217056e-05, "loss": 0.7338, "step": 13761 }, { "epoch": 45.12131147540983, "grad_norm": 13.444664001464844, "learning_rate": 1.2051237199705743e-05, "loss": 0.7014, "step": 13762 }, { "epoch": 45.12459016393443, "grad_norm": 7.043367385864258, "learning_rate": 1.20501978810646e-05, "loss": 0.8263, "step": 13763 }, { "epoch": 45.12786885245902, "grad_norm": 8.069273948669434, "learning_rate": 1.2049158539305344e-05, "loss": 0.6626, "step": 13764 }, { "epoch": 45.131147540983605, "grad_norm": 5.714592933654785, "learning_rate": 1.2048119174439694e-05, "loss": 0.6414, "step": 13765 }, { "epoch": 45.13442622950819, "grad_norm": 14.123921394348145, "learning_rate": 1.204707978647937e-05, "loss": 0.4626, "step": 13766 }, { "epoch": 45.13770491803279, "grad_norm": 7.507538318634033, "learning_rate": 1.2046040375436094e-05, "loss": 0.7216, "step": 13767 }, { "epoch": 45.14098360655738, "grad_norm": 8.662848472595215, "learning_rate": 1.2045000941321583e-05, "loss": 0.7028, "step": 13768 }, { "epoch": 45.144262295081965, "grad_norm": 6.1361494064331055, "learning_rate": 1.2043961484147563e-05, "loss": 0.6664, "step": 13769 }, { "epoch": 45.14754098360656, "grad_norm": 8.535415649414062, "learning_rate": 1.2042922003925752e-05, "loss": 0.4566, "step": 13770 }, { "epoch": 45.15081967213115, "grad_norm": 6.381206035614014, "learning_rate": 1.204188250066787e-05, "loss": 0.6885, "step": 13771 }, { "epoch": 45.15409836065574, "grad_norm": 7.504036903381348, "learning_rate": 1.204084297438564e-05, "loss": 0.5878, "step": 13772 }, { "epoch": 45.157377049180326, "grad_norm": 8.53562068939209, "learning_rate": 1.2039803425090784e-05, "loss": 0.7578, "step": 13773 }, { "epoch": 45.16065573770492, "grad_norm": 9.042196273803711, "learning_rate": 1.2038763852795024e-05, "loss": 0.9942, "step": 13774 }, { "epoch": 45.16393442622951, "grad_norm": 6.662565231323242, "learning_rate": 1.203772425751008e-05, "loss": 0.7774, "step": 13775 }, { "epoch": 45.1672131147541, "grad_norm": 7.648731231689453, "learning_rate": 1.2036684639247679e-05, "loss": 0.6715, "step": 13776 }, { "epoch": 45.170491803278686, "grad_norm": 7.876788139343262, "learning_rate": 1.2035644998019537e-05, "loss": 0.7171, "step": 13777 }, { "epoch": 45.17377049180328, "grad_norm": 6.870977878570557, "learning_rate": 1.203460533383738e-05, "loss": 0.6043, "step": 13778 }, { "epoch": 45.17704918032787, "grad_norm": 16.821542739868164, "learning_rate": 1.2033565646712939e-05, "loss": 0.6362, "step": 13779 }, { "epoch": 45.18032786885246, "grad_norm": 6.568994998931885, "learning_rate": 1.2032525936657926e-05, "loss": 0.5118, "step": 13780 }, { "epoch": 45.18360655737705, "grad_norm": 7.670189380645752, "learning_rate": 1.2031486203684071e-05, "loss": 0.7191, "step": 13781 }, { "epoch": 45.18688524590164, "grad_norm": 5.585615158081055, "learning_rate": 1.2030446447803099e-05, "loss": 0.5797, "step": 13782 }, { "epoch": 45.19016393442623, "grad_norm": 10.129386901855469, "learning_rate": 1.202940666902673e-05, "loss": 0.7357, "step": 13783 }, { "epoch": 45.19344262295082, "grad_norm": 6.852324485778809, "learning_rate": 1.2028366867366692e-05, "loss": 0.351, "step": 13784 }, { "epoch": 45.19672131147541, "grad_norm": 6.620956897735596, "learning_rate": 1.2027327042834708e-05, "loss": 0.4102, "step": 13785 }, { "epoch": 45.2, "grad_norm": 16.338848114013672, "learning_rate": 1.2026287195442503e-05, "loss": 0.6329, "step": 13786 }, { "epoch": 45.20327868852459, "grad_norm": 8.14570140838623, "learning_rate": 1.2025247325201805e-05, "loss": 0.3304, "step": 13787 }, { "epoch": 45.20655737704918, "grad_norm": 8.743741989135742, "learning_rate": 1.2024207432124336e-05, "loss": 0.7422, "step": 13788 }, { "epoch": 45.20983606557377, "grad_norm": 5.578296661376953, "learning_rate": 1.2023167516221823e-05, "loss": 0.8436, "step": 13789 }, { "epoch": 45.21311475409836, "grad_norm": 9.419571876525879, "learning_rate": 1.2022127577505993e-05, "loss": 0.4605, "step": 13790 }, { "epoch": 45.21639344262295, "grad_norm": 8.127289772033691, "learning_rate": 1.2021087615988573e-05, "loss": 0.7315, "step": 13791 }, { "epoch": 45.21967213114754, "grad_norm": 6.162898063659668, "learning_rate": 1.2020047631681289e-05, "loss": 0.6987, "step": 13792 }, { "epoch": 45.22295081967213, "grad_norm": 9.642962455749512, "learning_rate": 1.2019007624595867e-05, "loss": 0.85, "step": 13793 }, { "epoch": 45.226229508196724, "grad_norm": 5.403510570526123, "learning_rate": 1.2017967594744034e-05, "loss": 0.8387, "step": 13794 }, { "epoch": 45.22950819672131, "grad_norm": 21.965482711791992, "learning_rate": 1.201692754213752e-05, "loss": 0.628, "step": 13795 }, { "epoch": 45.2327868852459, "grad_norm": 9.65262508392334, "learning_rate": 1.201588746678805e-05, "loss": 0.5634, "step": 13796 }, { "epoch": 45.23606557377049, "grad_norm": 10.31676197052002, "learning_rate": 1.2014847368707353e-05, "loss": 0.6337, "step": 13797 }, { "epoch": 45.239344262295084, "grad_norm": 5.726150989532471, "learning_rate": 1.2013807247907156e-05, "loss": 0.7769, "step": 13798 }, { "epoch": 45.24262295081967, "grad_norm": 12.742862701416016, "learning_rate": 1.201276710439919e-05, "loss": 0.7243, "step": 13799 }, { "epoch": 45.24590163934426, "grad_norm": 5.753144264221191, "learning_rate": 1.201172693819518e-05, "loss": 0.55, "step": 13800 }, { "epoch": 45.24918032786885, "grad_norm": 6.612165927886963, "learning_rate": 1.201068674930686e-05, "loss": 0.657, "step": 13801 }, { "epoch": 45.252459016393445, "grad_norm": 7.632359027862549, "learning_rate": 1.2009646537745953e-05, "loss": 0.8138, "step": 13802 }, { "epoch": 45.25573770491803, "grad_norm": 7.21408748626709, "learning_rate": 1.2008606303524192e-05, "loss": 0.7072, "step": 13803 }, { "epoch": 45.25901639344262, "grad_norm": 5.729049205780029, "learning_rate": 1.200756604665331e-05, "loss": 0.5857, "step": 13804 }, { "epoch": 45.26229508196721, "grad_norm": 9.850296974182129, "learning_rate": 1.2006525767145033e-05, "loss": 0.8415, "step": 13805 }, { "epoch": 45.265573770491805, "grad_norm": 8.982263565063477, "learning_rate": 1.2005485465011089e-05, "loss": 0.6644, "step": 13806 }, { "epoch": 45.268852459016394, "grad_norm": 6.857630252838135, "learning_rate": 1.2004445140263213e-05, "loss": 0.8687, "step": 13807 }, { "epoch": 45.27213114754098, "grad_norm": 6.635631561279297, "learning_rate": 1.2003404792913131e-05, "loss": 0.9443, "step": 13808 }, { "epoch": 45.27540983606557, "grad_norm": 5.789867877960205, "learning_rate": 1.200236442297258e-05, "loss": 0.8307, "step": 13809 }, { "epoch": 45.278688524590166, "grad_norm": 8.690018653869629, "learning_rate": 1.2001324030453288e-05, "loss": 0.6319, "step": 13810 }, { "epoch": 45.281967213114754, "grad_norm": 8.544903755187988, "learning_rate": 1.2000283615366986e-05, "loss": 0.6873, "step": 13811 }, { "epoch": 45.28524590163934, "grad_norm": 6.802201747894287, "learning_rate": 1.1999243177725406e-05, "loss": 0.6065, "step": 13812 }, { "epoch": 45.28852459016394, "grad_norm": 7.623818397521973, "learning_rate": 1.1998202717540282e-05, "loss": 0.7182, "step": 13813 }, { "epoch": 45.291803278688526, "grad_norm": 9.127097129821777, "learning_rate": 1.1997162234823342e-05, "loss": 0.4205, "step": 13814 }, { "epoch": 45.295081967213115, "grad_norm": 7.1416425704956055, "learning_rate": 1.1996121729586324e-05, "loss": 0.6144, "step": 13815 }, { "epoch": 45.2983606557377, "grad_norm": 7.733702182769775, "learning_rate": 1.1995081201840958e-05, "loss": 0.5648, "step": 13816 }, { "epoch": 45.3016393442623, "grad_norm": 17.584287643432617, "learning_rate": 1.1994040651598977e-05, "loss": 0.6511, "step": 13817 }, { "epoch": 45.30491803278689, "grad_norm": 7.200998783111572, "learning_rate": 1.1993000078872112e-05, "loss": 0.7204, "step": 13818 }, { "epoch": 45.308196721311475, "grad_norm": 8.027229309082031, "learning_rate": 1.1991959483672102e-05, "loss": 0.5447, "step": 13819 }, { "epoch": 45.31147540983606, "grad_norm": 6.886612892150879, "learning_rate": 1.1990918866010675e-05, "loss": 0.6411, "step": 13820 }, { "epoch": 45.31475409836066, "grad_norm": 8.258451461791992, "learning_rate": 1.1989878225899569e-05, "loss": 0.6374, "step": 13821 }, { "epoch": 45.31803278688525, "grad_norm": 7.183422088623047, "learning_rate": 1.198883756335052e-05, "loss": 0.6794, "step": 13822 }, { "epoch": 45.321311475409836, "grad_norm": 7.911472797393799, "learning_rate": 1.1987796878375253e-05, "loss": 0.739, "step": 13823 }, { "epoch": 45.324590163934424, "grad_norm": 6.27009916305542, "learning_rate": 1.1986756170985516e-05, "loss": 0.9108, "step": 13824 }, { "epoch": 45.32786885245902, "grad_norm": 5.025712490081787, "learning_rate": 1.1985715441193034e-05, "loss": 0.5004, "step": 13825 }, { "epoch": 45.33114754098361, "grad_norm": 7.316226005554199, "learning_rate": 1.1984674689009545e-05, "loss": 0.7562, "step": 13826 }, { "epoch": 45.334426229508196, "grad_norm": 5.037535667419434, "learning_rate": 1.1983633914446787e-05, "loss": 0.554, "step": 13827 }, { "epoch": 45.337704918032784, "grad_norm": 8.205689430236816, "learning_rate": 1.1982593117516494e-05, "loss": 0.6581, "step": 13828 }, { "epoch": 45.34098360655738, "grad_norm": 9.145892143249512, "learning_rate": 1.1981552298230402e-05, "loss": 0.6786, "step": 13829 }, { "epoch": 45.34426229508197, "grad_norm": 7.555292129516602, "learning_rate": 1.1980511456600247e-05, "loss": 0.4211, "step": 13830 }, { "epoch": 45.34754098360656, "grad_norm": 11.092327117919922, "learning_rate": 1.1979470592637768e-05, "loss": 0.7816, "step": 13831 }, { "epoch": 45.350819672131145, "grad_norm": 6.041897773742676, "learning_rate": 1.1978429706354695e-05, "loss": 0.6965, "step": 13832 }, { "epoch": 45.35409836065574, "grad_norm": 8.526664733886719, "learning_rate": 1.1977388797762774e-05, "loss": 0.6316, "step": 13833 }, { "epoch": 45.35737704918033, "grad_norm": 6.243255138397217, "learning_rate": 1.1976347866873738e-05, "loss": 0.7846, "step": 13834 }, { "epoch": 45.36065573770492, "grad_norm": 7.808637619018555, "learning_rate": 1.1975306913699328e-05, "loss": 0.7423, "step": 13835 }, { "epoch": 45.363934426229505, "grad_norm": 8.052072525024414, "learning_rate": 1.1974265938251273e-05, "loss": 0.647, "step": 13836 }, { "epoch": 45.3672131147541, "grad_norm": 6.436969757080078, "learning_rate": 1.1973224940541322e-05, "loss": 0.6026, "step": 13837 }, { "epoch": 45.37049180327869, "grad_norm": 7.940410137176514, "learning_rate": 1.1972183920581201e-05, "loss": 0.6112, "step": 13838 }, { "epoch": 45.37377049180328, "grad_norm": 8.695940017700195, "learning_rate": 1.1971142878382663e-05, "loss": 0.7425, "step": 13839 }, { "epoch": 45.377049180327866, "grad_norm": 8.934134483337402, "learning_rate": 1.1970101813957436e-05, "loss": 0.5929, "step": 13840 }, { "epoch": 45.38032786885246, "grad_norm": 8.308547973632812, "learning_rate": 1.1969060727317265e-05, "loss": 0.63, "step": 13841 }, { "epoch": 45.38360655737705, "grad_norm": 6.3330769538879395, "learning_rate": 1.1968019618473883e-05, "loss": 0.6478, "step": 13842 }, { "epoch": 45.38688524590164, "grad_norm": 7.395103931427002, "learning_rate": 1.1966978487439039e-05, "loss": 0.7179, "step": 13843 }, { "epoch": 45.390163934426226, "grad_norm": 6.505764007568359, "learning_rate": 1.1965937334224462e-05, "loss": 0.7119, "step": 13844 }, { "epoch": 45.39344262295082, "grad_norm": 7.006620407104492, "learning_rate": 1.1964896158841902e-05, "loss": 0.6784, "step": 13845 }, { "epoch": 45.39672131147541, "grad_norm": 7.630232810974121, "learning_rate": 1.1963854961303092e-05, "loss": 0.7611, "step": 13846 }, { "epoch": 45.4, "grad_norm": 6.419206142425537, "learning_rate": 1.1962813741619777e-05, "loss": 0.9337, "step": 13847 }, { "epoch": 45.40327868852459, "grad_norm": 7.499058246612549, "learning_rate": 1.1961772499803696e-05, "loss": 0.8347, "step": 13848 }, { "epoch": 45.40655737704918, "grad_norm": 7.477614402770996, "learning_rate": 1.1960731235866589e-05, "loss": 0.6716, "step": 13849 }, { "epoch": 45.40983606557377, "grad_norm": 6.273382663726807, "learning_rate": 1.19596899498202e-05, "loss": 0.5886, "step": 13850 }, { "epoch": 45.41311475409836, "grad_norm": 7.7541961669921875, "learning_rate": 1.1958648641676268e-05, "loss": 0.802, "step": 13851 }, { "epoch": 45.41639344262295, "grad_norm": 7.348505973815918, "learning_rate": 1.1957607311446537e-05, "loss": 0.7158, "step": 13852 }, { "epoch": 45.41967213114754, "grad_norm": 6.897982597351074, "learning_rate": 1.1956565959142748e-05, "loss": 1.0624, "step": 13853 }, { "epoch": 45.42295081967213, "grad_norm": 6.817715167999268, "learning_rate": 1.1955524584776646e-05, "loss": 0.7379, "step": 13854 }, { "epoch": 45.42622950819672, "grad_norm": 7.276891231536865, "learning_rate": 1.1954483188359967e-05, "loss": 0.3716, "step": 13855 }, { "epoch": 45.429508196721315, "grad_norm": 5.925295352935791, "learning_rate": 1.1953441769904458e-05, "loss": 0.8429, "step": 13856 }, { "epoch": 45.4327868852459, "grad_norm": 6.676523685455322, "learning_rate": 1.1952400329421865e-05, "loss": 0.6199, "step": 13857 }, { "epoch": 45.43606557377049, "grad_norm": 8.048027992248535, "learning_rate": 1.1951358866923926e-05, "loss": 0.5782, "step": 13858 }, { "epoch": 45.43934426229508, "grad_norm": 8.546664237976074, "learning_rate": 1.1950317382422387e-05, "loss": 0.3994, "step": 13859 }, { "epoch": 45.442622950819676, "grad_norm": 6.992175579071045, "learning_rate": 1.1949275875928991e-05, "loss": 0.5729, "step": 13860 }, { "epoch": 45.445901639344264, "grad_norm": 5.504490375518799, "learning_rate": 1.1948234347455485e-05, "loss": 0.7834, "step": 13861 }, { "epoch": 45.44918032786885, "grad_norm": 6.336127281188965, "learning_rate": 1.194719279701361e-05, "loss": 0.7369, "step": 13862 }, { "epoch": 45.45245901639344, "grad_norm": 5.990421772003174, "learning_rate": 1.1946151224615113e-05, "loss": 0.5734, "step": 13863 }, { "epoch": 45.455737704918036, "grad_norm": 5.887300491333008, "learning_rate": 1.1945109630271734e-05, "loss": 0.9623, "step": 13864 }, { "epoch": 45.459016393442624, "grad_norm": 5.990382671356201, "learning_rate": 1.1944068013995224e-05, "loss": 0.7104, "step": 13865 }, { "epoch": 45.46229508196721, "grad_norm": 5.643084526062012, "learning_rate": 1.1943026375797326e-05, "loss": 0.6355, "step": 13866 }, { "epoch": 45.4655737704918, "grad_norm": 7.809845924377441, "learning_rate": 1.1941984715689785e-05, "loss": 0.6682, "step": 13867 }, { "epoch": 45.4688524590164, "grad_norm": 6.212427139282227, "learning_rate": 1.1940943033684348e-05, "loss": 0.4833, "step": 13868 }, { "epoch": 45.472131147540985, "grad_norm": 8.603339195251465, "learning_rate": 1.1939901329792757e-05, "loss": 0.6467, "step": 13869 }, { "epoch": 45.47540983606557, "grad_norm": 7.446549415588379, "learning_rate": 1.1938859604026765e-05, "loss": 0.7948, "step": 13870 }, { "epoch": 45.47868852459016, "grad_norm": 5.1750617027282715, "learning_rate": 1.1937817856398115e-05, "loss": 0.9313, "step": 13871 }, { "epoch": 45.48196721311476, "grad_norm": 8.712133407592773, "learning_rate": 1.1936776086918552e-05, "loss": 0.6368, "step": 13872 }, { "epoch": 45.485245901639345, "grad_norm": 5.576364040374756, "learning_rate": 1.1935734295599826e-05, "loss": 0.5257, "step": 13873 }, { "epoch": 45.488524590163934, "grad_norm": 6.575456619262695, "learning_rate": 1.1934692482453683e-05, "loss": 0.5414, "step": 13874 }, { "epoch": 45.49180327868852, "grad_norm": 8.210951805114746, "learning_rate": 1.193365064749187e-05, "loss": 0.5248, "step": 13875 }, { "epoch": 45.49508196721312, "grad_norm": 5.842141151428223, "learning_rate": 1.1932608790726137e-05, "loss": 0.4976, "step": 13876 }, { "epoch": 45.498360655737706, "grad_norm": 12.695504188537598, "learning_rate": 1.1931566912168229e-05, "loss": 0.5295, "step": 13877 }, { "epoch": 45.501639344262294, "grad_norm": 8.65814208984375, "learning_rate": 1.1930525011829896e-05, "loss": 0.946, "step": 13878 }, { "epoch": 45.50491803278688, "grad_norm": 7.874116897583008, "learning_rate": 1.1929483089722887e-05, "loss": 0.7626, "step": 13879 }, { "epoch": 45.50819672131148, "grad_norm": 7.068567276000977, "learning_rate": 1.192844114585895e-05, "loss": 0.6883, "step": 13880 }, { "epoch": 45.511475409836066, "grad_norm": 69.05923461914062, "learning_rate": 1.1927399180249832e-05, "loss": 0.4576, "step": 13881 }, { "epoch": 45.514754098360655, "grad_norm": 6.199620723724365, "learning_rate": 1.1926357192907285e-05, "loss": 0.6671, "step": 13882 }, { "epoch": 45.51803278688524, "grad_norm": 7.253248691558838, "learning_rate": 1.1925315183843058e-05, "loss": 0.7924, "step": 13883 }, { "epoch": 45.52131147540984, "grad_norm": 7.263664245605469, "learning_rate": 1.1924273153068901e-05, "loss": 0.7348, "step": 13884 }, { "epoch": 45.52459016393443, "grad_norm": 7.061496257781982, "learning_rate": 1.1923231100596565e-05, "loss": 0.8231, "step": 13885 }, { "epoch": 45.527868852459015, "grad_norm": 5.668055057525635, "learning_rate": 1.1922189026437793e-05, "loss": 0.5842, "step": 13886 }, { "epoch": 45.5311475409836, "grad_norm": 7.661664009094238, "learning_rate": 1.1921146930604348e-05, "loss": 0.7494, "step": 13887 }, { "epoch": 45.5344262295082, "grad_norm": 7.404150009155273, "learning_rate": 1.1920104813107969e-05, "loss": 0.4501, "step": 13888 }, { "epoch": 45.53770491803279, "grad_norm": 13.312908172607422, "learning_rate": 1.1919062673960415e-05, "loss": 0.974, "step": 13889 }, { "epoch": 45.540983606557376, "grad_norm": 7.3360514640808105, "learning_rate": 1.1918020513173432e-05, "loss": 0.6069, "step": 13890 }, { "epoch": 45.544262295081964, "grad_norm": 12.301837921142578, "learning_rate": 1.1916978330758774e-05, "loss": 0.6382, "step": 13891 }, { "epoch": 45.54754098360656, "grad_norm": 6.788354396820068, "learning_rate": 1.1915936126728193e-05, "loss": 0.904, "step": 13892 }, { "epoch": 45.55081967213115, "grad_norm": 9.481228828430176, "learning_rate": 1.191489390109344e-05, "loss": 0.7623, "step": 13893 }, { "epoch": 45.554098360655736, "grad_norm": 14.68188762664795, "learning_rate": 1.1913851653866266e-05, "loss": 0.9644, "step": 13894 }, { "epoch": 45.557377049180324, "grad_norm": 6.3351521492004395, "learning_rate": 1.1912809385058425e-05, "loss": 0.7436, "step": 13895 }, { "epoch": 45.56065573770492, "grad_norm": 8.532129287719727, "learning_rate": 1.191176709468167e-05, "loss": 0.6056, "step": 13896 }, { "epoch": 45.56393442622951, "grad_norm": 11.375141143798828, "learning_rate": 1.1910724782747756e-05, "loss": 0.6475, "step": 13897 }, { "epoch": 45.5672131147541, "grad_norm": 44.13644790649414, "learning_rate": 1.1909682449268431e-05, "loss": 0.5439, "step": 13898 }, { "epoch": 45.570491803278685, "grad_norm": 6.1307220458984375, "learning_rate": 1.1908640094255449e-05, "loss": 0.7083, "step": 13899 }, { "epoch": 45.57377049180328, "grad_norm": 9.92250919342041, "learning_rate": 1.1907597717720567e-05, "loss": 0.7764, "step": 13900 }, { "epoch": 45.57704918032787, "grad_norm": 10.945440292358398, "learning_rate": 1.1906555319675537e-05, "loss": 0.687, "step": 13901 }, { "epoch": 45.58032786885246, "grad_norm": 7.8970136642456055, "learning_rate": 1.1905512900132114e-05, "loss": 0.6836, "step": 13902 }, { "epoch": 45.58360655737705, "grad_norm": 9.154990196228027, "learning_rate": 1.1904470459102051e-05, "loss": 0.7205, "step": 13903 }, { "epoch": 45.58688524590164, "grad_norm": 7.9629693031311035, "learning_rate": 1.1903427996597106e-05, "loss": 0.5129, "step": 13904 }, { "epoch": 45.59016393442623, "grad_norm": 6.804569721221924, "learning_rate": 1.1902385512629026e-05, "loss": 0.5627, "step": 13905 }, { "epoch": 45.59344262295082, "grad_norm": 9.20070743560791, "learning_rate": 1.1901343007209575e-05, "loss": 0.6444, "step": 13906 }, { "epoch": 45.59672131147541, "grad_norm": 6.936881065368652, "learning_rate": 1.1900300480350504e-05, "loss": 0.5365, "step": 13907 }, { "epoch": 45.6, "grad_norm": 7.224920272827148, "learning_rate": 1.189925793206357e-05, "loss": 0.6918, "step": 13908 }, { "epoch": 45.60327868852459, "grad_norm": 7.041031837463379, "learning_rate": 1.1898215362360527e-05, "loss": 0.7775, "step": 13909 }, { "epoch": 45.60655737704918, "grad_norm": 16.609580993652344, "learning_rate": 1.1897172771253133e-05, "loss": 0.7617, "step": 13910 }, { "epoch": 45.609836065573774, "grad_norm": 8.643889427185059, "learning_rate": 1.1896130158753143e-05, "loss": 0.5876, "step": 13911 }, { "epoch": 45.61311475409836, "grad_norm": 8.297006607055664, "learning_rate": 1.1895087524872316e-05, "loss": 0.9075, "step": 13912 }, { "epoch": 45.61639344262295, "grad_norm": 7.611166477203369, "learning_rate": 1.1894044869622403e-05, "loss": 0.8439, "step": 13913 }, { "epoch": 45.61967213114754, "grad_norm": 12.138594627380371, "learning_rate": 1.1893002193015166e-05, "loss": 0.5383, "step": 13914 }, { "epoch": 45.622950819672134, "grad_norm": 7.523990154266357, "learning_rate": 1.1891959495062362e-05, "loss": 0.4539, "step": 13915 }, { "epoch": 45.62622950819672, "grad_norm": 5.9622721672058105, "learning_rate": 1.1890916775775747e-05, "loss": 0.6943, "step": 13916 }, { "epoch": 45.62950819672131, "grad_norm": 51.6323356628418, "learning_rate": 1.188987403516708e-05, "loss": 0.6697, "step": 13917 }, { "epoch": 45.6327868852459, "grad_norm": 9.672122955322266, "learning_rate": 1.1888831273248116e-05, "loss": 0.7892, "step": 13918 }, { "epoch": 45.636065573770495, "grad_norm": 9.77629566192627, "learning_rate": 1.1887788490030619e-05, "loss": 1.1201, "step": 13919 }, { "epoch": 45.63934426229508, "grad_norm": 10.524499893188477, "learning_rate": 1.1886745685526342e-05, "loss": 0.699, "step": 13920 }, { "epoch": 45.64262295081967, "grad_norm": 9.305091857910156, "learning_rate": 1.1885702859747047e-05, "loss": 0.8387, "step": 13921 }, { "epoch": 45.64590163934426, "grad_norm": 7.777833461761475, "learning_rate": 1.188466001270449e-05, "loss": 0.798, "step": 13922 }, { "epoch": 45.649180327868855, "grad_norm": 12.664791107177734, "learning_rate": 1.1883617144410433e-05, "loss": 0.6293, "step": 13923 }, { "epoch": 45.65245901639344, "grad_norm": 10.901954650878906, "learning_rate": 1.1882574254876633e-05, "loss": 0.6265, "step": 13924 }, { "epoch": 45.65573770491803, "grad_norm": 27.169832229614258, "learning_rate": 1.1881531344114852e-05, "loss": 0.6761, "step": 13925 }, { "epoch": 45.65901639344262, "grad_norm": 9.034186363220215, "learning_rate": 1.1880488412136848e-05, "loss": 0.6043, "step": 13926 }, { "epoch": 45.662295081967216, "grad_norm": 12.305630683898926, "learning_rate": 1.1879445458954382e-05, "loss": 0.7518, "step": 13927 }, { "epoch": 45.665573770491804, "grad_norm": 8.868887901306152, "learning_rate": 1.1878402484579214e-05, "loss": 0.5463, "step": 13928 }, { "epoch": 45.66885245901639, "grad_norm": 7.740640640258789, "learning_rate": 1.1877359489023105e-05, "loss": 0.76, "step": 13929 }, { "epoch": 45.67213114754098, "grad_norm": 8.603486061096191, "learning_rate": 1.1876316472297818e-05, "loss": 0.7657, "step": 13930 }, { "epoch": 45.675409836065576, "grad_norm": 9.897993087768555, "learning_rate": 1.1875273434415108e-05, "loss": 0.7762, "step": 13931 }, { "epoch": 45.678688524590164, "grad_norm": 6.910926342010498, "learning_rate": 1.1874230375386745e-05, "loss": 0.7585, "step": 13932 }, { "epoch": 45.68196721311475, "grad_norm": 7.480660438537598, "learning_rate": 1.1873187295224482e-05, "loss": 0.6937, "step": 13933 }, { "epoch": 45.68524590163934, "grad_norm": 8.994328498840332, "learning_rate": 1.1872144193940087e-05, "loss": 0.5852, "step": 13934 }, { "epoch": 45.68852459016394, "grad_norm": 21.22798728942871, "learning_rate": 1.1871101071545317e-05, "loss": 0.793, "step": 13935 }, { "epoch": 45.691803278688525, "grad_norm": 8.732731819152832, "learning_rate": 1.1870057928051939e-05, "loss": 0.4879, "step": 13936 }, { "epoch": 45.69508196721311, "grad_norm": 7.165849208831787, "learning_rate": 1.1869014763471713e-05, "loss": 0.6769, "step": 13937 }, { "epoch": 45.6983606557377, "grad_norm": 7.144831657409668, "learning_rate": 1.1867971577816401e-05, "loss": 0.6242, "step": 13938 }, { "epoch": 45.7016393442623, "grad_norm": 6.798626899719238, "learning_rate": 1.1866928371097768e-05, "loss": 0.8828, "step": 13939 }, { "epoch": 45.704918032786885, "grad_norm": 10.934534072875977, "learning_rate": 1.1865885143327578e-05, "loss": 0.6182, "step": 13940 }, { "epoch": 45.708196721311474, "grad_norm": 7.453314781188965, "learning_rate": 1.1864841894517591e-05, "loss": 0.857, "step": 13941 }, { "epoch": 45.71147540983607, "grad_norm": 6.2935791015625, "learning_rate": 1.1863798624679573e-05, "loss": 0.6412, "step": 13942 }, { "epoch": 45.71475409836066, "grad_norm": 6.703152179718018, "learning_rate": 1.186275533382529e-05, "loss": 0.6882, "step": 13943 }, { "epoch": 45.718032786885246, "grad_norm": 8.662792205810547, "learning_rate": 1.18617120219665e-05, "loss": 0.5514, "step": 13944 }, { "epoch": 45.721311475409834, "grad_norm": 11.191614151000977, "learning_rate": 1.1860668689114973e-05, "loss": 0.7496, "step": 13945 }, { "epoch": 45.72459016393443, "grad_norm": 6.825950622558594, "learning_rate": 1.1859625335282474e-05, "loss": 0.6615, "step": 13946 }, { "epoch": 45.72786885245902, "grad_norm": 7.08752965927124, "learning_rate": 1.1858581960480764e-05, "loss": 1.0037, "step": 13947 }, { "epoch": 45.731147540983606, "grad_norm": 6.790338516235352, "learning_rate": 1.185753856472161e-05, "loss": 0.5239, "step": 13948 }, { "epoch": 45.734426229508195, "grad_norm": 6.646660804748535, "learning_rate": 1.1856495148016779e-05, "loss": 0.7409, "step": 13949 }, { "epoch": 45.73770491803279, "grad_norm": 10.391172409057617, "learning_rate": 1.1855451710378031e-05, "loss": 0.7251, "step": 13950 }, { "epoch": 45.74098360655738, "grad_norm": 8.05413818359375, "learning_rate": 1.185440825181714e-05, "loss": 0.6758, "step": 13951 }, { "epoch": 45.74426229508197, "grad_norm": 8.207406997680664, "learning_rate": 1.1853364772345865e-05, "loss": 0.6975, "step": 13952 }, { "epoch": 45.747540983606555, "grad_norm": 6.875355243682861, "learning_rate": 1.1852321271975975e-05, "loss": 0.6266, "step": 13953 }, { "epoch": 45.75081967213115, "grad_norm": 6.316845417022705, "learning_rate": 1.185127775071924e-05, "loss": 0.5741, "step": 13954 }, { "epoch": 45.75409836065574, "grad_norm": 5.273309230804443, "learning_rate": 1.1850234208587418e-05, "loss": 0.794, "step": 13955 }, { "epoch": 45.75737704918033, "grad_norm": 6.552456855773926, "learning_rate": 1.1849190645592288e-05, "loss": 0.4523, "step": 13956 }, { "epoch": 45.760655737704916, "grad_norm": 29.085891723632812, "learning_rate": 1.1848147061745608e-05, "loss": 0.4277, "step": 13957 }, { "epoch": 45.76393442622951, "grad_norm": 7.526770114898682, "learning_rate": 1.1847103457059148e-05, "loss": 0.6512, "step": 13958 }, { "epoch": 45.7672131147541, "grad_norm": 10.198744773864746, "learning_rate": 1.1846059831544673e-05, "loss": 0.7751, "step": 13959 }, { "epoch": 45.77049180327869, "grad_norm": 9.289836883544922, "learning_rate": 1.184501618521396e-05, "loss": 0.6891, "step": 13960 }, { "epoch": 45.773770491803276, "grad_norm": 8.171814918518066, "learning_rate": 1.1843972518078767e-05, "loss": 0.742, "step": 13961 }, { "epoch": 45.77704918032787, "grad_norm": 8.041097640991211, "learning_rate": 1.1842928830150869e-05, "loss": 0.7117, "step": 13962 }, { "epoch": 45.78032786885246, "grad_norm": 5.97736930847168, "learning_rate": 1.1841885121442032e-05, "loss": 0.6785, "step": 13963 }, { "epoch": 45.78360655737705, "grad_norm": 6.414918899536133, "learning_rate": 1.1840841391964023e-05, "loss": 0.841, "step": 13964 }, { "epoch": 45.78688524590164, "grad_norm": 7.745952606201172, "learning_rate": 1.1839797641728612e-05, "loss": 0.5715, "step": 13965 }, { "epoch": 45.79016393442623, "grad_norm": 5.614185810089111, "learning_rate": 1.1838753870747575e-05, "loss": 0.4599, "step": 13966 }, { "epoch": 45.79344262295082, "grad_norm": 8.650702476501465, "learning_rate": 1.1837710079032671e-05, "loss": 0.7145, "step": 13967 }, { "epoch": 45.79672131147541, "grad_norm": 10.132913589477539, "learning_rate": 1.1836666266595678e-05, "loss": 0.864, "step": 13968 }, { "epoch": 45.8, "grad_norm": 7.525055885314941, "learning_rate": 1.1835622433448361e-05, "loss": 0.5994, "step": 13969 }, { "epoch": 45.80327868852459, "grad_norm": 6.4674787521362305, "learning_rate": 1.1834578579602496e-05, "loss": 0.61, "step": 13970 }, { "epoch": 45.80655737704918, "grad_norm": 9.502344131469727, "learning_rate": 1.1833534705069848e-05, "loss": 0.4706, "step": 13971 }, { "epoch": 45.80983606557377, "grad_norm": 8.242715835571289, "learning_rate": 1.1832490809862189e-05, "loss": 0.5513, "step": 13972 }, { "epoch": 45.81311475409836, "grad_norm": 11.757991790771484, "learning_rate": 1.1831446893991294e-05, "loss": 0.7819, "step": 13973 }, { "epoch": 45.81639344262295, "grad_norm": 7.911838054656982, "learning_rate": 1.1830402957468927e-05, "loss": 0.8199, "step": 13974 }, { "epoch": 45.81967213114754, "grad_norm": 10.01766586303711, "learning_rate": 1.1829359000306867e-05, "loss": 0.8697, "step": 13975 }, { "epoch": 45.82295081967213, "grad_norm": 7.213202953338623, "learning_rate": 1.1828315022516881e-05, "loss": 0.7765, "step": 13976 }, { "epoch": 45.82622950819672, "grad_norm": 8.020058631896973, "learning_rate": 1.182727102411074e-05, "loss": 0.6894, "step": 13977 }, { "epoch": 45.829508196721314, "grad_norm": 7.891575813293457, "learning_rate": 1.1826227005100222e-05, "loss": 0.6994, "step": 13978 }, { "epoch": 45.8327868852459, "grad_norm": 8.166693687438965, "learning_rate": 1.1825182965497092e-05, "loss": 0.8202, "step": 13979 }, { "epoch": 45.83606557377049, "grad_norm": 9.405390739440918, "learning_rate": 1.182413890531313e-05, "loss": 0.5556, "step": 13980 }, { "epoch": 45.83934426229508, "grad_norm": 7.207626819610596, "learning_rate": 1.1823094824560102e-05, "loss": 0.7843, "step": 13981 }, { "epoch": 45.842622950819674, "grad_norm": 8.635390281677246, "learning_rate": 1.1822050723249787e-05, "loss": 0.8533, "step": 13982 }, { "epoch": 45.84590163934426, "grad_norm": 9.225932121276855, "learning_rate": 1.1821006601393954e-05, "loss": 0.6984, "step": 13983 }, { "epoch": 45.84918032786885, "grad_norm": 7.951817512512207, "learning_rate": 1.1819962459004379e-05, "loss": 0.6397, "step": 13984 }, { "epoch": 45.85245901639344, "grad_norm": 7.910554885864258, "learning_rate": 1.1818918296092834e-05, "loss": 0.6464, "step": 13985 }, { "epoch": 45.855737704918035, "grad_norm": 12.765883445739746, "learning_rate": 1.1817874112671096e-05, "loss": 0.6034, "step": 13986 }, { "epoch": 45.85901639344262, "grad_norm": 15.53738021850586, "learning_rate": 1.1816829908750935e-05, "loss": 0.8049, "step": 13987 }, { "epoch": 45.86229508196721, "grad_norm": 7.214761734008789, "learning_rate": 1.181578568434413e-05, "loss": 0.5147, "step": 13988 }, { "epoch": 45.86557377049181, "grad_norm": 7.887706279754639, "learning_rate": 1.1814741439462451e-05, "loss": 0.7225, "step": 13989 }, { "epoch": 45.868852459016395, "grad_norm": 5.855155944824219, "learning_rate": 1.1813697174117675e-05, "loss": 1.0425, "step": 13990 }, { "epoch": 45.87213114754098, "grad_norm": 6.098721027374268, "learning_rate": 1.181265288832158e-05, "loss": 0.6232, "step": 13991 }, { "epoch": 45.87540983606557, "grad_norm": 6.868618011474609, "learning_rate": 1.1811608582085938e-05, "loss": 0.4922, "step": 13992 }, { "epoch": 45.87868852459017, "grad_norm": 10.016484260559082, "learning_rate": 1.1810564255422526e-05, "loss": 0.6325, "step": 13993 }, { "epoch": 45.881967213114756, "grad_norm": 6.564746856689453, "learning_rate": 1.180951990834312e-05, "loss": 0.8101, "step": 13994 }, { "epoch": 45.885245901639344, "grad_norm": 8.089183807373047, "learning_rate": 1.1808475540859492e-05, "loss": 0.5436, "step": 13995 }, { "epoch": 45.88852459016393, "grad_norm": 7.682507038116455, "learning_rate": 1.1807431152983423e-05, "loss": 0.6591, "step": 13996 }, { "epoch": 45.89180327868853, "grad_norm": 15.389555931091309, "learning_rate": 1.1806386744726693e-05, "loss": 0.7668, "step": 13997 }, { "epoch": 45.895081967213116, "grad_norm": 9.973822593688965, "learning_rate": 1.180534231610107e-05, "loss": 0.5566, "step": 13998 }, { "epoch": 45.898360655737704, "grad_norm": 7.864930152893066, "learning_rate": 1.1804297867118338e-05, "loss": 0.5305, "step": 13999 }, { "epoch": 45.90163934426229, "grad_norm": 7.186824321746826, "learning_rate": 1.180325339779027e-05, "loss": 0.613, "step": 14000 }, { "epoch": 45.90491803278689, "grad_norm": 19.687746047973633, "learning_rate": 1.1802208908128645e-05, "loss": 0.6971, "step": 14001 }, { "epoch": 45.90819672131148, "grad_norm": 8.575642585754395, "learning_rate": 1.1801164398145245e-05, "loss": 0.6564, "step": 14002 }, { "epoch": 45.911475409836065, "grad_norm": 11.298977851867676, "learning_rate": 1.1800119867851837e-05, "loss": 0.4829, "step": 14003 }, { "epoch": 45.91475409836065, "grad_norm": 8.78116226196289, "learning_rate": 1.1799075317260209e-05, "loss": 0.5473, "step": 14004 }, { "epoch": 45.91803278688525, "grad_norm": 6.787496566772461, "learning_rate": 1.1798030746382136e-05, "loss": 0.5781, "step": 14005 }, { "epoch": 45.92131147540984, "grad_norm": 12.365489959716797, "learning_rate": 1.1796986155229397e-05, "loss": 0.5877, "step": 14006 }, { "epoch": 45.924590163934425, "grad_norm": 7.055874347686768, "learning_rate": 1.1795941543813769e-05, "loss": 0.4306, "step": 14007 }, { "epoch": 45.927868852459014, "grad_norm": 6.367859840393066, "learning_rate": 1.1794896912147037e-05, "loss": 0.4146, "step": 14008 }, { "epoch": 45.93114754098361, "grad_norm": 6.6688232421875, "learning_rate": 1.179385226024097e-05, "loss": 0.5746, "step": 14009 }, { "epoch": 45.9344262295082, "grad_norm": 11.222711563110352, "learning_rate": 1.1792807588107358e-05, "loss": 0.5621, "step": 14010 }, { "epoch": 45.937704918032786, "grad_norm": 8.2923583984375, "learning_rate": 1.1791762895757974e-05, "loss": 0.7967, "step": 14011 }, { "epoch": 45.940983606557374, "grad_norm": 5.850050449371338, "learning_rate": 1.1790718183204603e-05, "loss": 0.4342, "step": 14012 }, { "epoch": 45.94426229508197, "grad_norm": 6.688544750213623, "learning_rate": 1.178967345045902e-05, "loss": 0.9786, "step": 14013 }, { "epoch": 45.94754098360656, "grad_norm": 11.639066696166992, "learning_rate": 1.1788628697533012e-05, "loss": 0.6476, "step": 14014 }, { "epoch": 45.950819672131146, "grad_norm": 8.187540054321289, "learning_rate": 1.1787583924438352e-05, "loss": 0.6418, "step": 14015 }, { "epoch": 45.954098360655735, "grad_norm": 6.658229351043701, "learning_rate": 1.1786539131186828e-05, "loss": 0.8646, "step": 14016 }, { "epoch": 45.95737704918033, "grad_norm": 11.914138793945312, "learning_rate": 1.1785494317790214e-05, "loss": 0.6931, "step": 14017 }, { "epoch": 45.96065573770492, "grad_norm": 8.642834663391113, "learning_rate": 1.1784449484260297e-05, "loss": 0.6904, "step": 14018 }, { "epoch": 45.96393442622951, "grad_norm": 6.032801151275635, "learning_rate": 1.1783404630608854e-05, "loss": 0.7914, "step": 14019 }, { "epoch": 45.967213114754095, "grad_norm": 10.918596267700195, "learning_rate": 1.1782359756847673e-05, "loss": 0.7143, "step": 14020 }, { "epoch": 45.97049180327869, "grad_norm": 7.777734279632568, "learning_rate": 1.178131486298853e-05, "loss": 0.7679, "step": 14021 }, { "epoch": 45.97377049180328, "grad_norm": 6.838663578033447, "learning_rate": 1.1780269949043212e-05, "loss": 0.713, "step": 14022 }, { "epoch": 45.97704918032787, "grad_norm": 7.111409664154053, "learning_rate": 1.1779225015023497e-05, "loss": 0.5966, "step": 14023 }, { "epoch": 45.980327868852456, "grad_norm": 8.953436851501465, "learning_rate": 1.1778180060941172e-05, "loss": 0.6529, "step": 14024 }, { "epoch": 45.98360655737705, "grad_norm": 6.444719314575195, "learning_rate": 1.1777135086808018e-05, "loss": 0.7578, "step": 14025 }, { "epoch": 45.98688524590164, "grad_norm": 5.91796350479126, "learning_rate": 1.1776090092635818e-05, "loss": 0.8892, "step": 14026 }, { "epoch": 45.99016393442623, "grad_norm": 26.08374786376953, "learning_rate": 1.1775045078436353e-05, "loss": 0.7017, "step": 14027 }, { "epoch": 45.993442622950816, "grad_norm": 9.138446807861328, "learning_rate": 1.1774000044221414e-05, "loss": 0.726, "step": 14028 }, { "epoch": 45.99672131147541, "grad_norm": 6.676384449005127, "learning_rate": 1.1772954990002776e-05, "loss": 0.7634, "step": 14029 }, { "epoch": 46.0, "grad_norm": 8.710521697998047, "learning_rate": 1.177190991579223e-05, "loss": 0.8298, "step": 14030 }, { "epoch": 46.00327868852459, "grad_norm": 6.369886875152588, "learning_rate": 1.1770864821601553e-05, "loss": 0.6808, "step": 14031 }, { "epoch": 46.006557377049184, "grad_norm": 7.693785190582275, "learning_rate": 1.1769819707442538e-05, "loss": 0.8992, "step": 14032 }, { "epoch": 46.00983606557377, "grad_norm": 9.787626266479492, "learning_rate": 1.1768774573326965e-05, "loss": 0.5055, "step": 14033 }, { "epoch": 46.01311475409836, "grad_norm": 5.918605804443359, "learning_rate": 1.1767729419266618e-05, "loss": 0.5477, "step": 14034 }, { "epoch": 46.01639344262295, "grad_norm": 7.461677074432373, "learning_rate": 1.1766684245273286e-05, "loss": 0.5974, "step": 14035 }, { "epoch": 46.019672131147544, "grad_norm": 6.1439714431762695, "learning_rate": 1.176563905135875e-05, "loss": 0.6447, "step": 14036 }, { "epoch": 46.02295081967213, "grad_norm": 6.0949249267578125, "learning_rate": 1.17645938375348e-05, "loss": 0.4097, "step": 14037 }, { "epoch": 46.02622950819672, "grad_norm": 7.29119873046875, "learning_rate": 1.1763548603813218e-05, "loss": 0.6616, "step": 14038 }, { "epoch": 46.02950819672131, "grad_norm": 5.480334758758545, "learning_rate": 1.1762503350205792e-05, "loss": 0.6492, "step": 14039 }, { "epoch": 46.032786885245905, "grad_norm": 7.199010372161865, "learning_rate": 1.1761458076724308e-05, "loss": 0.6044, "step": 14040 }, { "epoch": 46.03606557377049, "grad_norm": 5.481022834777832, "learning_rate": 1.1760412783380555e-05, "loss": 0.6443, "step": 14041 }, { "epoch": 46.03934426229508, "grad_norm": 9.267760276794434, "learning_rate": 1.1759367470186316e-05, "loss": 0.8671, "step": 14042 }, { "epoch": 46.04262295081967, "grad_norm": 8.131989479064941, "learning_rate": 1.175832213715338e-05, "loss": 0.5714, "step": 14043 }, { "epoch": 46.045901639344265, "grad_norm": 5.790295124053955, "learning_rate": 1.1757276784293531e-05, "loss": 0.759, "step": 14044 }, { "epoch": 46.049180327868854, "grad_norm": 6.276011943817139, "learning_rate": 1.1756231411618563e-05, "loss": 0.4926, "step": 14045 }, { "epoch": 46.05245901639344, "grad_norm": 21.95463752746582, "learning_rate": 1.1755186019140257e-05, "loss": 0.4988, "step": 14046 }, { "epoch": 46.05573770491803, "grad_norm": 5.407639503479004, "learning_rate": 1.1754140606870403e-05, "loss": 0.5579, "step": 14047 }, { "epoch": 46.059016393442626, "grad_norm": 12.044167518615723, "learning_rate": 1.1753095174820794e-05, "loss": 0.585, "step": 14048 }, { "epoch": 46.062295081967214, "grad_norm": 7.470001220703125, "learning_rate": 1.175204972300321e-05, "loss": 0.4351, "step": 14049 }, { "epoch": 46.0655737704918, "grad_norm": 7.080703258514404, "learning_rate": 1.1751004251429446e-05, "loss": 0.6183, "step": 14050 }, { "epoch": 46.06885245901639, "grad_norm": 4.836883544921875, "learning_rate": 1.1749958760111286e-05, "loss": 0.6327, "step": 14051 }, { "epoch": 46.072131147540986, "grad_norm": 20.938278198242188, "learning_rate": 1.1748913249060523e-05, "loss": 0.8112, "step": 14052 }, { "epoch": 46.075409836065575, "grad_norm": 6.281764507293701, "learning_rate": 1.1747867718288948e-05, "loss": 0.6112, "step": 14053 }, { "epoch": 46.07868852459016, "grad_norm": 5.944125175476074, "learning_rate": 1.1746822167808344e-05, "loss": 0.3006, "step": 14054 }, { "epoch": 46.08196721311475, "grad_norm": 7.055207252502441, "learning_rate": 1.1745776597630502e-05, "loss": 0.4219, "step": 14055 }, { "epoch": 46.08524590163935, "grad_norm": 9.279796600341797, "learning_rate": 1.1744731007767219e-05, "loss": 0.6649, "step": 14056 }, { "epoch": 46.088524590163935, "grad_norm": 8.487800598144531, "learning_rate": 1.1743685398230273e-05, "loss": 0.8716, "step": 14057 }, { "epoch": 46.09180327868852, "grad_norm": 9.0087308883667, "learning_rate": 1.1742639769031467e-05, "loss": 0.8773, "step": 14058 }, { "epoch": 46.09508196721311, "grad_norm": 6.395232677459717, "learning_rate": 1.1741594120182582e-05, "loss": 0.6859, "step": 14059 }, { "epoch": 46.09836065573771, "grad_norm": 6.384952545166016, "learning_rate": 1.1740548451695415e-05, "loss": 0.6292, "step": 14060 }, { "epoch": 46.101639344262296, "grad_norm": 6.394045352935791, "learning_rate": 1.1739502763581752e-05, "loss": 0.3804, "step": 14061 }, { "epoch": 46.104918032786884, "grad_norm": 6.393052577972412, "learning_rate": 1.173845705585339e-05, "loss": 0.562, "step": 14062 }, { "epoch": 46.10819672131147, "grad_norm": 6.539155006408691, "learning_rate": 1.1737411328522115e-05, "loss": 0.7395, "step": 14063 }, { "epoch": 46.11147540983607, "grad_norm": 5.925066947937012, "learning_rate": 1.1736365581599721e-05, "loss": 0.5462, "step": 14064 }, { "epoch": 46.114754098360656, "grad_norm": 23.391191482543945, "learning_rate": 1.1735319815097999e-05, "loss": 0.5894, "step": 14065 }, { "epoch": 46.118032786885244, "grad_norm": 5.697593688964844, "learning_rate": 1.1734274029028743e-05, "loss": 0.6096, "step": 14066 }, { "epoch": 46.12131147540983, "grad_norm": 26.829132080078125, "learning_rate": 1.1733228223403745e-05, "loss": 0.4604, "step": 14067 }, { "epoch": 46.12459016393443, "grad_norm": 7.007139205932617, "learning_rate": 1.1732182398234795e-05, "loss": 0.6756, "step": 14068 }, { "epoch": 46.12786885245902, "grad_norm": 9.777054786682129, "learning_rate": 1.1731136553533689e-05, "loss": 0.6356, "step": 14069 }, { "epoch": 46.131147540983605, "grad_norm": 5.686227321624756, "learning_rate": 1.1730090689312216e-05, "loss": 0.4971, "step": 14070 }, { "epoch": 46.13442622950819, "grad_norm": 10.69310188293457, "learning_rate": 1.1729044805582173e-05, "loss": 0.7452, "step": 14071 }, { "epoch": 46.13770491803279, "grad_norm": 7.808959007263184, "learning_rate": 1.172799890235535e-05, "loss": 0.5286, "step": 14072 }, { "epoch": 46.14098360655738, "grad_norm": 6.986111164093018, "learning_rate": 1.1726952979643546e-05, "loss": 0.7059, "step": 14073 }, { "epoch": 46.144262295081965, "grad_norm": 7.702972888946533, "learning_rate": 1.172590703745855e-05, "loss": 0.5149, "step": 14074 }, { "epoch": 46.14754098360656, "grad_norm": 6.38146448135376, "learning_rate": 1.1724861075812158e-05, "loss": 0.6794, "step": 14075 }, { "epoch": 46.15081967213115, "grad_norm": 6.174106597900391, "learning_rate": 1.1723815094716165e-05, "loss": 0.4977, "step": 14076 }, { "epoch": 46.15409836065574, "grad_norm": 6.973898410797119, "learning_rate": 1.172276909418236e-05, "loss": 0.5972, "step": 14077 }, { "epoch": 46.157377049180326, "grad_norm": 10.445889472961426, "learning_rate": 1.1721723074222546e-05, "loss": 0.6259, "step": 14078 }, { "epoch": 46.16065573770492, "grad_norm": 6.83654260635376, "learning_rate": 1.1720677034848516e-05, "loss": 0.7556, "step": 14079 }, { "epoch": 46.16393442622951, "grad_norm": 7.499392509460449, "learning_rate": 1.171963097607206e-05, "loss": 0.5902, "step": 14080 }, { "epoch": 46.1672131147541, "grad_norm": 6.429505825042725, "learning_rate": 1.1718584897904978e-05, "loss": 0.5877, "step": 14081 }, { "epoch": 46.170491803278686, "grad_norm": 14.997905731201172, "learning_rate": 1.1717538800359065e-05, "loss": 0.7337, "step": 14082 }, { "epoch": 46.17377049180328, "grad_norm": 7.467113494873047, "learning_rate": 1.1716492683446115e-05, "loss": 0.5224, "step": 14083 }, { "epoch": 46.17704918032787, "grad_norm": 9.917892456054688, "learning_rate": 1.1715446547177926e-05, "loss": 0.5361, "step": 14084 }, { "epoch": 46.18032786885246, "grad_norm": 6.65418815612793, "learning_rate": 1.1714400391566292e-05, "loss": 0.7489, "step": 14085 }, { "epoch": 46.18360655737705, "grad_norm": 7.063413143157959, "learning_rate": 1.1713354216623014e-05, "loss": 0.7166, "step": 14086 }, { "epoch": 46.18688524590164, "grad_norm": 5.34768009185791, "learning_rate": 1.1712308022359884e-05, "loss": 0.6198, "step": 14087 }, { "epoch": 46.19016393442623, "grad_norm": 7.314189910888672, "learning_rate": 1.17112618087887e-05, "loss": 0.618, "step": 14088 }, { "epoch": 46.19344262295082, "grad_norm": 7.6578593254089355, "learning_rate": 1.1710215575921261e-05, "loss": 0.8074, "step": 14089 }, { "epoch": 46.19672131147541, "grad_norm": 6.025665760040283, "learning_rate": 1.1709169323769365e-05, "loss": 0.7252, "step": 14090 }, { "epoch": 46.2, "grad_norm": 7.289761066436768, "learning_rate": 1.1708123052344803e-05, "loss": 0.6742, "step": 14091 }, { "epoch": 46.20327868852459, "grad_norm": 6.614601135253906, "learning_rate": 1.1707076761659381e-05, "loss": 0.8165, "step": 14092 }, { "epoch": 46.20655737704918, "grad_norm": 8.22714900970459, "learning_rate": 1.1706030451724895e-05, "loss": 0.8144, "step": 14093 }, { "epoch": 46.20983606557377, "grad_norm": 5.160268306732178, "learning_rate": 1.1704984122553138e-05, "loss": 0.4551, "step": 14094 }, { "epoch": 46.21311475409836, "grad_norm": 7.813998222351074, "learning_rate": 1.1703937774155916e-05, "loss": 0.7165, "step": 14095 }, { "epoch": 46.21639344262295, "grad_norm": 9.170753479003906, "learning_rate": 1.170289140654502e-05, "loss": 0.8104, "step": 14096 }, { "epoch": 46.21967213114754, "grad_norm": 6.557090759277344, "learning_rate": 1.1701845019732256e-05, "loss": 0.9518, "step": 14097 }, { "epoch": 46.22295081967213, "grad_norm": 6.742056846618652, "learning_rate": 1.1700798613729419e-05, "loss": 0.7408, "step": 14098 }, { "epoch": 46.226229508196724, "grad_norm": 7.47540283203125, "learning_rate": 1.169975218854831e-05, "loss": 0.6992, "step": 14099 }, { "epoch": 46.22950819672131, "grad_norm": 13.806300163269043, "learning_rate": 1.1698705744200724e-05, "loss": 0.816, "step": 14100 }, { "epoch": 46.2327868852459, "grad_norm": 6.230739116668701, "learning_rate": 1.1697659280698468e-05, "loss": 0.3087, "step": 14101 }, { "epoch": 46.23606557377049, "grad_norm": 6.823498249053955, "learning_rate": 1.1696612798053337e-05, "loss": 0.5969, "step": 14102 }, { "epoch": 46.239344262295084, "grad_norm": 7.321627616882324, "learning_rate": 1.1695566296277135e-05, "loss": 0.5977, "step": 14103 }, { "epoch": 46.24262295081967, "grad_norm": 8.939764022827148, "learning_rate": 1.169451977538166e-05, "loss": 0.5045, "step": 14104 }, { "epoch": 46.24590163934426, "grad_norm": 9.002175331115723, "learning_rate": 1.169347323537871e-05, "loss": 0.8351, "step": 14105 }, { "epoch": 46.24918032786885, "grad_norm": 11.232108116149902, "learning_rate": 1.1692426676280088e-05, "loss": 0.7011, "step": 14106 }, { "epoch": 46.252459016393445, "grad_norm": 4.974362373352051, "learning_rate": 1.1691380098097598e-05, "loss": 0.6797, "step": 14107 }, { "epoch": 46.25573770491803, "grad_norm": 8.874528884887695, "learning_rate": 1.1690333500843039e-05, "loss": 0.6794, "step": 14108 }, { "epoch": 46.25901639344262, "grad_norm": 5.905694007873535, "learning_rate": 1.1689286884528211e-05, "loss": 0.8207, "step": 14109 }, { "epoch": 46.26229508196721, "grad_norm": 6.566954135894775, "learning_rate": 1.1688240249164916e-05, "loss": 0.6551, "step": 14110 }, { "epoch": 46.265573770491805, "grad_norm": 5.886348247528076, "learning_rate": 1.1687193594764958e-05, "loss": 0.6178, "step": 14111 }, { "epoch": 46.268852459016394, "grad_norm": 5.822439193725586, "learning_rate": 1.168614692134014e-05, "loss": 0.7429, "step": 14112 }, { "epoch": 46.27213114754098, "grad_norm": 7.365962505340576, "learning_rate": 1.1685100228902258e-05, "loss": 0.6411, "step": 14113 }, { "epoch": 46.27540983606557, "grad_norm": 7.119906902313232, "learning_rate": 1.1684053517463122e-05, "loss": 0.7061, "step": 14114 }, { "epoch": 46.278688524590166, "grad_norm": 6.528820991516113, "learning_rate": 1.168300678703453e-05, "loss": 0.6514, "step": 14115 }, { "epoch": 46.281967213114754, "grad_norm": 5.753468990325928, "learning_rate": 1.1681960037628288e-05, "loss": 0.4419, "step": 14116 }, { "epoch": 46.28524590163934, "grad_norm": 6.079106330871582, "learning_rate": 1.16809132692562e-05, "loss": 0.4694, "step": 14117 }, { "epoch": 46.28852459016394, "grad_norm": 10.877878189086914, "learning_rate": 1.1679866481930063e-05, "loss": 0.6805, "step": 14118 }, { "epoch": 46.291803278688526, "grad_norm": 7.4628376960754395, "learning_rate": 1.1678819675661688e-05, "loss": 0.7488, "step": 14119 }, { "epoch": 46.295081967213115, "grad_norm": 8.858858108520508, "learning_rate": 1.1677772850462872e-05, "loss": 0.4926, "step": 14120 }, { "epoch": 46.2983606557377, "grad_norm": 15.81279468536377, "learning_rate": 1.1676726006345424e-05, "loss": 0.5778, "step": 14121 }, { "epoch": 46.3016393442623, "grad_norm": 6.900452613830566, "learning_rate": 1.1675679143321148e-05, "loss": 0.5759, "step": 14122 }, { "epoch": 46.30491803278689, "grad_norm": 55.108394622802734, "learning_rate": 1.1674632261401848e-05, "loss": 0.8463, "step": 14123 }, { "epoch": 46.308196721311475, "grad_norm": 6.682821273803711, "learning_rate": 1.1673585360599324e-05, "loss": 0.4959, "step": 14124 }, { "epoch": 46.31147540983606, "grad_norm": 7.526483535766602, "learning_rate": 1.1672538440925389e-05, "loss": 0.7554, "step": 14125 }, { "epoch": 46.31475409836066, "grad_norm": 7.36767053604126, "learning_rate": 1.1671491502391843e-05, "loss": 0.6478, "step": 14126 }, { "epoch": 46.31803278688525, "grad_norm": 6.283412933349609, "learning_rate": 1.1670444545010493e-05, "loss": 0.5517, "step": 14127 }, { "epoch": 46.321311475409836, "grad_norm": 7.167769908905029, "learning_rate": 1.1669397568793141e-05, "loss": 0.4442, "step": 14128 }, { "epoch": 46.324590163934424, "grad_norm": 7.687861919403076, "learning_rate": 1.1668350573751599e-05, "loss": 0.6694, "step": 14129 }, { "epoch": 46.32786885245902, "grad_norm": 7.888954162597656, "learning_rate": 1.166730355989767e-05, "loss": 0.6128, "step": 14130 }, { "epoch": 46.33114754098361, "grad_norm": 6.782474040985107, "learning_rate": 1.1666256527243156e-05, "loss": 0.8179, "step": 14131 }, { "epoch": 46.334426229508196, "grad_norm": 11.238252639770508, "learning_rate": 1.166520947579987e-05, "loss": 0.8865, "step": 14132 }, { "epoch": 46.337704918032784, "grad_norm": 31.084821701049805, "learning_rate": 1.1664162405579616e-05, "loss": 0.6035, "step": 14133 }, { "epoch": 46.34098360655738, "grad_norm": 6.011995792388916, "learning_rate": 1.16631153165942e-05, "loss": 0.7373, "step": 14134 }, { "epoch": 46.34426229508197, "grad_norm": 14.9913911819458, "learning_rate": 1.1662068208855428e-05, "loss": 0.8472, "step": 14135 }, { "epoch": 46.34754098360656, "grad_norm": 5.917087554931641, "learning_rate": 1.1661021082375113e-05, "loss": 0.6326, "step": 14136 }, { "epoch": 46.350819672131145, "grad_norm": 5.931341171264648, "learning_rate": 1.1659973937165052e-05, "loss": 0.4219, "step": 14137 }, { "epoch": 46.35409836065574, "grad_norm": 9.147119522094727, "learning_rate": 1.1658926773237064e-05, "loss": 0.8445, "step": 14138 }, { "epoch": 46.35737704918033, "grad_norm": 7.520003318786621, "learning_rate": 1.165787959060295e-05, "loss": 0.6372, "step": 14139 }, { "epoch": 46.36065573770492, "grad_norm": 7.43834114074707, "learning_rate": 1.1656832389274522e-05, "loss": 0.6436, "step": 14140 }, { "epoch": 46.363934426229505, "grad_norm": 7.089094638824463, "learning_rate": 1.1655785169263583e-05, "loss": 0.6576, "step": 14141 }, { "epoch": 46.3672131147541, "grad_norm": 7.996570110321045, "learning_rate": 1.1654737930581948e-05, "loss": 0.5999, "step": 14142 }, { "epoch": 46.37049180327869, "grad_norm": 6.677794456481934, "learning_rate": 1.165369067324142e-05, "loss": 0.5714, "step": 14143 }, { "epoch": 46.37377049180328, "grad_norm": 7.06234884262085, "learning_rate": 1.1652643397253809e-05, "loss": 0.6831, "step": 14144 }, { "epoch": 46.377049180327866, "grad_norm": 5.754881858825684, "learning_rate": 1.165159610263093e-05, "loss": 0.6275, "step": 14145 }, { "epoch": 46.38032786885246, "grad_norm": 8.30588436126709, "learning_rate": 1.1650548789384582e-05, "loss": 0.6758, "step": 14146 }, { "epoch": 46.38360655737705, "grad_norm": 6.8931097984313965, "learning_rate": 1.1649501457526585e-05, "loss": 0.6135, "step": 14147 }, { "epoch": 46.38688524590164, "grad_norm": 6.791041851043701, "learning_rate": 1.1648454107068738e-05, "loss": 0.6941, "step": 14148 }, { "epoch": 46.390163934426226, "grad_norm": 8.337041854858398, "learning_rate": 1.1647406738022864e-05, "loss": 0.6361, "step": 14149 }, { "epoch": 46.39344262295082, "grad_norm": 7.867053985595703, "learning_rate": 1.164635935040076e-05, "loss": 0.8566, "step": 14150 }, { "epoch": 46.39672131147541, "grad_norm": 8.314702033996582, "learning_rate": 1.1645311944214249e-05, "loss": 0.8281, "step": 14151 }, { "epoch": 46.4, "grad_norm": 5.947676658630371, "learning_rate": 1.164426451947513e-05, "loss": 0.6258, "step": 14152 }, { "epoch": 46.40327868852459, "grad_norm": 10.160844802856445, "learning_rate": 1.1643217076195222e-05, "loss": 0.4991, "step": 14153 }, { "epoch": 46.40655737704918, "grad_norm": 6.337465286254883, "learning_rate": 1.164216961438633e-05, "loss": 0.6133, "step": 14154 }, { "epoch": 46.40983606557377, "grad_norm": 6.8384318351745605, "learning_rate": 1.164112213406027e-05, "loss": 0.7005, "step": 14155 }, { "epoch": 46.41311475409836, "grad_norm": 13.898707389831543, "learning_rate": 1.1640074635228852e-05, "loss": 0.8214, "step": 14156 }, { "epoch": 46.41639344262295, "grad_norm": 8.032434463500977, "learning_rate": 1.1639027117903883e-05, "loss": 0.5833, "step": 14157 }, { "epoch": 46.41967213114754, "grad_norm": 7.253436088562012, "learning_rate": 1.1637979582097186e-05, "loss": 0.64, "step": 14158 }, { "epoch": 46.42295081967213, "grad_norm": 5.285187721252441, "learning_rate": 1.163693202782056e-05, "loss": 0.6652, "step": 14159 }, { "epoch": 46.42622950819672, "grad_norm": 6.487152099609375, "learning_rate": 1.1635884455085828e-05, "loss": 0.5588, "step": 14160 }, { "epoch": 46.429508196721315, "grad_norm": 11.914780616760254, "learning_rate": 1.1634836863904794e-05, "loss": 0.6467, "step": 14161 }, { "epoch": 46.4327868852459, "grad_norm": 8.53439712524414, "learning_rate": 1.1633789254289278e-05, "loss": 0.9974, "step": 14162 }, { "epoch": 46.43606557377049, "grad_norm": 11.398533821105957, "learning_rate": 1.1632741626251087e-05, "loss": 0.4803, "step": 14163 }, { "epoch": 46.43934426229508, "grad_norm": 5.7964348793029785, "learning_rate": 1.163169397980204e-05, "loss": 0.4834, "step": 14164 }, { "epoch": 46.442622950819676, "grad_norm": 15.643033027648926, "learning_rate": 1.1630646314953942e-05, "loss": 0.5801, "step": 14165 }, { "epoch": 46.445901639344264, "grad_norm": 8.15535831451416, "learning_rate": 1.1629598631718615e-05, "loss": 0.6266, "step": 14166 }, { "epoch": 46.44918032786885, "grad_norm": 6.307145118713379, "learning_rate": 1.1628550930107868e-05, "loss": 0.7229, "step": 14167 }, { "epoch": 46.45245901639344, "grad_norm": 7.19253396987915, "learning_rate": 1.1627503210133516e-05, "loss": 0.8941, "step": 14168 }, { "epoch": 46.455737704918036, "grad_norm": 27.206859588623047, "learning_rate": 1.1626455471807374e-05, "loss": 0.6439, "step": 14169 }, { "epoch": 46.459016393442624, "grad_norm": 8.705448150634766, "learning_rate": 1.1625407715141252e-05, "loss": 0.5562, "step": 14170 }, { "epoch": 46.46229508196721, "grad_norm": 6.63970947265625, "learning_rate": 1.1624359940146972e-05, "loss": 0.7187, "step": 14171 }, { "epoch": 46.4655737704918, "grad_norm": 6.804604530334473, "learning_rate": 1.1623312146836343e-05, "loss": 0.874, "step": 14172 }, { "epoch": 46.4688524590164, "grad_norm": 17.60186004638672, "learning_rate": 1.1622264335221184e-05, "loss": 0.6924, "step": 14173 }, { "epoch": 46.472131147540985, "grad_norm": 7.481011867523193, "learning_rate": 1.1621216505313304e-05, "loss": 0.7604, "step": 14174 }, { "epoch": 46.47540983606557, "grad_norm": 6.18420934677124, "learning_rate": 1.1620168657124523e-05, "loss": 0.6509, "step": 14175 }, { "epoch": 46.47868852459016, "grad_norm": 6.338833808898926, "learning_rate": 1.1619120790666659e-05, "loss": 0.6841, "step": 14176 }, { "epoch": 46.48196721311476, "grad_norm": 6.534512519836426, "learning_rate": 1.161807290595152e-05, "loss": 0.6665, "step": 14177 }, { "epoch": 46.485245901639345, "grad_norm": 8.660465240478516, "learning_rate": 1.161702500299093e-05, "loss": 0.5306, "step": 14178 }, { "epoch": 46.488524590163934, "grad_norm": 6.947399139404297, "learning_rate": 1.1615977081796702e-05, "loss": 0.6156, "step": 14179 }, { "epoch": 46.49180327868852, "grad_norm": 5.151007175445557, "learning_rate": 1.161492914238065e-05, "loss": 0.8683, "step": 14180 }, { "epoch": 46.49508196721312, "grad_norm": 7.946610927581787, "learning_rate": 1.1613881184754595e-05, "loss": 0.4617, "step": 14181 }, { "epoch": 46.498360655737706, "grad_norm": 9.302681922912598, "learning_rate": 1.161283320893035e-05, "loss": 0.5568, "step": 14182 }, { "epoch": 46.501639344262294, "grad_norm": 6.492547512054443, "learning_rate": 1.1611785214919733e-05, "loss": 0.5952, "step": 14183 }, { "epoch": 46.50491803278688, "grad_norm": 5.922481536865234, "learning_rate": 1.1610737202734563e-05, "loss": 0.5529, "step": 14184 }, { "epoch": 46.50819672131148, "grad_norm": 8.615971565246582, "learning_rate": 1.1609689172386656e-05, "loss": 0.4102, "step": 14185 }, { "epoch": 46.511475409836066, "grad_norm": 8.106108665466309, "learning_rate": 1.1608641123887831e-05, "loss": 0.6535, "step": 14186 }, { "epoch": 46.514754098360655, "grad_norm": 7.874007225036621, "learning_rate": 1.1607593057249905e-05, "loss": 0.5565, "step": 14187 }, { "epoch": 46.51803278688524, "grad_norm": 6.02070951461792, "learning_rate": 1.1606544972484695e-05, "loss": 0.5804, "step": 14188 }, { "epoch": 46.52131147540984, "grad_norm": 6.262401580810547, "learning_rate": 1.1605496869604019e-05, "loss": 0.4899, "step": 14189 }, { "epoch": 46.52459016393443, "grad_norm": 6.470864772796631, "learning_rate": 1.1604448748619699e-05, "loss": 0.4974, "step": 14190 }, { "epoch": 46.527868852459015, "grad_norm": 6.184713840484619, "learning_rate": 1.1603400609543547e-05, "loss": 0.7304, "step": 14191 }, { "epoch": 46.5311475409836, "grad_norm": 11.359052658081055, "learning_rate": 1.1602352452387391e-05, "loss": 1.1489, "step": 14192 }, { "epoch": 46.5344262295082, "grad_norm": 9.805346488952637, "learning_rate": 1.160130427716304e-05, "loss": 0.3875, "step": 14193 }, { "epoch": 46.53770491803279, "grad_norm": 6.20965051651001, "learning_rate": 1.1600256083882324e-05, "loss": 0.688, "step": 14194 }, { "epoch": 46.540983606557376, "grad_norm": 10.945436477661133, "learning_rate": 1.1599207872557055e-05, "loss": 0.5497, "step": 14195 }, { "epoch": 46.544262295081964, "grad_norm": 5.368715286254883, "learning_rate": 1.1598159643199052e-05, "loss": 0.5089, "step": 14196 }, { "epoch": 46.54754098360656, "grad_norm": 6.155696392059326, "learning_rate": 1.1597111395820137e-05, "loss": 0.6489, "step": 14197 }, { "epoch": 46.55081967213115, "grad_norm": 6.937198162078857, "learning_rate": 1.1596063130432133e-05, "loss": 0.6372, "step": 14198 }, { "epoch": 46.554098360655736, "grad_norm": 5.662539482116699, "learning_rate": 1.1595014847046857e-05, "loss": 0.7327, "step": 14199 }, { "epoch": 46.557377049180324, "grad_norm": 11.548402786254883, "learning_rate": 1.159396654567613e-05, "loss": 0.7545, "step": 14200 }, { "epoch": 46.56065573770492, "grad_norm": 7.615600109100342, "learning_rate": 1.1592918226331774e-05, "loss": 0.726, "step": 14201 }, { "epoch": 46.56393442622951, "grad_norm": 6.901257038116455, "learning_rate": 1.1591869889025607e-05, "loss": 0.7331, "step": 14202 }, { "epoch": 46.5672131147541, "grad_norm": 6.0869951248168945, "learning_rate": 1.1590821533769452e-05, "loss": 0.4971, "step": 14203 }, { "epoch": 46.570491803278685, "grad_norm": 10.408458709716797, "learning_rate": 1.158977316057513e-05, "loss": 0.5843, "step": 14204 }, { "epoch": 46.57377049180328, "grad_norm": 5.630298614501953, "learning_rate": 1.1588724769454465e-05, "loss": 0.4969, "step": 14205 }, { "epoch": 46.57704918032787, "grad_norm": 7.219836235046387, "learning_rate": 1.1587676360419274e-05, "loss": 0.6796, "step": 14206 }, { "epoch": 46.58032786885246, "grad_norm": 6.227511405944824, "learning_rate": 1.1586627933481383e-05, "loss": 0.5868, "step": 14207 }, { "epoch": 46.58360655737705, "grad_norm": 5.873558044433594, "learning_rate": 1.158557948865261e-05, "loss": 0.7083, "step": 14208 }, { "epoch": 46.58688524590164, "grad_norm": 7.73718786239624, "learning_rate": 1.158453102594478e-05, "loss": 0.7613, "step": 14209 }, { "epoch": 46.59016393442623, "grad_norm": 7.717961311340332, "learning_rate": 1.1583482545369718e-05, "loss": 0.7036, "step": 14210 }, { "epoch": 46.59344262295082, "grad_norm": 7.417534828186035, "learning_rate": 1.158243404693924e-05, "loss": 0.8042, "step": 14211 }, { "epoch": 46.59672131147541, "grad_norm": 8.948149681091309, "learning_rate": 1.1581385530665177e-05, "loss": 0.7103, "step": 14212 }, { "epoch": 46.6, "grad_norm": 6.709009170532227, "learning_rate": 1.1580336996559343e-05, "loss": 0.6746, "step": 14213 }, { "epoch": 46.60327868852459, "grad_norm": 10.294055938720703, "learning_rate": 1.1579288444633572e-05, "loss": 0.7942, "step": 14214 }, { "epoch": 46.60655737704918, "grad_norm": 6.47608757019043, "learning_rate": 1.1578239874899678e-05, "loss": 0.6677, "step": 14215 }, { "epoch": 46.609836065573774, "grad_norm": 6.109007835388184, "learning_rate": 1.1577191287369489e-05, "loss": 0.7134, "step": 14216 }, { "epoch": 46.61311475409836, "grad_norm": 6.16493558883667, "learning_rate": 1.1576142682054828e-05, "loss": 0.604, "step": 14217 }, { "epoch": 46.61639344262295, "grad_norm": 6.850626468658447, "learning_rate": 1.157509405896752e-05, "loss": 0.8226, "step": 14218 }, { "epoch": 46.61967213114754, "grad_norm": 7.102962493896484, "learning_rate": 1.1574045418119389e-05, "loss": 0.6846, "step": 14219 }, { "epoch": 46.622950819672134, "grad_norm": 6.991864204406738, "learning_rate": 1.157299675952226e-05, "loss": 0.6075, "step": 14220 }, { "epoch": 46.62622950819672, "grad_norm": 5.1230292320251465, "learning_rate": 1.1571948083187956e-05, "loss": 0.606, "step": 14221 }, { "epoch": 46.62950819672131, "grad_norm": 8.025415420532227, "learning_rate": 1.1570899389128303e-05, "loss": 0.4195, "step": 14222 }, { "epoch": 46.6327868852459, "grad_norm": 7.653438091278076, "learning_rate": 1.1569850677355128e-05, "loss": 0.7345, "step": 14223 }, { "epoch": 46.636065573770495, "grad_norm": 10.114676475524902, "learning_rate": 1.156880194788025e-05, "loss": 0.6317, "step": 14224 }, { "epoch": 46.63934426229508, "grad_norm": 6.118844032287598, "learning_rate": 1.1567753200715503e-05, "loss": 0.7931, "step": 14225 }, { "epoch": 46.64262295081967, "grad_norm": 6.423735618591309, "learning_rate": 1.1566704435872707e-05, "loss": 0.7501, "step": 14226 }, { "epoch": 46.64590163934426, "grad_norm": 6.745036602020264, "learning_rate": 1.156565565336369e-05, "loss": 0.6193, "step": 14227 }, { "epoch": 46.649180327868855, "grad_norm": 6.871294021606445, "learning_rate": 1.1564606853200275e-05, "loss": 0.646, "step": 14228 }, { "epoch": 46.65245901639344, "grad_norm": 7.282630443572998, "learning_rate": 1.1563558035394296e-05, "loss": 0.7441, "step": 14229 }, { "epoch": 46.65573770491803, "grad_norm": 7.128201961517334, "learning_rate": 1.156250919995757e-05, "loss": 0.6958, "step": 14230 }, { "epoch": 46.65901639344262, "grad_norm": 5.895630836486816, "learning_rate": 1.1561460346901932e-05, "loss": 0.5938, "step": 14231 }, { "epoch": 46.662295081967216, "grad_norm": 6.9646711349487305, "learning_rate": 1.1560411476239201e-05, "loss": 0.7941, "step": 14232 }, { "epoch": 46.665573770491804, "grad_norm": 7.8903913497924805, "learning_rate": 1.1559362587981211e-05, "loss": 0.6083, "step": 14233 }, { "epoch": 46.66885245901639, "grad_norm": 6.521425724029541, "learning_rate": 1.1558313682139786e-05, "loss": 0.4623, "step": 14234 }, { "epoch": 46.67213114754098, "grad_norm": 8.285701751708984, "learning_rate": 1.1557264758726754e-05, "loss": 0.6243, "step": 14235 }, { "epoch": 46.675409836065576, "grad_norm": 6.309481143951416, "learning_rate": 1.1556215817753941e-05, "loss": 0.9426, "step": 14236 }, { "epoch": 46.678688524590164, "grad_norm": 5.926043510437012, "learning_rate": 1.1555166859233177e-05, "loss": 0.4832, "step": 14237 }, { "epoch": 46.68196721311475, "grad_norm": 5.963881015777588, "learning_rate": 1.155411788317629e-05, "loss": 0.6595, "step": 14238 }, { "epoch": 46.68524590163934, "grad_norm": 7.932068824768066, "learning_rate": 1.155306888959511e-05, "loss": 0.8386, "step": 14239 }, { "epoch": 46.68852459016394, "grad_norm": 9.010345458984375, "learning_rate": 1.1552019878501462e-05, "loss": 0.7156, "step": 14240 }, { "epoch": 46.691803278688525, "grad_norm": 6.4000349044799805, "learning_rate": 1.1550970849907175e-05, "loss": 0.6233, "step": 14241 }, { "epoch": 46.69508196721311, "grad_norm": 7.32419490814209, "learning_rate": 1.1549921803824082e-05, "loss": 0.6284, "step": 14242 }, { "epoch": 46.6983606557377, "grad_norm": 5.905836582183838, "learning_rate": 1.1548872740264006e-05, "loss": 0.6002, "step": 14243 }, { "epoch": 46.7016393442623, "grad_norm": 6.035689830780029, "learning_rate": 1.1547823659238781e-05, "loss": 0.7291, "step": 14244 }, { "epoch": 46.704918032786885, "grad_norm": 7.123879909515381, "learning_rate": 1.1546774560760234e-05, "loss": 0.5368, "step": 14245 }, { "epoch": 46.708196721311474, "grad_norm": 8.292611122131348, "learning_rate": 1.1545725444840196e-05, "loss": 0.6501, "step": 14246 }, { "epoch": 46.71147540983607, "grad_norm": 15.202984809875488, "learning_rate": 1.1544676311490499e-05, "loss": 0.6119, "step": 14247 }, { "epoch": 46.71475409836066, "grad_norm": 6.332094192504883, "learning_rate": 1.1543627160722968e-05, "loss": 0.7963, "step": 14248 }, { "epoch": 46.718032786885246, "grad_norm": 6.43886661529541, "learning_rate": 1.1542577992549437e-05, "loss": 0.7628, "step": 14249 }, { "epoch": 46.721311475409834, "grad_norm": 8.843653678894043, "learning_rate": 1.1541528806981734e-05, "loss": 0.7091, "step": 14250 }, { "epoch": 46.72459016393443, "grad_norm": 6.690816402435303, "learning_rate": 1.1540479604031692e-05, "loss": 0.811, "step": 14251 }, { "epoch": 46.72786885245902, "grad_norm": 6.01731014251709, "learning_rate": 1.1539430383711138e-05, "loss": 0.6825, "step": 14252 }, { "epoch": 46.731147540983606, "grad_norm": 5.878541469573975, "learning_rate": 1.153838114603191e-05, "loss": 0.62, "step": 14253 }, { "epoch": 46.734426229508195, "grad_norm": 6.1738691329956055, "learning_rate": 1.1537331891005831e-05, "loss": 0.5894, "step": 14254 }, { "epoch": 46.73770491803279, "grad_norm": 8.957565307617188, "learning_rate": 1.153628261864474e-05, "loss": 0.4149, "step": 14255 }, { "epoch": 46.74098360655738, "grad_norm": 6.169356822967529, "learning_rate": 1.1535233328960462e-05, "loss": 0.7115, "step": 14256 }, { "epoch": 46.74426229508197, "grad_norm": 6.719536781311035, "learning_rate": 1.1534184021964837e-05, "loss": 0.5221, "step": 14257 }, { "epoch": 46.747540983606555, "grad_norm": 7.093072891235352, "learning_rate": 1.1533134697669686e-05, "loss": 0.4483, "step": 14258 }, { "epoch": 46.75081967213115, "grad_norm": 6.276782035827637, "learning_rate": 1.153208535608685e-05, "loss": 0.5826, "step": 14259 }, { "epoch": 46.75409836065574, "grad_norm": 7.1018853187561035, "learning_rate": 1.153103599722816e-05, "loss": 0.7893, "step": 14260 }, { "epoch": 46.75737704918033, "grad_norm": 8.894725799560547, "learning_rate": 1.1529986621105445e-05, "loss": 0.8638, "step": 14261 }, { "epoch": 46.760655737704916, "grad_norm": 6.717761516571045, "learning_rate": 1.1528937227730539e-05, "loss": 0.4623, "step": 14262 }, { "epoch": 46.76393442622951, "grad_norm": 6.548680782318115, "learning_rate": 1.1527887817115279e-05, "loss": 0.7268, "step": 14263 }, { "epoch": 46.7672131147541, "grad_norm": 7.637378692626953, "learning_rate": 1.1526838389271492e-05, "loss": 0.537, "step": 14264 }, { "epoch": 46.77049180327869, "grad_norm": 6.446214199066162, "learning_rate": 1.1525788944211016e-05, "loss": 0.6992, "step": 14265 }, { "epoch": 46.773770491803276, "grad_norm": 6.2452311515808105, "learning_rate": 1.1524739481945686e-05, "loss": 0.5907, "step": 14266 }, { "epoch": 46.77704918032787, "grad_norm": 7.004512786865234, "learning_rate": 1.152369000248733e-05, "loss": 0.6181, "step": 14267 }, { "epoch": 46.78032786885246, "grad_norm": 6.298426151275635, "learning_rate": 1.1522640505847786e-05, "loss": 0.8011, "step": 14268 }, { "epoch": 46.78360655737705, "grad_norm": 13.007338523864746, "learning_rate": 1.1521590992038887e-05, "loss": 0.8478, "step": 14269 }, { "epoch": 46.78688524590164, "grad_norm": 7.906056880950928, "learning_rate": 1.152054146107247e-05, "loss": 0.7777, "step": 14270 }, { "epoch": 46.79016393442623, "grad_norm": 6.165666580200195, "learning_rate": 1.1519491912960362e-05, "loss": 0.522, "step": 14271 }, { "epoch": 46.79344262295082, "grad_norm": 7.688560485839844, "learning_rate": 1.1518442347714407e-05, "loss": 0.7994, "step": 14272 }, { "epoch": 46.79672131147541, "grad_norm": 7.650486469268799, "learning_rate": 1.1517392765346436e-05, "loss": 0.7143, "step": 14273 }, { "epoch": 46.8, "grad_norm": 6.634582042694092, "learning_rate": 1.151634316586828e-05, "loss": 0.4271, "step": 14274 }, { "epoch": 46.80327868852459, "grad_norm": 9.013313293457031, "learning_rate": 1.1515293549291782e-05, "loss": 0.59, "step": 14275 }, { "epoch": 46.80655737704918, "grad_norm": 7.901802062988281, "learning_rate": 1.151424391562877e-05, "loss": 0.6078, "step": 14276 }, { "epoch": 46.80983606557377, "grad_norm": 9.796072006225586, "learning_rate": 1.1513194264891088e-05, "loss": 0.6608, "step": 14277 }, { "epoch": 46.81311475409836, "grad_norm": 6.069675922393799, "learning_rate": 1.1512144597090562e-05, "loss": 0.6287, "step": 14278 }, { "epoch": 46.81639344262295, "grad_norm": 17.313899993896484, "learning_rate": 1.1511094912239039e-05, "loss": 0.6644, "step": 14279 }, { "epoch": 46.81967213114754, "grad_norm": 5.43841552734375, "learning_rate": 1.1510045210348343e-05, "loss": 0.9649, "step": 14280 }, { "epoch": 46.82295081967213, "grad_norm": 7.161876678466797, "learning_rate": 1.1508995491430324e-05, "loss": 0.609, "step": 14281 }, { "epoch": 46.82622950819672, "grad_norm": 13.11165714263916, "learning_rate": 1.1507945755496807e-05, "loss": 0.5505, "step": 14282 }, { "epoch": 46.829508196721314, "grad_norm": 11.944936752319336, "learning_rate": 1.1506896002559638e-05, "loss": 0.5278, "step": 14283 }, { "epoch": 46.8327868852459, "grad_norm": 10.886032104492188, "learning_rate": 1.1505846232630647e-05, "loss": 0.602, "step": 14284 }, { "epoch": 46.83606557377049, "grad_norm": 6.6425461769104, "learning_rate": 1.1504796445721676e-05, "loss": 0.6166, "step": 14285 }, { "epoch": 46.83934426229508, "grad_norm": 6.44477653503418, "learning_rate": 1.1503746641844558e-05, "loss": 0.7051, "step": 14286 }, { "epoch": 46.842622950819674, "grad_norm": 6.357210636138916, "learning_rate": 1.1502696821011134e-05, "loss": 0.6503, "step": 14287 }, { "epoch": 46.84590163934426, "grad_norm": 10.591753959655762, "learning_rate": 1.150164698323324e-05, "loss": 0.6445, "step": 14288 }, { "epoch": 46.84918032786885, "grad_norm": 7.37918758392334, "learning_rate": 1.1500597128522716e-05, "loss": 0.8005, "step": 14289 }, { "epoch": 46.85245901639344, "grad_norm": 6.758077621459961, "learning_rate": 1.1499547256891399e-05, "loss": 0.4261, "step": 14290 }, { "epoch": 46.855737704918035, "grad_norm": 5.654829025268555, "learning_rate": 1.1498497368351129e-05, "loss": 0.6756, "step": 14291 }, { "epoch": 46.85901639344262, "grad_norm": 7.51113224029541, "learning_rate": 1.1497447462913741e-05, "loss": 0.797, "step": 14292 }, { "epoch": 46.86229508196721, "grad_norm": 6.347184181213379, "learning_rate": 1.1496397540591076e-05, "loss": 0.4513, "step": 14293 }, { "epoch": 46.86557377049181, "grad_norm": 5.724496364593506, "learning_rate": 1.1495347601394973e-05, "loss": 0.7552, "step": 14294 }, { "epoch": 46.868852459016395, "grad_norm": 6.575818061828613, "learning_rate": 1.1494297645337272e-05, "loss": 0.6022, "step": 14295 }, { "epoch": 46.87213114754098, "grad_norm": 6.079710006713867, "learning_rate": 1.1493247672429813e-05, "loss": 0.6352, "step": 14296 }, { "epoch": 46.87540983606557, "grad_norm": 28.976362228393555, "learning_rate": 1.149219768268443e-05, "loss": 0.6588, "step": 14297 }, { "epoch": 46.87868852459017, "grad_norm": 10.957226753234863, "learning_rate": 1.1491147676112968e-05, "loss": 0.6332, "step": 14298 }, { "epoch": 46.881967213114756, "grad_norm": 6.00409460067749, "learning_rate": 1.1490097652727267e-05, "loss": 0.5603, "step": 14299 }, { "epoch": 46.885245901639344, "grad_norm": 7.108243942260742, "learning_rate": 1.1489047612539164e-05, "loss": 0.719, "step": 14300 }, { "epoch": 46.88852459016393, "grad_norm": 6.5476603507995605, "learning_rate": 1.1487997555560503e-05, "loss": 0.5775, "step": 14301 }, { "epoch": 46.89180327868853, "grad_norm": 9.891883850097656, "learning_rate": 1.1486947481803122e-05, "loss": 0.7335, "step": 14302 }, { "epoch": 46.895081967213116, "grad_norm": 7.78061580657959, "learning_rate": 1.148589739127886e-05, "loss": 0.5218, "step": 14303 }, { "epoch": 46.898360655737704, "grad_norm": 6.870102882385254, "learning_rate": 1.148484728399956e-05, "loss": 0.5904, "step": 14304 }, { "epoch": 46.90163934426229, "grad_norm": 8.191605567932129, "learning_rate": 1.1483797159977067e-05, "loss": 0.5221, "step": 14305 }, { "epoch": 46.90491803278689, "grad_norm": 8.55335807800293, "learning_rate": 1.1482747019223212e-05, "loss": 0.5747, "step": 14306 }, { "epoch": 46.90819672131148, "grad_norm": 9.448442459106445, "learning_rate": 1.148169686174985e-05, "loss": 0.8295, "step": 14307 }, { "epoch": 46.911475409836065, "grad_norm": 11.827613830566406, "learning_rate": 1.1480646687568807e-05, "loss": 0.5118, "step": 14308 }, { "epoch": 46.91475409836065, "grad_norm": 11.116729736328125, "learning_rate": 1.147959649669194e-05, "loss": 0.7728, "step": 14309 }, { "epoch": 46.91803278688525, "grad_norm": 13.29793930053711, "learning_rate": 1.1478546289131081e-05, "loss": 0.8905, "step": 14310 }, { "epoch": 46.92131147540984, "grad_norm": 6.040517807006836, "learning_rate": 1.1477496064898078e-05, "loss": 0.3622, "step": 14311 }, { "epoch": 46.924590163934425, "grad_norm": 7.668783187866211, "learning_rate": 1.147644582400477e-05, "loss": 0.5789, "step": 14312 }, { "epoch": 46.927868852459014, "grad_norm": 13.726205825805664, "learning_rate": 1.1475395566462997e-05, "loss": 0.7082, "step": 14313 }, { "epoch": 46.93114754098361, "grad_norm": 8.026822090148926, "learning_rate": 1.1474345292284608e-05, "loss": 0.7229, "step": 14314 }, { "epoch": 46.9344262295082, "grad_norm": 20.391624450683594, "learning_rate": 1.1473295001481439e-05, "loss": 0.4508, "step": 14315 }, { "epoch": 46.937704918032786, "grad_norm": 7.302352428436279, "learning_rate": 1.147224469406534e-05, "loss": 0.7558, "step": 14316 }, { "epoch": 46.940983606557374, "grad_norm": 12.126473426818848, "learning_rate": 1.147119437004815e-05, "loss": 0.5881, "step": 14317 }, { "epoch": 46.94426229508197, "grad_norm": 7.091170787811279, "learning_rate": 1.1470144029441716e-05, "loss": 0.6013, "step": 14318 }, { "epoch": 46.94754098360656, "grad_norm": 5.659376621246338, "learning_rate": 1.1469093672257879e-05, "loss": 0.5539, "step": 14319 }, { "epoch": 46.950819672131146, "grad_norm": 6.541531562805176, "learning_rate": 1.1468043298508482e-05, "loss": 0.7776, "step": 14320 }, { "epoch": 46.954098360655735, "grad_norm": 6.4538655281066895, "learning_rate": 1.146699290820537e-05, "loss": 0.6128, "step": 14321 }, { "epoch": 46.95737704918033, "grad_norm": 7.695260524749756, "learning_rate": 1.146594250136039e-05, "loss": 0.642, "step": 14322 }, { "epoch": 46.96065573770492, "grad_norm": 8.222078323364258, "learning_rate": 1.1464892077985384e-05, "loss": 0.5627, "step": 14323 }, { "epoch": 46.96393442622951, "grad_norm": 6.98315954208374, "learning_rate": 1.1463841638092195e-05, "loss": 0.6805, "step": 14324 }, { "epoch": 46.967213114754095, "grad_norm": 8.01372241973877, "learning_rate": 1.1462791181692672e-05, "loss": 0.7149, "step": 14325 }, { "epoch": 46.97049180327869, "grad_norm": 5.443749904632568, "learning_rate": 1.1461740708798654e-05, "loss": 0.7934, "step": 14326 }, { "epoch": 46.97377049180328, "grad_norm": 12.348593711853027, "learning_rate": 1.146069021942199e-05, "loss": 0.6418, "step": 14327 }, { "epoch": 46.97704918032787, "grad_norm": 10.233881950378418, "learning_rate": 1.1459639713574527e-05, "loss": 0.739, "step": 14328 }, { "epoch": 46.980327868852456, "grad_norm": 6.540158748626709, "learning_rate": 1.1458589191268107e-05, "loss": 0.483, "step": 14329 }, { "epoch": 46.98360655737705, "grad_norm": 9.353594779968262, "learning_rate": 1.145753865251458e-05, "loss": 0.8132, "step": 14330 }, { "epoch": 46.98688524590164, "grad_norm": 8.227631568908691, "learning_rate": 1.1456488097325788e-05, "loss": 0.5252, "step": 14331 }, { "epoch": 46.99016393442623, "grad_norm": 7.086906909942627, "learning_rate": 1.1455437525713577e-05, "loss": 0.9004, "step": 14332 }, { "epoch": 46.993442622950816, "grad_norm": 6.399485111236572, "learning_rate": 1.1454386937689796e-05, "loss": 0.7341, "step": 14333 }, { "epoch": 46.99672131147541, "grad_norm": 6.025208473205566, "learning_rate": 1.145333633326629e-05, "loss": 0.6471, "step": 14334 }, { "epoch": 47.0, "grad_norm": 7.1134209632873535, "learning_rate": 1.1452285712454905e-05, "loss": 0.6114, "step": 14335 }, { "epoch": 47.00327868852459, "grad_norm": 6.759347438812256, "learning_rate": 1.145123507526749e-05, "loss": 0.6177, "step": 14336 }, { "epoch": 47.006557377049184, "grad_norm": 11.653260231018066, "learning_rate": 1.1450184421715889e-05, "loss": 0.7695, "step": 14337 }, { "epoch": 47.00983606557377, "grad_norm": 6.534050941467285, "learning_rate": 1.1449133751811952e-05, "loss": 0.6507, "step": 14338 }, { "epoch": 47.01311475409836, "grad_norm": 6.814029216766357, "learning_rate": 1.1448083065567523e-05, "loss": 0.5229, "step": 14339 }, { "epoch": 47.01639344262295, "grad_norm": 9.86030387878418, "learning_rate": 1.1447032362994455e-05, "loss": 0.7354, "step": 14340 }, { "epoch": 47.019672131147544, "grad_norm": 19.85483741760254, "learning_rate": 1.1445981644104588e-05, "loss": 0.5164, "step": 14341 }, { "epoch": 47.02295081967213, "grad_norm": 8.478702545166016, "learning_rate": 1.144493090890978e-05, "loss": 0.4569, "step": 14342 }, { "epoch": 47.02622950819672, "grad_norm": 14.048447608947754, "learning_rate": 1.1443880157421869e-05, "loss": 0.6443, "step": 14343 }, { "epoch": 47.02950819672131, "grad_norm": 10.597675323486328, "learning_rate": 1.144282938965271e-05, "loss": 0.4261, "step": 14344 }, { "epoch": 47.032786885245905, "grad_norm": 10.959697723388672, "learning_rate": 1.1441778605614152e-05, "loss": 0.5996, "step": 14345 }, { "epoch": 47.03606557377049, "grad_norm": 8.793986320495605, "learning_rate": 1.1440727805318038e-05, "loss": 0.6309, "step": 14346 }, { "epoch": 47.03934426229508, "grad_norm": 7.4329094886779785, "learning_rate": 1.1439676988776217e-05, "loss": 0.4508, "step": 14347 }, { "epoch": 47.04262295081967, "grad_norm": 7.1048383712768555, "learning_rate": 1.1438626156000547e-05, "loss": 0.6627, "step": 14348 }, { "epoch": 47.045901639344265, "grad_norm": 19.79483985900879, "learning_rate": 1.143757530700287e-05, "loss": 0.4455, "step": 14349 }, { "epoch": 47.049180327868854, "grad_norm": 11.038761138916016, "learning_rate": 1.1436524441795033e-05, "loss": 0.5667, "step": 14350 }, { "epoch": 47.05245901639344, "grad_norm": 8.696375846862793, "learning_rate": 1.1435473560388894e-05, "loss": 0.5045, "step": 14351 }, { "epoch": 47.05573770491803, "grad_norm": 8.109519958496094, "learning_rate": 1.1434422662796297e-05, "loss": 0.6216, "step": 14352 }, { "epoch": 47.059016393442626, "grad_norm": 5.417132377624512, "learning_rate": 1.1433371749029093e-05, "loss": 0.5335, "step": 14353 }, { "epoch": 47.062295081967214, "grad_norm": 8.460078239440918, "learning_rate": 1.1432320819099129e-05, "loss": 0.5741, "step": 14354 }, { "epoch": 47.0655737704918, "grad_norm": 8.06650447845459, "learning_rate": 1.1431269873018263e-05, "loss": 0.8172, "step": 14355 }, { "epoch": 47.06885245901639, "grad_norm": 8.105765342712402, "learning_rate": 1.1430218910798337e-05, "loss": 0.6803, "step": 14356 }, { "epoch": 47.072131147540986, "grad_norm": 7.309317588806152, "learning_rate": 1.1429167932451208e-05, "loss": 0.5681, "step": 14357 }, { "epoch": 47.075409836065575, "grad_norm": 16.5599365234375, "learning_rate": 1.1428116937988724e-05, "loss": 0.6183, "step": 14358 }, { "epoch": 47.07868852459016, "grad_norm": 5.429152011871338, "learning_rate": 1.1427065927422737e-05, "loss": 0.5202, "step": 14359 }, { "epoch": 47.08196721311475, "grad_norm": 7.939168930053711, "learning_rate": 1.14260149007651e-05, "loss": 0.7699, "step": 14360 }, { "epoch": 47.08524590163935, "grad_norm": 8.98841381072998, "learning_rate": 1.142496385802766e-05, "loss": 0.6272, "step": 14361 }, { "epoch": 47.088524590163935, "grad_norm": 6.149864196777344, "learning_rate": 1.1423912799222273e-05, "loss": 0.7673, "step": 14362 }, { "epoch": 47.09180327868852, "grad_norm": 12.34862995147705, "learning_rate": 1.1422861724360786e-05, "loss": 0.9918, "step": 14363 }, { "epoch": 47.09508196721311, "grad_norm": 17.206144332885742, "learning_rate": 1.1421810633455052e-05, "loss": 0.9179, "step": 14364 }, { "epoch": 47.09836065573771, "grad_norm": 8.547165870666504, "learning_rate": 1.1420759526516929e-05, "loss": 0.5441, "step": 14365 }, { "epoch": 47.101639344262296, "grad_norm": 8.027731895446777, "learning_rate": 1.1419708403558264e-05, "loss": 0.7174, "step": 14366 }, { "epoch": 47.104918032786884, "grad_norm": 5.9573655128479, "learning_rate": 1.141865726459091e-05, "loss": 0.693, "step": 14367 }, { "epoch": 47.10819672131147, "grad_norm": 8.231425285339355, "learning_rate": 1.141760610962672e-05, "loss": 0.5643, "step": 14368 }, { "epoch": 47.11147540983607, "grad_norm": 6.266592979431152, "learning_rate": 1.1416554938677546e-05, "loss": 0.8429, "step": 14369 }, { "epoch": 47.114754098360656, "grad_norm": 7.413780689239502, "learning_rate": 1.1415503751755245e-05, "loss": 0.7617, "step": 14370 }, { "epoch": 47.118032786885244, "grad_norm": 7.402479648590088, "learning_rate": 1.1414452548871664e-05, "loss": 0.6789, "step": 14371 }, { "epoch": 47.12131147540983, "grad_norm": 14.711150169372559, "learning_rate": 1.1413401330038662e-05, "loss": 0.5995, "step": 14372 }, { "epoch": 47.12459016393443, "grad_norm": 7.244740962982178, "learning_rate": 1.1412350095268092e-05, "loss": 0.862, "step": 14373 }, { "epoch": 47.12786885245902, "grad_norm": 10.278322219848633, "learning_rate": 1.1411298844571803e-05, "loss": 0.8011, "step": 14374 }, { "epoch": 47.131147540983605, "grad_norm": 8.47825813293457, "learning_rate": 1.1410247577961656e-05, "loss": 0.7454, "step": 14375 }, { "epoch": 47.13442622950819, "grad_norm": 12.121207237243652, "learning_rate": 1.1409196295449496e-05, "loss": 0.6916, "step": 14376 }, { "epoch": 47.13770491803279, "grad_norm": 6.772530555725098, "learning_rate": 1.1408144997047189e-05, "loss": 0.541, "step": 14377 }, { "epoch": 47.14098360655738, "grad_norm": 8.637706756591797, "learning_rate": 1.1407093682766576e-05, "loss": 0.5002, "step": 14378 }, { "epoch": 47.144262295081965, "grad_norm": 5.831895351409912, "learning_rate": 1.1406042352619523e-05, "loss": 0.5789, "step": 14379 }, { "epoch": 47.14754098360656, "grad_norm": 7.9723100662231445, "learning_rate": 1.140499100661788e-05, "loss": 0.6535, "step": 14380 }, { "epoch": 47.15081967213115, "grad_norm": 7.357206344604492, "learning_rate": 1.1403939644773503e-05, "loss": 0.4747, "step": 14381 }, { "epoch": 47.15409836065574, "grad_norm": 6.257258892059326, "learning_rate": 1.1402888267098246e-05, "loss": 0.7199, "step": 14382 }, { "epoch": 47.157377049180326, "grad_norm": 7.896188259124756, "learning_rate": 1.1401836873603966e-05, "loss": 0.7908, "step": 14383 }, { "epoch": 47.16065573770492, "grad_norm": 10.194209098815918, "learning_rate": 1.1400785464302514e-05, "loss": 0.6709, "step": 14384 }, { "epoch": 47.16393442622951, "grad_norm": 7.933103561401367, "learning_rate": 1.1399734039205757e-05, "loss": 0.5872, "step": 14385 }, { "epoch": 47.1672131147541, "grad_norm": 10.87348461151123, "learning_rate": 1.1398682598325536e-05, "loss": 0.776, "step": 14386 }, { "epoch": 47.170491803278686, "grad_norm": 11.767049789428711, "learning_rate": 1.139763114167372e-05, "loss": 0.8705, "step": 14387 }, { "epoch": 47.17377049180328, "grad_norm": 10.951568603515625, "learning_rate": 1.1396579669262158e-05, "loss": 0.7282, "step": 14388 }, { "epoch": 47.17704918032787, "grad_norm": 37.128326416015625, "learning_rate": 1.1395528181102704e-05, "loss": 0.542, "step": 14389 }, { "epoch": 47.18032786885246, "grad_norm": 8.707133293151855, "learning_rate": 1.1394476677207224e-05, "loss": 0.4561, "step": 14390 }, { "epoch": 47.18360655737705, "grad_norm": 12.063097953796387, "learning_rate": 1.1393425157587568e-05, "loss": 0.4157, "step": 14391 }, { "epoch": 47.18688524590164, "grad_norm": 9.142691612243652, "learning_rate": 1.1392373622255595e-05, "loss": 0.6064, "step": 14392 }, { "epoch": 47.19016393442623, "grad_norm": 6.489389419555664, "learning_rate": 1.1391322071223159e-05, "loss": 0.6888, "step": 14393 }, { "epoch": 47.19344262295082, "grad_norm": 8.945734977722168, "learning_rate": 1.1390270504502122e-05, "loss": 0.7816, "step": 14394 }, { "epoch": 47.19672131147541, "grad_norm": 9.920121192932129, "learning_rate": 1.138921892210434e-05, "loss": 0.5078, "step": 14395 }, { "epoch": 47.2, "grad_norm": 7.811834335327148, "learning_rate": 1.138816732404167e-05, "loss": 0.6586, "step": 14396 }, { "epoch": 47.20327868852459, "grad_norm": 7.68681526184082, "learning_rate": 1.1387115710325967e-05, "loss": 0.5849, "step": 14397 }, { "epoch": 47.20655737704918, "grad_norm": 7.314218044281006, "learning_rate": 1.1386064080969095e-05, "loss": 0.7159, "step": 14398 }, { "epoch": 47.20983606557377, "grad_norm": 7.630645751953125, "learning_rate": 1.138501243598291e-05, "loss": 0.8489, "step": 14399 }, { "epoch": 47.21311475409836, "grad_norm": 11.12633228302002, "learning_rate": 1.1383960775379268e-05, "loss": 0.5208, "step": 14400 }, { "epoch": 47.21639344262295, "grad_norm": 6.553125381469727, "learning_rate": 1.1382909099170032e-05, "loss": 0.8773, "step": 14401 }, { "epoch": 47.21967213114754, "grad_norm": 10.48959732055664, "learning_rate": 1.1381857407367055e-05, "loss": 0.7536, "step": 14402 }, { "epoch": 47.22295081967213, "grad_norm": 9.107447624206543, "learning_rate": 1.13808056999822e-05, "loss": 1.0009, "step": 14403 }, { "epoch": 47.226229508196724, "grad_norm": 6.553058624267578, "learning_rate": 1.1379753977027323e-05, "loss": 0.8259, "step": 14404 }, { "epoch": 47.22950819672131, "grad_norm": 9.77346134185791, "learning_rate": 1.137870223851429e-05, "loss": 0.4783, "step": 14405 }, { "epoch": 47.2327868852459, "grad_norm": 7.32144021987915, "learning_rate": 1.1377650484454952e-05, "loss": 0.6928, "step": 14406 }, { "epoch": 47.23606557377049, "grad_norm": 11.31878662109375, "learning_rate": 1.1376598714861176e-05, "loss": 0.5993, "step": 14407 }, { "epoch": 47.239344262295084, "grad_norm": 6.449933052062988, "learning_rate": 1.1375546929744814e-05, "loss": 0.6234, "step": 14408 }, { "epoch": 47.24262295081967, "grad_norm": 8.735708236694336, "learning_rate": 1.1374495129117732e-05, "loss": 0.5266, "step": 14409 }, { "epoch": 47.24590163934426, "grad_norm": 6.7100629806518555, "learning_rate": 1.1373443312991787e-05, "loss": 0.6088, "step": 14410 }, { "epoch": 47.24918032786885, "grad_norm": 7.641383171081543, "learning_rate": 1.1372391481378844e-05, "loss": 0.6763, "step": 14411 }, { "epoch": 47.252459016393445, "grad_norm": 7.1328206062316895, "learning_rate": 1.1371339634290757e-05, "loss": 0.3617, "step": 14412 }, { "epoch": 47.25573770491803, "grad_norm": 8.178085327148438, "learning_rate": 1.1370287771739392e-05, "loss": 0.7825, "step": 14413 }, { "epoch": 47.25901639344262, "grad_norm": 16.44693374633789, "learning_rate": 1.1369235893736608e-05, "loss": 0.5551, "step": 14414 }, { "epoch": 47.26229508196721, "grad_norm": 8.583182334899902, "learning_rate": 1.1368184000294263e-05, "loss": 0.6068, "step": 14415 }, { "epoch": 47.265573770491805, "grad_norm": 6.704803943634033, "learning_rate": 1.1367132091424223e-05, "loss": 0.8093, "step": 14416 }, { "epoch": 47.268852459016394, "grad_norm": 8.244104385375977, "learning_rate": 1.1366080167138345e-05, "loss": 0.7, "step": 14417 }, { "epoch": 47.27213114754098, "grad_norm": 26.692026138305664, "learning_rate": 1.1365028227448496e-05, "loss": 0.7543, "step": 14418 }, { "epoch": 47.27540983606557, "grad_norm": 12.572519302368164, "learning_rate": 1.136397627236653e-05, "loss": 0.812, "step": 14419 }, { "epoch": 47.278688524590166, "grad_norm": 8.89319133758545, "learning_rate": 1.1362924301904316e-05, "loss": 0.6807, "step": 14420 }, { "epoch": 47.281967213114754, "grad_norm": 7.814323425292969, "learning_rate": 1.1361872316073714e-05, "loss": 0.5533, "step": 14421 }, { "epoch": 47.28524590163934, "grad_norm": 8.213421821594238, "learning_rate": 1.1360820314886585e-05, "loss": 0.6259, "step": 14422 }, { "epoch": 47.28852459016394, "grad_norm": 10.847583770751953, "learning_rate": 1.1359768298354793e-05, "loss": 0.9259, "step": 14423 }, { "epoch": 47.291803278688526, "grad_norm": 6.457909107208252, "learning_rate": 1.1358716266490198e-05, "loss": 0.7253, "step": 14424 }, { "epoch": 47.295081967213115, "grad_norm": 7.194932460784912, "learning_rate": 1.1357664219304665e-05, "loss": 0.752, "step": 14425 }, { "epoch": 47.2983606557377, "grad_norm": 6.034521579742432, "learning_rate": 1.1356612156810054e-05, "loss": 0.6906, "step": 14426 }, { "epoch": 47.3016393442623, "grad_norm": 11.558934211730957, "learning_rate": 1.1355560079018232e-05, "loss": 0.6918, "step": 14427 }, { "epoch": 47.30491803278689, "grad_norm": 8.308802604675293, "learning_rate": 1.1354507985941062e-05, "loss": 0.7613, "step": 14428 }, { "epoch": 47.308196721311475, "grad_norm": 6.769078254699707, "learning_rate": 1.1353455877590407e-05, "loss": 0.4373, "step": 14429 }, { "epoch": 47.31147540983606, "grad_norm": 10.363658905029297, "learning_rate": 1.1352403753978128e-05, "loss": 0.665, "step": 14430 }, { "epoch": 47.31475409836066, "grad_norm": 10.089784622192383, "learning_rate": 1.135135161511609e-05, "loss": 0.7628, "step": 14431 }, { "epoch": 47.31803278688525, "grad_norm": 6.837869167327881, "learning_rate": 1.1350299461016156e-05, "loss": 0.6285, "step": 14432 }, { "epoch": 47.321311475409836, "grad_norm": 11.690046310424805, "learning_rate": 1.1349247291690198e-05, "loss": 0.8896, "step": 14433 }, { "epoch": 47.324590163934424, "grad_norm": 7.718936920166016, "learning_rate": 1.1348195107150066e-05, "loss": 0.7378, "step": 14434 }, { "epoch": 47.32786885245902, "grad_norm": 13.594975471496582, "learning_rate": 1.1347142907407638e-05, "loss": 0.5453, "step": 14435 }, { "epoch": 47.33114754098361, "grad_norm": 7.675743103027344, "learning_rate": 1.134609069247477e-05, "loss": 0.7875, "step": 14436 }, { "epoch": 47.334426229508196, "grad_norm": 16.169658660888672, "learning_rate": 1.1345038462363332e-05, "loss": 0.7409, "step": 14437 }, { "epoch": 47.337704918032784, "grad_norm": 9.589637756347656, "learning_rate": 1.1343986217085185e-05, "loss": 0.8102, "step": 14438 }, { "epoch": 47.34098360655738, "grad_norm": 7.9305620193481445, "learning_rate": 1.1342933956652199e-05, "loss": 0.6926, "step": 14439 }, { "epoch": 47.34426229508197, "grad_norm": 24.287521362304688, "learning_rate": 1.1341881681076235e-05, "loss": 0.6314, "step": 14440 }, { "epoch": 47.34754098360656, "grad_norm": 6.005110263824463, "learning_rate": 1.1340829390369156e-05, "loss": 0.505, "step": 14441 }, { "epoch": 47.350819672131145, "grad_norm": 7.574885845184326, "learning_rate": 1.1339777084542836e-05, "loss": 0.6831, "step": 14442 }, { "epoch": 47.35409836065574, "grad_norm": 8.365605354309082, "learning_rate": 1.1338724763609134e-05, "loss": 0.8467, "step": 14443 }, { "epoch": 47.35737704918033, "grad_norm": 7.9090471267700195, "learning_rate": 1.1337672427579923e-05, "loss": 0.6959, "step": 14444 }, { "epoch": 47.36065573770492, "grad_norm": 6.161830902099609, "learning_rate": 1.1336620076467057e-05, "loss": 0.7703, "step": 14445 }, { "epoch": 47.363934426229505, "grad_norm": 8.438318252563477, "learning_rate": 1.1335567710282416e-05, "loss": 0.6407, "step": 14446 }, { "epoch": 47.3672131147541, "grad_norm": 16.065338134765625, "learning_rate": 1.1334515329037857e-05, "loss": 0.8017, "step": 14447 }, { "epoch": 47.37049180327869, "grad_norm": 8.969412803649902, "learning_rate": 1.1333462932745252e-05, "loss": 0.6402, "step": 14448 }, { "epoch": 47.37377049180328, "grad_norm": 7.766477584838867, "learning_rate": 1.1332410521416463e-05, "loss": 0.7308, "step": 14449 }, { "epoch": 47.377049180327866, "grad_norm": 6.551641464233398, "learning_rate": 1.1331358095063365e-05, "loss": 0.7886, "step": 14450 }, { "epoch": 47.38032786885246, "grad_norm": 8.714055061340332, "learning_rate": 1.1330305653697815e-05, "loss": 0.6869, "step": 14451 }, { "epoch": 47.38360655737705, "grad_norm": 9.08730697631836, "learning_rate": 1.1329253197331692e-05, "loss": 0.7822, "step": 14452 }, { "epoch": 47.38688524590164, "grad_norm": 7.054843425750732, "learning_rate": 1.132820072597685e-05, "loss": 0.6798, "step": 14453 }, { "epoch": 47.390163934426226, "grad_norm": 9.15948486328125, "learning_rate": 1.1327148239645167e-05, "loss": 0.4602, "step": 14454 }, { "epoch": 47.39344262295082, "grad_norm": 5.888309001922607, "learning_rate": 1.1326095738348508e-05, "loss": 0.2903, "step": 14455 }, { "epoch": 47.39672131147541, "grad_norm": 6.699843406677246, "learning_rate": 1.1325043222098739e-05, "loss": 0.5551, "step": 14456 }, { "epoch": 47.4, "grad_norm": 7.598295211791992, "learning_rate": 1.1323990690907734e-05, "loss": 0.6084, "step": 14457 }, { "epoch": 47.40327868852459, "grad_norm": 9.24414348602295, "learning_rate": 1.1322938144787352e-05, "loss": 0.5026, "step": 14458 }, { "epoch": 47.40655737704918, "grad_norm": 7.2904582023620605, "learning_rate": 1.132188558374947e-05, "loss": 0.8163, "step": 14459 }, { "epoch": 47.40983606557377, "grad_norm": 9.053532600402832, "learning_rate": 1.1320833007805953e-05, "loss": 0.8788, "step": 14460 }, { "epoch": 47.41311475409836, "grad_norm": 6.136829376220703, "learning_rate": 1.1319780416968673e-05, "loss": 0.566, "step": 14461 }, { "epoch": 47.41639344262295, "grad_norm": 7.6558027267456055, "learning_rate": 1.1318727811249493e-05, "loss": 0.5421, "step": 14462 }, { "epoch": 47.41967213114754, "grad_norm": 14.096968650817871, "learning_rate": 1.131767519066029e-05, "loss": 0.7506, "step": 14463 }, { "epoch": 47.42295081967213, "grad_norm": 9.002367973327637, "learning_rate": 1.1316622555212924e-05, "loss": 0.6258, "step": 14464 }, { "epoch": 47.42622950819672, "grad_norm": 13.139172554016113, "learning_rate": 1.1315569904919273e-05, "loss": 0.7759, "step": 14465 }, { "epoch": 47.429508196721315, "grad_norm": 7.820565700531006, "learning_rate": 1.1314517239791204e-05, "loss": 0.8317, "step": 14466 }, { "epoch": 47.4327868852459, "grad_norm": 9.29902172088623, "learning_rate": 1.1313464559840583e-05, "loss": 0.7785, "step": 14467 }, { "epoch": 47.43606557377049, "grad_norm": 5.740995407104492, "learning_rate": 1.1312411865079286e-05, "loss": 0.6315, "step": 14468 }, { "epoch": 47.43934426229508, "grad_norm": 15.860880851745605, "learning_rate": 1.1311359155519179e-05, "loss": 0.7198, "step": 14469 }, { "epoch": 47.442622950819676, "grad_norm": 8.00480842590332, "learning_rate": 1.1310306431172138e-05, "loss": 0.731, "step": 14470 }, { "epoch": 47.445901639344264, "grad_norm": 10.427465438842773, "learning_rate": 1.1309253692050025e-05, "loss": 0.6601, "step": 14471 }, { "epoch": 47.44918032786885, "grad_norm": 16.032556533813477, "learning_rate": 1.1308200938164717e-05, "loss": 0.5546, "step": 14472 }, { "epoch": 47.45245901639344, "grad_norm": 8.605466842651367, "learning_rate": 1.1307148169528085e-05, "loss": 0.6298, "step": 14473 }, { "epoch": 47.455737704918036, "grad_norm": 8.299731254577637, "learning_rate": 1.1306095386151997e-05, "loss": 0.4469, "step": 14474 }, { "epoch": 47.459016393442624, "grad_norm": 9.323274612426758, "learning_rate": 1.1305042588048323e-05, "loss": 0.4859, "step": 14475 }, { "epoch": 47.46229508196721, "grad_norm": 7.314053058624268, "learning_rate": 1.1303989775228942e-05, "loss": 0.4946, "step": 14476 }, { "epoch": 47.4655737704918, "grad_norm": 7.24358606338501, "learning_rate": 1.1302936947705713e-05, "loss": 0.5057, "step": 14477 }, { "epoch": 47.4688524590164, "grad_norm": 10.061698913574219, "learning_rate": 1.1301884105490522e-05, "loss": 0.6742, "step": 14478 }, { "epoch": 47.472131147540985, "grad_norm": 9.0794677734375, "learning_rate": 1.130083124859523e-05, "loss": 0.6013, "step": 14479 }, { "epoch": 47.47540983606557, "grad_norm": 9.584107398986816, "learning_rate": 1.1299778377031713e-05, "loss": 0.6004, "step": 14480 }, { "epoch": 47.47868852459016, "grad_norm": 20.357725143432617, "learning_rate": 1.1298725490811848e-05, "loss": 0.8061, "step": 14481 }, { "epoch": 47.48196721311476, "grad_norm": 7.031754970550537, "learning_rate": 1.12976725899475e-05, "loss": 0.5719, "step": 14482 }, { "epoch": 47.485245901639345, "grad_norm": 6.822843551635742, "learning_rate": 1.1296619674450545e-05, "loss": 0.3989, "step": 14483 }, { "epoch": 47.488524590163934, "grad_norm": 8.939494132995605, "learning_rate": 1.1295566744332853e-05, "loss": 0.5989, "step": 14484 }, { "epoch": 47.49180327868852, "grad_norm": 11.926737785339355, "learning_rate": 1.1294513799606301e-05, "loss": 0.7396, "step": 14485 }, { "epoch": 47.49508196721312, "grad_norm": 7.499920845031738, "learning_rate": 1.1293460840282759e-05, "loss": 0.587, "step": 14486 }, { "epoch": 47.498360655737706, "grad_norm": 6.692158222198486, "learning_rate": 1.1292407866374102e-05, "loss": 0.7394, "step": 14487 }, { "epoch": 47.501639344262294, "grad_norm": 8.160247802734375, "learning_rate": 1.12913548778922e-05, "loss": 0.6356, "step": 14488 }, { "epoch": 47.50491803278688, "grad_norm": 8.697250366210938, "learning_rate": 1.1290301874848932e-05, "loss": 0.5661, "step": 14489 }, { "epoch": 47.50819672131148, "grad_norm": 9.137574195861816, "learning_rate": 1.1289248857256167e-05, "loss": 0.6078, "step": 14490 }, { "epoch": 47.511475409836066, "grad_norm": 10.038643836975098, "learning_rate": 1.1288195825125783e-05, "loss": 0.6475, "step": 14491 }, { "epoch": 47.514754098360655, "grad_norm": 21.270679473876953, "learning_rate": 1.128714277846965e-05, "loss": 0.7573, "step": 14492 }, { "epoch": 47.51803278688524, "grad_norm": 7.2724528312683105, "learning_rate": 1.1286089717299645e-05, "loss": 0.5681, "step": 14493 }, { "epoch": 47.52131147540984, "grad_norm": 8.998860359191895, "learning_rate": 1.128503664162764e-05, "loss": 0.5622, "step": 14494 }, { "epoch": 47.52459016393443, "grad_norm": 5.6100077629089355, "learning_rate": 1.1283983551465512e-05, "loss": 0.4877, "step": 14495 }, { "epoch": 47.527868852459015, "grad_norm": 7.243520736694336, "learning_rate": 1.1282930446825133e-05, "loss": 0.6582, "step": 14496 }, { "epoch": 47.5311475409836, "grad_norm": 6.234350681304932, "learning_rate": 1.128187732771838e-05, "loss": 0.5715, "step": 14497 }, { "epoch": 47.5344262295082, "grad_norm": 32.808963775634766, "learning_rate": 1.1280824194157127e-05, "loss": 0.5663, "step": 14498 }, { "epoch": 47.53770491803279, "grad_norm": 12.314846992492676, "learning_rate": 1.1279771046153249e-05, "loss": 0.5775, "step": 14499 }, { "epoch": 47.540983606557376, "grad_norm": 12.08750057220459, "learning_rate": 1.1278717883718624e-05, "loss": 0.4563, "step": 14500 }, { "epoch": 47.544262295081964, "grad_norm": 11.57666301727295, "learning_rate": 1.127766470686512e-05, "loss": 0.3353, "step": 14501 }, { "epoch": 47.54754098360656, "grad_norm": 9.046636581420898, "learning_rate": 1.1276611515604625e-05, "loss": 0.6662, "step": 14502 }, { "epoch": 47.55081967213115, "grad_norm": 9.549796104431152, "learning_rate": 1.1275558309949004e-05, "loss": 0.7992, "step": 14503 }, { "epoch": 47.554098360655736, "grad_norm": 6.112124919891357, "learning_rate": 1.1274505089910137e-05, "loss": 0.9629, "step": 14504 }, { "epoch": 47.557377049180324, "grad_norm": 22.809240341186523, "learning_rate": 1.12734518554999e-05, "loss": 0.6309, "step": 14505 }, { "epoch": 47.56065573770492, "grad_norm": 7.244279861450195, "learning_rate": 1.1272398606730171e-05, "loss": 0.4462, "step": 14506 }, { "epoch": 47.56393442622951, "grad_norm": 7.080979824066162, "learning_rate": 1.1271345343612823e-05, "loss": 1.0116, "step": 14507 }, { "epoch": 47.5672131147541, "grad_norm": 5.879312992095947, "learning_rate": 1.1270292066159732e-05, "loss": 0.5894, "step": 14508 }, { "epoch": 47.570491803278685, "grad_norm": 7.7225165367126465, "learning_rate": 1.1269238774382782e-05, "loss": 0.5125, "step": 14509 }, { "epoch": 47.57377049180328, "grad_norm": 9.199434280395508, "learning_rate": 1.1268185468293843e-05, "loss": 0.6867, "step": 14510 }, { "epoch": 47.57704918032787, "grad_norm": 9.12990665435791, "learning_rate": 1.1267132147904794e-05, "loss": 0.5573, "step": 14511 }, { "epoch": 47.58032786885246, "grad_norm": 10.077249526977539, "learning_rate": 1.1266078813227512e-05, "loss": 0.5341, "step": 14512 }, { "epoch": 47.58360655737705, "grad_norm": 9.353693962097168, "learning_rate": 1.1265025464273878e-05, "loss": 0.8879, "step": 14513 }, { "epoch": 47.58688524590164, "grad_norm": 7.182474136352539, "learning_rate": 1.1263972101055763e-05, "loss": 0.5033, "step": 14514 }, { "epoch": 47.59016393442623, "grad_norm": 5.765330791473389, "learning_rate": 1.126291872358505e-05, "loss": 0.7014, "step": 14515 }, { "epoch": 47.59344262295082, "grad_norm": 5.946852207183838, "learning_rate": 1.1261865331873613e-05, "loss": 0.5734, "step": 14516 }, { "epoch": 47.59672131147541, "grad_norm": 7.215704917907715, "learning_rate": 1.1260811925933337e-05, "loss": 0.5934, "step": 14517 }, { "epoch": 47.6, "grad_norm": 6.152772903442383, "learning_rate": 1.1259758505776092e-05, "loss": 0.7282, "step": 14518 }, { "epoch": 47.60327868852459, "grad_norm": 7.958339214324951, "learning_rate": 1.1258705071413761e-05, "loss": 0.5689, "step": 14519 }, { "epoch": 47.60655737704918, "grad_norm": 19.729305267333984, "learning_rate": 1.1257651622858224e-05, "loss": 0.5327, "step": 14520 }, { "epoch": 47.609836065573774, "grad_norm": 7.812675476074219, "learning_rate": 1.1256598160121353e-05, "loss": 0.6306, "step": 14521 }, { "epoch": 47.61311475409836, "grad_norm": 5.9514312744140625, "learning_rate": 1.1255544683215033e-05, "loss": 0.8114, "step": 14522 }, { "epoch": 47.61639344262295, "grad_norm": 15.244545936584473, "learning_rate": 1.1254491192151143e-05, "loss": 0.7661, "step": 14523 }, { "epoch": 47.61967213114754, "grad_norm": 10.155989646911621, "learning_rate": 1.125343768694156e-05, "loss": 0.835, "step": 14524 }, { "epoch": 47.622950819672134, "grad_norm": 7.312291145324707, "learning_rate": 1.1252384167598161e-05, "loss": 0.5116, "step": 14525 }, { "epoch": 47.62622950819672, "grad_norm": 7.589524269104004, "learning_rate": 1.1251330634132831e-05, "loss": 0.405, "step": 14526 }, { "epoch": 47.62950819672131, "grad_norm": 7.327742576599121, "learning_rate": 1.1250277086557443e-05, "loss": 0.7237, "step": 14527 }, { "epoch": 47.6327868852459, "grad_norm": 8.61176586151123, "learning_rate": 1.1249223524883888e-05, "loss": 0.5728, "step": 14528 }, { "epoch": 47.636065573770495, "grad_norm": 9.214277267456055, "learning_rate": 1.1248169949124035e-05, "loss": 0.5229, "step": 14529 }, { "epoch": 47.63934426229508, "grad_norm": 6.988722324371338, "learning_rate": 1.1247116359289767e-05, "loss": 0.5548, "step": 14530 }, { "epoch": 47.64262295081967, "grad_norm": 26.04941177368164, "learning_rate": 1.1246062755392966e-05, "loss": 0.8069, "step": 14531 }, { "epoch": 47.64590163934426, "grad_norm": 7.219985008239746, "learning_rate": 1.1245009137445513e-05, "loss": 0.746, "step": 14532 }, { "epoch": 47.649180327868855, "grad_norm": 9.48386001586914, "learning_rate": 1.1243955505459286e-05, "loss": 0.8479, "step": 14533 }, { "epoch": 47.65245901639344, "grad_norm": 6.328178405761719, "learning_rate": 1.1242901859446169e-05, "loss": 0.4578, "step": 14534 }, { "epoch": 47.65573770491803, "grad_norm": 6.285423278808594, "learning_rate": 1.124184819941804e-05, "loss": 0.6868, "step": 14535 }, { "epoch": 47.65901639344262, "grad_norm": 13.495779037475586, "learning_rate": 1.1240794525386778e-05, "loss": 0.635, "step": 14536 }, { "epoch": 47.662295081967216, "grad_norm": 6.964535236358643, "learning_rate": 1.1239740837364272e-05, "loss": 0.5525, "step": 14537 }, { "epoch": 47.665573770491804, "grad_norm": 7.37103796005249, "learning_rate": 1.1238687135362398e-05, "loss": 0.4444, "step": 14538 }, { "epoch": 47.66885245901639, "grad_norm": 16.45014190673828, "learning_rate": 1.1237633419393042e-05, "loss": 0.7421, "step": 14539 }, { "epoch": 47.67213114754098, "grad_norm": 8.931370735168457, "learning_rate": 1.1236579689468076e-05, "loss": 0.5517, "step": 14540 }, { "epoch": 47.675409836065576, "grad_norm": 6.600937366485596, "learning_rate": 1.1235525945599393e-05, "loss": 0.5273, "step": 14541 }, { "epoch": 47.678688524590164, "grad_norm": 7.636086940765381, "learning_rate": 1.123447218779887e-05, "loss": 0.8025, "step": 14542 }, { "epoch": 47.68196721311475, "grad_norm": 8.959543228149414, "learning_rate": 1.1233418416078388e-05, "loss": 0.7158, "step": 14543 }, { "epoch": 47.68524590163934, "grad_norm": 6.584624767303467, "learning_rate": 1.1232364630449832e-05, "loss": 0.5894, "step": 14544 }, { "epoch": 47.68852459016394, "grad_norm": 9.517610549926758, "learning_rate": 1.1231310830925082e-05, "loss": 0.5455, "step": 14545 }, { "epoch": 47.691803278688525, "grad_norm": 7.516571521759033, "learning_rate": 1.1230257017516024e-05, "loss": 0.7103, "step": 14546 }, { "epoch": 47.69508196721311, "grad_norm": 6.985122203826904, "learning_rate": 1.1229203190234537e-05, "loss": 0.4881, "step": 14547 }, { "epoch": 47.6983606557377, "grad_norm": 8.745777130126953, "learning_rate": 1.1228149349092507e-05, "loss": 0.5117, "step": 14548 }, { "epoch": 47.7016393442623, "grad_norm": 8.080376625061035, "learning_rate": 1.1227095494101815e-05, "loss": 0.8031, "step": 14549 }, { "epoch": 47.704918032786885, "grad_norm": 7.108006000518799, "learning_rate": 1.122604162527435e-05, "loss": 0.5458, "step": 14550 }, { "epoch": 47.708196721311474, "grad_norm": 9.408122062683105, "learning_rate": 1.1224987742621984e-05, "loss": 0.5758, "step": 14551 }, { "epoch": 47.71147540983607, "grad_norm": 5.838986873626709, "learning_rate": 1.1223933846156613e-05, "loss": 0.8006, "step": 14552 }, { "epoch": 47.71475409836066, "grad_norm": 8.532038688659668, "learning_rate": 1.1222879935890112e-05, "loss": 0.828, "step": 14553 }, { "epoch": 47.718032786885246, "grad_norm": 8.01078987121582, "learning_rate": 1.1221826011834371e-05, "loss": 0.7592, "step": 14554 }, { "epoch": 47.721311475409834, "grad_norm": 22.065210342407227, "learning_rate": 1.1220772074001272e-05, "loss": 0.504, "step": 14555 }, { "epoch": 47.72459016393443, "grad_norm": 6.335508346557617, "learning_rate": 1.1219718122402695e-05, "loss": 0.4425, "step": 14556 }, { "epoch": 47.72786885245902, "grad_norm": 6.454474925994873, "learning_rate": 1.121866415705053e-05, "loss": 0.7523, "step": 14557 }, { "epoch": 47.731147540983606, "grad_norm": 11.272236824035645, "learning_rate": 1.1217610177956657e-05, "loss": 0.5785, "step": 14558 }, { "epoch": 47.734426229508195, "grad_norm": 7.424137115478516, "learning_rate": 1.1216556185132966e-05, "loss": 0.7725, "step": 14559 }, { "epoch": 47.73770491803279, "grad_norm": 8.653793334960938, "learning_rate": 1.1215502178591337e-05, "loss": 0.7794, "step": 14560 }, { "epoch": 47.74098360655738, "grad_norm": 14.035238265991211, "learning_rate": 1.1214448158343658e-05, "loss": 0.7067, "step": 14561 }, { "epoch": 47.74426229508197, "grad_norm": 6.922148704528809, "learning_rate": 1.1213394124401813e-05, "loss": 0.5367, "step": 14562 }, { "epoch": 47.747540983606555, "grad_norm": 8.09522819519043, "learning_rate": 1.1212340076777691e-05, "loss": 0.8322, "step": 14563 }, { "epoch": 47.75081967213115, "grad_norm": 7.832727909088135, "learning_rate": 1.121128601548317e-05, "loss": 0.7438, "step": 14564 }, { "epoch": 47.75409836065574, "grad_norm": 7.924817085266113, "learning_rate": 1.1210231940530141e-05, "loss": 0.5042, "step": 14565 }, { "epoch": 47.75737704918033, "grad_norm": 6.251512050628662, "learning_rate": 1.1209177851930488e-05, "loss": 0.6137, "step": 14566 }, { "epoch": 47.760655737704916, "grad_norm": 8.96841049194336, "learning_rate": 1.12081237496961e-05, "loss": 0.6754, "step": 14567 }, { "epoch": 47.76393442622951, "grad_norm": 11.492025375366211, "learning_rate": 1.120706963383886e-05, "loss": 0.4252, "step": 14568 }, { "epoch": 47.7672131147541, "grad_norm": 11.132725715637207, "learning_rate": 1.1206015504370653e-05, "loss": 0.669, "step": 14569 }, { "epoch": 47.77049180327869, "grad_norm": 8.291845321655273, "learning_rate": 1.1204961361303368e-05, "loss": 0.8545, "step": 14570 }, { "epoch": 47.773770491803276, "grad_norm": 6.459846019744873, "learning_rate": 1.120390720464889e-05, "loss": 0.5704, "step": 14571 }, { "epoch": 47.77704918032787, "grad_norm": 6.806398391723633, "learning_rate": 1.1202853034419108e-05, "loss": 0.8789, "step": 14572 }, { "epoch": 47.78032786885246, "grad_norm": 7.1875152587890625, "learning_rate": 1.1201798850625906e-05, "loss": 0.7659, "step": 14573 }, { "epoch": 47.78360655737705, "grad_norm": 7.5925774574279785, "learning_rate": 1.1200744653281175e-05, "loss": 0.7283, "step": 14574 }, { "epoch": 47.78688524590164, "grad_norm": 7.358438491821289, "learning_rate": 1.1199690442396795e-05, "loss": 0.594, "step": 14575 }, { "epoch": 47.79016393442623, "grad_norm": 10.564146041870117, "learning_rate": 1.1198636217984662e-05, "loss": 0.5806, "step": 14576 }, { "epoch": 47.79344262295082, "grad_norm": 6.307643890380859, "learning_rate": 1.1197581980056658e-05, "loss": 0.3974, "step": 14577 }, { "epoch": 47.79672131147541, "grad_norm": 9.148856163024902, "learning_rate": 1.1196527728624672e-05, "loss": 0.5119, "step": 14578 }, { "epoch": 47.8, "grad_norm": 6.1910481452941895, "learning_rate": 1.119547346370059e-05, "loss": 0.4112, "step": 14579 }, { "epoch": 47.80327868852459, "grad_norm": 7.699193000793457, "learning_rate": 1.1194419185296305e-05, "loss": 0.5744, "step": 14580 }, { "epoch": 47.80655737704918, "grad_norm": 14.052153587341309, "learning_rate": 1.1193364893423702e-05, "loss": 0.5309, "step": 14581 }, { "epoch": 47.80983606557377, "grad_norm": 9.949347496032715, "learning_rate": 1.1192310588094666e-05, "loss": 0.5339, "step": 14582 }, { "epoch": 47.81311475409836, "grad_norm": 7.470284938812256, "learning_rate": 1.1191256269321092e-05, "loss": 0.7647, "step": 14583 }, { "epoch": 47.81639344262295, "grad_norm": 5.830341339111328, "learning_rate": 1.119020193711486e-05, "loss": 0.679, "step": 14584 }, { "epoch": 47.81967213114754, "grad_norm": 9.37097454071045, "learning_rate": 1.1189147591487867e-05, "loss": 0.4979, "step": 14585 }, { "epoch": 47.82295081967213, "grad_norm": 6.902936935424805, "learning_rate": 1.1188093232451997e-05, "loss": 0.4968, "step": 14586 }, { "epoch": 47.82622950819672, "grad_norm": 8.527812957763672, "learning_rate": 1.1187038860019142e-05, "loss": 0.6629, "step": 14587 }, { "epoch": 47.829508196721314, "grad_norm": 7.644223690032959, "learning_rate": 1.1185984474201188e-05, "loss": 0.534, "step": 14588 }, { "epoch": 47.8327868852459, "grad_norm": 8.904035568237305, "learning_rate": 1.1184930075010025e-05, "loss": 0.6584, "step": 14589 }, { "epoch": 47.83606557377049, "grad_norm": 6.176032066345215, "learning_rate": 1.1183875662457546e-05, "loss": 0.5039, "step": 14590 }, { "epoch": 47.83934426229508, "grad_norm": 12.070159912109375, "learning_rate": 1.118282123655564e-05, "loss": 0.6914, "step": 14591 }, { "epoch": 47.842622950819674, "grad_norm": 7.015324115753174, "learning_rate": 1.118176679731619e-05, "loss": 0.6591, "step": 14592 }, { "epoch": 47.84590163934426, "grad_norm": 11.67524528503418, "learning_rate": 1.1180712344751092e-05, "loss": 0.5247, "step": 14593 }, { "epoch": 47.84918032786885, "grad_norm": 29.422834396362305, "learning_rate": 1.1179657878872236e-05, "loss": 0.5735, "step": 14594 }, { "epoch": 47.85245901639344, "grad_norm": 6.8764729499816895, "learning_rate": 1.117860339969151e-05, "loss": 0.6354, "step": 14595 }, { "epoch": 47.855737704918035, "grad_norm": 8.685946464538574, "learning_rate": 1.1177548907220805e-05, "loss": 0.47, "step": 14596 }, { "epoch": 47.85901639344262, "grad_norm": 7.6567888259887695, "learning_rate": 1.1176494401472013e-05, "loss": 0.5619, "step": 14597 }, { "epoch": 47.86229508196721, "grad_norm": 8.124894142150879, "learning_rate": 1.1175439882457022e-05, "loss": 0.6962, "step": 14598 }, { "epoch": 47.86557377049181, "grad_norm": 6.514386177062988, "learning_rate": 1.1174385350187723e-05, "loss": 0.8953, "step": 14599 }, { "epoch": 47.868852459016395, "grad_norm": 7.05754280090332, "learning_rate": 1.1173330804676012e-05, "loss": 0.5104, "step": 14600 }, { "epoch": 47.87213114754098, "grad_norm": 6.400777816772461, "learning_rate": 1.1172276245933772e-05, "loss": 0.75, "step": 14601 }, { "epoch": 47.87540983606557, "grad_norm": 7.076447486877441, "learning_rate": 1.1171221673972903e-05, "loss": 0.6051, "step": 14602 }, { "epoch": 47.87868852459017, "grad_norm": 7.690132141113281, "learning_rate": 1.1170167088805289e-05, "loss": 0.902, "step": 14603 }, { "epoch": 47.881967213114756, "grad_norm": 8.553336143493652, "learning_rate": 1.1169112490442826e-05, "loss": 0.4224, "step": 14604 }, { "epoch": 47.885245901639344, "grad_norm": 7.091878890991211, "learning_rate": 1.11680578788974e-05, "loss": 0.6902, "step": 14605 }, { "epoch": 47.88852459016393, "grad_norm": 6.242142200469971, "learning_rate": 1.1167003254180913e-05, "loss": 0.7043, "step": 14606 }, { "epoch": 47.89180327868853, "grad_norm": 7.945483207702637, "learning_rate": 1.1165948616305249e-05, "loss": 0.7636, "step": 14607 }, { "epoch": 47.895081967213116, "grad_norm": 7.335243225097656, "learning_rate": 1.11648939652823e-05, "loss": 0.5497, "step": 14608 }, { "epoch": 47.898360655737704, "grad_norm": 12.003118515014648, "learning_rate": 1.116383930112396e-05, "loss": 0.5894, "step": 14609 }, { "epoch": 47.90163934426229, "grad_norm": 9.2124605178833, "learning_rate": 1.1162784623842123e-05, "loss": 0.7408, "step": 14610 }, { "epoch": 47.90491803278689, "grad_norm": 10.438623428344727, "learning_rate": 1.1161729933448681e-05, "loss": 0.6623, "step": 14611 }, { "epoch": 47.90819672131148, "grad_norm": 5.739452838897705, "learning_rate": 1.1160675229955525e-05, "loss": 0.4339, "step": 14612 }, { "epoch": 47.911475409836065, "grad_norm": 8.94340991973877, "learning_rate": 1.1159620513374547e-05, "loss": 0.5207, "step": 14613 }, { "epoch": 47.91475409836065, "grad_norm": 11.35814094543457, "learning_rate": 1.1158565783717646e-05, "loss": 0.6036, "step": 14614 }, { "epoch": 47.91803278688525, "grad_norm": 8.65925121307373, "learning_rate": 1.115751104099671e-05, "loss": 0.4794, "step": 14615 }, { "epoch": 47.92131147540984, "grad_norm": 6.893983840942383, "learning_rate": 1.1156456285223634e-05, "loss": 0.5137, "step": 14616 }, { "epoch": 47.924590163934425, "grad_norm": 6.90493631362915, "learning_rate": 1.1155401516410307e-05, "loss": 0.58, "step": 14617 }, { "epoch": 47.927868852459014, "grad_norm": 49.23220443725586, "learning_rate": 1.1154346734568629e-05, "loss": 0.666, "step": 14618 }, { "epoch": 47.93114754098361, "grad_norm": 14.828032493591309, "learning_rate": 1.1153291939710494e-05, "loss": 0.7086, "step": 14619 }, { "epoch": 47.9344262295082, "grad_norm": 17.698549270629883, "learning_rate": 1.1152237131847793e-05, "loss": 0.7869, "step": 14620 }, { "epoch": 47.937704918032786, "grad_norm": 8.786402702331543, "learning_rate": 1.1151182310992417e-05, "loss": 0.4597, "step": 14621 }, { "epoch": 47.940983606557374, "grad_norm": 11.543583869934082, "learning_rate": 1.1150127477156266e-05, "loss": 0.7835, "step": 14622 }, { "epoch": 47.94426229508197, "grad_norm": 20.309186935424805, "learning_rate": 1.114907263035123e-05, "loss": 0.6852, "step": 14623 }, { "epoch": 47.94754098360656, "grad_norm": 6.587282180786133, "learning_rate": 1.1148017770589209e-05, "loss": 0.6704, "step": 14624 }, { "epoch": 47.950819672131146, "grad_norm": 7.393466949462891, "learning_rate": 1.1146962897882088e-05, "loss": 0.6361, "step": 14625 }, { "epoch": 47.954098360655735, "grad_norm": 10.804945945739746, "learning_rate": 1.1145908012241772e-05, "loss": 0.717, "step": 14626 }, { "epoch": 47.95737704918033, "grad_norm": 6.276712417602539, "learning_rate": 1.114485311368015e-05, "loss": 0.7423, "step": 14627 }, { "epoch": 47.96065573770492, "grad_norm": 9.443087577819824, "learning_rate": 1.1143798202209122e-05, "loss": 0.731, "step": 14628 }, { "epoch": 47.96393442622951, "grad_norm": 10.294981002807617, "learning_rate": 1.1142743277840575e-05, "loss": 0.4747, "step": 14629 }, { "epoch": 47.967213114754095, "grad_norm": 8.175029754638672, "learning_rate": 1.1141688340586415e-05, "loss": 0.7142, "step": 14630 }, { "epoch": 47.97049180327869, "grad_norm": 8.608470916748047, "learning_rate": 1.1140633390458526e-05, "loss": 0.5284, "step": 14631 }, { "epoch": 47.97377049180328, "grad_norm": 6.855884075164795, "learning_rate": 1.1139578427468813e-05, "loss": 0.9712, "step": 14632 }, { "epoch": 47.97704918032787, "grad_norm": 6.866791248321533, "learning_rate": 1.113852345162917e-05, "loss": 0.6234, "step": 14633 }, { "epoch": 47.980327868852456, "grad_norm": 5.635359764099121, "learning_rate": 1.1137468462951489e-05, "loss": 0.8224, "step": 14634 }, { "epoch": 47.98360655737705, "grad_norm": 8.967374801635742, "learning_rate": 1.1136413461447669e-05, "loss": 0.5919, "step": 14635 }, { "epoch": 47.98688524590164, "grad_norm": 8.606772422790527, "learning_rate": 1.1135358447129603e-05, "loss": 0.7715, "step": 14636 }, { "epoch": 47.99016393442623, "grad_norm": 8.605791091918945, "learning_rate": 1.1134303420009194e-05, "loss": 0.527, "step": 14637 }, { "epoch": 47.993442622950816, "grad_norm": 9.393595695495605, "learning_rate": 1.1133248380098332e-05, "loss": 0.7062, "step": 14638 }, { "epoch": 47.99672131147541, "grad_norm": 10.744479179382324, "learning_rate": 1.1132193327408918e-05, "loss": 0.5483, "step": 14639 }, { "epoch": 48.0, "grad_norm": 5.93894624710083, "learning_rate": 1.1131138261952845e-05, "loss": 0.5912, "step": 14640 }, { "epoch": 48.00327868852459, "grad_norm": 10.989027976989746, "learning_rate": 1.1130083183742011e-05, "loss": 0.9397, "step": 14641 }, { "epoch": 48.006557377049184, "grad_norm": 6.940354824066162, "learning_rate": 1.1129028092788319e-05, "loss": 0.5079, "step": 14642 }, { "epoch": 48.00983606557377, "grad_norm": 7.42008113861084, "learning_rate": 1.1127972989103657e-05, "loss": 0.7989, "step": 14643 }, { "epoch": 48.01311475409836, "grad_norm": 7.84572696685791, "learning_rate": 1.1126917872699928e-05, "loss": 0.4275, "step": 14644 }, { "epoch": 48.01639344262295, "grad_norm": 6.14077091217041, "learning_rate": 1.1125862743589029e-05, "loss": 0.7295, "step": 14645 }, { "epoch": 48.019672131147544, "grad_norm": 5.995779991149902, "learning_rate": 1.1124807601782856e-05, "loss": 0.4858, "step": 14646 }, { "epoch": 48.02295081967213, "grad_norm": 7.845939636230469, "learning_rate": 1.1123752447293307e-05, "loss": 0.805, "step": 14647 }, { "epoch": 48.02622950819672, "grad_norm": 8.060754776000977, "learning_rate": 1.1122697280132284e-05, "loss": 0.6898, "step": 14648 }, { "epoch": 48.02950819672131, "grad_norm": 5.355434417724609, "learning_rate": 1.1121642100311678e-05, "loss": 0.5796, "step": 14649 }, { "epoch": 48.032786885245905, "grad_norm": 5.564085483551025, "learning_rate": 1.1120586907843396e-05, "loss": 0.6183, "step": 14650 }, { "epoch": 48.03606557377049, "grad_norm": 8.071889877319336, "learning_rate": 1.1119531702739325e-05, "loss": 0.5232, "step": 14651 }, { "epoch": 48.03934426229508, "grad_norm": 9.110895156860352, "learning_rate": 1.1118476485011375e-05, "loss": 0.4745, "step": 14652 }, { "epoch": 48.04262295081967, "grad_norm": 7.591414451599121, "learning_rate": 1.1117421254671439e-05, "loss": 0.7852, "step": 14653 }, { "epoch": 48.045901639344265, "grad_norm": 7.388545513153076, "learning_rate": 1.1116366011731416e-05, "loss": 0.6805, "step": 14654 }, { "epoch": 48.049180327868854, "grad_norm": 6.14169979095459, "learning_rate": 1.1115310756203203e-05, "loss": 0.4626, "step": 14655 }, { "epoch": 48.05245901639344, "grad_norm": 6.164710521697998, "learning_rate": 1.1114255488098706e-05, "loss": 0.6816, "step": 14656 }, { "epoch": 48.05573770491803, "grad_norm": 6.288747787475586, "learning_rate": 1.1113200207429818e-05, "loss": 0.8646, "step": 14657 }, { "epoch": 48.059016393442626, "grad_norm": 13.360032081604004, "learning_rate": 1.111214491420844e-05, "loss": 0.5716, "step": 14658 }, { "epoch": 48.062295081967214, "grad_norm": 8.216853141784668, "learning_rate": 1.1111089608446473e-05, "loss": 0.7722, "step": 14659 }, { "epoch": 48.0655737704918, "grad_norm": 6.392935752868652, "learning_rate": 1.1110034290155813e-05, "loss": 0.6835, "step": 14660 }, { "epoch": 48.06885245901639, "grad_norm": 6.495833396911621, "learning_rate": 1.1108978959348364e-05, "loss": 0.6105, "step": 14661 }, { "epoch": 48.072131147540986, "grad_norm": 7.304012775421143, "learning_rate": 1.1107923616036023e-05, "loss": 0.673, "step": 14662 }, { "epoch": 48.075409836065575, "grad_norm": 9.152356147766113, "learning_rate": 1.1106868260230693e-05, "loss": 0.4971, "step": 14663 }, { "epoch": 48.07868852459016, "grad_norm": 9.044902801513672, "learning_rate": 1.110581289194427e-05, "loss": 0.5391, "step": 14664 }, { "epoch": 48.08196721311475, "grad_norm": 6.842961311340332, "learning_rate": 1.110475751118866e-05, "loss": 0.691, "step": 14665 }, { "epoch": 48.08524590163935, "grad_norm": 7.012687683105469, "learning_rate": 1.1103702117975756e-05, "loss": 0.7175, "step": 14666 }, { "epoch": 48.088524590163935, "grad_norm": 5.79514217376709, "learning_rate": 1.1102646712317468e-05, "loss": 0.5115, "step": 14667 }, { "epoch": 48.09180327868852, "grad_norm": 7.11339807510376, "learning_rate": 1.1101591294225689e-05, "loss": 0.6316, "step": 14668 }, { "epoch": 48.09508196721311, "grad_norm": 5.393686294555664, "learning_rate": 1.1100535863712321e-05, "loss": 0.7302, "step": 14669 }, { "epoch": 48.09836065573771, "grad_norm": 8.819726943969727, "learning_rate": 1.1099480420789271e-05, "loss": 0.5934, "step": 14670 }, { "epoch": 48.101639344262296, "grad_norm": 19.536169052124023, "learning_rate": 1.1098424965468432e-05, "loss": 0.567, "step": 14671 }, { "epoch": 48.104918032786884, "grad_norm": 7.155523777008057, "learning_rate": 1.1097369497761713e-05, "loss": 0.52, "step": 14672 }, { "epoch": 48.10819672131147, "grad_norm": 6.083095073699951, "learning_rate": 1.1096314017681009e-05, "loss": 0.7507, "step": 14673 }, { "epoch": 48.11147540983607, "grad_norm": 10.32831859588623, "learning_rate": 1.1095258525238228e-05, "loss": 0.4191, "step": 14674 }, { "epoch": 48.114754098360656, "grad_norm": 9.355807304382324, "learning_rate": 1.1094203020445267e-05, "loss": 0.617, "step": 14675 }, { "epoch": 48.118032786885244, "grad_norm": 5.045515060424805, "learning_rate": 1.1093147503314027e-05, "loss": 0.6925, "step": 14676 }, { "epoch": 48.12131147540983, "grad_norm": 8.240287780761719, "learning_rate": 1.1092091973856416e-05, "loss": 0.5485, "step": 14677 }, { "epoch": 48.12459016393443, "grad_norm": 6.438887119293213, "learning_rate": 1.109103643208433e-05, "loss": 0.6555, "step": 14678 }, { "epoch": 48.12786885245902, "grad_norm": 6.454638957977295, "learning_rate": 1.1089980878009675e-05, "loss": 0.5136, "step": 14679 }, { "epoch": 48.131147540983605, "grad_norm": 6.392174243927002, "learning_rate": 1.1088925311644351e-05, "loss": 0.6961, "step": 14680 }, { "epoch": 48.13442622950819, "grad_norm": 5.571985721588135, "learning_rate": 1.1087869733000262e-05, "loss": 0.3793, "step": 14681 }, { "epoch": 48.13770491803279, "grad_norm": 8.41319465637207, "learning_rate": 1.1086814142089313e-05, "loss": 0.5587, "step": 14682 }, { "epoch": 48.14098360655738, "grad_norm": 6.5944342613220215, "learning_rate": 1.1085758538923401e-05, "loss": 0.8606, "step": 14683 }, { "epoch": 48.144262295081965, "grad_norm": 6.321970462799072, "learning_rate": 1.1084702923514437e-05, "loss": 0.775, "step": 14684 }, { "epoch": 48.14754098360656, "grad_norm": 9.975909233093262, "learning_rate": 1.1083647295874318e-05, "loss": 0.5554, "step": 14685 }, { "epoch": 48.15081967213115, "grad_norm": 7.544537544250488, "learning_rate": 1.1082591656014947e-05, "loss": 0.9639, "step": 14686 }, { "epoch": 48.15409836065574, "grad_norm": 6.819507122039795, "learning_rate": 1.108153600394823e-05, "loss": 0.5823, "step": 14687 }, { "epoch": 48.157377049180326, "grad_norm": 6.814903259277344, "learning_rate": 1.108048033968607e-05, "loss": 0.8151, "step": 14688 }, { "epoch": 48.16065573770492, "grad_norm": 9.896064758300781, "learning_rate": 1.1079424663240372e-05, "loss": 0.4014, "step": 14689 }, { "epoch": 48.16393442622951, "grad_norm": 11.038471221923828, "learning_rate": 1.107836897462304e-05, "loss": 0.6453, "step": 14690 }, { "epoch": 48.1672131147541, "grad_norm": 10.36203384399414, "learning_rate": 1.1077313273845972e-05, "loss": 0.6568, "step": 14691 }, { "epoch": 48.170491803278686, "grad_norm": 7.676372528076172, "learning_rate": 1.107625756092108e-05, "loss": 0.5971, "step": 14692 }, { "epoch": 48.17377049180328, "grad_norm": 11.249423027038574, "learning_rate": 1.1075201835860266e-05, "loss": 0.5237, "step": 14693 }, { "epoch": 48.17704918032787, "grad_norm": 5.024750709533691, "learning_rate": 1.107414609867543e-05, "loss": 0.869, "step": 14694 }, { "epoch": 48.18032786885246, "grad_norm": 6.3886003494262695, "learning_rate": 1.1073090349378482e-05, "loss": 0.5232, "step": 14695 }, { "epoch": 48.18360655737705, "grad_norm": 6.692012310028076, "learning_rate": 1.1072034587981323e-05, "loss": 0.348, "step": 14696 }, { "epoch": 48.18688524590164, "grad_norm": 6.862906455993652, "learning_rate": 1.1070978814495862e-05, "loss": 0.7606, "step": 14697 }, { "epoch": 48.19016393442623, "grad_norm": 6.593994617462158, "learning_rate": 1.1069923028934e-05, "loss": 0.8429, "step": 14698 }, { "epoch": 48.19344262295082, "grad_norm": 6.287765979766846, "learning_rate": 1.1068867231307643e-05, "loss": 0.7902, "step": 14699 }, { "epoch": 48.19672131147541, "grad_norm": 6.3164896965026855, "learning_rate": 1.1067811421628696e-05, "loss": 0.4324, "step": 14700 }, { "epoch": 48.2, "grad_norm": 9.643847465515137, "learning_rate": 1.1066755599909065e-05, "loss": 0.5741, "step": 14701 }, { "epoch": 48.20327868852459, "grad_norm": 7.376796245574951, "learning_rate": 1.1065699766160658e-05, "loss": 0.5711, "step": 14702 }, { "epoch": 48.20655737704918, "grad_norm": 14.958483695983887, "learning_rate": 1.1064643920395375e-05, "loss": 0.5182, "step": 14703 }, { "epoch": 48.20983606557377, "grad_norm": 6.969545364379883, "learning_rate": 1.1063588062625128e-05, "loss": 0.7924, "step": 14704 }, { "epoch": 48.21311475409836, "grad_norm": 15.456167221069336, "learning_rate": 1.1062532192861816e-05, "loss": 0.7567, "step": 14705 }, { "epoch": 48.21639344262295, "grad_norm": 11.470434188842773, "learning_rate": 1.1061476311117351e-05, "loss": 0.7952, "step": 14706 }, { "epoch": 48.21967213114754, "grad_norm": 6.268270015716553, "learning_rate": 1.1060420417403636e-05, "loss": 0.8699, "step": 14707 }, { "epoch": 48.22295081967213, "grad_norm": 13.025057792663574, "learning_rate": 1.1059364511732583e-05, "loss": 0.4492, "step": 14708 }, { "epoch": 48.226229508196724, "grad_norm": 6.498733043670654, "learning_rate": 1.1058308594116088e-05, "loss": 0.6797, "step": 14709 }, { "epoch": 48.22950819672131, "grad_norm": 6.43353271484375, "learning_rate": 1.1057252664566066e-05, "loss": 0.6816, "step": 14710 }, { "epoch": 48.2327868852459, "grad_norm": 7.171774387359619, "learning_rate": 1.1056196723094419e-05, "loss": 0.8766, "step": 14711 }, { "epoch": 48.23606557377049, "grad_norm": 7.970706462860107, "learning_rate": 1.1055140769713057e-05, "loss": 0.8115, "step": 14712 }, { "epoch": 48.239344262295084, "grad_norm": 6.001133441925049, "learning_rate": 1.1054084804433886e-05, "loss": 0.5815, "step": 14713 }, { "epoch": 48.24262295081967, "grad_norm": 8.097561836242676, "learning_rate": 1.105302882726881e-05, "loss": 0.4779, "step": 14714 }, { "epoch": 48.24590163934426, "grad_norm": 10.138050079345703, "learning_rate": 1.1051972838229743e-05, "loss": 0.4974, "step": 14715 }, { "epoch": 48.24918032786885, "grad_norm": 8.247194290161133, "learning_rate": 1.1050916837328587e-05, "loss": 0.8132, "step": 14716 }, { "epoch": 48.252459016393445, "grad_norm": 5.374180793762207, "learning_rate": 1.104986082457725e-05, "loss": 0.4594, "step": 14717 }, { "epoch": 48.25573770491803, "grad_norm": 5.753986358642578, "learning_rate": 1.104880479998764e-05, "loss": 0.6408, "step": 14718 }, { "epoch": 48.25901639344262, "grad_norm": 7.261383056640625, "learning_rate": 1.1047748763571668e-05, "loss": 0.5455, "step": 14719 }, { "epoch": 48.26229508196721, "grad_norm": 9.826074600219727, "learning_rate": 1.1046692715341239e-05, "loss": 0.4684, "step": 14720 }, { "epoch": 48.265573770491805, "grad_norm": 6.2407355308532715, "learning_rate": 1.1045636655308261e-05, "loss": 0.5266, "step": 14721 }, { "epoch": 48.268852459016394, "grad_norm": 6.399536609649658, "learning_rate": 1.104458058348464e-05, "loss": 0.7423, "step": 14722 }, { "epoch": 48.27213114754098, "grad_norm": 6.460278511047363, "learning_rate": 1.1043524499882288e-05, "loss": 0.6421, "step": 14723 }, { "epoch": 48.27540983606557, "grad_norm": 6.681336402893066, "learning_rate": 1.1042468404513114e-05, "loss": 0.6274, "step": 14724 }, { "epoch": 48.278688524590166, "grad_norm": 13.537322044372559, "learning_rate": 1.1041412297389023e-05, "loss": 0.5766, "step": 14725 }, { "epoch": 48.281967213114754, "grad_norm": 6.484403610229492, "learning_rate": 1.1040356178521928e-05, "loss": 0.5967, "step": 14726 }, { "epoch": 48.28524590163934, "grad_norm": 8.426329612731934, "learning_rate": 1.1039300047923733e-05, "loss": 0.7899, "step": 14727 }, { "epoch": 48.28852459016394, "grad_norm": 9.804079055786133, "learning_rate": 1.103824390560635e-05, "loss": 0.4306, "step": 14728 }, { "epoch": 48.291803278688526, "grad_norm": 5.445385932922363, "learning_rate": 1.1037187751581686e-05, "loss": 0.8144, "step": 14729 }, { "epoch": 48.295081967213115, "grad_norm": 10.700357437133789, "learning_rate": 1.1036131585861655e-05, "loss": 0.4083, "step": 14730 }, { "epoch": 48.2983606557377, "grad_norm": 15.693124771118164, "learning_rate": 1.103507540845816e-05, "loss": 0.4578, "step": 14731 }, { "epoch": 48.3016393442623, "grad_norm": 13.170519828796387, "learning_rate": 1.1034019219383116e-05, "loss": 0.5433, "step": 14732 }, { "epoch": 48.30491803278689, "grad_norm": 7.358686923980713, "learning_rate": 1.1032963018648428e-05, "loss": 0.6661, "step": 14733 }, { "epoch": 48.308196721311475, "grad_norm": 8.525522232055664, "learning_rate": 1.103190680626601e-05, "loss": 0.6165, "step": 14734 }, { "epoch": 48.31147540983606, "grad_norm": 7.189215183258057, "learning_rate": 1.103085058224777e-05, "loss": 0.5854, "step": 14735 }, { "epoch": 48.31475409836066, "grad_norm": 7.792847633361816, "learning_rate": 1.1029794346605619e-05, "loss": 0.7179, "step": 14736 }, { "epoch": 48.31803278688525, "grad_norm": 6.228325843811035, "learning_rate": 1.1028738099351463e-05, "loss": 0.5558, "step": 14737 }, { "epoch": 48.321311475409836, "grad_norm": 8.050064086914062, "learning_rate": 1.1027681840497215e-05, "loss": 0.6183, "step": 14738 }, { "epoch": 48.324590163934424, "grad_norm": 8.968310356140137, "learning_rate": 1.1026625570054787e-05, "loss": 0.8092, "step": 14739 }, { "epoch": 48.32786885245902, "grad_norm": 5.195819854736328, "learning_rate": 1.1025569288036088e-05, "loss": 0.4682, "step": 14740 }, { "epoch": 48.33114754098361, "grad_norm": 7.154491424560547, "learning_rate": 1.102451299445303e-05, "loss": 0.5885, "step": 14741 }, { "epoch": 48.334426229508196, "grad_norm": 7.494875907897949, "learning_rate": 1.102345668931752e-05, "loss": 0.5443, "step": 14742 }, { "epoch": 48.337704918032784, "grad_norm": 7.295029640197754, "learning_rate": 1.1022400372641476e-05, "loss": 0.6822, "step": 14743 }, { "epoch": 48.34098360655738, "grad_norm": 9.278170585632324, "learning_rate": 1.1021344044436801e-05, "loss": 0.5224, "step": 14744 }, { "epoch": 48.34426229508197, "grad_norm": 5.859807014465332, "learning_rate": 1.1020287704715413e-05, "loss": 0.5629, "step": 14745 }, { "epoch": 48.34754098360656, "grad_norm": 6.1730780601501465, "learning_rate": 1.1019231353489217e-05, "loss": 0.6302, "step": 14746 }, { "epoch": 48.350819672131145, "grad_norm": 27.248323440551758, "learning_rate": 1.101817499077013e-05, "loss": 0.5232, "step": 14747 }, { "epoch": 48.35409836065574, "grad_norm": 6.538815498352051, "learning_rate": 1.1017118616570059e-05, "loss": 0.4765, "step": 14748 }, { "epoch": 48.35737704918033, "grad_norm": 6.489893913269043, "learning_rate": 1.1016062230900921e-05, "loss": 0.474, "step": 14749 }, { "epoch": 48.36065573770492, "grad_norm": 6.452365398406982, "learning_rate": 1.1015005833774624e-05, "loss": 0.7294, "step": 14750 }, { "epoch": 48.363934426229505, "grad_norm": 7.705890655517578, "learning_rate": 1.101394942520308e-05, "loss": 0.6015, "step": 14751 }, { "epoch": 48.3672131147541, "grad_norm": 6.063785552978516, "learning_rate": 1.1012893005198202e-05, "loss": 0.8505, "step": 14752 }, { "epoch": 48.37049180327869, "grad_norm": 7.948231220245361, "learning_rate": 1.1011836573771901e-05, "loss": 0.56, "step": 14753 }, { "epoch": 48.37377049180328, "grad_norm": 7.366585731506348, "learning_rate": 1.1010780130936094e-05, "loss": 0.8743, "step": 14754 }, { "epoch": 48.377049180327866, "grad_norm": 6.6305742263793945, "learning_rate": 1.1009723676702689e-05, "loss": 0.835, "step": 14755 }, { "epoch": 48.38032786885246, "grad_norm": 6.2967939376831055, "learning_rate": 1.1008667211083599e-05, "loss": 0.4948, "step": 14756 }, { "epoch": 48.38360655737705, "grad_norm": 9.713319778442383, "learning_rate": 1.1007610734090736e-05, "loss": 0.5133, "step": 14757 }, { "epoch": 48.38688524590164, "grad_norm": 7.996016502380371, "learning_rate": 1.1006554245736018e-05, "loss": 0.6406, "step": 14758 }, { "epoch": 48.390163934426226, "grad_norm": 5.965843200683594, "learning_rate": 1.1005497746031351e-05, "loss": 0.6849, "step": 14759 }, { "epoch": 48.39344262295082, "grad_norm": 7.1849894523620605, "learning_rate": 1.1004441234988654e-05, "loss": 0.59, "step": 14760 }, { "epoch": 48.39672131147541, "grad_norm": 7.596570014953613, "learning_rate": 1.1003384712619838e-05, "loss": 0.6589, "step": 14761 }, { "epoch": 48.4, "grad_norm": 5.351001262664795, "learning_rate": 1.1002328178936813e-05, "loss": 0.5778, "step": 14762 }, { "epoch": 48.40327868852459, "grad_norm": 39.92898941040039, "learning_rate": 1.1001271633951497e-05, "loss": 0.5758, "step": 14763 }, { "epoch": 48.40655737704918, "grad_norm": 6.061538219451904, "learning_rate": 1.1000215077675802e-05, "loss": 0.4585, "step": 14764 }, { "epoch": 48.40983606557377, "grad_norm": 8.773783683776855, "learning_rate": 1.0999158510121643e-05, "loss": 0.525, "step": 14765 }, { "epoch": 48.41311475409836, "grad_norm": 9.935824394226074, "learning_rate": 1.0998101931300931e-05, "loss": 0.6201, "step": 14766 }, { "epoch": 48.41639344262295, "grad_norm": 10.966333389282227, "learning_rate": 1.0997045341225584e-05, "loss": 0.7334, "step": 14767 }, { "epoch": 48.41967213114754, "grad_norm": 7.599612236022949, "learning_rate": 1.0995988739907513e-05, "loss": 0.5277, "step": 14768 }, { "epoch": 48.42295081967213, "grad_norm": 7.239087104797363, "learning_rate": 1.0994932127358635e-05, "loss": 0.4525, "step": 14769 }, { "epoch": 48.42622950819672, "grad_norm": 6.278576850891113, "learning_rate": 1.099387550359086e-05, "loss": 0.7141, "step": 14770 }, { "epoch": 48.429508196721315, "grad_norm": 6.488664627075195, "learning_rate": 1.099281886861611e-05, "loss": 0.654, "step": 14771 }, { "epoch": 48.4327868852459, "grad_norm": 5.766851425170898, "learning_rate": 1.099176222244629e-05, "loss": 0.6703, "step": 14772 }, { "epoch": 48.43606557377049, "grad_norm": 5.841326713562012, "learning_rate": 1.0990705565093323e-05, "loss": 0.6724, "step": 14773 }, { "epoch": 48.43934426229508, "grad_norm": 6.417330741882324, "learning_rate": 1.0989648896569118e-05, "loss": 0.626, "step": 14774 }, { "epoch": 48.442622950819676, "grad_norm": 6.810546875, "learning_rate": 1.0988592216885595e-05, "loss": 0.2711, "step": 14775 }, { "epoch": 48.445901639344264, "grad_norm": 13.73640251159668, "learning_rate": 1.0987535526054664e-05, "loss": 0.8009, "step": 14776 }, { "epoch": 48.44918032786885, "grad_norm": 6.256479740142822, "learning_rate": 1.0986478824088245e-05, "loss": 0.4921, "step": 14777 }, { "epoch": 48.45245901639344, "grad_norm": 11.04532241821289, "learning_rate": 1.0985422110998252e-05, "loss": 0.6068, "step": 14778 }, { "epoch": 48.455737704918036, "grad_norm": 6.018112659454346, "learning_rate": 1.0984365386796598e-05, "loss": 0.5213, "step": 14779 }, { "epoch": 48.459016393442624, "grad_norm": 6.341716289520264, "learning_rate": 1.0983308651495204e-05, "loss": 0.6054, "step": 14780 }, { "epoch": 48.46229508196721, "grad_norm": 22.477018356323242, "learning_rate": 1.0982251905105981e-05, "loss": 0.7512, "step": 14781 }, { "epoch": 48.4655737704918, "grad_norm": 6.049835205078125, "learning_rate": 1.0981195147640848e-05, "loss": 0.6325, "step": 14782 }, { "epoch": 48.4688524590164, "grad_norm": 9.421876907348633, "learning_rate": 1.0980138379111716e-05, "loss": 0.6188, "step": 14783 }, { "epoch": 48.472131147540985, "grad_norm": 8.070837020874023, "learning_rate": 1.0979081599530507e-05, "loss": 0.8212, "step": 14784 }, { "epoch": 48.47540983606557, "grad_norm": 8.78884220123291, "learning_rate": 1.0978024808909136e-05, "loss": 0.6414, "step": 14785 }, { "epoch": 48.47868852459016, "grad_norm": 7.0641255378723145, "learning_rate": 1.0976968007259519e-05, "loss": 0.7374, "step": 14786 }, { "epoch": 48.48196721311476, "grad_norm": 5.774123668670654, "learning_rate": 1.097591119459357e-05, "loss": 0.6001, "step": 14787 }, { "epoch": 48.485245901639345, "grad_norm": 22.338481903076172, "learning_rate": 1.0974854370923207e-05, "loss": 0.4559, "step": 14788 }, { "epoch": 48.488524590163934, "grad_norm": 6.469686985015869, "learning_rate": 1.0973797536260349e-05, "loss": 0.7105, "step": 14789 }, { "epoch": 48.49180327868852, "grad_norm": 6.2314276695251465, "learning_rate": 1.097274069061691e-05, "loss": 0.5218, "step": 14790 }, { "epoch": 48.49508196721312, "grad_norm": 7.039871692657471, "learning_rate": 1.0971683834004809e-05, "loss": 0.5598, "step": 14791 }, { "epoch": 48.498360655737706, "grad_norm": 5.9481329917907715, "learning_rate": 1.097062696643596e-05, "loss": 0.775, "step": 14792 }, { "epoch": 48.501639344262294, "grad_norm": 7.164920330047607, "learning_rate": 1.0969570087922288e-05, "loss": 0.5521, "step": 14793 }, { "epoch": 48.50491803278688, "grad_norm": 6.243056297302246, "learning_rate": 1.0968513198475699e-05, "loss": 0.7685, "step": 14794 }, { "epoch": 48.50819672131148, "grad_norm": 5.50347375869751, "learning_rate": 1.096745629810812e-05, "loss": 0.3215, "step": 14795 }, { "epoch": 48.511475409836066, "grad_norm": 8.274591445922852, "learning_rate": 1.0966399386831466e-05, "loss": 0.7983, "step": 14796 }, { "epoch": 48.514754098360655, "grad_norm": 5.511982440948486, "learning_rate": 1.0965342464657653e-05, "loss": 0.5245, "step": 14797 }, { "epoch": 48.51803278688524, "grad_norm": 5.724714279174805, "learning_rate": 1.09642855315986e-05, "loss": 0.3573, "step": 14798 }, { "epoch": 48.52131147540984, "grad_norm": 8.669530868530273, "learning_rate": 1.0963228587666226e-05, "loss": 0.4701, "step": 14799 }, { "epoch": 48.52459016393443, "grad_norm": 6.956589221954346, "learning_rate": 1.096217163287245e-05, "loss": 0.5358, "step": 14800 }, { "epoch": 48.527868852459015, "grad_norm": 7.982859134674072, "learning_rate": 1.0961114667229185e-05, "loss": 0.7321, "step": 14801 }, { "epoch": 48.5311475409836, "grad_norm": 5.377719402313232, "learning_rate": 1.0960057690748351e-05, "loss": 0.8477, "step": 14802 }, { "epoch": 48.5344262295082, "grad_norm": 6.12612771987915, "learning_rate": 1.0959000703441874e-05, "loss": 0.7279, "step": 14803 }, { "epoch": 48.53770491803279, "grad_norm": 6.350753307342529, "learning_rate": 1.095794370532166e-05, "loss": 0.7726, "step": 14804 }, { "epoch": 48.540983606557376, "grad_norm": 7.572555065155029, "learning_rate": 1.095688669639964e-05, "loss": 0.5757, "step": 14805 }, { "epoch": 48.544262295081964, "grad_norm": 18.275604248046875, "learning_rate": 1.0955829676687725e-05, "loss": 0.592, "step": 14806 }, { "epoch": 48.54754098360656, "grad_norm": 6.360780715942383, "learning_rate": 1.0954772646197838e-05, "loss": 0.6281, "step": 14807 }, { "epoch": 48.55081967213115, "grad_norm": 7.275025844573975, "learning_rate": 1.0953715604941897e-05, "loss": 0.7412, "step": 14808 }, { "epoch": 48.554098360655736, "grad_norm": 8.791701316833496, "learning_rate": 1.0952658552931822e-05, "loss": 0.5198, "step": 14809 }, { "epoch": 48.557377049180324, "grad_norm": 8.835062980651855, "learning_rate": 1.0951601490179528e-05, "loss": 0.2846, "step": 14810 }, { "epoch": 48.56065573770492, "grad_norm": 6.739607334136963, "learning_rate": 1.095054441669694e-05, "loss": 0.7464, "step": 14811 }, { "epoch": 48.56393442622951, "grad_norm": 6.699001789093018, "learning_rate": 1.0949487332495977e-05, "loss": 0.5483, "step": 14812 }, { "epoch": 48.5672131147541, "grad_norm": 9.04244613647461, "learning_rate": 1.0948430237588554e-05, "loss": 0.4911, "step": 14813 }, { "epoch": 48.570491803278685, "grad_norm": 5.778026580810547, "learning_rate": 1.0947373131986594e-05, "loss": 0.3646, "step": 14814 }, { "epoch": 48.57377049180328, "grad_norm": 6.650567054748535, "learning_rate": 1.0946316015702018e-05, "loss": 0.8791, "step": 14815 }, { "epoch": 48.57704918032787, "grad_norm": 30.533517837524414, "learning_rate": 1.0945258888746745e-05, "loss": 0.6097, "step": 14816 }, { "epoch": 48.58032786885246, "grad_norm": 5.498482704162598, "learning_rate": 1.0944201751132697e-05, "loss": 0.3773, "step": 14817 }, { "epoch": 48.58360655737705, "grad_norm": 5.619156360626221, "learning_rate": 1.094314460287179e-05, "loss": 0.6832, "step": 14818 }, { "epoch": 48.58688524590164, "grad_norm": 6.59684419631958, "learning_rate": 1.0942087443975949e-05, "loss": 0.5068, "step": 14819 }, { "epoch": 48.59016393442623, "grad_norm": 7.520699501037598, "learning_rate": 1.0941030274457089e-05, "loss": 0.6426, "step": 14820 }, { "epoch": 48.59344262295082, "grad_norm": 9.628018379211426, "learning_rate": 1.0939973094327137e-05, "loss": 0.5641, "step": 14821 }, { "epoch": 48.59672131147541, "grad_norm": 9.226649284362793, "learning_rate": 1.0938915903598008e-05, "loss": 0.645, "step": 14822 }, { "epoch": 48.6, "grad_norm": 7.628836631774902, "learning_rate": 1.0937858702281631e-05, "loss": 0.7163, "step": 14823 }, { "epoch": 48.60327868852459, "grad_norm": 7.733127117156982, "learning_rate": 1.0936801490389919e-05, "loss": 0.7292, "step": 14824 }, { "epoch": 48.60655737704918, "grad_norm": 5.823825836181641, "learning_rate": 1.0935744267934798e-05, "loss": 0.4346, "step": 14825 }, { "epoch": 48.609836065573774, "grad_norm": 5.936168670654297, "learning_rate": 1.0934687034928186e-05, "loss": 0.6877, "step": 14826 }, { "epoch": 48.61311475409836, "grad_norm": 6.013610363006592, "learning_rate": 1.0933629791382006e-05, "loss": 0.7011, "step": 14827 }, { "epoch": 48.61639344262295, "grad_norm": 5.093568801879883, "learning_rate": 1.0932572537308177e-05, "loss": 0.5597, "step": 14828 }, { "epoch": 48.61967213114754, "grad_norm": 8.646465301513672, "learning_rate": 1.0931515272718627e-05, "loss": 0.6281, "step": 14829 }, { "epoch": 48.622950819672134, "grad_norm": 8.980162620544434, "learning_rate": 1.093045799762527e-05, "loss": 0.5586, "step": 14830 }, { "epoch": 48.62622950819672, "grad_norm": 5.561872482299805, "learning_rate": 1.0929400712040035e-05, "loss": 0.9359, "step": 14831 }, { "epoch": 48.62950819672131, "grad_norm": 15.398447036743164, "learning_rate": 1.0928343415974839e-05, "loss": 0.6289, "step": 14832 }, { "epoch": 48.6327868852459, "grad_norm": 7.099035739898682, "learning_rate": 1.0927286109441603e-05, "loss": 0.477, "step": 14833 }, { "epoch": 48.636065573770495, "grad_norm": 7.185894966125488, "learning_rate": 1.0926228792452259e-05, "loss": 0.8073, "step": 14834 }, { "epoch": 48.63934426229508, "grad_norm": 5.048336029052734, "learning_rate": 1.0925171465018715e-05, "loss": 0.7115, "step": 14835 }, { "epoch": 48.64262295081967, "grad_norm": 9.372297286987305, "learning_rate": 1.0924114127152906e-05, "loss": 0.7326, "step": 14836 }, { "epoch": 48.64590163934426, "grad_norm": 6.477848529815674, "learning_rate": 1.0923056778866744e-05, "loss": 0.5824, "step": 14837 }, { "epoch": 48.649180327868855, "grad_norm": 6.859009742736816, "learning_rate": 1.0921999420172164e-05, "loss": 0.5696, "step": 14838 }, { "epoch": 48.65245901639344, "grad_norm": 7.125060558319092, "learning_rate": 1.092094205108108e-05, "loss": 0.9095, "step": 14839 }, { "epoch": 48.65573770491803, "grad_norm": 6.511751174926758, "learning_rate": 1.0919884671605414e-05, "loss": 0.4397, "step": 14840 }, { "epoch": 48.65901639344262, "grad_norm": 5.930298328399658, "learning_rate": 1.0918827281757094e-05, "loss": 0.5211, "step": 14841 }, { "epoch": 48.662295081967216, "grad_norm": 6.657510280609131, "learning_rate": 1.091776988154804e-05, "loss": 0.552, "step": 14842 }, { "epoch": 48.665573770491804, "grad_norm": 6.827516078948975, "learning_rate": 1.0916712470990176e-05, "loss": 0.481, "step": 14843 }, { "epoch": 48.66885245901639, "grad_norm": 7.356865406036377, "learning_rate": 1.0915655050095426e-05, "loss": 0.522, "step": 14844 }, { "epoch": 48.67213114754098, "grad_norm": 6.3672075271606445, "learning_rate": 1.0914597618875714e-05, "loss": 0.7021, "step": 14845 }, { "epoch": 48.675409836065576, "grad_norm": 7.977450370788574, "learning_rate": 1.0913540177342962e-05, "loss": 0.502, "step": 14846 }, { "epoch": 48.678688524590164, "grad_norm": 8.28746509552002, "learning_rate": 1.0912482725509096e-05, "loss": 0.6018, "step": 14847 }, { "epoch": 48.68196721311475, "grad_norm": 6.534135818481445, "learning_rate": 1.0911425263386037e-05, "loss": 0.7337, "step": 14848 }, { "epoch": 48.68524590163934, "grad_norm": 7.45184326171875, "learning_rate": 1.0910367790985712e-05, "loss": 0.3598, "step": 14849 }, { "epoch": 48.68852459016394, "grad_norm": 6.95286226272583, "learning_rate": 1.0909310308320042e-05, "loss": 0.6161, "step": 14850 }, { "epoch": 48.691803278688525, "grad_norm": 6.482699394226074, "learning_rate": 1.0908252815400955e-05, "loss": 0.4793, "step": 14851 }, { "epoch": 48.69508196721311, "grad_norm": 8.756794929504395, "learning_rate": 1.0907195312240372e-05, "loss": 0.5117, "step": 14852 }, { "epoch": 48.6983606557377, "grad_norm": 7.325554370880127, "learning_rate": 1.0906137798850218e-05, "loss": 0.6095, "step": 14853 }, { "epoch": 48.7016393442623, "grad_norm": 6.670931339263916, "learning_rate": 1.0905080275242421e-05, "loss": 0.5255, "step": 14854 }, { "epoch": 48.704918032786885, "grad_norm": 6.381529808044434, "learning_rate": 1.0904022741428899e-05, "loss": 0.676, "step": 14855 }, { "epoch": 48.708196721311474, "grad_norm": 7.160212516784668, "learning_rate": 1.090296519742158e-05, "loss": 0.9568, "step": 14856 }, { "epoch": 48.71147540983607, "grad_norm": 10.319835662841797, "learning_rate": 1.0901907643232392e-05, "loss": 0.7973, "step": 14857 }, { "epoch": 48.71475409836066, "grad_norm": 8.122650146484375, "learning_rate": 1.0900850078873258e-05, "loss": 0.469, "step": 14858 }, { "epoch": 48.718032786885246, "grad_norm": 5.253925323486328, "learning_rate": 1.0899792504356102e-05, "loss": 0.4735, "step": 14859 }, { "epoch": 48.721311475409834, "grad_norm": 7.617600440979004, "learning_rate": 1.0898734919692849e-05, "loss": 0.6352, "step": 14860 }, { "epoch": 48.72459016393443, "grad_norm": 6.288694858551025, "learning_rate": 1.0897677324895426e-05, "loss": 0.5306, "step": 14861 }, { "epoch": 48.72786885245902, "grad_norm": 6.284384250640869, "learning_rate": 1.0896619719975759e-05, "loss": 0.7208, "step": 14862 }, { "epoch": 48.731147540983606, "grad_norm": 6.6777753829956055, "learning_rate": 1.089556210494577e-05, "loss": 0.6674, "step": 14863 }, { "epoch": 48.734426229508195, "grad_norm": 7.926723480224609, "learning_rate": 1.089450447981739e-05, "loss": 0.6315, "step": 14864 }, { "epoch": 48.73770491803279, "grad_norm": 6.6838202476501465, "learning_rate": 1.0893446844602543e-05, "loss": 0.5619, "step": 14865 }, { "epoch": 48.74098360655738, "grad_norm": 7.010583877563477, "learning_rate": 1.0892389199313148e-05, "loss": 0.5769, "step": 14866 }, { "epoch": 48.74426229508197, "grad_norm": 6.436195373535156, "learning_rate": 1.0891331543961142e-05, "loss": 0.5851, "step": 14867 }, { "epoch": 48.747540983606555, "grad_norm": 6.2708330154418945, "learning_rate": 1.0890273878558444e-05, "loss": 0.6602, "step": 14868 }, { "epoch": 48.75081967213115, "grad_norm": 5.671324729919434, "learning_rate": 1.0889216203116984e-05, "loss": 0.5338, "step": 14869 }, { "epoch": 48.75409836065574, "grad_norm": 11.965605735778809, "learning_rate": 1.0888158517648684e-05, "loss": 0.5579, "step": 14870 }, { "epoch": 48.75737704918033, "grad_norm": 8.285603523254395, "learning_rate": 1.0887100822165476e-05, "loss": 0.5095, "step": 14871 }, { "epoch": 48.760655737704916, "grad_norm": 8.751463890075684, "learning_rate": 1.0886043116679281e-05, "loss": 0.7007, "step": 14872 }, { "epoch": 48.76393442622951, "grad_norm": 6.283163547515869, "learning_rate": 1.088498540120203e-05, "loss": 0.8453, "step": 14873 }, { "epoch": 48.7672131147541, "grad_norm": 6.5946125984191895, "learning_rate": 1.088392767574565e-05, "loss": 0.4527, "step": 14874 }, { "epoch": 48.77049180327869, "grad_norm": 16.206809997558594, "learning_rate": 1.0882869940322065e-05, "loss": 0.607, "step": 14875 }, { "epoch": 48.773770491803276, "grad_norm": 6.8592352867126465, "learning_rate": 1.0881812194943202e-05, "loss": 0.6283, "step": 14876 }, { "epoch": 48.77704918032787, "grad_norm": 7.204574108123779, "learning_rate": 1.0880754439620993e-05, "loss": 0.8454, "step": 14877 }, { "epoch": 48.78032786885246, "grad_norm": 7.62874174118042, "learning_rate": 1.0879696674367362e-05, "loss": 0.684, "step": 14878 }, { "epoch": 48.78360655737705, "grad_norm": 7.498279571533203, "learning_rate": 1.0878638899194232e-05, "loss": 0.6087, "step": 14879 }, { "epoch": 48.78688524590164, "grad_norm": 6.438529968261719, "learning_rate": 1.087758111411354e-05, "loss": 0.4528, "step": 14880 }, { "epoch": 48.79016393442623, "grad_norm": 7.865771293640137, "learning_rate": 1.0876523319137204e-05, "loss": 0.6674, "step": 14881 }, { "epoch": 48.79344262295082, "grad_norm": 5.481977462768555, "learning_rate": 1.0875465514277159e-05, "loss": 0.7537, "step": 14882 }, { "epoch": 48.79672131147541, "grad_norm": 6.170691013336182, "learning_rate": 1.0874407699545329e-05, "loss": 0.6582, "step": 14883 }, { "epoch": 48.8, "grad_norm": 5.591926097869873, "learning_rate": 1.087334987495364e-05, "loss": 0.8685, "step": 14884 }, { "epoch": 48.80327868852459, "grad_norm": 7.608999729156494, "learning_rate": 1.0872292040514027e-05, "loss": 0.6978, "step": 14885 }, { "epoch": 48.80655737704918, "grad_norm": 10.119449615478516, "learning_rate": 1.0871234196238415e-05, "loss": 0.5934, "step": 14886 }, { "epoch": 48.80983606557377, "grad_norm": 5.782100677490234, "learning_rate": 1.0870176342138728e-05, "loss": 0.5896, "step": 14887 }, { "epoch": 48.81311475409836, "grad_norm": 6.873934745788574, "learning_rate": 1.08691184782269e-05, "loss": 0.5428, "step": 14888 }, { "epoch": 48.81639344262295, "grad_norm": 5.95654821395874, "learning_rate": 1.0868060604514859e-05, "loss": 0.5515, "step": 14889 }, { "epoch": 48.81967213114754, "grad_norm": 11.900957107543945, "learning_rate": 1.086700272101453e-05, "loss": 0.5978, "step": 14890 }, { "epoch": 48.82295081967213, "grad_norm": 6.6706414222717285, "learning_rate": 1.0865944827737846e-05, "loss": 0.399, "step": 14891 }, { "epoch": 48.82622950819672, "grad_norm": 6.083041191101074, "learning_rate": 1.086488692469673e-05, "loss": 0.575, "step": 14892 }, { "epoch": 48.829508196721314, "grad_norm": 6.357637882232666, "learning_rate": 1.086382901190312e-05, "loss": 0.3688, "step": 14893 }, { "epoch": 48.8327868852459, "grad_norm": 5.668649196624756, "learning_rate": 1.0862771089368935e-05, "loss": 0.5511, "step": 14894 }, { "epoch": 48.83606557377049, "grad_norm": 5.792958736419678, "learning_rate": 1.0861713157106113e-05, "loss": 0.5687, "step": 14895 }, { "epoch": 48.83934426229508, "grad_norm": 12.715128898620605, "learning_rate": 1.0860655215126575e-05, "loss": 0.8929, "step": 14896 }, { "epoch": 48.842622950819674, "grad_norm": 6.203830718994141, "learning_rate": 1.0859597263442256e-05, "loss": 1.0621, "step": 14897 }, { "epoch": 48.84590163934426, "grad_norm": 6.823591709136963, "learning_rate": 1.0858539302065086e-05, "loss": 0.6444, "step": 14898 }, { "epoch": 48.84918032786885, "grad_norm": 10.568474769592285, "learning_rate": 1.0857481331006991e-05, "loss": 0.6235, "step": 14899 }, { "epoch": 48.85245901639344, "grad_norm": 7.03148889541626, "learning_rate": 1.08564233502799e-05, "loss": 0.513, "step": 14900 }, { "epoch": 48.855737704918035, "grad_norm": 5.846578598022461, "learning_rate": 1.0855365359895751e-05, "loss": 0.8774, "step": 14901 }, { "epoch": 48.85901639344262, "grad_norm": 7.903377056121826, "learning_rate": 1.0854307359866464e-05, "loss": 0.375, "step": 14902 }, { "epoch": 48.86229508196721, "grad_norm": 12.330601692199707, "learning_rate": 1.0853249350203977e-05, "loss": 0.5757, "step": 14903 }, { "epoch": 48.86557377049181, "grad_norm": 6.612697601318359, "learning_rate": 1.0852191330920214e-05, "loss": 0.6089, "step": 14904 }, { "epoch": 48.868852459016395, "grad_norm": 6.929153919219971, "learning_rate": 1.0851133302027106e-05, "loss": 0.4303, "step": 14905 }, { "epoch": 48.87213114754098, "grad_norm": 7.731685161590576, "learning_rate": 1.0850075263536588e-05, "loss": 0.5056, "step": 14906 }, { "epoch": 48.87540983606557, "grad_norm": 7.710940361022949, "learning_rate": 1.0849017215460584e-05, "loss": 0.7772, "step": 14907 }, { "epoch": 48.87868852459017, "grad_norm": 6.539012908935547, "learning_rate": 1.0847959157811032e-05, "loss": 0.5844, "step": 14908 }, { "epoch": 48.881967213114756, "grad_norm": 5.856342315673828, "learning_rate": 1.0846901090599856e-05, "loss": 0.5035, "step": 14909 }, { "epoch": 48.885245901639344, "grad_norm": 5.96554708480835, "learning_rate": 1.084584301383899e-05, "loss": 0.5174, "step": 14910 }, { "epoch": 48.88852459016393, "grad_norm": 5.827144145965576, "learning_rate": 1.0844784927540365e-05, "loss": 0.7958, "step": 14911 }, { "epoch": 48.89180327868853, "grad_norm": 4.906128406524658, "learning_rate": 1.084372683171591e-05, "loss": 0.675, "step": 14912 }, { "epoch": 48.895081967213116, "grad_norm": 6.628175735473633, "learning_rate": 1.0842668726377564e-05, "loss": 0.6634, "step": 14913 }, { "epoch": 48.898360655737704, "grad_norm": 18.17504119873047, "learning_rate": 1.0841610611537246e-05, "loss": 0.6606, "step": 14914 }, { "epoch": 48.90163934426229, "grad_norm": 5.439438343048096, "learning_rate": 1.0840552487206893e-05, "loss": 0.5729, "step": 14915 }, { "epoch": 48.90491803278689, "grad_norm": 6.733240127563477, "learning_rate": 1.083949435339844e-05, "loss": 0.5358, "step": 14916 }, { "epoch": 48.90819672131148, "grad_norm": 5.743377685546875, "learning_rate": 1.0838436210123815e-05, "loss": 0.5421, "step": 14917 }, { "epoch": 48.911475409836065, "grad_norm": 6.789905071258545, "learning_rate": 1.0837378057394948e-05, "loss": 0.5733, "step": 14918 }, { "epoch": 48.91475409836065, "grad_norm": 8.35663890838623, "learning_rate": 1.0836319895223774e-05, "loss": 0.7238, "step": 14919 }, { "epoch": 48.91803278688525, "grad_norm": 5.871667861938477, "learning_rate": 1.0835261723622222e-05, "loss": 0.4611, "step": 14920 }, { "epoch": 48.92131147540984, "grad_norm": 6.578817367553711, "learning_rate": 1.0834203542602228e-05, "loss": 0.8091, "step": 14921 }, { "epoch": 48.924590163934425, "grad_norm": 7.572077751159668, "learning_rate": 1.083314535217572e-05, "loss": 0.6186, "step": 14922 }, { "epoch": 48.927868852459014, "grad_norm": 6.83386754989624, "learning_rate": 1.0832087152354634e-05, "loss": 0.4643, "step": 14923 }, { "epoch": 48.93114754098361, "grad_norm": 4.928135395050049, "learning_rate": 1.0831028943150897e-05, "loss": 0.4309, "step": 14924 }, { "epoch": 48.9344262295082, "grad_norm": 9.051283836364746, "learning_rate": 1.0829970724576446e-05, "loss": 0.8249, "step": 14925 }, { "epoch": 48.937704918032786, "grad_norm": 6.1095685958862305, "learning_rate": 1.0828912496643213e-05, "loss": 0.7133, "step": 14926 }, { "epoch": 48.940983606557374, "grad_norm": 6.012814998626709, "learning_rate": 1.0827854259363131e-05, "loss": 0.6602, "step": 14927 }, { "epoch": 48.94426229508197, "grad_norm": 5.687745094299316, "learning_rate": 1.0826796012748127e-05, "loss": 0.5719, "step": 14928 }, { "epoch": 48.94754098360656, "grad_norm": 6.588103771209717, "learning_rate": 1.0825737756810142e-05, "loss": 0.7242, "step": 14929 }, { "epoch": 48.950819672131146, "grad_norm": 5.901666164398193, "learning_rate": 1.0824679491561106e-05, "loss": 0.7392, "step": 14930 }, { "epoch": 48.954098360655735, "grad_norm": 5.912858486175537, "learning_rate": 1.0823621217012948e-05, "loss": 0.536, "step": 14931 }, { "epoch": 48.95737704918033, "grad_norm": 5.978973865509033, "learning_rate": 1.0822562933177607e-05, "loss": 0.791, "step": 14932 }, { "epoch": 48.96065573770492, "grad_norm": 47.176902770996094, "learning_rate": 1.0821504640067012e-05, "loss": 0.533, "step": 14933 }, { "epoch": 48.96393442622951, "grad_norm": 7.323096752166748, "learning_rate": 1.08204463376931e-05, "loss": 0.5524, "step": 14934 }, { "epoch": 48.967213114754095, "grad_norm": 6.183892250061035, "learning_rate": 1.0819388026067799e-05, "loss": 0.6451, "step": 14935 }, { "epoch": 48.97049180327869, "grad_norm": 6.992483139038086, "learning_rate": 1.081832970520305e-05, "loss": 0.8027, "step": 14936 }, { "epoch": 48.97377049180328, "grad_norm": 6.41636323928833, "learning_rate": 1.081727137511078e-05, "loss": 0.5734, "step": 14937 }, { "epoch": 48.97704918032787, "grad_norm": 5.200691223144531, "learning_rate": 1.0816213035802924e-05, "loss": 0.5281, "step": 14938 }, { "epoch": 48.980327868852456, "grad_norm": 5.845556735992432, "learning_rate": 1.0815154687291422e-05, "loss": 0.6383, "step": 14939 }, { "epoch": 48.98360655737705, "grad_norm": 21.869186401367188, "learning_rate": 1.08140963295882e-05, "loss": 0.7034, "step": 14940 }, { "epoch": 48.98688524590164, "grad_norm": 7.4432783126831055, "learning_rate": 1.0813037962705197e-05, "loss": 0.5274, "step": 14941 }, { "epoch": 48.99016393442623, "grad_norm": 5.871609210968018, "learning_rate": 1.0811979586654343e-05, "loss": 0.4319, "step": 14942 }, { "epoch": 48.993442622950816, "grad_norm": 7.808917999267578, "learning_rate": 1.081092120144758e-05, "loss": 0.5766, "step": 14943 }, { "epoch": 48.99672131147541, "grad_norm": 6.494569301605225, "learning_rate": 1.0809862807096834e-05, "loss": 0.848, "step": 14944 }, { "epoch": 49.0, "grad_norm": 5.511397838592529, "learning_rate": 1.0808804403614044e-05, "loss": 0.9211, "step": 14945 }, { "epoch": 49.00327868852459, "grad_norm": 7.258351802825928, "learning_rate": 1.080774599101114e-05, "loss": 0.4377, "step": 14946 }, { "epoch": 49.006557377049184, "grad_norm": 5.750005722045898, "learning_rate": 1.0806687569300065e-05, "loss": 0.5795, "step": 14947 }, { "epoch": 49.00983606557377, "grad_norm": 7.493671417236328, "learning_rate": 1.0805629138492746e-05, "loss": 0.4473, "step": 14948 }, { "epoch": 49.01311475409836, "grad_norm": 6.982352256774902, "learning_rate": 1.0804570698601122e-05, "loss": 0.4988, "step": 14949 }, { "epoch": 49.01639344262295, "grad_norm": 6.044713973999023, "learning_rate": 1.0803512249637125e-05, "loss": 0.6253, "step": 14950 }, { "epoch": 49.019672131147544, "grad_norm": 7.768403053283691, "learning_rate": 1.0802453791612695e-05, "loss": 0.6004, "step": 14951 }, { "epoch": 49.02295081967213, "grad_norm": 6.951404571533203, "learning_rate": 1.080139532453976e-05, "loss": 0.7153, "step": 14952 }, { "epoch": 49.02622950819672, "grad_norm": 5.233834266662598, "learning_rate": 1.0800336848430264e-05, "loss": 0.7114, "step": 14953 }, { "epoch": 49.02950819672131, "grad_norm": 5.6427531242370605, "learning_rate": 1.0799278363296133e-05, "loss": 0.4787, "step": 14954 }, { "epoch": 49.032786885245905, "grad_norm": 8.909014701843262, "learning_rate": 1.0798219869149311e-05, "loss": 0.5457, "step": 14955 }, { "epoch": 49.03606557377049, "grad_norm": 9.892333984375, "learning_rate": 1.0797161366001731e-05, "loss": 0.5832, "step": 14956 }, { "epoch": 49.03934426229508, "grad_norm": 8.317427635192871, "learning_rate": 1.0796102853865324e-05, "loss": 0.4684, "step": 14957 }, { "epoch": 49.04262295081967, "grad_norm": 9.36500358581543, "learning_rate": 1.0795044332752034e-05, "loss": 0.4603, "step": 14958 }, { "epoch": 49.045901639344265, "grad_norm": 5.3638916015625, "learning_rate": 1.079398580267379e-05, "loss": 0.842, "step": 14959 }, { "epoch": 49.049180327868854, "grad_norm": 8.000740051269531, "learning_rate": 1.079292726364253e-05, "loss": 0.7115, "step": 14960 }, { "epoch": 49.05245901639344, "grad_norm": 7.164976596832275, "learning_rate": 1.079186871567019e-05, "loss": 0.7118, "step": 14961 }, { "epoch": 49.05573770491803, "grad_norm": 6.391007900238037, "learning_rate": 1.079081015876871e-05, "loss": 0.4509, "step": 14962 }, { "epoch": 49.059016393442626, "grad_norm": 8.341691970825195, "learning_rate": 1.078975159295002e-05, "loss": 0.4979, "step": 14963 }, { "epoch": 49.062295081967214, "grad_norm": 6.1690521240234375, "learning_rate": 1.0788693018226062e-05, "loss": 0.4282, "step": 14964 }, { "epoch": 49.0655737704918, "grad_norm": 7.251641273498535, "learning_rate": 1.078763443460877e-05, "loss": 0.5482, "step": 14965 }, { "epoch": 49.06885245901639, "grad_norm": 7.685932636260986, "learning_rate": 1.0786575842110076e-05, "loss": 0.5294, "step": 14966 }, { "epoch": 49.072131147540986, "grad_norm": 7.6160664558410645, "learning_rate": 1.0785517240741927e-05, "loss": 0.6707, "step": 14967 }, { "epoch": 49.075409836065575, "grad_norm": 6.328059196472168, "learning_rate": 1.0784458630516252e-05, "loss": 0.5935, "step": 14968 }, { "epoch": 49.07868852459016, "grad_norm": 5.316071510314941, "learning_rate": 1.0783400011444993e-05, "loss": 0.685, "step": 14969 }, { "epoch": 49.08196721311475, "grad_norm": 5.586569309234619, "learning_rate": 1.0782341383540082e-05, "loss": 0.56, "step": 14970 }, { "epoch": 49.08524590163935, "grad_norm": 6.5096116065979, "learning_rate": 1.078128274681346e-05, "loss": 0.455, "step": 14971 }, { "epoch": 49.088524590163935, "grad_norm": 8.598892211914062, "learning_rate": 1.0780224101277061e-05, "loss": 0.7327, "step": 14972 }, { "epoch": 49.09180327868852, "grad_norm": 5.162665367126465, "learning_rate": 1.0779165446942826e-05, "loss": 0.7926, "step": 14973 }, { "epoch": 49.09508196721311, "grad_norm": 5.9081244468688965, "learning_rate": 1.0778106783822687e-05, "loss": 0.35, "step": 14974 }, { "epoch": 49.09836065573771, "grad_norm": 5.872675895690918, "learning_rate": 1.077704811192859e-05, "loss": 0.7613, "step": 14975 }, { "epoch": 49.101639344262296, "grad_norm": 8.661946296691895, "learning_rate": 1.0775989431272463e-05, "loss": 0.5547, "step": 14976 }, { "epoch": 49.104918032786884, "grad_norm": 9.613870620727539, "learning_rate": 1.0774930741866249e-05, "loss": 0.8071, "step": 14977 }, { "epoch": 49.10819672131147, "grad_norm": 5.510240077972412, "learning_rate": 1.0773872043721886e-05, "loss": 0.5741, "step": 14978 }, { "epoch": 49.11147540983607, "grad_norm": 6.2167558670043945, "learning_rate": 1.0772813336851314e-05, "loss": 0.645, "step": 14979 }, { "epoch": 49.114754098360656, "grad_norm": 13.330306053161621, "learning_rate": 1.0771754621266466e-05, "loss": 0.4195, "step": 14980 }, { "epoch": 49.118032786885244, "grad_norm": 5.954540252685547, "learning_rate": 1.0770695896979281e-05, "loss": 0.4861, "step": 14981 }, { "epoch": 49.12131147540983, "grad_norm": 8.617019653320312, "learning_rate": 1.07696371640017e-05, "loss": 0.5853, "step": 14982 }, { "epoch": 49.12459016393443, "grad_norm": 15.04908561706543, "learning_rate": 1.0768578422345658e-05, "loss": 0.4957, "step": 14983 }, { "epoch": 49.12786885245902, "grad_norm": 5.449383735656738, "learning_rate": 1.0767519672023096e-05, "loss": 0.6421, "step": 14984 }, { "epoch": 49.131147540983605, "grad_norm": 5.638826847076416, "learning_rate": 1.076646091304595e-05, "loss": 0.4627, "step": 14985 }, { "epoch": 49.13442622950819, "grad_norm": 6.670285701751709, "learning_rate": 1.0765402145426164e-05, "loss": 0.5549, "step": 14986 }, { "epoch": 49.13770491803279, "grad_norm": 6.983626842498779, "learning_rate": 1.0764343369175672e-05, "loss": 0.5259, "step": 14987 }, { "epoch": 49.14098360655738, "grad_norm": 5.95883846282959, "learning_rate": 1.0763284584306412e-05, "loss": 0.5846, "step": 14988 }, { "epoch": 49.144262295081965, "grad_norm": 5.772459506988525, "learning_rate": 1.0762225790830326e-05, "loss": 0.5042, "step": 14989 }, { "epoch": 49.14754098360656, "grad_norm": 5.973732948303223, "learning_rate": 1.076116698875935e-05, "loss": 0.5988, "step": 14990 }, { "epoch": 49.15081967213115, "grad_norm": 6.800919055938721, "learning_rate": 1.0760108178105425e-05, "loss": 0.486, "step": 14991 }, { "epoch": 49.15409836065574, "grad_norm": 6.173561096191406, "learning_rate": 1.0759049358880493e-05, "loss": 0.4606, "step": 14992 }, { "epoch": 49.157377049180326, "grad_norm": 6.496742248535156, "learning_rate": 1.0757990531096487e-05, "loss": 0.539, "step": 14993 }, { "epoch": 49.16065573770492, "grad_norm": 20.139949798583984, "learning_rate": 1.0756931694765351e-05, "loss": 0.5933, "step": 14994 }, { "epoch": 49.16393442622951, "grad_norm": 5.542749404907227, "learning_rate": 1.0755872849899022e-05, "loss": 0.5439, "step": 14995 }, { "epoch": 49.1672131147541, "grad_norm": 5.985668659210205, "learning_rate": 1.0754813996509444e-05, "loss": 0.6336, "step": 14996 }, { "epoch": 49.170491803278686, "grad_norm": 8.479195594787598, "learning_rate": 1.075375513460855e-05, "loss": 0.7962, "step": 14997 }, { "epoch": 49.17377049180328, "grad_norm": 5.4497971534729, "learning_rate": 1.0752696264208284e-05, "loss": 0.4829, "step": 14998 }, { "epoch": 49.17704918032787, "grad_norm": 6.9742584228515625, "learning_rate": 1.0751637385320585e-05, "loss": 0.8966, "step": 14999 }, { "epoch": 49.18032786885246, "grad_norm": 6.277111053466797, "learning_rate": 1.0750578497957393e-05, "loss": 0.6544, "step": 15000 }, { "epoch": 49.18360655737705, "grad_norm": 5.626748561859131, "learning_rate": 1.074951960213065e-05, "loss": 0.4745, "step": 15001 }, { "epoch": 49.18688524590164, "grad_norm": 7.0006256103515625, "learning_rate": 1.0748460697852293e-05, "loss": 0.5742, "step": 15002 }, { "epoch": 49.19016393442623, "grad_norm": 23.49846839904785, "learning_rate": 1.0747401785134262e-05, "loss": 0.5665, "step": 15003 }, { "epoch": 49.19344262295082, "grad_norm": 5.233835220336914, "learning_rate": 1.07463428639885e-05, "loss": 0.7882, "step": 15004 }, { "epoch": 49.19672131147541, "grad_norm": 6.661008358001709, "learning_rate": 1.0745283934426944e-05, "loss": 0.59, "step": 15005 }, { "epoch": 49.2, "grad_norm": 10.152382850646973, "learning_rate": 1.0744224996461541e-05, "loss": 0.5658, "step": 15006 }, { "epoch": 49.20327868852459, "grad_norm": 7.223073482513428, "learning_rate": 1.0743166050104224e-05, "loss": 0.7277, "step": 15007 }, { "epoch": 49.20655737704918, "grad_norm": 8.411274909973145, "learning_rate": 1.0742107095366939e-05, "loss": 0.3989, "step": 15008 }, { "epoch": 49.20983606557377, "grad_norm": 6.510969161987305, "learning_rate": 1.0741048132261622e-05, "loss": 0.648, "step": 15009 }, { "epoch": 49.21311475409836, "grad_norm": 5.750280857086182, "learning_rate": 1.0739989160800222e-05, "loss": 0.3963, "step": 15010 }, { "epoch": 49.21639344262295, "grad_norm": 10.581404685974121, "learning_rate": 1.073893018099467e-05, "loss": 0.4591, "step": 15011 }, { "epoch": 49.21967213114754, "grad_norm": 9.586563110351562, "learning_rate": 1.0737871192856915e-05, "loss": 0.5642, "step": 15012 }, { "epoch": 49.22295081967213, "grad_norm": 10.033327102661133, "learning_rate": 1.0736812196398892e-05, "loss": 0.6273, "step": 15013 }, { "epoch": 49.226229508196724, "grad_norm": 7.0160722732543945, "learning_rate": 1.0735753191632547e-05, "loss": 0.7165, "step": 15014 }, { "epoch": 49.22950819672131, "grad_norm": 6.968958854675293, "learning_rate": 1.073469417856982e-05, "loss": 0.8334, "step": 15015 }, { "epoch": 49.2327868852459, "grad_norm": 5.626373291015625, "learning_rate": 1.0733635157222653e-05, "loss": 0.6267, "step": 15016 }, { "epoch": 49.23606557377049, "grad_norm": 5.78327751159668, "learning_rate": 1.0732576127602985e-05, "loss": 0.5125, "step": 15017 }, { "epoch": 49.239344262295084, "grad_norm": 9.838943481445312, "learning_rate": 1.073151708972276e-05, "loss": 0.9299, "step": 15018 }, { "epoch": 49.24262295081967, "grad_norm": 10.968363761901855, "learning_rate": 1.073045804359392e-05, "loss": 0.7534, "step": 15019 }, { "epoch": 49.24590163934426, "grad_norm": 8.709291458129883, "learning_rate": 1.0729398989228404e-05, "loss": 0.4941, "step": 15020 }, { "epoch": 49.24918032786885, "grad_norm": 7.971776008605957, "learning_rate": 1.0728339926638158e-05, "loss": 0.4878, "step": 15021 }, { "epoch": 49.252459016393445, "grad_norm": 6.308640003204346, "learning_rate": 1.072728085583512e-05, "loss": 0.5817, "step": 15022 }, { "epoch": 49.25573770491803, "grad_norm": 6.0841827392578125, "learning_rate": 1.0726221776831238e-05, "loss": 0.4814, "step": 15023 }, { "epoch": 49.25901639344262, "grad_norm": 7.433515548706055, "learning_rate": 1.0725162689638447e-05, "loss": 0.645, "step": 15024 }, { "epoch": 49.26229508196721, "grad_norm": 13.957275390625, "learning_rate": 1.0724103594268696e-05, "loss": 0.6231, "step": 15025 }, { "epoch": 49.265573770491805, "grad_norm": 7.81024169921875, "learning_rate": 1.0723044490733921e-05, "loss": 0.4601, "step": 15026 }, { "epoch": 49.268852459016394, "grad_norm": 6.959226608276367, "learning_rate": 1.0721985379046069e-05, "loss": 0.7788, "step": 15027 }, { "epoch": 49.27213114754098, "grad_norm": 5.990317344665527, "learning_rate": 1.072092625921708e-05, "loss": 0.511, "step": 15028 }, { "epoch": 49.27540983606557, "grad_norm": 12.401476860046387, "learning_rate": 1.0719867131258899e-05, "loss": 0.7286, "step": 15029 }, { "epoch": 49.278688524590166, "grad_norm": 14.371397018432617, "learning_rate": 1.0718807995183467e-05, "loss": 0.9016, "step": 15030 }, { "epoch": 49.281967213114754, "grad_norm": 6.465362548828125, "learning_rate": 1.0717748851002727e-05, "loss": 0.769, "step": 15031 }, { "epoch": 49.28524590163934, "grad_norm": 12.228846549987793, "learning_rate": 1.0716689698728627e-05, "loss": 0.5952, "step": 15032 }, { "epoch": 49.28852459016394, "grad_norm": 10.311524391174316, "learning_rate": 1.07156305383731e-05, "loss": 0.4349, "step": 15033 }, { "epoch": 49.291803278688526, "grad_norm": 6.4945573806762695, "learning_rate": 1.0714571369948097e-05, "loss": 0.941, "step": 15034 }, { "epoch": 49.295081967213115, "grad_norm": 6.231681823730469, "learning_rate": 1.0713512193465557e-05, "loss": 0.4818, "step": 15035 }, { "epoch": 49.2983606557377, "grad_norm": 8.565217971801758, "learning_rate": 1.0712453008937427e-05, "loss": 0.7794, "step": 15036 }, { "epoch": 49.3016393442623, "grad_norm": 6.621646404266357, "learning_rate": 1.0711393816375646e-05, "loss": 0.6156, "step": 15037 }, { "epoch": 49.30491803278689, "grad_norm": 6.188041687011719, "learning_rate": 1.0710334615792164e-05, "loss": 0.6557, "step": 15038 }, { "epoch": 49.308196721311475, "grad_norm": 9.455596923828125, "learning_rate": 1.0709275407198915e-05, "loss": 0.6194, "step": 15039 }, { "epoch": 49.31147540983606, "grad_norm": 18.19450569152832, "learning_rate": 1.0708216190607854e-05, "loss": 0.7167, "step": 15040 }, { "epoch": 49.31475409836066, "grad_norm": 5.9768805503845215, "learning_rate": 1.0707156966030916e-05, "loss": 0.7481, "step": 15041 }, { "epoch": 49.31803278688525, "grad_norm": 6.547607898712158, "learning_rate": 1.070609773348005e-05, "loss": 0.3486, "step": 15042 }, { "epoch": 49.321311475409836, "grad_norm": 10.172739028930664, "learning_rate": 1.0705038492967198e-05, "loss": 0.6321, "step": 15043 }, { "epoch": 49.324590163934424, "grad_norm": 6.925078868865967, "learning_rate": 1.07039792445043e-05, "loss": 0.7573, "step": 15044 }, { "epoch": 49.32786885245902, "grad_norm": 6.693454265594482, "learning_rate": 1.0702919988103306e-05, "loss": 0.4371, "step": 15045 }, { "epoch": 49.33114754098361, "grad_norm": 6.069827556610107, "learning_rate": 1.070186072377616e-05, "loss": 0.4028, "step": 15046 }, { "epoch": 49.334426229508196, "grad_norm": 7.121880054473877, "learning_rate": 1.0700801451534804e-05, "loss": 0.6647, "step": 15047 }, { "epoch": 49.337704918032784, "grad_norm": 14.21242904663086, "learning_rate": 1.0699742171391181e-05, "loss": 0.729, "step": 15048 }, { "epoch": 49.34098360655738, "grad_norm": 9.705982208251953, "learning_rate": 1.069868288335724e-05, "loss": 0.5922, "step": 15049 }, { "epoch": 49.34426229508197, "grad_norm": 10.504698753356934, "learning_rate": 1.069762358744492e-05, "loss": 0.699, "step": 15050 }, { "epoch": 49.34754098360656, "grad_norm": 10.144290924072266, "learning_rate": 1.0696564283666174e-05, "loss": 0.5498, "step": 15051 }, { "epoch": 49.350819672131145, "grad_norm": 5.040593147277832, "learning_rate": 1.0695504972032937e-05, "loss": 0.9081, "step": 15052 }, { "epoch": 49.35409836065574, "grad_norm": 7.758496284484863, "learning_rate": 1.069444565255716e-05, "loss": 0.6136, "step": 15053 }, { "epoch": 49.35737704918033, "grad_norm": 11.092897415161133, "learning_rate": 1.0693386325250786e-05, "loss": 0.4426, "step": 15054 }, { "epoch": 49.36065573770492, "grad_norm": 6.800536632537842, "learning_rate": 1.069232699012576e-05, "loss": 0.5305, "step": 15055 }, { "epoch": 49.363934426229505, "grad_norm": 9.869866371154785, "learning_rate": 1.0691267647194026e-05, "loss": 0.8048, "step": 15056 }, { "epoch": 49.3672131147541, "grad_norm": 6.162651062011719, "learning_rate": 1.0690208296467532e-05, "loss": 0.6339, "step": 15057 }, { "epoch": 49.37049180327869, "grad_norm": 5.447235107421875, "learning_rate": 1.0689148937958224e-05, "loss": 0.5296, "step": 15058 }, { "epoch": 49.37377049180328, "grad_norm": 8.737979888916016, "learning_rate": 1.0688089571678042e-05, "loss": 0.5583, "step": 15059 }, { "epoch": 49.377049180327866, "grad_norm": 5.3576178550720215, "learning_rate": 1.0687030197638936e-05, "loss": 0.7909, "step": 15060 }, { "epoch": 49.38032786885246, "grad_norm": 7.429678916931152, "learning_rate": 1.0685970815852848e-05, "loss": 0.421, "step": 15061 }, { "epoch": 49.38360655737705, "grad_norm": 6.69163179397583, "learning_rate": 1.068491142633173e-05, "loss": 0.6209, "step": 15062 }, { "epoch": 49.38688524590164, "grad_norm": 7.53762149810791, "learning_rate": 1.068385202908752e-05, "loss": 0.8285, "step": 15063 }, { "epoch": 49.390163934426226, "grad_norm": 8.063453674316406, "learning_rate": 1.068279262413217e-05, "loss": 0.8251, "step": 15064 }, { "epoch": 49.39344262295082, "grad_norm": 7.594091415405273, "learning_rate": 1.068173321147762e-05, "loss": 0.5519, "step": 15065 }, { "epoch": 49.39672131147541, "grad_norm": 8.090808868408203, "learning_rate": 1.0680673791135822e-05, "loss": 0.6042, "step": 15066 }, { "epoch": 49.4, "grad_norm": 6.741855621337891, "learning_rate": 1.0679614363118718e-05, "loss": 0.5908, "step": 15067 }, { "epoch": 49.40327868852459, "grad_norm": 5.5068678855896, "learning_rate": 1.0678554927438257e-05, "loss": 0.4866, "step": 15068 }, { "epoch": 49.40655737704918, "grad_norm": 6.928040504455566, "learning_rate": 1.0677495484106379e-05, "loss": 0.5951, "step": 15069 }, { "epoch": 49.40983606557377, "grad_norm": 6.874824523925781, "learning_rate": 1.067643603313504e-05, "loss": 0.7816, "step": 15070 }, { "epoch": 49.41311475409836, "grad_norm": 7.813982009887695, "learning_rate": 1.067537657453618e-05, "loss": 0.4924, "step": 15071 }, { "epoch": 49.41639344262295, "grad_norm": 8.321504592895508, "learning_rate": 1.0674317108321748e-05, "loss": 0.4635, "step": 15072 }, { "epoch": 49.41967213114754, "grad_norm": 9.67550277709961, "learning_rate": 1.0673257634503686e-05, "loss": 0.5416, "step": 15073 }, { "epoch": 49.42295081967213, "grad_norm": 10.600722312927246, "learning_rate": 1.0672198153093947e-05, "loss": 0.6445, "step": 15074 }, { "epoch": 49.42622950819672, "grad_norm": 7.2211737632751465, "learning_rate": 1.0671138664104474e-05, "loss": 0.4807, "step": 15075 }, { "epoch": 49.429508196721315, "grad_norm": 5.7790846824646, "learning_rate": 1.0670079167547214e-05, "loss": 0.6446, "step": 15076 }, { "epoch": 49.4327868852459, "grad_norm": 8.60361385345459, "learning_rate": 1.0669019663434117e-05, "loss": 0.4506, "step": 15077 }, { "epoch": 49.43606557377049, "grad_norm": 6.089115142822266, "learning_rate": 1.0667960151777124e-05, "loss": 0.5907, "step": 15078 }, { "epoch": 49.43934426229508, "grad_norm": 6.992731094360352, "learning_rate": 1.066690063258819e-05, "loss": 0.5992, "step": 15079 }, { "epoch": 49.442622950819676, "grad_norm": 5.3885626792907715, "learning_rate": 1.0665841105879255e-05, "loss": 0.6813, "step": 15080 }, { "epoch": 49.445901639344264, "grad_norm": 6.74644136428833, "learning_rate": 1.066478157166227e-05, "loss": 0.639, "step": 15081 }, { "epoch": 49.44918032786885, "grad_norm": 5.9220123291015625, "learning_rate": 1.0663722029949182e-05, "loss": 0.8207, "step": 15082 }, { "epoch": 49.45245901639344, "grad_norm": 7.835458755493164, "learning_rate": 1.066266248075194e-05, "loss": 0.5001, "step": 15083 }, { "epoch": 49.455737704918036, "grad_norm": 5.901489734649658, "learning_rate": 1.0661602924082488e-05, "loss": 0.5375, "step": 15084 }, { "epoch": 49.459016393442624, "grad_norm": 6.220546722412109, "learning_rate": 1.0660543359952773e-05, "loss": 0.7249, "step": 15085 }, { "epoch": 49.46229508196721, "grad_norm": 6.2601518630981445, "learning_rate": 1.0659483788374747e-05, "loss": 0.6829, "step": 15086 }, { "epoch": 49.4655737704918, "grad_norm": 6.046563625335693, "learning_rate": 1.0658424209360354e-05, "loss": 0.6251, "step": 15087 }, { "epoch": 49.4688524590164, "grad_norm": 6.49073600769043, "learning_rate": 1.0657364622921547e-05, "loss": 0.4703, "step": 15088 }, { "epoch": 49.472131147540985, "grad_norm": 5.407870292663574, "learning_rate": 1.0656305029070264e-05, "loss": 0.536, "step": 15089 }, { "epoch": 49.47540983606557, "grad_norm": 7.866772174835205, "learning_rate": 1.0655245427818467e-05, "loss": 0.7965, "step": 15090 }, { "epoch": 49.47868852459016, "grad_norm": 9.695903778076172, "learning_rate": 1.0654185819178092e-05, "loss": 0.3794, "step": 15091 }, { "epoch": 49.48196721311476, "grad_norm": 9.243616104125977, "learning_rate": 1.0653126203161092e-05, "loss": 0.7004, "step": 15092 }, { "epoch": 49.485245901639345, "grad_norm": 7.820132255554199, "learning_rate": 1.0652066579779414e-05, "loss": 0.7903, "step": 15093 }, { "epoch": 49.488524590163934, "grad_norm": 6.13837194442749, "learning_rate": 1.0651006949045011e-05, "loss": 0.5282, "step": 15094 }, { "epoch": 49.49180327868852, "grad_norm": 7.286993980407715, "learning_rate": 1.0649947310969823e-05, "loss": 0.5418, "step": 15095 }, { "epoch": 49.49508196721312, "grad_norm": 6.748164176940918, "learning_rate": 1.0648887665565808e-05, "loss": 0.6968, "step": 15096 }, { "epoch": 49.498360655737706, "grad_norm": 18.110122680664062, "learning_rate": 1.0647828012844907e-05, "loss": 0.7478, "step": 15097 }, { "epoch": 49.501639344262294, "grad_norm": 5.736258506774902, "learning_rate": 1.0646768352819072e-05, "loss": 0.3467, "step": 15098 }, { "epoch": 49.50491803278688, "grad_norm": 7.174786567687988, "learning_rate": 1.0645708685500251e-05, "loss": 0.6777, "step": 15099 }, { "epoch": 49.50819672131148, "grad_norm": 8.99708366394043, "learning_rate": 1.0644649010900395e-05, "loss": 0.8212, "step": 15100 }, { "epoch": 49.511475409836066, "grad_norm": 7.425000190734863, "learning_rate": 1.064358932903145e-05, "loss": 0.5113, "step": 15101 }, { "epoch": 49.514754098360655, "grad_norm": 5.624812126159668, "learning_rate": 1.0642529639905363e-05, "loss": 0.5184, "step": 15102 }, { "epoch": 49.51803278688524, "grad_norm": 10.80730152130127, "learning_rate": 1.0641469943534091e-05, "loss": 0.4859, "step": 15103 }, { "epoch": 49.52131147540984, "grad_norm": 6.855220794677734, "learning_rate": 1.0640410239929575e-05, "loss": 0.6442, "step": 15104 }, { "epoch": 49.52459016393443, "grad_norm": 6.464244365692139, "learning_rate": 1.063935052910377e-05, "loss": 0.5685, "step": 15105 }, { "epoch": 49.527868852459015, "grad_norm": 10.216785430908203, "learning_rate": 1.0638290811068623e-05, "loss": 0.3905, "step": 15106 }, { "epoch": 49.5311475409836, "grad_norm": 7.980274200439453, "learning_rate": 1.0637231085836085e-05, "loss": 0.4445, "step": 15107 }, { "epoch": 49.5344262295082, "grad_norm": 7.27890682220459, "learning_rate": 1.06361713534181e-05, "loss": 0.6415, "step": 15108 }, { "epoch": 49.53770491803279, "grad_norm": 5.968950271606445, "learning_rate": 1.0635111613826626e-05, "loss": 0.5076, "step": 15109 }, { "epoch": 49.540983606557376, "grad_norm": 5.038195610046387, "learning_rate": 1.0634051867073604e-05, "loss": 0.3277, "step": 15110 }, { "epoch": 49.544262295081964, "grad_norm": 5.104213237762451, "learning_rate": 1.063299211317099e-05, "loss": 0.5713, "step": 15111 }, { "epoch": 49.54754098360656, "grad_norm": 6.529575347900391, "learning_rate": 1.0631932352130732e-05, "loss": 0.8405, "step": 15112 }, { "epoch": 49.55081967213115, "grad_norm": 5.4971160888671875, "learning_rate": 1.0630872583964777e-05, "loss": 0.7453, "step": 15113 }, { "epoch": 49.554098360655736, "grad_norm": 6.475371360778809, "learning_rate": 1.0629812808685083e-05, "loss": 0.5024, "step": 15114 }, { "epoch": 49.557377049180324, "grad_norm": 6.853127956390381, "learning_rate": 1.0628753026303589e-05, "loss": 0.5624, "step": 15115 }, { "epoch": 49.56065573770492, "grad_norm": 6.426702499389648, "learning_rate": 1.0627693236832254e-05, "loss": 0.7168, "step": 15116 }, { "epoch": 49.56393442622951, "grad_norm": 8.22321891784668, "learning_rate": 1.0626633440283024e-05, "loss": 0.5484, "step": 15117 }, { "epoch": 49.5672131147541, "grad_norm": 8.38300895690918, "learning_rate": 1.0625573636667852e-05, "loss": 0.7618, "step": 15118 }, { "epoch": 49.570491803278685, "grad_norm": 6.74547815322876, "learning_rate": 1.0624513825998685e-05, "loss": 0.6323, "step": 15119 }, { "epoch": 49.57377049180328, "grad_norm": 9.1810302734375, "learning_rate": 1.0623454008287477e-05, "loss": 0.7521, "step": 15120 }, { "epoch": 49.57704918032787, "grad_norm": 6.216309547424316, "learning_rate": 1.0622394183546174e-05, "loss": 0.4936, "step": 15121 }, { "epoch": 49.58032786885246, "grad_norm": 7.531973361968994, "learning_rate": 1.0621334351786734e-05, "loss": 0.6514, "step": 15122 }, { "epoch": 49.58360655737705, "grad_norm": 5.836174964904785, "learning_rate": 1.0620274513021101e-05, "loss": 0.5884, "step": 15123 }, { "epoch": 49.58688524590164, "grad_norm": 20.203903198242188, "learning_rate": 1.0619214667261226e-05, "loss": 0.5442, "step": 15124 }, { "epoch": 49.59016393442623, "grad_norm": 11.407830238342285, "learning_rate": 1.0618154814519064e-05, "loss": 0.3531, "step": 15125 }, { "epoch": 49.59344262295082, "grad_norm": 5.53410005569458, "learning_rate": 1.0617094954806562e-05, "loss": 0.4582, "step": 15126 }, { "epoch": 49.59672131147541, "grad_norm": 5.906708717346191, "learning_rate": 1.0616035088135673e-05, "loss": 0.4902, "step": 15127 }, { "epoch": 49.6, "grad_norm": 5.384671211242676, "learning_rate": 1.061497521451835e-05, "loss": 0.7917, "step": 15128 }, { "epoch": 49.60327868852459, "grad_norm": 9.112936019897461, "learning_rate": 1.0613915333966537e-05, "loss": 0.6165, "step": 15129 }, { "epoch": 49.60655737704918, "grad_norm": 6.8313889503479, "learning_rate": 1.0612855446492195e-05, "loss": 0.4534, "step": 15130 }, { "epoch": 49.609836065573774, "grad_norm": 14.646757125854492, "learning_rate": 1.0611795552107271e-05, "loss": 0.6486, "step": 15131 }, { "epoch": 49.61311475409836, "grad_norm": 5.964582443237305, "learning_rate": 1.0610735650823714e-05, "loss": 0.7652, "step": 15132 }, { "epoch": 49.61639344262295, "grad_norm": 7.92437219619751, "learning_rate": 1.0609675742653478e-05, "loss": 0.8077, "step": 15133 }, { "epoch": 49.61967213114754, "grad_norm": 9.303486824035645, "learning_rate": 1.0608615827608513e-05, "loss": 0.4804, "step": 15134 }, { "epoch": 49.622950819672134, "grad_norm": 7.235762119293213, "learning_rate": 1.0607555905700775e-05, "loss": 0.5203, "step": 15135 }, { "epoch": 49.62622950819672, "grad_norm": 6.987191200256348, "learning_rate": 1.060649597694221e-05, "loss": 0.8527, "step": 15136 }, { "epoch": 49.62950819672131, "grad_norm": 11.699321746826172, "learning_rate": 1.0605436041344773e-05, "loss": 0.5421, "step": 15137 }, { "epoch": 49.6327868852459, "grad_norm": 6.3601179122924805, "learning_rate": 1.0604376098920415e-05, "loss": 0.4836, "step": 15138 }, { "epoch": 49.636065573770495, "grad_norm": 6.947165489196777, "learning_rate": 1.0603316149681087e-05, "loss": 0.5726, "step": 15139 }, { "epoch": 49.63934426229508, "grad_norm": 6.657362937927246, "learning_rate": 1.0602256193638745e-05, "loss": 0.4876, "step": 15140 }, { "epoch": 49.64262295081967, "grad_norm": 7.935811996459961, "learning_rate": 1.0601196230805335e-05, "loss": 0.6875, "step": 15141 }, { "epoch": 49.64590163934426, "grad_norm": 6.844727993011475, "learning_rate": 1.0600136261192815e-05, "loss": 0.6995, "step": 15142 }, { "epoch": 49.649180327868855, "grad_norm": 7.347574710845947, "learning_rate": 1.0599076284813132e-05, "loss": 0.8059, "step": 15143 }, { "epoch": 49.65245901639344, "grad_norm": 7.527270317077637, "learning_rate": 1.0598016301678243e-05, "loss": 0.7317, "step": 15144 }, { "epoch": 49.65573770491803, "grad_norm": 7.130140781402588, "learning_rate": 1.0596956311800097e-05, "loss": 0.5753, "step": 15145 }, { "epoch": 49.65901639344262, "grad_norm": 7.06067419052124, "learning_rate": 1.059589631519065e-05, "loss": 0.5294, "step": 15146 }, { "epoch": 49.662295081967216, "grad_norm": 9.130617141723633, "learning_rate": 1.059483631186185e-05, "loss": 0.4645, "step": 15147 }, { "epoch": 49.665573770491804, "grad_norm": 9.560452461242676, "learning_rate": 1.0593776301825653e-05, "loss": 0.8394, "step": 15148 }, { "epoch": 49.66885245901639, "grad_norm": 8.225387573242188, "learning_rate": 1.0592716285094014e-05, "loss": 0.5419, "step": 15149 }, { "epoch": 49.67213114754098, "grad_norm": 8.375105857849121, "learning_rate": 1.0591656261678878e-05, "loss": 0.681, "step": 15150 }, { "epoch": 49.675409836065576, "grad_norm": 5.146208763122559, "learning_rate": 1.0590596231592205e-05, "loss": 0.7716, "step": 15151 }, { "epoch": 49.678688524590164, "grad_norm": 7.72186279296875, "learning_rate": 1.0589536194845944e-05, "loss": 0.7802, "step": 15152 }, { "epoch": 49.68196721311475, "grad_norm": 10.618175506591797, "learning_rate": 1.0588476151452047e-05, "loss": 0.328, "step": 15153 }, { "epoch": 49.68524590163934, "grad_norm": 6.744322299957275, "learning_rate": 1.0587416101422475e-05, "loss": 0.3607, "step": 15154 }, { "epoch": 49.68852459016394, "grad_norm": 9.394618034362793, "learning_rate": 1.0586356044769173e-05, "loss": 0.6364, "step": 15155 }, { "epoch": 49.691803278688525, "grad_norm": 6.269447326660156, "learning_rate": 1.0585295981504098e-05, "loss": 0.6855, "step": 15156 }, { "epoch": 49.69508196721311, "grad_norm": 5.400161266326904, "learning_rate": 1.05842359116392e-05, "loss": 0.5447, "step": 15157 }, { "epoch": 49.6983606557377, "grad_norm": 8.754487991333008, "learning_rate": 1.0583175835186435e-05, "loss": 0.5725, "step": 15158 }, { "epoch": 49.7016393442623, "grad_norm": 7.1938157081604, "learning_rate": 1.0582115752157758e-05, "loss": 0.5644, "step": 15159 }, { "epoch": 49.704918032786885, "grad_norm": 7.631759166717529, "learning_rate": 1.0581055662565119e-05, "loss": 0.6356, "step": 15160 }, { "epoch": 49.708196721311474, "grad_norm": 8.188592910766602, "learning_rate": 1.0579995566420476e-05, "loss": 0.6659, "step": 15161 }, { "epoch": 49.71147540983607, "grad_norm": 8.936084747314453, "learning_rate": 1.0578935463735777e-05, "loss": 0.7034, "step": 15162 }, { "epoch": 49.71475409836066, "grad_norm": 25.69097137451172, "learning_rate": 1.057787535452298e-05, "loss": 0.6372, "step": 15163 }, { "epoch": 49.718032786885246, "grad_norm": 5.636134147644043, "learning_rate": 1.0576815238794039e-05, "loss": 0.3672, "step": 15164 }, { "epoch": 49.721311475409834, "grad_norm": 9.881056785583496, "learning_rate": 1.0575755116560902e-05, "loss": 0.4085, "step": 15165 }, { "epoch": 49.72459016393443, "grad_norm": 6.43379545211792, "learning_rate": 1.0574694987835531e-05, "loss": 0.5192, "step": 15166 }, { "epoch": 49.72786885245902, "grad_norm": 7.532211780548096, "learning_rate": 1.0573634852629875e-05, "loss": 0.5915, "step": 15167 }, { "epoch": 49.731147540983606, "grad_norm": 8.118779182434082, "learning_rate": 1.0572574710955891e-05, "loss": 0.7786, "step": 15168 }, { "epoch": 49.734426229508195, "grad_norm": 6.435853958129883, "learning_rate": 1.0571514562825531e-05, "loss": 0.935, "step": 15169 }, { "epoch": 49.73770491803279, "grad_norm": 13.003620147705078, "learning_rate": 1.0570454408250752e-05, "loss": 0.5247, "step": 15170 }, { "epoch": 49.74098360655738, "grad_norm": 7.267148494720459, "learning_rate": 1.0569394247243502e-05, "loss": 0.5674, "step": 15171 }, { "epoch": 49.74426229508197, "grad_norm": 7.478587627410889, "learning_rate": 1.0568334079815745e-05, "loss": 0.4492, "step": 15172 }, { "epoch": 49.747540983606555, "grad_norm": 5.376979827880859, "learning_rate": 1.0567273905979428e-05, "loss": 0.5097, "step": 15173 }, { "epoch": 49.75081967213115, "grad_norm": 6.663581848144531, "learning_rate": 1.0566213725746506e-05, "loss": 0.8055, "step": 15174 }, { "epoch": 49.75409836065574, "grad_norm": 8.20677661895752, "learning_rate": 1.0565153539128939e-05, "loss": 0.6125, "step": 15175 }, { "epoch": 49.75737704918033, "grad_norm": 8.221643447875977, "learning_rate": 1.0564093346138676e-05, "loss": 0.5793, "step": 15176 }, { "epoch": 49.760655737704916, "grad_norm": 6.8201823234558105, "learning_rate": 1.0563033146787673e-05, "loss": 0.6811, "step": 15177 }, { "epoch": 49.76393442622951, "grad_norm": 6.318205833435059, "learning_rate": 1.0561972941087887e-05, "loss": 0.6692, "step": 15178 }, { "epoch": 49.7672131147541, "grad_norm": 7.4567461013793945, "learning_rate": 1.0560912729051273e-05, "loss": 0.6303, "step": 15179 }, { "epoch": 49.77049180327869, "grad_norm": 6.931821823120117, "learning_rate": 1.0559852510689783e-05, "loss": 0.6632, "step": 15180 }, { "epoch": 49.773770491803276, "grad_norm": 10.354451179504395, "learning_rate": 1.0558792286015373e-05, "loss": 0.9403, "step": 15181 }, { "epoch": 49.77704918032787, "grad_norm": 6.3924970626831055, "learning_rate": 1.0557732055040002e-05, "loss": 0.5664, "step": 15182 }, { "epoch": 49.78032786885246, "grad_norm": 7.239099979400635, "learning_rate": 1.0556671817775622e-05, "loss": 0.7323, "step": 15183 }, { "epoch": 49.78360655737705, "grad_norm": 5.949551105499268, "learning_rate": 1.0555611574234185e-05, "loss": 0.6989, "step": 15184 }, { "epoch": 49.78688524590164, "grad_norm": 5.813044548034668, "learning_rate": 1.0554551324427654e-05, "loss": 0.7562, "step": 15185 }, { "epoch": 49.79016393442623, "grad_norm": 8.89879035949707, "learning_rate": 1.0553491068367978e-05, "loss": 0.5311, "step": 15186 }, { "epoch": 49.79344262295082, "grad_norm": 20.326915740966797, "learning_rate": 1.0552430806067113e-05, "loss": 0.648, "step": 15187 }, { "epoch": 49.79672131147541, "grad_norm": 6.670141696929932, "learning_rate": 1.055137053753702e-05, "loss": 0.4799, "step": 15188 }, { "epoch": 49.8, "grad_norm": 13.325204849243164, "learning_rate": 1.055031026278965e-05, "loss": 0.4537, "step": 15189 }, { "epoch": 49.80327868852459, "grad_norm": 8.829630851745605, "learning_rate": 1.054924998183696e-05, "loss": 0.5922, "step": 15190 }, { "epoch": 49.80655737704918, "grad_norm": 5.815114498138428, "learning_rate": 1.0548189694690902e-05, "loss": 0.3901, "step": 15191 }, { "epoch": 49.80983606557377, "grad_norm": 6.509076118469238, "learning_rate": 1.0547129401363439e-05, "loss": 0.6658, "step": 15192 }, { "epoch": 49.81311475409836, "grad_norm": 6.946609020233154, "learning_rate": 1.054606910186652e-05, "loss": 0.4542, "step": 15193 }, { "epoch": 49.81639344262295, "grad_norm": 6.993124485015869, "learning_rate": 1.0545008796212105e-05, "loss": 0.4939, "step": 15194 }, { "epoch": 49.81967213114754, "grad_norm": 6.466973781585693, "learning_rate": 1.054394848441215e-05, "loss": 0.974, "step": 15195 }, { "epoch": 49.82295081967213, "grad_norm": 8.071608543395996, "learning_rate": 1.054288816647861e-05, "loss": 0.7373, "step": 15196 }, { "epoch": 49.82622950819672, "grad_norm": 6.4932661056518555, "learning_rate": 1.0541827842423442e-05, "loss": 0.5884, "step": 15197 }, { "epoch": 49.829508196721314, "grad_norm": 8.989365577697754, "learning_rate": 1.0540767512258603e-05, "loss": 0.5298, "step": 15198 }, { "epoch": 49.8327868852459, "grad_norm": 8.328338623046875, "learning_rate": 1.0539707175996048e-05, "loss": 0.5927, "step": 15199 }, { "epoch": 49.83606557377049, "grad_norm": 10.482741355895996, "learning_rate": 1.053864683364773e-05, "loss": 0.5014, "step": 15200 }, { "epoch": 49.83934426229508, "grad_norm": 6.306061267852783, "learning_rate": 1.0537586485225611e-05, "loss": 0.4873, "step": 15201 }, { "epoch": 49.842622950819674, "grad_norm": 5.852906703948975, "learning_rate": 1.0536526130741644e-05, "loss": 0.8852, "step": 15202 }, { "epoch": 49.84590163934426, "grad_norm": 7.4676432609558105, "learning_rate": 1.053546577020779e-05, "loss": 0.8195, "step": 15203 }, { "epoch": 49.84918032786885, "grad_norm": 5.874290466308594, "learning_rate": 1.0534405403636e-05, "loss": 0.7878, "step": 15204 }, { "epoch": 49.85245901639344, "grad_norm": 7.888796806335449, "learning_rate": 1.0533345031038234e-05, "loss": 0.6932, "step": 15205 }, { "epoch": 49.855737704918035, "grad_norm": 8.812220573425293, "learning_rate": 1.0532284652426447e-05, "loss": 0.7032, "step": 15206 }, { "epoch": 49.85901639344262, "grad_norm": 6.669089317321777, "learning_rate": 1.0531224267812601e-05, "loss": 0.4259, "step": 15207 }, { "epoch": 49.86229508196721, "grad_norm": 6.382354259490967, "learning_rate": 1.0530163877208645e-05, "loss": 0.7214, "step": 15208 }, { "epoch": 49.86557377049181, "grad_norm": 6.115935325622559, "learning_rate": 1.052910348062654e-05, "loss": 0.37, "step": 15209 }, { "epoch": 49.868852459016395, "grad_norm": 5.739181995391846, "learning_rate": 1.0528043078078248e-05, "loss": 0.6118, "step": 15210 }, { "epoch": 49.87213114754098, "grad_norm": 11.695003509521484, "learning_rate": 1.0526982669575716e-05, "loss": 0.6562, "step": 15211 }, { "epoch": 49.87540983606557, "grad_norm": 8.005736351013184, "learning_rate": 1.052592225513091e-05, "loss": 0.5949, "step": 15212 }, { "epoch": 49.87868852459017, "grad_norm": 4.937090873718262, "learning_rate": 1.0524861834755779e-05, "loss": 0.5212, "step": 15213 }, { "epoch": 49.881967213114756, "grad_norm": 5.717137336730957, "learning_rate": 1.0523801408462291e-05, "loss": 0.8782, "step": 15214 }, { "epoch": 49.885245901639344, "grad_norm": 5.281268119812012, "learning_rate": 1.0522740976262393e-05, "loss": 0.4937, "step": 15215 }, { "epoch": 49.88852459016393, "grad_norm": 6.573370456695557, "learning_rate": 1.0521680538168051e-05, "loss": 0.4791, "step": 15216 }, { "epoch": 49.89180327868853, "grad_norm": 5.831937313079834, "learning_rate": 1.0520620094191214e-05, "loss": 0.7873, "step": 15217 }, { "epoch": 49.895081967213116, "grad_norm": 5.503391742706299, "learning_rate": 1.051955964434385e-05, "loss": 0.6669, "step": 15218 }, { "epoch": 49.898360655737704, "grad_norm": 6.508112907409668, "learning_rate": 1.0518499188637905e-05, "loss": 0.5943, "step": 15219 }, { "epoch": 49.90163934426229, "grad_norm": 6.277303695678711, "learning_rate": 1.0517438727085344e-05, "loss": 0.6, "step": 15220 }, { "epoch": 49.90491803278689, "grad_norm": 5.832072734832764, "learning_rate": 1.0516378259698126e-05, "loss": 0.5317, "step": 15221 }, { "epoch": 49.90819672131148, "grad_norm": 7.773735523223877, "learning_rate": 1.0515317786488203e-05, "loss": 0.5926, "step": 15222 }, { "epoch": 49.911475409836065, "grad_norm": 5.65225076675415, "learning_rate": 1.0514257307467536e-05, "loss": 0.6399, "step": 15223 }, { "epoch": 49.91475409836065, "grad_norm": 5.664931774139404, "learning_rate": 1.0513196822648089e-05, "loss": 0.7159, "step": 15224 }, { "epoch": 49.91803278688525, "grad_norm": 7.049095630645752, "learning_rate": 1.0512136332041808e-05, "loss": 0.638, "step": 15225 }, { "epoch": 49.92131147540984, "grad_norm": 8.054023742675781, "learning_rate": 1.0511075835660661e-05, "loss": 0.6824, "step": 15226 }, { "epoch": 49.924590163934425, "grad_norm": 6.354196071624756, "learning_rate": 1.0510015333516601e-05, "loss": 0.5823, "step": 15227 }, { "epoch": 49.927868852459014, "grad_norm": 6.124014854431152, "learning_rate": 1.0508954825621587e-05, "loss": 0.538, "step": 15228 }, { "epoch": 49.93114754098361, "grad_norm": 7.11667013168335, "learning_rate": 1.050789431198758e-05, "loss": 0.6516, "step": 15229 }, { "epoch": 49.9344262295082, "grad_norm": 6.879104137420654, "learning_rate": 1.0506833792626535e-05, "loss": 0.7108, "step": 15230 }, { "epoch": 49.937704918032786, "grad_norm": 5.3314080238342285, "learning_rate": 1.0505773267550411e-05, "loss": 0.7488, "step": 15231 }, { "epoch": 49.940983606557374, "grad_norm": 7.072054386138916, "learning_rate": 1.050471273677117e-05, "loss": 0.6649, "step": 15232 }, { "epoch": 49.94426229508197, "grad_norm": 5.076350688934326, "learning_rate": 1.0503652200300767e-05, "loss": 0.548, "step": 15233 }, { "epoch": 49.94754098360656, "grad_norm": 6.6922831535339355, "learning_rate": 1.0502591658151162e-05, "loss": 0.5008, "step": 15234 }, { "epoch": 49.950819672131146, "grad_norm": 7.080937385559082, "learning_rate": 1.0501531110334311e-05, "loss": 0.7224, "step": 15235 }, { "epoch": 49.954098360655735, "grad_norm": 5.778375148773193, "learning_rate": 1.0500470556862179e-05, "loss": 0.3886, "step": 15236 }, { "epoch": 49.95737704918033, "grad_norm": 8.619915008544922, "learning_rate": 1.0499409997746718e-05, "loss": 0.6571, "step": 15237 }, { "epoch": 49.96065573770492, "grad_norm": 7.019098281860352, "learning_rate": 1.0498349432999895e-05, "loss": 0.7564, "step": 15238 }, { "epoch": 49.96393442622951, "grad_norm": 10.588212013244629, "learning_rate": 1.0497288862633658e-05, "loss": 0.7433, "step": 15239 }, { "epoch": 49.967213114754095, "grad_norm": 6.6581010818481445, "learning_rate": 1.0496228286659977e-05, "loss": 0.4741, "step": 15240 }, { "epoch": 49.97049180327869, "grad_norm": 7.464980125427246, "learning_rate": 1.0495167705090804e-05, "loss": 0.4075, "step": 15241 }, { "epoch": 49.97377049180328, "grad_norm": 8.406388282775879, "learning_rate": 1.0494107117938099e-05, "loss": 0.5401, "step": 15242 }, { "epoch": 49.97704918032787, "grad_norm": 7.352214813232422, "learning_rate": 1.0493046525213824e-05, "loss": 0.7014, "step": 15243 }, { "epoch": 49.980327868852456, "grad_norm": 9.389928817749023, "learning_rate": 1.0491985926929938e-05, "loss": 0.528, "step": 15244 }, { "epoch": 49.98360655737705, "grad_norm": 6.6675519943237305, "learning_rate": 1.0490925323098395e-05, "loss": 0.5403, "step": 15245 }, { "epoch": 49.98688524590164, "grad_norm": 8.591094970703125, "learning_rate": 1.0489864713731163e-05, "loss": 0.6326, "step": 15246 }, { "epoch": 49.99016393442623, "grad_norm": 6.66070556640625, "learning_rate": 1.0488804098840195e-05, "loss": 0.407, "step": 15247 }, { "epoch": 49.993442622950816, "grad_norm": 6.503049850463867, "learning_rate": 1.0487743478437453e-05, "loss": 0.8192, "step": 15248 }, { "epoch": 49.99672131147541, "grad_norm": 6.551600933074951, "learning_rate": 1.0486682852534895e-05, "loss": 0.9738, "step": 15249 }, { "epoch": 50.0, "grad_norm": 5.011541366577148, "learning_rate": 1.0485622221144485e-05, "loss": 0.4847, "step": 15250 }, { "epoch": 50.00327868852459, "grad_norm": 5.832739353179932, "learning_rate": 1.0484561584278177e-05, "loss": 0.7134, "step": 15251 }, { "epoch": 50.006557377049184, "grad_norm": 5.8158860206604, "learning_rate": 1.0483500941947933e-05, "loss": 0.6333, "step": 15252 }, { "epoch": 50.00983606557377, "grad_norm": 6.023050785064697, "learning_rate": 1.0482440294165714e-05, "loss": 0.5168, "step": 15253 }, { "epoch": 50.01311475409836, "grad_norm": 5.036984920501709, "learning_rate": 1.0481379640943479e-05, "loss": 0.6419, "step": 15254 }, { "epoch": 50.01639344262295, "grad_norm": 5.010077476501465, "learning_rate": 1.048031898229319e-05, "loss": 0.4743, "step": 15255 }, { "epoch": 50.019672131147544, "grad_norm": 6.987621307373047, "learning_rate": 1.0479258318226799e-05, "loss": 0.4648, "step": 15256 }, { "epoch": 50.02295081967213, "grad_norm": 5.730575084686279, "learning_rate": 1.0478197648756279e-05, "loss": 0.5629, "step": 15257 }, { "epoch": 50.02622950819672, "grad_norm": 5.201999187469482, "learning_rate": 1.0477136973893578e-05, "loss": 0.7407, "step": 15258 }, { "epoch": 50.02950819672131, "grad_norm": 5.690975666046143, "learning_rate": 1.0476076293650665e-05, "loss": 0.6501, "step": 15259 }, { "epoch": 50.032786885245905, "grad_norm": 8.62901496887207, "learning_rate": 1.0475015608039496e-05, "loss": 0.7109, "step": 15260 }, { "epoch": 50.03606557377049, "grad_norm": 5.215458869934082, "learning_rate": 1.0473954917072034e-05, "loss": 0.4741, "step": 15261 }, { "epoch": 50.03934426229508, "grad_norm": 7.462728023529053, "learning_rate": 1.0472894220760233e-05, "loss": 0.4619, "step": 15262 }, { "epoch": 50.04262295081967, "grad_norm": 5.795888423919678, "learning_rate": 1.0471833519116058e-05, "loss": 0.5051, "step": 15263 }, { "epoch": 50.045901639344265, "grad_norm": 5.621397018432617, "learning_rate": 1.0470772812151473e-05, "loss": 0.5765, "step": 15264 }, { "epoch": 50.049180327868854, "grad_norm": 7.052922248840332, "learning_rate": 1.0469712099878432e-05, "loss": 0.625, "step": 15265 }, { "epoch": 50.05245901639344, "grad_norm": 5.153478145599365, "learning_rate": 1.0468651382308903e-05, "loss": 0.3943, "step": 15266 }, { "epoch": 50.05573770491803, "grad_norm": 5.708469390869141, "learning_rate": 1.0467590659454839e-05, "loss": 0.6873, "step": 15267 }, { "epoch": 50.059016393442626, "grad_norm": 8.09192943572998, "learning_rate": 1.0466529931328206e-05, "loss": 0.6662, "step": 15268 }, { "epoch": 50.062295081967214, "grad_norm": 5.914714813232422, "learning_rate": 1.0465469197940961e-05, "loss": 0.4934, "step": 15269 }, { "epoch": 50.0655737704918, "grad_norm": 5.090346336364746, "learning_rate": 1.0464408459305069e-05, "loss": 0.4456, "step": 15270 }, { "epoch": 50.06885245901639, "grad_norm": 6.866033554077148, "learning_rate": 1.0463347715432488e-05, "loss": 0.5018, "step": 15271 }, { "epoch": 50.072131147540986, "grad_norm": 6.8251848220825195, "learning_rate": 1.046228696633518e-05, "loss": 0.595, "step": 15272 }, { "epoch": 50.075409836065575, "grad_norm": 5.379079341888428, "learning_rate": 1.0461226212025103e-05, "loss": 0.6296, "step": 15273 }, { "epoch": 50.07868852459016, "grad_norm": 10.099350929260254, "learning_rate": 1.0460165452514224e-05, "loss": 0.6591, "step": 15274 }, { "epoch": 50.08196721311475, "grad_norm": 6.177614688873291, "learning_rate": 1.0459104687814501e-05, "loss": 0.4618, "step": 15275 }, { "epoch": 50.08524590163935, "grad_norm": 6.595404148101807, "learning_rate": 1.0458043917937894e-05, "loss": 0.6905, "step": 15276 }, { "epoch": 50.088524590163935, "grad_norm": 5.897698402404785, "learning_rate": 1.045698314289637e-05, "loss": 0.5355, "step": 15277 }, { "epoch": 50.09180327868852, "grad_norm": 5.726886749267578, "learning_rate": 1.0455922362701877e-05, "loss": 0.728, "step": 15278 }, { "epoch": 50.09508196721311, "grad_norm": 5.34298849105835, "learning_rate": 1.0454861577366394e-05, "loss": 0.4806, "step": 15279 }, { "epoch": 50.09836065573771, "grad_norm": 6.291690349578857, "learning_rate": 1.0453800786901868e-05, "loss": 0.6429, "step": 15280 }, { "epoch": 50.101639344262296, "grad_norm": 6.126571178436279, "learning_rate": 1.0452739991320269e-05, "loss": 0.6489, "step": 15281 }, { "epoch": 50.104918032786884, "grad_norm": 7.303030967712402, "learning_rate": 1.0451679190633555e-05, "loss": 0.5191, "step": 15282 }, { "epoch": 50.10819672131147, "grad_norm": 5.9077043533325195, "learning_rate": 1.045061838485369e-05, "loss": 0.5725, "step": 15283 }, { "epoch": 50.11147540983607, "grad_norm": 6.7540974617004395, "learning_rate": 1.044955757399263e-05, "loss": 0.5561, "step": 15284 }, { "epoch": 50.114754098360656, "grad_norm": 16.01797866821289, "learning_rate": 1.0448496758062345e-05, "loss": 0.6576, "step": 15285 }, { "epoch": 50.118032786885244, "grad_norm": 4.430802822113037, "learning_rate": 1.0447435937074793e-05, "loss": 0.5953, "step": 15286 }, { "epoch": 50.12131147540983, "grad_norm": 6.920738697052002, "learning_rate": 1.0446375111041934e-05, "loss": 0.4717, "step": 15287 }, { "epoch": 50.12459016393443, "grad_norm": 5.437392234802246, "learning_rate": 1.044531427997573e-05, "loss": 0.5532, "step": 15288 }, { "epoch": 50.12786885245902, "grad_norm": 8.32596492767334, "learning_rate": 1.0444253443888149e-05, "loss": 0.9164, "step": 15289 }, { "epoch": 50.131147540983605, "grad_norm": 7.983613967895508, "learning_rate": 1.0443192602791143e-05, "loss": 0.6417, "step": 15290 }, { "epoch": 50.13442622950819, "grad_norm": 6.794319152832031, "learning_rate": 1.0442131756696681e-05, "loss": 0.5755, "step": 15291 }, { "epoch": 50.13770491803279, "grad_norm": 13.800078392028809, "learning_rate": 1.0441070905616726e-05, "loss": 0.5804, "step": 15292 }, { "epoch": 50.14098360655738, "grad_norm": 13.954910278320312, "learning_rate": 1.0440010049563236e-05, "loss": 0.4523, "step": 15293 }, { "epoch": 50.144262295081965, "grad_norm": 6.086525917053223, "learning_rate": 1.0438949188548177e-05, "loss": 0.7051, "step": 15294 }, { "epoch": 50.14754098360656, "grad_norm": 6.861389636993408, "learning_rate": 1.0437888322583509e-05, "loss": 0.5689, "step": 15295 }, { "epoch": 50.15081967213115, "grad_norm": 6.958309650421143, "learning_rate": 1.0436827451681195e-05, "loss": 0.5392, "step": 15296 }, { "epoch": 50.15409836065574, "grad_norm": 6.757234573364258, "learning_rate": 1.0435766575853197e-05, "loss": 0.7254, "step": 15297 }, { "epoch": 50.157377049180326, "grad_norm": 6.416169166564941, "learning_rate": 1.0434705695111478e-05, "loss": 0.644, "step": 15298 }, { "epoch": 50.16065573770492, "grad_norm": 6.715419769287109, "learning_rate": 1.0433644809467998e-05, "loss": 0.71, "step": 15299 }, { "epoch": 50.16393442622951, "grad_norm": 7.431002140045166, "learning_rate": 1.0432583918934724e-05, "loss": 0.8609, "step": 15300 }, { "epoch": 50.1672131147541, "grad_norm": 9.980853080749512, "learning_rate": 1.0431523023523616e-05, "loss": 0.588, "step": 15301 }, { "epoch": 50.170491803278686, "grad_norm": 5.72769832611084, "learning_rate": 1.0430462123246638e-05, "loss": 0.616, "step": 15302 }, { "epoch": 50.17377049180328, "grad_norm": 4.786603927612305, "learning_rate": 1.0429401218115753e-05, "loss": 0.4401, "step": 15303 }, { "epoch": 50.17704918032787, "grad_norm": 7.985877990722656, "learning_rate": 1.0428340308142921e-05, "loss": 0.5777, "step": 15304 }, { "epoch": 50.18032786885246, "grad_norm": 5.374790668487549, "learning_rate": 1.0427279393340108e-05, "loss": 0.6046, "step": 15305 }, { "epoch": 50.18360655737705, "grad_norm": 6.5888991355896, "learning_rate": 1.0426218473719274e-05, "loss": 0.538, "step": 15306 }, { "epoch": 50.18688524590164, "grad_norm": 12.288590431213379, "learning_rate": 1.0425157549292385e-05, "loss": 0.4208, "step": 15307 }, { "epoch": 50.19016393442623, "grad_norm": 6.789552688598633, "learning_rate": 1.04240966200714e-05, "loss": 0.3349, "step": 15308 }, { "epoch": 50.19344262295082, "grad_norm": 7.678183078765869, "learning_rate": 1.0423035686068289e-05, "loss": 0.7, "step": 15309 }, { "epoch": 50.19672131147541, "grad_norm": 5.586103439331055, "learning_rate": 1.0421974747295007e-05, "loss": 0.511, "step": 15310 }, { "epoch": 50.2, "grad_norm": 6.470190525054932, "learning_rate": 1.0420913803763522e-05, "loss": 0.4844, "step": 15311 }, { "epoch": 50.20327868852459, "grad_norm": 8.258321762084961, "learning_rate": 1.0419852855485798e-05, "loss": 0.7492, "step": 15312 }, { "epoch": 50.20655737704918, "grad_norm": 5.526608467102051, "learning_rate": 1.0418791902473794e-05, "loss": 0.574, "step": 15313 }, { "epoch": 50.20983606557377, "grad_norm": 6.717257499694824, "learning_rate": 1.0417730944739474e-05, "loss": 0.5949, "step": 15314 }, { "epoch": 50.21311475409836, "grad_norm": 7.649520397186279, "learning_rate": 1.0416669982294808e-05, "loss": 0.6551, "step": 15315 }, { "epoch": 50.21639344262295, "grad_norm": 5.690741539001465, "learning_rate": 1.0415609015151753e-05, "loss": 0.5355, "step": 15316 }, { "epoch": 50.21967213114754, "grad_norm": 6.604517459869385, "learning_rate": 1.0414548043322272e-05, "loss": 0.638, "step": 15317 }, { "epoch": 50.22295081967213, "grad_norm": 5.774803638458252, "learning_rate": 1.0413487066818335e-05, "loss": 0.8472, "step": 15318 }, { "epoch": 50.226229508196724, "grad_norm": 6.717245101928711, "learning_rate": 1.0412426085651897e-05, "loss": 0.8366, "step": 15319 }, { "epoch": 50.22950819672131, "grad_norm": 5.95304536819458, "learning_rate": 1.041136509983493e-05, "loss": 0.4603, "step": 15320 }, { "epoch": 50.2327868852459, "grad_norm": 6.009015083312988, "learning_rate": 1.041030410937939e-05, "loss": 0.4805, "step": 15321 }, { "epoch": 50.23606557377049, "grad_norm": 6.8737473487854, "learning_rate": 1.040924311429725e-05, "loss": 0.4192, "step": 15322 }, { "epoch": 50.239344262295084, "grad_norm": 4.8251729011535645, "learning_rate": 1.0408182114600463e-05, "loss": 0.5228, "step": 15323 }, { "epoch": 50.24262295081967, "grad_norm": 6.92864465713501, "learning_rate": 1.0407121110301001e-05, "loss": 0.4175, "step": 15324 }, { "epoch": 50.24590163934426, "grad_norm": 5.956136703491211, "learning_rate": 1.0406060101410822e-05, "loss": 0.6776, "step": 15325 }, { "epoch": 50.24918032786885, "grad_norm": 6.193805694580078, "learning_rate": 1.0404999087941898e-05, "loss": 0.8284, "step": 15326 }, { "epoch": 50.252459016393445, "grad_norm": 5.68840217590332, "learning_rate": 1.0403938069906186e-05, "loss": 0.5927, "step": 15327 }, { "epoch": 50.25573770491803, "grad_norm": 6.059119701385498, "learning_rate": 1.0402877047315652e-05, "loss": 0.6573, "step": 15328 }, { "epoch": 50.25901639344262, "grad_norm": 5.68986177444458, "learning_rate": 1.040181602018226e-05, "loss": 0.5294, "step": 15329 }, { "epoch": 50.26229508196721, "grad_norm": 7.678938865661621, "learning_rate": 1.0400754988517976e-05, "loss": 0.6289, "step": 15330 }, { "epoch": 50.265573770491805, "grad_norm": 6.862599849700928, "learning_rate": 1.0399693952334762e-05, "loss": 0.5442, "step": 15331 }, { "epoch": 50.268852459016394, "grad_norm": 6.291085243225098, "learning_rate": 1.0398632911644581e-05, "loss": 0.4303, "step": 15332 }, { "epoch": 50.27213114754098, "grad_norm": 5.583219528198242, "learning_rate": 1.0397571866459403e-05, "loss": 0.716, "step": 15333 }, { "epoch": 50.27540983606557, "grad_norm": 5.61578369140625, "learning_rate": 1.0396510816791188e-05, "loss": 0.685, "step": 15334 }, { "epoch": 50.278688524590166, "grad_norm": 10.864294052124023, "learning_rate": 1.03954497626519e-05, "loss": 1.0184, "step": 15335 }, { "epoch": 50.281967213114754, "grad_norm": 5.205801963806152, "learning_rate": 1.0394388704053503e-05, "loss": 0.5521, "step": 15336 }, { "epoch": 50.28524590163934, "grad_norm": 7.303656101226807, "learning_rate": 1.0393327641007968e-05, "loss": 0.6889, "step": 15337 }, { "epoch": 50.28852459016394, "grad_norm": 6.241762161254883, "learning_rate": 1.0392266573527253e-05, "loss": 0.7068, "step": 15338 }, { "epoch": 50.291803278688526, "grad_norm": 23.28998374938965, "learning_rate": 1.0391205501623323e-05, "loss": 0.5375, "step": 15339 }, { "epoch": 50.295081967213115, "grad_norm": 5.164956092834473, "learning_rate": 1.0390144425308144e-05, "loss": 0.3767, "step": 15340 }, { "epoch": 50.2983606557377, "grad_norm": 5.724730014801025, "learning_rate": 1.0389083344593684e-05, "loss": 0.4434, "step": 15341 }, { "epoch": 50.3016393442623, "grad_norm": 6.7646331787109375, "learning_rate": 1.03880222594919e-05, "loss": 0.6337, "step": 15342 }, { "epoch": 50.30491803278689, "grad_norm": 7.927065372467041, "learning_rate": 1.0386961170014767e-05, "loss": 0.7041, "step": 15343 }, { "epoch": 50.308196721311475, "grad_norm": 20.028160095214844, "learning_rate": 1.0385900076174242e-05, "loss": 0.6709, "step": 15344 }, { "epoch": 50.31147540983606, "grad_norm": 6.771410942077637, "learning_rate": 1.038483897798229e-05, "loss": 0.6456, "step": 15345 }, { "epoch": 50.31475409836066, "grad_norm": 7.752975940704346, "learning_rate": 1.0383777875450881e-05, "loss": 0.3987, "step": 15346 }, { "epoch": 50.31803278688525, "grad_norm": 6.944606304168701, "learning_rate": 1.0382716768591979e-05, "loss": 0.6581, "step": 15347 }, { "epoch": 50.321311475409836, "grad_norm": 6.999441146850586, "learning_rate": 1.0381655657417547e-05, "loss": 0.5671, "step": 15348 }, { "epoch": 50.324590163934424, "grad_norm": 5.066143989562988, "learning_rate": 1.0380594541939546e-05, "loss": 0.6156, "step": 15349 }, { "epoch": 50.32786885245902, "grad_norm": 7.439401626586914, "learning_rate": 1.0379533422169952e-05, "loss": 0.3749, "step": 15350 }, { "epoch": 50.33114754098361, "grad_norm": 7.147132873535156, "learning_rate": 1.0378472298120719e-05, "loss": 0.6024, "step": 15351 }, { "epoch": 50.334426229508196, "grad_norm": 5.3354010581970215, "learning_rate": 1.037741116980382e-05, "loss": 0.6581, "step": 15352 }, { "epoch": 50.337704918032784, "grad_norm": 8.550941467285156, "learning_rate": 1.0376350037231217e-05, "loss": 0.409, "step": 15353 }, { "epoch": 50.34098360655738, "grad_norm": 5.341988563537598, "learning_rate": 1.0375288900414877e-05, "loss": 0.6716, "step": 15354 }, { "epoch": 50.34426229508197, "grad_norm": 6.5927557945251465, "learning_rate": 1.0374227759366764e-05, "loss": 0.6627, "step": 15355 }, { "epoch": 50.34754098360656, "grad_norm": 6.49536657333374, "learning_rate": 1.0373166614098844e-05, "loss": 0.5142, "step": 15356 }, { "epoch": 50.350819672131145, "grad_norm": 5.722182750701904, "learning_rate": 1.0372105464623083e-05, "loss": 0.6593, "step": 15357 }, { "epoch": 50.35409836065574, "grad_norm": 6.564932346343994, "learning_rate": 1.0371044310951443e-05, "loss": 0.4747, "step": 15358 }, { "epoch": 50.35737704918033, "grad_norm": 6.499285697937012, "learning_rate": 1.0369983153095897e-05, "loss": 0.4888, "step": 15359 }, { "epoch": 50.36065573770492, "grad_norm": 7.396292209625244, "learning_rate": 1.0368921991068402e-05, "loss": 0.4756, "step": 15360 }, { "epoch": 50.363934426229505, "grad_norm": 9.694701194763184, "learning_rate": 1.0367860824880932e-05, "loss": 0.6854, "step": 15361 }, { "epoch": 50.3672131147541, "grad_norm": 6.351058483123779, "learning_rate": 1.0366799654545445e-05, "loss": 0.5037, "step": 15362 }, { "epoch": 50.37049180327869, "grad_norm": 8.804122924804688, "learning_rate": 1.0365738480073912e-05, "loss": 0.4107, "step": 15363 }, { "epoch": 50.37377049180328, "grad_norm": 6.327759265899658, "learning_rate": 1.0364677301478297e-05, "loss": 0.6093, "step": 15364 }, { "epoch": 50.377049180327866, "grad_norm": 5.516840934753418, "learning_rate": 1.0363616118770567e-05, "loss": 0.5249, "step": 15365 }, { "epoch": 50.38032786885246, "grad_norm": 5.9873175621032715, "learning_rate": 1.0362554931962687e-05, "loss": 0.5444, "step": 15366 }, { "epoch": 50.38360655737705, "grad_norm": 5.617508411407471, "learning_rate": 1.0361493741066623e-05, "loss": 0.5067, "step": 15367 }, { "epoch": 50.38688524590164, "grad_norm": 7.8911542892456055, "learning_rate": 1.0360432546094341e-05, "loss": 0.7873, "step": 15368 }, { "epoch": 50.390163934426226, "grad_norm": 7.021881580352783, "learning_rate": 1.035937134705781e-05, "loss": 0.6929, "step": 15369 }, { "epoch": 50.39344262295082, "grad_norm": 7.95326042175293, "learning_rate": 1.0358310143968988e-05, "loss": 0.5926, "step": 15370 }, { "epoch": 50.39672131147541, "grad_norm": 7.797409534454346, "learning_rate": 1.0357248936839852e-05, "loss": 0.6088, "step": 15371 }, { "epoch": 50.4, "grad_norm": 5.840873718261719, "learning_rate": 1.0356187725682359e-05, "loss": 0.4237, "step": 15372 }, { "epoch": 50.40327868852459, "grad_norm": 8.745049476623535, "learning_rate": 1.035512651050848e-05, "loss": 0.5229, "step": 15373 }, { "epoch": 50.40655737704918, "grad_norm": 7.2808027267456055, "learning_rate": 1.035406529133018e-05, "loss": 0.5965, "step": 15374 }, { "epoch": 50.40983606557377, "grad_norm": 10.40013599395752, "learning_rate": 1.0353004068159427e-05, "loss": 0.5633, "step": 15375 }, { "epoch": 50.41311475409836, "grad_norm": 5.958693504333496, "learning_rate": 1.0351942841008185e-05, "loss": 0.4254, "step": 15376 }, { "epoch": 50.41639344262295, "grad_norm": 6.869832992553711, "learning_rate": 1.035088160988842e-05, "loss": 0.5974, "step": 15377 }, { "epoch": 50.41967213114754, "grad_norm": 6.773865699768066, "learning_rate": 1.0349820374812105e-05, "loss": 0.6465, "step": 15378 }, { "epoch": 50.42295081967213, "grad_norm": 9.107744216918945, "learning_rate": 1.0348759135791197e-05, "loss": 0.6715, "step": 15379 }, { "epoch": 50.42622950819672, "grad_norm": 5.289300441741943, "learning_rate": 1.0347697892837671e-05, "loss": 0.6552, "step": 15380 }, { "epoch": 50.429508196721315, "grad_norm": 7.097483158111572, "learning_rate": 1.0346636645963489e-05, "loss": 0.7907, "step": 15381 }, { "epoch": 50.4327868852459, "grad_norm": 7.034907817840576, "learning_rate": 1.0345575395180614e-05, "loss": 0.6601, "step": 15382 }, { "epoch": 50.43606557377049, "grad_norm": 6.191592216491699, "learning_rate": 1.0344514140501023e-05, "loss": 0.3548, "step": 15383 }, { "epoch": 50.43934426229508, "grad_norm": 7.999678611755371, "learning_rate": 1.034345288193667e-05, "loss": 0.502, "step": 15384 }, { "epoch": 50.442622950819676, "grad_norm": 5.586419105529785, "learning_rate": 1.0342391619499535e-05, "loss": 0.6593, "step": 15385 }, { "epoch": 50.445901639344264, "grad_norm": 5.84028959274292, "learning_rate": 1.0341330353201576e-05, "loss": 0.6793, "step": 15386 }, { "epoch": 50.44918032786885, "grad_norm": 8.897612571716309, "learning_rate": 1.0340269083054764e-05, "loss": 0.4187, "step": 15387 }, { "epoch": 50.45245901639344, "grad_norm": 5.8478312492370605, "learning_rate": 1.0339207809071062e-05, "loss": 0.6213, "step": 15388 }, { "epoch": 50.455737704918036, "grad_norm": 4.833101272583008, "learning_rate": 1.033814653126244e-05, "loss": 0.8087, "step": 15389 }, { "epoch": 50.459016393442624, "grad_norm": 6.495335578918457, "learning_rate": 1.0337085249640864e-05, "loss": 0.6186, "step": 15390 }, { "epoch": 50.46229508196721, "grad_norm": 6.853925704956055, "learning_rate": 1.0336023964218302e-05, "loss": 0.7962, "step": 15391 }, { "epoch": 50.4655737704918, "grad_norm": 5.816927909851074, "learning_rate": 1.0334962675006722e-05, "loss": 0.5982, "step": 15392 }, { "epoch": 50.4688524590164, "grad_norm": 6.051721572875977, "learning_rate": 1.0333901382018085e-05, "loss": 0.5927, "step": 15393 }, { "epoch": 50.472131147540985, "grad_norm": 7.9081854820251465, "learning_rate": 1.0332840085264366e-05, "loss": 0.6836, "step": 15394 }, { "epoch": 50.47540983606557, "grad_norm": 6.133779525756836, "learning_rate": 1.033177878475753e-05, "loss": 0.2802, "step": 15395 }, { "epoch": 50.47868852459016, "grad_norm": 5.460951805114746, "learning_rate": 1.0330717480509539e-05, "loss": 0.5729, "step": 15396 }, { "epoch": 50.48196721311476, "grad_norm": 6.313816547393799, "learning_rate": 1.0329656172532368e-05, "loss": 0.6366, "step": 15397 }, { "epoch": 50.485245901639345, "grad_norm": 5.888743877410889, "learning_rate": 1.032859486083798e-05, "loss": 0.5166, "step": 15398 }, { "epoch": 50.488524590163934, "grad_norm": 7.59492301940918, "learning_rate": 1.032753354543834e-05, "loss": 0.6846, "step": 15399 }, { "epoch": 50.49180327868852, "grad_norm": 5.645501136779785, "learning_rate": 1.0326472226345423e-05, "loss": 0.6386, "step": 15400 }, { "epoch": 50.49508196721312, "grad_norm": 6.283524036407471, "learning_rate": 1.0325410903571192e-05, "loss": 0.6258, "step": 15401 }, { "epoch": 50.498360655737706, "grad_norm": 8.290828704833984, "learning_rate": 1.0324349577127614e-05, "loss": 0.5612, "step": 15402 }, { "epoch": 50.501639344262294, "grad_norm": 6.703327655792236, "learning_rate": 1.0323288247026656e-05, "loss": 0.6401, "step": 15403 }, { "epoch": 50.50491803278688, "grad_norm": 10.083576202392578, "learning_rate": 1.0322226913280289e-05, "loss": 0.5907, "step": 15404 }, { "epoch": 50.50819672131148, "grad_norm": 5.6826629638671875, "learning_rate": 1.0321165575900479e-05, "loss": 0.2534, "step": 15405 }, { "epoch": 50.511475409836066, "grad_norm": 6.052348613739014, "learning_rate": 1.0320104234899191e-05, "loss": 0.7081, "step": 15406 }, { "epoch": 50.514754098360655, "grad_norm": 6.305770397186279, "learning_rate": 1.0319042890288398e-05, "loss": 0.6398, "step": 15407 }, { "epoch": 50.51803278688524, "grad_norm": 6.6399946212768555, "learning_rate": 1.031798154208006e-05, "loss": 0.6006, "step": 15408 }, { "epoch": 50.52131147540984, "grad_norm": 6.4060540199279785, "learning_rate": 1.0316920190286152e-05, "loss": 0.5895, "step": 15409 }, { "epoch": 50.52459016393443, "grad_norm": 7.033258438110352, "learning_rate": 1.0315858834918639e-05, "loss": 0.475, "step": 15410 }, { "epoch": 50.527868852459015, "grad_norm": 6.1337056159973145, "learning_rate": 1.031479747598949e-05, "loss": 0.5274, "step": 15411 }, { "epoch": 50.5311475409836, "grad_norm": 5.879618167877197, "learning_rate": 1.0313736113510671e-05, "loss": 0.7154, "step": 15412 }, { "epoch": 50.5344262295082, "grad_norm": 7.028587341308594, "learning_rate": 1.0312674747494154e-05, "loss": 0.4056, "step": 15413 }, { "epoch": 50.53770491803279, "grad_norm": 6.306259632110596, "learning_rate": 1.0311613377951898e-05, "loss": 0.4267, "step": 15414 }, { "epoch": 50.540983606557376, "grad_norm": 9.929688453674316, "learning_rate": 1.0310552004895884e-05, "loss": 0.4128, "step": 15415 }, { "epoch": 50.544262295081964, "grad_norm": 7.396369457244873, "learning_rate": 1.030949062833807e-05, "loss": 0.473, "step": 15416 }, { "epoch": 50.54754098360656, "grad_norm": 8.979903221130371, "learning_rate": 1.0308429248290428e-05, "loss": 0.672, "step": 15417 }, { "epoch": 50.55081967213115, "grad_norm": 6.38640022277832, "learning_rate": 1.0307367864764925e-05, "loss": 0.3941, "step": 15418 }, { "epoch": 50.554098360655736, "grad_norm": 5.936664581298828, "learning_rate": 1.0306306477773531e-05, "loss": 0.5585, "step": 15419 }, { "epoch": 50.557377049180324, "grad_norm": 6.0124921798706055, "learning_rate": 1.0305245087328213e-05, "loss": 0.6279, "step": 15420 }, { "epoch": 50.56065573770492, "grad_norm": 5.6215500831604, "learning_rate": 1.0304183693440936e-05, "loss": 0.5547, "step": 15421 }, { "epoch": 50.56393442622951, "grad_norm": 11.076129913330078, "learning_rate": 1.0303122296123675e-05, "loss": 0.6304, "step": 15422 }, { "epoch": 50.5672131147541, "grad_norm": 5.872660160064697, "learning_rate": 1.0302060895388393e-05, "loss": 0.5778, "step": 15423 }, { "epoch": 50.570491803278685, "grad_norm": 7.027966022491455, "learning_rate": 1.0300999491247058e-05, "loss": 0.4859, "step": 15424 }, { "epoch": 50.57377049180328, "grad_norm": 10.452228546142578, "learning_rate": 1.0299938083711646e-05, "loss": 0.7087, "step": 15425 }, { "epoch": 50.57704918032787, "grad_norm": 8.851778030395508, "learning_rate": 1.0298876672794118e-05, "loss": 0.4812, "step": 15426 }, { "epoch": 50.58032786885246, "grad_norm": 6.584134101867676, "learning_rate": 1.0297815258506444e-05, "loss": 0.9798, "step": 15427 }, { "epoch": 50.58360655737705, "grad_norm": 6.323247909545898, "learning_rate": 1.0296753840860595e-05, "loss": 0.7448, "step": 15428 }, { "epoch": 50.58688524590164, "grad_norm": 7.966767311096191, "learning_rate": 1.0295692419868537e-05, "loss": 0.5145, "step": 15429 }, { "epoch": 50.59016393442623, "grad_norm": 5.773740291595459, "learning_rate": 1.0294630995542241e-05, "loss": 0.5562, "step": 15430 }, { "epoch": 50.59344262295082, "grad_norm": 5.740335464477539, "learning_rate": 1.0293569567893673e-05, "loss": 0.7509, "step": 15431 }, { "epoch": 50.59672131147541, "grad_norm": 7.74537992477417, "learning_rate": 1.0292508136934803e-05, "loss": 0.6292, "step": 15432 }, { "epoch": 50.6, "grad_norm": 4.7570624351501465, "learning_rate": 1.0291446702677598e-05, "loss": 0.9496, "step": 15433 }, { "epoch": 50.60327868852459, "grad_norm": 6.147977352142334, "learning_rate": 1.029038526513403e-05, "loss": 0.3637, "step": 15434 }, { "epoch": 50.60655737704918, "grad_norm": 8.376757621765137, "learning_rate": 1.0289323824316067e-05, "loss": 0.6878, "step": 15435 }, { "epoch": 50.609836065573774, "grad_norm": 7.274339199066162, "learning_rate": 1.0288262380235674e-05, "loss": 0.6661, "step": 15436 }, { "epoch": 50.61311475409836, "grad_norm": 5.785559177398682, "learning_rate": 1.0287200932904826e-05, "loss": 0.5124, "step": 15437 }, { "epoch": 50.61639344262295, "grad_norm": 5.431987762451172, "learning_rate": 1.0286139482335486e-05, "loss": 0.6147, "step": 15438 }, { "epoch": 50.61967213114754, "grad_norm": 7.517271041870117, "learning_rate": 1.028507802853963e-05, "loss": 0.6552, "step": 15439 }, { "epoch": 50.622950819672134, "grad_norm": 6.667243480682373, "learning_rate": 1.0284016571529219e-05, "loss": 0.3223, "step": 15440 }, { "epoch": 50.62622950819672, "grad_norm": 6.921613693237305, "learning_rate": 1.0282955111316228e-05, "loss": 0.5694, "step": 15441 }, { "epoch": 50.62950819672131, "grad_norm": 7.0273942947387695, "learning_rate": 1.0281893647912621e-05, "loss": 0.3133, "step": 15442 }, { "epoch": 50.6327868852459, "grad_norm": 6.777805805206299, "learning_rate": 1.0280832181330376e-05, "loss": 0.7493, "step": 15443 }, { "epoch": 50.636065573770495, "grad_norm": 5.865252494812012, "learning_rate": 1.0279770711581451e-05, "loss": 0.7556, "step": 15444 }, { "epoch": 50.63934426229508, "grad_norm": 29.541654586791992, "learning_rate": 1.0278709238677821e-05, "loss": 0.7014, "step": 15445 }, { "epoch": 50.64262295081967, "grad_norm": 21.3970947265625, "learning_rate": 1.0277647762631457e-05, "loss": 0.6743, "step": 15446 }, { "epoch": 50.64590163934426, "grad_norm": 5.1398820877075195, "learning_rate": 1.0276586283454323e-05, "loss": 0.7336, "step": 15447 }, { "epoch": 50.649180327868855, "grad_norm": 5.708261966705322, "learning_rate": 1.0275524801158394e-05, "loss": 0.4814, "step": 15448 }, { "epoch": 50.65245901639344, "grad_norm": 6.182188987731934, "learning_rate": 1.0274463315755634e-05, "loss": 0.3813, "step": 15449 }, { "epoch": 50.65573770491803, "grad_norm": 5.7796735763549805, "learning_rate": 1.0273401827258014e-05, "loss": 0.6055, "step": 15450 }, { "epoch": 50.65901639344262, "grad_norm": 5.928875923156738, "learning_rate": 1.0272340335677506e-05, "loss": 0.614, "step": 15451 }, { "epoch": 50.662295081967216, "grad_norm": 6.964934825897217, "learning_rate": 1.0271278841026075e-05, "loss": 0.6611, "step": 15452 }, { "epoch": 50.665573770491804, "grad_norm": 18.702987670898438, "learning_rate": 1.0270217343315696e-05, "loss": 0.66, "step": 15453 }, { "epoch": 50.66885245901639, "grad_norm": 6.346250057220459, "learning_rate": 1.0269155842558333e-05, "loss": 0.7353, "step": 15454 }, { "epoch": 50.67213114754098, "grad_norm": 5.218949794769287, "learning_rate": 1.026809433876596e-05, "loss": 0.6167, "step": 15455 }, { "epoch": 50.675409836065576, "grad_norm": 7.237767219543457, "learning_rate": 1.0267032831950546e-05, "loss": 0.439, "step": 15456 }, { "epoch": 50.678688524590164, "grad_norm": 6.3627824783325195, "learning_rate": 1.0265971322124057e-05, "loss": 0.527, "step": 15457 }, { "epoch": 50.68196721311475, "grad_norm": 6.9412102699279785, "learning_rate": 1.0264909809298465e-05, "loss": 0.5781, "step": 15458 }, { "epoch": 50.68524590163934, "grad_norm": 5.258938312530518, "learning_rate": 1.0263848293485738e-05, "loss": 0.5372, "step": 15459 }, { "epoch": 50.68852459016394, "grad_norm": 5.548292636871338, "learning_rate": 1.0262786774697847e-05, "loss": 0.6533, "step": 15460 }, { "epoch": 50.691803278688525, "grad_norm": 6.410022258758545, "learning_rate": 1.0261725252946766e-05, "loss": 0.5694, "step": 15461 }, { "epoch": 50.69508196721311, "grad_norm": 16.995750427246094, "learning_rate": 1.0260663728244458e-05, "loss": 0.7541, "step": 15462 }, { "epoch": 50.6983606557377, "grad_norm": 31.16933822631836, "learning_rate": 1.0259602200602896e-05, "loss": 0.6003, "step": 15463 }, { "epoch": 50.7016393442623, "grad_norm": 12.611649513244629, "learning_rate": 1.0258540670034047e-05, "loss": 0.4006, "step": 15464 }, { "epoch": 50.704918032786885, "grad_norm": 6.406881332397461, "learning_rate": 1.0257479136549889e-05, "loss": 0.4652, "step": 15465 }, { "epoch": 50.708196721311474, "grad_norm": 9.09545612335205, "learning_rate": 1.0256417600162378e-05, "loss": 0.6204, "step": 15466 }, { "epoch": 50.71147540983607, "grad_norm": 5.6626715660095215, "learning_rate": 1.02553560608835e-05, "loss": 0.5988, "step": 15467 }, { "epoch": 50.71475409836066, "grad_norm": 5.7842888832092285, "learning_rate": 1.025429451872521e-05, "loss": 0.3531, "step": 15468 }, { "epoch": 50.718032786885246, "grad_norm": 4.925449371337891, "learning_rate": 1.0253232973699489e-05, "loss": 0.5177, "step": 15469 }, { "epoch": 50.721311475409834, "grad_norm": 6.657938003540039, "learning_rate": 1.0252171425818303e-05, "loss": 0.5995, "step": 15470 }, { "epoch": 50.72459016393443, "grad_norm": 5.746499538421631, "learning_rate": 1.025110987509362e-05, "loss": 0.5409, "step": 15471 }, { "epoch": 50.72786885245902, "grad_norm": 4.719254016876221, "learning_rate": 1.0250048321537413e-05, "loss": 0.808, "step": 15472 }, { "epoch": 50.731147540983606, "grad_norm": 7.448540687561035, "learning_rate": 1.0248986765161651e-05, "loss": 0.4802, "step": 15473 }, { "epoch": 50.734426229508195, "grad_norm": 7.408658981323242, "learning_rate": 1.0247925205978305e-05, "loss": 0.6843, "step": 15474 }, { "epoch": 50.73770491803279, "grad_norm": 6.421923637390137, "learning_rate": 1.0246863643999343e-05, "loss": 0.7799, "step": 15475 }, { "epoch": 50.74098360655738, "grad_norm": 7.643650054931641, "learning_rate": 1.0245802079236737e-05, "loss": 0.4268, "step": 15476 }, { "epoch": 50.74426229508197, "grad_norm": 6.1488037109375, "learning_rate": 1.0244740511702458e-05, "loss": 0.5331, "step": 15477 }, { "epoch": 50.747540983606555, "grad_norm": 7.19518518447876, "learning_rate": 1.0243678941408475e-05, "loss": 0.6161, "step": 15478 }, { "epoch": 50.75081967213115, "grad_norm": 6.250339508056641, "learning_rate": 1.0242617368366759e-05, "loss": 0.52, "step": 15479 }, { "epoch": 50.75409836065574, "grad_norm": 8.876255989074707, "learning_rate": 1.0241555792589277e-05, "loss": 0.6367, "step": 15480 }, { "epoch": 50.75737704918033, "grad_norm": 9.225122451782227, "learning_rate": 1.0240494214088004e-05, "loss": 0.6859, "step": 15481 }, { "epoch": 50.760655737704916, "grad_norm": 6.978959560394287, "learning_rate": 1.023943263287491e-05, "loss": 0.4369, "step": 15482 }, { "epoch": 50.76393442622951, "grad_norm": 7.04545783996582, "learning_rate": 1.0238371048961966e-05, "loss": 0.698, "step": 15483 }, { "epoch": 50.7672131147541, "grad_norm": 9.16642951965332, "learning_rate": 1.0237309462361136e-05, "loss": 0.6373, "step": 15484 }, { "epoch": 50.77049180327869, "grad_norm": 5.397300720214844, "learning_rate": 1.0236247873084397e-05, "loss": 1.0086, "step": 15485 }, { "epoch": 50.773770491803276, "grad_norm": 4.912954330444336, "learning_rate": 1.0235186281143718e-05, "loss": 0.8225, "step": 15486 }, { "epoch": 50.77704918032787, "grad_norm": 6.009403705596924, "learning_rate": 1.023412468655107e-05, "loss": 0.6425, "step": 15487 }, { "epoch": 50.78032786885246, "grad_norm": 7.0755510330200195, "learning_rate": 1.023306308931842e-05, "loss": 0.7206, "step": 15488 }, { "epoch": 50.78360655737705, "grad_norm": 20.182161331176758, "learning_rate": 1.0232001489457744e-05, "loss": 0.7428, "step": 15489 }, { "epoch": 50.78688524590164, "grad_norm": 8.259140014648438, "learning_rate": 1.0230939886981009e-05, "loss": 0.6696, "step": 15490 }, { "epoch": 50.79016393442623, "grad_norm": 7.4190802574157715, "learning_rate": 1.0229878281900188e-05, "loss": 0.6511, "step": 15491 }, { "epoch": 50.79344262295082, "grad_norm": 5.957425117492676, "learning_rate": 1.0228816674227247e-05, "loss": 0.3955, "step": 15492 }, { "epoch": 50.79672131147541, "grad_norm": 8.799967765808105, "learning_rate": 1.0227755063974163e-05, "loss": 0.469, "step": 15493 }, { "epoch": 50.8, "grad_norm": 8.847149848937988, "learning_rate": 1.02266934511529e-05, "loss": 0.6288, "step": 15494 }, { "epoch": 50.80327868852459, "grad_norm": 6.918036937713623, "learning_rate": 1.0225631835775438e-05, "loss": 0.7808, "step": 15495 }, { "epoch": 50.80655737704918, "grad_norm": 6.010513782501221, "learning_rate": 1.022457021785374e-05, "loss": 0.6426, "step": 15496 }, { "epoch": 50.80983606557377, "grad_norm": 6.411816596984863, "learning_rate": 1.0223508597399778e-05, "loss": 0.6961, "step": 15497 }, { "epoch": 50.81311475409836, "grad_norm": 8.292764663696289, "learning_rate": 1.0222446974425528e-05, "loss": 0.734, "step": 15498 }, { "epoch": 50.81639344262295, "grad_norm": 5.6869611740112305, "learning_rate": 1.0221385348942953e-05, "loss": 0.6152, "step": 15499 }, { "epoch": 50.81967213114754, "grad_norm": 7.590259552001953, "learning_rate": 1.022032372096403e-05, "loss": 0.5922, "step": 15500 }, { "epoch": 50.82295081967213, "grad_norm": 7.208896160125732, "learning_rate": 1.0219262090500725e-05, "loss": 0.6223, "step": 15501 }, { "epoch": 50.82622950819672, "grad_norm": 8.92049503326416, "learning_rate": 1.0218200457565018e-05, "loss": 0.5574, "step": 15502 }, { "epoch": 50.829508196721314, "grad_norm": 5.99317741394043, "learning_rate": 1.0217138822168869e-05, "loss": 0.9475, "step": 15503 }, { "epoch": 50.8327868852459, "grad_norm": 7.92331075668335, "learning_rate": 1.0216077184324255e-05, "loss": 0.4233, "step": 15504 }, { "epoch": 50.83606557377049, "grad_norm": 6.064359188079834, "learning_rate": 1.0215015544043145e-05, "loss": 0.4627, "step": 15505 }, { "epoch": 50.83934426229508, "grad_norm": 6.7442708015441895, "learning_rate": 1.0213953901337512e-05, "loss": 0.452, "step": 15506 }, { "epoch": 50.842622950819674, "grad_norm": 6.195938587188721, "learning_rate": 1.0212892256219328e-05, "loss": 0.6528, "step": 15507 }, { "epoch": 50.84590163934426, "grad_norm": 5.627532958984375, "learning_rate": 1.0211830608700561e-05, "loss": 0.6593, "step": 15508 }, { "epoch": 50.84918032786885, "grad_norm": 7.45072603225708, "learning_rate": 1.0210768958793184e-05, "loss": 0.6779, "step": 15509 }, { "epoch": 50.85245901639344, "grad_norm": 5.969399929046631, "learning_rate": 1.020970730650917e-05, "loss": 0.5391, "step": 15510 }, { "epoch": 50.855737704918035, "grad_norm": 10.489444732666016, "learning_rate": 1.0208645651860485e-05, "loss": 0.5855, "step": 15511 }, { "epoch": 50.85901639344262, "grad_norm": 5.527322292327881, "learning_rate": 1.0207583994859103e-05, "loss": 0.545, "step": 15512 }, { "epoch": 50.86229508196721, "grad_norm": 6.393553256988525, "learning_rate": 1.0206522335516999e-05, "loss": 0.6382, "step": 15513 }, { "epoch": 50.86557377049181, "grad_norm": 5.808358669281006, "learning_rate": 1.0205460673846138e-05, "loss": 0.6727, "step": 15514 }, { "epoch": 50.868852459016395, "grad_norm": 6.14246940612793, "learning_rate": 1.0204399009858495e-05, "loss": 0.5563, "step": 15515 }, { "epoch": 50.87213114754098, "grad_norm": 5.468254566192627, "learning_rate": 1.020333734356604e-05, "loss": 0.3941, "step": 15516 }, { "epoch": 50.87540983606557, "grad_norm": 6.754340648651123, "learning_rate": 1.0202275674980746e-05, "loss": 0.5529, "step": 15517 }, { "epoch": 50.87868852459017, "grad_norm": 5.55044412612915, "learning_rate": 1.0201214004114584e-05, "loss": 0.6952, "step": 15518 }, { "epoch": 50.881967213114756, "grad_norm": 8.146317481994629, "learning_rate": 1.0200152330979525e-05, "loss": 0.6059, "step": 15519 }, { "epoch": 50.885245901639344, "grad_norm": 6.584167957305908, "learning_rate": 1.0199090655587538e-05, "loss": 0.5073, "step": 15520 }, { "epoch": 50.88852459016393, "grad_norm": 6.270788192749023, "learning_rate": 1.01980289779506e-05, "loss": 0.9122, "step": 15521 }, { "epoch": 50.89180327868853, "grad_norm": 7.842076778411865, "learning_rate": 1.0196967298080678e-05, "loss": 0.6576, "step": 15522 }, { "epoch": 50.895081967213116, "grad_norm": 5.543795108795166, "learning_rate": 1.0195905615989743e-05, "loss": 0.6169, "step": 15523 }, { "epoch": 50.898360655737704, "grad_norm": 5.181527614593506, "learning_rate": 1.0194843931689772e-05, "loss": 0.5184, "step": 15524 }, { "epoch": 50.90163934426229, "grad_norm": 6.086549282073975, "learning_rate": 1.019378224519273e-05, "loss": 0.524, "step": 15525 }, { "epoch": 50.90491803278689, "grad_norm": 6.684284687042236, "learning_rate": 1.0192720556510593e-05, "loss": 0.6566, "step": 15526 }, { "epoch": 50.90819672131148, "grad_norm": 5.197571277618408, "learning_rate": 1.0191658865655332e-05, "loss": 0.5904, "step": 15527 }, { "epoch": 50.911475409836065, "grad_norm": 8.151644706726074, "learning_rate": 1.0190597172638918e-05, "loss": 0.4901, "step": 15528 }, { "epoch": 50.91475409836065, "grad_norm": 5.89856481552124, "learning_rate": 1.018953547747332e-05, "loss": 0.4728, "step": 15529 }, { "epoch": 50.91803278688525, "grad_norm": 5.534886360168457, "learning_rate": 1.0188473780170516e-05, "loss": 0.5643, "step": 15530 }, { "epoch": 50.92131147540984, "grad_norm": 5.899926662445068, "learning_rate": 1.0187412080742472e-05, "loss": 0.6105, "step": 15531 }, { "epoch": 50.924590163934425, "grad_norm": 8.296836853027344, "learning_rate": 1.0186350379201163e-05, "loss": 0.6902, "step": 15532 }, { "epoch": 50.927868852459014, "grad_norm": 7.776079177856445, "learning_rate": 1.0185288675558558e-05, "loss": 0.6486, "step": 15533 }, { "epoch": 50.93114754098361, "grad_norm": 7.047513961791992, "learning_rate": 1.0184226969826632e-05, "loss": 0.5639, "step": 15534 }, { "epoch": 50.9344262295082, "grad_norm": 5.387800693511963, "learning_rate": 1.0183165262017355e-05, "loss": 0.7051, "step": 15535 }, { "epoch": 50.937704918032786, "grad_norm": 6.456338882446289, "learning_rate": 1.01821035521427e-05, "loss": 0.6859, "step": 15536 }, { "epoch": 50.940983606557374, "grad_norm": 6.795660972595215, "learning_rate": 1.0181041840214639e-05, "loss": 0.6861, "step": 15537 }, { "epoch": 50.94426229508197, "grad_norm": 5.459826469421387, "learning_rate": 1.0179980126245139e-05, "loss": 0.4966, "step": 15538 }, { "epoch": 50.94754098360656, "grad_norm": 7.1329827308654785, "learning_rate": 1.0178918410246179e-05, "loss": 0.5178, "step": 15539 }, { "epoch": 50.950819672131146, "grad_norm": 6.978858470916748, "learning_rate": 1.0177856692229727e-05, "loss": 0.5914, "step": 15540 }, { "epoch": 50.954098360655735, "grad_norm": 5.6400957107543945, "learning_rate": 1.0176794972207758e-05, "loss": 0.4605, "step": 15541 }, { "epoch": 50.95737704918033, "grad_norm": 8.478601455688477, "learning_rate": 1.0175733250192238e-05, "loss": 0.6953, "step": 15542 }, { "epoch": 50.96065573770492, "grad_norm": 6.407919406890869, "learning_rate": 1.0174671526195146e-05, "loss": 0.6268, "step": 15543 }, { "epoch": 50.96393442622951, "grad_norm": 7.2986979484558105, "learning_rate": 1.017360980022845e-05, "loss": 0.8937, "step": 15544 }, { "epoch": 50.967213114754095, "grad_norm": 5.974765300750732, "learning_rate": 1.0172548072304122e-05, "loss": 0.7984, "step": 15545 }, { "epoch": 50.97049180327869, "grad_norm": 5.455580234527588, "learning_rate": 1.0171486342434135e-05, "loss": 0.5951, "step": 15546 }, { "epoch": 50.97377049180328, "grad_norm": 6.840050220489502, "learning_rate": 1.0170424610630465e-05, "loss": 0.7979, "step": 15547 }, { "epoch": 50.97704918032787, "grad_norm": 6.3516621589660645, "learning_rate": 1.0169362876905078e-05, "loss": 0.8215, "step": 15548 }, { "epoch": 50.980327868852456, "grad_norm": 8.981758117675781, "learning_rate": 1.0168301141269947e-05, "loss": 0.6112, "step": 15549 }, { "epoch": 50.98360655737705, "grad_norm": 10.923443794250488, "learning_rate": 1.0167239403737048e-05, "loss": 0.607, "step": 15550 }, { "epoch": 50.98688524590164, "grad_norm": 4.918138027191162, "learning_rate": 1.0166177664318348e-05, "loss": 0.4113, "step": 15551 }, { "epoch": 50.99016393442623, "grad_norm": 4.886340618133545, "learning_rate": 1.0165115923025824e-05, "loss": 0.4304, "step": 15552 }, { "epoch": 50.993442622950816, "grad_norm": 7.921698093414307, "learning_rate": 1.0164054179871445e-05, "loss": 0.5498, "step": 15553 }, { "epoch": 50.99672131147541, "grad_norm": 6.177511692047119, "learning_rate": 1.0162992434867187e-05, "loss": 0.5121, "step": 15554 }, { "epoch": 51.0, "grad_norm": 5.6935272216796875, "learning_rate": 1.0161930688025018e-05, "loss": 0.6305, "step": 15555 }, { "epoch": 51.00327868852459, "grad_norm": 5.941214561462402, "learning_rate": 1.0160868939356911e-05, "loss": 0.5803, "step": 15556 }, { "epoch": 51.006557377049184, "grad_norm": 10.181350708007812, "learning_rate": 1.015980718887484e-05, "loss": 0.5646, "step": 15557 }, { "epoch": 51.00983606557377, "grad_norm": 18.19756317138672, "learning_rate": 1.015874543659078e-05, "loss": 0.5363, "step": 15558 }, { "epoch": 51.01311475409836, "grad_norm": 5.45437479019165, "learning_rate": 1.0157683682516695e-05, "loss": 0.5715, "step": 15559 }, { "epoch": 51.01639344262295, "grad_norm": 7.0719733238220215, "learning_rate": 1.0156621926664566e-05, "loss": 0.7711, "step": 15560 }, { "epoch": 51.019672131147544, "grad_norm": 5.364407539367676, "learning_rate": 1.0155560169046362e-05, "loss": 0.6276, "step": 15561 }, { "epoch": 51.02295081967213, "grad_norm": 6.241873264312744, "learning_rate": 1.0154498409674051e-05, "loss": 0.516, "step": 15562 }, { "epoch": 51.02622950819672, "grad_norm": 7.063600063323975, "learning_rate": 1.0153436648559616e-05, "loss": 0.6054, "step": 15563 }, { "epoch": 51.02950819672131, "grad_norm": 149.20082092285156, "learning_rate": 1.015237488571502e-05, "loss": 0.7203, "step": 15564 }, { "epoch": 51.032786885245905, "grad_norm": 10.03141975402832, "learning_rate": 1.015131312115224e-05, "loss": 0.6333, "step": 15565 }, { "epoch": 51.03606557377049, "grad_norm": 10.780366897583008, "learning_rate": 1.0150251354883245e-05, "loss": 0.6031, "step": 15566 }, { "epoch": 51.03934426229508, "grad_norm": 5.969034194946289, "learning_rate": 1.0149189586920011e-05, "loss": 0.7334, "step": 15567 }, { "epoch": 51.04262295081967, "grad_norm": 6.582772254943848, "learning_rate": 1.0148127817274509e-05, "loss": 0.4492, "step": 15568 }, { "epoch": 51.045901639344265, "grad_norm": 5.607635498046875, "learning_rate": 1.0147066045958714e-05, "loss": 0.5077, "step": 15569 }, { "epoch": 51.049180327868854, "grad_norm": 6.626095771789551, "learning_rate": 1.0146004272984594e-05, "loss": 0.5635, "step": 15570 }, { "epoch": 51.05245901639344, "grad_norm": 5.946279048919678, "learning_rate": 1.0144942498364125e-05, "loss": 0.4799, "step": 15571 }, { "epoch": 51.05573770491803, "grad_norm": 5.588367462158203, "learning_rate": 1.0143880722109279e-05, "loss": 0.389, "step": 15572 }, { "epoch": 51.059016393442626, "grad_norm": 8.934011459350586, "learning_rate": 1.0142818944232028e-05, "loss": 0.5032, "step": 15573 }, { "epoch": 51.062295081967214, "grad_norm": 6.584336757659912, "learning_rate": 1.0141757164744346e-05, "loss": 0.462, "step": 15574 }, { "epoch": 51.0655737704918, "grad_norm": 12.868361473083496, "learning_rate": 1.01406953836582e-05, "loss": 0.7665, "step": 15575 }, { "epoch": 51.06885245901639, "grad_norm": 7.07656192779541, "learning_rate": 1.0139633600985572e-05, "loss": 0.5601, "step": 15576 }, { "epoch": 51.072131147540986, "grad_norm": 7.45924711227417, "learning_rate": 1.013857181673843e-05, "loss": 0.4505, "step": 15577 }, { "epoch": 51.075409836065575, "grad_norm": 5.742427825927734, "learning_rate": 1.0137510030928745e-05, "loss": 0.3575, "step": 15578 }, { "epoch": 51.07868852459016, "grad_norm": 9.759066581726074, "learning_rate": 1.0136448243568491e-05, "loss": 0.5638, "step": 15579 }, { "epoch": 51.08196721311475, "grad_norm": 24.519216537475586, "learning_rate": 1.0135386454669642e-05, "loss": 0.817, "step": 15580 }, { "epoch": 51.08524590163935, "grad_norm": 6.86306095123291, "learning_rate": 1.0134324664244168e-05, "loss": 0.4932, "step": 15581 }, { "epoch": 51.088524590163935, "grad_norm": 8.690171241760254, "learning_rate": 1.0133262872304048e-05, "loss": 0.6508, "step": 15582 }, { "epoch": 51.09180327868852, "grad_norm": 6.7640180587768555, "learning_rate": 1.0132201078861246e-05, "loss": 0.8207, "step": 15583 }, { "epoch": 51.09508196721311, "grad_norm": 7.049817085266113, "learning_rate": 1.0131139283927743e-05, "loss": 0.592, "step": 15584 }, { "epoch": 51.09836065573771, "grad_norm": 6.305959701538086, "learning_rate": 1.0130077487515503e-05, "loss": 0.5544, "step": 15585 }, { "epoch": 51.101639344262296, "grad_norm": 6.005290508270264, "learning_rate": 1.012901568963651e-05, "loss": 0.861, "step": 15586 }, { "epoch": 51.104918032786884, "grad_norm": 8.842267036437988, "learning_rate": 1.012795389030273e-05, "loss": 0.4301, "step": 15587 }, { "epoch": 51.10819672131147, "grad_norm": 6.139540672302246, "learning_rate": 1.0126892089526132e-05, "loss": 0.5638, "step": 15588 }, { "epoch": 51.11147540983607, "grad_norm": 7.62492561340332, "learning_rate": 1.01258302873187e-05, "loss": 0.5616, "step": 15589 }, { "epoch": 51.114754098360656, "grad_norm": 7.091348648071289, "learning_rate": 1.0124768483692395e-05, "loss": 0.682, "step": 15590 }, { "epoch": 51.118032786885244, "grad_norm": 6.2773356437683105, "learning_rate": 1.01237066786592e-05, "loss": 0.4251, "step": 15591 }, { "epoch": 51.12131147540983, "grad_norm": 15.882781028747559, "learning_rate": 1.0122644872231082e-05, "loss": 0.7602, "step": 15592 }, { "epoch": 51.12459016393443, "grad_norm": 8.55191421508789, "learning_rate": 1.0121583064420015e-05, "loss": 0.4644, "step": 15593 }, { "epoch": 51.12786885245902, "grad_norm": 7.15433931350708, "learning_rate": 1.0120521255237974e-05, "loss": 0.5789, "step": 15594 }, { "epoch": 51.131147540983605, "grad_norm": 6.801063537597656, "learning_rate": 1.011945944469693e-05, "loss": 0.6326, "step": 15595 }, { "epoch": 51.13442622950819, "grad_norm": 5.7367658615112305, "learning_rate": 1.0118397632808857e-05, "loss": 0.5365, "step": 15596 }, { "epoch": 51.13770491803279, "grad_norm": 8.54080581665039, "learning_rate": 1.0117335819585725e-05, "loss": 0.6499, "step": 15597 }, { "epoch": 51.14098360655738, "grad_norm": 10.18297004699707, "learning_rate": 1.0116274005039513e-05, "loss": 0.4847, "step": 15598 }, { "epoch": 51.144262295081965, "grad_norm": 8.52506160736084, "learning_rate": 1.0115212189182189e-05, "loss": 0.4165, "step": 15599 }, { "epoch": 51.14754098360656, "grad_norm": 10.857247352600098, "learning_rate": 1.0114150372025729e-05, "loss": 0.4928, "step": 15600 }, { "epoch": 51.15081967213115, "grad_norm": 13.032299995422363, "learning_rate": 1.0113088553582101e-05, "loss": 0.67, "step": 15601 }, { "epoch": 51.15409836065574, "grad_norm": 5.425284385681152, "learning_rate": 1.0112026733863288e-05, "loss": 0.6814, "step": 15602 }, { "epoch": 51.157377049180326, "grad_norm": 28.935489654541016, "learning_rate": 1.0110964912881251e-05, "loss": 0.4363, "step": 15603 }, { "epoch": 51.16065573770492, "grad_norm": 5.4832329750061035, "learning_rate": 1.0109903090647975e-05, "loss": 0.4779, "step": 15604 }, { "epoch": 51.16393442622951, "grad_norm": 6.810887813568115, "learning_rate": 1.0108841267175423e-05, "loss": 0.567, "step": 15605 }, { "epoch": 51.1672131147541, "grad_norm": 5.639620304107666, "learning_rate": 1.0107779442475576e-05, "loss": 0.6143, "step": 15606 }, { "epoch": 51.170491803278686, "grad_norm": 7.510917663574219, "learning_rate": 1.01067176165604e-05, "loss": 0.4604, "step": 15607 }, { "epoch": 51.17377049180328, "grad_norm": 5.103109836578369, "learning_rate": 1.0105655789441875e-05, "loss": 0.6475, "step": 15608 }, { "epoch": 51.17704918032787, "grad_norm": 6.201663017272949, "learning_rate": 1.0104593961131967e-05, "loss": 0.6516, "step": 15609 }, { "epoch": 51.18032786885246, "grad_norm": 6.618631362915039, "learning_rate": 1.0103532131642659e-05, "loss": 0.4626, "step": 15610 }, { "epoch": 51.18360655737705, "grad_norm": 6.134774684906006, "learning_rate": 1.0102470300985914e-05, "loss": 0.3723, "step": 15611 }, { "epoch": 51.18688524590164, "grad_norm": 7.739288330078125, "learning_rate": 1.0101408469173713e-05, "loss": 0.6869, "step": 15612 }, { "epoch": 51.19016393442623, "grad_norm": 9.138039588928223, "learning_rate": 1.0100346636218024e-05, "loss": 0.7505, "step": 15613 }, { "epoch": 51.19344262295082, "grad_norm": 7.89291524887085, "learning_rate": 1.0099284802130822e-05, "loss": 0.4573, "step": 15614 }, { "epoch": 51.19672131147541, "grad_norm": 5.494915962219238, "learning_rate": 1.0098222966924082e-05, "loss": 0.6179, "step": 15615 }, { "epoch": 51.2, "grad_norm": 5.221426486968994, "learning_rate": 1.0097161130609774e-05, "loss": 0.5188, "step": 15616 }, { "epoch": 51.20327868852459, "grad_norm": 6.157618522644043, "learning_rate": 1.0096099293199877e-05, "loss": 0.74, "step": 15617 }, { "epoch": 51.20655737704918, "grad_norm": 5.5643768310546875, "learning_rate": 1.0095037454706356e-05, "loss": 0.8335, "step": 15618 }, { "epoch": 51.20983606557377, "grad_norm": 5.564155578613281, "learning_rate": 1.0093975615141193e-05, "loss": 0.3941, "step": 15619 }, { "epoch": 51.21311475409836, "grad_norm": 6.608044147491455, "learning_rate": 1.0092913774516354e-05, "loss": 0.3659, "step": 15620 }, { "epoch": 51.21639344262295, "grad_norm": 7.981223106384277, "learning_rate": 1.0091851932843819e-05, "loss": 0.8528, "step": 15621 }, { "epoch": 51.21967213114754, "grad_norm": 9.933952331542969, "learning_rate": 1.0090790090135554e-05, "loss": 0.5504, "step": 15622 }, { "epoch": 51.22295081967213, "grad_norm": 6.48325777053833, "learning_rate": 1.0089728246403539e-05, "loss": 0.6545, "step": 15623 }, { "epoch": 51.226229508196724, "grad_norm": 5.6884870529174805, "learning_rate": 1.0088666401659746e-05, "loss": 0.8318, "step": 15624 }, { "epoch": 51.22950819672131, "grad_norm": 5.943525314331055, "learning_rate": 1.0087604555916144e-05, "loss": 0.5963, "step": 15625 }, { "epoch": 51.2327868852459, "grad_norm": 6.2796430587768555, "learning_rate": 1.0086542709184712e-05, "loss": 0.6729, "step": 15626 }, { "epoch": 51.23606557377049, "grad_norm": 5.544823169708252, "learning_rate": 1.0085480861477418e-05, "loss": 0.6592, "step": 15627 }, { "epoch": 51.239344262295084, "grad_norm": 5.808265209197998, "learning_rate": 1.0084419012806242e-05, "loss": 0.7664, "step": 15628 }, { "epoch": 51.24262295081967, "grad_norm": 5.427576541900635, "learning_rate": 1.008335716318315e-05, "loss": 0.3161, "step": 15629 }, { "epoch": 51.24590163934426, "grad_norm": 7.309499740600586, "learning_rate": 1.0082295312620123e-05, "loss": 0.5383, "step": 15630 }, { "epoch": 51.24918032786885, "grad_norm": 8.19662857055664, "learning_rate": 1.0081233461129127e-05, "loss": 0.5983, "step": 15631 }, { "epoch": 51.252459016393445, "grad_norm": 10.587814331054688, "learning_rate": 1.0080171608722143e-05, "loss": 0.4586, "step": 15632 }, { "epoch": 51.25573770491803, "grad_norm": 6.144745826721191, "learning_rate": 1.0079109755411138e-05, "loss": 0.5047, "step": 15633 }, { "epoch": 51.25901639344262, "grad_norm": 7.757521629333496, "learning_rate": 1.007804790120809e-05, "loss": 0.7743, "step": 15634 }, { "epoch": 51.26229508196721, "grad_norm": 5.865911960601807, "learning_rate": 1.0076986046124968e-05, "loss": 0.8115, "step": 15635 }, { "epoch": 51.265573770491805, "grad_norm": 66.76019287109375, "learning_rate": 1.0075924190173752e-05, "loss": 0.614, "step": 15636 }, { "epoch": 51.268852459016394, "grad_norm": 8.943325996398926, "learning_rate": 1.0074862333366412e-05, "loss": 0.6337, "step": 15637 }, { "epoch": 51.27213114754098, "grad_norm": 6.258296012878418, "learning_rate": 1.0073800475714918e-05, "loss": 0.5477, "step": 15638 }, { "epoch": 51.27540983606557, "grad_norm": 5.755367755889893, "learning_rate": 1.0072738617231248e-05, "loss": 0.5023, "step": 15639 }, { "epoch": 51.278688524590166, "grad_norm": 6.2064008712768555, "learning_rate": 1.0071676757927375e-05, "loss": 0.7982, "step": 15640 }, { "epoch": 51.281967213114754, "grad_norm": 8.53021240234375, "learning_rate": 1.0070614897815273e-05, "loss": 0.7373, "step": 15641 }, { "epoch": 51.28524590163934, "grad_norm": 18.71367073059082, "learning_rate": 1.006955303690691e-05, "loss": 0.3395, "step": 15642 }, { "epoch": 51.28852459016394, "grad_norm": 6.009650230407715, "learning_rate": 1.006849117521427e-05, "loss": 0.5821, "step": 15643 }, { "epoch": 51.291803278688526, "grad_norm": 6.23176908493042, "learning_rate": 1.0067429312749317e-05, "loss": 0.8051, "step": 15644 }, { "epoch": 51.295081967213115, "grad_norm": 7.022642135620117, "learning_rate": 1.0066367449524031e-05, "loss": 0.6387, "step": 15645 }, { "epoch": 51.2983606557377, "grad_norm": 6.797717571258545, "learning_rate": 1.006530558555038e-05, "loss": 0.6324, "step": 15646 }, { "epoch": 51.3016393442623, "grad_norm": 6.267714023590088, "learning_rate": 1.0064243720840343e-05, "loss": 0.3559, "step": 15647 }, { "epoch": 51.30491803278689, "grad_norm": 5.944473743438721, "learning_rate": 1.006318185540589e-05, "loss": 0.6646, "step": 15648 }, { "epoch": 51.308196721311475, "grad_norm": 6.172646522521973, "learning_rate": 1.0062119989258998e-05, "loss": 0.6302, "step": 15649 }, { "epoch": 51.31147540983606, "grad_norm": 8.308951377868652, "learning_rate": 1.006105812241164e-05, "loss": 0.7152, "step": 15650 }, { "epoch": 51.31475409836066, "grad_norm": 7.000118732452393, "learning_rate": 1.0059996254875784e-05, "loss": 0.4092, "step": 15651 }, { "epoch": 51.31803278688525, "grad_norm": 7.044820308685303, "learning_rate": 1.0058934386663411e-05, "loss": 0.5709, "step": 15652 }, { "epoch": 51.321311475409836, "grad_norm": 6.160356521606445, "learning_rate": 1.0057872517786486e-05, "loss": 0.575, "step": 15653 }, { "epoch": 51.324590163934424, "grad_norm": 8.462925910949707, "learning_rate": 1.0056810648256994e-05, "loss": 0.5112, "step": 15654 }, { "epoch": 51.32786885245902, "grad_norm": 13.286498069763184, "learning_rate": 1.00557487780869e-05, "loss": 0.8486, "step": 15655 }, { "epoch": 51.33114754098361, "grad_norm": 6.413308620452881, "learning_rate": 1.0054686907288183e-05, "loss": 0.4076, "step": 15656 }, { "epoch": 51.334426229508196, "grad_norm": 6.24637508392334, "learning_rate": 1.005362503587281e-05, "loss": 0.5233, "step": 15657 }, { "epoch": 51.337704918032784, "grad_norm": 6.132882595062256, "learning_rate": 1.0052563163852764e-05, "loss": 0.7405, "step": 15658 }, { "epoch": 51.34098360655738, "grad_norm": 7.558099746704102, "learning_rate": 1.0051501291240008e-05, "loss": 0.4516, "step": 15659 }, { "epoch": 51.34426229508197, "grad_norm": 33.20283889770508, "learning_rate": 1.0050439418046528e-05, "loss": 0.4384, "step": 15660 }, { "epoch": 51.34754098360656, "grad_norm": 7.072065830230713, "learning_rate": 1.0049377544284286e-05, "loss": 0.4767, "step": 15661 }, { "epoch": 51.350819672131145, "grad_norm": 6.205316543579102, "learning_rate": 1.0048315669965263e-05, "loss": 0.7683, "step": 15662 }, { "epoch": 51.35409836065574, "grad_norm": 39.223995208740234, "learning_rate": 1.0047253795101433e-05, "loss": 0.5095, "step": 15663 }, { "epoch": 51.35737704918033, "grad_norm": 6.371974945068359, "learning_rate": 1.0046191919704762e-05, "loss": 0.4078, "step": 15664 }, { "epoch": 51.36065573770492, "grad_norm": 5.633475303649902, "learning_rate": 1.004513004378723e-05, "loss": 0.4564, "step": 15665 }, { "epoch": 51.363934426229505, "grad_norm": 5.795104026794434, "learning_rate": 1.0044068167360814e-05, "loss": 0.5748, "step": 15666 }, { "epoch": 51.3672131147541, "grad_norm": 5.86693811416626, "learning_rate": 1.004300629043748e-05, "loss": 0.7121, "step": 15667 }, { "epoch": 51.37049180327869, "grad_norm": 5.8836283683776855, "learning_rate": 1.0041944413029208e-05, "loss": 0.6287, "step": 15668 }, { "epoch": 51.37377049180328, "grad_norm": 6.557249069213867, "learning_rate": 1.0040882535147964e-05, "loss": 0.5255, "step": 15669 }, { "epoch": 51.377049180327866, "grad_norm": 5.8878092765808105, "learning_rate": 1.003982065680573e-05, "loss": 0.484, "step": 15670 }, { "epoch": 51.38032786885246, "grad_norm": 12.771737098693848, "learning_rate": 1.003875877801448e-05, "loss": 0.4303, "step": 15671 }, { "epoch": 51.38360655737705, "grad_norm": 9.99680233001709, "learning_rate": 1.003769689878618e-05, "loss": 0.4306, "step": 15672 }, { "epoch": 51.38688524590164, "grad_norm": 12.356406211853027, "learning_rate": 1.003663501913281e-05, "loss": 0.7383, "step": 15673 }, { "epoch": 51.390163934426226, "grad_norm": 6.902515411376953, "learning_rate": 1.003557313906634e-05, "loss": 0.5296, "step": 15674 }, { "epoch": 51.39344262295082, "grad_norm": 6.097275257110596, "learning_rate": 1.003451125859875e-05, "loss": 0.6367, "step": 15675 }, { "epoch": 51.39672131147541, "grad_norm": 7.382289409637451, "learning_rate": 1.0033449377742009e-05, "loss": 0.5756, "step": 15676 }, { "epoch": 51.4, "grad_norm": 8.08734130859375, "learning_rate": 1.003238749650809e-05, "loss": 0.7511, "step": 15677 }, { "epoch": 51.40327868852459, "grad_norm": 6.028621673583984, "learning_rate": 1.0031325614908969e-05, "loss": 0.764, "step": 15678 }, { "epoch": 51.40655737704918, "grad_norm": 6.030648231506348, "learning_rate": 1.0030263732956615e-05, "loss": 0.6359, "step": 15679 }, { "epoch": 51.40983606557377, "grad_norm": 5.013180255889893, "learning_rate": 1.002920185066301e-05, "loss": 0.4345, "step": 15680 }, { "epoch": 51.41311475409836, "grad_norm": 7.650173187255859, "learning_rate": 1.0028139968040123e-05, "loss": 0.591, "step": 15681 }, { "epoch": 51.41639344262295, "grad_norm": 6.692300796508789, "learning_rate": 1.002707808509993e-05, "loss": 0.7174, "step": 15682 }, { "epoch": 51.41967213114754, "grad_norm": 6.464206695556641, "learning_rate": 1.0026016201854401e-05, "loss": 0.564, "step": 15683 }, { "epoch": 51.42295081967213, "grad_norm": 6.903127670288086, "learning_rate": 1.0024954318315514e-05, "loss": 0.4487, "step": 15684 }, { "epoch": 51.42622950819672, "grad_norm": 15.123461723327637, "learning_rate": 1.0023892434495239e-05, "loss": 0.5435, "step": 15685 }, { "epoch": 51.429508196721315, "grad_norm": 12.144688606262207, "learning_rate": 1.0022830550405555e-05, "loss": 0.8617, "step": 15686 }, { "epoch": 51.4327868852459, "grad_norm": 7.054344177246094, "learning_rate": 1.002176866605843e-05, "loss": 0.5659, "step": 15687 }, { "epoch": 51.43606557377049, "grad_norm": 7.982448577880859, "learning_rate": 1.0020706781465843e-05, "loss": 0.6152, "step": 15688 }, { "epoch": 51.43934426229508, "grad_norm": 5.352867603302002, "learning_rate": 1.0019644896639766e-05, "loss": 0.3908, "step": 15689 }, { "epoch": 51.442622950819676, "grad_norm": 7.126133441925049, "learning_rate": 1.001858301159217e-05, "loss": 0.5672, "step": 15690 }, { "epoch": 51.445901639344264, "grad_norm": 5.675264835357666, "learning_rate": 1.0017521126335035e-05, "loss": 0.4608, "step": 15691 }, { "epoch": 51.44918032786885, "grad_norm": 6.249650478363037, "learning_rate": 1.0016459240880325e-05, "loss": 0.5102, "step": 15692 }, { "epoch": 51.45245901639344, "grad_norm": 5.858804702758789, "learning_rate": 1.0015397355240022e-05, "loss": 0.568, "step": 15693 }, { "epoch": 51.455737704918036, "grad_norm": 6.2791876792907715, "learning_rate": 1.0014335469426099e-05, "loss": 0.6575, "step": 15694 }, { "epoch": 51.459016393442624, "grad_norm": 6.387041091918945, "learning_rate": 1.0013273583450528e-05, "loss": 0.7135, "step": 15695 }, { "epoch": 51.46229508196721, "grad_norm": 7.101873397827148, "learning_rate": 1.0012211697325285e-05, "loss": 0.3604, "step": 15696 }, { "epoch": 51.4655737704918, "grad_norm": 8.955474853515625, "learning_rate": 1.0011149811062342e-05, "loss": 0.4312, "step": 15697 }, { "epoch": 51.4688524590164, "grad_norm": 6.13527250289917, "learning_rate": 1.0010087924673673e-05, "loss": 0.6856, "step": 15698 }, { "epoch": 51.472131147540985, "grad_norm": 5.15459680557251, "learning_rate": 1.0009026038171251e-05, "loss": 0.5182, "step": 15699 }, { "epoch": 51.47540983606557, "grad_norm": 6.745092868804932, "learning_rate": 1.0007964151567052e-05, "loss": 0.8697, "step": 15700 }, { "epoch": 51.47868852459016, "grad_norm": 5.238093376159668, "learning_rate": 1.0006902264873048e-05, "loss": 0.5362, "step": 15701 }, { "epoch": 51.48196721311476, "grad_norm": 7.132930278778076, "learning_rate": 1.0005840378101218e-05, "loss": 0.8992, "step": 15702 }, { "epoch": 51.485245901639345, "grad_norm": 5.634613513946533, "learning_rate": 1.0004778491263527e-05, "loss": 0.4391, "step": 15703 }, { "epoch": 51.488524590163934, "grad_norm": 5.704335689544678, "learning_rate": 1.0003716604371956e-05, "loss": 0.7664, "step": 15704 }, { "epoch": 51.49180327868852, "grad_norm": 5.994667053222656, "learning_rate": 1.0002654717438474e-05, "loss": 0.5755, "step": 15705 }, { "epoch": 51.49508196721312, "grad_norm": 5.0655012130737305, "learning_rate": 1.000159283047506e-05, "loss": 0.5725, "step": 15706 }, { "epoch": 51.498360655737706, "grad_norm": 5.28315544128418, "learning_rate": 1.0000530943493683e-05, "loss": 0.3414, "step": 15707 }, { "epoch": 51.501639344262294, "grad_norm": 12.996968269348145, "learning_rate": 9.99946905650632e-06, "loss": 0.4223, "step": 15708 }, { "epoch": 51.50491803278688, "grad_norm": 5.893325328826904, "learning_rate": 9.998407169524945e-06, "loss": 0.5531, "step": 15709 }, { "epoch": 51.50819672131148, "grad_norm": 6.727612018585205, "learning_rate": 9.997345282561527e-06, "loss": 0.5652, "step": 15710 }, { "epoch": 51.511475409836066, "grad_norm": 6.105374813079834, "learning_rate": 9.996283395628047e-06, "loss": 0.5802, "step": 15711 }, { "epoch": 51.514754098360655, "grad_norm": 5.985555171966553, "learning_rate": 9.995221508736475e-06, "loss": 0.677, "step": 15712 }, { "epoch": 51.51803278688524, "grad_norm": 7.011867046356201, "learning_rate": 9.994159621898787e-06, "loss": 0.5924, "step": 15713 }, { "epoch": 51.52131147540984, "grad_norm": 5.651366233825684, "learning_rate": 9.993097735126955e-06, "loss": 0.6937, "step": 15714 }, { "epoch": 51.52459016393443, "grad_norm": 4.999655723571777, "learning_rate": 9.99203584843295e-06, "loss": 0.3897, "step": 15715 }, { "epoch": 51.527868852459015, "grad_norm": 6.197417736053467, "learning_rate": 9.990973961828752e-06, "loss": 0.4854, "step": 15716 }, { "epoch": 51.5311475409836, "grad_norm": 11.74309253692627, "learning_rate": 9.989912075326332e-06, "loss": 0.511, "step": 15717 }, { "epoch": 51.5344262295082, "grad_norm": 10.000397682189941, "learning_rate": 9.988850188937662e-06, "loss": 0.6105, "step": 15718 }, { "epoch": 51.53770491803279, "grad_norm": 5.757511138916016, "learning_rate": 9.987788302674716e-06, "loss": 0.5306, "step": 15719 }, { "epoch": 51.540983606557376, "grad_norm": 10.273489952087402, "learning_rate": 9.986726416549473e-06, "loss": 0.6565, "step": 15720 }, { "epoch": 51.544262295081964, "grad_norm": 6.014926910400391, "learning_rate": 9.985664530573903e-06, "loss": 0.732, "step": 15721 }, { "epoch": 51.54754098360656, "grad_norm": 6.863338947296143, "learning_rate": 9.98460264475998e-06, "loss": 0.5216, "step": 15722 }, { "epoch": 51.55081967213115, "grad_norm": 5.3853936195373535, "learning_rate": 9.983540759119677e-06, "loss": 0.4014, "step": 15723 }, { "epoch": 51.554098360655736, "grad_norm": 5.617900371551514, "learning_rate": 9.982478873664969e-06, "loss": 0.6278, "step": 15724 }, { "epoch": 51.557377049180324, "grad_norm": 6.3231658935546875, "learning_rate": 9.981416988407833e-06, "loss": 0.7609, "step": 15725 }, { "epoch": 51.56065573770492, "grad_norm": 13.138405799865723, "learning_rate": 9.98035510336024e-06, "loss": 0.6597, "step": 15726 }, { "epoch": 51.56393442622951, "grad_norm": 6.102141380310059, "learning_rate": 9.97929321853416e-06, "loss": 0.3562, "step": 15727 }, { "epoch": 51.5672131147541, "grad_norm": 7.311944484710693, "learning_rate": 9.978231333941569e-06, "loss": 0.3946, "step": 15728 }, { "epoch": 51.570491803278685, "grad_norm": 6.940069675445557, "learning_rate": 9.977169449594446e-06, "loss": 0.6331, "step": 15729 }, { "epoch": 51.57377049180328, "grad_norm": 6.223721504211426, "learning_rate": 9.976107565504762e-06, "loss": 0.7695, "step": 15730 }, { "epoch": 51.57704918032787, "grad_norm": 6.068001747131348, "learning_rate": 9.97504568168449e-06, "loss": 0.3076, "step": 15731 }, { "epoch": 51.58032786885246, "grad_norm": 6.38257360458374, "learning_rate": 9.973983798145599e-06, "loss": 0.5897, "step": 15732 }, { "epoch": 51.58360655737705, "grad_norm": 8.007011413574219, "learning_rate": 9.972921914900073e-06, "loss": 0.4952, "step": 15733 }, { "epoch": 51.58688524590164, "grad_norm": 6.229778289794922, "learning_rate": 9.971860031959879e-06, "loss": 0.7528, "step": 15734 }, { "epoch": 51.59016393442623, "grad_norm": 5.875252723693848, "learning_rate": 9.970798149336993e-06, "loss": 0.6364, "step": 15735 }, { "epoch": 51.59344262295082, "grad_norm": 5.34467887878418, "learning_rate": 9.969736267043385e-06, "loss": 0.6677, "step": 15736 }, { "epoch": 51.59672131147541, "grad_norm": 7.576934337615967, "learning_rate": 9.968674385091035e-06, "loss": 0.5502, "step": 15737 }, { "epoch": 51.6, "grad_norm": 6.8452911376953125, "learning_rate": 9.967612503491915e-06, "loss": 0.5801, "step": 15738 }, { "epoch": 51.60327868852459, "grad_norm": 13.834564208984375, "learning_rate": 9.966550622257996e-06, "loss": 0.7603, "step": 15739 }, { "epoch": 51.60655737704918, "grad_norm": 5.500766754150391, "learning_rate": 9.965488741401254e-06, "loss": 0.5363, "step": 15740 }, { "epoch": 51.609836065573774, "grad_norm": 6.199801921844482, "learning_rate": 9.96442686093366e-06, "loss": 0.8022, "step": 15741 }, { "epoch": 51.61311475409836, "grad_norm": 8.753763198852539, "learning_rate": 9.963364980867192e-06, "loss": 0.548, "step": 15742 }, { "epoch": 51.61639344262295, "grad_norm": 7.927262783050537, "learning_rate": 9.962303101213825e-06, "loss": 0.7489, "step": 15743 }, { "epoch": 51.61967213114754, "grad_norm": 11.150168418884277, "learning_rate": 9.961241221985524e-06, "loss": 0.7243, "step": 15744 }, { "epoch": 51.622950819672134, "grad_norm": 7.398331165313721, "learning_rate": 9.960179343194271e-06, "loss": 0.6078, "step": 15745 }, { "epoch": 51.62622950819672, "grad_norm": 6.157739639282227, "learning_rate": 9.959117464852038e-06, "loss": 0.7173, "step": 15746 }, { "epoch": 51.62950819672131, "grad_norm": 7.579026222229004, "learning_rate": 9.958055586970796e-06, "loss": 0.6681, "step": 15747 }, { "epoch": 51.6327868852459, "grad_norm": 6.474698066711426, "learning_rate": 9.956993709562523e-06, "loss": 0.5367, "step": 15748 }, { "epoch": 51.636065573770495, "grad_norm": 6.847188472747803, "learning_rate": 9.95593183263919e-06, "loss": 0.76, "step": 15749 }, { "epoch": 51.63934426229508, "grad_norm": 6.447234153747559, "learning_rate": 9.954869956212772e-06, "loss": 0.5915, "step": 15750 }, { "epoch": 51.64262295081967, "grad_norm": 6.249451637268066, "learning_rate": 9.95380808029524e-06, "loss": 0.64, "step": 15751 }, { "epoch": 51.64590163934426, "grad_norm": 6.725220680236816, "learning_rate": 9.952746204898574e-06, "loss": 0.5753, "step": 15752 }, { "epoch": 51.649180327868855, "grad_norm": 8.656397819519043, "learning_rate": 9.95168433003474e-06, "loss": 0.6827, "step": 15753 }, { "epoch": 51.65245901639344, "grad_norm": 5.4237470626831055, "learning_rate": 9.950622455715716e-06, "loss": 0.6529, "step": 15754 }, { "epoch": 51.65573770491803, "grad_norm": 8.225964546203613, "learning_rate": 9.949560581953476e-06, "loss": 0.6448, "step": 15755 }, { "epoch": 51.65901639344262, "grad_norm": 5.346190929412842, "learning_rate": 9.948498708759993e-06, "loss": 0.2694, "step": 15756 }, { "epoch": 51.662295081967216, "grad_norm": 7.373110771179199, "learning_rate": 9.947436836147241e-06, "loss": 0.5836, "step": 15757 }, { "epoch": 51.665573770491804, "grad_norm": 6.076767921447754, "learning_rate": 9.946374964127191e-06, "loss": 0.6304, "step": 15758 }, { "epoch": 51.66885245901639, "grad_norm": 5.277736186981201, "learning_rate": 9.94531309271182e-06, "loss": 0.5483, "step": 15759 }, { "epoch": 51.67213114754098, "grad_norm": 6.4522705078125, "learning_rate": 9.944251221913103e-06, "loss": 0.7933, "step": 15760 }, { "epoch": 51.675409836065576, "grad_norm": 7.679404258728027, "learning_rate": 9.943189351743011e-06, "loss": 0.7016, "step": 15761 }, { "epoch": 51.678688524590164, "grad_norm": 7.7435407638549805, "learning_rate": 9.942127482213514e-06, "loss": 0.7446, "step": 15762 }, { "epoch": 51.68196721311475, "grad_norm": 7.657179355621338, "learning_rate": 9.941065613336594e-06, "loss": 0.4138, "step": 15763 }, { "epoch": 51.68524590163934, "grad_norm": 5.967105865478516, "learning_rate": 9.940003745124219e-06, "loss": 0.5416, "step": 15764 }, { "epoch": 51.68852459016394, "grad_norm": 7.450067043304443, "learning_rate": 9.938941877588366e-06, "loss": 0.5592, "step": 15765 }, { "epoch": 51.691803278688525, "grad_norm": 6.901947021484375, "learning_rate": 9.937880010741007e-06, "loss": 0.5326, "step": 15766 }, { "epoch": 51.69508196721311, "grad_norm": 5.352034568786621, "learning_rate": 9.93681814459411e-06, "loss": 0.7346, "step": 15767 }, { "epoch": 51.6983606557377, "grad_norm": 6.208311080932617, "learning_rate": 9.935756279159659e-06, "loss": 0.5534, "step": 15768 }, { "epoch": 51.7016393442623, "grad_norm": 8.326066017150879, "learning_rate": 9.934694414449623e-06, "loss": 0.5824, "step": 15769 }, { "epoch": 51.704918032786885, "grad_norm": 6.6047587394714355, "learning_rate": 9.933632550475974e-06, "loss": 0.6497, "step": 15770 }, { "epoch": 51.708196721311474, "grad_norm": 6.607907772064209, "learning_rate": 9.932570687250685e-06, "loss": 0.6461, "step": 15771 }, { "epoch": 51.71147540983607, "grad_norm": 30.48235511779785, "learning_rate": 9.931508824785734e-06, "loss": 0.6133, "step": 15772 }, { "epoch": 51.71475409836066, "grad_norm": 16.330371856689453, "learning_rate": 9.930446963093091e-06, "loss": 0.4684, "step": 15773 }, { "epoch": 51.718032786885246, "grad_norm": 7.502769470214844, "learning_rate": 9.929385102184733e-06, "loss": 0.6523, "step": 15774 }, { "epoch": 51.721311475409834, "grad_norm": 8.929461479187012, "learning_rate": 9.928323242072627e-06, "loss": 0.7711, "step": 15775 }, { "epoch": 51.72459016393443, "grad_norm": 6.966616630554199, "learning_rate": 9.927261382768757e-06, "loss": 0.601, "step": 15776 }, { "epoch": 51.72786885245902, "grad_norm": 6.317121505737305, "learning_rate": 9.926199524285085e-06, "loss": 0.3925, "step": 15777 }, { "epoch": 51.731147540983606, "grad_norm": 6.509772777557373, "learning_rate": 9.925137666633593e-06, "loss": 0.5141, "step": 15778 }, { "epoch": 51.734426229508195, "grad_norm": 6.760141372680664, "learning_rate": 9.924075809826253e-06, "loss": 0.6961, "step": 15779 }, { "epoch": 51.73770491803279, "grad_norm": 7.784511566162109, "learning_rate": 9.92301395387503e-06, "loss": 0.4851, "step": 15780 }, { "epoch": 51.74098360655738, "grad_norm": 8.078523635864258, "learning_rate": 9.921952098791912e-06, "loss": 0.3894, "step": 15781 }, { "epoch": 51.74426229508197, "grad_norm": 6.2087082862854, "learning_rate": 9.920890244588866e-06, "loss": 0.6292, "step": 15782 }, { "epoch": 51.747540983606555, "grad_norm": 6.033185005187988, "learning_rate": 9.919828391277862e-06, "loss": 0.2678, "step": 15783 }, { "epoch": 51.75081967213115, "grad_norm": 8.07028579711914, "learning_rate": 9.918766538870873e-06, "loss": 0.5115, "step": 15784 }, { "epoch": 51.75409836065574, "grad_norm": 5.294632434844971, "learning_rate": 9.91770468737988e-06, "loss": 0.585, "step": 15785 }, { "epoch": 51.75737704918033, "grad_norm": 6.167482376098633, "learning_rate": 9.916642836816853e-06, "loss": 0.9466, "step": 15786 }, { "epoch": 51.760655737704916, "grad_norm": 9.980916023254395, "learning_rate": 9.915580987193763e-06, "loss": 0.7373, "step": 15787 }, { "epoch": 51.76393442622951, "grad_norm": 32.38526916503906, "learning_rate": 9.914519138522582e-06, "loss": 0.7643, "step": 15788 }, { "epoch": 51.7672131147541, "grad_norm": 6.728320121765137, "learning_rate": 9.913457290815291e-06, "loss": 0.5229, "step": 15789 }, { "epoch": 51.77049180327869, "grad_norm": 5.582259654998779, "learning_rate": 9.912395444083858e-06, "loss": 0.4983, "step": 15790 }, { "epoch": 51.773770491803276, "grad_norm": 7.367626190185547, "learning_rate": 9.911333598340259e-06, "loss": 0.7681, "step": 15791 }, { "epoch": 51.77704918032787, "grad_norm": 6.91196870803833, "learning_rate": 9.910271753596465e-06, "loss": 0.7467, "step": 15792 }, { "epoch": 51.78032786885246, "grad_norm": 7.4633917808532715, "learning_rate": 9.909209909864446e-06, "loss": 0.5193, "step": 15793 }, { "epoch": 51.78360655737705, "grad_norm": 5.740241050720215, "learning_rate": 9.908148067156184e-06, "loss": 0.6726, "step": 15794 }, { "epoch": 51.78688524590164, "grad_norm": 13.127321243286133, "learning_rate": 9.907086225483649e-06, "loss": 0.8213, "step": 15795 }, { "epoch": 51.79016393442623, "grad_norm": 7.310378551483154, "learning_rate": 9.906024384858812e-06, "loss": 0.3761, "step": 15796 }, { "epoch": 51.79344262295082, "grad_norm": 5.631579399108887, "learning_rate": 9.904962545293643e-06, "loss": 0.6433, "step": 15797 }, { "epoch": 51.79672131147541, "grad_norm": 6.990187644958496, "learning_rate": 9.903900706800128e-06, "loss": 0.6166, "step": 15798 }, { "epoch": 51.8, "grad_norm": 9.9450044631958, "learning_rate": 9.90283886939023e-06, "loss": 0.5166, "step": 15799 }, { "epoch": 51.80327868852459, "grad_norm": 8.219626426696777, "learning_rate": 9.901777033075921e-06, "loss": 0.6211, "step": 15800 }, { "epoch": 51.80655737704918, "grad_norm": 4.771205425262451, "learning_rate": 9.90071519786918e-06, "loss": 0.7529, "step": 15801 }, { "epoch": 51.80983606557377, "grad_norm": 6.009335041046143, "learning_rate": 9.89965336378198e-06, "loss": 0.5836, "step": 15802 }, { "epoch": 51.81311475409836, "grad_norm": 7.879099369049072, "learning_rate": 9.89859153082629e-06, "loss": 0.5164, "step": 15803 }, { "epoch": 51.81639344262295, "grad_norm": 9.07332706451416, "learning_rate": 9.89752969901409e-06, "loss": 0.443, "step": 15804 }, { "epoch": 51.81967213114754, "grad_norm": 7.938433647155762, "learning_rate": 9.896467868357346e-06, "loss": 0.7161, "step": 15805 }, { "epoch": 51.82295081967213, "grad_norm": 7.5721964836120605, "learning_rate": 9.895406038868031e-06, "loss": 0.3327, "step": 15806 }, { "epoch": 51.82622950819672, "grad_norm": 5.984653949737549, "learning_rate": 9.894344210558128e-06, "loss": 0.439, "step": 15807 }, { "epoch": 51.829508196721314, "grad_norm": 6.971688270568848, "learning_rate": 9.893282383439602e-06, "loss": 0.7563, "step": 15808 }, { "epoch": 51.8327868852459, "grad_norm": 8.044026374816895, "learning_rate": 9.89222055752443e-06, "loss": 0.5057, "step": 15809 }, { "epoch": 51.83606557377049, "grad_norm": 6.004344463348389, "learning_rate": 9.891158732824577e-06, "loss": 0.6766, "step": 15810 }, { "epoch": 51.83934426229508, "grad_norm": 6.984397888183594, "learning_rate": 9.890096909352029e-06, "loss": 0.2902, "step": 15811 }, { "epoch": 51.842622950819674, "grad_norm": 8.119158744812012, "learning_rate": 9.88903508711875e-06, "loss": 0.6158, "step": 15812 }, { "epoch": 51.84590163934426, "grad_norm": 10.850682258605957, "learning_rate": 9.887973266136717e-06, "loss": 0.7025, "step": 15813 }, { "epoch": 51.84918032786885, "grad_norm": 5.2919769287109375, "learning_rate": 9.886911446417898e-06, "loss": 0.5724, "step": 15814 }, { "epoch": 51.85245901639344, "grad_norm": 5.005631923675537, "learning_rate": 9.885849627974274e-06, "loss": 0.7238, "step": 15815 }, { "epoch": 51.855737704918035, "grad_norm": 10.49589729309082, "learning_rate": 9.884787810817815e-06, "loss": 0.5037, "step": 15816 }, { "epoch": 51.85901639344262, "grad_norm": 8.332881927490234, "learning_rate": 9.883725994960492e-06, "loss": 0.5672, "step": 15817 }, { "epoch": 51.86229508196721, "grad_norm": 8.60456657409668, "learning_rate": 9.882664180414278e-06, "loss": 0.5055, "step": 15818 }, { "epoch": 51.86557377049181, "grad_norm": 6.98388147354126, "learning_rate": 9.881602367191145e-06, "loss": 0.7607, "step": 15819 }, { "epoch": 51.868852459016395, "grad_norm": 6.829037189483643, "learning_rate": 9.880540555303073e-06, "loss": 0.512, "step": 15820 }, { "epoch": 51.87213114754098, "grad_norm": 7.708855628967285, "learning_rate": 9.87947874476203e-06, "loss": 0.4316, "step": 15821 }, { "epoch": 51.87540983606557, "grad_norm": 5.923009395599365, "learning_rate": 9.878416935579988e-06, "loss": 0.6717, "step": 15822 }, { "epoch": 51.87868852459017, "grad_norm": 6.423023223876953, "learning_rate": 9.87735512776892e-06, "loss": 0.5564, "step": 15823 }, { "epoch": 51.881967213114756, "grad_norm": 6.509840488433838, "learning_rate": 9.876293321340803e-06, "loss": 0.6204, "step": 15824 }, { "epoch": 51.885245901639344, "grad_norm": 11.094673156738281, "learning_rate": 9.875231516307608e-06, "loss": 0.8197, "step": 15825 }, { "epoch": 51.88852459016393, "grad_norm": 6.2983527183532715, "learning_rate": 9.874169712681304e-06, "loss": 0.6299, "step": 15826 }, { "epoch": 51.89180327868853, "grad_norm": 6.22373104095459, "learning_rate": 9.873107910473868e-06, "loss": 0.7552, "step": 15827 }, { "epoch": 51.895081967213116, "grad_norm": 6.20015811920166, "learning_rate": 9.872046109697274e-06, "loss": 0.4288, "step": 15828 }, { "epoch": 51.898360655737704, "grad_norm": 10.383678436279297, "learning_rate": 9.870984310363492e-06, "loss": 0.5768, "step": 15829 }, { "epoch": 51.90163934426229, "grad_norm": 9.554520606994629, "learning_rate": 9.869922512484498e-06, "loss": 0.5708, "step": 15830 }, { "epoch": 51.90491803278689, "grad_norm": 8.16402530670166, "learning_rate": 9.868860716072262e-06, "loss": 0.6367, "step": 15831 }, { "epoch": 51.90819672131148, "grad_norm": 7.3758368492126465, "learning_rate": 9.867798921138757e-06, "loss": 1.0184, "step": 15832 }, { "epoch": 51.911475409836065, "grad_norm": 12.379013061523438, "learning_rate": 9.866737127695956e-06, "loss": 0.7462, "step": 15833 }, { "epoch": 51.91475409836065, "grad_norm": 6.878396511077881, "learning_rate": 9.865675335755835e-06, "loss": 0.57, "step": 15834 }, { "epoch": 51.91803278688525, "grad_norm": 7.967200756072998, "learning_rate": 9.864613545330363e-06, "loss": 0.5295, "step": 15835 }, { "epoch": 51.92131147540984, "grad_norm": 7.515201568603516, "learning_rate": 9.86355175643151e-06, "loss": 0.6081, "step": 15836 }, { "epoch": 51.924590163934425, "grad_norm": 6.106812953948975, "learning_rate": 9.862489969071258e-06, "loss": 0.4333, "step": 15837 }, { "epoch": 51.927868852459014, "grad_norm": 6.012806415557861, "learning_rate": 9.861428183261575e-06, "loss": 0.5393, "step": 15838 }, { "epoch": 51.93114754098361, "grad_norm": 6.01549768447876, "learning_rate": 9.860366399014431e-06, "loss": 0.6338, "step": 15839 }, { "epoch": 51.9344262295082, "grad_norm": 5.984035968780518, "learning_rate": 9.8593046163418e-06, "loss": 0.5558, "step": 15840 }, { "epoch": 51.937704918032786, "grad_norm": 4.956657886505127, "learning_rate": 9.858242835255657e-06, "loss": 0.3017, "step": 15841 }, { "epoch": 51.940983606557374, "grad_norm": 6.954530715942383, "learning_rate": 9.857181055767974e-06, "loss": 0.5873, "step": 15842 }, { "epoch": 51.94426229508197, "grad_norm": 20.237138748168945, "learning_rate": 9.856119277890725e-06, "loss": 0.727, "step": 15843 }, { "epoch": 51.94754098360656, "grad_norm": 8.03769302368164, "learning_rate": 9.85505750163588e-06, "loss": 0.3483, "step": 15844 }, { "epoch": 51.950819672131146, "grad_norm": 6.221029758453369, "learning_rate": 9.853995727015406e-06, "loss": 0.6778, "step": 15845 }, { "epoch": 51.954098360655735, "grad_norm": 8.180827140808105, "learning_rate": 9.852933954041288e-06, "loss": 0.5307, "step": 15846 }, { "epoch": 51.95737704918033, "grad_norm": 6.771246433258057, "learning_rate": 9.851872182725493e-06, "loss": 0.4862, "step": 15847 }, { "epoch": 51.96065573770492, "grad_norm": 8.086465835571289, "learning_rate": 9.850810413079992e-06, "loss": 0.6883, "step": 15848 }, { "epoch": 51.96393442622951, "grad_norm": 7.676433563232422, "learning_rate": 9.849748645116755e-06, "loss": 0.7775, "step": 15849 }, { "epoch": 51.967213114754095, "grad_norm": 5.600639820098877, "learning_rate": 9.848686878847763e-06, "loss": 0.5832, "step": 15850 }, { "epoch": 51.97049180327869, "grad_norm": 5.8933563232421875, "learning_rate": 9.847625114284984e-06, "loss": 0.6017, "step": 15851 }, { "epoch": 51.97377049180328, "grad_norm": 7.456014633178711, "learning_rate": 9.846563351440389e-06, "loss": 0.4862, "step": 15852 }, { "epoch": 51.97704918032787, "grad_norm": 7.18085241317749, "learning_rate": 9.845501590325949e-06, "loss": 0.5178, "step": 15853 }, { "epoch": 51.980327868852456, "grad_norm": 6.087518215179443, "learning_rate": 9.844439830953641e-06, "loss": 0.6316, "step": 15854 }, { "epoch": 51.98360655737705, "grad_norm": 16.792451858520508, "learning_rate": 9.843378073335438e-06, "loss": 0.3262, "step": 15855 }, { "epoch": 51.98688524590164, "grad_norm": 34.319393157958984, "learning_rate": 9.842316317483306e-06, "loss": 0.7906, "step": 15856 }, { "epoch": 51.99016393442623, "grad_norm": 6.255070209503174, "learning_rate": 9.841254563409226e-06, "loss": 0.5428, "step": 15857 }, { "epoch": 51.993442622950816, "grad_norm": 9.666746139526367, "learning_rate": 9.840192811125162e-06, "loss": 0.6333, "step": 15858 }, { "epoch": 51.99672131147541, "grad_norm": 5.362728118896484, "learning_rate": 9.839131060643092e-06, "loss": 0.5639, "step": 15859 }, { "epoch": 52.0, "grad_norm": 6.151626110076904, "learning_rate": 9.838069311974986e-06, "loss": 0.4669, "step": 15860 }, { "epoch": 52.00327868852459, "grad_norm": 6.406005382537842, "learning_rate": 9.837007565132818e-06, "loss": 0.5581, "step": 15861 }, { "epoch": 52.006557377049184, "grad_norm": 5.5408220291137695, "learning_rate": 9.835945820128555e-06, "loss": 0.4079, "step": 15862 }, { "epoch": 52.00983606557377, "grad_norm": 6.295530796051025, "learning_rate": 9.834884076974178e-06, "loss": 0.6002, "step": 15863 }, { "epoch": 52.01311475409836, "grad_norm": 5.8729047775268555, "learning_rate": 9.833822335681655e-06, "loss": 0.5649, "step": 15864 }, { "epoch": 52.01639344262295, "grad_norm": 5.776330471038818, "learning_rate": 9.832760596262959e-06, "loss": 0.4197, "step": 15865 }, { "epoch": 52.019672131147544, "grad_norm": 7.796670913696289, "learning_rate": 9.831698858730054e-06, "loss": 0.5277, "step": 15866 }, { "epoch": 52.02295081967213, "grad_norm": 5.802420616149902, "learning_rate": 9.830637123094924e-06, "loss": 0.4244, "step": 15867 }, { "epoch": 52.02622950819672, "grad_norm": 6.083981037139893, "learning_rate": 9.82957538936954e-06, "loss": 0.501, "step": 15868 }, { "epoch": 52.02950819672131, "grad_norm": 5.30446195602417, "learning_rate": 9.828513657565866e-06, "loss": 0.5609, "step": 15869 }, { "epoch": 52.032786885245905, "grad_norm": 6.752926349639893, "learning_rate": 9.827451927695883e-06, "loss": 0.5439, "step": 15870 }, { "epoch": 52.03606557377049, "grad_norm": 5.0780253410339355, "learning_rate": 9.826390199771552e-06, "loss": 0.6074, "step": 15871 }, { "epoch": 52.03934426229508, "grad_norm": 8.33780288696289, "learning_rate": 9.825328473804856e-06, "loss": 0.7551, "step": 15872 }, { "epoch": 52.04262295081967, "grad_norm": 5.620293617248535, "learning_rate": 9.824266749807765e-06, "loss": 0.4963, "step": 15873 }, { "epoch": 52.045901639344265, "grad_norm": 14.726486206054688, "learning_rate": 9.823205027792247e-06, "loss": 0.6019, "step": 15874 }, { "epoch": 52.049180327868854, "grad_norm": 6.0489020347595215, "learning_rate": 9.822143307770273e-06, "loss": 0.6208, "step": 15875 }, { "epoch": 52.05245901639344, "grad_norm": 7.564269542694092, "learning_rate": 9.821081589753823e-06, "loss": 0.4941, "step": 15876 }, { "epoch": 52.05573770491803, "grad_norm": 6.825573444366455, "learning_rate": 9.820019873754864e-06, "loss": 0.6454, "step": 15877 }, { "epoch": 52.059016393442626, "grad_norm": 9.612688064575195, "learning_rate": 9.818958159785368e-06, "loss": 0.5371, "step": 15878 }, { "epoch": 52.062295081967214, "grad_norm": 6.044797420501709, "learning_rate": 9.8178964478573e-06, "loss": 0.6881, "step": 15879 }, { "epoch": 52.0655737704918, "grad_norm": 9.104260444641113, "learning_rate": 9.816834737982646e-06, "loss": 0.5321, "step": 15880 }, { "epoch": 52.06885245901639, "grad_norm": 6.900725841522217, "learning_rate": 9.815773030173371e-06, "loss": 0.5311, "step": 15881 }, { "epoch": 52.072131147540986, "grad_norm": 6.70552921295166, "learning_rate": 9.814711324441443e-06, "loss": 0.4725, "step": 15882 }, { "epoch": 52.075409836065575, "grad_norm": 6.4722795486450195, "learning_rate": 9.813649620798842e-06, "loss": 0.5536, "step": 15883 }, { "epoch": 52.07868852459016, "grad_norm": 5.272800922393799, "learning_rate": 9.812587919257532e-06, "loss": 0.4789, "step": 15884 }, { "epoch": 52.08196721311475, "grad_norm": 9.771484375, "learning_rate": 9.811526219829485e-06, "loss": 0.5321, "step": 15885 }, { "epoch": 52.08524590163935, "grad_norm": 5.212612152099609, "learning_rate": 9.810464522526682e-06, "loss": 0.6029, "step": 15886 }, { "epoch": 52.088524590163935, "grad_norm": 5.945560455322266, "learning_rate": 9.809402827361088e-06, "loss": 0.7072, "step": 15887 }, { "epoch": 52.09180327868852, "grad_norm": 7.401712894439697, "learning_rate": 9.80834113434467e-06, "loss": 0.4951, "step": 15888 }, { "epoch": 52.09508196721311, "grad_norm": 5.5648722648620605, "learning_rate": 9.807279443489408e-06, "loss": 0.3908, "step": 15889 }, { "epoch": 52.09836065573771, "grad_norm": 7.554324626922607, "learning_rate": 9.806217754807272e-06, "loss": 0.6664, "step": 15890 }, { "epoch": 52.101639344262296, "grad_norm": 5.423572540283203, "learning_rate": 9.805156068310233e-06, "loss": 0.5663, "step": 15891 }, { "epoch": 52.104918032786884, "grad_norm": 6.057597637176514, "learning_rate": 9.804094384010256e-06, "loss": 0.7375, "step": 15892 }, { "epoch": 52.10819672131147, "grad_norm": 6.745274543762207, "learning_rate": 9.803032701919325e-06, "loss": 0.5049, "step": 15893 }, { "epoch": 52.11147540983607, "grad_norm": 6.183442115783691, "learning_rate": 9.801971022049404e-06, "loss": 0.668, "step": 15894 }, { "epoch": 52.114754098360656, "grad_norm": 5.465805530548096, "learning_rate": 9.800909344412464e-06, "loss": 0.6777, "step": 15895 }, { "epoch": 52.118032786885244, "grad_norm": 15.81839370727539, "learning_rate": 9.79984766902048e-06, "loss": 0.5217, "step": 15896 }, { "epoch": 52.12131147540983, "grad_norm": 7.333935737609863, "learning_rate": 9.798785995885417e-06, "loss": 0.7742, "step": 15897 }, { "epoch": 52.12459016393443, "grad_norm": 5.857363700866699, "learning_rate": 9.797724325019256e-06, "loss": 0.438, "step": 15898 }, { "epoch": 52.12786885245902, "grad_norm": 6.1318678855896, "learning_rate": 9.796662656433963e-06, "loss": 0.4313, "step": 15899 }, { "epoch": 52.131147540983605, "grad_norm": 6.847790718078613, "learning_rate": 9.79560099014151e-06, "loss": 0.5528, "step": 15900 }, { "epoch": 52.13442622950819, "grad_norm": 7.279943466186523, "learning_rate": 9.794539326153864e-06, "loss": 0.5446, "step": 15901 }, { "epoch": 52.13770491803279, "grad_norm": 6.368527412414551, "learning_rate": 9.793477664483004e-06, "loss": 0.5603, "step": 15902 }, { "epoch": 52.14098360655738, "grad_norm": 40.41158676147461, "learning_rate": 9.792416005140899e-06, "loss": 0.6086, "step": 15903 }, { "epoch": 52.144262295081965, "grad_norm": 6.732078552246094, "learning_rate": 9.79135434813952e-06, "loss": 0.7097, "step": 15904 }, { "epoch": 52.14754098360656, "grad_norm": 5.693325996398926, "learning_rate": 9.790292693490832e-06, "loss": 0.7037, "step": 15905 }, { "epoch": 52.15081967213115, "grad_norm": 9.169118881225586, "learning_rate": 9.789231041206817e-06, "loss": 0.458, "step": 15906 }, { "epoch": 52.15409836065574, "grad_norm": 6.330801010131836, "learning_rate": 9.78816939129944e-06, "loss": 0.3546, "step": 15907 }, { "epoch": 52.157377049180326, "grad_norm": 6.577383995056152, "learning_rate": 9.787107743780674e-06, "loss": 0.4269, "step": 15908 }, { "epoch": 52.16065573770492, "grad_norm": 6.577029705047607, "learning_rate": 9.786046098662491e-06, "loss": 0.4796, "step": 15909 }, { "epoch": 52.16393442622951, "grad_norm": 5.481816291809082, "learning_rate": 9.784984455956856e-06, "loss": 0.6299, "step": 15910 }, { "epoch": 52.1672131147541, "grad_norm": 7.80746603012085, "learning_rate": 9.783922815675747e-06, "loss": 0.4423, "step": 15911 }, { "epoch": 52.170491803278686, "grad_norm": 4.7437052726745605, "learning_rate": 9.782861177831134e-06, "loss": 0.741, "step": 15912 }, { "epoch": 52.17377049180328, "grad_norm": 5.7827630043029785, "learning_rate": 9.781799542434987e-06, "loss": 0.3781, "step": 15913 }, { "epoch": 52.17704918032787, "grad_norm": 7.282674789428711, "learning_rate": 9.780737909499276e-06, "loss": 0.5223, "step": 15914 }, { "epoch": 52.18032786885246, "grad_norm": 7.37640905380249, "learning_rate": 9.779676279035972e-06, "loss": 0.5159, "step": 15915 }, { "epoch": 52.18360655737705, "grad_norm": 45.456809997558594, "learning_rate": 9.77861465105705e-06, "loss": 0.5054, "step": 15916 }, { "epoch": 52.18688524590164, "grad_norm": 8.69706916809082, "learning_rate": 9.777553025574478e-06, "loss": 0.7648, "step": 15917 }, { "epoch": 52.19016393442623, "grad_norm": 7.9622697830200195, "learning_rate": 9.776491402600222e-06, "loss": 0.6661, "step": 15918 }, { "epoch": 52.19344262295082, "grad_norm": 5.003466606140137, "learning_rate": 9.775429782146262e-06, "loss": 0.4167, "step": 15919 }, { "epoch": 52.19672131147541, "grad_norm": 7.606758117675781, "learning_rate": 9.774368164224565e-06, "loss": 0.5112, "step": 15920 }, { "epoch": 52.2, "grad_norm": 37.13191223144531, "learning_rate": 9.773306548847102e-06, "loss": 0.4758, "step": 15921 }, { "epoch": 52.20327868852459, "grad_norm": 11.444894790649414, "learning_rate": 9.772244936025844e-06, "loss": 0.6479, "step": 15922 }, { "epoch": 52.20655737704918, "grad_norm": 7.9094157218933105, "learning_rate": 9.771183325772753e-06, "loss": 0.3816, "step": 15923 }, { "epoch": 52.20983606557377, "grad_norm": 5.91074275970459, "learning_rate": 9.770121718099817e-06, "loss": 0.3706, "step": 15924 }, { "epoch": 52.21311475409836, "grad_norm": 6.54730224609375, "learning_rate": 9.769060113018996e-06, "loss": 0.6394, "step": 15925 }, { "epoch": 52.21639344262295, "grad_norm": 22.2036190032959, "learning_rate": 9.767998510542261e-06, "loss": 0.5768, "step": 15926 }, { "epoch": 52.21967213114754, "grad_norm": 7.107048988342285, "learning_rate": 9.766936910681581e-06, "loss": 0.6542, "step": 15927 }, { "epoch": 52.22295081967213, "grad_norm": 5.837501525878906, "learning_rate": 9.765875313448934e-06, "loss": 0.5287, "step": 15928 }, { "epoch": 52.226229508196724, "grad_norm": 7.460611343383789, "learning_rate": 9.764813718856285e-06, "loss": 0.7465, "step": 15929 }, { "epoch": 52.22950819672131, "grad_norm": 4.790797710418701, "learning_rate": 9.763752126915607e-06, "loss": 0.5892, "step": 15930 }, { "epoch": 52.2327868852459, "grad_norm": 5.573244094848633, "learning_rate": 9.762690537638864e-06, "loss": 0.4731, "step": 15931 }, { "epoch": 52.23606557377049, "grad_norm": 7.131863117218018, "learning_rate": 9.761628951038037e-06, "loss": 0.6224, "step": 15932 }, { "epoch": 52.239344262295084, "grad_norm": 6.614334583282471, "learning_rate": 9.760567367125092e-06, "loss": 0.4362, "step": 15933 }, { "epoch": 52.24262295081967, "grad_norm": 18.86419677734375, "learning_rate": 9.759505785911999e-06, "loss": 0.5547, "step": 15934 }, { "epoch": 52.24590163934426, "grad_norm": 5.523133754730225, "learning_rate": 9.758444207410725e-06, "loss": 0.501, "step": 15935 }, { "epoch": 52.24918032786885, "grad_norm": 5.678282737731934, "learning_rate": 9.757382631633245e-06, "loss": 0.3522, "step": 15936 }, { "epoch": 52.252459016393445, "grad_norm": 7.3897833824157715, "learning_rate": 9.75632105859153e-06, "loss": 0.5701, "step": 15937 }, { "epoch": 52.25573770491803, "grad_norm": 6.093331813812256, "learning_rate": 9.755259488297544e-06, "loss": 0.6306, "step": 15938 }, { "epoch": 52.25901639344262, "grad_norm": 5.243187427520752, "learning_rate": 9.754197920763266e-06, "loss": 0.6703, "step": 15939 }, { "epoch": 52.26229508196721, "grad_norm": 5.045833587646484, "learning_rate": 9.75313635600066e-06, "loss": 0.7561, "step": 15940 }, { "epoch": 52.265573770491805, "grad_norm": 7.064342975616455, "learning_rate": 9.752074794021697e-06, "loss": 0.7582, "step": 15941 }, { "epoch": 52.268852459016394, "grad_norm": 9.147279739379883, "learning_rate": 9.751013234838352e-06, "loss": 0.4868, "step": 15942 }, { "epoch": 52.27213114754098, "grad_norm": 6.0491623878479, "learning_rate": 9.749951678462592e-06, "loss": 0.6322, "step": 15943 }, { "epoch": 52.27540983606557, "grad_norm": 5.84971284866333, "learning_rate": 9.74889012490638e-06, "loss": 0.5378, "step": 15944 }, { "epoch": 52.278688524590166, "grad_norm": 7.529764652252197, "learning_rate": 9.747828574181698e-06, "loss": 0.6415, "step": 15945 }, { "epoch": 52.281967213114754, "grad_norm": 5.409321308135986, "learning_rate": 9.746767026300513e-06, "loss": 0.4287, "step": 15946 }, { "epoch": 52.28524590163934, "grad_norm": 9.845365524291992, "learning_rate": 9.745705481274792e-06, "loss": 0.6557, "step": 15947 }, { "epoch": 52.28852459016394, "grad_norm": 6.4131951332092285, "learning_rate": 9.744643939116507e-06, "loss": 0.5718, "step": 15948 }, { "epoch": 52.291803278688526, "grad_norm": 6.140010356903076, "learning_rate": 9.74358239983762e-06, "loss": 0.5776, "step": 15949 }, { "epoch": 52.295081967213115, "grad_norm": 5.240838527679443, "learning_rate": 9.742520863450116e-06, "loss": 0.5389, "step": 15950 }, { "epoch": 52.2983606557377, "grad_norm": 6.05620813369751, "learning_rate": 9.741459329965955e-06, "loss": 0.4834, "step": 15951 }, { "epoch": 52.3016393442623, "grad_norm": 5.8463592529296875, "learning_rate": 9.74039779939711e-06, "loss": 0.3389, "step": 15952 }, { "epoch": 52.30491803278689, "grad_norm": 6.971675395965576, "learning_rate": 9.739336271755542e-06, "loss": 0.601, "step": 15953 }, { "epoch": 52.308196721311475, "grad_norm": 5.878177642822266, "learning_rate": 9.738274747053236e-06, "loss": 0.516, "step": 15954 }, { "epoch": 52.31147540983606, "grad_norm": 4.467952728271484, "learning_rate": 9.737213225302154e-06, "loss": 0.4576, "step": 15955 }, { "epoch": 52.31475409836066, "grad_norm": 7.423654556274414, "learning_rate": 9.736151706514265e-06, "loss": 0.6873, "step": 15956 }, { "epoch": 52.31803278688525, "grad_norm": 7.384032249450684, "learning_rate": 9.735090190701537e-06, "loss": 0.4742, "step": 15957 }, { "epoch": 52.321311475409836, "grad_norm": 5.764104843139648, "learning_rate": 9.734028677875946e-06, "loss": 0.5895, "step": 15958 }, { "epoch": 52.324590163934424, "grad_norm": 4.73929500579834, "learning_rate": 9.73296716804946e-06, "loss": 0.677, "step": 15959 }, { "epoch": 52.32786885245902, "grad_norm": 9.85693359375, "learning_rate": 9.731905661234044e-06, "loss": 0.5473, "step": 15960 }, { "epoch": 52.33114754098361, "grad_norm": 7.064686298370361, "learning_rate": 9.730844157441668e-06, "loss": 0.5205, "step": 15961 }, { "epoch": 52.334426229508196, "grad_norm": 4.639379024505615, "learning_rate": 9.729782656684307e-06, "loss": 0.4224, "step": 15962 }, { "epoch": 52.337704918032784, "grad_norm": 6.089849472045898, "learning_rate": 9.728721158973927e-06, "loss": 0.571, "step": 15963 }, { "epoch": 52.34098360655738, "grad_norm": 7.4484148025512695, "learning_rate": 9.727659664322497e-06, "loss": 0.7007, "step": 15964 }, { "epoch": 52.34426229508197, "grad_norm": 6.005549907684326, "learning_rate": 9.72659817274199e-06, "loss": 0.5081, "step": 15965 }, { "epoch": 52.34754098360656, "grad_norm": 6.187479019165039, "learning_rate": 9.72553668424437e-06, "loss": 0.7612, "step": 15966 }, { "epoch": 52.350819672131145, "grad_norm": 5.690515995025635, "learning_rate": 9.72447519884161e-06, "loss": 0.7103, "step": 15967 }, { "epoch": 52.35409836065574, "grad_norm": 7.119373321533203, "learning_rate": 9.72341371654568e-06, "loss": 0.6735, "step": 15968 }, { "epoch": 52.35737704918033, "grad_norm": 5.617478847503662, "learning_rate": 9.722352237368548e-06, "loss": 0.5175, "step": 15969 }, { "epoch": 52.36065573770492, "grad_norm": 5.805098056793213, "learning_rate": 9.721290761322179e-06, "loss": 0.5472, "step": 15970 }, { "epoch": 52.363934426229505, "grad_norm": 7.877858638763428, "learning_rate": 9.72022928841855e-06, "loss": 0.6416, "step": 15971 }, { "epoch": 52.3672131147541, "grad_norm": 6.0059356689453125, "learning_rate": 9.719167818669629e-06, "loss": 0.4128, "step": 15972 }, { "epoch": 52.37049180327869, "grad_norm": 5.676172256469727, "learning_rate": 9.71810635208738e-06, "loss": 0.4587, "step": 15973 }, { "epoch": 52.37377049180328, "grad_norm": 6.046025276184082, "learning_rate": 9.717044888683777e-06, "loss": 0.5673, "step": 15974 }, { "epoch": 52.377049180327866, "grad_norm": 6.559584140777588, "learning_rate": 9.715983428470783e-06, "loss": 0.5654, "step": 15975 }, { "epoch": 52.38032786885246, "grad_norm": 6.847189903259277, "learning_rate": 9.714921971460374e-06, "loss": 0.5079, "step": 15976 }, { "epoch": 52.38360655737705, "grad_norm": 6.011751651763916, "learning_rate": 9.713860517664517e-06, "loss": 0.4985, "step": 15977 }, { "epoch": 52.38688524590164, "grad_norm": 5.658729076385498, "learning_rate": 9.712799067095179e-06, "loss": 0.432, "step": 15978 }, { "epoch": 52.390163934426226, "grad_norm": 5.768616676330566, "learning_rate": 9.711737619764326e-06, "loss": 0.5085, "step": 15979 }, { "epoch": 52.39344262295082, "grad_norm": 7.943954944610596, "learning_rate": 9.710676175683936e-06, "loss": 0.5246, "step": 15980 }, { "epoch": 52.39672131147541, "grad_norm": 6.6471967697143555, "learning_rate": 9.709614734865972e-06, "loss": 0.355, "step": 15981 }, { "epoch": 52.4, "grad_norm": 4.744348049163818, "learning_rate": 9.708553297322407e-06, "loss": 0.5623, "step": 15982 }, { "epoch": 52.40327868852459, "grad_norm": 7.142024040222168, "learning_rate": 9.707491863065199e-06, "loss": 0.7943, "step": 15983 }, { "epoch": 52.40655737704918, "grad_norm": 5.807527542114258, "learning_rate": 9.706430432106329e-06, "loss": 0.383, "step": 15984 }, { "epoch": 52.40983606557377, "grad_norm": 9.666152000427246, "learning_rate": 9.705369004457764e-06, "loss": 0.7348, "step": 15985 }, { "epoch": 52.41311475409836, "grad_norm": 7.1554694175720215, "learning_rate": 9.704307580131467e-06, "loss": 0.4393, "step": 15986 }, { "epoch": 52.41639344262295, "grad_norm": 6.9442315101623535, "learning_rate": 9.703246159139408e-06, "loss": 0.3875, "step": 15987 }, { "epoch": 52.41967213114754, "grad_norm": 6.748520374298096, "learning_rate": 9.702184741493556e-06, "loss": 0.7204, "step": 15988 }, { "epoch": 52.42295081967213, "grad_norm": 6.1232476234436035, "learning_rate": 9.701123327205884e-06, "loss": 0.4568, "step": 15989 }, { "epoch": 52.42622950819672, "grad_norm": 7.95118522644043, "learning_rate": 9.700061916288355e-06, "loss": 0.6454, "step": 15990 }, { "epoch": 52.429508196721315, "grad_norm": 6.432979106903076, "learning_rate": 9.699000508752943e-06, "loss": 0.5056, "step": 15991 }, { "epoch": 52.4327868852459, "grad_norm": 15.510037422180176, "learning_rate": 9.69793910461161e-06, "loss": 0.4072, "step": 15992 }, { "epoch": 52.43606557377049, "grad_norm": 6.136436939239502, "learning_rate": 9.696877703876328e-06, "loss": 0.6579, "step": 15993 }, { "epoch": 52.43934426229508, "grad_norm": 10.950151443481445, "learning_rate": 9.695816306559066e-06, "loss": 0.5076, "step": 15994 }, { "epoch": 52.442622950819676, "grad_norm": 5.514118671417236, "learning_rate": 9.694754912671792e-06, "loss": 0.5872, "step": 15995 }, { "epoch": 52.445901639344264, "grad_norm": 8.951745986938477, "learning_rate": 9.693693522226472e-06, "loss": 0.6501, "step": 15996 }, { "epoch": 52.44918032786885, "grad_norm": 6.694944381713867, "learning_rate": 9.692632135235077e-06, "loss": 0.6406, "step": 15997 }, { "epoch": 52.45245901639344, "grad_norm": 6.065084934234619, "learning_rate": 9.691570751709576e-06, "loss": 0.5706, "step": 15998 }, { "epoch": 52.455737704918036, "grad_norm": 8.299656867980957, "learning_rate": 9.690509371661934e-06, "loss": 0.7432, "step": 15999 }, { "epoch": 52.459016393442624, "grad_norm": 6.595486164093018, "learning_rate": 9.689447995104121e-06, "loss": 0.6473, "step": 16000 }, { "epoch": 52.46229508196721, "grad_norm": 11.28085994720459, "learning_rate": 9.6883866220481e-06, "loss": 0.6203, "step": 16001 }, { "epoch": 52.4655737704918, "grad_norm": 6.627284049987793, "learning_rate": 9.687325252505849e-06, "loss": 0.6492, "step": 16002 }, { "epoch": 52.4688524590164, "grad_norm": 4.334766864776611, "learning_rate": 9.68626388648933e-06, "loss": 0.4721, "step": 16003 }, { "epoch": 52.472131147540985, "grad_norm": 7.630101680755615, "learning_rate": 9.685202524010515e-06, "loss": 0.6505, "step": 16004 }, { "epoch": 52.47540983606557, "grad_norm": 8.77035140991211, "learning_rate": 9.684141165081361e-06, "loss": 0.5585, "step": 16005 }, { "epoch": 52.47868852459016, "grad_norm": 6.536677360534668, "learning_rate": 9.68307980971385e-06, "loss": 0.4878, "step": 16006 }, { "epoch": 52.48196721311476, "grad_norm": 6.230498790740967, "learning_rate": 9.682018457919942e-06, "loss": 0.4087, "step": 16007 }, { "epoch": 52.485245901639345, "grad_norm": 5.103813171386719, "learning_rate": 9.680957109711609e-06, "loss": 0.7286, "step": 16008 }, { "epoch": 52.488524590163934, "grad_norm": 5.74440336227417, "learning_rate": 9.679895765100809e-06, "loss": 0.5564, "step": 16009 }, { "epoch": 52.49180327868852, "grad_norm": 6.023528099060059, "learning_rate": 9.678834424099523e-06, "loss": 0.7096, "step": 16010 }, { "epoch": 52.49508196721312, "grad_norm": 12.989182472229004, "learning_rate": 9.677773086719714e-06, "loss": 0.3836, "step": 16011 }, { "epoch": 52.498360655737706, "grad_norm": 6.665407657623291, "learning_rate": 9.676711752973347e-06, "loss": 0.4594, "step": 16012 }, { "epoch": 52.501639344262294, "grad_norm": 7.465179443359375, "learning_rate": 9.67565042287239e-06, "loss": 0.5194, "step": 16013 }, { "epoch": 52.50491803278688, "grad_norm": 5.317169189453125, "learning_rate": 9.67458909642881e-06, "loss": 0.7776, "step": 16014 }, { "epoch": 52.50819672131148, "grad_norm": 8.274517059326172, "learning_rate": 9.673527773654578e-06, "loss": 0.4375, "step": 16015 }, { "epoch": 52.511475409836066, "grad_norm": 5.809957027435303, "learning_rate": 9.672466454561662e-06, "loss": 0.5666, "step": 16016 }, { "epoch": 52.514754098360655, "grad_norm": 6.532939434051514, "learning_rate": 9.671405139162025e-06, "loss": 0.6911, "step": 16017 }, { "epoch": 52.51803278688524, "grad_norm": 4.997485637664795, "learning_rate": 9.670343827467635e-06, "loss": 0.5643, "step": 16018 }, { "epoch": 52.52131147540984, "grad_norm": 6.469729900360107, "learning_rate": 9.669282519490465e-06, "loss": 0.8594, "step": 16019 }, { "epoch": 52.52459016393443, "grad_norm": 9.524246215820312, "learning_rate": 9.668221215242475e-06, "loss": 0.5698, "step": 16020 }, { "epoch": 52.527868852459015, "grad_norm": 6.014908790588379, "learning_rate": 9.66715991473564e-06, "loss": 0.4965, "step": 16021 }, { "epoch": 52.5311475409836, "grad_norm": 6.144665241241455, "learning_rate": 9.666098617981918e-06, "loss": 0.5634, "step": 16022 }, { "epoch": 52.5344262295082, "grad_norm": 6.818005084991455, "learning_rate": 9.665037324993282e-06, "loss": 0.6824, "step": 16023 }, { "epoch": 52.53770491803279, "grad_norm": 9.468311309814453, "learning_rate": 9.663976035781701e-06, "loss": 0.6008, "step": 16024 }, { "epoch": 52.540983606557376, "grad_norm": 7.980011463165283, "learning_rate": 9.662914750359141e-06, "loss": 0.8537, "step": 16025 }, { "epoch": 52.544262295081964, "grad_norm": 15.16441822052002, "learning_rate": 9.661853468737565e-06, "loss": 0.7652, "step": 16026 }, { "epoch": 52.54754098360656, "grad_norm": 7.145744800567627, "learning_rate": 9.66079219092894e-06, "loss": 0.6863, "step": 16027 }, { "epoch": 52.55081967213115, "grad_norm": 11.574974060058594, "learning_rate": 9.65973091694524e-06, "loss": 0.6605, "step": 16028 }, { "epoch": 52.554098360655736, "grad_norm": 5.790059566497803, "learning_rate": 9.658669646798427e-06, "loss": 0.5084, "step": 16029 }, { "epoch": 52.557377049180324, "grad_norm": 5.437068939208984, "learning_rate": 9.65760838050047e-06, "loss": 0.471, "step": 16030 }, { "epoch": 52.56065573770492, "grad_norm": 5.684667587280273, "learning_rate": 9.656547118063328e-06, "loss": 0.6737, "step": 16031 }, { "epoch": 52.56393442622951, "grad_norm": 39.28125, "learning_rate": 9.65548585949898e-06, "loss": 0.5311, "step": 16032 }, { "epoch": 52.5672131147541, "grad_norm": 5.583925247192383, "learning_rate": 9.654424604819388e-06, "loss": 0.3845, "step": 16033 }, { "epoch": 52.570491803278685, "grad_norm": 5.3992767333984375, "learning_rate": 9.653363354036516e-06, "loss": 0.5416, "step": 16034 }, { "epoch": 52.57377049180328, "grad_norm": 6.333907127380371, "learning_rate": 9.65230210716233e-06, "loss": 0.5459, "step": 16035 }, { "epoch": 52.57704918032787, "grad_norm": 5.659541606903076, "learning_rate": 9.651240864208803e-06, "loss": 0.4722, "step": 16036 }, { "epoch": 52.58032786885246, "grad_norm": 7.4729838371276855, "learning_rate": 9.650179625187897e-06, "loss": 0.8083, "step": 16037 }, { "epoch": 52.58360655737705, "grad_norm": 7.142016410827637, "learning_rate": 9.649118390111581e-06, "loss": 0.7376, "step": 16038 }, { "epoch": 52.58688524590164, "grad_norm": 6.0458269119262695, "learning_rate": 9.648057158991819e-06, "loss": 0.6198, "step": 16039 }, { "epoch": 52.59016393442623, "grad_norm": 5.675718307495117, "learning_rate": 9.646995931840575e-06, "loss": 0.5525, "step": 16040 }, { "epoch": 52.59344262295082, "grad_norm": 6.1611528396606445, "learning_rate": 9.645934708669822e-06, "loss": 0.494, "step": 16041 }, { "epoch": 52.59672131147541, "grad_norm": 10.037274360656738, "learning_rate": 9.644873489491524e-06, "loss": 0.477, "step": 16042 }, { "epoch": 52.6, "grad_norm": 6.190574645996094, "learning_rate": 9.643812274317644e-06, "loss": 0.6565, "step": 16043 }, { "epoch": 52.60327868852459, "grad_norm": 6.018360137939453, "learning_rate": 9.642751063160151e-06, "loss": 0.6372, "step": 16044 }, { "epoch": 52.60655737704918, "grad_norm": 6.472246170043945, "learning_rate": 9.641689856031015e-06, "loss": 0.6143, "step": 16045 }, { "epoch": 52.609836065573774, "grad_norm": 35.77772903442383, "learning_rate": 9.640628652942195e-06, "loss": 0.4345, "step": 16046 }, { "epoch": 52.61311475409836, "grad_norm": 5.710467338562012, "learning_rate": 9.639567453905662e-06, "loss": 0.2966, "step": 16047 }, { "epoch": 52.61639344262295, "grad_norm": 5.423633575439453, "learning_rate": 9.638506258933378e-06, "loss": 0.7577, "step": 16048 }, { "epoch": 52.61967213114754, "grad_norm": 5.401009559631348, "learning_rate": 9.637445068037315e-06, "loss": 0.8837, "step": 16049 }, { "epoch": 52.622950819672134, "grad_norm": 6.186271667480469, "learning_rate": 9.636383881229436e-06, "loss": 0.4024, "step": 16050 }, { "epoch": 52.62622950819672, "grad_norm": 5.3942155838012695, "learning_rate": 9.635322698521706e-06, "loss": 0.5497, "step": 16051 }, { "epoch": 52.62950819672131, "grad_norm": 9.7080717086792, "learning_rate": 9.634261519926093e-06, "loss": 0.7756, "step": 16052 }, { "epoch": 52.6327868852459, "grad_norm": 11.936403274536133, "learning_rate": 9.633200345454557e-06, "loss": 0.4899, "step": 16053 }, { "epoch": 52.636065573770495, "grad_norm": 15.115058898925781, "learning_rate": 9.632139175119072e-06, "loss": 0.7139, "step": 16054 }, { "epoch": 52.63934426229508, "grad_norm": 8.903112411499023, "learning_rate": 9.6310780089316e-06, "loss": 0.4437, "step": 16055 }, { "epoch": 52.64262295081967, "grad_norm": 5.747001647949219, "learning_rate": 9.630016846904108e-06, "loss": 0.3519, "step": 16056 }, { "epoch": 52.64590163934426, "grad_norm": 5.4322919845581055, "learning_rate": 9.628955689048557e-06, "loss": 0.5719, "step": 16057 }, { "epoch": 52.649180327868855, "grad_norm": 6.335714817047119, "learning_rate": 9.62789453537692e-06, "loss": 0.5559, "step": 16058 }, { "epoch": 52.65245901639344, "grad_norm": 5.286917209625244, "learning_rate": 9.62683338590116e-06, "loss": 0.632, "step": 16059 }, { "epoch": 52.65573770491803, "grad_norm": 7.717405319213867, "learning_rate": 9.62577224063324e-06, "loss": 0.4989, "step": 16060 }, { "epoch": 52.65901639344262, "grad_norm": 7.763818740844727, "learning_rate": 9.624711099585123e-06, "loss": 0.6609, "step": 16061 }, { "epoch": 52.662295081967216, "grad_norm": 6.0637078285217285, "learning_rate": 9.623649962768784e-06, "loss": 0.5484, "step": 16062 }, { "epoch": 52.665573770491804, "grad_norm": 5.6063971519470215, "learning_rate": 9.622588830196182e-06, "loss": 0.5095, "step": 16063 }, { "epoch": 52.66885245901639, "grad_norm": 7.349023342132568, "learning_rate": 9.621527701879284e-06, "loss": 0.5271, "step": 16064 }, { "epoch": 52.67213114754098, "grad_norm": 11.463651657104492, "learning_rate": 9.620466577830055e-06, "loss": 0.4257, "step": 16065 }, { "epoch": 52.675409836065576, "grad_norm": 6.6733479499816895, "learning_rate": 9.619405458060454e-06, "loss": 0.418, "step": 16066 }, { "epoch": 52.678688524590164, "grad_norm": 8.752942085266113, "learning_rate": 9.618344342582458e-06, "loss": 0.4671, "step": 16067 }, { "epoch": 52.68196721311475, "grad_norm": 5.770309925079346, "learning_rate": 9.617283231408026e-06, "loss": 0.4416, "step": 16068 }, { "epoch": 52.68524590163934, "grad_norm": 6.65659236907959, "learning_rate": 9.61622212454912e-06, "loss": 0.6343, "step": 16069 }, { "epoch": 52.68852459016394, "grad_norm": 8.152321815490723, "learning_rate": 9.615161022017709e-06, "loss": 0.6346, "step": 16070 }, { "epoch": 52.691803278688525, "grad_norm": 6.15780782699585, "learning_rate": 9.614099923825761e-06, "loss": 0.2498, "step": 16071 }, { "epoch": 52.69508196721311, "grad_norm": 5.594552516937256, "learning_rate": 9.613038829985235e-06, "loss": 0.6234, "step": 16072 }, { "epoch": 52.6983606557377, "grad_norm": 6.976528644561768, "learning_rate": 9.611977740508101e-06, "loss": 0.9286, "step": 16073 }, { "epoch": 52.7016393442623, "grad_norm": 6.148772716522217, "learning_rate": 9.610916655406319e-06, "loss": 0.5578, "step": 16074 }, { "epoch": 52.704918032786885, "grad_norm": 6.284124851226807, "learning_rate": 9.609855574691856e-06, "loss": 0.5463, "step": 16075 }, { "epoch": 52.708196721311474, "grad_norm": 25.666826248168945, "learning_rate": 9.608794498376678e-06, "loss": 0.3535, "step": 16076 }, { "epoch": 52.71147540983607, "grad_norm": 5.841794013977051, "learning_rate": 9.607733426472752e-06, "loss": 0.4754, "step": 16077 }, { "epoch": 52.71475409836066, "grad_norm": 6.966536998748779, "learning_rate": 9.606672358992037e-06, "loss": 0.596, "step": 16078 }, { "epoch": 52.718032786885246, "grad_norm": 6.140520095825195, "learning_rate": 9.605611295946497e-06, "loss": 0.6854, "step": 16079 }, { "epoch": 52.721311475409834, "grad_norm": 6.612903118133545, "learning_rate": 9.604550237348103e-06, "loss": 0.4875, "step": 16080 }, { "epoch": 52.72459016393443, "grad_norm": 10.125617980957031, "learning_rate": 9.603489183208816e-06, "loss": 0.7155, "step": 16081 }, { "epoch": 52.72786885245902, "grad_norm": 11.716465950012207, "learning_rate": 9.602428133540602e-06, "loss": 0.5097, "step": 16082 }, { "epoch": 52.731147540983606, "grad_norm": 19.64655876159668, "learning_rate": 9.601367088355419e-06, "loss": 0.5096, "step": 16083 }, { "epoch": 52.734426229508195, "grad_norm": 11.812214851379395, "learning_rate": 9.600306047665241e-06, "loss": 0.7405, "step": 16084 }, { "epoch": 52.73770491803279, "grad_norm": 6.354491233825684, "learning_rate": 9.599245011482027e-06, "loss": 0.5764, "step": 16085 }, { "epoch": 52.74098360655738, "grad_norm": 6.5736165046691895, "learning_rate": 9.598183979817743e-06, "loss": 0.3827, "step": 16086 }, { "epoch": 52.74426229508197, "grad_norm": 5.778043746948242, "learning_rate": 9.59712295268435e-06, "loss": 0.6827, "step": 16087 }, { "epoch": 52.747540983606555, "grad_norm": 6.3782477378845215, "learning_rate": 9.596061930093816e-06, "loss": 0.4474, "step": 16088 }, { "epoch": 52.75081967213115, "grad_norm": 8.573070526123047, "learning_rate": 9.595000912058105e-06, "loss": 0.8261, "step": 16089 }, { "epoch": 52.75409836065574, "grad_norm": 5.952584266662598, "learning_rate": 9.59393989858918e-06, "loss": 0.6093, "step": 16090 }, { "epoch": 52.75737704918033, "grad_norm": 6.645490646362305, "learning_rate": 9.592878889699004e-06, "loss": 0.5293, "step": 16091 }, { "epoch": 52.760655737704916, "grad_norm": 6.484915733337402, "learning_rate": 9.591817885399538e-06, "loss": 0.6596, "step": 16092 }, { "epoch": 52.76393442622951, "grad_norm": 9.355034828186035, "learning_rate": 9.590756885702755e-06, "loss": 0.7115, "step": 16093 }, { "epoch": 52.7672131147541, "grad_norm": 5.832984924316406, "learning_rate": 9.589695890620611e-06, "loss": 0.4584, "step": 16094 }, { "epoch": 52.77049180327869, "grad_norm": 7.03230094909668, "learning_rate": 9.588634900165074e-06, "loss": 0.7461, "step": 16095 }, { "epoch": 52.773770491803276, "grad_norm": 6.621452808380127, "learning_rate": 9.587573914348103e-06, "loss": 0.6283, "step": 16096 }, { "epoch": 52.77704918032787, "grad_norm": 4.956811904907227, "learning_rate": 9.586512933181668e-06, "loss": 0.3997, "step": 16097 }, { "epoch": 52.78032786885246, "grad_norm": 11.795114517211914, "learning_rate": 9.585451956677731e-06, "loss": 0.7221, "step": 16098 }, { "epoch": 52.78360655737705, "grad_norm": 5.123837947845459, "learning_rate": 9.58439098484825e-06, "loss": 0.6132, "step": 16099 }, { "epoch": 52.78688524590164, "grad_norm": 11.587071418762207, "learning_rate": 9.583330017705193e-06, "loss": 0.6011, "step": 16100 }, { "epoch": 52.79016393442623, "grad_norm": 4.8741865158081055, "learning_rate": 9.582269055260528e-06, "loss": 0.64, "step": 16101 }, { "epoch": 52.79344262295082, "grad_norm": 6.7738847732543945, "learning_rate": 9.581208097526209e-06, "loss": 0.6297, "step": 16102 }, { "epoch": 52.79672131147541, "grad_norm": 11.313751220703125, "learning_rate": 9.580147144514207e-06, "loss": 0.4069, "step": 16103 }, { "epoch": 52.8, "grad_norm": 7.460206985473633, "learning_rate": 9.579086196236483e-06, "loss": 0.3616, "step": 16104 }, { "epoch": 52.80327868852459, "grad_norm": 4.699544906616211, "learning_rate": 9.578025252704994e-06, "loss": 0.6605, "step": 16105 }, { "epoch": 52.80655737704918, "grad_norm": 6.9197516441345215, "learning_rate": 9.576964313931715e-06, "loss": 0.4244, "step": 16106 }, { "epoch": 52.80983606557377, "grad_norm": 10.967769622802734, "learning_rate": 9.575903379928601e-06, "loss": 0.6682, "step": 16107 }, { "epoch": 52.81311475409836, "grad_norm": 8.959473609924316, "learning_rate": 9.57484245070762e-06, "loss": 0.6588, "step": 16108 }, { "epoch": 52.81639344262295, "grad_norm": 6.435243606567383, "learning_rate": 9.573781526280726e-06, "loss": 0.6812, "step": 16109 }, { "epoch": 52.81967213114754, "grad_norm": 10.03688907623291, "learning_rate": 9.572720606659895e-06, "loss": 0.4868, "step": 16110 }, { "epoch": 52.82295081967213, "grad_norm": 38.94749450683594, "learning_rate": 9.571659691857082e-06, "loss": 0.6199, "step": 16111 }, { "epoch": 52.82622950819672, "grad_norm": 15.381044387817383, "learning_rate": 9.570598781884252e-06, "loss": 0.5123, "step": 16112 }, { "epoch": 52.829508196721314, "grad_norm": 6.150472640991211, "learning_rate": 9.569537876753361e-06, "loss": 0.7116, "step": 16113 }, { "epoch": 52.8327868852459, "grad_norm": 7.683679103851318, "learning_rate": 9.568476976476384e-06, "loss": 0.4812, "step": 16114 }, { "epoch": 52.83606557377049, "grad_norm": 6.979431629180908, "learning_rate": 9.567416081065278e-06, "loss": 0.6038, "step": 16115 }, { "epoch": 52.83934426229508, "grad_norm": 24.261882781982422, "learning_rate": 9.566355190532003e-06, "loss": 0.5464, "step": 16116 }, { "epoch": 52.842622950819674, "grad_norm": 6.183450698852539, "learning_rate": 9.565294304888527e-06, "loss": 0.2619, "step": 16117 }, { "epoch": 52.84590163934426, "grad_norm": 6.358801364898682, "learning_rate": 9.564233424146804e-06, "loss": 0.5721, "step": 16118 }, { "epoch": 52.84918032786885, "grad_norm": 8.537226676940918, "learning_rate": 9.563172548318808e-06, "loss": 0.7114, "step": 16119 }, { "epoch": 52.85245901639344, "grad_norm": 6.206014633178711, "learning_rate": 9.562111677416495e-06, "loss": 0.4915, "step": 16120 }, { "epoch": 52.855737704918035, "grad_norm": 6.4081621170043945, "learning_rate": 9.561050811451828e-06, "loss": 0.5668, "step": 16121 }, { "epoch": 52.85901639344262, "grad_norm": 5.733719825744629, "learning_rate": 9.559989950436764e-06, "loss": 0.3168, "step": 16122 }, { "epoch": 52.86229508196721, "grad_norm": 8.293377876281738, "learning_rate": 9.558929094383276e-06, "loss": 0.5443, "step": 16123 }, { "epoch": 52.86557377049181, "grad_norm": 8.016880989074707, "learning_rate": 9.55786824330332e-06, "loss": 0.6019, "step": 16124 }, { "epoch": 52.868852459016395, "grad_norm": 5.636847972869873, "learning_rate": 9.556807397208859e-06, "loss": 0.9513, "step": 16125 }, { "epoch": 52.87213114754098, "grad_norm": 7.295370101928711, "learning_rate": 9.555746556111855e-06, "loss": 0.6935, "step": 16126 }, { "epoch": 52.87540983606557, "grad_norm": 6.0369157791137695, "learning_rate": 9.554685720024273e-06, "loss": 0.6073, "step": 16127 }, { "epoch": 52.87868852459017, "grad_norm": 6.683729648590088, "learning_rate": 9.553624888958068e-06, "loss": 0.5037, "step": 16128 }, { "epoch": 52.881967213114756, "grad_norm": 21.111478805541992, "learning_rate": 9.55256406292521e-06, "loss": 0.3928, "step": 16129 }, { "epoch": 52.885245901639344, "grad_norm": 5.759777069091797, "learning_rate": 9.551503241937658e-06, "loss": 0.3828, "step": 16130 }, { "epoch": 52.88852459016393, "grad_norm": 7.794114112854004, "learning_rate": 9.55044242600737e-06, "loss": 0.607, "step": 16131 }, { "epoch": 52.89180327868853, "grad_norm": 6.623783111572266, "learning_rate": 9.549381615146314e-06, "loss": 0.5837, "step": 16132 }, { "epoch": 52.895081967213116, "grad_norm": 6.7649431228637695, "learning_rate": 9.548320809366449e-06, "loss": 0.4394, "step": 16133 }, { "epoch": 52.898360655737704, "grad_norm": 6.554825782775879, "learning_rate": 9.547260008679734e-06, "loss": 0.6087, "step": 16134 }, { "epoch": 52.90163934426229, "grad_norm": 6.029399871826172, "learning_rate": 9.546199213098134e-06, "loss": 0.6434, "step": 16135 }, { "epoch": 52.90491803278689, "grad_norm": 6.631113529205322, "learning_rate": 9.54513842263361e-06, "loss": 0.5849, "step": 16136 }, { "epoch": 52.90819672131148, "grad_norm": 9.528669357299805, "learning_rate": 9.544077637298124e-06, "loss": 0.9573, "step": 16137 }, { "epoch": 52.911475409836065, "grad_norm": 6.943094730377197, "learning_rate": 9.543016857103637e-06, "loss": 0.7505, "step": 16138 }, { "epoch": 52.91475409836065, "grad_norm": 5.955068111419678, "learning_rate": 9.541956082062111e-06, "loss": 0.677, "step": 16139 }, { "epoch": 52.91803278688525, "grad_norm": 7.087087631225586, "learning_rate": 9.5408953121855e-06, "loss": 0.5563, "step": 16140 }, { "epoch": 52.92131147540984, "grad_norm": 6.296808242797852, "learning_rate": 9.539834547485777e-06, "loss": 0.6277, "step": 16141 }, { "epoch": 52.924590163934425, "grad_norm": 6.705737590789795, "learning_rate": 9.538773787974898e-06, "loss": 0.6268, "step": 16142 }, { "epoch": 52.927868852459014, "grad_norm": 5.671756744384766, "learning_rate": 9.537713033664825e-06, "loss": 0.8225, "step": 16143 }, { "epoch": 52.93114754098361, "grad_norm": 5.556878089904785, "learning_rate": 9.536652284567514e-06, "loss": 0.6726, "step": 16144 }, { "epoch": 52.9344262295082, "grad_norm": 6.0001630783081055, "learning_rate": 9.535591540694933e-06, "loss": 0.5139, "step": 16145 }, { "epoch": 52.937704918032786, "grad_norm": 5.86125373840332, "learning_rate": 9.53453080205904e-06, "loss": 0.5402, "step": 16146 }, { "epoch": 52.940983606557374, "grad_norm": 5.739921569824219, "learning_rate": 9.533470068671798e-06, "loss": 0.7506, "step": 16147 }, { "epoch": 52.94426229508197, "grad_norm": 6.277987003326416, "learning_rate": 9.532409340545161e-06, "loss": 0.4306, "step": 16148 }, { "epoch": 52.94754098360656, "grad_norm": 4.962800025939941, "learning_rate": 9.5313486176911e-06, "loss": 0.6814, "step": 16149 }, { "epoch": 52.950819672131146, "grad_norm": 5.985501766204834, "learning_rate": 9.53028790012157e-06, "loss": 0.7076, "step": 16150 }, { "epoch": 52.954098360655735, "grad_norm": 6.752567291259766, "learning_rate": 9.529227187848529e-06, "loss": 0.5199, "step": 16151 }, { "epoch": 52.95737704918033, "grad_norm": 5.507514476776123, "learning_rate": 9.528166480883943e-06, "loss": 0.5798, "step": 16152 }, { "epoch": 52.96065573770492, "grad_norm": 6.450991153717041, "learning_rate": 9.52710577923977e-06, "loss": 0.6593, "step": 16153 }, { "epoch": 52.96393442622951, "grad_norm": 5.633079528808594, "learning_rate": 9.526045082927971e-06, "loss": 0.5884, "step": 16154 }, { "epoch": 52.967213114754095, "grad_norm": 6.375864505767822, "learning_rate": 9.524984391960508e-06, "loss": 0.8481, "step": 16155 }, { "epoch": 52.97049180327869, "grad_norm": 5.620851516723633, "learning_rate": 9.52392370634934e-06, "loss": 0.7883, "step": 16156 }, { "epoch": 52.97377049180328, "grad_norm": 5.701192378997803, "learning_rate": 9.522863026106421e-06, "loss": 0.7306, "step": 16157 }, { "epoch": 52.97704918032787, "grad_norm": 8.85935115814209, "learning_rate": 9.521802351243724e-06, "loss": 0.7168, "step": 16158 }, { "epoch": 52.980327868852456, "grad_norm": 6.765563488006592, "learning_rate": 9.520741681773203e-06, "loss": 0.5924, "step": 16159 }, { "epoch": 52.98360655737705, "grad_norm": 6.271327972412109, "learning_rate": 9.519681017706817e-06, "loss": 0.6778, "step": 16160 }, { "epoch": 52.98688524590164, "grad_norm": 6.517463207244873, "learning_rate": 9.518620359056521e-06, "loss": 0.5538, "step": 16161 }, { "epoch": 52.99016393442623, "grad_norm": 7.675543785095215, "learning_rate": 9.517559705834288e-06, "loss": 0.5745, "step": 16162 }, { "epoch": 52.993442622950816, "grad_norm": 5.917856216430664, "learning_rate": 9.51649905805207e-06, "loss": 0.4286, "step": 16163 }, { "epoch": 52.99672131147541, "grad_norm": 5.5460710525512695, "learning_rate": 9.515438415721828e-06, "loss": 0.7137, "step": 16164 }, { "epoch": 53.0, "grad_norm": 6.5280327796936035, "learning_rate": 9.514377778855521e-06, "loss": 0.6126, "step": 16165 }, { "epoch": 53.00327868852459, "grad_norm": 5.660841464996338, "learning_rate": 9.513317147465105e-06, "loss": 0.5496, "step": 16166 }, { "epoch": 53.006557377049184, "grad_norm": 12.925932884216309, "learning_rate": 9.51225652156255e-06, "loss": 0.5165, "step": 16167 }, { "epoch": 53.00983606557377, "grad_norm": 6.676304340362549, "learning_rate": 9.511195901159809e-06, "loss": 0.677, "step": 16168 }, { "epoch": 53.01311475409836, "grad_norm": 6.020583629608154, "learning_rate": 9.510135286268842e-06, "loss": 0.5717, "step": 16169 }, { "epoch": 53.01639344262295, "grad_norm": 4.902417182922363, "learning_rate": 9.509074676901605e-06, "loss": 0.3989, "step": 16170 }, { "epoch": 53.019672131147544, "grad_norm": 8.48719310760498, "learning_rate": 9.508014073070066e-06, "loss": 0.3141, "step": 16171 }, { "epoch": 53.02295081967213, "grad_norm": 22.714094161987305, "learning_rate": 9.506953474786179e-06, "loss": 0.5883, "step": 16172 }, { "epoch": 53.02622950819672, "grad_norm": 8.286821365356445, "learning_rate": 9.505892882061905e-06, "loss": 0.4506, "step": 16173 }, { "epoch": 53.02950819672131, "grad_norm": 6.305464267730713, "learning_rate": 9.504832294909198e-06, "loss": 0.749, "step": 16174 }, { "epoch": 53.032786885245905, "grad_norm": 5.443095684051514, "learning_rate": 9.503771713340026e-06, "loss": 0.657, "step": 16175 }, { "epoch": 53.03606557377049, "grad_norm": 5.729711532592773, "learning_rate": 9.502711137366343e-06, "loss": 0.6039, "step": 16176 }, { "epoch": 53.03934426229508, "grad_norm": 5.486715316772461, "learning_rate": 9.501650567000108e-06, "loss": 0.4822, "step": 16177 }, { "epoch": 53.04262295081967, "grad_norm": 5.003005504608154, "learning_rate": 9.500590002253283e-06, "loss": 0.5013, "step": 16178 }, { "epoch": 53.045901639344265, "grad_norm": 12.260398864746094, "learning_rate": 9.499529443137823e-06, "loss": 0.5161, "step": 16179 }, { "epoch": 53.049180327868854, "grad_norm": 6.065624237060547, "learning_rate": 9.49846888966569e-06, "loss": 0.7669, "step": 16180 }, { "epoch": 53.05245901639344, "grad_norm": 5.357353210449219, "learning_rate": 9.497408341848842e-06, "loss": 0.7642, "step": 16181 }, { "epoch": 53.05573770491803, "grad_norm": 6.620537281036377, "learning_rate": 9.496347799699238e-06, "loss": 0.6567, "step": 16182 }, { "epoch": 53.059016393442626, "grad_norm": 4.976013660430908, "learning_rate": 9.495287263228834e-06, "loss": 0.6313, "step": 16183 }, { "epoch": 53.062295081967214, "grad_norm": 6.332184791564941, "learning_rate": 9.49422673244959e-06, "loss": 0.4469, "step": 16184 }, { "epoch": 53.0655737704918, "grad_norm": 6.379767417907715, "learning_rate": 9.493166207373469e-06, "loss": 0.5936, "step": 16185 }, { "epoch": 53.06885245901639, "grad_norm": 5.07990026473999, "learning_rate": 9.492105688012426e-06, "loss": 0.5154, "step": 16186 }, { "epoch": 53.072131147540986, "grad_norm": 6.348669528961182, "learning_rate": 9.491045174378415e-06, "loss": 0.4467, "step": 16187 }, { "epoch": 53.075409836065575, "grad_norm": 6.185232639312744, "learning_rate": 9.489984666483402e-06, "loss": 0.5044, "step": 16188 }, { "epoch": 53.07868852459016, "grad_norm": 12.948249816894531, "learning_rate": 9.488924164339342e-06, "loss": 0.3133, "step": 16189 }, { "epoch": 53.08196721311475, "grad_norm": 8.084793090820312, "learning_rate": 9.487863667958197e-06, "loss": 0.6566, "step": 16190 }, { "epoch": 53.08524590163935, "grad_norm": 6.505950927734375, "learning_rate": 9.486803177351918e-06, "loss": 0.5162, "step": 16191 }, { "epoch": 53.088524590163935, "grad_norm": 6.421402931213379, "learning_rate": 9.485742692532462e-06, "loss": 0.3685, "step": 16192 }, { "epoch": 53.09180327868852, "grad_norm": 43.63235092163086, "learning_rate": 9.484682213511798e-06, "loss": 0.5383, "step": 16193 }, { "epoch": 53.09508196721311, "grad_norm": 5.588198661804199, "learning_rate": 9.483621740301879e-06, "loss": 0.6731, "step": 16194 }, { "epoch": 53.09836065573771, "grad_norm": 5.620391368865967, "learning_rate": 9.48256127291466e-06, "loss": 0.3586, "step": 16195 }, { "epoch": 53.101639344262296, "grad_norm": 9.706119537353516, "learning_rate": 9.481500811362097e-06, "loss": 0.5367, "step": 16196 }, { "epoch": 53.104918032786884, "grad_norm": 9.17538070678711, "learning_rate": 9.480440355656154e-06, "loss": 0.4804, "step": 16197 }, { "epoch": 53.10819672131147, "grad_norm": 5.229379653930664, "learning_rate": 9.479379905808787e-06, "loss": 0.5846, "step": 16198 }, { "epoch": 53.11147540983607, "grad_norm": 17.574548721313477, "learning_rate": 9.478319461831955e-06, "loss": 0.7887, "step": 16199 }, { "epoch": 53.114754098360656, "grad_norm": 7.840076923370361, "learning_rate": 9.477259023737606e-06, "loss": 0.674, "step": 16200 }, { "epoch": 53.118032786885244, "grad_norm": 10.22420883178711, "learning_rate": 9.476198591537712e-06, "loss": 0.4672, "step": 16201 }, { "epoch": 53.12131147540983, "grad_norm": 5.735589504241943, "learning_rate": 9.475138165244223e-06, "loss": 0.3987, "step": 16202 }, { "epoch": 53.12459016393443, "grad_norm": 6.758022308349609, "learning_rate": 9.474077744869095e-06, "loss": 0.5197, "step": 16203 }, { "epoch": 53.12786885245902, "grad_norm": 9.875575065612793, "learning_rate": 9.473017330424287e-06, "loss": 0.2846, "step": 16204 }, { "epoch": 53.131147540983605, "grad_norm": 7.007097244262695, "learning_rate": 9.471956921921757e-06, "loss": 0.5354, "step": 16205 }, { "epoch": 53.13442622950819, "grad_norm": 5.924775123596191, "learning_rate": 9.470896519373463e-06, "loss": 0.5329, "step": 16206 }, { "epoch": 53.13770491803279, "grad_norm": 7.429110527038574, "learning_rate": 9.469836122791358e-06, "loss": 0.5793, "step": 16207 }, { "epoch": 53.14098360655738, "grad_norm": 6.301951885223389, "learning_rate": 9.468775732187406e-06, "loss": 0.6132, "step": 16208 }, { "epoch": 53.144262295081965, "grad_norm": 12.887718200683594, "learning_rate": 9.467715347573555e-06, "loss": 0.8254, "step": 16209 }, { "epoch": 53.14754098360656, "grad_norm": 5.254944801330566, "learning_rate": 9.466654968961767e-06, "loss": 0.5899, "step": 16210 }, { "epoch": 53.15081967213115, "grad_norm": 5.592700004577637, "learning_rate": 9.465594596364004e-06, "loss": 0.578, "step": 16211 }, { "epoch": 53.15409836065574, "grad_norm": 23.9863224029541, "learning_rate": 9.464534229792216e-06, "loss": 0.6519, "step": 16212 }, { "epoch": 53.157377049180326, "grad_norm": 5.817082405090332, "learning_rate": 9.463473869258356e-06, "loss": 0.7149, "step": 16213 }, { "epoch": 53.16065573770492, "grad_norm": 5.884301662445068, "learning_rate": 9.46241351477439e-06, "loss": 0.7312, "step": 16214 }, { "epoch": 53.16393442622951, "grad_norm": 5.537357330322266, "learning_rate": 9.461353166352274e-06, "loss": 0.5441, "step": 16215 }, { "epoch": 53.1672131147541, "grad_norm": 6.612993240356445, "learning_rate": 9.460292824003957e-06, "loss": 0.4801, "step": 16216 }, { "epoch": 53.170491803278686, "grad_norm": 6.035366535186768, "learning_rate": 9.459232487741403e-06, "loss": 0.4987, "step": 16217 }, { "epoch": 53.17377049180328, "grad_norm": 11.768117904663086, "learning_rate": 9.458172157576558e-06, "loss": 0.5728, "step": 16218 }, { "epoch": 53.17704918032787, "grad_norm": 22.789676666259766, "learning_rate": 9.457111833521392e-06, "loss": 0.5158, "step": 16219 }, { "epoch": 53.18032786885246, "grad_norm": 6.669564247131348, "learning_rate": 9.456051515587852e-06, "loss": 0.7493, "step": 16220 }, { "epoch": 53.18360655737705, "grad_norm": 7.060020446777344, "learning_rate": 9.4549912037879e-06, "loss": 0.6002, "step": 16221 }, { "epoch": 53.18688524590164, "grad_norm": 10.246698379516602, "learning_rate": 9.45393089813348e-06, "loss": 0.7067, "step": 16222 }, { "epoch": 53.19016393442623, "grad_norm": 5.520205020904541, "learning_rate": 9.452870598636565e-06, "loss": 0.6597, "step": 16223 }, { "epoch": 53.19344262295082, "grad_norm": 5.474496364593506, "learning_rate": 9.451810305309101e-06, "loss": 0.6064, "step": 16224 }, { "epoch": 53.19672131147541, "grad_norm": 7.718969821929932, "learning_rate": 9.450750018163047e-06, "loss": 0.5822, "step": 16225 }, { "epoch": 53.2, "grad_norm": 5.742212295532227, "learning_rate": 9.449689737210352e-06, "loss": 0.6455, "step": 16226 }, { "epoch": 53.20327868852459, "grad_norm": 5.50349760055542, "learning_rate": 9.448629462462983e-06, "loss": 0.7058, "step": 16227 }, { "epoch": 53.20655737704918, "grad_norm": 8.03785514831543, "learning_rate": 9.447569193932889e-06, "loss": 0.7387, "step": 16228 }, { "epoch": 53.20983606557377, "grad_norm": 11.319184303283691, "learning_rate": 9.446508931632027e-06, "loss": 0.7529, "step": 16229 }, { "epoch": 53.21311475409836, "grad_norm": 6.060954570770264, "learning_rate": 9.44544867557235e-06, "loss": 0.5857, "step": 16230 }, { "epoch": 53.21639344262295, "grad_norm": 5.419262409210205, "learning_rate": 9.444388425765816e-06, "loss": 0.4463, "step": 16231 }, { "epoch": 53.21967213114754, "grad_norm": 7.402733325958252, "learning_rate": 9.443328182224383e-06, "loss": 0.5008, "step": 16232 }, { "epoch": 53.22295081967213, "grad_norm": 5.7914018630981445, "learning_rate": 9.44226794496e-06, "loss": 0.5665, "step": 16233 }, { "epoch": 53.226229508196724, "grad_norm": 7.311293125152588, "learning_rate": 9.44120771398463e-06, "loss": 0.3362, "step": 16234 }, { "epoch": 53.22950819672131, "grad_norm": 7.139823913574219, "learning_rate": 9.44014748931022e-06, "loss": 0.6772, "step": 16235 }, { "epoch": 53.2327868852459, "grad_norm": 6.993179798126221, "learning_rate": 9.439087270948728e-06, "loss": 0.7438, "step": 16236 }, { "epoch": 53.23606557377049, "grad_norm": 5.802633285522461, "learning_rate": 9.438027058912115e-06, "loss": 0.518, "step": 16237 }, { "epoch": 53.239344262295084, "grad_norm": 6.00738525390625, "learning_rate": 9.43696685321233e-06, "loss": 0.8007, "step": 16238 }, { "epoch": 53.24262295081967, "grad_norm": 5.821923732757568, "learning_rate": 9.435906653861326e-06, "loss": 0.4166, "step": 16239 }, { "epoch": 53.24590163934426, "grad_norm": 6.248871803283691, "learning_rate": 9.434846460871064e-06, "loss": 0.3577, "step": 16240 }, { "epoch": 53.24918032786885, "grad_norm": 5.542257308959961, "learning_rate": 9.433786274253496e-06, "loss": 0.6104, "step": 16241 }, { "epoch": 53.252459016393445, "grad_norm": 5.887877464294434, "learning_rate": 9.432726094020577e-06, "loss": 0.3249, "step": 16242 }, { "epoch": 53.25573770491803, "grad_norm": 6.241225719451904, "learning_rate": 9.43166592018426e-06, "loss": 0.6429, "step": 16243 }, { "epoch": 53.25901639344262, "grad_norm": 5.876757621765137, "learning_rate": 9.430605752756497e-06, "loss": 0.5907, "step": 16244 }, { "epoch": 53.26229508196721, "grad_norm": 8.314291000366211, "learning_rate": 9.429545591749251e-06, "loss": 0.5023, "step": 16245 }, { "epoch": 53.265573770491805, "grad_norm": 4.80697774887085, "learning_rate": 9.428485437174472e-06, "loss": 0.4778, "step": 16246 }, { "epoch": 53.268852459016394, "grad_norm": 6.02160120010376, "learning_rate": 9.427425289044114e-06, "loss": 0.5763, "step": 16247 }, { "epoch": 53.27213114754098, "grad_norm": 7.2846479415893555, "learning_rate": 9.426365147370124e-06, "loss": 0.6788, "step": 16248 }, { "epoch": 53.27540983606557, "grad_norm": 7.829866886138916, "learning_rate": 9.42530501216447e-06, "loss": 0.4065, "step": 16249 }, { "epoch": 53.278688524590166, "grad_norm": 5.776814937591553, "learning_rate": 9.4242448834391e-06, "loss": 0.5132, "step": 16250 }, { "epoch": 53.281967213114754, "grad_norm": 6.280598163604736, "learning_rate": 9.423184761205966e-06, "loss": 0.6653, "step": 16251 }, { "epoch": 53.28524590163934, "grad_norm": 7.330111503601074, "learning_rate": 9.42212464547702e-06, "loss": 0.5802, "step": 16252 }, { "epoch": 53.28852459016394, "grad_norm": 9.649398803710938, "learning_rate": 9.421064536264225e-06, "loss": 0.511, "step": 16253 }, { "epoch": 53.291803278688526, "grad_norm": 4.995138645172119, "learning_rate": 9.420004433579529e-06, "loss": 0.6118, "step": 16254 }, { "epoch": 53.295081967213115, "grad_norm": 8.522012710571289, "learning_rate": 9.418944337434884e-06, "loss": 0.3228, "step": 16255 }, { "epoch": 53.2983606557377, "grad_norm": 8.458906173706055, "learning_rate": 9.417884247842245e-06, "loss": 0.4546, "step": 16256 }, { "epoch": 53.3016393442623, "grad_norm": 4.858337879180908, "learning_rate": 9.416824164813567e-06, "loss": 0.5786, "step": 16257 }, { "epoch": 53.30491803278689, "grad_norm": 5.5730814933776855, "learning_rate": 9.415764088360804e-06, "loss": 0.7449, "step": 16258 }, { "epoch": 53.308196721311475, "grad_norm": 5.977020263671875, "learning_rate": 9.414704018495905e-06, "loss": 0.8856, "step": 16259 }, { "epoch": 53.31147540983606, "grad_norm": 5.540119647979736, "learning_rate": 9.413643955230832e-06, "loss": 0.5626, "step": 16260 }, { "epoch": 53.31475409836066, "grad_norm": 5.606637954711914, "learning_rate": 9.412583898577527e-06, "loss": 0.694, "step": 16261 }, { "epoch": 53.31803278688525, "grad_norm": 4.543483257293701, "learning_rate": 9.411523848547955e-06, "loss": 0.634, "step": 16262 }, { "epoch": 53.321311475409836, "grad_norm": 6.1379570960998535, "learning_rate": 9.410463805154059e-06, "loss": 0.3966, "step": 16263 }, { "epoch": 53.324590163934424, "grad_norm": 6.9147868156433105, "learning_rate": 9.4094037684078e-06, "loss": 0.4677, "step": 16264 }, { "epoch": 53.32786885245902, "grad_norm": 9.082742691040039, "learning_rate": 9.408343738321126e-06, "loss": 0.6531, "step": 16265 }, { "epoch": 53.33114754098361, "grad_norm": 9.089875221252441, "learning_rate": 9.40728371490599e-06, "loss": 0.6496, "step": 16266 }, { "epoch": 53.334426229508196, "grad_norm": 5.226529121398926, "learning_rate": 9.40622369817435e-06, "loss": 0.6216, "step": 16267 }, { "epoch": 53.337704918032784, "grad_norm": 5.127921104431152, "learning_rate": 9.405163688138153e-06, "loss": 0.5363, "step": 16268 }, { "epoch": 53.34098360655738, "grad_norm": 6.643636226654053, "learning_rate": 9.404103684809357e-06, "loss": 0.6226, "step": 16269 }, { "epoch": 53.34426229508197, "grad_norm": 5.109539031982422, "learning_rate": 9.403043688199905e-06, "loss": 0.613, "step": 16270 }, { "epoch": 53.34754098360656, "grad_norm": 4.672423839569092, "learning_rate": 9.401983698321759e-06, "loss": 0.3241, "step": 16271 }, { "epoch": 53.350819672131145, "grad_norm": 5.971572399139404, "learning_rate": 9.400923715186871e-06, "loss": 0.6933, "step": 16272 }, { "epoch": 53.35409836065574, "grad_norm": 5.5970234870910645, "learning_rate": 9.399863738807192e-06, "loss": 0.5747, "step": 16273 }, { "epoch": 53.35737704918033, "grad_norm": 5.14905309677124, "learning_rate": 9.398803769194667e-06, "loss": 0.4933, "step": 16274 }, { "epoch": 53.36065573770492, "grad_norm": 5.930868625640869, "learning_rate": 9.397743806361258e-06, "loss": 0.5624, "step": 16275 }, { "epoch": 53.363934426229505, "grad_norm": 4.595748424530029, "learning_rate": 9.396683850318916e-06, "loss": 0.501, "step": 16276 }, { "epoch": 53.3672131147541, "grad_norm": 7.80321741104126, "learning_rate": 9.39562390107959e-06, "loss": 0.6326, "step": 16277 }, { "epoch": 53.37049180327869, "grad_norm": 6.764552116394043, "learning_rate": 9.394563958655229e-06, "loss": 0.6798, "step": 16278 }, { "epoch": 53.37377049180328, "grad_norm": 4.497920513153076, "learning_rate": 9.393504023057792e-06, "loss": 0.5079, "step": 16279 }, { "epoch": 53.377049180327866, "grad_norm": 5.473724842071533, "learning_rate": 9.39244409429923e-06, "loss": 0.6035, "step": 16280 }, { "epoch": 53.38032786885246, "grad_norm": 5.238735675811768, "learning_rate": 9.39138417239149e-06, "loss": 0.5318, "step": 16281 }, { "epoch": 53.38360655737705, "grad_norm": 5.5229058265686035, "learning_rate": 9.390324257346527e-06, "loss": 0.7541, "step": 16282 }, { "epoch": 53.38688524590164, "grad_norm": 5.720398902893066, "learning_rate": 9.389264349176288e-06, "loss": 0.5342, "step": 16283 }, { "epoch": 53.390163934426226, "grad_norm": 7.111474990844727, "learning_rate": 9.388204447892732e-06, "loss": 0.5376, "step": 16284 }, { "epoch": 53.39344262295082, "grad_norm": 5.079817771911621, "learning_rate": 9.387144553507807e-06, "loss": 0.4611, "step": 16285 }, { "epoch": 53.39672131147541, "grad_norm": 6.104548454284668, "learning_rate": 9.386084666033464e-06, "loss": 0.4682, "step": 16286 }, { "epoch": 53.4, "grad_norm": 5.426632881164551, "learning_rate": 9.385024785481653e-06, "loss": 0.6101, "step": 16287 }, { "epoch": 53.40327868852459, "grad_norm": 5.188615322113037, "learning_rate": 9.38396491186433e-06, "loss": 0.5326, "step": 16288 }, { "epoch": 53.40655737704918, "grad_norm": 4.930406093597412, "learning_rate": 9.382905045193441e-06, "loss": 0.4211, "step": 16289 }, { "epoch": 53.40983606557377, "grad_norm": 7.181222438812256, "learning_rate": 9.38184518548094e-06, "loss": 0.7601, "step": 16290 }, { "epoch": 53.41311475409836, "grad_norm": 6.6281232833862305, "learning_rate": 9.380785332738776e-06, "loss": 0.6821, "step": 16291 }, { "epoch": 53.41639344262295, "grad_norm": 7.411776065826416, "learning_rate": 9.379725486978902e-06, "loss": 0.6772, "step": 16292 }, { "epoch": 53.41967213114754, "grad_norm": 7.535318374633789, "learning_rate": 9.37866564821327e-06, "loss": 0.4574, "step": 16293 }, { "epoch": 53.42295081967213, "grad_norm": 6.593757629394531, "learning_rate": 9.377605816453828e-06, "loss": 0.3818, "step": 16294 }, { "epoch": 53.42622950819672, "grad_norm": 5.750981330871582, "learning_rate": 9.376545991712528e-06, "loss": 0.3635, "step": 16295 }, { "epoch": 53.429508196721315, "grad_norm": 5.640390396118164, "learning_rate": 9.375486174001317e-06, "loss": 0.3603, "step": 16296 }, { "epoch": 53.4327868852459, "grad_norm": 5.086368083953857, "learning_rate": 9.37442636333215e-06, "loss": 0.4411, "step": 16297 }, { "epoch": 53.43606557377049, "grad_norm": 7.936099529266357, "learning_rate": 9.373366559716979e-06, "loss": 0.5254, "step": 16298 }, { "epoch": 53.43934426229508, "grad_norm": 6.4702911376953125, "learning_rate": 9.37230676316775e-06, "loss": 0.5644, "step": 16299 }, { "epoch": 53.442622950819676, "grad_norm": 6.906724452972412, "learning_rate": 9.371246973696411e-06, "loss": 0.6055, "step": 16300 }, { "epoch": 53.445901639344264, "grad_norm": 5.768744468688965, "learning_rate": 9.37018719131492e-06, "loss": 0.4345, "step": 16301 }, { "epoch": 53.44918032786885, "grad_norm": 6.221554756164551, "learning_rate": 9.369127416035225e-06, "loss": 0.3374, "step": 16302 }, { "epoch": 53.45245901639344, "grad_norm": 4.855294227600098, "learning_rate": 9.368067647869273e-06, "loss": 0.6191, "step": 16303 }, { "epoch": 53.455737704918036, "grad_norm": 5.963212013244629, "learning_rate": 9.367007886829011e-06, "loss": 0.5895, "step": 16304 }, { "epoch": 53.459016393442624, "grad_norm": 5.492661476135254, "learning_rate": 9.365948132926397e-06, "loss": 0.4735, "step": 16305 }, { "epoch": 53.46229508196721, "grad_norm": 5.0735650062561035, "learning_rate": 9.364888386173379e-06, "loss": 0.5062, "step": 16306 }, { "epoch": 53.4655737704918, "grad_norm": 6.250614643096924, "learning_rate": 9.363828646581902e-06, "loss": 0.5891, "step": 16307 }, { "epoch": 53.4688524590164, "grad_norm": 7.374943256378174, "learning_rate": 9.36276891416392e-06, "loss": 0.7366, "step": 16308 }, { "epoch": 53.472131147540985, "grad_norm": 10.13064193725586, "learning_rate": 9.361709188931378e-06, "loss": 0.619, "step": 16309 }, { "epoch": 53.47540983606557, "grad_norm": 12.209515571594238, "learning_rate": 9.360649470896231e-06, "loss": 0.6903, "step": 16310 }, { "epoch": 53.47868852459016, "grad_norm": 27.901487350463867, "learning_rate": 9.359589760070427e-06, "loss": 0.5217, "step": 16311 }, { "epoch": 53.48196721311476, "grad_norm": 12.253768920898438, "learning_rate": 9.358530056465912e-06, "loss": 0.624, "step": 16312 }, { "epoch": 53.485245901639345, "grad_norm": 8.247638702392578, "learning_rate": 9.357470360094637e-06, "loss": 0.3651, "step": 16313 }, { "epoch": 53.488524590163934, "grad_norm": 6.316215991973877, "learning_rate": 9.356410670968555e-06, "loss": 0.4594, "step": 16314 }, { "epoch": 53.49180327868852, "grad_norm": 6.058167457580566, "learning_rate": 9.355350989099607e-06, "loss": 0.4694, "step": 16315 }, { "epoch": 53.49508196721312, "grad_norm": 5.641458034515381, "learning_rate": 9.354291314499752e-06, "loss": 0.6017, "step": 16316 }, { "epoch": 53.498360655737706, "grad_norm": 5.238840103149414, "learning_rate": 9.353231647180931e-06, "loss": 0.692, "step": 16317 }, { "epoch": 53.501639344262294, "grad_norm": 6.652454853057861, "learning_rate": 9.352171987155094e-06, "loss": 0.4961, "step": 16318 }, { "epoch": 53.50491803278688, "grad_norm": 5.238038063049316, "learning_rate": 9.351112334434195e-06, "loss": 0.801, "step": 16319 }, { "epoch": 53.50819672131148, "grad_norm": 38.186702728271484, "learning_rate": 9.350052689030178e-06, "loss": 0.6481, "step": 16320 }, { "epoch": 53.511475409836066, "grad_norm": 11.978151321411133, "learning_rate": 9.348993050954996e-06, "loss": 0.4401, "step": 16321 }, { "epoch": 53.514754098360655, "grad_norm": 8.085856437683105, "learning_rate": 9.347933420220586e-06, "loss": 0.6892, "step": 16322 }, { "epoch": 53.51803278688524, "grad_norm": 6.829607009887695, "learning_rate": 9.346873796838911e-06, "loss": 0.582, "step": 16323 }, { "epoch": 53.52131147540984, "grad_norm": 6.4060444831848145, "learning_rate": 9.345814180821913e-06, "loss": 0.4449, "step": 16324 }, { "epoch": 53.52459016393443, "grad_norm": 5.624240875244141, "learning_rate": 9.344754572181538e-06, "loss": 0.6032, "step": 16325 }, { "epoch": 53.527868852459015, "grad_norm": 5.884087085723877, "learning_rate": 9.343694970929736e-06, "loss": 0.6275, "step": 16326 }, { "epoch": 53.5311475409836, "grad_norm": 5.7278242111206055, "learning_rate": 9.342635377078456e-06, "loss": 0.7169, "step": 16327 }, { "epoch": 53.5344262295082, "grad_norm": 6.999364852905273, "learning_rate": 9.341575790639649e-06, "loss": 0.5581, "step": 16328 }, { "epoch": 53.53770491803279, "grad_norm": 5.597329616546631, "learning_rate": 9.340516211625258e-06, "loss": 0.4401, "step": 16329 }, { "epoch": 53.540983606557376, "grad_norm": 7.229821681976318, "learning_rate": 9.339456640047227e-06, "loss": 0.7705, "step": 16330 }, { "epoch": 53.544262295081964, "grad_norm": 5.204751968383789, "learning_rate": 9.338397075917515e-06, "loss": 0.2304, "step": 16331 }, { "epoch": 53.54754098360656, "grad_norm": 12.523114204406738, "learning_rate": 9.337337519248064e-06, "loss": 0.409, "step": 16332 }, { "epoch": 53.55081967213115, "grad_norm": 6.509654521942139, "learning_rate": 9.336277970050821e-06, "loss": 0.5034, "step": 16333 }, { "epoch": 53.554098360655736, "grad_norm": 5.983335018157959, "learning_rate": 9.335218428337735e-06, "loss": 0.4026, "step": 16334 }, { "epoch": 53.557377049180324, "grad_norm": 5.775701522827148, "learning_rate": 9.334158894120747e-06, "loss": 0.4321, "step": 16335 }, { "epoch": 53.56065573770492, "grad_norm": 5.640870094299316, "learning_rate": 9.333099367411813e-06, "loss": 0.69, "step": 16336 }, { "epoch": 53.56393442622951, "grad_norm": 6.576274394989014, "learning_rate": 9.332039848222878e-06, "loss": 0.7215, "step": 16337 }, { "epoch": 53.5672131147541, "grad_norm": 7.055671215057373, "learning_rate": 9.330980336565887e-06, "loss": 0.5812, "step": 16338 }, { "epoch": 53.570491803278685, "grad_norm": 5.811868667602539, "learning_rate": 9.329920832452786e-06, "loss": 0.4732, "step": 16339 }, { "epoch": 53.57377049180328, "grad_norm": 7.263028621673584, "learning_rate": 9.32886133589553e-06, "loss": 0.5584, "step": 16340 }, { "epoch": 53.57704918032787, "grad_norm": 6.730127334594727, "learning_rate": 9.327801846906055e-06, "loss": 0.509, "step": 16341 }, { "epoch": 53.58032786885246, "grad_norm": 7.41069221496582, "learning_rate": 9.326742365496316e-06, "loss": 0.4543, "step": 16342 }, { "epoch": 53.58360655737705, "grad_norm": 5.941397190093994, "learning_rate": 9.325682891678257e-06, "loss": 0.5724, "step": 16343 }, { "epoch": 53.58688524590164, "grad_norm": 5.7775115966796875, "learning_rate": 9.324623425463823e-06, "loss": 0.6671, "step": 16344 }, { "epoch": 53.59016393442623, "grad_norm": 6.257525444030762, "learning_rate": 9.323563966864962e-06, "loss": 0.7693, "step": 16345 }, { "epoch": 53.59344262295082, "grad_norm": 7.856921672821045, "learning_rate": 9.322504515893623e-06, "loss": 0.6156, "step": 16346 }, { "epoch": 53.59672131147541, "grad_norm": 8.991279602050781, "learning_rate": 9.321445072561748e-06, "loss": 0.3667, "step": 16347 }, { "epoch": 53.6, "grad_norm": 4.9564208984375, "learning_rate": 9.320385636881283e-06, "loss": 0.708, "step": 16348 }, { "epoch": 53.60327868852459, "grad_norm": 5.573885440826416, "learning_rate": 9.31932620886418e-06, "loss": 0.5715, "step": 16349 }, { "epoch": 53.60655737704918, "grad_norm": 5.6518330574035645, "learning_rate": 9.318266788522382e-06, "loss": 0.7371, "step": 16350 }, { "epoch": 53.609836065573774, "grad_norm": 5.6346659660339355, "learning_rate": 9.317207375867835e-06, "loss": 0.4742, "step": 16351 }, { "epoch": 53.61311475409836, "grad_norm": 6.995874404907227, "learning_rate": 9.31614797091248e-06, "loss": 0.6208, "step": 16352 }, { "epoch": 53.61639344262295, "grad_norm": 5.6337971687316895, "learning_rate": 9.315088573668273e-06, "loss": 0.5888, "step": 16353 }, { "epoch": 53.61967213114754, "grad_norm": 5.759492874145508, "learning_rate": 9.314029184147153e-06, "loss": 0.3794, "step": 16354 }, { "epoch": 53.622950819672134, "grad_norm": 4.726874828338623, "learning_rate": 9.312969802361069e-06, "loss": 0.5135, "step": 16355 }, { "epoch": 53.62622950819672, "grad_norm": 5.493017196655273, "learning_rate": 9.311910428321958e-06, "loss": 0.6143, "step": 16356 }, { "epoch": 53.62950819672131, "grad_norm": 6.895394802093506, "learning_rate": 9.310851062041779e-06, "loss": 0.7323, "step": 16357 }, { "epoch": 53.6327868852459, "grad_norm": 8.833252906799316, "learning_rate": 9.30979170353247e-06, "loss": 0.9334, "step": 16358 }, { "epoch": 53.636065573770495, "grad_norm": 6.2644524574279785, "learning_rate": 9.308732352805976e-06, "loss": 0.6437, "step": 16359 }, { "epoch": 53.63934426229508, "grad_norm": 7.534945964813232, "learning_rate": 9.307673009874244e-06, "loss": 0.4216, "step": 16360 }, { "epoch": 53.64262295081967, "grad_norm": 5.467163562774658, "learning_rate": 9.306613674749216e-06, "loss": 0.5081, "step": 16361 }, { "epoch": 53.64590163934426, "grad_norm": 5.777050495147705, "learning_rate": 9.305554347442842e-06, "loss": 0.8235, "step": 16362 }, { "epoch": 53.649180327868855, "grad_norm": 5.608716011047363, "learning_rate": 9.304495027967066e-06, "loss": 0.5082, "step": 16363 }, { "epoch": 53.65245901639344, "grad_norm": 6.7973175048828125, "learning_rate": 9.303435716333831e-06, "loss": 0.4307, "step": 16364 }, { "epoch": 53.65573770491803, "grad_norm": 8.63534164428711, "learning_rate": 9.302376412555078e-06, "loss": 0.4665, "step": 16365 }, { "epoch": 53.65901639344262, "grad_norm": 6.327050685882568, "learning_rate": 9.301317116642763e-06, "loss": 0.4487, "step": 16366 }, { "epoch": 53.662295081967216, "grad_norm": 7.048511505126953, "learning_rate": 9.300257828608822e-06, "loss": 0.3921, "step": 16367 }, { "epoch": 53.665573770491804, "grad_norm": 6.271712303161621, "learning_rate": 9.299198548465199e-06, "loss": 0.5459, "step": 16368 }, { "epoch": 53.66885245901639, "grad_norm": 5.018996238708496, "learning_rate": 9.298139276223841e-06, "loss": 0.7298, "step": 16369 }, { "epoch": 53.67213114754098, "grad_norm": 8.19443130493164, "learning_rate": 9.297080011896696e-06, "loss": 0.5777, "step": 16370 }, { "epoch": 53.675409836065576, "grad_norm": 8.099336624145508, "learning_rate": 9.296020755495701e-06, "loss": 0.4788, "step": 16371 }, { "epoch": 53.678688524590164, "grad_norm": 5.503259181976318, "learning_rate": 9.294961507032807e-06, "loss": 0.4303, "step": 16372 }, { "epoch": 53.68196721311475, "grad_norm": 16.731918334960938, "learning_rate": 9.293902266519955e-06, "loss": 0.4258, "step": 16373 }, { "epoch": 53.68524590163934, "grad_norm": 7.44286584854126, "learning_rate": 9.292843033969085e-06, "loss": 0.6671, "step": 16374 }, { "epoch": 53.68852459016394, "grad_norm": 6.152052402496338, "learning_rate": 9.291783809392148e-06, "loss": 0.5195, "step": 16375 }, { "epoch": 53.691803278688525, "grad_norm": 5.0855889320373535, "learning_rate": 9.290724592801087e-06, "loss": 0.649, "step": 16376 }, { "epoch": 53.69508196721311, "grad_norm": 6.237380027770996, "learning_rate": 9.289665384207842e-06, "loss": 0.2864, "step": 16377 }, { "epoch": 53.6983606557377, "grad_norm": 7.478559970855713, "learning_rate": 9.288606183624355e-06, "loss": 0.7784, "step": 16378 }, { "epoch": 53.7016393442623, "grad_norm": 7.1169753074646, "learning_rate": 9.287546991062577e-06, "loss": 0.5544, "step": 16379 }, { "epoch": 53.704918032786885, "grad_norm": 5.586382865905762, "learning_rate": 9.286487806534446e-06, "loss": 0.4787, "step": 16380 }, { "epoch": 53.708196721311474, "grad_norm": 5.511780738830566, "learning_rate": 9.28542863005191e-06, "loss": 0.6207, "step": 16381 }, { "epoch": 53.71147540983607, "grad_norm": 5.299402713775635, "learning_rate": 9.284369461626902e-06, "loss": 0.6041, "step": 16382 }, { "epoch": 53.71475409836066, "grad_norm": 6.404176712036133, "learning_rate": 9.283310301271378e-06, "loss": 0.5485, "step": 16383 }, { "epoch": 53.718032786885246, "grad_norm": 4.826950550079346, "learning_rate": 9.282251148997275e-06, "loss": 0.8039, "step": 16384 }, { "epoch": 53.721311475409834, "grad_norm": 5.672093391418457, "learning_rate": 9.281192004816538e-06, "loss": 0.3623, "step": 16385 }, { "epoch": 53.72459016393443, "grad_norm": 7.870621681213379, "learning_rate": 9.280132868741106e-06, "loss": 0.8895, "step": 16386 }, { "epoch": 53.72786885245902, "grad_norm": 5.819435119628906, "learning_rate": 9.279073740782922e-06, "loss": 0.5833, "step": 16387 }, { "epoch": 53.731147540983606, "grad_norm": 6.267920970916748, "learning_rate": 9.278014620953934e-06, "loss": 0.653, "step": 16388 }, { "epoch": 53.734426229508195, "grad_norm": 5.730108737945557, "learning_rate": 9.276955509266084e-06, "loss": 0.6729, "step": 16389 }, { "epoch": 53.73770491803279, "grad_norm": 7.089193820953369, "learning_rate": 9.27589640573131e-06, "loss": 0.5745, "step": 16390 }, { "epoch": 53.74098360655738, "grad_norm": 12.142077445983887, "learning_rate": 9.274837310361555e-06, "loss": 0.4975, "step": 16391 }, { "epoch": 53.74426229508197, "grad_norm": 5.833502292633057, "learning_rate": 9.273778223168766e-06, "loss": 0.6203, "step": 16392 }, { "epoch": 53.747540983606555, "grad_norm": 7.01474142074585, "learning_rate": 9.272719144164883e-06, "loss": 0.4973, "step": 16393 }, { "epoch": 53.75081967213115, "grad_norm": 6.193288326263428, "learning_rate": 9.271660073361844e-06, "loss": 0.4645, "step": 16394 }, { "epoch": 53.75409836065574, "grad_norm": 9.379762649536133, "learning_rate": 9.270601010771598e-06, "loss": 0.6089, "step": 16395 }, { "epoch": 53.75737704918033, "grad_norm": 4.708430767059326, "learning_rate": 9.269541956406084e-06, "loss": 0.6496, "step": 16396 }, { "epoch": 53.760655737704916, "grad_norm": 6.2575297355651855, "learning_rate": 9.268482910277242e-06, "loss": 0.3846, "step": 16397 }, { "epoch": 53.76393442622951, "grad_norm": 5.198685169219971, "learning_rate": 9.267423872397019e-06, "loss": 0.4618, "step": 16398 }, { "epoch": 53.7672131147541, "grad_norm": 6.321434020996094, "learning_rate": 9.266364842777352e-06, "loss": 0.4221, "step": 16399 }, { "epoch": 53.77049180327869, "grad_norm": 6.15485143661499, "learning_rate": 9.265305821430182e-06, "loss": 0.5452, "step": 16400 }, { "epoch": 53.773770491803276, "grad_norm": 5.91018533706665, "learning_rate": 9.264246808367454e-06, "loss": 0.4061, "step": 16401 }, { "epoch": 53.77704918032787, "grad_norm": 6.585245609283447, "learning_rate": 9.263187803601112e-06, "loss": 0.5572, "step": 16402 }, { "epoch": 53.78032786885246, "grad_norm": 5.1890997886657715, "learning_rate": 9.262128807143092e-06, "loss": 0.4684, "step": 16403 }, { "epoch": 53.78360655737705, "grad_norm": 5.314553737640381, "learning_rate": 9.26106981900533e-06, "loss": 0.5543, "step": 16404 }, { "epoch": 53.78688524590164, "grad_norm": 5.371603488922119, "learning_rate": 9.260010839199782e-06, "loss": 0.8994, "step": 16405 }, { "epoch": 53.79016393442623, "grad_norm": 7.25354528427124, "learning_rate": 9.25895186773838e-06, "loss": 0.4767, "step": 16406 }, { "epoch": 53.79344262295082, "grad_norm": 8.836186408996582, "learning_rate": 9.257892904633066e-06, "loss": 0.6475, "step": 16407 }, { "epoch": 53.79672131147541, "grad_norm": 5.371884822845459, "learning_rate": 9.256833949895776e-06, "loss": 0.7037, "step": 16408 }, { "epoch": 53.8, "grad_norm": 6.666891574859619, "learning_rate": 9.255775003538462e-06, "loss": 0.4089, "step": 16409 }, { "epoch": 53.80327868852459, "grad_norm": 4.971498489379883, "learning_rate": 9.254716065573057e-06, "loss": 0.5511, "step": 16410 }, { "epoch": 53.80655737704918, "grad_norm": 6.032467365264893, "learning_rate": 9.253657136011504e-06, "loss": 0.7141, "step": 16411 }, { "epoch": 53.80983606557377, "grad_norm": 5.917241096496582, "learning_rate": 9.252598214865743e-06, "loss": 0.5703, "step": 16412 }, { "epoch": 53.81311475409836, "grad_norm": 5.981856822967529, "learning_rate": 9.251539302147709e-06, "loss": 0.6328, "step": 16413 }, { "epoch": 53.81639344262295, "grad_norm": 9.456737518310547, "learning_rate": 9.250480397869354e-06, "loss": 0.6385, "step": 16414 }, { "epoch": 53.81967213114754, "grad_norm": 5.850083827972412, "learning_rate": 9.249421502042608e-06, "loss": 0.6256, "step": 16415 }, { "epoch": 53.82295081967213, "grad_norm": 9.574448585510254, "learning_rate": 9.24836261467942e-06, "loss": 0.6553, "step": 16416 }, { "epoch": 53.82622950819672, "grad_norm": 8.467619895935059, "learning_rate": 9.247303735791718e-06, "loss": 0.5618, "step": 16417 }, { "epoch": 53.829508196721314, "grad_norm": 6.302248001098633, "learning_rate": 9.246244865391453e-06, "loss": 0.4766, "step": 16418 }, { "epoch": 53.8327868852459, "grad_norm": 17.073089599609375, "learning_rate": 9.245186003490561e-06, "loss": 0.3847, "step": 16419 }, { "epoch": 53.83606557377049, "grad_norm": 6.031689167022705, "learning_rate": 9.24412715010098e-06, "loss": 0.5831, "step": 16420 }, { "epoch": 53.83934426229508, "grad_norm": 11.23121452331543, "learning_rate": 9.24306830523465e-06, "loss": 0.5783, "step": 16421 }, { "epoch": 53.842622950819674, "grad_norm": 5.887843132019043, "learning_rate": 9.242009468903516e-06, "loss": 0.5275, "step": 16422 }, { "epoch": 53.84590163934426, "grad_norm": 6.065671443939209, "learning_rate": 9.240950641119509e-06, "loss": 0.4726, "step": 16423 }, { "epoch": 53.84918032786885, "grad_norm": 5.306525707244873, "learning_rate": 9.239891821894576e-06, "loss": 0.3382, "step": 16424 }, { "epoch": 53.85245901639344, "grad_norm": 6.432379245758057, "learning_rate": 9.238833011240653e-06, "loss": 0.5126, "step": 16425 }, { "epoch": 53.855737704918035, "grad_norm": 5.671448230743408, "learning_rate": 9.237774209169677e-06, "loss": 0.564, "step": 16426 }, { "epoch": 53.85901639344262, "grad_norm": 7.430566787719727, "learning_rate": 9.23671541569359e-06, "loss": 0.7734, "step": 16427 }, { "epoch": 53.86229508196721, "grad_norm": 6.13045597076416, "learning_rate": 9.235656630824332e-06, "loss": 0.4532, "step": 16428 }, { "epoch": 53.86557377049181, "grad_norm": 7.276266574859619, "learning_rate": 9.23459785457384e-06, "loss": 0.418, "step": 16429 }, { "epoch": 53.868852459016395, "grad_norm": 5.280250549316406, "learning_rate": 9.233539086954048e-06, "loss": 0.5449, "step": 16430 }, { "epoch": 53.87213114754098, "grad_norm": 7.26027250289917, "learning_rate": 9.232480327976906e-06, "loss": 0.4929, "step": 16431 }, { "epoch": 53.87540983606557, "grad_norm": 7.470063209533691, "learning_rate": 9.231421577654344e-06, "loss": 0.4626, "step": 16432 }, { "epoch": 53.87868852459017, "grad_norm": 5.82319974899292, "learning_rate": 9.230362835998305e-06, "loss": 0.6329, "step": 16433 }, { "epoch": 53.881967213114756, "grad_norm": 6.165448188781738, "learning_rate": 9.22930410302072e-06, "loss": 0.3632, "step": 16434 }, { "epoch": 53.885245901639344, "grad_norm": 6.457863807678223, "learning_rate": 9.228245378733537e-06, "loss": 0.515, "step": 16435 }, { "epoch": 53.88852459016393, "grad_norm": 5.963606357574463, "learning_rate": 9.22718666314869e-06, "loss": 0.4171, "step": 16436 }, { "epoch": 53.89180327868853, "grad_norm": 6.711272716522217, "learning_rate": 9.226127956278115e-06, "loss": 0.5424, "step": 16437 }, { "epoch": 53.895081967213116, "grad_norm": 7.7312235832214355, "learning_rate": 9.225069258133754e-06, "loss": 0.4232, "step": 16438 }, { "epoch": 53.898360655737704, "grad_norm": 4.762138366699219, "learning_rate": 9.224010568727539e-06, "loss": 0.6624, "step": 16439 }, { "epoch": 53.90163934426229, "grad_norm": 6.8840813636779785, "learning_rate": 9.222951888071415e-06, "loss": 0.4104, "step": 16440 }, { "epoch": 53.90491803278689, "grad_norm": 6.640676498413086, "learning_rate": 9.221893216177316e-06, "loss": 0.4822, "step": 16441 }, { "epoch": 53.90819672131148, "grad_norm": 5.090117454528809, "learning_rate": 9.220834553057179e-06, "loss": 0.7383, "step": 16442 }, { "epoch": 53.911475409836065, "grad_norm": 5.126798152923584, "learning_rate": 9.21977589872294e-06, "loss": 0.767, "step": 16443 }, { "epoch": 53.91475409836065, "grad_norm": 7.792845249176025, "learning_rate": 9.218717253186544e-06, "loss": 0.6309, "step": 16444 }, { "epoch": 53.91803278688525, "grad_norm": 5.399314880371094, "learning_rate": 9.217658616459922e-06, "loss": 0.4357, "step": 16445 }, { "epoch": 53.92131147540984, "grad_norm": 6.966377258300781, "learning_rate": 9.216599988555012e-06, "loss": 0.6399, "step": 16446 }, { "epoch": 53.924590163934425, "grad_norm": 6.344176769256592, "learning_rate": 9.215541369483748e-06, "loss": 0.5831, "step": 16447 }, { "epoch": 53.927868852459014, "grad_norm": 6.880161762237549, "learning_rate": 9.214482759258074e-06, "loss": 0.5854, "step": 16448 }, { "epoch": 53.93114754098361, "grad_norm": 8.630051612854004, "learning_rate": 9.213424157889926e-06, "loss": 0.6291, "step": 16449 }, { "epoch": 53.9344262295082, "grad_norm": 5.656536102294922, "learning_rate": 9.212365565391234e-06, "loss": 0.5424, "step": 16450 }, { "epoch": 53.937704918032786, "grad_norm": 6.358635902404785, "learning_rate": 9.211306981773943e-06, "loss": 0.4889, "step": 16451 }, { "epoch": 53.940983606557374, "grad_norm": 6.43220853805542, "learning_rate": 9.210248407049982e-06, "loss": 0.6436, "step": 16452 }, { "epoch": 53.94426229508197, "grad_norm": 5.908609390258789, "learning_rate": 9.209189841231293e-06, "loss": 0.6486, "step": 16453 }, { "epoch": 53.94754098360656, "grad_norm": 6.8449788093566895, "learning_rate": 9.208131284329811e-06, "loss": 0.442, "step": 16454 }, { "epoch": 53.950819672131146, "grad_norm": 7.528491973876953, "learning_rate": 9.207072736357475e-06, "loss": 0.6322, "step": 16455 }, { "epoch": 53.954098360655735, "grad_norm": 5.180127143859863, "learning_rate": 9.206014197326211e-06, "loss": 0.6405, "step": 16456 }, { "epoch": 53.95737704918033, "grad_norm": 6.557319164276123, "learning_rate": 9.204955667247969e-06, "loss": 0.561, "step": 16457 }, { "epoch": 53.96065573770492, "grad_norm": 8.210922241210938, "learning_rate": 9.203897146134678e-06, "loss": 0.3186, "step": 16458 }, { "epoch": 53.96393442622951, "grad_norm": 6.471127986907959, "learning_rate": 9.202838633998274e-06, "loss": 0.5802, "step": 16459 }, { "epoch": 53.967213114754095, "grad_norm": 5.648171901702881, "learning_rate": 9.201780130850689e-06, "loss": 0.6692, "step": 16460 }, { "epoch": 53.97049180327869, "grad_norm": 5.144619941711426, "learning_rate": 9.200721636703866e-06, "loss": 0.5331, "step": 16461 }, { "epoch": 53.97377049180328, "grad_norm": 10.041584968566895, "learning_rate": 9.19966315156974e-06, "loss": 0.4143, "step": 16462 }, { "epoch": 53.97704918032787, "grad_norm": 6.917643070220947, "learning_rate": 9.198604675460242e-06, "loss": 0.6315, "step": 16463 }, { "epoch": 53.980327868852456, "grad_norm": 5.166810989379883, "learning_rate": 9.197546208387312e-06, "loss": 0.4415, "step": 16464 }, { "epoch": 53.98360655737705, "grad_norm": 5.7429518699646, "learning_rate": 9.196487750362876e-06, "loss": 0.8618, "step": 16465 }, { "epoch": 53.98688524590164, "grad_norm": 5.474179744720459, "learning_rate": 9.195429301398881e-06, "loss": 0.7099, "step": 16466 }, { "epoch": 53.99016393442623, "grad_norm": 8.061277389526367, "learning_rate": 9.194370861507257e-06, "loss": 0.4741, "step": 16467 }, { "epoch": 53.993442622950816, "grad_norm": 5.863176345825195, "learning_rate": 9.193312430699942e-06, "loss": 0.6718, "step": 16468 }, { "epoch": 53.99672131147541, "grad_norm": 6.718182563781738, "learning_rate": 9.19225400898886e-06, "loss": 0.6228, "step": 16469 }, { "epoch": 54.0, "grad_norm": 5.802156925201416, "learning_rate": 9.19119559638596e-06, "loss": 0.5228, "step": 16470 }, { "epoch": 54.00327868852459, "grad_norm": 6.103662014007568, "learning_rate": 9.19013719290317e-06, "loss": 0.4476, "step": 16471 }, { "epoch": 54.006557377049184, "grad_norm": 6.314140796661377, "learning_rate": 9.189078798552425e-06, "loss": 0.563, "step": 16472 }, { "epoch": 54.00983606557377, "grad_norm": 7.865981578826904, "learning_rate": 9.188020413345657e-06, "loss": 0.5981, "step": 16473 }, { "epoch": 54.01311475409836, "grad_norm": 5.831554412841797, "learning_rate": 9.186962037294806e-06, "loss": 0.362, "step": 16474 }, { "epoch": 54.01639344262295, "grad_norm": 6.362646579742432, "learning_rate": 9.185903670411803e-06, "loss": 0.3689, "step": 16475 }, { "epoch": 54.019672131147544, "grad_norm": 6.827358245849609, "learning_rate": 9.184845312708581e-06, "loss": 0.4491, "step": 16476 }, { "epoch": 54.02295081967213, "grad_norm": 9.611931800842285, "learning_rate": 9.183786964197077e-06, "loss": 0.5797, "step": 16477 }, { "epoch": 54.02622950819672, "grad_norm": 6.716209888458252, "learning_rate": 9.182728624889223e-06, "loss": 0.3801, "step": 16478 }, { "epoch": 54.02950819672131, "grad_norm": 5.796951770782471, "learning_rate": 9.181670294796953e-06, "loss": 0.512, "step": 16479 }, { "epoch": 54.032786885245905, "grad_norm": 7.422697067260742, "learning_rate": 9.180611973932203e-06, "loss": 0.3319, "step": 16480 }, { "epoch": 54.03606557377049, "grad_norm": 6.617609024047852, "learning_rate": 9.179553662306905e-06, "loss": 0.6393, "step": 16481 }, { "epoch": 54.03934426229508, "grad_norm": 5.184098720550537, "learning_rate": 9.178495359932988e-06, "loss": 0.5125, "step": 16482 }, { "epoch": 54.04262295081967, "grad_norm": 5.221158981323242, "learning_rate": 9.177437066822396e-06, "loss": 0.5873, "step": 16483 }, { "epoch": 54.045901639344265, "grad_norm": 5.952685356140137, "learning_rate": 9.176378782987054e-06, "loss": 0.4036, "step": 16484 }, { "epoch": 54.049180327868854, "grad_norm": 5.7888503074646, "learning_rate": 9.175320508438899e-06, "loss": 0.4214, "step": 16485 }, { "epoch": 54.05245901639344, "grad_norm": 5.66182804107666, "learning_rate": 9.174262243189858e-06, "loss": 0.3892, "step": 16486 }, { "epoch": 54.05573770491803, "grad_norm": 5.686047077178955, "learning_rate": 9.173203987251873e-06, "loss": 0.5805, "step": 16487 }, { "epoch": 54.059016393442626, "grad_norm": 11.458773612976074, "learning_rate": 9.172145740636872e-06, "loss": 0.5546, "step": 16488 }, { "epoch": 54.062295081967214, "grad_norm": 5.21331787109375, "learning_rate": 9.17108750335679e-06, "loss": 0.4934, "step": 16489 }, { "epoch": 54.0655737704918, "grad_norm": 11.947866439819336, "learning_rate": 9.170029275423557e-06, "loss": 0.6972, "step": 16490 }, { "epoch": 54.06885245901639, "grad_norm": 6.206542015075684, "learning_rate": 9.168971056849105e-06, "loss": 0.5119, "step": 16491 }, { "epoch": 54.072131147540986, "grad_norm": 5.677396297454834, "learning_rate": 9.16791284764537e-06, "loss": 0.6778, "step": 16492 }, { "epoch": 54.075409836065575, "grad_norm": 5.182802677154541, "learning_rate": 9.166854647824284e-06, "loss": 0.218, "step": 16493 }, { "epoch": 54.07868852459016, "grad_norm": 5.748135089874268, "learning_rate": 9.165796457397778e-06, "loss": 0.5318, "step": 16494 }, { "epoch": 54.08196721311475, "grad_norm": 7.930154323577881, "learning_rate": 9.164738276377778e-06, "loss": 0.6333, "step": 16495 }, { "epoch": 54.08524590163935, "grad_norm": 8.70552921295166, "learning_rate": 9.16368010477623e-06, "loss": 0.4764, "step": 16496 }, { "epoch": 54.088524590163935, "grad_norm": 5.733948707580566, "learning_rate": 9.162621942605055e-06, "loss": 0.6104, "step": 16497 }, { "epoch": 54.09180327868852, "grad_norm": 6.1741838455200195, "learning_rate": 9.161563789876192e-06, "loss": 0.4751, "step": 16498 }, { "epoch": 54.09508196721311, "grad_norm": 6.175321578979492, "learning_rate": 9.160505646601562e-06, "loss": 0.3311, "step": 16499 }, { "epoch": 54.09836065573771, "grad_norm": 6.443478584289551, "learning_rate": 9.159447512793109e-06, "loss": 0.8196, "step": 16500 }, { "epoch": 54.101639344262296, "grad_norm": 5.303830623626709, "learning_rate": 9.158389388462759e-06, "loss": 0.5685, "step": 16501 }, { "epoch": 54.104918032786884, "grad_norm": 8.542132377624512, "learning_rate": 9.157331273622441e-06, "loss": 0.7069, "step": 16502 }, { "epoch": 54.10819672131147, "grad_norm": 6.397563457489014, "learning_rate": 9.156273168284091e-06, "loss": 0.4731, "step": 16503 }, { "epoch": 54.11147540983607, "grad_norm": 6.183116436004639, "learning_rate": 9.155215072459636e-06, "loss": 0.832, "step": 16504 }, { "epoch": 54.114754098360656, "grad_norm": 5.81862735748291, "learning_rate": 9.154156986161013e-06, "loss": 0.508, "step": 16505 }, { "epoch": 54.118032786885244, "grad_norm": 5.211629390716553, "learning_rate": 9.153098909400146e-06, "loss": 0.276, "step": 16506 }, { "epoch": 54.12131147540983, "grad_norm": 6.505779266357422, "learning_rate": 9.152040842188973e-06, "loss": 0.5134, "step": 16507 }, { "epoch": 54.12459016393443, "grad_norm": 6.546084403991699, "learning_rate": 9.150982784539419e-06, "loss": 0.5218, "step": 16508 }, { "epoch": 54.12786885245902, "grad_norm": 12.368691444396973, "learning_rate": 9.149924736463415e-06, "loss": 0.3883, "step": 16509 }, { "epoch": 54.131147540983605, "grad_norm": 7.7358903884887695, "learning_rate": 9.148866697972897e-06, "loss": 0.5906, "step": 16510 }, { "epoch": 54.13442622950819, "grad_norm": 5.068387985229492, "learning_rate": 9.147808669079791e-06, "loss": 0.4729, "step": 16511 }, { "epoch": 54.13770491803279, "grad_norm": 8.73631763458252, "learning_rate": 9.146750649796025e-06, "loss": 0.4831, "step": 16512 }, { "epoch": 54.14098360655738, "grad_norm": 6.356575012207031, "learning_rate": 9.145692640133536e-06, "loss": 0.6146, "step": 16513 }, { "epoch": 54.144262295081965, "grad_norm": 7.24301815032959, "learning_rate": 9.144634640104252e-06, "loss": 0.7541, "step": 16514 }, { "epoch": 54.14754098360656, "grad_norm": 5.170797348022461, "learning_rate": 9.143576649720101e-06, "loss": 0.5745, "step": 16515 }, { "epoch": 54.15081967213115, "grad_norm": 6.811440944671631, "learning_rate": 9.142518668993015e-06, "loss": 0.6028, "step": 16516 }, { "epoch": 54.15409836065574, "grad_norm": 6.06710147857666, "learning_rate": 9.141460697934916e-06, "loss": 0.6504, "step": 16517 }, { "epoch": 54.157377049180326, "grad_norm": 5.607608795166016, "learning_rate": 9.140402736557745e-06, "loss": 0.334, "step": 16518 }, { "epoch": 54.16065573770492, "grad_norm": 7.263942241668701, "learning_rate": 9.139344784873429e-06, "loss": 0.467, "step": 16519 }, { "epoch": 54.16393442622951, "grad_norm": 6.043437957763672, "learning_rate": 9.138286842893894e-06, "loss": 0.4197, "step": 16520 }, { "epoch": 54.1672131147541, "grad_norm": 5.778901100158691, "learning_rate": 9.137228910631065e-06, "loss": 0.6609, "step": 16521 }, { "epoch": 54.170491803278686, "grad_norm": 4.482123374938965, "learning_rate": 9.136170988096883e-06, "loss": 0.4072, "step": 16522 }, { "epoch": 54.17377049180328, "grad_norm": 23.983680725097656, "learning_rate": 9.135113075303271e-06, "loss": 0.4799, "step": 16523 }, { "epoch": 54.17704918032787, "grad_norm": 7.174365043640137, "learning_rate": 9.13405517226216e-06, "loss": 0.8146, "step": 16524 }, { "epoch": 54.18032786885246, "grad_norm": 5.774660587310791, "learning_rate": 9.13299727898547e-06, "loss": 0.4704, "step": 16525 }, { "epoch": 54.18360655737705, "grad_norm": 6.049591064453125, "learning_rate": 9.131939395485143e-06, "loss": 0.5023, "step": 16526 }, { "epoch": 54.18688524590164, "grad_norm": 6.8831868171691895, "learning_rate": 9.130881521773103e-06, "loss": 0.5115, "step": 16527 }, { "epoch": 54.19016393442623, "grad_norm": 5.281366348266602, "learning_rate": 9.129823657861276e-06, "loss": 0.6732, "step": 16528 }, { "epoch": 54.19344262295082, "grad_norm": 6.497047424316406, "learning_rate": 9.128765803761589e-06, "loss": 0.5587, "step": 16529 }, { "epoch": 54.19672131147541, "grad_norm": 8.561881065368652, "learning_rate": 9.127707959485975e-06, "loss": 0.4381, "step": 16530 }, { "epoch": 54.2, "grad_norm": 11.382436752319336, "learning_rate": 9.126650125046361e-06, "loss": 0.5296, "step": 16531 }, { "epoch": 54.20327868852459, "grad_norm": 6.381418704986572, "learning_rate": 9.125592300454675e-06, "loss": 0.5608, "step": 16532 }, { "epoch": 54.20655737704918, "grad_norm": 6.2053046226501465, "learning_rate": 9.124534485722846e-06, "loss": 0.6479, "step": 16533 }, { "epoch": 54.20983606557377, "grad_norm": 6.835563659667969, "learning_rate": 9.123476680862799e-06, "loss": 0.6297, "step": 16534 }, { "epoch": 54.21311475409836, "grad_norm": 5.841323375701904, "learning_rate": 9.122418885886464e-06, "loss": 0.4951, "step": 16535 }, { "epoch": 54.21639344262295, "grad_norm": 7.285590171813965, "learning_rate": 9.12136110080577e-06, "loss": 0.5429, "step": 16536 }, { "epoch": 54.21967213114754, "grad_norm": 5.17940616607666, "learning_rate": 9.120303325632643e-06, "loss": 0.5394, "step": 16537 }, { "epoch": 54.22295081967213, "grad_norm": 5.7179646492004395, "learning_rate": 9.119245560379007e-06, "loss": 0.4821, "step": 16538 }, { "epoch": 54.226229508196724, "grad_norm": 7.029606342315674, "learning_rate": 9.118187805056798e-06, "loss": 0.5237, "step": 16539 }, { "epoch": 54.22950819672131, "grad_norm": 5.1587605476379395, "learning_rate": 9.117130059677938e-06, "loss": 0.5729, "step": 16540 }, { "epoch": 54.2327868852459, "grad_norm": 6.250030517578125, "learning_rate": 9.116072324254354e-06, "loss": 0.3958, "step": 16541 }, { "epoch": 54.23606557377049, "grad_norm": 6.089869976043701, "learning_rate": 9.115014598797973e-06, "loss": 0.6104, "step": 16542 }, { "epoch": 54.239344262295084, "grad_norm": 9.810985565185547, "learning_rate": 9.113956883320719e-06, "loss": 0.5906, "step": 16543 }, { "epoch": 54.24262295081967, "grad_norm": 4.952756881713867, "learning_rate": 9.112899177834528e-06, "loss": 0.7571, "step": 16544 }, { "epoch": 54.24590163934426, "grad_norm": 5.2078704833984375, "learning_rate": 9.11184148235132e-06, "loss": 0.5175, "step": 16545 }, { "epoch": 54.24918032786885, "grad_norm": 6.004544258117676, "learning_rate": 9.110783796883021e-06, "loss": 0.4167, "step": 16546 }, { "epoch": 54.252459016393445, "grad_norm": 5.846768856048584, "learning_rate": 9.109726121441558e-06, "loss": 0.4664, "step": 16547 }, { "epoch": 54.25573770491803, "grad_norm": 5.4566779136657715, "learning_rate": 9.10866845603886e-06, "loss": 0.337, "step": 16548 }, { "epoch": 54.25901639344262, "grad_norm": 4.2705912590026855, "learning_rate": 9.107610800686855e-06, "loss": 0.3688, "step": 16549 }, { "epoch": 54.26229508196721, "grad_norm": 5.106936931610107, "learning_rate": 9.106553155397464e-06, "loss": 0.5219, "step": 16550 }, { "epoch": 54.265573770491805, "grad_norm": 6.565209865570068, "learning_rate": 9.105495520182612e-06, "loss": 0.5833, "step": 16551 }, { "epoch": 54.268852459016394, "grad_norm": 6.440389156341553, "learning_rate": 9.104437895054232e-06, "loss": 0.3987, "step": 16552 }, { "epoch": 54.27213114754098, "grad_norm": 17.15711212158203, "learning_rate": 9.103380280024244e-06, "loss": 0.3797, "step": 16553 }, { "epoch": 54.27540983606557, "grad_norm": 6.020164489746094, "learning_rate": 9.102322675104578e-06, "loss": 0.5138, "step": 16554 }, { "epoch": 54.278688524590166, "grad_norm": 5.521937370300293, "learning_rate": 9.101265080307153e-06, "loss": 0.6395, "step": 16555 }, { "epoch": 54.281967213114754, "grad_norm": 6.996729850769043, "learning_rate": 9.1002074956439e-06, "loss": 0.5093, "step": 16556 }, { "epoch": 54.28524590163934, "grad_norm": 13.633252143859863, "learning_rate": 9.099149921126746e-06, "loss": 0.4221, "step": 16557 }, { "epoch": 54.28852459016394, "grad_norm": 7.283973217010498, "learning_rate": 9.09809235676761e-06, "loss": 0.5304, "step": 16558 }, { "epoch": 54.291803278688526, "grad_norm": 6.401472091674805, "learning_rate": 9.097034802578421e-06, "loss": 0.6349, "step": 16559 }, { "epoch": 54.295081967213115, "grad_norm": 7.188736438751221, "learning_rate": 9.095977258571104e-06, "loss": 0.5136, "step": 16560 }, { "epoch": 54.2983606557377, "grad_norm": 6.605559349060059, "learning_rate": 9.094919724757582e-06, "loss": 0.7165, "step": 16561 }, { "epoch": 54.3016393442623, "grad_norm": 4.750585079193115, "learning_rate": 9.093862201149785e-06, "loss": 0.5162, "step": 16562 }, { "epoch": 54.30491803278689, "grad_norm": 5.854217529296875, "learning_rate": 9.092804687759633e-06, "loss": 0.7184, "step": 16563 }, { "epoch": 54.308196721311475, "grad_norm": 6.3313775062561035, "learning_rate": 9.091747184599045e-06, "loss": 0.3282, "step": 16564 }, { "epoch": 54.31147540983606, "grad_norm": 9.508696556091309, "learning_rate": 9.090689691679958e-06, "loss": 0.583, "step": 16565 }, { "epoch": 54.31475409836066, "grad_norm": 5.845407485961914, "learning_rate": 9.08963220901429e-06, "loss": 0.8146, "step": 16566 }, { "epoch": 54.31803278688525, "grad_norm": 5.331130027770996, "learning_rate": 9.088574736613965e-06, "loss": 0.3909, "step": 16567 }, { "epoch": 54.321311475409836, "grad_norm": 7.170238494873047, "learning_rate": 9.087517274490909e-06, "loss": 0.3716, "step": 16568 }, { "epoch": 54.324590163934424, "grad_norm": 5.874871730804443, "learning_rate": 9.086459822657038e-06, "loss": 0.5493, "step": 16569 }, { "epoch": 54.32786885245902, "grad_norm": 5.027512550354004, "learning_rate": 9.085402381124287e-06, "loss": 0.6594, "step": 16570 }, { "epoch": 54.33114754098361, "grad_norm": 5.754326820373535, "learning_rate": 9.084344949904576e-06, "loss": 0.6417, "step": 16571 }, { "epoch": 54.334426229508196, "grad_norm": 5.72603178024292, "learning_rate": 9.083287529009827e-06, "loss": 0.6911, "step": 16572 }, { "epoch": 54.337704918032784, "grad_norm": 5.725379943847656, "learning_rate": 9.082230118451962e-06, "loss": 0.6559, "step": 16573 }, { "epoch": 54.34098360655738, "grad_norm": 8.012691497802734, "learning_rate": 9.08117271824291e-06, "loss": 0.6074, "step": 16574 }, { "epoch": 54.34426229508197, "grad_norm": 4.773938179016113, "learning_rate": 9.080115328394588e-06, "loss": 0.4519, "step": 16575 }, { "epoch": 54.34754098360656, "grad_norm": 5.420166015625, "learning_rate": 9.079057948918925e-06, "loss": 0.6217, "step": 16576 }, { "epoch": 54.350819672131145, "grad_norm": 9.63947868347168, "learning_rate": 9.07800057982784e-06, "loss": 0.7697, "step": 16577 }, { "epoch": 54.35409836065574, "grad_norm": 5.934177875518799, "learning_rate": 9.076943221133254e-06, "loss": 0.4698, "step": 16578 }, { "epoch": 54.35737704918033, "grad_norm": 5.27864408493042, "learning_rate": 9.075885872847096e-06, "loss": 0.7342, "step": 16579 }, { "epoch": 54.36065573770492, "grad_norm": 7.633417129516602, "learning_rate": 9.074828534981286e-06, "loss": 0.614, "step": 16580 }, { "epoch": 54.363934426229505, "grad_norm": 5.863992214202881, "learning_rate": 9.073771207547746e-06, "loss": 0.4002, "step": 16581 }, { "epoch": 54.3672131147541, "grad_norm": 5.065723419189453, "learning_rate": 9.072713890558397e-06, "loss": 0.5192, "step": 16582 }, { "epoch": 54.37049180327869, "grad_norm": 5.9412713050842285, "learning_rate": 9.071656584025164e-06, "loss": 0.6346, "step": 16583 }, { "epoch": 54.37377049180328, "grad_norm": 6.4387311935424805, "learning_rate": 9.070599287959968e-06, "loss": 0.4558, "step": 16584 }, { "epoch": 54.377049180327866, "grad_norm": 5.299626350402832, "learning_rate": 9.069542002374733e-06, "loss": 0.4164, "step": 16585 }, { "epoch": 54.38032786885246, "grad_norm": 7.040130615234375, "learning_rate": 9.068484727281377e-06, "loss": 0.5108, "step": 16586 }, { "epoch": 54.38360655737705, "grad_norm": 14.33513355255127, "learning_rate": 9.067427462691827e-06, "loss": 0.6152, "step": 16587 }, { "epoch": 54.38688524590164, "grad_norm": 5.136730670928955, "learning_rate": 9.066370208617999e-06, "loss": 0.3969, "step": 16588 }, { "epoch": 54.390163934426226, "grad_norm": 7.962399482727051, "learning_rate": 9.065312965071819e-06, "loss": 0.5135, "step": 16589 }, { "epoch": 54.39344262295082, "grad_norm": 7.079571723937988, "learning_rate": 9.064255732065209e-06, "loss": 0.573, "step": 16590 }, { "epoch": 54.39672131147541, "grad_norm": 10.357539176940918, "learning_rate": 9.063198509610083e-06, "loss": 0.5401, "step": 16591 }, { "epoch": 54.4, "grad_norm": 7.868309020996094, "learning_rate": 9.062141297718372e-06, "loss": 0.5874, "step": 16592 }, { "epoch": 54.40327868852459, "grad_norm": 12.288429260253906, "learning_rate": 9.061084096401994e-06, "loss": 0.5281, "step": 16593 }, { "epoch": 54.40655737704918, "grad_norm": 9.046089172363281, "learning_rate": 9.060026905672868e-06, "loss": 0.6012, "step": 16594 }, { "epoch": 54.40983606557377, "grad_norm": 6.4665327072143555, "learning_rate": 9.058969725542913e-06, "loss": 0.3762, "step": 16595 }, { "epoch": 54.41311475409836, "grad_norm": 6.272141933441162, "learning_rate": 9.057912556024056e-06, "loss": 0.3926, "step": 16596 }, { "epoch": 54.41639344262295, "grad_norm": 5.9535064697265625, "learning_rate": 9.056855397128214e-06, "loss": 0.4643, "step": 16597 }, { "epoch": 54.41967213114754, "grad_norm": 7.116267681121826, "learning_rate": 9.05579824886731e-06, "loss": 0.5314, "step": 16598 }, { "epoch": 54.42295081967213, "grad_norm": 8.437457084655762, "learning_rate": 9.054741111253257e-06, "loss": 0.5479, "step": 16599 }, { "epoch": 54.42622950819672, "grad_norm": 5.385234355926514, "learning_rate": 9.053683984297983e-06, "loss": 0.413, "step": 16600 }, { "epoch": 54.429508196721315, "grad_norm": 6.787578105926514, "learning_rate": 9.05262686801341e-06, "loss": 0.8667, "step": 16601 }, { "epoch": 54.4327868852459, "grad_norm": 4.975378036499023, "learning_rate": 9.05156976241145e-06, "loss": 0.4892, "step": 16602 }, { "epoch": 54.43606557377049, "grad_norm": 5.97359561920166, "learning_rate": 9.05051266750403e-06, "loss": 0.5236, "step": 16603 }, { "epoch": 54.43934426229508, "grad_norm": 5.9979472160339355, "learning_rate": 9.049455583303061e-06, "loss": 0.6353, "step": 16604 }, { "epoch": 54.442622950819676, "grad_norm": 7.897766590118408, "learning_rate": 9.048398509820473e-06, "loss": 0.6993, "step": 16605 }, { "epoch": 54.445901639344264, "grad_norm": 15.209992408752441, "learning_rate": 9.047341447068183e-06, "loss": 0.608, "step": 16606 }, { "epoch": 54.44918032786885, "grad_norm": 6.247537136077881, "learning_rate": 9.046284395058104e-06, "loss": 0.4743, "step": 16607 }, { "epoch": 54.45245901639344, "grad_norm": 5.722104072570801, "learning_rate": 9.045227353802162e-06, "loss": 0.5563, "step": 16608 }, { "epoch": 54.455737704918036, "grad_norm": 6.4683518409729, "learning_rate": 9.044170323312276e-06, "loss": 0.3975, "step": 16609 }, { "epoch": 54.459016393442624, "grad_norm": 5.63347053527832, "learning_rate": 9.043113303600363e-06, "loss": 0.6027, "step": 16610 }, { "epoch": 54.46229508196721, "grad_norm": 6.868808746337891, "learning_rate": 9.042056294678342e-06, "loss": 0.4528, "step": 16611 }, { "epoch": 54.4655737704918, "grad_norm": 6.23650598526001, "learning_rate": 9.04099929655813e-06, "loss": 0.4225, "step": 16612 }, { "epoch": 54.4688524590164, "grad_norm": 5.247714996337891, "learning_rate": 9.03994230925165e-06, "loss": 0.6028, "step": 16613 }, { "epoch": 54.472131147540985, "grad_norm": 7.0507426261901855, "learning_rate": 9.03888533277082e-06, "loss": 0.4401, "step": 16614 }, { "epoch": 54.47540983606557, "grad_norm": 5.63631534576416, "learning_rate": 9.037828367127556e-06, "loss": 0.5466, "step": 16615 }, { "epoch": 54.47868852459016, "grad_norm": 5.278692245483398, "learning_rate": 9.036771412333777e-06, "loss": 0.4836, "step": 16616 }, { "epoch": 54.48196721311476, "grad_norm": 11.370400428771973, "learning_rate": 9.0357144684014e-06, "loss": 0.4883, "step": 16617 }, { "epoch": 54.485245901639345, "grad_norm": 5.939797878265381, "learning_rate": 9.034657535342349e-06, "loss": 0.3621, "step": 16618 }, { "epoch": 54.488524590163934, "grad_norm": 6.253355979919434, "learning_rate": 9.033600613168537e-06, "loss": 0.3555, "step": 16619 }, { "epoch": 54.49180327868852, "grad_norm": 7.099003791809082, "learning_rate": 9.032543701891885e-06, "loss": 0.5995, "step": 16620 }, { "epoch": 54.49508196721312, "grad_norm": 4.742010116577148, "learning_rate": 9.031486801524301e-06, "loss": 0.4651, "step": 16621 }, { "epoch": 54.498360655737706, "grad_norm": 5.385980606079102, "learning_rate": 9.030429912077715e-06, "loss": 0.61, "step": 16622 }, { "epoch": 54.501639344262294, "grad_norm": 9.305886268615723, "learning_rate": 9.029373033564041e-06, "loss": 0.5601, "step": 16623 }, { "epoch": 54.50491803278688, "grad_norm": 8.70183277130127, "learning_rate": 9.028316165995196e-06, "loss": 0.4771, "step": 16624 }, { "epoch": 54.50819672131148, "grad_norm": 6.885620594024658, "learning_rate": 9.027259309383092e-06, "loss": 0.5255, "step": 16625 }, { "epoch": 54.511475409836066, "grad_norm": 5.637942314147949, "learning_rate": 9.026202463739653e-06, "loss": 0.6559, "step": 16626 }, { "epoch": 54.514754098360655, "grad_norm": 5.659266948699951, "learning_rate": 9.025145629076797e-06, "loss": 0.5801, "step": 16627 }, { "epoch": 54.51803278688524, "grad_norm": 5.447757720947266, "learning_rate": 9.024088805406434e-06, "loss": 0.4809, "step": 16628 }, { "epoch": 54.52131147540984, "grad_norm": 5.572902202606201, "learning_rate": 9.023031992740488e-06, "loss": 0.5622, "step": 16629 }, { "epoch": 54.52459016393443, "grad_norm": 4.793338775634766, "learning_rate": 9.021975191090866e-06, "loss": 0.5291, "step": 16630 }, { "epoch": 54.527868852459015, "grad_norm": 5.619112491607666, "learning_rate": 9.020918400469494e-06, "loss": 0.6069, "step": 16631 }, { "epoch": 54.5311475409836, "grad_norm": 5.463886260986328, "learning_rate": 9.019861620888286e-06, "loss": 0.4749, "step": 16632 }, { "epoch": 54.5344262295082, "grad_norm": 5.213128089904785, "learning_rate": 9.018804852359158e-06, "loss": 0.4981, "step": 16633 }, { "epoch": 54.53770491803279, "grad_norm": 6.053102016448975, "learning_rate": 9.01774809489402e-06, "loss": 0.4671, "step": 16634 }, { "epoch": 54.540983606557376, "grad_norm": 4.4997663497924805, "learning_rate": 9.016691348504798e-06, "loss": 0.851, "step": 16635 }, { "epoch": 54.544262295081964, "grad_norm": 4.858460426330566, "learning_rate": 9.015634613203404e-06, "loss": 0.4077, "step": 16636 }, { "epoch": 54.54754098360656, "grad_norm": 5.690419673919678, "learning_rate": 9.01457788900175e-06, "loss": 0.4449, "step": 16637 }, { "epoch": 54.55081967213115, "grad_norm": 7.0732293128967285, "learning_rate": 9.013521175911755e-06, "loss": 0.319, "step": 16638 }, { "epoch": 54.554098360655736, "grad_norm": 6.194698810577393, "learning_rate": 9.012464473945338e-06, "loss": 0.5517, "step": 16639 }, { "epoch": 54.557377049180324, "grad_norm": 4.476680755615234, "learning_rate": 9.011407783114407e-06, "loss": 0.5062, "step": 16640 }, { "epoch": 54.56065573770492, "grad_norm": 5.159882068634033, "learning_rate": 9.010351103430885e-06, "loss": 0.4694, "step": 16641 }, { "epoch": 54.56393442622951, "grad_norm": 6.505926132202148, "learning_rate": 9.009294434906682e-06, "loss": 0.5882, "step": 16642 }, { "epoch": 54.5672131147541, "grad_norm": 5.409250259399414, "learning_rate": 9.008237777553712e-06, "loss": 0.9127, "step": 16643 }, { "epoch": 54.570491803278685, "grad_norm": 6.392435550689697, "learning_rate": 9.007181131383894e-06, "loss": 0.5821, "step": 16644 }, { "epoch": 54.57377049180328, "grad_norm": 5.671857833862305, "learning_rate": 9.006124496409141e-06, "loss": 0.6803, "step": 16645 }, { "epoch": 54.57704918032787, "grad_norm": 5.191391944885254, "learning_rate": 9.00506787264137e-06, "loss": 0.591, "step": 16646 }, { "epoch": 54.58032786885246, "grad_norm": 4.776426792144775, "learning_rate": 9.004011260092489e-06, "loss": 0.6694, "step": 16647 }, { "epoch": 54.58360655737705, "grad_norm": 6.501727104187012, "learning_rate": 9.002954658774417e-06, "loss": 0.9282, "step": 16648 }, { "epoch": 54.58688524590164, "grad_norm": 10.078068733215332, "learning_rate": 9.00189806869907e-06, "loss": 0.3987, "step": 16649 }, { "epoch": 54.59016393442623, "grad_norm": 5.793273448944092, "learning_rate": 9.000841489878362e-06, "loss": 0.4359, "step": 16650 }, { "epoch": 54.59344262295082, "grad_norm": 5.430945873260498, "learning_rate": 8.9997849223242e-06, "loss": 0.5432, "step": 16651 }, { "epoch": 54.59672131147541, "grad_norm": 10.88771915435791, "learning_rate": 8.998728366048506e-06, "loss": 0.6936, "step": 16652 }, { "epoch": 54.6, "grad_norm": 7.788542747497559, "learning_rate": 8.99767182106319e-06, "loss": 0.57, "step": 16653 }, { "epoch": 54.60327868852459, "grad_norm": 4.9423370361328125, "learning_rate": 8.996615287380168e-06, "loss": 0.565, "step": 16654 }, { "epoch": 54.60655737704918, "grad_norm": 9.938158988952637, "learning_rate": 8.995558765011351e-06, "loss": 0.4814, "step": 16655 }, { "epoch": 54.609836065573774, "grad_norm": 4.433919906616211, "learning_rate": 8.99450225396865e-06, "loss": 0.3786, "step": 16656 }, { "epoch": 54.61311475409836, "grad_norm": 5.5505757331848145, "learning_rate": 8.993445754263985e-06, "loss": 0.6111, "step": 16657 }, { "epoch": 54.61639344262295, "grad_norm": 4.9212493896484375, "learning_rate": 8.992389265909265e-06, "loss": 0.7282, "step": 16658 }, { "epoch": 54.61967213114754, "grad_norm": 5.532207012176514, "learning_rate": 8.991332788916406e-06, "loss": 0.7277, "step": 16659 }, { "epoch": 54.622950819672134, "grad_norm": 4.726972579956055, "learning_rate": 8.990276323297313e-06, "loss": 0.5633, "step": 16660 }, { "epoch": 54.62622950819672, "grad_norm": 7.239782333374023, "learning_rate": 8.989219869063909e-06, "loss": 0.632, "step": 16661 }, { "epoch": 54.62950819672131, "grad_norm": 6.4554009437561035, "learning_rate": 8.9881634262281e-06, "loss": 0.3928, "step": 16662 }, { "epoch": 54.6327868852459, "grad_norm": 6.88578987121582, "learning_rate": 8.987106994801801e-06, "loss": 0.4573, "step": 16663 }, { "epoch": 54.636065573770495, "grad_norm": 7.7577290534973145, "learning_rate": 8.986050574796922e-06, "loss": 0.5741, "step": 16664 }, { "epoch": 54.63934426229508, "grad_norm": 5.362838268280029, "learning_rate": 8.984994166225379e-06, "loss": 0.4476, "step": 16665 }, { "epoch": 54.64262295081967, "grad_norm": 8.257637977600098, "learning_rate": 8.983937769099082e-06, "loss": 0.5896, "step": 16666 }, { "epoch": 54.64590163934426, "grad_norm": 5.519138336181641, "learning_rate": 8.982881383429943e-06, "loss": 0.8349, "step": 16667 }, { "epoch": 54.649180327868855, "grad_norm": 6.020674705505371, "learning_rate": 8.981825009229873e-06, "loss": 0.6554, "step": 16668 }, { "epoch": 54.65245901639344, "grad_norm": 5.289924621582031, "learning_rate": 8.980768646510785e-06, "loss": 0.3947, "step": 16669 }, { "epoch": 54.65573770491803, "grad_norm": 5.344423770904541, "learning_rate": 8.97971229528459e-06, "loss": 0.395, "step": 16670 }, { "epoch": 54.65901639344262, "grad_norm": 8.613818168640137, "learning_rate": 8.978655955563202e-06, "loss": 0.5811, "step": 16671 }, { "epoch": 54.662295081967216, "grad_norm": 5.704570770263672, "learning_rate": 8.97759962735853e-06, "loss": 0.5, "step": 16672 }, { "epoch": 54.665573770491804, "grad_norm": 5.517332077026367, "learning_rate": 8.97654331068248e-06, "loss": 0.6971, "step": 16673 }, { "epoch": 54.66885245901639, "grad_norm": 5.448511600494385, "learning_rate": 8.975487005546972e-06, "loss": 0.6241, "step": 16674 }, { "epoch": 54.67213114754098, "grad_norm": 7.078586101531982, "learning_rate": 8.974430711963915e-06, "loss": 0.6787, "step": 16675 }, { "epoch": 54.675409836065576, "grad_norm": 6.728757858276367, "learning_rate": 8.973374429945218e-06, "loss": 0.7507, "step": 16676 }, { "epoch": 54.678688524590164, "grad_norm": 17.640159606933594, "learning_rate": 8.972318159502785e-06, "loss": 0.7669, "step": 16677 }, { "epoch": 54.68196721311475, "grad_norm": 6.651074409484863, "learning_rate": 8.97126190064854e-06, "loss": 0.556, "step": 16678 }, { "epoch": 54.68524590163934, "grad_norm": 6.09693717956543, "learning_rate": 8.970205653394386e-06, "loss": 0.68, "step": 16679 }, { "epoch": 54.68852459016394, "grad_norm": 6.204862117767334, "learning_rate": 8.969149417752234e-06, "loss": 0.638, "step": 16680 }, { "epoch": 54.691803278688525, "grad_norm": 5.545426368713379, "learning_rate": 8.968093193733995e-06, "loss": 0.6223, "step": 16681 }, { "epoch": 54.69508196721311, "grad_norm": 5.563514232635498, "learning_rate": 8.96703698135157e-06, "loss": 0.7719, "step": 16682 }, { "epoch": 54.6983606557377, "grad_norm": 5.441518306732178, "learning_rate": 8.965980780616886e-06, "loss": 0.5666, "step": 16683 }, { "epoch": 54.7016393442623, "grad_norm": 8.448432922363281, "learning_rate": 8.964924591541842e-06, "loss": 0.6638, "step": 16684 }, { "epoch": 54.704918032786885, "grad_norm": 4.770487308502197, "learning_rate": 8.96386841413835e-06, "loss": 0.5067, "step": 16685 }, { "epoch": 54.708196721311474, "grad_norm": 6.430941104888916, "learning_rate": 8.962812248418314e-06, "loss": 0.6299, "step": 16686 }, { "epoch": 54.71147540983607, "grad_norm": 19.710113525390625, "learning_rate": 8.961756094393652e-06, "loss": 0.638, "step": 16687 }, { "epoch": 54.71475409836066, "grad_norm": 6.721874713897705, "learning_rate": 8.96069995207627e-06, "loss": 0.7582, "step": 16688 }, { "epoch": 54.718032786885246, "grad_norm": 6.06935453414917, "learning_rate": 8.959643821478077e-06, "loss": 0.5585, "step": 16689 }, { "epoch": 54.721311475409834, "grad_norm": 6.882693290710449, "learning_rate": 8.958587702610977e-06, "loss": 0.6315, "step": 16690 }, { "epoch": 54.72459016393443, "grad_norm": 5.220676422119141, "learning_rate": 8.95753159548689e-06, "loss": 0.4584, "step": 16691 }, { "epoch": 54.72786885245902, "grad_norm": 11.995279312133789, "learning_rate": 8.956475500117715e-06, "loss": 0.5459, "step": 16692 }, { "epoch": 54.731147540983606, "grad_norm": 6.4267754554748535, "learning_rate": 8.955419416515363e-06, "loss": 0.6883, "step": 16693 }, { "epoch": 54.734426229508195, "grad_norm": 6.4576497077941895, "learning_rate": 8.954363344691744e-06, "loss": 0.7119, "step": 16694 }, { "epoch": 54.73770491803279, "grad_norm": 5.872575283050537, "learning_rate": 8.953307284658765e-06, "loss": 0.7662, "step": 16695 }, { "epoch": 54.74098360655738, "grad_norm": 6.688438415527344, "learning_rate": 8.952251236428334e-06, "loss": 0.644, "step": 16696 }, { "epoch": 54.74426229508197, "grad_norm": 6.457990646362305, "learning_rate": 8.951195200012361e-06, "loss": 0.392, "step": 16697 }, { "epoch": 54.747540983606555, "grad_norm": 6.78503942489624, "learning_rate": 8.950139175422754e-06, "loss": 0.4532, "step": 16698 }, { "epoch": 54.75081967213115, "grad_norm": 8.09686279296875, "learning_rate": 8.949083162671414e-06, "loss": 0.7267, "step": 16699 }, { "epoch": 54.75409836065574, "grad_norm": 43.99750900268555, "learning_rate": 8.948027161770259e-06, "loss": 0.7013, "step": 16700 }, { "epoch": 54.75737704918033, "grad_norm": 5.3108320236206055, "learning_rate": 8.946971172731192e-06, "loss": 0.4429, "step": 16701 }, { "epoch": 54.760655737704916, "grad_norm": 5.394267559051514, "learning_rate": 8.945915195566119e-06, "loss": 0.4006, "step": 16702 }, { "epoch": 54.76393442622951, "grad_norm": 5.938387393951416, "learning_rate": 8.944859230286945e-06, "loss": 0.6062, "step": 16703 }, { "epoch": 54.7672131147541, "grad_norm": 8.412020683288574, "learning_rate": 8.943803276905583e-06, "loss": 0.5826, "step": 16704 }, { "epoch": 54.77049180327869, "grad_norm": 8.645631790161133, "learning_rate": 8.942747335433938e-06, "loss": 0.7759, "step": 16705 }, { "epoch": 54.773770491803276, "grad_norm": 8.023641586303711, "learning_rate": 8.941691405883916e-06, "loss": 0.853, "step": 16706 }, { "epoch": 54.77704918032787, "grad_norm": 5.285221576690674, "learning_rate": 8.940635488267424e-06, "loss": 0.5132, "step": 16707 }, { "epoch": 54.78032786885246, "grad_norm": 7.666092872619629, "learning_rate": 8.939579582596363e-06, "loss": 0.4243, "step": 16708 }, { "epoch": 54.78360655737705, "grad_norm": 5.252579212188721, "learning_rate": 8.93852368888265e-06, "loss": 0.6158, "step": 16709 }, { "epoch": 54.78688524590164, "grad_norm": 7.016092777252197, "learning_rate": 8.937467807138185e-06, "loss": 0.763, "step": 16710 }, { "epoch": 54.79016393442623, "grad_norm": 7.41347074508667, "learning_rate": 8.936411937374877e-06, "loss": 0.56, "step": 16711 }, { "epoch": 54.79344262295082, "grad_norm": 4.948461532592773, "learning_rate": 8.935356079604624e-06, "loss": 0.4658, "step": 16712 }, { "epoch": 54.79672131147541, "grad_norm": 4.840911388397217, "learning_rate": 8.934300233839344e-06, "loss": 0.3265, "step": 16713 }, { "epoch": 54.8, "grad_norm": 5.016445159912109, "learning_rate": 8.933244400090937e-06, "loss": 0.6007, "step": 16714 }, { "epoch": 54.80327868852459, "grad_norm": 5.201272964477539, "learning_rate": 8.932188578371308e-06, "loss": 0.3696, "step": 16715 }, { "epoch": 54.80655737704918, "grad_norm": 10.921676635742188, "learning_rate": 8.931132768692358e-06, "loss": 0.4708, "step": 16716 }, { "epoch": 54.80983606557377, "grad_norm": 5.630578517913818, "learning_rate": 8.930076971066003e-06, "loss": 0.6577, "step": 16717 }, { "epoch": 54.81311475409836, "grad_norm": 6.504702091217041, "learning_rate": 8.929021185504142e-06, "loss": 0.6368, "step": 16718 }, { "epoch": 54.81639344262295, "grad_norm": 6.142575263977051, "learning_rate": 8.927965412018678e-06, "loss": 0.4937, "step": 16719 }, { "epoch": 54.81967213114754, "grad_norm": 6.280807018280029, "learning_rate": 8.926909650621523e-06, "loss": 0.5616, "step": 16720 }, { "epoch": 54.82295081967213, "grad_norm": 8.546876907348633, "learning_rate": 8.925853901324573e-06, "loss": 0.7609, "step": 16721 }, { "epoch": 54.82622950819672, "grad_norm": 4.742159366607666, "learning_rate": 8.924798164139738e-06, "loss": 0.7635, "step": 16722 }, { "epoch": 54.829508196721314, "grad_norm": 5.700442314147949, "learning_rate": 8.923742439078922e-06, "loss": 0.5053, "step": 16723 }, { "epoch": 54.8327868852459, "grad_norm": 8.445189476013184, "learning_rate": 8.922686726154031e-06, "loss": 0.6064, "step": 16724 }, { "epoch": 54.83606557377049, "grad_norm": 6.324920177459717, "learning_rate": 8.921631025376962e-06, "loss": 0.7392, "step": 16725 }, { "epoch": 54.83934426229508, "grad_norm": 5.630836486816406, "learning_rate": 8.92057533675963e-06, "loss": 0.4983, "step": 16726 }, { "epoch": 54.842622950819674, "grad_norm": 11.137147903442383, "learning_rate": 8.919519660313933e-06, "loss": 0.403, "step": 16727 }, { "epoch": 54.84590163934426, "grad_norm": 5.798643112182617, "learning_rate": 8.918463996051774e-06, "loss": 0.7154, "step": 16728 }, { "epoch": 54.84918032786885, "grad_norm": 10.090531349182129, "learning_rate": 8.917408343985054e-06, "loss": 0.472, "step": 16729 }, { "epoch": 54.85245901639344, "grad_norm": 6.786309719085693, "learning_rate": 8.916352704125686e-06, "loss": 0.5345, "step": 16730 }, { "epoch": 54.855737704918035, "grad_norm": 5.035593032836914, "learning_rate": 8.915297076485567e-06, "loss": 0.5342, "step": 16731 }, { "epoch": 54.85901639344262, "grad_norm": 10.177556037902832, "learning_rate": 8.914241461076602e-06, "loss": 0.4095, "step": 16732 }, { "epoch": 54.86229508196721, "grad_norm": 5.869778633117676, "learning_rate": 8.913185857910692e-06, "loss": 0.5973, "step": 16733 }, { "epoch": 54.86557377049181, "grad_norm": 10.357062339782715, "learning_rate": 8.91213026699974e-06, "loss": 0.6439, "step": 16734 }, { "epoch": 54.868852459016395, "grad_norm": 6.549577236175537, "learning_rate": 8.91107468835565e-06, "loss": 0.3972, "step": 16735 }, { "epoch": 54.87213114754098, "grad_norm": 9.263876914978027, "learning_rate": 8.910019121990329e-06, "loss": 0.5203, "step": 16736 }, { "epoch": 54.87540983606557, "grad_norm": 6.308429718017578, "learning_rate": 8.908963567915675e-06, "loss": 0.3854, "step": 16737 }, { "epoch": 54.87868852459017, "grad_norm": 7.006216049194336, "learning_rate": 8.907908026143586e-06, "loss": 0.6813, "step": 16738 }, { "epoch": 54.881967213114756, "grad_norm": 8.514342308044434, "learning_rate": 8.906852496685975e-06, "loss": 0.4232, "step": 16739 }, { "epoch": 54.885245901639344, "grad_norm": 5.501942157745361, "learning_rate": 8.905796979554738e-06, "loss": 0.55, "step": 16740 }, { "epoch": 54.88852459016393, "grad_norm": 7.319940567016602, "learning_rate": 8.904741474761777e-06, "loss": 0.6157, "step": 16741 }, { "epoch": 54.89180327868853, "grad_norm": 4.764806747436523, "learning_rate": 8.903685982318991e-06, "loss": 0.4738, "step": 16742 }, { "epoch": 54.895081967213116, "grad_norm": 7.6953816413879395, "learning_rate": 8.90263050223829e-06, "loss": 0.3379, "step": 16743 }, { "epoch": 54.898360655737704, "grad_norm": 7.73011589050293, "learning_rate": 8.90157503453157e-06, "loss": 0.427, "step": 16744 }, { "epoch": 54.90163934426229, "grad_norm": 6.271720886230469, "learning_rate": 8.900519579210732e-06, "loss": 0.5971, "step": 16745 }, { "epoch": 54.90491803278689, "grad_norm": 6.529937267303467, "learning_rate": 8.89946413628768e-06, "loss": 0.6115, "step": 16746 }, { "epoch": 54.90819672131148, "grad_norm": 6.0180230140686035, "learning_rate": 8.898408705774316e-06, "loss": 0.3954, "step": 16747 }, { "epoch": 54.911475409836065, "grad_norm": 7.545874118804932, "learning_rate": 8.897353287682535e-06, "loss": 0.7041, "step": 16748 }, { "epoch": 54.91475409836065, "grad_norm": 8.865153312683105, "learning_rate": 8.896297882024246e-06, "loss": 0.5145, "step": 16749 }, { "epoch": 54.91803278688525, "grad_norm": 5.789910316467285, "learning_rate": 8.895242488811346e-06, "loss": 0.3816, "step": 16750 }, { "epoch": 54.92131147540984, "grad_norm": 5.90394401550293, "learning_rate": 8.894187108055734e-06, "loss": 0.547, "step": 16751 }, { "epoch": 54.924590163934425, "grad_norm": 11.43974494934082, "learning_rate": 8.893131739769309e-06, "loss": 0.6205, "step": 16752 }, { "epoch": 54.927868852459014, "grad_norm": 8.111340522766113, "learning_rate": 8.89207638396398e-06, "loss": 0.528, "step": 16753 }, { "epoch": 54.93114754098361, "grad_norm": 7.120993614196777, "learning_rate": 8.891021040651641e-06, "loss": 0.5689, "step": 16754 }, { "epoch": 54.9344262295082, "grad_norm": 5.879833221435547, "learning_rate": 8.889965709844187e-06, "loss": 0.6817, "step": 16755 }, { "epoch": 54.937704918032786, "grad_norm": 5.455955505371094, "learning_rate": 8.88891039155353e-06, "loss": 0.7319, "step": 16756 }, { "epoch": 54.940983606557374, "grad_norm": 5.413668155670166, "learning_rate": 8.887855085791563e-06, "loss": 0.5171, "step": 16757 }, { "epoch": 54.94426229508197, "grad_norm": 6.180312633514404, "learning_rate": 8.886799792570186e-06, "loss": 0.5385, "step": 16758 }, { "epoch": 54.94754098360656, "grad_norm": 9.23403263092041, "learning_rate": 8.885744511901298e-06, "loss": 0.3404, "step": 16759 }, { "epoch": 54.950819672131146, "grad_norm": 7.857837677001953, "learning_rate": 8.884689243796795e-06, "loss": 0.5035, "step": 16760 }, { "epoch": 54.954098360655735, "grad_norm": 5.166606903076172, "learning_rate": 8.883633988268586e-06, "loss": 0.4202, "step": 16761 }, { "epoch": 54.95737704918033, "grad_norm": 5.486268997192383, "learning_rate": 8.882578745328565e-06, "loss": 0.6674, "step": 16762 }, { "epoch": 54.96065573770492, "grad_norm": 6.289015769958496, "learning_rate": 8.881523514988628e-06, "loss": 0.6472, "step": 16763 }, { "epoch": 54.96393442622951, "grad_norm": 12.090112686157227, "learning_rate": 8.880468297260673e-06, "loss": 0.6151, "step": 16764 }, { "epoch": 54.967213114754095, "grad_norm": 6.903905868530273, "learning_rate": 8.879413092156608e-06, "loss": 0.377, "step": 16765 }, { "epoch": 54.97049180327869, "grad_norm": 8.678471565246582, "learning_rate": 8.878357899688324e-06, "loss": 0.3291, "step": 16766 }, { "epoch": 54.97377049180328, "grad_norm": 5.540585517883301, "learning_rate": 8.87730271986772e-06, "loss": 0.5865, "step": 16767 }, { "epoch": 54.97704918032787, "grad_norm": 8.706128120422363, "learning_rate": 8.876247552706693e-06, "loss": 0.6063, "step": 16768 }, { "epoch": 54.980327868852456, "grad_norm": 5.318716526031494, "learning_rate": 8.875192398217147e-06, "loss": 0.292, "step": 16769 }, { "epoch": 54.98360655737705, "grad_norm": 7.755611419677734, "learning_rate": 8.874137256410974e-06, "loss": 0.6367, "step": 16770 }, { "epoch": 54.98688524590164, "grad_norm": 10.564651489257812, "learning_rate": 8.873082127300077e-06, "loss": 0.481, "step": 16771 }, { "epoch": 54.99016393442623, "grad_norm": 5.747557163238525, "learning_rate": 8.872027010896347e-06, "loss": 0.6054, "step": 16772 }, { "epoch": 54.993442622950816, "grad_norm": 5.4189252853393555, "learning_rate": 8.870971907211685e-06, "loss": 0.2764, "step": 16773 }, { "epoch": 54.99672131147541, "grad_norm": 5.134701728820801, "learning_rate": 8.86991681625799e-06, "loss": 0.5773, "step": 16774 }, { "epoch": 55.0, "grad_norm": 6.198216915130615, "learning_rate": 8.868861738047158e-06, "loss": 0.5243, "step": 16775 }, { "epoch": 55.00327868852459, "grad_norm": 10.238003730773926, "learning_rate": 8.867806672591087e-06, "loss": 0.8105, "step": 16776 }, { "epoch": 55.006557377049184, "grad_norm": 5.629358768463135, "learning_rate": 8.866751619901671e-06, "loss": 0.53, "step": 16777 }, { "epoch": 55.00983606557377, "grad_norm": 5.578287124633789, "learning_rate": 8.86569657999081e-06, "loss": 0.6252, "step": 16778 }, { "epoch": 55.01311475409836, "grad_norm": 6.300577163696289, "learning_rate": 8.864641552870399e-06, "loss": 0.6999, "step": 16779 }, { "epoch": 55.01639344262295, "grad_norm": 6.224828243255615, "learning_rate": 8.863586538552336e-06, "loss": 0.7379, "step": 16780 }, { "epoch": 55.019672131147544, "grad_norm": 6.7678446769714355, "learning_rate": 8.862531537048513e-06, "loss": 0.2714, "step": 16781 }, { "epoch": 55.02295081967213, "grad_norm": 7.677234172821045, "learning_rate": 8.861476548370833e-06, "loss": 0.6358, "step": 16782 }, { "epoch": 55.02622950819672, "grad_norm": 8.017635345458984, "learning_rate": 8.860421572531189e-06, "loss": 0.7162, "step": 16783 }, { "epoch": 55.02950819672131, "grad_norm": 6.241706371307373, "learning_rate": 8.859366609541476e-06, "loss": 0.4463, "step": 16784 }, { "epoch": 55.032786885245905, "grad_norm": 9.88022232055664, "learning_rate": 8.858311659413592e-06, "loss": 0.6382, "step": 16785 }, { "epoch": 55.03606557377049, "grad_norm": 5.594107627868652, "learning_rate": 8.857256722159425e-06, "loss": 0.5515, "step": 16786 }, { "epoch": 55.03934426229508, "grad_norm": 4.934355735778809, "learning_rate": 8.856201797790883e-06, "loss": 0.2753, "step": 16787 }, { "epoch": 55.04262295081967, "grad_norm": 5.397049903869629, "learning_rate": 8.855146886319853e-06, "loss": 0.5374, "step": 16788 }, { "epoch": 55.045901639344265, "grad_norm": 7.87908935546875, "learning_rate": 8.854091987758233e-06, "loss": 0.679, "step": 16789 }, { "epoch": 55.049180327868854, "grad_norm": 4.815673351287842, "learning_rate": 8.853037102117914e-06, "loss": 0.3994, "step": 16790 }, { "epoch": 55.05245901639344, "grad_norm": 5.021688938140869, "learning_rate": 8.851982229410797e-06, "loss": 0.9484, "step": 16791 }, { "epoch": 55.05573770491803, "grad_norm": 6.274880886077881, "learning_rate": 8.850927369648774e-06, "loss": 0.3766, "step": 16792 }, { "epoch": 55.059016393442626, "grad_norm": 8.890168190002441, "learning_rate": 8.84987252284374e-06, "loss": 0.5168, "step": 16793 }, { "epoch": 55.062295081967214, "grad_norm": 6.240467071533203, "learning_rate": 8.848817689007584e-06, "loss": 0.605, "step": 16794 }, { "epoch": 55.0655737704918, "grad_norm": 14.385992050170898, "learning_rate": 8.84776286815221e-06, "loss": 0.3925, "step": 16795 }, { "epoch": 55.06885245901639, "grad_norm": 4.675320625305176, "learning_rate": 8.84670806028951e-06, "loss": 0.3315, "step": 16796 }, { "epoch": 55.072131147540986, "grad_norm": 5.074512958526611, "learning_rate": 8.845653265431373e-06, "loss": 0.4341, "step": 16797 }, { "epoch": 55.075409836065575, "grad_norm": 6.633705139160156, "learning_rate": 8.844598483589695e-06, "loss": 0.4567, "step": 16798 }, { "epoch": 55.07868852459016, "grad_norm": 6.214848041534424, "learning_rate": 8.843543714776371e-06, "loss": 0.5571, "step": 16799 }, { "epoch": 55.08196721311475, "grad_norm": 5.807356834411621, "learning_rate": 8.842488959003294e-06, "loss": 0.4059, "step": 16800 }, { "epoch": 55.08524590163935, "grad_norm": 5.752938270568848, "learning_rate": 8.841434216282356e-06, "loss": 0.3873, "step": 16801 }, { "epoch": 55.088524590163935, "grad_norm": 5.250070095062256, "learning_rate": 8.840379486625456e-06, "loss": 0.5979, "step": 16802 }, { "epoch": 55.09180327868852, "grad_norm": 6.547812461853027, "learning_rate": 8.839324770044479e-06, "loss": 0.7559, "step": 16803 }, { "epoch": 55.09508196721311, "grad_norm": 5.543231964111328, "learning_rate": 8.838270066551322e-06, "loss": 0.5183, "step": 16804 }, { "epoch": 55.09836065573771, "grad_norm": 5.977529525756836, "learning_rate": 8.83721537615788e-06, "loss": 0.4924, "step": 16805 }, { "epoch": 55.101639344262296, "grad_norm": 5.875951766967773, "learning_rate": 8.836160698876044e-06, "loss": 0.5309, "step": 16806 }, { "epoch": 55.104918032786884, "grad_norm": 5.265860557556152, "learning_rate": 8.835106034717701e-06, "loss": 0.6486, "step": 16807 }, { "epoch": 55.10819672131147, "grad_norm": 9.584705352783203, "learning_rate": 8.834051383694754e-06, "loss": 0.3693, "step": 16808 }, { "epoch": 55.11147540983607, "grad_norm": 5.421410083770752, "learning_rate": 8.83299674581909e-06, "loss": 0.4059, "step": 16809 }, { "epoch": 55.114754098360656, "grad_norm": 5.359689235687256, "learning_rate": 8.831942121102602e-06, "loss": 0.3211, "step": 16810 }, { "epoch": 55.118032786885244, "grad_norm": 7.457252025604248, "learning_rate": 8.83088750955718e-06, "loss": 0.5562, "step": 16811 }, { "epoch": 55.12131147540983, "grad_norm": 20.137052536010742, "learning_rate": 8.829832911194713e-06, "loss": 0.5932, "step": 16812 }, { "epoch": 55.12459016393443, "grad_norm": 6.386427402496338, "learning_rate": 8.8287783260271e-06, "loss": 0.516, "step": 16813 }, { "epoch": 55.12786885245902, "grad_norm": 5.085849761962891, "learning_rate": 8.82772375406623e-06, "loss": 0.5299, "step": 16814 }, { "epoch": 55.131147540983605, "grad_norm": 6.133168697357178, "learning_rate": 8.826669195323992e-06, "loss": 0.4054, "step": 16815 }, { "epoch": 55.13442622950819, "grad_norm": 5.433010578155518, "learning_rate": 8.825614649812277e-06, "loss": 0.7043, "step": 16816 }, { "epoch": 55.13770491803279, "grad_norm": 6.572488307952881, "learning_rate": 8.82456011754298e-06, "loss": 0.4318, "step": 16817 }, { "epoch": 55.14098360655738, "grad_norm": 6.421525478363037, "learning_rate": 8.82350559852799e-06, "loss": 0.4617, "step": 16818 }, { "epoch": 55.144262295081965, "grad_norm": 5.459883213043213, "learning_rate": 8.822451092779198e-06, "loss": 0.5766, "step": 16819 }, { "epoch": 55.14754098360656, "grad_norm": 8.918734550476074, "learning_rate": 8.82139660030849e-06, "loss": 0.5947, "step": 16820 }, { "epoch": 55.15081967213115, "grad_norm": 5.752705097198486, "learning_rate": 8.820342121127765e-06, "loss": 0.4693, "step": 16821 }, { "epoch": 55.15409836065574, "grad_norm": 9.66819953918457, "learning_rate": 8.819287655248911e-06, "loss": 0.6033, "step": 16822 }, { "epoch": 55.157377049180326, "grad_norm": 12.20596694946289, "learning_rate": 8.818233202683815e-06, "loss": 0.6081, "step": 16823 }, { "epoch": 55.16065573770492, "grad_norm": 5.7722601890563965, "learning_rate": 8.817178763444366e-06, "loss": 0.4398, "step": 16824 }, { "epoch": 55.16393442622951, "grad_norm": 6.107646465301514, "learning_rate": 8.816124337542456e-06, "loss": 0.7756, "step": 16825 }, { "epoch": 55.1672131147541, "grad_norm": 6.995474815368652, "learning_rate": 8.815069924989977e-06, "loss": 0.289, "step": 16826 }, { "epoch": 55.170491803278686, "grad_norm": 7.14056921005249, "learning_rate": 8.814015525798814e-06, "loss": 0.443, "step": 16827 }, { "epoch": 55.17377049180328, "grad_norm": 14.569724082946777, "learning_rate": 8.812961139980862e-06, "loss": 0.7003, "step": 16828 }, { "epoch": 55.17704918032787, "grad_norm": 10.364631652832031, "learning_rate": 8.811906767548005e-06, "loss": 0.5413, "step": 16829 }, { "epoch": 55.18032786885246, "grad_norm": 5.62627649307251, "learning_rate": 8.810852408512135e-06, "loss": 0.3421, "step": 16830 }, { "epoch": 55.18360655737705, "grad_norm": 6.73516321182251, "learning_rate": 8.809798062885143e-06, "loss": 0.5427, "step": 16831 }, { "epoch": 55.18688524590164, "grad_norm": 5.584915637969971, "learning_rate": 8.808743730678915e-06, "loss": 0.5059, "step": 16832 }, { "epoch": 55.19016393442623, "grad_norm": 22.993656158447266, "learning_rate": 8.807689411905336e-06, "loss": 0.455, "step": 16833 }, { "epoch": 55.19344262295082, "grad_norm": 5.7736496925354, "learning_rate": 8.806635106576301e-06, "loss": 0.4832, "step": 16834 }, { "epoch": 55.19672131147541, "grad_norm": 6.685474395751953, "learning_rate": 8.805580814703698e-06, "loss": 0.5027, "step": 16835 }, { "epoch": 55.2, "grad_norm": 7.450351715087891, "learning_rate": 8.804526536299413e-06, "loss": 0.4315, "step": 16836 }, { "epoch": 55.20327868852459, "grad_norm": 4.017519950866699, "learning_rate": 8.803472271375333e-06, "loss": 0.1174, "step": 16837 }, { "epoch": 55.20655737704918, "grad_norm": 8.73047924041748, "learning_rate": 8.802418019943343e-06, "loss": 0.5464, "step": 16838 }, { "epoch": 55.20983606557377, "grad_norm": 5.098113536834717, "learning_rate": 8.801363782015341e-06, "loss": 0.3691, "step": 16839 }, { "epoch": 55.21311475409836, "grad_norm": 8.266618728637695, "learning_rate": 8.800309557603208e-06, "loss": 0.4489, "step": 16840 }, { "epoch": 55.21639344262295, "grad_norm": 6.936565399169922, "learning_rate": 8.799255346718831e-06, "loss": 0.5872, "step": 16841 }, { "epoch": 55.21967213114754, "grad_norm": 10.058753967285156, "learning_rate": 8.798201149374095e-06, "loss": 0.4593, "step": 16842 }, { "epoch": 55.22295081967213, "grad_norm": 5.934009075164795, "learning_rate": 8.797146965580895e-06, "loss": 0.3752, "step": 16843 }, { "epoch": 55.226229508196724, "grad_norm": 5.751741409301758, "learning_rate": 8.796092795351114e-06, "loss": 0.4666, "step": 16844 }, { "epoch": 55.22950819672131, "grad_norm": 5.859621524810791, "learning_rate": 8.795038638696637e-06, "loss": 0.5064, "step": 16845 }, { "epoch": 55.2327868852459, "grad_norm": 10.479604721069336, "learning_rate": 8.793984495629349e-06, "loss": 0.4891, "step": 16846 }, { "epoch": 55.23606557377049, "grad_norm": 5.187058448791504, "learning_rate": 8.792930366161142e-06, "loss": 0.4456, "step": 16847 }, { "epoch": 55.239344262295084, "grad_norm": 6.357839584350586, "learning_rate": 8.791876250303903e-06, "loss": 0.4705, "step": 16848 }, { "epoch": 55.24262295081967, "grad_norm": 5.125405311584473, "learning_rate": 8.790822148069515e-06, "loss": 0.6842, "step": 16849 }, { "epoch": 55.24590163934426, "grad_norm": 8.435200691223145, "learning_rate": 8.78976805946986e-06, "loss": 0.4869, "step": 16850 }, { "epoch": 55.24918032786885, "grad_norm": 7.07246732711792, "learning_rate": 8.788713984516832e-06, "loss": 0.4457, "step": 16851 }, { "epoch": 55.252459016393445, "grad_norm": 8.46501636505127, "learning_rate": 8.787659923222314e-06, "loss": 0.6109, "step": 16852 }, { "epoch": 55.25573770491803, "grad_norm": 5.928464889526367, "learning_rate": 8.78660587559819e-06, "loss": 0.4552, "step": 16853 }, { "epoch": 55.25901639344262, "grad_norm": 6.782065391540527, "learning_rate": 8.785551841656345e-06, "loss": 0.4502, "step": 16854 }, { "epoch": 55.26229508196721, "grad_norm": 5.466383457183838, "learning_rate": 8.784497821408665e-06, "loss": 0.5137, "step": 16855 }, { "epoch": 55.265573770491805, "grad_norm": 6.4012980461120605, "learning_rate": 8.78344381486704e-06, "loss": 0.6396, "step": 16856 }, { "epoch": 55.268852459016394, "grad_norm": 5.792861461639404, "learning_rate": 8.782389822043345e-06, "loss": 0.4846, "step": 16857 }, { "epoch": 55.27213114754098, "grad_norm": 8.443886756896973, "learning_rate": 8.781335842949475e-06, "loss": 0.5716, "step": 16858 }, { "epoch": 55.27540983606557, "grad_norm": 7.022960186004639, "learning_rate": 8.780281877597309e-06, "loss": 0.5895, "step": 16859 }, { "epoch": 55.278688524590166, "grad_norm": 7.792892932891846, "learning_rate": 8.779227925998732e-06, "loss": 0.6184, "step": 16860 }, { "epoch": 55.281967213114754, "grad_norm": 5.978908538818359, "learning_rate": 8.778173988165632e-06, "loss": 0.494, "step": 16861 }, { "epoch": 55.28524590163934, "grad_norm": 11.61947250366211, "learning_rate": 8.77712006410989e-06, "loss": 0.5197, "step": 16862 }, { "epoch": 55.28852459016394, "grad_norm": 6.833374500274658, "learning_rate": 8.776066153843392e-06, "loss": 0.5148, "step": 16863 }, { "epoch": 55.291803278688526, "grad_norm": 7.445070266723633, "learning_rate": 8.775012257378016e-06, "loss": 0.5027, "step": 16864 }, { "epoch": 55.295081967213115, "grad_norm": 7.488694190979004, "learning_rate": 8.773958374725654e-06, "loss": 0.3568, "step": 16865 }, { "epoch": 55.2983606557377, "grad_norm": 5.7709503173828125, "learning_rate": 8.772904505898186e-06, "loss": 0.3752, "step": 16866 }, { "epoch": 55.3016393442623, "grad_norm": 7.221858501434326, "learning_rate": 8.771850650907498e-06, "loss": 0.5592, "step": 16867 }, { "epoch": 55.30491803278689, "grad_norm": 6.001994609832764, "learning_rate": 8.770796809765464e-06, "loss": 0.6639, "step": 16868 }, { "epoch": 55.308196721311475, "grad_norm": 4.956543922424316, "learning_rate": 8.769742982483978e-06, "loss": 0.6136, "step": 16869 }, { "epoch": 55.31147540983606, "grad_norm": 25.115020751953125, "learning_rate": 8.768689169074921e-06, "loss": 0.6321, "step": 16870 }, { "epoch": 55.31475409836066, "grad_norm": 6.560214042663574, "learning_rate": 8.767635369550173e-06, "loss": 0.5182, "step": 16871 }, { "epoch": 55.31803278688525, "grad_norm": 8.433202743530273, "learning_rate": 8.766581583921613e-06, "loss": 0.6465, "step": 16872 }, { "epoch": 55.321311475409836, "grad_norm": 8.31830883026123, "learning_rate": 8.765527812201133e-06, "loss": 0.4802, "step": 16873 }, { "epoch": 55.324590163934424, "grad_norm": 6.60896110534668, "learning_rate": 8.764474054400609e-06, "loss": 0.4963, "step": 16874 }, { "epoch": 55.32786885245902, "grad_norm": 8.17850399017334, "learning_rate": 8.763420310531926e-06, "loss": 0.5573, "step": 16875 }, { "epoch": 55.33114754098361, "grad_norm": 6.0677056312561035, "learning_rate": 8.762366580606965e-06, "loss": 0.3953, "step": 16876 }, { "epoch": 55.334426229508196, "grad_norm": 6.2167534828186035, "learning_rate": 8.761312864637602e-06, "loss": 0.2965, "step": 16877 }, { "epoch": 55.337704918032784, "grad_norm": 8.025954246520996, "learning_rate": 8.76025916263573e-06, "loss": 0.8123, "step": 16878 }, { "epoch": 55.34098360655738, "grad_norm": 5.563591003417969, "learning_rate": 8.759205474613224e-06, "loss": 0.5254, "step": 16879 }, { "epoch": 55.34426229508197, "grad_norm": 5.998836994171143, "learning_rate": 8.758151800581965e-06, "loss": 0.4629, "step": 16880 }, { "epoch": 55.34754098360656, "grad_norm": 8.977783203125, "learning_rate": 8.757098140553834e-06, "loss": 0.3855, "step": 16881 }, { "epoch": 55.350819672131145, "grad_norm": 4.819433212280273, "learning_rate": 8.756044494540717e-06, "loss": 0.592, "step": 16882 }, { "epoch": 55.35409836065574, "grad_norm": 5.565530300140381, "learning_rate": 8.75499086255449e-06, "loss": 0.3399, "step": 16883 }, { "epoch": 55.35737704918033, "grad_norm": 5.193437576293945, "learning_rate": 8.753937244607037e-06, "loss": 0.5285, "step": 16884 }, { "epoch": 55.36065573770492, "grad_norm": 5.591837406158447, "learning_rate": 8.752883640710235e-06, "loss": 0.6507, "step": 16885 }, { "epoch": 55.363934426229505, "grad_norm": 5.194459438323975, "learning_rate": 8.751830050875969e-06, "loss": 0.4237, "step": 16886 }, { "epoch": 55.3672131147541, "grad_norm": 8.30785846710205, "learning_rate": 8.750776475116117e-06, "loss": 0.6137, "step": 16887 }, { "epoch": 55.37049180327869, "grad_norm": 5.886996269226074, "learning_rate": 8.749722913442558e-06, "loss": 0.5548, "step": 16888 }, { "epoch": 55.37377049180328, "grad_norm": 7.089505672454834, "learning_rate": 8.748669365867174e-06, "loss": 0.5981, "step": 16889 }, { "epoch": 55.377049180327866, "grad_norm": 5.649902820587158, "learning_rate": 8.74761583240184e-06, "loss": 0.4799, "step": 16890 }, { "epoch": 55.38032786885246, "grad_norm": 5.772951126098633, "learning_rate": 8.746562313058444e-06, "loss": 0.3262, "step": 16891 }, { "epoch": 55.38360655737705, "grad_norm": 16.136079788208008, "learning_rate": 8.74550880784886e-06, "loss": 0.4845, "step": 16892 }, { "epoch": 55.38688524590164, "grad_norm": 5.576738357543945, "learning_rate": 8.74445531678497e-06, "loss": 0.748, "step": 16893 }, { "epoch": 55.390163934426226, "grad_norm": 9.868165969848633, "learning_rate": 8.743401839878647e-06, "loss": 0.6378, "step": 16894 }, { "epoch": 55.39344262295082, "grad_norm": 5.4998931884765625, "learning_rate": 8.74234837714178e-06, "loss": 0.5799, "step": 16895 }, { "epoch": 55.39672131147541, "grad_norm": 7.380049705505371, "learning_rate": 8.74129492858624e-06, "loss": 0.4994, "step": 16896 }, { "epoch": 55.4, "grad_norm": 5.247944355010986, "learning_rate": 8.740241494223911e-06, "loss": 0.7317, "step": 16897 }, { "epoch": 55.40327868852459, "grad_norm": 6.533492565155029, "learning_rate": 8.739188074066665e-06, "loss": 0.2543, "step": 16898 }, { "epoch": 55.40655737704918, "grad_norm": 5.225692272186279, "learning_rate": 8.738134668126387e-06, "loss": 0.599, "step": 16899 }, { "epoch": 55.40983606557377, "grad_norm": 65.51408386230469, "learning_rate": 8.737081276414953e-06, "loss": 0.4125, "step": 16900 }, { "epoch": 55.41311475409836, "grad_norm": 7.318562030792236, "learning_rate": 8.736027898944242e-06, "loss": 0.5018, "step": 16901 }, { "epoch": 55.41639344262295, "grad_norm": 7.97905969619751, "learning_rate": 8.734974535726129e-06, "loss": 0.4713, "step": 16902 }, { "epoch": 55.41967213114754, "grad_norm": 6.190436363220215, "learning_rate": 8.73392118677249e-06, "loss": 0.5541, "step": 16903 }, { "epoch": 55.42295081967213, "grad_norm": 5.552785873413086, "learning_rate": 8.73286785209521e-06, "loss": 0.415, "step": 16904 }, { "epoch": 55.42622950819672, "grad_norm": 7.126108646392822, "learning_rate": 8.731814531706162e-06, "loss": 0.563, "step": 16905 }, { "epoch": 55.429508196721315, "grad_norm": 7.675639629364014, "learning_rate": 8.730761225617222e-06, "loss": 0.6359, "step": 16906 }, { "epoch": 55.4327868852459, "grad_norm": 5.449158191680908, "learning_rate": 8.729707933840268e-06, "loss": 0.5494, "step": 16907 }, { "epoch": 55.43606557377049, "grad_norm": 6.946269989013672, "learning_rate": 8.72865465638718e-06, "loss": 0.5962, "step": 16908 }, { "epoch": 55.43934426229508, "grad_norm": 4.7730021476745605, "learning_rate": 8.727601393269832e-06, "loss": 0.6113, "step": 16909 }, { "epoch": 55.442622950819676, "grad_norm": 7.354958534240723, "learning_rate": 8.726548144500104e-06, "loss": 0.5183, "step": 16910 }, { "epoch": 55.445901639344264, "grad_norm": 7.6092209815979, "learning_rate": 8.725494910089866e-06, "loss": 0.3894, "step": 16911 }, { "epoch": 55.44918032786885, "grad_norm": 10.517852783203125, "learning_rate": 8.724441690050997e-06, "loss": 0.675, "step": 16912 }, { "epoch": 55.45245901639344, "grad_norm": 27.280132293701172, "learning_rate": 8.723388484395378e-06, "loss": 0.4143, "step": 16913 }, { "epoch": 55.455737704918036, "grad_norm": 5.1254987716674805, "learning_rate": 8.722335293134881e-06, "loss": 0.3641, "step": 16914 }, { "epoch": 55.459016393442624, "grad_norm": 5.5043768882751465, "learning_rate": 8.721282116281382e-06, "loss": 0.5891, "step": 16915 }, { "epoch": 55.46229508196721, "grad_norm": 6.659063816070557, "learning_rate": 8.720228953846753e-06, "loss": 0.9282, "step": 16916 }, { "epoch": 55.4655737704918, "grad_norm": 5.541275978088379, "learning_rate": 8.719175805842876e-06, "loss": 0.4755, "step": 16917 }, { "epoch": 55.4688524590164, "grad_norm": 5.681547164916992, "learning_rate": 8.718122672281623e-06, "loss": 0.4989, "step": 16918 }, { "epoch": 55.472131147540985, "grad_norm": 6.108654499053955, "learning_rate": 8.717069553174872e-06, "loss": 0.3382, "step": 16919 }, { "epoch": 55.47540983606557, "grad_norm": 6.989298343658447, "learning_rate": 8.71601644853449e-06, "loss": 0.4001, "step": 16920 }, { "epoch": 55.47868852459016, "grad_norm": 7.529048919677734, "learning_rate": 8.714963358372361e-06, "loss": 0.5022, "step": 16921 }, { "epoch": 55.48196721311476, "grad_norm": 7.079346656799316, "learning_rate": 8.713910282700359e-06, "loss": 0.5292, "step": 16922 }, { "epoch": 55.485245901639345, "grad_norm": 5.59089469909668, "learning_rate": 8.712857221530353e-06, "loss": 0.5562, "step": 16923 }, { "epoch": 55.488524590163934, "grad_norm": 8.684277534484863, "learning_rate": 8.711804174874217e-06, "loss": 0.6032, "step": 16924 }, { "epoch": 55.49180327868852, "grad_norm": 19.10722541809082, "learning_rate": 8.710751142743833e-06, "loss": 0.7418, "step": 16925 }, { "epoch": 55.49508196721312, "grad_norm": 13.575912475585938, "learning_rate": 8.70969812515107e-06, "loss": 0.5177, "step": 16926 }, { "epoch": 55.498360655737706, "grad_norm": 5.839859485626221, "learning_rate": 8.708645122107802e-06, "loss": 0.5645, "step": 16927 }, { "epoch": 55.501639344262294, "grad_norm": 8.571059226989746, "learning_rate": 8.707592133625903e-06, "loss": 0.3883, "step": 16928 }, { "epoch": 55.50491803278688, "grad_norm": 5.992301940917969, "learning_rate": 8.706539159717243e-06, "loss": 0.4413, "step": 16929 }, { "epoch": 55.50819672131148, "grad_norm": 22.134342193603516, "learning_rate": 8.7054862003937e-06, "loss": 0.7796, "step": 16930 }, { "epoch": 55.511475409836066, "grad_norm": 5.236535549163818, "learning_rate": 8.70443325566715e-06, "loss": 0.3582, "step": 16931 }, { "epoch": 55.514754098360655, "grad_norm": 6.089406490325928, "learning_rate": 8.703380325549458e-06, "loss": 0.6309, "step": 16932 }, { "epoch": 55.51803278688524, "grad_norm": 6.066342830657959, "learning_rate": 8.7023274100525e-06, "loss": 0.8164, "step": 16933 }, { "epoch": 55.52131147540984, "grad_norm": 6.714346408843994, "learning_rate": 8.701274509188154e-06, "loss": 0.5886, "step": 16934 }, { "epoch": 55.52459016393443, "grad_norm": 9.63365650177002, "learning_rate": 8.700221622968288e-06, "loss": 0.4369, "step": 16935 }, { "epoch": 55.527868852459015, "grad_norm": 6.42001485824585, "learning_rate": 8.699168751404771e-06, "loss": 0.5826, "step": 16936 }, { "epoch": 55.5311475409836, "grad_norm": 7.443531513214111, "learning_rate": 8.69811589450948e-06, "loss": 0.4624, "step": 16937 }, { "epoch": 55.5344262295082, "grad_norm": 5.768198013305664, "learning_rate": 8.697063052294288e-06, "loss": 0.3676, "step": 16938 }, { "epoch": 55.53770491803279, "grad_norm": 6.532167434692383, "learning_rate": 8.696010224771063e-06, "loss": 0.5061, "step": 16939 }, { "epoch": 55.540983606557376, "grad_norm": 5.7533392906188965, "learning_rate": 8.69495741195168e-06, "loss": 0.5681, "step": 16940 }, { "epoch": 55.544262295081964, "grad_norm": 6.388035297393799, "learning_rate": 8.69390461384801e-06, "loss": 0.5936, "step": 16941 }, { "epoch": 55.54754098360656, "grad_norm": 7.166860103607178, "learning_rate": 8.692851830471917e-06, "loss": 0.442, "step": 16942 }, { "epoch": 55.55081967213115, "grad_norm": 6.979635715484619, "learning_rate": 8.691799061835285e-06, "loss": 0.5262, "step": 16943 }, { "epoch": 55.554098360655736, "grad_norm": 5.470364570617676, "learning_rate": 8.690746307949977e-06, "loss": 0.3535, "step": 16944 }, { "epoch": 55.557377049180324, "grad_norm": 8.402368545532227, "learning_rate": 8.689693568827868e-06, "loss": 0.3462, "step": 16945 }, { "epoch": 55.56065573770492, "grad_norm": 5.955906391143799, "learning_rate": 8.688640844480821e-06, "loss": 0.5792, "step": 16946 }, { "epoch": 55.56393442622951, "grad_norm": 5.270143985748291, "learning_rate": 8.687588134920715e-06, "loss": 0.6804, "step": 16947 }, { "epoch": 55.5672131147541, "grad_norm": 7.3423943519592285, "learning_rate": 8.686535440159419e-06, "loss": 0.6422, "step": 16948 }, { "epoch": 55.570491803278685, "grad_norm": 5.974186420440674, "learning_rate": 8.685482760208801e-06, "loss": 0.3934, "step": 16949 }, { "epoch": 55.57377049180328, "grad_norm": 6.977441787719727, "learning_rate": 8.684430095080729e-06, "loss": 0.7391, "step": 16950 }, { "epoch": 55.57704918032787, "grad_norm": 5.59398078918457, "learning_rate": 8.683377444787078e-06, "loss": 0.4973, "step": 16951 }, { "epoch": 55.58032786885246, "grad_norm": 4.9390363693237305, "learning_rate": 8.682324809339716e-06, "loss": 0.6066, "step": 16952 }, { "epoch": 55.58360655737705, "grad_norm": 6.308946132659912, "learning_rate": 8.68127218875051e-06, "loss": 0.5813, "step": 16953 }, { "epoch": 55.58688524590164, "grad_norm": 6.24751615524292, "learning_rate": 8.680219583031333e-06, "loss": 0.7249, "step": 16954 }, { "epoch": 55.59016393442623, "grad_norm": 7.110569477081299, "learning_rate": 8.679166992194047e-06, "loss": 0.4381, "step": 16955 }, { "epoch": 55.59344262295082, "grad_norm": 5.748382568359375, "learning_rate": 8.678114416250531e-06, "loss": 0.4363, "step": 16956 }, { "epoch": 55.59672131147541, "grad_norm": 5.620516300201416, "learning_rate": 8.67706185521265e-06, "loss": 0.5078, "step": 16957 }, { "epoch": 55.6, "grad_norm": 7.456800937652588, "learning_rate": 8.676009309092273e-06, "loss": 0.4942, "step": 16958 }, { "epoch": 55.60327868852459, "grad_norm": 6.934826374053955, "learning_rate": 8.674956777901261e-06, "loss": 0.6035, "step": 16959 }, { "epoch": 55.60655737704918, "grad_norm": 7.371889114379883, "learning_rate": 8.673904261651494e-06, "loss": 0.6138, "step": 16960 }, { "epoch": 55.609836065573774, "grad_norm": 6.548951148986816, "learning_rate": 8.672851760354836e-06, "loss": 0.6161, "step": 16961 }, { "epoch": 55.61311475409836, "grad_norm": 4.20605993270874, "learning_rate": 8.671799274023152e-06, "loss": 0.7134, "step": 16962 }, { "epoch": 55.61639344262295, "grad_norm": 7.760376930236816, "learning_rate": 8.670746802668313e-06, "loss": 0.6001, "step": 16963 }, { "epoch": 55.61967213114754, "grad_norm": 25.678075790405273, "learning_rate": 8.669694346302186e-06, "loss": 0.4918, "step": 16964 }, { "epoch": 55.622950819672134, "grad_norm": 4.508646011352539, "learning_rate": 8.668641904936639e-06, "loss": 0.5411, "step": 16965 }, { "epoch": 55.62622950819672, "grad_norm": 6.979032039642334, "learning_rate": 8.667589478583539e-06, "loss": 0.538, "step": 16966 }, { "epoch": 55.62950819672131, "grad_norm": 5.175772666931152, "learning_rate": 8.666537067254753e-06, "loss": 0.7634, "step": 16967 }, { "epoch": 55.6327868852459, "grad_norm": 6.339950084686279, "learning_rate": 8.665484670962145e-06, "loss": 0.35, "step": 16968 }, { "epoch": 55.636065573770495, "grad_norm": 4.656966686248779, "learning_rate": 8.664432289717588e-06, "loss": 0.5968, "step": 16969 }, { "epoch": 55.63934426229508, "grad_norm": 5.438530445098877, "learning_rate": 8.663379923532945e-06, "loss": 0.341, "step": 16970 }, { "epoch": 55.64262295081967, "grad_norm": 5.043007850646973, "learning_rate": 8.662327572420084e-06, "loss": 0.6333, "step": 16971 }, { "epoch": 55.64590163934426, "grad_norm": 44.967281341552734, "learning_rate": 8.661275236390866e-06, "loss": 0.4354, "step": 16972 }, { "epoch": 55.649180327868855, "grad_norm": 12.022843360900879, "learning_rate": 8.660222915457166e-06, "loss": 0.616, "step": 16973 }, { "epoch": 55.65245901639344, "grad_norm": 20.599346160888672, "learning_rate": 8.659170609630845e-06, "loss": 0.349, "step": 16974 }, { "epoch": 55.65573770491803, "grad_norm": 8.026301383972168, "learning_rate": 8.65811831892377e-06, "loss": 0.5174, "step": 16975 }, { "epoch": 55.65901639344262, "grad_norm": 8.183859825134277, "learning_rate": 8.657066043347803e-06, "loss": 0.7005, "step": 16976 }, { "epoch": 55.662295081967216, "grad_norm": 6.482605934143066, "learning_rate": 8.656013782914817e-06, "loss": 0.4352, "step": 16977 }, { "epoch": 55.665573770491804, "grad_norm": 5.915404319763184, "learning_rate": 8.65496153763667e-06, "loss": 0.4463, "step": 16978 }, { "epoch": 55.66885245901639, "grad_norm": 6.0469489097595215, "learning_rate": 8.653909307525232e-06, "loss": 0.7361, "step": 16979 }, { "epoch": 55.67213114754098, "grad_norm": 5.691841125488281, "learning_rate": 8.652857092592367e-06, "loss": 0.4916, "step": 16980 }, { "epoch": 55.675409836065576, "grad_norm": 14.452570915222168, "learning_rate": 8.651804892849933e-06, "loss": 0.672, "step": 16981 }, { "epoch": 55.678688524590164, "grad_norm": 17.408586502075195, "learning_rate": 8.650752708309807e-06, "loss": 0.5474, "step": 16982 }, { "epoch": 55.68196721311475, "grad_norm": 8.115346908569336, "learning_rate": 8.649700538983845e-06, "loss": 0.5248, "step": 16983 }, { "epoch": 55.68524590163934, "grad_norm": 6.074370861053467, "learning_rate": 8.648648384883916e-06, "loss": 0.6264, "step": 16984 }, { "epoch": 55.68852459016394, "grad_norm": 6.463759422302246, "learning_rate": 8.647596246021874e-06, "loss": 0.8069, "step": 16985 }, { "epoch": 55.691803278688525, "grad_norm": 13.404629707336426, "learning_rate": 8.646544122409596e-06, "loss": 0.3214, "step": 16986 }, { "epoch": 55.69508196721311, "grad_norm": 11.257555961608887, "learning_rate": 8.64549201405894e-06, "loss": 0.4162, "step": 16987 }, { "epoch": 55.6983606557377, "grad_norm": 6.839756011962891, "learning_rate": 8.64443992098177e-06, "loss": 0.418, "step": 16988 }, { "epoch": 55.7016393442623, "grad_norm": 5.406571865081787, "learning_rate": 8.643387843189947e-06, "loss": 0.5222, "step": 16989 }, { "epoch": 55.704918032786885, "grad_norm": 6.8359832763671875, "learning_rate": 8.642335780695339e-06, "loss": 0.5823, "step": 16990 }, { "epoch": 55.708196721311474, "grad_norm": 6.143679141998291, "learning_rate": 8.641283733509804e-06, "loss": 0.5145, "step": 16991 }, { "epoch": 55.71147540983607, "grad_norm": 5.762940406799316, "learning_rate": 8.640231701645213e-06, "loss": 0.446, "step": 16992 }, { "epoch": 55.71475409836066, "grad_norm": 10.402270317077637, "learning_rate": 8.639179685113419e-06, "loss": 0.7117, "step": 16993 }, { "epoch": 55.718032786885246, "grad_norm": 5.298048496246338, "learning_rate": 8.638127683926287e-06, "loss": 0.7395, "step": 16994 }, { "epoch": 55.721311475409834, "grad_norm": 5.489259243011475, "learning_rate": 8.637075698095686e-06, "loss": 0.4931, "step": 16995 }, { "epoch": 55.72459016393443, "grad_norm": 6.283894062042236, "learning_rate": 8.636023727633472e-06, "loss": 0.4536, "step": 16996 }, { "epoch": 55.72786885245902, "grad_norm": 6.547752380371094, "learning_rate": 8.63497177255151e-06, "loss": 0.48, "step": 16997 }, { "epoch": 55.731147540983606, "grad_norm": 5.634087085723877, "learning_rate": 8.633919832861655e-06, "loss": 0.3979, "step": 16998 }, { "epoch": 55.734426229508195, "grad_norm": 6.130757808685303, "learning_rate": 8.632867908575779e-06, "loss": 0.7873, "step": 16999 }, { "epoch": 55.73770491803279, "grad_norm": 5.943072319030762, "learning_rate": 8.631815999705739e-06, "loss": 0.5684, "step": 17000 }, { "epoch": 55.74098360655738, "grad_norm": 5.5789666175842285, "learning_rate": 8.630764106263397e-06, "loss": 0.4594, "step": 17001 }, { "epoch": 55.74426229508197, "grad_norm": 5.342076778411865, "learning_rate": 8.629712228260613e-06, "loss": 0.5069, "step": 17002 }, { "epoch": 55.747540983606555, "grad_norm": 8.576746940612793, "learning_rate": 8.628660365709243e-06, "loss": 0.3742, "step": 17003 }, { "epoch": 55.75081967213115, "grad_norm": 5.736108303070068, "learning_rate": 8.627608518621159e-06, "loss": 0.3145, "step": 17004 }, { "epoch": 55.75409836065574, "grad_norm": 6.591038227081299, "learning_rate": 8.626556687008214e-06, "loss": 0.4871, "step": 17005 }, { "epoch": 55.75737704918033, "grad_norm": 6.127923965454102, "learning_rate": 8.625504870882271e-06, "loss": 0.7016, "step": 17006 }, { "epoch": 55.760655737704916, "grad_norm": 11.111503601074219, "learning_rate": 8.624453070255186e-06, "loss": 0.4169, "step": 17007 }, { "epoch": 55.76393442622951, "grad_norm": 42.3442497253418, "learning_rate": 8.623401285138828e-06, "loss": 0.513, "step": 17008 }, { "epoch": 55.7672131147541, "grad_norm": 6.924853801727295, "learning_rate": 8.622349515545051e-06, "loss": 0.7351, "step": 17009 }, { "epoch": 55.77049180327869, "grad_norm": 5.785147190093994, "learning_rate": 8.621297761485715e-06, "loss": 0.4773, "step": 17010 }, { "epoch": 55.773770491803276, "grad_norm": 6.64357852935791, "learning_rate": 8.620246022972675e-06, "loss": 0.7436, "step": 17011 }, { "epoch": 55.77704918032787, "grad_norm": 5.900843143463135, "learning_rate": 8.619194300017802e-06, "loss": 0.6591, "step": 17012 }, { "epoch": 55.78032786885246, "grad_norm": 8.916460990905762, "learning_rate": 8.618142592632949e-06, "loss": 0.5992, "step": 17013 }, { "epoch": 55.78360655737705, "grad_norm": 7.423300266265869, "learning_rate": 8.617090900829972e-06, "loss": 0.562, "step": 17014 }, { "epoch": 55.78688524590164, "grad_norm": 6.821228504180908, "learning_rate": 8.616039224620736e-06, "loss": 0.563, "step": 17015 }, { "epoch": 55.79016393442623, "grad_norm": 6.378488540649414, "learning_rate": 8.614987564017094e-06, "loss": 0.5822, "step": 17016 }, { "epoch": 55.79344262295082, "grad_norm": 5.19882345199585, "learning_rate": 8.613935919030908e-06, "loss": 0.793, "step": 17017 }, { "epoch": 55.79672131147541, "grad_norm": 6.267538070678711, "learning_rate": 8.612884289674034e-06, "loss": 0.5367, "step": 17018 }, { "epoch": 55.8, "grad_norm": 10.115389823913574, "learning_rate": 8.611832675958335e-06, "loss": 0.6877, "step": 17019 }, { "epoch": 55.80327868852459, "grad_norm": 6.669264316558838, "learning_rate": 8.610781077895664e-06, "loss": 0.5078, "step": 17020 }, { "epoch": 55.80655737704918, "grad_norm": 8.544739723205566, "learning_rate": 8.60972949549788e-06, "loss": 0.3936, "step": 17021 }, { "epoch": 55.80983606557377, "grad_norm": 5.248128414154053, "learning_rate": 8.608677928776846e-06, "loss": 0.6742, "step": 17022 }, { "epoch": 55.81311475409836, "grad_norm": 9.263054847717285, "learning_rate": 8.60762637774441e-06, "loss": 0.4526, "step": 17023 }, { "epoch": 55.81639344262295, "grad_norm": 7.474516868591309, "learning_rate": 8.606574842412434e-06, "loss": 0.3741, "step": 17024 }, { "epoch": 55.81967213114754, "grad_norm": 8.73281192779541, "learning_rate": 8.60552332279278e-06, "loss": 0.4681, "step": 17025 }, { "epoch": 55.82295081967213, "grad_norm": 6.430505275726318, "learning_rate": 8.604471818897297e-06, "loss": 0.4012, "step": 17026 }, { "epoch": 55.82622950819672, "grad_norm": 6.589148044586182, "learning_rate": 8.603420330737849e-06, "loss": 0.7057, "step": 17027 }, { "epoch": 55.829508196721314, "grad_norm": 5.51871919631958, "learning_rate": 8.602368858326287e-06, "loss": 0.6357, "step": 17028 }, { "epoch": 55.8327868852459, "grad_norm": 9.950784683227539, "learning_rate": 8.601317401674465e-06, "loss": 0.5517, "step": 17029 }, { "epoch": 55.83606557377049, "grad_norm": 6.026828289031982, "learning_rate": 8.600265960794247e-06, "loss": 0.3, "step": 17030 }, { "epoch": 55.83934426229508, "grad_norm": 10.666349411010742, "learning_rate": 8.599214535697487e-06, "loss": 0.3498, "step": 17031 }, { "epoch": 55.842622950819674, "grad_norm": 7.138983249664307, "learning_rate": 8.598163126396039e-06, "loss": 0.7001, "step": 17032 }, { "epoch": 55.84590163934426, "grad_norm": 6.415463447570801, "learning_rate": 8.597111732901756e-06, "loss": 0.5515, "step": 17033 }, { "epoch": 55.84918032786885, "grad_norm": 6.5042290687561035, "learning_rate": 8.5960603552265e-06, "loss": 0.5767, "step": 17034 }, { "epoch": 55.85245901639344, "grad_norm": 6.958820343017578, "learning_rate": 8.595008993382124e-06, "loss": 0.6174, "step": 17035 }, { "epoch": 55.855737704918035, "grad_norm": 6.926235198974609, "learning_rate": 8.593957647380482e-06, "loss": 0.4679, "step": 17036 }, { "epoch": 55.85901639344262, "grad_norm": 5.398918151855469, "learning_rate": 8.592906317233426e-06, "loss": 0.6083, "step": 17037 }, { "epoch": 55.86229508196721, "grad_norm": 5.9781670570373535, "learning_rate": 8.591855002952816e-06, "loss": 0.5246, "step": 17038 }, { "epoch": 55.86557377049181, "grad_norm": 5.649588108062744, "learning_rate": 8.590803704550507e-06, "loss": 0.6648, "step": 17039 }, { "epoch": 55.868852459016395, "grad_norm": 8.126808166503906, "learning_rate": 8.589752422038351e-06, "loss": 0.4885, "step": 17040 }, { "epoch": 55.87213114754098, "grad_norm": 7.92280912399292, "learning_rate": 8.5887011554282e-06, "loss": 0.6619, "step": 17041 }, { "epoch": 55.87540983606557, "grad_norm": 9.073689460754395, "learning_rate": 8.587649904731911e-06, "loss": 0.5256, "step": 17042 }, { "epoch": 55.87868852459017, "grad_norm": 5.281968593597412, "learning_rate": 8.586598669961341e-06, "loss": 0.6648, "step": 17043 }, { "epoch": 55.881967213114756, "grad_norm": 5.509151458740234, "learning_rate": 8.585547451128338e-06, "loss": 0.4349, "step": 17044 }, { "epoch": 55.885245901639344, "grad_norm": 6.130960941314697, "learning_rate": 8.58449624824476e-06, "loss": 0.3615, "step": 17045 }, { "epoch": 55.88852459016393, "grad_norm": 8.084589958190918, "learning_rate": 8.583445061322458e-06, "loss": 0.5722, "step": 17046 }, { "epoch": 55.89180327868853, "grad_norm": 6.985802173614502, "learning_rate": 8.582393890373282e-06, "loss": 0.5341, "step": 17047 }, { "epoch": 55.895081967213116, "grad_norm": 5.448307514190674, "learning_rate": 8.581342735409096e-06, "loss": 0.4385, "step": 17048 }, { "epoch": 55.898360655737704, "grad_norm": 6.280616760253906, "learning_rate": 8.580291596441741e-06, "loss": 0.4919, "step": 17049 }, { "epoch": 55.90163934426229, "grad_norm": 5.2032599449157715, "learning_rate": 8.579240473483073e-06, "loss": 0.5661, "step": 17050 }, { "epoch": 55.90491803278689, "grad_norm": 5.416530609130859, "learning_rate": 8.57818936654495e-06, "loss": 0.6522, "step": 17051 }, { "epoch": 55.90819672131148, "grad_norm": 8.927088737487793, "learning_rate": 8.577138275639219e-06, "loss": 0.5391, "step": 17052 }, { "epoch": 55.911475409836065, "grad_norm": 7.259984493255615, "learning_rate": 8.576087200777732e-06, "loss": 0.5073, "step": 17053 }, { "epoch": 55.91475409836065, "grad_norm": 4.755051136016846, "learning_rate": 8.575036141972346e-06, "loss": 0.8103, "step": 17054 }, { "epoch": 55.91803278688525, "grad_norm": 5.804258346557617, "learning_rate": 8.573985099234902e-06, "loss": 0.665, "step": 17055 }, { "epoch": 55.92131147540984, "grad_norm": 8.51426887512207, "learning_rate": 8.572934072577264e-06, "loss": 0.503, "step": 17056 }, { "epoch": 55.924590163934425, "grad_norm": 7.149511814117432, "learning_rate": 8.571883062011279e-06, "loss": 0.3788, "step": 17057 }, { "epoch": 55.927868852459014, "grad_norm": 5.145199298858643, "learning_rate": 8.570832067548796e-06, "loss": 0.7186, "step": 17058 }, { "epoch": 55.93114754098361, "grad_norm": 5.346372127532959, "learning_rate": 8.569781089201663e-06, "loss": 0.4692, "step": 17059 }, { "epoch": 55.9344262295082, "grad_norm": 6.048830986022949, "learning_rate": 8.56873012698174e-06, "loss": 0.6172, "step": 17060 }, { "epoch": 55.937704918032786, "grad_norm": 6.293796062469482, "learning_rate": 8.567679180900872e-06, "loss": 0.7433, "step": 17061 }, { "epoch": 55.940983606557374, "grad_norm": 7.55079984664917, "learning_rate": 8.566628250970912e-06, "loss": 0.4014, "step": 17062 }, { "epoch": 55.94426229508197, "grad_norm": 6.385034084320068, "learning_rate": 8.565577337203705e-06, "loss": 0.3727, "step": 17063 }, { "epoch": 55.94754098360656, "grad_norm": 6.2313408851623535, "learning_rate": 8.564526439611108e-06, "loss": 0.5033, "step": 17064 }, { "epoch": 55.950819672131146, "grad_norm": 5.771002292633057, "learning_rate": 8.563475558204968e-06, "loss": 0.6573, "step": 17065 }, { "epoch": 55.954098360655735, "grad_norm": 6.615884304046631, "learning_rate": 8.562424692997136e-06, "loss": 0.3899, "step": 17066 }, { "epoch": 55.95737704918033, "grad_norm": 6.408566474914551, "learning_rate": 8.561373843999457e-06, "loss": 0.5644, "step": 17067 }, { "epoch": 55.96065573770492, "grad_norm": 4.528681755065918, "learning_rate": 8.560323011223784e-06, "loss": 0.9065, "step": 17068 }, { "epoch": 55.96393442622951, "grad_norm": 9.315053939819336, "learning_rate": 8.559272194681967e-06, "loss": 0.3876, "step": 17069 }, { "epoch": 55.967213114754095, "grad_norm": 4.613624095916748, "learning_rate": 8.558221394385853e-06, "loss": 0.8771, "step": 17070 }, { "epoch": 55.97049180327869, "grad_norm": 7.062511920928955, "learning_rate": 8.557170610347293e-06, "loss": 0.6276, "step": 17071 }, { "epoch": 55.97377049180328, "grad_norm": 7.63931941986084, "learning_rate": 8.556119842578133e-06, "loss": 0.7363, "step": 17072 }, { "epoch": 55.97704918032787, "grad_norm": 5.757155418395996, "learning_rate": 8.555069091090222e-06, "loss": 0.6106, "step": 17073 }, { "epoch": 55.980327868852456, "grad_norm": 5.8663458824157715, "learning_rate": 8.554018355895413e-06, "loss": 0.4548, "step": 17074 }, { "epoch": 55.98360655737705, "grad_norm": 4.892886638641357, "learning_rate": 8.55296763700555e-06, "loss": 0.7459, "step": 17075 }, { "epoch": 55.98688524590164, "grad_norm": 7.629070281982422, "learning_rate": 8.551916934432479e-06, "loss": 0.515, "step": 17076 }, { "epoch": 55.99016393442623, "grad_norm": 6.340507984161377, "learning_rate": 8.550866248188052e-06, "loss": 0.7308, "step": 17077 }, { "epoch": 55.993442622950816, "grad_norm": 5.265913486480713, "learning_rate": 8.549815578284115e-06, "loss": 0.519, "step": 17078 }, { "epoch": 55.99672131147541, "grad_norm": 11.583414077758789, "learning_rate": 8.548764924732516e-06, "loss": 0.8389, "step": 17079 }, { "epoch": 56.0, "grad_norm": 6.169971466064453, "learning_rate": 8.5477142875451e-06, "loss": 0.7295, "step": 17080 }, { "epoch": 56.00327868852459, "grad_norm": 6.116215229034424, "learning_rate": 8.546663666733712e-06, "loss": 0.5364, "step": 17081 }, { "epoch": 56.006557377049184, "grad_norm": 5.4036688804626465, "learning_rate": 8.545613062310207e-06, "loss": 0.4331, "step": 17082 }, { "epoch": 56.00983606557377, "grad_norm": 4.978102684020996, "learning_rate": 8.544562474286426e-06, "loss": 0.4899, "step": 17083 }, { "epoch": 56.01311475409836, "grad_norm": 7.145491600036621, "learning_rate": 8.543511902674217e-06, "loss": 0.6832, "step": 17084 }, { "epoch": 56.01639344262295, "grad_norm": 7.1461944580078125, "learning_rate": 8.54246134748542e-06, "loss": 0.5774, "step": 17085 }, { "epoch": 56.019672131147544, "grad_norm": 5.422879695892334, "learning_rate": 8.541410808731894e-06, "loss": 0.5475, "step": 17086 }, { "epoch": 56.02295081967213, "grad_norm": 6.098763942718506, "learning_rate": 8.540360286425476e-06, "loss": 0.3965, "step": 17087 }, { "epoch": 56.02622950819672, "grad_norm": 5.400501728057861, "learning_rate": 8.539309780578013e-06, "loss": 0.5993, "step": 17088 }, { "epoch": 56.02950819672131, "grad_norm": 4.883519649505615, "learning_rate": 8.538259291201347e-06, "loss": 0.4222, "step": 17089 }, { "epoch": 56.032786885245905, "grad_norm": 5.159879207611084, "learning_rate": 8.537208818307331e-06, "loss": 0.2368, "step": 17090 }, { "epoch": 56.03606557377049, "grad_norm": 6.338431358337402, "learning_rate": 8.536158361907808e-06, "loss": 0.7562, "step": 17091 }, { "epoch": 56.03934426229508, "grad_norm": 5.673121929168701, "learning_rate": 8.535107922014621e-06, "loss": 0.4776, "step": 17092 }, { "epoch": 56.04262295081967, "grad_norm": 5.749150276184082, "learning_rate": 8.534057498639613e-06, "loss": 0.5829, "step": 17093 }, { "epoch": 56.045901639344265, "grad_norm": 8.490425109863281, "learning_rate": 8.533007091794631e-06, "loss": 0.5092, "step": 17094 }, { "epoch": 56.049180327868854, "grad_norm": 5.065845489501953, "learning_rate": 8.531956701491522e-06, "loss": 0.5637, "step": 17095 }, { "epoch": 56.05245901639344, "grad_norm": 6.2667341232299805, "learning_rate": 8.530906327742123e-06, "loss": 0.3901, "step": 17096 }, { "epoch": 56.05573770491803, "grad_norm": 5.449757099151611, "learning_rate": 8.529855970558287e-06, "loss": 0.6483, "step": 17097 }, { "epoch": 56.059016393442626, "grad_norm": 10.850994110107422, "learning_rate": 8.528805629951851e-06, "loss": 0.5624, "step": 17098 }, { "epoch": 56.062295081967214, "grad_norm": 5.986231327056885, "learning_rate": 8.527755305934663e-06, "loss": 0.533, "step": 17099 }, { "epoch": 56.0655737704918, "grad_norm": 9.619776725769043, "learning_rate": 8.526704998518563e-06, "loss": 0.5686, "step": 17100 }, { "epoch": 56.06885245901639, "grad_norm": 5.113493919372559, "learning_rate": 8.525654707715397e-06, "loss": 0.4266, "step": 17101 }, { "epoch": 56.072131147540986, "grad_norm": 5.646784782409668, "learning_rate": 8.524604433537006e-06, "loss": 0.3694, "step": 17102 }, { "epoch": 56.075409836065575, "grad_norm": 5.43759822845459, "learning_rate": 8.523554175995234e-06, "loss": 0.4072, "step": 17103 }, { "epoch": 56.07868852459016, "grad_norm": 6.567662239074707, "learning_rate": 8.522503935101926e-06, "loss": 0.5942, "step": 17104 }, { "epoch": 56.08196721311475, "grad_norm": 5.281989097595215, "learning_rate": 8.52145371086892e-06, "loss": 0.3544, "step": 17105 }, { "epoch": 56.08524590163935, "grad_norm": 7.383183002471924, "learning_rate": 8.520403503308065e-06, "loss": 0.5426, "step": 17106 }, { "epoch": 56.088524590163935, "grad_norm": 6.685805797576904, "learning_rate": 8.51935331243119e-06, "loss": 0.5789, "step": 17107 }, { "epoch": 56.09180327868852, "grad_norm": 4.9146199226379395, "learning_rate": 8.518303138250154e-06, "loss": 0.4594, "step": 17108 }, { "epoch": 56.09508196721311, "grad_norm": 4.761651039123535, "learning_rate": 8.51725298077679e-06, "loss": 0.8321, "step": 17109 }, { "epoch": 56.09836065573771, "grad_norm": 12.110909461975098, "learning_rate": 8.516202840022939e-06, "loss": 0.4721, "step": 17110 }, { "epoch": 56.101639344262296, "grad_norm": 6.966042518615723, "learning_rate": 8.51515271600044e-06, "loss": 0.4319, "step": 17111 }, { "epoch": 56.104918032786884, "grad_norm": 5.4354705810546875, "learning_rate": 8.514102608721141e-06, "loss": 0.6897, "step": 17112 }, { "epoch": 56.10819672131147, "grad_norm": 5.448331832885742, "learning_rate": 8.513052518196883e-06, "loss": 0.5435, "step": 17113 }, { "epoch": 56.11147540983607, "grad_norm": 6.730520248413086, "learning_rate": 8.512002444439502e-06, "loss": 0.4355, "step": 17114 }, { "epoch": 56.114754098360656, "grad_norm": 5.284761428833008, "learning_rate": 8.510952387460836e-06, "loss": 0.5995, "step": 17115 }, { "epoch": 56.118032786885244, "grad_norm": 8.139838218688965, "learning_rate": 8.509902347272734e-06, "loss": 0.4956, "step": 17116 }, { "epoch": 56.12131147540983, "grad_norm": 5.949148654937744, "learning_rate": 8.508852323887033e-06, "loss": 0.608, "step": 17117 }, { "epoch": 56.12459016393443, "grad_norm": 8.844958305358887, "learning_rate": 8.507802317315573e-06, "loss": 0.5257, "step": 17118 }, { "epoch": 56.12786885245902, "grad_norm": 5.943018913269043, "learning_rate": 8.506752327570194e-06, "loss": 0.5915, "step": 17119 }, { "epoch": 56.131147540983605, "grad_norm": 6.174524784088135, "learning_rate": 8.50570235466273e-06, "loss": 0.4878, "step": 17120 }, { "epoch": 56.13442622950819, "grad_norm": 8.30931568145752, "learning_rate": 8.504652398605028e-06, "loss": 0.5333, "step": 17121 }, { "epoch": 56.13770491803279, "grad_norm": 6.7593584060668945, "learning_rate": 8.503602459408929e-06, "loss": 0.2996, "step": 17122 }, { "epoch": 56.14098360655738, "grad_norm": 5.815489768981934, "learning_rate": 8.502552537086262e-06, "loss": 0.4815, "step": 17123 }, { "epoch": 56.144262295081965, "grad_norm": 5.638123512268066, "learning_rate": 8.501502631648874e-06, "loss": 0.7755, "step": 17124 }, { "epoch": 56.14754098360656, "grad_norm": 5.758248329162598, "learning_rate": 8.500452743108604e-06, "loss": 0.4359, "step": 17125 }, { "epoch": 56.15081967213115, "grad_norm": 8.306086540222168, "learning_rate": 8.499402871477286e-06, "loss": 0.4769, "step": 17126 }, { "epoch": 56.15409836065574, "grad_norm": 6.781825065612793, "learning_rate": 8.498353016766763e-06, "loss": 0.514, "step": 17127 }, { "epoch": 56.157377049180326, "grad_norm": 4.711350440979004, "learning_rate": 8.497303178988869e-06, "loss": 0.5816, "step": 17128 }, { "epoch": 56.16065573770492, "grad_norm": 8.392274856567383, "learning_rate": 8.496253358155444e-06, "loss": 0.4197, "step": 17129 }, { "epoch": 56.16393442622951, "grad_norm": 6.365407943725586, "learning_rate": 8.495203554278328e-06, "loss": 0.4687, "step": 17130 }, { "epoch": 56.1672131147541, "grad_norm": 9.159866333007812, "learning_rate": 8.494153767369357e-06, "loss": 0.2795, "step": 17131 }, { "epoch": 56.170491803278686, "grad_norm": 8.695056915283203, "learning_rate": 8.493103997440367e-06, "loss": 0.6009, "step": 17132 }, { "epoch": 56.17377049180328, "grad_norm": 5.1017584800720215, "learning_rate": 8.492054244503193e-06, "loss": 0.3214, "step": 17133 }, { "epoch": 56.17704918032787, "grad_norm": 4.89459228515625, "learning_rate": 8.491004508569679e-06, "loss": 0.4688, "step": 17134 }, { "epoch": 56.18032786885246, "grad_norm": 4.696197509765625, "learning_rate": 8.489954789651658e-06, "loss": 0.4453, "step": 17135 }, { "epoch": 56.18360655737705, "grad_norm": 5.17916202545166, "learning_rate": 8.488905087760966e-06, "loss": 0.5958, "step": 17136 }, { "epoch": 56.18688524590164, "grad_norm": 5.43186092376709, "learning_rate": 8.487855402909438e-06, "loss": 0.5191, "step": 17137 }, { "epoch": 56.19016393442623, "grad_norm": 5.748224258422852, "learning_rate": 8.486805735108916e-06, "loss": 0.5923, "step": 17138 }, { "epoch": 56.19344262295082, "grad_norm": 7.184156894683838, "learning_rate": 8.485756084371233e-06, "loss": 0.6644, "step": 17139 }, { "epoch": 56.19672131147541, "grad_norm": 6.646535873413086, "learning_rate": 8.484706450708223e-06, "loss": 0.6328, "step": 17140 }, { "epoch": 56.2, "grad_norm": 8.436978340148926, "learning_rate": 8.48365683413172e-06, "loss": 0.3409, "step": 17141 }, { "epoch": 56.20327868852459, "grad_norm": 5.818031311035156, "learning_rate": 8.482607234653568e-06, "loss": 0.4161, "step": 17142 }, { "epoch": 56.20655737704918, "grad_norm": 5.351778030395508, "learning_rate": 8.481557652285596e-06, "loss": 0.5403, "step": 17143 }, { "epoch": 56.20983606557377, "grad_norm": 6.025547027587891, "learning_rate": 8.48050808703964e-06, "loss": 0.4189, "step": 17144 }, { "epoch": 56.21311475409836, "grad_norm": 5.487768650054932, "learning_rate": 8.479458538927536e-06, "loss": 0.3577, "step": 17145 }, { "epoch": 56.21639344262295, "grad_norm": 5.695289134979248, "learning_rate": 8.478409007961113e-06, "loss": 0.4263, "step": 17146 }, { "epoch": 56.21967213114754, "grad_norm": 8.053215026855469, "learning_rate": 8.477359494152215e-06, "loss": 0.5484, "step": 17147 }, { "epoch": 56.22295081967213, "grad_norm": 6.443281650543213, "learning_rate": 8.476309997512672e-06, "loss": 0.5761, "step": 17148 }, { "epoch": 56.226229508196724, "grad_norm": 6.471095085144043, "learning_rate": 8.475260518054316e-06, "loss": 0.403, "step": 17149 }, { "epoch": 56.22950819672131, "grad_norm": 5.667386054992676, "learning_rate": 8.474211055788984e-06, "loss": 0.493, "step": 17150 }, { "epoch": 56.2327868852459, "grad_norm": 6.110734939575195, "learning_rate": 8.47316161072851e-06, "loss": 0.5837, "step": 17151 }, { "epoch": 56.23606557377049, "grad_norm": 7.394548416137695, "learning_rate": 8.472112182884724e-06, "loss": 0.631, "step": 17152 }, { "epoch": 56.239344262295084, "grad_norm": 6.117658615112305, "learning_rate": 8.471062772269463e-06, "loss": 0.5554, "step": 17153 }, { "epoch": 56.24262295081967, "grad_norm": 5.221480846405029, "learning_rate": 8.470013378894559e-06, "loss": 0.3629, "step": 17154 }, { "epoch": 56.24590163934426, "grad_norm": 21.354127883911133, "learning_rate": 8.468964002771842e-06, "loss": 0.6805, "step": 17155 }, { "epoch": 56.24918032786885, "grad_norm": 6.185773849487305, "learning_rate": 8.467914643913153e-06, "loss": 0.5929, "step": 17156 }, { "epoch": 56.252459016393445, "grad_norm": 5.790744304656982, "learning_rate": 8.466865302330317e-06, "loss": 0.7539, "step": 17157 }, { "epoch": 56.25573770491803, "grad_norm": 5.468581676483154, "learning_rate": 8.46581597803517e-06, "loss": 0.4833, "step": 17158 }, { "epoch": 56.25901639344262, "grad_norm": 6.108795642852783, "learning_rate": 8.464766671039538e-06, "loss": 0.8007, "step": 17159 }, { "epoch": 56.26229508196721, "grad_norm": 5.775638103485107, "learning_rate": 8.463717381355263e-06, "loss": 0.582, "step": 17160 }, { "epoch": 56.265573770491805, "grad_norm": 7.210494518280029, "learning_rate": 8.46266810899417e-06, "loss": 0.3219, "step": 17161 }, { "epoch": 56.268852459016394, "grad_norm": 7.161315441131592, "learning_rate": 8.461618853968095e-06, "loss": 0.5795, "step": 17162 }, { "epoch": 56.27213114754098, "grad_norm": 6.370486259460449, "learning_rate": 8.460569616288862e-06, "loss": 0.5911, "step": 17163 }, { "epoch": 56.27540983606557, "grad_norm": 8.21976375579834, "learning_rate": 8.459520395968312e-06, "loss": 0.4623, "step": 17164 }, { "epoch": 56.278688524590166, "grad_norm": 10.628722190856934, "learning_rate": 8.45847119301827e-06, "loss": 0.4671, "step": 17165 }, { "epoch": 56.281967213114754, "grad_norm": 6.293430328369141, "learning_rate": 8.457422007450568e-06, "loss": 0.5047, "step": 17166 }, { "epoch": 56.28524590163934, "grad_norm": 5.799104690551758, "learning_rate": 8.456372839277033e-06, "loss": 0.3214, "step": 17167 }, { "epoch": 56.28852459016394, "grad_norm": 7.378109931945801, "learning_rate": 8.455323688509504e-06, "loss": 0.6339, "step": 17168 }, { "epoch": 56.291803278688526, "grad_norm": 5.948485374450684, "learning_rate": 8.454274555159805e-06, "loss": 0.5875, "step": 17169 }, { "epoch": 56.295081967213115, "grad_norm": 9.231645584106445, "learning_rate": 8.453225439239769e-06, "loss": 0.3631, "step": 17170 }, { "epoch": 56.2983606557377, "grad_norm": 5.789882659912109, "learning_rate": 8.452176340761224e-06, "loss": 0.3906, "step": 17171 }, { "epoch": 56.3016393442623, "grad_norm": 6.192158222198486, "learning_rate": 8.451127259735996e-06, "loss": 0.7001, "step": 17172 }, { "epoch": 56.30491803278689, "grad_norm": 6.482852458953857, "learning_rate": 8.450078196175921e-06, "loss": 0.4724, "step": 17173 }, { "epoch": 56.308196721311475, "grad_norm": 5.017152786254883, "learning_rate": 8.449029150092828e-06, "loss": 0.3435, "step": 17174 }, { "epoch": 56.31147540983606, "grad_norm": 5.108558177947998, "learning_rate": 8.447980121498541e-06, "loss": 0.6086, "step": 17175 }, { "epoch": 56.31475409836066, "grad_norm": 5.37526273727417, "learning_rate": 8.446931110404892e-06, "loss": 0.3812, "step": 17176 }, { "epoch": 56.31803278688525, "grad_norm": 4.797333717346191, "learning_rate": 8.445882116823711e-06, "loss": 0.5221, "step": 17177 }, { "epoch": 56.321311475409836, "grad_norm": 6.189193248748779, "learning_rate": 8.444833140766824e-06, "loss": 0.3139, "step": 17178 }, { "epoch": 56.324590163934424, "grad_norm": 5.121956825256348, "learning_rate": 8.443784182246062e-06, "loss": 0.3415, "step": 17179 }, { "epoch": 56.32786885245902, "grad_norm": 7.628768444061279, "learning_rate": 8.44273524127325e-06, "loss": 0.5632, "step": 17180 }, { "epoch": 56.33114754098361, "grad_norm": 6.00595235824585, "learning_rate": 8.441686317860219e-06, "loss": 0.4632, "step": 17181 }, { "epoch": 56.334426229508196, "grad_norm": 8.892910957336426, "learning_rate": 8.440637412018792e-06, "loss": 0.8858, "step": 17182 }, { "epoch": 56.337704918032784, "grad_norm": 5.9124603271484375, "learning_rate": 8.439588523760802e-06, "loss": 0.3638, "step": 17183 }, { "epoch": 56.34098360655738, "grad_norm": 6.306292533874512, "learning_rate": 8.438539653098073e-06, "loss": 0.6303, "step": 17184 }, { "epoch": 56.34426229508197, "grad_norm": 5.565991401672363, "learning_rate": 8.43749080004243e-06, "loss": 0.7443, "step": 17185 }, { "epoch": 56.34754098360656, "grad_norm": 9.048876762390137, "learning_rate": 8.436441964605708e-06, "loss": 0.4807, "step": 17186 }, { "epoch": 56.350819672131145, "grad_norm": 6.405335903167725, "learning_rate": 8.435393146799727e-06, "loss": 0.5381, "step": 17187 }, { "epoch": 56.35409836065574, "grad_norm": 7.962364196777344, "learning_rate": 8.434344346636314e-06, "loss": 0.4523, "step": 17188 }, { "epoch": 56.35737704918033, "grad_norm": 6.99259090423584, "learning_rate": 8.433295564127294e-06, "loss": 0.5143, "step": 17189 }, { "epoch": 56.36065573770492, "grad_norm": 5.954000473022461, "learning_rate": 8.432246799284498e-06, "loss": 0.3795, "step": 17190 }, { "epoch": 56.363934426229505, "grad_norm": 6.085325241088867, "learning_rate": 8.431198052119753e-06, "loss": 0.4176, "step": 17191 }, { "epoch": 56.3672131147541, "grad_norm": 7.228283882141113, "learning_rate": 8.430149322644878e-06, "loss": 0.6386, "step": 17192 }, { "epoch": 56.37049180327869, "grad_norm": 7.388492107391357, "learning_rate": 8.429100610871698e-06, "loss": 0.361, "step": 17193 }, { "epoch": 56.37377049180328, "grad_norm": 9.504697799682617, "learning_rate": 8.428051916812046e-06, "loss": 0.4765, "step": 17194 }, { "epoch": 56.377049180327866, "grad_norm": 5.58759069442749, "learning_rate": 8.427003240477743e-06, "loss": 0.4074, "step": 17195 }, { "epoch": 56.38032786885246, "grad_norm": 7.882652282714844, "learning_rate": 8.425954581880614e-06, "loss": 0.4314, "step": 17196 }, { "epoch": 56.38360655737705, "grad_norm": 5.451951503753662, "learning_rate": 8.424905941032484e-06, "loss": 0.5086, "step": 17197 }, { "epoch": 56.38688524590164, "grad_norm": 7.145381927490234, "learning_rate": 8.423857317945174e-06, "loss": 0.3898, "step": 17198 }, { "epoch": 56.390163934426226, "grad_norm": 5.9539642333984375, "learning_rate": 8.422808712630513e-06, "loss": 0.715, "step": 17199 }, { "epoch": 56.39344262295082, "grad_norm": 8.466346740722656, "learning_rate": 8.421760125100327e-06, "loss": 0.4093, "step": 17200 }, { "epoch": 56.39672131147541, "grad_norm": 5.516547679901123, "learning_rate": 8.420711555366434e-06, "loss": 0.6052, "step": 17201 }, { "epoch": 56.4, "grad_norm": 5.309063911437988, "learning_rate": 8.419663003440657e-06, "loss": 0.4779, "step": 17202 }, { "epoch": 56.40327868852459, "grad_norm": 6.123836994171143, "learning_rate": 8.418614469334826e-06, "loss": 0.4857, "step": 17203 }, { "epoch": 56.40655737704918, "grad_norm": 5.426207542419434, "learning_rate": 8.417565953060762e-06, "loss": 0.3944, "step": 17204 }, { "epoch": 56.40983606557377, "grad_norm": 5.842979431152344, "learning_rate": 8.416517454630287e-06, "loss": 0.5697, "step": 17205 }, { "epoch": 56.41311475409836, "grad_norm": 5.898947715759277, "learning_rate": 8.415468974055221e-06, "loss": 0.5155, "step": 17206 }, { "epoch": 56.41639344262295, "grad_norm": 8.243270874023438, "learning_rate": 8.414420511347393e-06, "loss": 0.5895, "step": 17207 }, { "epoch": 56.41967213114754, "grad_norm": 12.444408416748047, "learning_rate": 8.41337206651862e-06, "loss": 0.6296, "step": 17208 }, { "epoch": 56.42295081967213, "grad_norm": 5.364538669586182, "learning_rate": 8.41232363958073e-06, "loss": 0.6066, "step": 17209 }, { "epoch": 56.42622950819672, "grad_norm": 7.342099189758301, "learning_rate": 8.41127523054554e-06, "loss": 0.477, "step": 17210 }, { "epoch": 56.429508196721315, "grad_norm": 5.5477447509765625, "learning_rate": 8.410226839424871e-06, "loss": 0.497, "step": 17211 }, { "epoch": 56.4327868852459, "grad_norm": 5.012729167938232, "learning_rate": 8.40917846623055e-06, "loss": 0.8627, "step": 17212 }, { "epoch": 56.43606557377049, "grad_norm": 8.57170581817627, "learning_rate": 8.408130110974398e-06, "loss": 0.3374, "step": 17213 }, { "epoch": 56.43934426229508, "grad_norm": 4.941926002502441, "learning_rate": 8.407081773668231e-06, "loss": 0.1583, "step": 17214 }, { "epoch": 56.442622950819676, "grad_norm": 13.589319229125977, "learning_rate": 8.40603345432387e-06, "loss": 0.4735, "step": 17215 }, { "epoch": 56.445901639344264, "grad_norm": 45.19287109375, "learning_rate": 8.404985152953144e-06, "loss": 0.597, "step": 17216 }, { "epoch": 56.44918032786885, "grad_norm": 6.075030326843262, "learning_rate": 8.40393686956787e-06, "loss": 0.5933, "step": 17217 }, { "epoch": 56.45245901639344, "grad_norm": 4.729991436004639, "learning_rate": 8.402888604179866e-06, "loss": 0.4399, "step": 17218 }, { "epoch": 56.455737704918036, "grad_norm": 5.059980392456055, "learning_rate": 8.40184035680095e-06, "loss": 0.4215, "step": 17219 }, { "epoch": 56.459016393442624, "grad_norm": 25.180177688598633, "learning_rate": 8.40079212744295e-06, "loss": 0.6692, "step": 17220 }, { "epoch": 56.46229508196721, "grad_norm": 6.4478349685668945, "learning_rate": 8.399743916117679e-06, "loss": 0.3705, "step": 17221 }, { "epoch": 56.4655737704918, "grad_norm": 9.99416446685791, "learning_rate": 8.398695722836963e-06, "loss": 0.4084, "step": 17222 }, { "epoch": 56.4688524590164, "grad_norm": 7.748241901397705, "learning_rate": 8.397647547612615e-06, "loss": 0.4873, "step": 17223 }, { "epoch": 56.472131147540985, "grad_norm": 7.231114864349365, "learning_rate": 8.396599390456453e-06, "loss": 0.5029, "step": 17224 }, { "epoch": 56.47540983606557, "grad_norm": 5.383636951446533, "learning_rate": 8.395551251380304e-06, "loss": 0.6551, "step": 17225 }, { "epoch": 56.47868852459016, "grad_norm": 5.44440221786499, "learning_rate": 8.394503130395984e-06, "loss": 0.5936, "step": 17226 }, { "epoch": 56.48196721311476, "grad_norm": 5.990206718444824, "learning_rate": 8.39345502751531e-06, "loss": 0.5192, "step": 17227 }, { "epoch": 56.485245901639345, "grad_norm": 6.622363567352295, "learning_rate": 8.392406942750097e-06, "loss": 0.4804, "step": 17228 }, { "epoch": 56.488524590163934, "grad_norm": 5.495450496673584, "learning_rate": 8.391358876112172e-06, "loss": 0.64, "step": 17229 }, { "epoch": 56.49180327868852, "grad_norm": 6.057428359985352, "learning_rate": 8.390310827613345e-06, "loss": 0.4366, "step": 17230 }, { "epoch": 56.49508196721312, "grad_norm": 11.750741004943848, "learning_rate": 8.389262797265439e-06, "loss": 0.6201, "step": 17231 }, { "epoch": 56.498360655737706, "grad_norm": 6.008547306060791, "learning_rate": 8.388214785080267e-06, "loss": 0.5428, "step": 17232 }, { "epoch": 56.501639344262294, "grad_norm": 4.916327953338623, "learning_rate": 8.387166791069653e-06, "loss": 0.452, "step": 17233 }, { "epoch": 56.50491803278688, "grad_norm": 6.860289573669434, "learning_rate": 8.386118815245407e-06, "loss": 0.6586, "step": 17234 }, { "epoch": 56.50819672131148, "grad_norm": 5.617778301239014, "learning_rate": 8.385070857619353e-06, "loss": 0.6763, "step": 17235 }, { "epoch": 56.511475409836066, "grad_norm": 5.318525791168213, "learning_rate": 8.384022918203303e-06, "loss": 0.6978, "step": 17236 }, { "epoch": 56.514754098360655, "grad_norm": 7.085527420043945, "learning_rate": 8.38297499700907e-06, "loss": 0.4374, "step": 17237 }, { "epoch": 56.51803278688524, "grad_norm": 6.857274055480957, "learning_rate": 8.381927094048481e-06, "loss": 0.4211, "step": 17238 }, { "epoch": 56.52131147540984, "grad_norm": 5.357641696929932, "learning_rate": 8.380879209333346e-06, "loss": 0.3428, "step": 17239 }, { "epoch": 56.52459016393443, "grad_norm": 5.470857620239258, "learning_rate": 8.37983134287548e-06, "loss": 0.5831, "step": 17240 }, { "epoch": 56.527868852459015, "grad_norm": 9.23009967803955, "learning_rate": 8.378783494686698e-06, "loss": 0.4359, "step": 17241 }, { "epoch": 56.5311475409836, "grad_norm": 6.032407283782959, "learning_rate": 8.37773566477882e-06, "loss": 0.4297, "step": 17242 }, { "epoch": 56.5344262295082, "grad_norm": 5.78645658493042, "learning_rate": 8.376687853163662e-06, "loss": 0.4816, "step": 17243 }, { "epoch": 56.53770491803279, "grad_norm": 5.619021415710449, "learning_rate": 8.375640059853033e-06, "loss": 0.528, "step": 17244 }, { "epoch": 56.540983606557376, "grad_norm": 4.99103307723999, "learning_rate": 8.374592284858748e-06, "loss": 0.7357, "step": 17245 }, { "epoch": 56.544262295081964, "grad_norm": 19.156597137451172, "learning_rate": 8.37354452819263e-06, "loss": 0.3516, "step": 17246 }, { "epoch": 56.54754098360656, "grad_norm": 5.963207244873047, "learning_rate": 8.372496789866488e-06, "loss": 0.6432, "step": 17247 }, { "epoch": 56.55081967213115, "grad_norm": 10.195049285888672, "learning_rate": 8.371449069892137e-06, "loss": 0.6068, "step": 17248 }, { "epoch": 56.554098360655736, "grad_norm": 5.927477836608887, "learning_rate": 8.37040136828139e-06, "loss": 0.6084, "step": 17249 }, { "epoch": 56.557377049180324, "grad_norm": 5.3887858390808105, "learning_rate": 8.36935368504606e-06, "loss": 0.4711, "step": 17250 }, { "epoch": 56.56065573770492, "grad_norm": 5.889270782470703, "learning_rate": 8.368306020197965e-06, "loss": 0.5006, "step": 17251 }, { "epoch": 56.56393442622951, "grad_norm": 5.424232482910156, "learning_rate": 8.367258373748916e-06, "loss": 0.3772, "step": 17252 }, { "epoch": 56.5672131147541, "grad_norm": 5.297119617462158, "learning_rate": 8.366210745710728e-06, "loss": 0.2894, "step": 17253 }, { "epoch": 56.570491803278685, "grad_norm": 5.9366631507873535, "learning_rate": 8.365163136095207e-06, "loss": 0.5018, "step": 17254 }, { "epoch": 56.57377049180328, "grad_norm": 4.55872917175293, "learning_rate": 8.364115544914175e-06, "loss": 0.7081, "step": 17255 }, { "epoch": 56.57704918032787, "grad_norm": 6.459661483764648, "learning_rate": 8.363067972179442e-06, "loss": 0.4953, "step": 17256 }, { "epoch": 56.58032786885246, "grad_norm": 6.004605770111084, "learning_rate": 8.362020417902819e-06, "loss": 0.6219, "step": 17257 }, { "epoch": 56.58360655737705, "grad_norm": 7.369476795196533, "learning_rate": 8.360972882096117e-06, "loss": 0.3889, "step": 17258 }, { "epoch": 56.58688524590164, "grad_norm": 5.705878734588623, "learning_rate": 8.359925364771154e-06, "loss": 0.6329, "step": 17259 }, { "epoch": 56.59016393442623, "grad_norm": 6.188769817352295, "learning_rate": 8.358877865939733e-06, "loss": 0.2931, "step": 17260 }, { "epoch": 56.59344262295082, "grad_norm": 5.523849964141846, "learning_rate": 8.357830385613674e-06, "loss": 0.5863, "step": 17261 }, { "epoch": 56.59672131147541, "grad_norm": 5.048842430114746, "learning_rate": 8.356782923804785e-06, "loss": 0.7826, "step": 17262 }, { "epoch": 56.6, "grad_norm": 5.734825134277344, "learning_rate": 8.355735480524874e-06, "loss": 0.448, "step": 17263 }, { "epoch": 56.60327868852459, "grad_norm": 6.3038859367370605, "learning_rate": 8.354688055785756e-06, "loss": 0.8142, "step": 17264 }, { "epoch": 56.60655737704918, "grad_norm": 6.3250651359558105, "learning_rate": 8.353640649599242e-06, "loss": 0.634, "step": 17265 }, { "epoch": 56.609836065573774, "grad_norm": 5.353889465332031, "learning_rate": 8.352593261977143e-06, "loss": 0.6081, "step": 17266 }, { "epoch": 56.61311475409836, "grad_norm": 9.446573257446289, "learning_rate": 8.351545892931262e-06, "loss": 0.435, "step": 17267 }, { "epoch": 56.61639344262295, "grad_norm": 8.135986328125, "learning_rate": 8.35049854247342e-06, "loss": 0.6249, "step": 17268 }, { "epoch": 56.61967213114754, "grad_norm": 9.028569221496582, "learning_rate": 8.349451210615421e-06, "loss": 0.717, "step": 17269 }, { "epoch": 56.622950819672134, "grad_norm": 6.49995756149292, "learning_rate": 8.348403897369077e-06, "loss": 0.4976, "step": 17270 }, { "epoch": 56.62622950819672, "grad_norm": 12.887594223022461, "learning_rate": 8.347356602746191e-06, "loss": 0.6927, "step": 17271 }, { "epoch": 56.62950819672131, "grad_norm": 5.7760419845581055, "learning_rate": 8.346309326758583e-06, "loss": 0.6407, "step": 17272 }, { "epoch": 56.6327868852459, "grad_norm": 5.969536304473877, "learning_rate": 8.345262069418056e-06, "loss": 0.5042, "step": 17273 }, { "epoch": 56.636065573770495, "grad_norm": 6.777558326721191, "learning_rate": 8.34421483073642e-06, "loss": 0.5578, "step": 17274 }, { "epoch": 56.63934426229508, "grad_norm": 7.264989852905273, "learning_rate": 8.343167610725485e-06, "loss": 0.7754, "step": 17275 }, { "epoch": 56.64262295081967, "grad_norm": 9.832403182983398, "learning_rate": 8.342120409397052e-06, "loss": 0.4788, "step": 17276 }, { "epoch": 56.64590163934426, "grad_norm": 5.017004489898682, "learning_rate": 8.341073226762938e-06, "loss": 0.5401, "step": 17277 }, { "epoch": 56.649180327868855, "grad_norm": 5.598363876342773, "learning_rate": 8.34002606283495e-06, "loss": 0.8891, "step": 17278 }, { "epoch": 56.65245901639344, "grad_norm": 5.05504846572876, "learning_rate": 8.338978917624894e-06, "loss": 0.3463, "step": 17279 }, { "epoch": 56.65573770491803, "grad_norm": 10.14142894744873, "learning_rate": 8.337931791144572e-06, "loss": 0.8227, "step": 17280 }, { "epoch": 56.65901639344262, "grad_norm": 5.475444316864014, "learning_rate": 8.336884683405802e-06, "loss": 0.4832, "step": 17281 }, { "epoch": 56.662295081967216, "grad_norm": 7.021737098693848, "learning_rate": 8.335837594420389e-06, "loss": 0.4914, "step": 17282 }, { "epoch": 56.665573770491804, "grad_norm": 5.676885604858398, "learning_rate": 8.334790524200134e-06, "loss": 0.6715, "step": 17283 }, { "epoch": 56.66885245901639, "grad_norm": 9.32848072052002, "learning_rate": 8.333743472756844e-06, "loss": 0.5463, "step": 17284 }, { "epoch": 56.67213114754098, "grad_norm": 4.9925217628479, "learning_rate": 8.332696440102334e-06, "loss": 0.598, "step": 17285 }, { "epoch": 56.675409836065576, "grad_norm": 5.632884979248047, "learning_rate": 8.331649426248405e-06, "loss": 0.6492, "step": 17286 }, { "epoch": 56.678688524590164, "grad_norm": 8.393845558166504, "learning_rate": 8.33060243120686e-06, "loss": 0.4196, "step": 17287 }, { "epoch": 56.68196721311475, "grad_norm": 4.917845726013184, "learning_rate": 8.329555454989512e-06, "loss": 0.8223, "step": 17288 }, { "epoch": 56.68524590163934, "grad_norm": 5.449925422668457, "learning_rate": 8.32850849760816e-06, "loss": 0.5726, "step": 17289 }, { "epoch": 56.68852459016394, "grad_norm": 8.743341445922852, "learning_rate": 8.327461559074613e-06, "loss": 0.5443, "step": 17290 }, { "epoch": 56.691803278688525, "grad_norm": 5.882908344268799, "learning_rate": 8.326414639400678e-06, "loss": 0.3941, "step": 17291 }, { "epoch": 56.69508196721311, "grad_norm": 6.98029899597168, "learning_rate": 8.325367738598157e-06, "loss": 0.4799, "step": 17292 }, { "epoch": 56.6983606557377, "grad_norm": 4.931762218475342, "learning_rate": 8.324320856678854e-06, "loss": 0.5638, "step": 17293 }, { "epoch": 56.7016393442623, "grad_norm": 6.46143913269043, "learning_rate": 8.323273993654577e-06, "loss": 0.5157, "step": 17294 }, { "epoch": 56.704918032786885, "grad_norm": 13.435486793518066, "learning_rate": 8.322227149537131e-06, "loss": 0.6964, "step": 17295 }, { "epoch": 56.708196721311474, "grad_norm": 7.765055179595947, "learning_rate": 8.321180324338319e-06, "loss": 0.4803, "step": 17296 }, { "epoch": 56.71147540983607, "grad_norm": 5.397225856781006, "learning_rate": 8.320133518069939e-06, "loss": 0.3577, "step": 17297 }, { "epoch": 56.71475409836066, "grad_norm": 6.169031620025635, "learning_rate": 8.319086730743804e-06, "loss": 0.5777, "step": 17298 }, { "epoch": 56.718032786885246, "grad_norm": 6.313667297363281, "learning_rate": 8.318039962371713e-06, "loss": 0.5279, "step": 17299 }, { "epoch": 56.721311475409834, "grad_norm": 6.863283157348633, "learning_rate": 8.316993212965472e-06, "loss": 0.5298, "step": 17300 }, { "epoch": 56.72459016393443, "grad_norm": 6.0648417472839355, "learning_rate": 8.315946482536883e-06, "loss": 0.5117, "step": 17301 }, { "epoch": 56.72786885245902, "grad_norm": 11.285530090332031, "learning_rate": 8.314899771097742e-06, "loss": 0.5896, "step": 17302 }, { "epoch": 56.731147540983606, "grad_norm": 66.89422607421875, "learning_rate": 8.313853078659864e-06, "loss": 0.5304, "step": 17303 }, { "epoch": 56.734426229508195, "grad_norm": 5.942285060882568, "learning_rate": 8.312806405235045e-06, "loss": 0.5043, "step": 17304 }, { "epoch": 56.73770491803279, "grad_norm": 6.527319431304932, "learning_rate": 8.311759750835089e-06, "loss": 0.3211, "step": 17305 }, { "epoch": 56.74098360655738, "grad_norm": 6.103428840637207, "learning_rate": 8.31071311547179e-06, "loss": 0.3717, "step": 17306 }, { "epoch": 56.74426229508197, "grad_norm": 8.038166046142578, "learning_rate": 8.309666499156964e-06, "loss": 0.5271, "step": 17307 }, { "epoch": 56.747540983606555, "grad_norm": 6.303310394287109, "learning_rate": 8.308619901902406e-06, "loss": 0.5905, "step": 17308 }, { "epoch": 56.75081967213115, "grad_norm": 9.35822582244873, "learning_rate": 8.307573323719915e-06, "loss": 0.8046, "step": 17309 }, { "epoch": 56.75409836065574, "grad_norm": 6.1670122146606445, "learning_rate": 8.306526764621292e-06, "loss": 0.5436, "step": 17310 }, { "epoch": 56.75737704918033, "grad_norm": 7.830843448638916, "learning_rate": 8.305480224618344e-06, "loss": 0.5048, "step": 17311 }, { "epoch": 56.760655737704916, "grad_norm": 6.8178181648254395, "learning_rate": 8.30443370372287e-06, "loss": 0.516, "step": 17312 }, { "epoch": 56.76393442622951, "grad_norm": 5.922804355621338, "learning_rate": 8.303387201946665e-06, "loss": 0.5157, "step": 17313 }, { "epoch": 56.7672131147541, "grad_norm": 5.118797779083252, "learning_rate": 8.302340719301535e-06, "loss": 0.9292, "step": 17314 }, { "epoch": 56.77049180327869, "grad_norm": 5.5292205810546875, "learning_rate": 8.301294255799278e-06, "loss": 0.4681, "step": 17315 }, { "epoch": 56.773770491803276, "grad_norm": 5.174983978271484, "learning_rate": 8.300247811451693e-06, "loss": 0.5908, "step": 17316 }, { "epoch": 56.77704918032787, "grad_norm": 7.4111127853393555, "learning_rate": 8.299201386270585e-06, "loss": 0.5298, "step": 17317 }, { "epoch": 56.78032786885246, "grad_norm": 5.763591289520264, "learning_rate": 8.298154980267747e-06, "loss": 0.7714, "step": 17318 }, { "epoch": 56.78360655737705, "grad_norm": 9.143176078796387, "learning_rate": 8.297108593454979e-06, "loss": 0.4589, "step": 17319 }, { "epoch": 56.78688524590164, "grad_norm": 5.784299850463867, "learning_rate": 8.296062225844086e-06, "loss": 0.5227, "step": 17320 }, { "epoch": 56.79016393442623, "grad_norm": 7.922661781311035, "learning_rate": 8.295015877446863e-06, "loss": 0.5343, "step": 17321 }, { "epoch": 56.79344262295082, "grad_norm": 4.658591270446777, "learning_rate": 8.293969548275108e-06, "loss": 0.4129, "step": 17322 }, { "epoch": 56.79672131147541, "grad_norm": 5.608694076538086, "learning_rate": 8.292923238340617e-06, "loss": 0.5297, "step": 17323 }, { "epoch": 56.8, "grad_norm": 6.0407209396362305, "learning_rate": 8.291876947655197e-06, "loss": 0.8137, "step": 17324 }, { "epoch": 56.80327868852459, "grad_norm": 5.391891956329346, "learning_rate": 8.29083067623064e-06, "loss": 0.6023, "step": 17325 }, { "epoch": 56.80655737704918, "grad_norm": 5.607795238494873, "learning_rate": 8.289784424078742e-06, "loss": 0.4579, "step": 17326 }, { "epoch": 56.80983606557377, "grad_norm": 5.682486534118652, "learning_rate": 8.288738191211303e-06, "loss": 0.632, "step": 17327 }, { "epoch": 56.81311475409836, "grad_norm": 9.353818893432617, "learning_rate": 8.287691977640117e-06, "loss": 0.5307, "step": 17328 }, { "epoch": 56.81639344262295, "grad_norm": 6.23787784576416, "learning_rate": 8.286645783376989e-06, "loss": 0.7661, "step": 17329 }, { "epoch": 56.81967213114754, "grad_norm": 6.374183177947998, "learning_rate": 8.28559960843371e-06, "loss": 0.3823, "step": 17330 }, { "epoch": 56.82295081967213, "grad_norm": 92.8211898803711, "learning_rate": 8.284553452822079e-06, "loss": 0.5396, "step": 17331 }, { "epoch": 56.82622950819672, "grad_norm": 6.771693706512451, "learning_rate": 8.283507316553885e-06, "loss": 0.4608, "step": 17332 }, { "epoch": 56.829508196721314, "grad_norm": 5.04325008392334, "learning_rate": 8.282461199640938e-06, "loss": 0.5976, "step": 17333 }, { "epoch": 56.8327868852459, "grad_norm": 9.257501602172852, "learning_rate": 8.281415102095025e-06, "loss": 0.4611, "step": 17334 }, { "epoch": 56.83606557377049, "grad_norm": 14.842646598815918, "learning_rate": 8.280369023927944e-06, "loss": 0.6792, "step": 17335 }, { "epoch": 56.83934426229508, "grad_norm": 6.318428993225098, "learning_rate": 8.279322965151486e-06, "loss": 0.4483, "step": 17336 }, { "epoch": 56.842622950819674, "grad_norm": 7.266195297241211, "learning_rate": 8.278276925777456e-06, "loss": 0.4773, "step": 17337 }, { "epoch": 56.84590163934426, "grad_norm": 6.329562187194824, "learning_rate": 8.277230905817642e-06, "loss": 0.441, "step": 17338 }, { "epoch": 56.84918032786885, "grad_norm": 10.908085823059082, "learning_rate": 8.276184905283838e-06, "loss": 0.4974, "step": 17339 }, { "epoch": 56.85245901639344, "grad_norm": 6.864776134490967, "learning_rate": 8.275138924187846e-06, "loss": 0.5879, "step": 17340 }, { "epoch": 56.855737704918035, "grad_norm": 5.345987796783447, "learning_rate": 8.274092962541452e-06, "loss": 0.4681, "step": 17341 }, { "epoch": 56.85901639344262, "grad_norm": 5.923664569854736, "learning_rate": 8.273047020356455e-06, "loss": 0.4573, "step": 17342 }, { "epoch": 56.86229508196721, "grad_norm": 20.288118362426758, "learning_rate": 8.272001097644651e-06, "loss": 0.4275, "step": 17343 }, { "epoch": 56.86557377049181, "grad_norm": 7.348247051239014, "learning_rate": 8.270955194417832e-06, "loss": 0.5513, "step": 17344 }, { "epoch": 56.868852459016395, "grad_norm": 7.698775768280029, "learning_rate": 8.269909310687787e-06, "loss": 0.5108, "step": 17345 }, { "epoch": 56.87213114754098, "grad_norm": 5.832586765289307, "learning_rate": 8.268863446466315e-06, "loss": 0.5486, "step": 17346 }, { "epoch": 56.87540983606557, "grad_norm": 6.720149993896484, "learning_rate": 8.267817601765208e-06, "loss": 0.637, "step": 17347 }, { "epoch": 56.87868852459017, "grad_norm": 7.461721897125244, "learning_rate": 8.26677177659626e-06, "loss": 0.5368, "step": 17348 }, { "epoch": 56.881967213114756, "grad_norm": 7.743000030517578, "learning_rate": 8.265725970971257e-06, "loss": 0.5199, "step": 17349 }, { "epoch": 56.885245901639344, "grad_norm": 6.539090633392334, "learning_rate": 8.264680184902003e-06, "loss": 0.683, "step": 17350 }, { "epoch": 56.88852459016393, "grad_norm": 5.713127613067627, "learning_rate": 8.26363441840028e-06, "loss": 0.3574, "step": 17351 }, { "epoch": 56.89180327868853, "grad_norm": 11.45748519897461, "learning_rate": 8.26258867147789e-06, "loss": 0.4818, "step": 17352 }, { "epoch": 56.895081967213116, "grad_norm": 6.754047870635986, "learning_rate": 8.261542944146615e-06, "loss": 0.5288, "step": 17353 }, { "epoch": 56.898360655737704, "grad_norm": 5.9069647789001465, "learning_rate": 8.260497236418248e-06, "loss": 0.5101, "step": 17354 }, { "epoch": 56.90163934426229, "grad_norm": 5.916552543640137, "learning_rate": 8.259451548304588e-06, "loss": 0.5694, "step": 17355 }, { "epoch": 56.90491803278689, "grad_norm": 5.127313137054443, "learning_rate": 8.258405879817421e-06, "loss": 0.6778, "step": 17356 }, { "epoch": 56.90819672131148, "grad_norm": 5.209167003631592, "learning_rate": 8.257360230968538e-06, "loss": 0.5484, "step": 17357 }, { "epoch": 56.911475409836065, "grad_norm": 7.103074073791504, "learning_rate": 8.256314601769727e-06, "loss": 0.286, "step": 17358 }, { "epoch": 56.91475409836065, "grad_norm": 6.41834831237793, "learning_rate": 8.255268992232785e-06, "loss": 0.6014, "step": 17359 }, { "epoch": 56.91803278688525, "grad_norm": 5.85117244720459, "learning_rate": 8.2542234023695e-06, "loss": 0.7316, "step": 17360 }, { "epoch": 56.92131147540984, "grad_norm": 5.869135856628418, "learning_rate": 8.25317783219166e-06, "loss": 0.3253, "step": 17361 }, { "epoch": 56.924590163934425, "grad_norm": 8.71901798248291, "learning_rate": 8.252132281711054e-06, "loss": 0.5744, "step": 17362 }, { "epoch": 56.927868852459014, "grad_norm": 10.650074005126953, "learning_rate": 8.251086750939477e-06, "loss": 0.5614, "step": 17363 }, { "epoch": 56.93114754098361, "grad_norm": 6.865490913391113, "learning_rate": 8.250041239888715e-06, "loss": 0.667, "step": 17364 }, { "epoch": 56.9344262295082, "grad_norm": 22.810562133789062, "learning_rate": 8.24899574857056e-06, "loss": 0.4144, "step": 17365 }, { "epoch": 56.937704918032786, "grad_norm": 5.687273025512695, "learning_rate": 8.247950276996792e-06, "loss": 0.7163, "step": 17366 }, { "epoch": 56.940983606557374, "grad_norm": 6.178530693054199, "learning_rate": 8.246904825179209e-06, "loss": 0.4461, "step": 17367 }, { "epoch": 56.94426229508197, "grad_norm": 5.433596611022949, "learning_rate": 8.2458593931296e-06, "loss": 0.6424, "step": 17368 }, { "epoch": 56.94754098360656, "grad_norm": 8.949585914611816, "learning_rate": 8.244813980859747e-06, "loss": 0.4423, "step": 17369 }, { "epoch": 56.950819672131146, "grad_norm": 7.02848482131958, "learning_rate": 8.243768588381442e-06, "loss": 0.6244, "step": 17370 }, { "epoch": 56.954098360655735, "grad_norm": 6.289370059967041, "learning_rate": 8.24272321570647e-06, "loss": 0.3286, "step": 17371 }, { "epoch": 56.95737704918033, "grad_norm": 5.77639102935791, "learning_rate": 8.241677862846624e-06, "loss": 0.402, "step": 17372 }, { "epoch": 56.96065573770492, "grad_norm": 6.380990982055664, "learning_rate": 8.240632529813689e-06, "loss": 0.6674, "step": 17373 }, { "epoch": 56.96393442622951, "grad_norm": 6.62030553817749, "learning_rate": 8.23958721661945e-06, "loss": 0.3121, "step": 17374 }, { "epoch": 56.967213114754095, "grad_norm": 9.807565689086914, "learning_rate": 8.238541923275692e-06, "loss": 0.3813, "step": 17375 }, { "epoch": 56.97049180327869, "grad_norm": 5.523325443267822, "learning_rate": 8.237496649794208e-06, "loss": 0.4549, "step": 17376 }, { "epoch": 56.97377049180328, "grad_norm": 5.546377182006836, "learning_rate": 8.236451396186784e-06, "loss": 0.6234, "step": 17377 }, { "epoch": 56.97704918032787, "grad_norm": 5.368933200836182, "learning_rate": 8.235406162465204e-06, "loss": 0.413, "step": 17378 }, { "epoch": 56.980327868852456, "grad_norm": 5.292575359344482, "learning_rate": 8.234360948641255e-06, "loss": 0.6345, "step": 17379 }, { "epoch": 56.98360655737705, "grad_norm": 7.039669513702393, "learning_rate": 8.233315754726716e-06, "loss": 0.7361, "step": 17380 }, { "epoch": 56.98688524590164, "grad_norm": 7.518184185028076, "learning_rate": 8.232270580733384e-06, "loss": 0.4757, "step": 17381 }, { "epoch": 56.99016393442623, "grad_norm": 5.3541340827941895, "learning_rate": 8.23122542667304e-06, "loss": 0.5067, "step": 17382 }, { "epoch": 56.993442622950816, "grad_norm": 8.385770797729492, "learning_rate": 8.230180292557465e-06, "loss": 0.582, "step": 17383 }, { "epoch": 56.99672131147541, "grad_norm": 6.09077787399292, "learning_rate": 8.229135178398447e-06, "loss": 0.8787, "step": 17384 }, { "epoch": 57.0, "grad_norm": 6.083042144775391, "learning_rate": 8.228090084207773e-06, "loss": 0.4809, "step": 17385 }, { "epoch": 57.00327868852459, "grad_norm": 6.035197734832764, "learning_rate": 8.227045009997226e-06, "loss": 0.4173, "step": 17386 }, { "epoch": 57.006557377049184, "grad_norm": 7.473371982574463, "learning_rate": 8.225999955778592e-06, "loss": 0.4987, "step": 17387 }, { "epoch": 57.00983606557377, "grad_norm": 5.47982931137085, "learning_rate": 8.224954921563647e-06, "loss": 0.4326, "step": 17388 }, { "epoch": 57.01311475409836, "grad_norm": 9.138108253479004, "learning_rate": 8.223909907364184e-06, "loss": 0.3327, "step": 17389 }, { "epoch": 57.01639344262295, "grad_norm": 6.137675762176514, "learning_rate": 8.222864913191986e-06, "loss": 0.4953, "step": 17390 }, { "epoch": 57.019672131147544, "grad_norm": 33.3101921081543, "learning_rate": 8.221819939058832e-06, "loss": 0.6737, "step": 17391 }, { "epoch": 57.02295081967213, "grad_norm": 6.840500831604004, "learning_rate": 8.220774984976504e-06, "loss": 0.5745, "step": 17392 }, { "epoch": 57.02622950819672, "grad_norm": 5.777340412139893, "learning_rate": 8.21973005095679e-06, "loss": 0.4078, "step": 17393 }, { "epoch": 57.02950819672131, "grad_norm": 5.3795599937438965, "learning_rate": 8.218685137011473e-06, "loss": 0.9363, "step": 17394 }, { "epoch": 57.032786885245905, "grad_norm": 4.894881248474121, "learning_rate": 8.217640243152329e-06, "loss": 0.4794, "step": 17395 }, { "epoch": 57.03606557377049, "grad_norm": 6.259644031524658, "learning_rate": 8.21659536939115e-06, "loss": 0.3544, "step": 17396 }, { "epoch": 57.03934426229508, "grad_norm": 10.885713577270508, "learning_rate": 8.215550515739708e-06, "loss": 0.6712, "step": 17397 }, { "epoch": 57.04262295081967, "grad_norm": 86.46542358398438, "learning_rate": 8.214505682209788e-06, "loss": 0.6136, "step": 17398 }, { "epoch": 57.045901639344265, "grad_norm": 8.42795181274414, "learning_rate": 8.213460868813177e-06, "loss": 0.6679, "step": 17399 }, { "epoch": 57.049180327868854, "grad_norm": 6.956614017486572, "learning_rate": 8.212416075561651e-06, "loss": 0.4614, "step": 17400 }, { "epoch": 57.05245901639344, "grad_norm": 5.834012031555176, "learning_rate": 8.21137130246699e-06, "loss": 0.4843, "step": 17401 }, { "epoch": 57.05573770491803, "grad_norm": 5.967968940734863, "learning_rate": 8.21032654954098e-06, "loss": 0.6663, "step": 17402 }, { "epoch": 57.059016393442626, "grad_norm": 7.269352912902832, "learning_rate": 8.2092818167954e-06, "loss": 0.4375, "step": 17403 }, { "epoch": 57.062295081967214, "grad_norm": 8.1605806350708, "learning_rate": 8.208237104242029e-06, "loss": 0.2973, "step": 17404 }, { "epoch": 57.0655737704918, "grad_norm": 6.222962379455566, "learning_rate": 8.207192411892645e-06, "loss": 0.3877, "step": 17405 }, { "epoch": 57.06885245901639, "grad_norm": 5.924925804138184, "learning_rate": 8.20614773975903e-06, "loss": 0.5361, "step": 17406 }, { "epoch": 57.072131147540986, "grad_norm": 5.06299352645874, "learning_rate": 8.205103087852967e-06, "loss": 0.4795, "step": 17407 }, { "epoch": 57.075409836065575, "grad_norm": 7.2096028327941895, "learning_rate": 8.204058456186233e-06, "loss": 0.4397, "step": 17408 }, { "epoch": 57.07868852459016, "grad_norm": 6.036830902099609, "learning_rate": 8.203013844770608e-06, "loss": 0.8014, "step": 17409 }, { "epoch": 57.08196721311475, "grad_norm": 5.885981559753418, "learning_rate": 8.201969253617865e-06, "loss": 0.6499, "step": 17410 }, { "epoch": 57.08524590163935, "grad_norm": 5.544901371002197, "learning_rate": 8.200924682739794e-06, "loss": 0.4558, "step": 17411 }, { "epoch": 57.088524590163935, "grad_norm": 7.242051601409912, "learning_rate": 8.199880132148166e-06, "loss": 0.5424, "step": 17412 }, { "epoch": 57.09180327868852, "grad_norm": 9.336807250976562, "learning_rate": 8.198835601854762e-06, "loss": 0.5014, "step": 17413 }, { "epoch": 57.09508196721311, "grad_norm": 9.305561065673828, "learning_rate": 8.197791091871355e-06, "loss": 0.4283, "step": 17414 }, { "epoch": 57.09836065573771, "grad_norm": 5.7894086837768555, "learning_rate": 8.19674660220973e-06, "loss": 0.4987, "step": 17415 }, { "epoch": 57.101639344262296, "grad_norm": 6.863433837890625, "learning_rate": 8.195702132881664e-06, "loss": 0.5309, "step": 17416 }, { "epoch": 57.104918032786884, "grad_norm": 7.85467004776001, "learning_rate": 8.194657683898932e-06, "loss": 0.4451, "step": 17417 }, { "epoch": 57.10819672131147, "grad_norm": 5.73064661026001, "learning_rate": 8.193613255273309e-06, "loss": 0.3567, "step": 17418 }, { "epoch": 57.11147540983607, "grad_norm": 6.493467330932617, "learning_rate": 8.192568847016575e-06, "loss": 0.4703, "step": 17419 }, { "epoch": 57.114754098360656, "grad_norm": 6.898606777191162, "learning_rate": 8.19152445914051e-06, "loss": 0.6464, "step": 17420 }, { "epoch": 57.118032786885244, "grad_norm": 7.114324569702148, "learning_rate": 8.190480091656884e-06, "loss": 0.6457, "step": 17421 }, { "epoch": 57.12131147540983, "grad_norm": 6.724134922027588, "learning_rate": 8.189435744577477e-06, "loss": 0.3933, "step": 17422 }, { "epoch": 57.12459016393443, "grad_norm": 5.68582820892334, "learning_rate": 8.188391417914064e-06, "loss": 0.3797, "step": 17423 }, { "epoch": 57.12786885245902, "grad_norm": 7.6652655601501465, "learning_rate": 8.187347111678422e-06, "loss": 0.4585, "step": 17424 }, { "epoch": 57.131147540983605, "grad_norm": 6.8135294914245605, "learning_rate": 8.186302825882327e-06, "loss": 0.5221, "step": 17425 }, { "epoch": 57.13442622950819, "grad_norm": 5.887909412384033, "learning_rate": 8.185258560537552e-06, "loss": 0.7658, "step": 17426 }, { "epoch": 57.13770491803279, "grad_norm": 7.8456807136535645, "learning_rate": 8.184214315655876e-06, "loss": 0.4265, "step": 17427 }, { "epoch": 57.14098360655738, "grad_norm": 5.66783332824707, "learning_rate": 8.183170091249067e-06, "loss": 0.5573, "step": 17428 }, { "epoch": 57.144262295081965, "grad_norm": 5.816445827484131, "learning_rate": 8.182125887328906e-06, "loss": 0.5355, "step": 17429 }, { "epoch": 57.14754098360656, "grad_norm": 6.349671840667725, "learning_rate": 8.18108170390717e-06, "loss": 0.5365, "step": 17430 }, { "epoch": 57.15081967213115, "grad_norm": 5.942906379699707, "learning_rate": 8.180037540995626e-06, "loss": 0.4155, "step": 17431 }, { "epoch": 57.15409836065574, "grad_norm": 5.149715423583984, "learning_rate": 8.178993398606046e-06, "loss": 0.6821, "step": 17432 }, { "epoch": 57.157377049180326, "grad_norm": 5.699929714202881, "learning_rate": 8.177949276750215e-06, "loss": 0.7022, "step": 17433 }, { "epoch": 57.16065573770492, "grad_norm": 5.103941440582275, "learning_rate": 8.1769051754399e-06, "loss": 0.6286, "step": 17434 }, { "epoch": 57.16393442622951, "grad_norm": 6.185720443725586, "learning_rate": 8.175861094686875e-06, "loss": 0.6541, "step": 17435 }, { "epoch": 57.1672131147541, "grad_norm": 6.816788196563721, "learning_rate": 8.174817034502908e-06, "loss": 0.359, "step": 17436 }, { "epoch": 57.170491803278686, "grad_norm": 5.222375392913818, "learning_rate": 8.17377299489978e-06, "loss": 0.6416, "step": 17437 }, { "epoch": 57.17377049180328, "grad_norm": 7.344355583190918, "learning_rate": 8.172728975889261e-06, "loss": 0.7634, "step": 17438 }, { "epoch": 57.17704918032787, "grad_norm": 5.141895771026611, "learning_rate": 8.171684977483122e-06, "loss": 0.3063, "step": 17439 }, { "epoch": 57.18032786885246, "grad_norm": 4.851030349731445, "learning_rate": 8.170640999693138e-06, "loss": 0.2719, "step": 17440 }, { "epoch": 57.18360655737705, "grad_norm": 9.950884819030762, "learning_rate": 8.169597042531073e-06, "loss": 0.5735, "step": 17441 }, { "epoch": 57.18688524590164, "grad_norm": 5.7971954345703125, "learning_rate": 8.168553106008709e-06, "loss": 0.6049, "step": 17442 }, { "epoch": 57.19016393442623, "grad_norm": 6.127923488616943, "learning_rate": 8.167509190137813e-06, "loss": 0.2657, "step": 17443 }, { "epoch": 57.19344262295082, "grad_norm": 69.9089126586914, "learning_rate": 8.166465294930155e-06, "loss": 0.5724, "step": 17444 }, { "epoch": 57.19672131147541, "grad_norm": 6.6097307205200195, "learning_rate": 8.165421420397506e-06, "loss": 0.573, "step": 17445 }, { "epoch": 57.2, "grad_norm": 6.131328105926514, "learning_rate": 8.16437756655164e-06, "loss": 0.6336, "step": 17446 }, { "epoch": 57.20327868852459, "grad_norm": 7.698493480682373, "learning_rate": 8.163333733404327e-06, "loss": 0.6727, "step": 17447 }, { "epoch": 57.20655737704918, "grad_norm": 12.79494857788086, "learning_rate": 8.16228992096733e-06, "loss": 0.4044, "step": 17448 }, { "epoch": 57.20983606557377, "grad_norm": 6.992492198944092, "learning_rate": 8.16124612925243e-06, "loss": 0.5733, "step": 17449 }, { "epoch": 57.21311475409836, "grad_norm": 7.064945697784424, "learning_rate": 8.16020235827139e-06, "loss": 0.5369, "step": 17450 }, { "epoch": 57.21639344262295, "grad_norm": 4.61784029006958, "learning_rate": 8.15915860803598e-06, "loss": 0.6367, "step": 17451 }, { "epoch": 57.21967213114754, "grad_norm": 5.942108154296875, "learning_rate": 8.158114878557973e-06, "loss": 0.6917, "step": 17452 }, { "epoch": 57.22295081967213, "grad_norm": 6.081997871398926, "learning_rate": 8.157071169849136e-06, "loss": 0.38, "step": 17453 }, { "epoch": 57.226229508196724, "grad_norm": 12.03248119354248, "learning_rate": 8.156027481921233e-06, "loss": 0.435, "step": 17454 }, { "epoch": 57.22950819672131, "grad_norm": 5.99257755279541, "learning_rate": 8.154983814786045e-06, "loss": 0.4829, "step": 17455 }, { "epoch": 57.2327868852459, "grad_norm": 8.74074649810791, "learning_rate": 8.153940168455328e-06, "loss": 0.5723, "step": 17456 }, { "epoch": 57.23606557377049, "grad_norm": 7.495961666107178, "learning_rate": 8.152896542940859e-06, "loss": 0.6426, "step": 17457 }, { "epoch": 57.239344262295084, "grad_norm": 5.644755840301514, "learning_rate": 8.151852938254394e-06, "loss": 0.6043, "step": 17458 }, { "epoch": 57.24262295081967, "grad_norm": 5.316960334777832, "learning_rate": 8.150809354407716e-06, "loss": 0.5709, "step": 17459 }, { "epoch": 57.24590163934426, "grad_norm": 6.838612079620361, "learning_rate": 8.149765791412583e-06, "loss": 0.5794, "step": 17460 }, { "epoch": 57.24918032786885, "grad_norm": 5.921379089355469, "learning_rate": 8.148722249280765e-06, "loss": 0.5336, "step": 17461 }, { "epoch": 57.252459016393445, "grad_norm": 7.990331649780273, "learning_rate": 8.147678728024025e-06, "loss": 0.4918, "step": 17462 }, { "epoch": 57.25573770491803, "grad_norm": 5.8455071449279785, "learning_rate": 8.146635227654136e-06, "loss": 0.432, "step": 17463 }, { "epoch": 57.25901639344262, "grad_norm": 4.488613128662109, "learning_rate": 8.145591748182863e-06, "loss": 0.1452, "step": 17464 }, { "epoch": 57.26229508196721, "grad_norm": 6.189518928527832, "learning_rate": 8.144548289621972e-06, "loss": 0.535, "step": 17465 }, { "epoch": 57.265573770491805, "grad_norm": 11.454854011535645, "learning_rate": 8.143504851983226e-06, "loss": 0.6759, "step": 17466 }, { "epoch": 57.268852459016394, "grad_norm": 8.002091407775879, "learning_rate": 8.142461435278392e-06, "loss": 0.5414, "step": 17467 }, { "epoch": 57.27213114754098, "grad_norm": 7.187269687652588, "learning_rate": 8.14141803951924e-06, "loss": 0.4221, "step": 17468 }, { "epoch": 57.27540983606557, "grad_norm": 6.04367208480835, "learning_rate": 8.14037466471753e-06, "loss": 0.7841, "step": 17469 }, { "epoch": 57.278688524590166, "grad_norm": 5.792959690093994, "learning_rate": 8.13933131088503e-06, "loss": 0.5208, "step": 17470 }, { "epoch": 57.281967213114754, "grad_norm": 5.8426032066345215, "learning_rate": 8.1382879780335e-06, "loss": 0.6392, "step": 17471 }, { "epoch": 57.28524590163934, "grad_norm": 6.378244876861572, "learning_rate": 8.137244666174712e-06, "loss": 0.659, "step": 17472 }, { "epoch": 57.28852459016394, "grad_norm": 6.33282995223999, "learning_rate": 8.136201375320429e-06, "loss": 0.7344, "step": 17473 }, { "epoch": 57.291803278688526, "grad_norm": 7.905891418457031, "learning_rate": 8.135158105482412e-06, "loss": 0.3955, "step": 17474 }, { "epoch": 57.295081967213115, "grad_norm": 5.642116069793701, "learning_rate": 8.134114856672423e-06, "loss": 0.4701, "step": 17475 }, { "epoch": 57.2983606557377, "grad_norm": 8.616493225097656, "learning_rate": 8.133071628902233e-06, "loss": 0.4662, "step": 17476 }, { "epoch": 57.3016393442623, "grad_norm": 9.609010696411133, "learning_rate": 8.1320284221836e-06, "loss": 0.5244, "step": 17477 }, { "epoch": 57.30491803278689, "grad_norm": 9.227751731872559, "learning_rate": 8.13098523652829e-06, "loss": 0.5182, "step": 17478 }, { "epoch": 57.308196721311475, "grad_norm": 14.642786979675293, "learning_rate": 8.129942071948066e-06, "loss": 0.5103, "step": 17479 }, { "epoch": 57.31147540983606, "grad_norm": 10.500853538513184, "learning_rate": 8.128898928454684e-06, "loss": 0.3557, "step": 17480 }, { "epoch": 57.31475409836066, "grad_norm": 5.113583564758301, "learning_rate": 8.127855806059916e-06, "loss": 0.6094, "step": 17481 }, { "epoch": 57.31803278688525, "grad_norm": 5.0926384925842285, "learning_rate": 8.126812704775522e-06, "loss": 0.6305, "step": 17482 }, { "epoch": 57.321311475409836, "grad_norm": 6.762992858886719, "learning_rate": 8.12576962461326e-06, "loss": 0.6984, "step": 17483 }, { "epoch": 57.324590163934424, "grad_norm": 5.3246541023254395, "learning_rate": 8.124726565584892e-06, "loss": 0.5615, "step": 17484 }, { "epoch": 57.32786885245902, "grad_norm": 8.00933837890625, "learning_rate": 8.123683527702183e-06, "loss": 0.4334, "step": 17485 }, { "epoch": 57.33114754098361, "grad_norm": 6.67797327041626, "learning_rate": 8.122640510976896e-06, "loss": 0.3405, "step": 17486 }, { "epoch": 57.334426229508196, "grad_norm": 17.90448760986328, "learning_rate": 8.121597515420789e-06, "loss": 0.4098, "step": 17487 }, { "epoch": 57.337704918032784, "grad_norm": 8.636873245239258, "learning_rate": 8.12055454104562e-06, "loss": 0.5816, "step": 17488 }, { "epoch": 57.34098360655738, "grad_norm": 5.529993057250977, "learning_rate": 8.119511587863153e-06, "loss": 0.4156, "step": 17489 }, { "epoch": 57.34426229508197, "grad_norm": 7.773838520050049, "learning_rate": 8.118468655885153e-06, "loss": 0.4695, "step": 17490 }, { "epoch": 57.34754098360656, "grad_norm": 5.7001423835754395, "learning_rate": 8.11742574512337e-06, "loss": 0.4487, "step": 17491 }, { "epoch": 57.350819672131145, "grad_norm": 5.879978179931641, "learning_rate": 8.116382855589572e-06, "loss": 0.3679, "step": 17492 }, { "epoch": 57.35409836065574, "grad_norm": 7.454506874084473, "learning_rate": 8.115339987295512e-06, "loss": 0.491, "step": 17493 }, { "epoch": 57.35737704918033, "grad_norm": 5.777925968170166, "learning_rate": 8.114297140252955e-06, "loss": 0.479, "step": 17494 }, { "epoch": 57.36065573770492, "grad_norm": 5.536827087402344, "learning_rate": 8.113254314473662e-06, "loss": 0.6011, "step": 17495 }, { "epoch": 57.363934426229505, "grad_norm": 5.358865737915039, "learning_rate": 8.112211509969386e-06, "loss": 0.4671, "step": 17496 }, { "epoch": 57.3672131147541, "grad_norm": 5.998638153076172, "learning_rate": 8.111168726751884e-06, "loss": 0.5481, "step": 17497 }, { "epoch": 57.37049180327869, "grad_norm": 5.812997341156006, "learning_rate": 8.110125964832922e-06, "loss": 0.7335, "step": 17498 }, { "epoch": 57.37377049180328, "grad_norm": 6.081528186798096, "learning_rate": 8.109083224224256e-06, "loss": 0.5216, "step": 17499 }, { "epoch": 57.377049180327866, "grad_norm": 14.367470741271973, "learning_rate": 8.10804050493764e-06, "loss": 0.3977, "step": 17500 }, { "epoch": 57.38032786885246, "grad_norm": 5.389697551727295, "learning_rate": 8.106997806984835e-06, "loss": 0.464, "step": 17501 }, { "epoch": 57.38360655737705, "grad_norm": 6.914373874664307, "learning_rate": 8.1059551303776e-06, "loss": 0.7414, "step": 17502 }, { "epoch": 57.38688524590164, "grad_norm": 6.641388416290283, "learning_rate": 8.104912475127687e-06, "loss": 0.3727, "step": 17503 }, { "epoch": 57.390163934426226, "grad_norm": 7.992474555969238, "learning_rate": 8.103869841246859e-06, "loss": 0.4714, "step": 17504 }, { "epoch": 57.39344262295082, "grad_norm": 6.835867404937744, "learning_rate": 8.102827228746872e-06, "loss": 0.4323, "step": 17505 }, { "epoch": 57.39672131147541, "grad_norm": 8.15070915222168, "learning_rate": 8.101784637639474e-06, "loss": 0.7601, "step": 17506 }, { "epoch": 57.4, "grad_norm": 5.132661819458008, "learning_rate": 8.100742067936432e-06, "loss": 0.2515, "step": 17507 }, { "epoch": 57.40327868852459, "grad_norm": 6.579267978668213, "learning_rate": 8.099699519649499e-06, "loss": 0.482, "step": 17508 }, { "epoch": 57.40655737704918, "grad_norm": 7.080760478973389, "learning_rate": 8.09865699279043e-06, "loss": 0.3959, "step": 17509 }, { "epoch": 57.40983606557377, "grad_norm": 130.65908813476562, "learning_rate": 8.097614487370974e-06, "loss": 0.524, "step": 17510 }, { "epoch": 57.41311475409836, "grad_norm": 6.8274006843566895, "learning_rate": 8.096572003402899e-06, "loss": 0.6391, "step": 17511 }, { "epoch": 57.41639344262295, "grad_norm": 4.745253562927246, "learning_rate": 8.095529540897952e-06, "loss": 0.3684, "step": 17512 }, { "epoch": 57.41967213114754, "grad_norm": 7.507323265075684, "learning_rate": 8.094487099867891e-06, "loss": 0.5192, "step": 17513 }, { "epoch": 57.42295081967213, "grad_norm": 4.339879035949707, "learning_rate": 8.093444680324464e-06, "loss": 0.5981, "step": 17514 }, { "epoch": 57.42622950819672, "grad_norm": 9.198022842407227, "learning_rate": 8.092402282279435e-06, "loss": 0.768, "step": 17515 }, { "epoch": 57.429508196721315, "grad_norm": 8.156496047973633, "learning_rate": 8.091359905744553e-06, "loss": 0.606, "step": 17516 }, { "epoch": 57.4327868852459, "grad_norm": 6.217830657958984, "learning_rate": 8.090317550731575e-06, "loss": 0.6915, "step": 17517 }, { "epoch": 57.43606557377049, "grad_norm": 5.189431190490723, "learning_rate": 8.08927521725225e-06, "loss": 0.4482, "step": 17518 }, { "epoch": 57.43934426229508, "grad_norm": 5.842344284057617, "learning_rate": 8.088232905318329e-06, "loss": 0.5904, "step": 17519 }, { "epoch": 57.442622950819676, "grad_norm": 4.922623634338379, "learning_rate": 8.087190614941577e-06, "loss": 0.5776, "step": 17520 }, { "epoch": 57.445901639344264, "grad_norm": 7.030287265777588, "learning_rate": 8.086148346133736e-06, "loss": 0.4061, "step": 17521 }, { "epoch": 57.44918032786885, "grad_norm": 9.08008098602295, "learning_rate": 8.085106098906565e-06, "loss": 0.8988, "step": 17522 }, { "epoch": 57.45245901639344, "grad_norm": 4.869889259338379, "learning_rate": 8.084063873271809e-06, "loss": 0.6744, "step": 17523 }, { "epoch": 57.455737704918036, "grad_norm": 5.025221347808838, "learning_rate": 8.083021669241227e-06, "loss": 0.558, "step": 17524 }, { "epoch": 57.459016393442624, "grad_norm": 26.00420379638672, "learning_rate": 8.081979486826571e-06, "loss": 0.5325, "step": 17525 }, { "epoch": 57.46229508196721, "grad_norm": 6.424673557281494, "learning_rate": 8.080937326039587e-06, "loss": 0.5605, "step": 17526 }, { "epoch": 57.4655737704918, "grad_norm": 5.575877666473389, "learning_rate": 8.079895186892031e-06, "loss": 0.3538, "step": 17527 }, { "epoch": 57.4688524590164, "grad_norm": 6.059744358062744, "learning_rate": 8.078853069395656e-06, "loss": 0.2873, "step": 17528 }, { "epoch": 57.472131147540985, "grad_norm": 8.431693077087402, "learning_rate": 8.077810973562209e-06, "loss": 0.5626, "step": 17529 }, { "epoch": 57.47540983606557, "grad_norm": 10.405978202819824, "learning_rate": 8.07676889940344e-06, "loss": 0.7052, "step": 17530 }, { "epoch": 57.47868852459016, "grad_norm": 6.216203689575195, "learning_rate": 8.075726846931102e-06, "loss": 0.2933, "step": 17531 }, { "epoch": 57.48196721311476, "grad_norm": 5.993939399719238, "learning_rate": 8.074684816156945e-06, "loss": 0.5721, "step": 17532 }, { "epoch": 57.485245901639345, "grad_norm": 44.42925262451172, "learning_rate": 8.073642807092716e-06, "loss": 0.4415, "step": 17533 }, { "epoch": 57.488524590163934, "grad_norm": 5.8649749755859375, "learning_rate": 8.072600819750171e-06, "loss": 0.5628, "step": 17534 }, { "epoch": 57.49180327868852, "grad_norm": 5.278695106506348, "learning_rate": 8.071558854141056e-06, "loss": 0.3978, "step": 17535 }, { "epoch": 57.49508196721312, "grad_norm": 6.383720397949219, "learning_rate": 8.070516910277115e-06, "loss": 0.6836, "step": 17536 }, { "epoch": 57.498360655737706, "grad_norm": 8.52746295928955, "learning_rate": 8.069474988170107e-06, "loss": 0.5427, "step": 17537 }, { "epoch": 57.501639344262294, "grad_norm": 7.720803737640381, "learning_rate": 8.068433087831774e-06, "loss": 0.4823, "step": 17538 }, { "epoch": 57.50491803278688, "grad_norm": 5.413039207458496, "learning_rate": 8.067391209273868e-06, "loss": 0.307, "step": 17539 }, { "epoch": 57.50819672131148, "grad_norm": 6.945010185241699, "learning_rate": 8.06634935250813e-06, "loss": 0.6365, "step": 17540 }, { "epoch": 57.511475409836066, "grad_norm": 7.601130962371826, "learning_rate": 8.065307517546319e-06, "loss": 0.4077, "step": 17541 }, { "epoch": 57.514754098360655, "grad_norm": 35.97774124145508, "learning_rate": 8.064265704400177e-06, "loss": 0.4234, "step": 17542 }, { "epoch": 57.51803278688524, "grad_norm": 8.611624717712402, "learning_rate": 8.063223913081452e-06, "loss": 0.4139, "step": 17543 }, { "epoch": 57.52131147540984, "grad_norm": 6.334884166717529, "learning_rate": 8.062182143601891e-06, "loss": 0.767, "step": 17544 }, { "epoch": 57.52459016393443, "grad_norm": 6.128857612609863, "learning_rate": 8.061140395973237e-06, "loss": 0.4072, "step": 17545 }, { "epoch": 57.527868852459015, "grad_norm": 7.316311836242676, "learning_rate": 8.060098670207244e-06, "loss": 0.4729, "step": 17546 }, { "epoch": 57.5311475409836, "grad_norm": 7.431239604949951, "learning_rate": 8.059056966315657e-06, "loss": 0.4091, "step": 17547 }, { "epoch": 57.5344262295082, "grad_norm": 5.256757736206055, "learning_rate": 8.05801528431022e-06, "loss": 0.3724, "step": 17548 }, { "epoch": 57.53770491803279, "grad_norm": 5.752669811248779, "learning_rate": 8.056973624202676e-06, "loss": 0.3775, "step": 17549 }, { "epoch": 57.540983606557376, "grad_norm": 6.727786540985107, "learning_rate": 8.055931986004777e-06, "loss": 0.4211, "step": 17550 }, { "epoch": 57.544262295081964, "grad_norm": 11.430267333984375, "learning_rate": 8.05489036972827e-06, "loss": 0.4685, "step": 17551 }, { "epoch": 57.54754098360656, "grad_norm": 6.441929817199707, "learning_rate": 8.053848775384892e-06, "loss": 0.4808, "step": 17552 }, { "epoch": 57.55081967213115, "grad_norm": 5.046205043792725, "learning_rate": 8.052807202986392e-06, "loss": 0.64, "step": 17553 }, { "epoch": 57.554098360655736, "grad_norm": 6.2643327713012695, "learning_rate": 8.051765652544517e-06, "loss": 0.6893, "step": 17554 }, { "epoch": 57.557377049180324, "grad_norm": 6.000067710876465, "learning_rate": 8.050724124071012e-06, "loss": 0.5141, "step": 17555 }, { "epoch": 57.56065573770492, "grad_norm": 7.254049777984619, "learning_rate": 8.049682617577615e-06, "loss": 0.4539, "step": 17556 }, { "epoch": 57.56393442622951, "grad_norm": 8.39995002746582, "learning_rate": 8.048641133076077e-06, "loss": 0.6929, "step": 17557 }, { "epoch": 57.5672131147541, "grad_norm": 6.423000335693359, "learning_rate": 8.047599670578139e-06, "loss": 0.5886, "step": 17558 }, { "epoch": 57.570491803278685, "grad_norm": 6.288772106170654, "learning_rate": 8.046558230095543e-06, "loss": 0.509, "step": 17559 }, { "epoch": 57.57377049180328, "grad_norm": 7.404725074768066, "learning_rate": 8.045516811640038e-06, "loss": 0.5299, "step": 17560 }, { "epoch": 57.57704918032787, "grad_norm": 7.439483642578125, "learning_rate": 8.044475415223361e-06, "loss": 0.5283, "step": 17561 }, { "epoch": 57.58032786885246, "grad_norm": 12.324801445007324, "learning_rate": 8.043434040857254e-06, "loss": 0.3748, "step": 17562 }, { "epoch": 57.58360655737705, "grad_norm": 7.180344104766846, "learning_rate": 8.042392688553465e-06, "loss": 0.6153, "step": 17563 }, { "epoch": 57.58688524590164, "grad_norm": 5.201741695404053, "learning_rate": 8.041351358323734e-06, "loss": 0.6398, "step": 17564 }, { "epoch": 57.59016393442623, "grad_norm": 5.794839382171631, "learning_rate": 8.040310050179805e-06, "loss": 0.5611, "step": 17565 }, { "epoch": 57.59344262295082, "grad_norm": 7.559598922729492, "learning_rate": 8.039268764133413e-06, "loss": 0.4386, "step": 17566 }, { "epoch": 57.59672131147541, "grad_norm": 7.255367279052734, "learning_rate": 8.038227500196306e-06, "loss": 0.4491, "step": 17567 }, { "epoch": 57.6, "grad_norm": 5.968574047088623, "learning_rate": 8.037186258380226e-06, "loss": 0.6219, "step": 17568 }, { "epoch": 57.60327868852459, "grad_norm": 5.768794536590576, "learning_rate": 8.036145038696913e-06, "loss": 0.5616, "step": 17569 }, { "epoch": 57.60655737704918, "grad_norm": 7.5623698234558105, "learning_rate": 8.035103841158103e-06, "loss": 0.6421, "step": 17570 }, { "epoch": 57.609836065573774, "grad_norm": 6.809295654296875, "learning_rate": 8.034062665775538e-06, "loss": 0.6585, "step": 17571 }, { "epoch": 57.61311475409836, "grad_norm": 5.609536170959473, "learning_rate": 8.033021512560965e-06, "loss": 0.4031, "step": 17572 }, { "epoch": 57.61639344262295, "grad_norm": 5.747374534606934, "learning_rate": 8.031980381526119e-06, "loss": 0.2167, "step": 17573 }, { "epoch": 57.61967213114754, "grad_norm": 5.645951747894287, "learning_rate": 8.030939272682741e-06, "loss": 0.4372, "step": 17574 }, { "epoch": 57.622950819672134, "grad_norm": 7.425445556640625, "learning_rate": 8.029898186042564e-06, "loss": 0.2998, "step": 17575 }, { "epoch": 57.62622950819672, "grad_norm": 7.293303966522217, "learning_rate": 8.028857121617339e-06, "loss": 0.4621, "step": 17576 }, { "epoch": 57.62950819672131, "grad_norm": 5.7542877197265625, "learning_rate": 8.0278160794188e-06, "loss": 0.5927, "step": 17577 }, { "epoch": 57.6327868852459, "grad_norm": 8.107995986938477, "learning_rate": 8.026775059458685e-06, "loss": 0.5356, "step": 17578 }, { "epoch": 57.636065573770495, "grad_norm": 5.671265125274658, "learning_rate": 8.025734061748727e-06, "loss": 0.4876, "step": 17579 }, { "epoch": 57.63934426229508, "grad_norm": 6.511307716369629, "learning_rate": 8.024693086300677e-06, "loss": 0.5765, "step": 17580 }, { "epoch": 57.64262295081967, "grad_norm": 5.500657558441162, "learning_rate": 8.023652133126264e-06, "loss": 0.6212, "step": 17581 }, { "epoch": 57.64590163934426, "grad_norm": 5.537803649902344, "learning_rate": 8.022611202237228e-06, "loss": 0.6433, "step": 17582 }, { "epoch": 57.649180327868855, "grad_norm": 9.47774887084961, "learning_rate": 8.021570293645307e-06, "loss": 0.5622, "step": 17583 }, { "epoch": 57.65245901639344, "grad_norm": 4.5847392082214355, "learning_rate": 8.020529407362237e-06, "loss": 0.4438, "step": 17584 }, { "epoch": 57.65573770491803, "grad_norm": 5.674600124359131, "learning_rate": 8.019488543399754e-06, "loss": 0.3984, "step": 17585 }, { "epoch": 57.65901639344262, "grad_norm": 6.0438103675842285, "learning_rate": 8.018447701769602e-06, "loss": 0.516, "step": 17586 }, { "epoch": 57.662295081967216, "grad_norm": 8.293478012084961, "learning_rate": 8.01740688248351e-06, "loss": 0.335, "step": 17587 }, { "epoch": 57.665573770491804, "grad_norm": 6.187015056610107, "learning_rate": 8.016366085553215e-06, "loss": 0.4236, "step": 17588 }, { "epoch": 57.66885245901639, "grad_norm": 6.377377986907959, "learning_rate": 8.015325310990457e-06, "loss": 0.4024, "step": 17589 }, { "epoch": 57.67213114754098, "grad_norm": 4.69951057434082, "learning_rate": 8.014284558806971e-06, "loss": 0.3811, "step": 17590 }, { "epoch": 57.675409836065576, "grad_norm": 9.508023262023926, "learning_rate": 8.01324382901449e-06, "loss": 0.3801, "step": 17591 }, { "epoch": 57.678688524590164, "grad_norm": 8.224979400634766, "learning_rate": 8.012203121624747e-06, "loss": 0.7742, "step": 17592 }, { "epoch": 57.68196721311475, "grad_norm": 9.153914451599121, "learning_rate": 8.011162436649484e-06, "loss": 0.5027, "step": 17593 }, { "epoch": 57.68524590163934, "grad_norm": 6.137845039367676, "learning_rate": 8.010121774100433e-06, "loss": 0.4543, "step": 17594 }, { "epoch": 57.68852459016394, "grad_norm": 6.37811279296875, "learning_rate": 8.009081133989329e-06, "loss": 0.6559, "step": 17595 }, { "epoch": 57.691803278688525, "grad_norm": 6.018367767333984, "learning_rate": 8.008040516327904e-06, "loss": 0.5288, "step": 17596 }, { "epoch": 57.69508196721311, "grad_norm": 5.0048747062683105, "learning_rate": 8.00699992112789e-06, "loss": 0.4203, "step": 17597 }, { "epoch": 57.6983606557377, "grad_norm": 12.07026481628418, "learning_rate": 8.005959348401026e-06, "loss": 0.4759, "step": 17598 }, { "epoch": 57.7016393442623, "grad_norm": 7.707202434539795, "learning_rate": 8.004918798159046e-06, "loss": 0.4765, "step": 17599 }, { "epoch": 57.704918032786885, "grad_norm": 6.596018314361572, "learning_rate": 8.00387827041368e-06, "loss": 0.6922, "step": 17600 }, { "epoch": 57.708196721311474, "grad_norm": 6.127962112426758, "learning_rate": 8.00283776517666e-06, "loss": 0.3267, "step": 17601 }, { "epoch": 57.71147540983607, "grad_norm": 9.283143997192383, "learning_rate": 8.001797282459721e-06, "loss": 0.653, "step": 17602 }, { "epoch": 57.71475409836066, "grad_norm": 5.215557098388672, "learning_rate": 8.000756822274597e-06, "loss": 0.6372, "step": 17603 }, { "epoch": 57.718032786885246, "grad_norm": 6.18234395980835, "learning_rate": 7.999716384633019e-06, "loss": 0.3942, "step": 17604 }, { "epoch": 57.721311475409834, "grad_norm": 6.15190315246582, "learning_rate": 7.998675969546714e-06, "loss": 0.4621, "step": 17605 }, { "epoch": 57.72459016393443, "grad_norm": 6.578107833862305, "learning_rate": 7.997635577027423e-06, "loss": 0.6492, "step": 17606 }, { "epoch": 57.72786885245902, "grad_norm": 8.36598014831543, "learning_rate": 7.99659520708687e-06, "loss": 0.4355, "step": 17607 }, { "epoch": 57.731147540983606, "grad_norm": 5.988822937011719, "learning_rate": 7.99555485973679e-06, "loss": 0.6034, "step": 17608 }, { "epoch": 57.734426229508195, "grad_norm": 5.248520851135254, "learning_rate": 7.994514534988916e-06, "loss": 0.6187, "step": 17609 }, { "epoch": 57.73770491803279, "grad_norm": 6.816869258880615, "learning_rate": 7.993474232854973e-06, "loss": 0.3457, "step": 17610 }, { "epoch": 57.74098360655738, "grad_norm": 6.525203704833984, "learning_rate": 7.992433953346694e-06, "loss": 0.7297, "step": 17611 }, { "epoch": 57.74426229508197, "grad_norm": 6.917039394378662, "learning_rate": 7.99139369647581e-06, "loss": 0.4025, "step": 17612 }, { "epoch": 57.747540983606555, "grad_norm": 5.70409631729126, "learning_rate": 7.99035346225405e-06, "loss": 0.4413, "step": 17613 }, { "epoch": 57.75081967213115, "grad_norm": 6.120303630828857, "learning_rate": 7.989313250693143e-06, "loss": 0.4265, "step": 17614 }, { "epoch": 57.75409836065574, "grad_norm": 9.048364639282227, "learning_rate": 7.988273061804822e-06, "loss": 0.5645, "step": 17615 }, { "epoch": 57.75737704918033, "grad_norm": 6.686361789703369, "learning_rate": 7.987232895600813e-06, "loss": 0.6151, "step": 17616 }, { "epoch": 57.760655737704916, "grad_norm": 6.0220255851745605, "learning_rate": 7.986192752092847e-06, "loss": 0.5085, "step": 17617 }, { "epoch": 57.76393442622951, "grad_norm": 4.840855121612549, "learning_rate": 7.985152631292649e-06, "loss": 0.4036, "step": 17618 }, { "epoch": 57.7672131147541, "grad_norm": 6.4033284187316895, "learning_rate": 7.984112533211951e-06, "loss": 0.5027, "step": 17619 }, { "epoch": 57.77049180327869, "grad_norm": 5.454995632171631, "learning_rate": 7.983072457862482e-06, "loss": 0.5602, "step": 17620 }, { "epoch": 57.773770491803276, "grad_norm": 5.803003787994385, "learning_rate": 7.98203240525597e-06, "loss": 0.468, "step": 17621 }, { "epoch": 57.77704918032787, "grad_norm": 7.010842323303223, "learning_rate": 7.980992375404137e-06, "loss": 0.7521, "step": 17622 }, { "epoch": 57.78032786885246, "grad_norm": 5.189426898956299, "learning_rate": 7.979952368318713e-06, "loss": 0.5798, "step": 17623 }, { "epoch": 57.78360655737705, "grad_norm": 7.712646961212158, "learning_rate": 7.97891238401143e-06, "loss": 0.5612, "step": 17624 }, { "epoch": 57.78688524590164, "grad_norm": 5.7882609367370605, "learning_rate": 7.97787242249401e-06, "loss": 0.5209, "step": 17625 }, { "epoch": 57.79016393442623, "grad_norm": 12.07283878326416, "learning_rate": 7.976832483778183e-06, "loss": 0.5202, "step": 17626 }, { "epoch": 57.79344262295082, "grad_norm": 9.761514663696289, "learning_rate": 7.975792567875666e-06, "loss": 0.5302, "step": 17627 }, { "epoch": 57.79672131147541, "grad_norm": 7.981705665588379, "learning_rate": 7.974752674798198e-06, "loss": 0.5504, "step": 17628 }, { "epoch": 57.8, "grad_norm": 29.010536193847656, "learning_rate": 7.9737128045575e-06, "loss": 0.4113, "step": 17629 }, { "epoch": 57.80327868852459, "grad_norm": 5.585472583770752, "learning_rate": 7.972672957165297e-06, "loss": 0.534, "step": 17630 }, { "epoch": 57.80655737704918, "grad_norm": 5.284025192260742, "learning_rate": 7.97163313263331e-06, "loss": 0.391, "step": 17631 }, { "epoch": 57.80983606557377, "grad_norm": 4.679044723510742, "learning_rate": 7.970593330973273e-06, "loss": 0.7948, "step": 17632 }, { "epoch": 57.81311475409836, "grad_norm": 6.372527599334717, "learning_rate": 7.969553552196905e-06, "loss": 0.6704, "step": 17633 }, { "epoch": 57.81639344262295, "grad_norm": 6.88586950302124, "learning_rate": 7.968513796315932e-06, "loss": 0.7296, "step": 17634 }, { "epoch": 57.81967213114754, "grad_norm": 6.964271545410156, "learning_rate": 7.967474063342076e-06, "loss": 0.6913, "step": 17635 }, { "epoch": 57.82295081967213, "grad_norm": 5.526949882507324, "learning_rate": 7.966434353287063e-06, "loss": 0.5445, "step": 17636 }, { "epoch": 57.82622950819672, "grad_norm": 7.953380107879639, "learning_rate": 7.965394666162621e-06, "loss": 0.819, "step": 17637 }, { "epoch": 57.829508196721314, "grad_norm": 12.326960563659668, "learning_rate": 7.964355001980466e-06, "loss": 0.5237, "step": 17638 }, { "epoch": 57.8327868852459, "grad_norm": 5.647979736328125, "learning_rate": 7.963315360752326e-06, "loss": 0.4817, "step": 17639 }, { "epoch": 57.83606557377049, "grad_norm": 5.13771390914917, "learning_rate": 7.962275742489925e-06, "loss": 0.6448, "step": 17640 }, { "epoch": 57.83934426229508, "grad_norm": 7.460903167724609, "learning_rate": 7.96123614720498e-06, "loss": 0.5371, "step": 17641 }, { "epoch": 57.842622950819674, "grad_norm": 6.582875728607178, "learning_rate": 7.96019657490922e-06, "loss": 0.4815, "step": 17642 }, { "epoch": 57.84590163934426, "grad_norm": 4.92060661315918, "learning_rate": 7.959157025614365e-06, "loss": 0.2428, "step": 17643 }, { "epoch": 57.84918032786885, "grad_norm": 6.587345123291016, "learning_rate": 7.958117499332132e-06, "loss": 0.4118, "step": 17644 }, { "epoch": 57.85245901639344, "grad_norm": 5.1884236335754395, "learning_rate": 7.95707799607425e-06, "loss": 0.2417, "step": 17645 }, { "epoch": 57.855737704918035, "grad_norm": 6.384946823120117, "learning_rate": 7.95603851585244e-06, "loss": 0.3401, "step": 17646 }, { "epoch": 57.85901639344262, "grad_norm": 7.920818328857422, "learning_rate": 7.954999058678419e-06, "loss": 0.4338, "step": 17647 }, { "epoch": 57.86229508196721, "grad_norm": 5.353178977966309, "learning_rate": 7.953959624563911e-06, "loss": 0.7147, "step": 17648 }, { "epoch": 57.86557377049181, "grad_norm": 6.23505163192749, "learning_rate": 7.952920213520632e-06, "loss": 0.4838, "step": 17649 }, { "epoch": 57.868852459016395, "grad_norm": 6.393881797790527, "learning_rate": 7.95188082556031e-06, "loss": 0.3464, "step": 17650 }, { "epoch": 57.87213114754098, "grad_norm": 5.651327610015869, "learning_rate": 7.950841460694661e-06, "loss": 0.486, "step": 17651 }, { "epoch": 57.87540983606557, "grad_norm": 6.439677715301514, "learning_rate": 7.949802118935403e-06, "loss": 0.4093, "step": 17652 }, { "epoch": 57.87868852459017, "grad_norm": 5.770885944366455, "learning_rate": 7.948762800294256e-06, "loss": 0.5809, "step": 17653 }, { "epoch": 57.881967213114756, "grad_norm": 5.4118828773498535, "learning_rate": 7.947723504782945e-06, "loss": 0.6201, "step": 17654 }, { "epoch": 57.885245901639344, "grad_norm": 5.580496788024902, "learning_rate": 7.946684232413185e-06, "loss": 0.4192, "step": 17655 }, { "epoch": 57.88852459016393, "grad_norm": 5.127854824066162, "learning_rate": 7.945644983196695e-06, "loss": 0.5079, "step": 17656 }, { "epoch": 57.89180327868853, "grad_norm": 5.473077774047852, "learning_rate": 7.944605757145191e-06, "loss": 0.4191, "step": 17657 }, { "epoch": 57.895081967213116, "grad_norm": 5.792304039001465, "learning_rate": 7.943566554270397e-06, "loss": 0.4154, "step": 17658 }, { "epoch": 57.898360655737704, "grad_norm": 6.137293815612793, "learning_rate": 7.942527374584029e-06, "loss": 0.4339, "step": 17659 }, { "epoch": 57.90163934426229, "grad_norm": 8.922748565673828, "learning_rate": 7.941488218097803e-06, "loss": 0.5441, "step": 17660 }, { "epoch": 57.90491803278689, "grad_norm": 4.794007301330566, "learning_rate": 7.940449084823436e-06, "loss": 0.2996, "step": 17661 }, { "epoch": 57.90819672131148, "grad_norm": 5.272706985473633, "learning_rate": 7.939409974772648e-06, "loss": 0.4489, "step": 17662 }, { "epoch": 57.911475409836065, "grad_norm": 4.517035007476807, "learning_rate": 7.938370887957156e-06, "loss": 0.4087, "step": 17663 }, { "epoch": 57.91475409836065, "grad_norm": 6.875876426696777, "learning_rate": 7.937331824388673e-06, "loss": 0.5283, "step": 17664 }, { "epoch": 57.91803278688525, "grad_norm": 6.425549030303955, "learning_rate": 7.936292784078921e-06, "loss": 0.7884, "step": 17665 }, { "epoch": 57.92131147540984, "grad_norm": 6.440075874328613, "learning_rate": 7.935253767039613e-06, "loss": 0.5504, "step": 17666 }, { "epoch": 57.924590163934425, "grad_norm": 11.670039176940918, "learning_rate": 7.934214773282464e-06, "loss": 0.6163, "step": 17667 }, { "epoch": 57.927868852459014, "grad_norm": 6.135198593139648, "learning_rate": 7.933175802819193e-06, "loss": 0.6538, "step": 17668 }, { "epoch": 57.93114754098361, "grad_norm": 4.953433036804199, "learning_rate": 7.932136855661516e-06, "loss": 0.4961, "step": 17669 }, { "epoch": 57.9344262295082, "grad_norm": 6.6078877449035645, "learning_rate": 7.93109793182114e-06, "loss": 0.4384, "step": 17670 }, { "epoch": 57.937704918032786, "grad_norm": 5.987552165985107, "learning_rate": 7.93005903130979e-06, "loss": 0.6154, "step": 17671 }, { "epoch": 57.940983606557374, "grad_norm": 5.750931739807129, "learning_rate": 7.929020154139178e-06, "loss": 0.452, "step": 17672 }, { "epoch": 57.94426229508197, "grad_norm": 6.545449733734131, "learning_rate": 7.927981300321014e-06, "loss": 0.4893, "step": 17673 }, { "epoch": 57.94754098360656, "grad_norm": 6.6610541343688965, "learning_rate": 7.926942469867018e-06, "loss": 0.4428, "step": 17674 }, { "epoch": 57.950819672131146, "grad_norm": 4.822134017944336, "learning_rate": 7.925903662788897e-06, "loss": 0.4414, "step": 17675 }, { "epoch": 57.954098360655735, "grad_norm": 5.844691753387451, "learning_rate": 7.924864879098371e-06, "loss": 0.4934, "step": 17676 }, { "epoch": 57.95737704918033, "grad_norm": 5.873969554901123, "learning_rate": 7.923826118807153e-06, "loss": 0.6194, "step": 17677 }, { "epoch": 57.96065573770492, "grad_norm": 6.829896450042725, "learning_rate": 7.922787381926954e-06, "loss": 0.6613, "step": 17678 }, { "epoch": 57.96393442622951, "grad_norm": 5.7936224937438965, "learning_rate": 7.921748668469481e-06, "loss": 0.4319, "step": 17679 }, { "epoch": 57.967213114754095, "grad_norm": 5.5773091316223145, "learning_rate": 7.92070997844646e-06, "loss": 0.5945, "step": 17680 }, { "epoch": 57.97049180327869, "grad_norm": 5.8298516273498535, "learning_rate": 7.919671311869593e-06, "loss": 0.3917, "step": 17681 }, { "epoch": 57.97377049180328, "grad_norm": 7.0429511070251465, "learning_rate": 7.918632668750596e-06, "loss": 0.7532, "step": 17682 }, { "epoch": 57.97704918032787, "grad_norm": 6.328023910522461, "learning_rate": 7.917594049101176e-06, "loss": 0.4473, "step": 17683 }, { "epoch": 57.980327868852456, "grad_norm": 5.199809551239014, "learning_rate": 7.916555452933052e-06, "loss": 0.5484, "step": 17684 }, { "epoch": 57.98360655737705, "grad_norm": 6.572872638702393, "learning_rate": 7.915516880257931e-06, "loss": 0.6036, "step": 17685 }, { "epoch": 57.98688524590164, "grad_norm": 5.55393648147583, "learning_rate": 7.914478331087525e-06, "loss": 0.4692, "step": 17686 }, { "epoch": 57.99016393442623, "grad_norm": 5.182100772857666, "learning_rate": 7.913439805433543e-06, "loss": 0.4216, "step": 17687 }, { "epoch": 57.993442622950816, "grad_norm": 6.263994216918945, "learning_rate": 7.912401303307696e-06, "loss": 0.5985, "step": 17688 }, { "epoch": 57.99672131147541, "grad_norm": 5.971940994262695, "learning_rate": 7.911362824721696e-06, "loss": 0.7215, "step": 17689 }, { "epoch": 58.0, "grad_norm": 5.563159942626953, "learning_rate": 7.91032436968725e-06, "loss": 0.2793, "step": 17690 }, { "epoch": 58.00327868852459, "grad_norm": 10.37740707397461, "learning_rate": 7.90928593821607e-06, "loss": 0.5534, "step": 17691 }, { "epoch": 58.006557377049184, "grad_norm": 5.360239028930664, "learning_rate": 7.908247530319866e-06, "loss": 0.5092, "step": 17692 }, { "epoch": 58.00983606557377, "grad_norm": 4.590024948120117, "learning_rate": 7.907209146010348e-06, "loss": 0.6002, "step": 17693 }, { "epoch": 58.01311475409836, "grad_norm": 4.971186637878418, "learning_rate": 7.90617078529922e-06, "loss": 0.4724, "step": 17694 }, { "epoch": 58.01639344262295, "grad_norm": 5.42384147644043, "learning_rate": 7.905132448198195e-06, "loss": 0.7005, "step": 17695 }, { "epoch": 58.019672131147544, "grad_norm": 5.733391284942627, "learning_rate": 7.904094134718975e-06, "loss": 0.5309, "step": 17696 }, { "epoch": 58.02295081967213, "grad_norm": 6.722494125366211, "learning_rate": 7.903055844873277e-06, "loss": 0.4447, "step": 17697 }, { "epoch": 58.02622950819672, "grad_norm": 6.393270492553711, "learning_rate": 7.902017578672804e-06, "loss": 0.3303, "step": 17698 }, { "epoch": 58.02950819672131, "grad_norm": 7.4681396484375, "learning_rate": 7.900979336129267e-06, "loss": 0.4577, "step": 17699 }, { "epoch": 58.032786885245905, "grad_norm": 9.696915626525879, "learning_rate": 7.899941117254369e-06, "loss": 0.3915, "step": 17700 }, { "epoch": 58.03606557377049, "grad_norm": 8.065771102905273, "learning_rate": 7.898902922059814e-06, "loss": 0.5777, "step": 17701 }, { "epoch": 58.03934426229508, "grad_norm": 5.766279697418213, "learning_rate": 7.897864750557317e-06, "loss": 0.6138, "step": 17702 }, { "epoch": 58.04262295081967, "grad_norm": 8.25191879272461, "learning_rate": 7.89682660275858e-06, "loss": 0.6423, "step": 17703 }, { "epoch": 58.045901639344265, "grad_norm": 4.426129341125488, "learning_rate": 7.895788478675312e-06, "loss": 0.534, "step": 17704 }, { "epoch": 58.049180327868854, "grad_norm": 5.825259685516357, "learning_rate": 7.894750378319212e-06, "loss": 0.6154, "step": 17705 }, { "epoch": 58.05245901639344, "grad_norm": 6.450291633605957, "learning_rate": 7.893712301701992e-06, "loss": 0.4247, "step": 17706 }, { "epoch": 58.05573770491803, "grad_norm": 7.468085765838623, "learning_rate": 7.89267424883536e-06, "loss": 0.4303, "step": 17707 }, { "epoch": 58.059016393442626, "grad_norm": 6.327722072601318, "learning_rate": 7.891636219731013e-06, "loss": 0.2388, "step": 17708 }, { "epoch": 58.062295081967214, "grad_norm": 4.4634904861450195, "learning_rate": 7.890598214400658e-06, "loss": 0.3775, "step": 17709 }, { "epoch": 58.0655737704918, "grad_norm": 5.597836971282959, "learning_rate": 7.889560232856003e-06, "loss": 0.6638, "step": 17710 }, { "epoch": 58.06885245901639, "grad_norm": 5.930564880371094, "learning_rate": 7.888522275108753e-06, "loss": 0.4152, "step": 17711 }, { "epoch": 58.072131147540986, "grad_norm": 5.8950724601745605, "learning_rate": 7.88748434117061e-06, "loss": 0.6675, "step": 17712 }, { "epoch": 58.075409836065575, "grad_norm": 6.271056652069092, "learning_rate": 7.886446431053277e-06, "loss": 0.5698, "step": 17713 }, { "epoch": 58.07868852459016, "grad_norm": 7.779079914093018, "learning_rate": 7.885408544768453e-06, "loss": 0.4161, "step": 17714 }, { "epoch": 58.08196721311475, "grad_norm": 5.648375511169434, "learning_rate": 7.884370682327851e-06, "loss": 0.4886, "step": 17715 }, { "epoch": 58.08524590163935, "grad_norm": 5.411637306213379, "learning_rate": 7.88333284374317e-06, "loss": 0.69, "step": 17716 }, { "epoch": 58.088524590163935, "grad_norm": 5.26649284362793, "learning_rate": 7.882295029026108e-06, "loss": 0.8249, "step": 17717 }, { "epoch": 58.09180327868852, "grad_norm": 5.040118217468262, "learning_rate": 7.881257238188373e-06, "loss": 0.5988, "step": 17718 }, { "epoch": 58.09508196721311, "grad_norm": 8.12717056274414, "learning_rate": 7.880219471241667e-06, "loss": 0.5511, "step": 17719 }, { "epoch": 58.09836065573771, "grad_norm": 6.057557106018066, "learning_rate": 7.87918172819769e-06, "loss": 0.7201, "step": 17720 }, { "epoch": 58.101639344262296, "grad_norm": 5.805436134338379, "learning_rate": 7.878144009068144e-06, "loss": 0.7253, "step": 17721 }, { "epoch": 58.104918032786884, "grad_norm": 5.6456522941589355, "learning_rate": 7.877106313864729e-06, "loss": 0.4162, "step": 17722 }, { "epoch": 58.10819672131147, "grad_norm": 5.10209846496582, "learning_rate": 7.876068642599148e-06, "loss": 0.5373, "step": 17723 }, { "epoch": 58.11147540983607, "grad_norm": 4.8088908195495605, "learning_rate": 7.875030995283102e-06, "loss": 0.5796, "step": 17724 }, { "epoch": 58.114754098360656, "grad_norm": 9.914749145507812, "learning_rate": 7.873993371928293e-06, "loss": 0.4729, "step": 17725 }, { "epoch": 58.118032786885244, "grad_norm": 6.156875133514404, "learning_rate": 7.87295577254642e-06, "loss": 0.4723, "step": 17726 }, { "epoch": 58.12131147540983, "grad_norm": 5.746081352233887, "learning_rate": 7.871918197149176e-06, "loss": 0.3918, "step": 17727 }, { "epoch": 58.12459016393443, "grad_norm": 8.92257308959961, "learning_rate": 7.870880645748271e-06, "loss": 0.5405, "step": 17728 }, { "epoch": 58.12786885245902, "grad_norm": 5.52915096282959, "learning_rate": 7.8698431183554e-06, "loss": 0.6753, "step": 17729 }, { "epoch": 58.131147540983605, "grad_norm": 4.872424602508545, "learning_rate": 7.868805614982264e-06, "loss": 0.5685, "step": 17730 }, { "epoch": 58.13442622950819, "grad_norm": 6.570324897766113, "learning_rate": 7.867768135640556e-06, "loss": 0.7022, "step": 17731 }, { "epoch": 58.13770491803279, "grad_norm": 7.158968448638916, "learning_rate": 7.866730680341984e-06, "loss": 0.4811, "step": 17732 }, { "epoch": 58.14098360655738, "grad_norm": 5.955663204193115, "learning_rate": 7.86569324909824e-06, "loss": 0.6641, "step": 17733 }, { "epoch": 58.144262295081965, "grad_norm": 4.645221710205078, "learning_rate": 7.864655841921027e-06, "loss": 0.6053, "step": 17734 }, { "epoch": 58.14754098360656, "grad_norm": 9.652194023132324, "learning_rate": 7.863618458822031e-06, "loss": 0.6655, "step": 17735 }, { "epoch": 58.15081967213115, "grad_norm": 7.214565753936768, "learning_rate": 7.862581099812966e-06, "loss": 0.5309, "step": 17736 }, { "epoch": 58.15409836065574, "grad_norm": 7.955652236938477, "learning_rate": 7.86154376490552e-06, "loss": 0.3715, "step": 17737 }, { "epoch": 58.157377049180326, "grad_norm": 6.1852312088012695, "learning_rate": 7.860506454111392e-06, "loss": 0.4, "step": 17738 }, { "epoch": 58.16065573770492, "grad_norm": 16.292675018310547, "learning_rate": 7.859469167442278e-06, "loss": 0.3568, "step": 17739 }, { "epoch": 58.16393442622951, "grad_norm": 5.613344669342041, "learning_rate": 7.85843190490987e-06, "loss": 0.6702, "step": 17740 }, { "epoch": 58.1672131147541, "grad_norm": 6.437047958374023, "learning_rate": 7.857394666525873e-06, "loss": 0.6481, "step": 17741 }, { "epoch": 58.170491803278686, "grad_norm": 5.463171482086182, "learning_rate": 7.856357452301981e-06, "loss": 0.3941, "step": 17742 }, { "epoch": 58.17377049180328, "grad_norm": 6.897839069366455, "learning_rate": 7.855320262249883e-06, "loss": 0.7533, "step": 17743 }, { "epoch": 58.17704918032787, "grad_norm": 5.474143028259277, "learning_rate": 7.85428309638128e-06, "loss": 0.3976, "step": 17744 }, { "epoch": 58.18032786885246, "grad_norm": 6.082594871520996, "learning_rate": 7.853245954707868e-06, "loss": 0.616, "step": 17745 }, { "epoch": 58.18360655737705, "grad_norm": 7.062047004699707, "learning_rate": 7.852208837241337e-06, "loss": 0.6399, "step": 17746 }, { "epoch": 58.18688524590164, "grad_norm": 6.1109795570373535, "learning_rate": 7.851171743993388e-06, "loss": 0.5128, "step": 17747 }, { "epoch": 58.19016393442623, "grad_norm": 4.892744541168213, "learning_rate": 7.85013467497571e-06, "loss": 0.4889, "step": 17748 }, { "epoch": 58.19344262295082, "grad_norm": 7.111536026000977, "learning_rate": 7.849097630199996e-06, "loss": 0.6286, "step": 17749 }, { "epoch": 58.19672131147541, "grad_norm": 5.586358070373535, "learning_rate": 7.848060609677948e-06, "loss": 0.4869, "step": 17750 }, { "epoch": 58.2, "grad_norm": 23.11502456665039, "learning_rate": 7.847023613421251e-06, "loss": 0.4853, "step": 17751 }, { "epoch": 58.20327868852459, "grad_norm": 5.507260799407959, "learning_rate": 7.845986641441604e-06, "loss": 0.5898, "step": 17752 }, { "epoch": 58.20655737704918, "grad_norm": 5.359488487243652, "learning_rate": 7.844949693750691e-06, "loss": 0.7463, "step": 17753 }, { "epoch": 58.20983606557377, "grad_norm": 5.521161079406738, "learning_rate": 7.843912770360218e-06, "loss": 0.3616, "step": 17754 }, { "epoch": 58.21311475409836, "grad_norm": 7.641562461853027, "learning_rate": 7.842875871281868e-06, "loss": 0.3606, "step": 17755 }, { "epoch": 58.21639344262295, "grad_norm": 5.900763988494873, "learning_rate": 7.841838996527336e-06, "loss": 0.3342, "step": 17756 }, { "epoch": 58.21967213114754, "grad_norm": 6.858489513397217, "learning_rate": 7.840802146108308e-06, "loss": 0.6061, "step": 17757 }, { "epoch": 58.22295081967213, "grad_norm": 5.219177722930908, "learning_rate": 7.839765320036486e-06, "loss": 1.0086, "step": 17758 }, { "epoch": 58.226229508196724, "grad_norm": 6.5477986335754395, "learning_rate": 7.838728518323557e-06, "loss": 0.5215, "step": 17759 }, { "epoch": 58.22950819672131, "grad_norm": 5.688523769378662, "learning_rate": 7.83769174098121e-06, "loss": 0.6497, "step": 17760 }, { "epoch": 58.2327868852459, "grad_norm": 4.395579814910889, "learning_rate": 7.836654988021132e-06, "loss": 0.5359, "step": 17761 }, { "epoch": 58.23606557377049, "grad_norm": 10.540325164794922, "learning_rate": 7.835618259455024e-06, "loss": 0.5287, "step": 17762 }, { "epoch": 58.239344262295084, "grad_norm": 19.7032527923584, "learning_rate": 7.834581555294569e-06, "loss": 0.3975, "step": 17763 }, { "epoch": 58.24262295081967, "grad_norm": 6.1593918800354, "learning_rate": 7.83354487555146e-06, "loss": 0.5647, "step": 17764 }, { "epoch": 58.24590163934426, "grad_norm": 17.892704010009766, "learning_rate": 7.832508220237384e-06, "loss": 0.6017, "step": 17765 }, { "epoch": 58.24918032786885, "grad_norm": 4.86968469619751, "learning_rate": 7.831471589364027e-06, "loss": 0.6256, "step": 17766 }, { "epoch": 58.252459016393445, "grad_norm": 5.799281597137451, "learning_rate": 7.830434982943089e-06, "loss": 0.5918, "step": 17767 }, { "epoch": 58.25573770491803, "grad_norm": 5.420718193054199, "learning_rate": 7.82939840098625e-06, "loss": 0.4522, "step": 17768 }, { "epoch": 58.25901639344262, "grad_norm": 4.840809345245361, "learning_rate": 7.828361843505198e-06, "loss": 0.4667, "step": 17769 }, { "epoch": 58.26229508196721, "grad_norm": 7.676987648010254, "learning_rate": 7.827325310511627e-06, "loss": 0.3706, "step": 17770 }, { "epoch": 58.265573770491805, "grad_norm": 6.4730000495910645, "learning_rate": 7.826288802017222e-06, "loss": 0.4722, "step": 17771 }, { "epoch": 58.268852459016394, "grad_norm": 5.0495805740356445, "learning_rate": 7.82525231803367e-06, "loss": 0.4802, "step": 17772 }, { "epoch": 58.27213114754098, "grad_norm": 5.2000956535339355, "learning_rate": 7.824215858572661e-06, "loss": 0.5579, "step": 17773 }, { "epoch": 58.27540983606557, "grad_norm": 7.172280311584473, "learning_rate": 7.823179423645877e-06, "loss": 0.3591, "step": 17774 }, { "epoch": 58.278688524590166, "grad_norm": 5.925094127655029, "learning_rate": 7.822143013265014e-06, "loss": 0.4215, "step": 17775 }, { "epoch": 58.281967213114754, "grad_norm": 6.09780216217041, "learning_rate": 7.821106627441748e-06, "loss": 0.4406, "step": 17776 }, { "epoch": 58.28524590163934, "grad_norm": 5.520883560180664, "learning_rate": 7.820070266187772e-06, "loss": 0.571, "step": 17777 }, { "epoch": 58.28852459016394, "grad_norm": 5.658589839935303, "learning_rate": 7.819033929514772e-06, "loss": 0.5053, "step": 17778 }, { "epoch": 58.291803278688526, "grad_norm": 5.463234901428223, "learning_rate": 7.817997617434427e-06, "loss": 0.4196, "step": 17779 }, { "epoch": 58.295081967213115, "grad_norm": 7.998175621032715, "learning_rate": 7.816961329958432e-06, "loss": 0.3646, "step": 17780 }, { "epoch": 58.2983606557377, "grad_norm": 5.724450588226318, "learning_rate": 7.815925067098466e-06, "loss": 0.3931, "step": 17781 }, { "epoch": 58.3016393442623, "grad_norm": 7.700363636016846, "learning_rate": 7.814888828866219e-06, "loss": 0.6855, "step": 17782 }, { "epoch": 58.30491803278689, "grad_norm": 4.726619720458984, "learning_rate": 7.813852615273366e-06, "loss": 0.4538, "step": 17783 }, { "epoch": 58.308196721311475, "grad_norm": 6.634302616119385, "learning_rate": 7.812816426331602e-06, "loss": 0.5038, "step": 17784 }, { "epoch": 58.31147540983606, "grad_norm": 5.774437427520752, "learning_rate": 7.811780262052608e-06, "loss": 0.482, "step": 17785 }, { "epoch": 58.31475409836066, "grad_norm": 6.175914764404297, "learning_rate": 7.810744122448067e-06, "loss": 0.4668, "step": 17786 }, { "epoch": 58.31803278688525, "grad_norm": 8.76215648651123, "learning_rate": 7.809708007529656e-06, "loss": 0.5038, "step": 17787 }, { "epoch": 58.321311475409836, "grad_norm": 5.399044036865234, "learning_rate": 7.808671917309071e-06, "loss": 0.5056, "step": 17788 }, { "epoch": 58.324590163934424, "grad_norm": 5.799635410308838, "learning_rate": 7.807635851797987e-06, "loss": 0.489, "step": 17789 }, { "epoch": 58.32786885245902, "grad_norm": 6.81012487411499, "learning_rate": 7.806599811008089e-06, "loss": 0.7133, "step": 17790 }, { "epoch": 58.33114754098361, "grad_norm": 5.0959248542785645, "learning_rate": 7.805563794951059e-06, "loss": 0.5996, "step": 17791 }, { "epoch": 58.334426229508196, "grad_norm": 4.936400890350342, "learning_rate": 7.804527803638574e-06, "loss": 0.4954, "step": 17792 }, { "epoch": 58.337704918032784, "grad_norm": 8.075624465942383, "learning_rate": 7.803491837082324e-06, "loss": 0.5458, "step": 17793 }, { "epoch": 58.34098360655738, "grad_norm": 6.8480353355407715, "learning_rate": 7.802455895293988e-06, "loss": 0.567, "step": 17794 }, { "epoch": 58.34426229508197, "grad_norm": 8.470329284667969, "learning_rate": 7.80141997828525e-06, "loss": 0.5068, "step": 17795 }, { "epoch": 58.34754098360656, "grad_norm": 5.502252101898193, "learning_rate": 7.800384086067779e-06, "loss": 0.3615, "step": 17796 }, { "epoch": 58.350819672131145, "grad_norm": 4.4414896965026855, "learning_rate": 7.79934821865327e-06, "loss": 0.3088, "step": 17797 }, { "epoch": 58.35409836065574, "grad_norm": 5.416991710662842, "learning_rate": 7.798312376053398e-06, "loss": 0.553, "step": 17798 }, { "epoch": 58.35737704918033, "grad_norm": 20.87626075744629, "learning_rate": 7.79727655827984e-06, "loss": 0.5771, "step": 17799 }, { "epoch": 58.36065573770492, "grad_norm": 19.824060440063477, "learning_rate": 7.796240765344281e-06, "loss": 0.3716, "step": 17800 }, { "epoch": 58.363934426229505, "grad_norm": 4.42420768737793, "learning_rate": 7.795204997258402e-06, "loss": 0.2667, "step": 17801 }, { "epoch": 58.3672131147541, "grad_norm": 28.343036651611328, "learning_rate": 7.794169254033874e-06, "loss": 0.5148, "step": 17802 }, { "epoch": 58.37049180327869, "grad_norm": 7.296948432922363, "learning_rate": 7.793133535682384e-06, "loss": 0.6634, "step": 17803 }, { "epoch": 58.37377049180328, "grad_norm": 5.5656256675720215, "learning_rate": 7.79209784221561e-06, "loss": 0.6754, "step": 17804 }, { "epoch": 58.377049180327866, "grad_norm": 13.016180992126465, "learning_rate": 7.79106217364522e-06, "loss": 0.4508, "step": 17805 }, { "epoch": 58.38032786885246, "grad_norm": 7.232964992523193, "learning_rate": 7.790026529982909e-06, "loss": 0.6417, "step": 17806 }, { "epoch": 58.38360655737705, "grad_norm": 7.149396896362305, "learning_rate": 7.788990911240344e-06, "loss": 0.7661, "step": 17807 }, { "epoch": 58.38688524590164, "grad_norm": 5.704885482788086, "learning_rate": 7.787955317429208e-06, "loss": 0.3982, "step": 17808 }, { "epoch": 58.390163934426226, "grad_norm": 5.179266929626465, "learning_rate": 7.78691974856117e-06, "loss": 0.2076, "step": 17809 }, { "epoch": 58.39344262295082, "grad_norm": 4.915866851806641, "learning_rate": 7.785884204647916e-06, "loss": 0.3074, "step": 17810 }, { "epoch": 58.39672131147541, "grad_norm": 5.637394428253174, "learning_rate": 7.784848685701121e-06, "loss": 0.4944, "step": 17811 }, { "epoch": 58.4, "grad_norm": 5.944599151611328, "learning_rate": 7.78381319173246e-06, "loss": 0.5743, "step": 17812 }, { "epoch": 58.40327868852459, "grad_norm": 4.902307987213135, "learning_rate": 7.782777722753605e-06, "loss": 0.3919, "step": 17813 }, { "epoch": 58.40655737704918, "grad_norm": 6.48116397857666, "learning_rate": 7.781742278776241e-06, "loss": 0.3817, "step": 17814 }, { "epoch": 58.40983606557377, "grad_norm": 5.576909065246582, "learning_rate": 7.78070685981204e-06, "loss": 0.456, "step": 17815 }, { "epoch": 58.41311475409836, "grad_norm": 5.922900199890137, "learning_rate": 7.779671465872676e-06, "loss": 0.5332, "step": 17816 }, { "epoch": 58.41639344262295, "grad_norm": 6.391364097595215, "learning_rate": 7.778636096969823e-06, "loss": 0.4063, "step": 17817 }, { "epoch": 58.41967213114754, "grad_norm": 11.225075721740723, "learning_rate": 7.777600753115157e-06, "loss": 0.2855, "step": 17818 }, { "epoch": 58.42295081967213, "grad_norm": 10.640689849853516, "learning_rate": 7.776565434320354e-06, "loss": 0.4625, "step": 17819 }, { "epoch": 58.42622950819672, "grad_norm": 6.6756181716918945, "learning_rate": 7.775530140597089e-06, "loss": 0.6323, "step": 17820 }, { "epoch": 58.429508196721315, "grad_norm": 7.74136209487915, "learning_rate": 7.774494871957036e-06, "loss": 0.5389, "step": 17821 }, { "epoch": 58.4327868852459, "grad_norm": 5.737575531005859, "learning_rate": 7.773459628411862e-06, "loss": 0.3311, "step": 17822 }, { "epoch": 58.43606557377049, "grad_norm": 10.056262969970703, "learning_rate": 7.77242440997325e-06, "loss": 0.3897, "step": 17823 }, { "epoch": 58.43934426229508, "grad_norm": 6.377999782562256, "learning_rate": 7.771389216652867e-06, "loss": 0.3802, "step": 17824 }, { "epoch": 58.442622950819676, "grad_norm": 5.4414849281311035, "learning_rate": 7.770354048462387e-06, "loss": 0.359, "step": 17825 }, { "epoch": 58.445901639344264, "grad_norm": 5.619715213775635, "learning_rate": 7.769318905413483e-06, "loss": 0.3678, "step": 17826 }, { "epoch": 58.44918032786885, "grad_norm": 6.701112747192383, "learning_rate": 7.76828378751783e-06, "loss": 0.4702, "step": 17827 }, { "epoch": 58.45245901639344, "grad_norm": 6.43436861038208, "learning_rate": 7.767248694787097e-06, "loss": 0.4547, "step": 17828 }, { "epoch": 58.455737704918036, "grad_norm": 8.266270637512207, "learning_rate": 7.766213627232957e-06, "loss": 0.4969, "step": 17829 }, { "epoch": 58.459016393442624, "grad_norm": 5.441984176635742, "learning_rate": 7.765178584867081e-06, "loss": 0.5424, "step": 17830 }, { "epoch": 58.46229508196721, "grad_norm": 6.487973690032959, "learning_rate": 7.764143567701138e-06, "loss": 0.5139, "step": 17831 }, { "epoch": 58.4655737704918, "grad_norm": 6.17922306060791, "learning_rate": 7.763108575746802e-06, "loss": 0.6017, "step": 17832 }, { "epoch": 58.4688524590164, "grad_norm": 13.55722427368164, "learning_rate": 7.762073609015745e-06, "loss": 0.3615, "step": 17833 }, { "epoch": 58.472131147540985, "grad_norm": 6.202855110168457, "learning_rate": 7.761038667519633e-06, "loss": 0.6172, "step": 17834 }, { "epoch": 58.47540983606557, "grad_norm": 7.061850070953369, "learning_rate": 7.760003751270135e-06, "loss": 0.3865, "step": 17835 }, { "epoch": 58.47868852459016, "grad_norm": 5.877394676208496, "learning_rate": 7.758968860278927e-06, "loss": 0.2778, "step": 17836 }, { "epoch": 58.48196721311476, "grad_norm": 5.5417070388793945, "learning_rate": 7.757933994557676e-06, "loss": 0.4353, "step": 17837 }, { "epoch": 58.485245901639345, "grad_norm": 6.386565685272217, "learning_rate": 7.756899154118049e-06, "loss": 0.3959, "step": 17838 }, { "epoch": 58.488524590163934, "grad_norm": 7.654256820678711, "learning_rate": 7.755864338971714e-06, "loss": 0.5792, "step": 17839 }, { "epoch": 58.49180327868852, "grad_norm": 5.194331645965576, "learning_rate": 7.754829549130345e-06, "loss": 0.539, "step": 17840 }, { "epoch": 58.49508196721312, "grad_norm": 8.058684349060059, "learning_rate": 7.753794784605608e-06, "loss": 0.2924, "step": 17841 }, { "epoch": 58.498360655737706, "grad_norm": 6.771834850311279, "learning_rate": 7.75276004540917e-06, "loss": 0.4393, "step": 17842 }, { "epoch": 58.501639344262294, "grad_norm": 6.005102157592773, "learning_rate": 7.751725331552698e-06, "loss": 0.5306, "step": 17843 }, { "epoch": 58.50491803278688, "grad_norm": 5.922774791717529, "learning_rate": 7.750690643047858e-06, "loss": 0.3605, "step": 17844 }, { "epoch": 58.50819672131148, "grad_norm": 7.020001411437988, "learning_rate": 7.749655979906323e-06, "loss": 0.3637, "step": 17845 }, { "epoch": 58.511475409836066, "grad_norm": 5.7084574699401855, "learning_rate": 7.748621342139757e-06, "loss": 0.6978, "step": 17846 }, { "epoch": 58.514754098360655, "grad_norm": 7.4034504890441895, "learning_rate": 7.747586729759825e-06, "loss": 0.6183, "step": 17847 }, { "epoch": 58.51803278688524, "grad_norm": 6.182117462158203, "learning_rate": 7.746552142778191e-06, "loss": 0.6792, "step": 17848 }, { "epoch": 58.52131147540984, "grad_norm": 5.341371059417725, "learning_rate": 7.74551758120653e-06, "loss": 0.5039, "step": 17849 }, { "epoch": 58.52459016393443, "grad_norm": 6.150085926055908, "learning_rate": 7.744483045056502e-06, "loss": 0.5443, "step": 17850 }, { "epoch": 58.527868852459015, "grad_norm": 6.457474708557129, "learning_rate": 7.743448534339768e-06, "loss": 0.6404, "step": 17851 }, { "epoch": 58.5311475409836, "grad_norm": 12.320352554321289, "learning_rate": 7.742414049068003e-06, "loss": 0.7555, "step": 17852 }, { "epoch": 58.5344262295082, "grad_norm": 8.381734848022461, "learning_rate": 7.741379589252864e-06, "loss": 0.3656, "step": 17853 }, { "epoch": 58.53770491803279, "grad_norm": 5.79266357421875, "learning_rate": 7.740345154906018e-06, "loss": 0.3922, "step": 17854 }, { "epoch": 58.540983606557376, "grad_norm": 5.484411716461182, "learning_rate": 7.739310746039133e-06, "loss": 0.4274, "step": 17855 }, { "epoch": 58.544262295081964, "grad_norm": 7.196597099304199, "learning_rate": 7.73827636266387e-06, "loss": 0.5333, "step": 17856 }, { "epoch": 58.54754098360656, "grad_norm": 6.8520965576171875, "learning_rate": 7.737242004791888e-06, "loss": 0.5697, "step": 17857 }, { "epoch": 58.55081967213115, "grad_norm": 5.827664852142334, "learning_rate": 7.736207672434857e-06, "loss": 0.4852, "step": 17858 }, { "epoch": 58.554098360655736, "grad_norm": 5.428650856018066, "learning_rate": 7.735173365604441e-06, "loss": 0.3512, "step": 17859 }, { "epoch": 58.557377049180324, "grad_norm": 5.955320358276367, "learning_rate": 7.734139084312299e-06, "loss": 0.4674, "step": 17860 }, { "epoch": 58.56065573770492, "grad_norm": 11.92383861541748, "learning_rate": 7.73310482857009e-06, "loss": 0.5794, "step": 17861 }, { "epoch": 58.56393442622951, "grad_norm": 6.815570831298828, "learning_rate": 7.732070598389486e-06, "loss": 0.4023, "step": 17862 }, { "epoch": 58.5672131147541, "grad_norm": 9.801218032836914, "learning_rate": 7.731036393782146e-06, "loss": 0.6968, "step": 17863 }, { "epoch": 58.570491803278685, "grad_norm": 5.614098072052002, "learning_rate": 7.730002214759726e-06, "loss": 0.414, "step": 17864 }, { "epoch": 58.57377049180328, "grad_norm": 5.33419942855835, "learning_rate": 7.728968061333894e-06, "loss": 0.4832, "step": 17865 }, { "epoch": 58.57704918032787, "grad_norm": 6.592799663543701, "learning_rate": 7.727933933516303e-06, "loss": 0.5189, "step": 17866 }, { "epoch": 58.58032786885246, "grad_norm": 4.892024993896484, "learning_rate": 7.726899831318624e-06, "loss": 0.565, "step": 17867 }, { "epoch": 58.58360655737705, "grad_norm": 6.2755327224731445, "learning_rate": 7.725865754752513e-06, "loss": 0.382, "step": 17868 }, { "epoch": 58.58688524590164, "grad_norm": 5.605582237243652, "learning_rate": 7.72483170382963e-06, "loss": 0.611, "step": 17869 }, { "epoch": 58.59016393442623, "grad_norm": 6.479397296905518, "learning_rate": 7.72379767856163e-06, "loss": 0.5663, "step": 17870 }, { "epoch": 58.59344262295082, "grad_norm": 6.752437591552734, "learning_rate": 7.722763678960183e-06, "loss": 0.6201, "step": 17871 }, { "epoch": 58.59672131147541, "grad_norm": 5.728765487670898, "learning_rate": 7.721729705036942e-06, "loss": 0.3397, "step": 17872 }, { "epoch": 58.6, "grad_norm": 4.99493408203125, "learning_rate": 7.720695756803569e-06, "loss": 0.2464, "step": 17873 }, { "epoch": 58.60327868852459, "grad_norm": 5.811609745025635, "learning_rate": 7.719661834271717e-06, "loss": 0.5792, "step": 17874 }, { "epoch": 58.60655737704918, "grad_norm": 11.320159912109375, "learning_rate": 7.718627937453052e-06, "loss": 0.5121, "step": 17875 }, { "epoch": 58.609836065573774, "grad_norm": 6.096377849578857, "learning_rate": 7.717594066359228e-06, "loss": 0.3921, "step": 17876 }, { "epoch": 58.61311475409836, "grad_norm": 14.464513778686523, "learning_rate": 7.716560221001906e-06, "loss": 0.3502, "step": 17877 }, { "epoch": 58.61639344262295, "grad_norm": 5.5400309562683105, "learning_rate": 7.715526401392739e-06, "loss": 0.6365, "step": 17878 }, { "epoch": 58.61967213114754, "grad_norm": 6.643665313720703, "learning_rate": 7.714492607543387e-06, "loss": 0.5325, "step": 17879 }, { "epoch": 58.622950819672134, "grad_norm": 5.035245895385742, "learning_rate": 7.71345883946551e-06, "loss": 0.4599, "step": 17880 }, { "epoch": 58.62622950819672, "grad_norm": 5.967806816101074, "learning_rate": 7.71242509717076e-06, "loss": 0.5401, "step": 17881 }, { "epoch": 58.62950819672131, "grad_norm": 5.826223373413086, "learning_rate": 7.711391380670797e-06, "loss": 0.6483, "step": 17882 }, { "epoch": 58.6327868852459, "grad_norm": 6.019268989562988, "learning_rate": 7.710357689977273e-06, "loss": 0.3998, "step": 17883 }, { "epoch": 58.636065573770495, "grad_norm": 5.498697280883789, "learning_rate": 7.709324025101847e-06, "loss": 0.4055, "step": 17884 }, { "epoch": 58.63934426229508, "grad_norm": 6.498928070068359, "learning_rate": 7.708290386056177e-06, "loss": 0.8352, "step": 17885 }, { "epoch": 58.64262295081967, "grad_norm": 9.302690505981445, "learning_rate": 7.707256772851914e-06, "loss": 0.4549, "step": 17886 }, { "epoch": 58.64590163934426, "grad_norm": 8.063526153564453, "learning_rate": 7.706223185500712e-06, "loss": 0.4632, "step": 17887 }, { "epoch": 58.649180327868855, "grad_norm": 6.909219741821289, "learning_rate": 7.705189624014233e-06, "loss": 0.4233, "step": 17888 }, { "epoch": 58.65245901639344, "grad_norm": 5.12011194229126, "learning_rate": 7.704156088404125e-06, "loss": 0.3471, "step": 17889 }, { "epoch": 58.65573770491803, "grad_norm": 6.150508880615234, "learning_rate": 7.703122578682047e-06, "loss": 0.404, "step": 17890 }, { "epoch": 58.65901639344262, "grad_norm": 5.1305036544799805, "learning_rate": 7.702089094859649e-06, "loss": 0.7871, "step": 17891 }, { "epoch": 58.662295081967216, "grad_norm": 5.503812313079834, "learning_rate": 7.70105563694858e-06, "loss": 0.783, "step": 17892 }, { "epoch": 58.665573770491804, "grad_norm": 6.1485772132873535, "learning_rate": 7.700022204960504e-06, "loss": 0.6468, "step": 17893 }, { "epoch": 58.66885245901639, "grad_norm": 5.68778657913208, "learning_rate": 7.69898879890707e-06, "loss": 0.5318, "step": 17894 }, { "epoch": 58.67213114754098, "grad_norm": 6.510458469390869, "learning_rate": 7.69795541879993e-06, "loss": 0.4902, "step": 17895 }, { "epoch": 58.675409836065576, "grad_norm": 18.260501861572266, "learning_rate": 7.696922064650731e-06, "loss": 0.5046, "step": 17896 }, { "epoch": 58.678688524590164, "grad_norm": 11.40810489654541, "learning_rate": 7.695888736471135e-06, "loss": 0.4254, "step": 17897 }, { "epoch": 58.68196721311475, "grad_norm": 9.55597972869873, "learning_rate": 7.69485543427279e-06, "loss": 0.7559, "step": 17898 }, { "epoch": 58.68524590163934, "grad_norm": 10.623652458190918, "learning_rate": 7.693822158067345e-06, "loss": 0.5566, "step": 17899 }, { "epoch": 58.68852459016394, "grad_norm": 5.722211837768555, "learning_rate": 7.69278890786645e-06, "loss": 0.2782, "step": 17900 }, { "epoch": 58.691803278688525, "grad_norm": 18.300098419189453, "learning_rate": 7.69175568368176e-06, "loss": 0.5466, "step": 17901 }, { "epoch": 58.69508196721311, "grad_norm": 5.195021629333496, "learning_rate": 7.69072248552493e-06, "loss": 0.4962, "step": 17902 }, { "epoch": 58.6983606557377, "grad_norm": 6.236324787139893, "learning_rate": 7.6896893134076e-06, "loss": 0.6537, "step": 17903 }, { "epoch": 58.7016393442623, "grad_norm": 4.910327911376953, "learning_rate": 7.688656167341426e-06, "loss": 0.3715, "step": 17904 }, { "epoch": 58.704918032786885, "grad_norm": 8.083295822143555, "learning_rate": 7.687623047338056e-06, "loss": 0.4946, "step": 17905 }, { "epoch": 58.708196721311474, "grad_norm": 7.169905185699463, "learning_rate": 7.686589953409142e-06, "loss": 0.5673, "step": 17906 }, { "epoch": 58.71147540983607, "grad_norm": 6.193923473358154, "learning_rate": 7.68555688556633e-06, "loss": 0.4758, "step": 17907 }, { "epoch": 58.71475409836066, "grad_norm": 18.028039932250977, "learning_rate": 7.684523843821273e-06, "loss": 0.416, "step": 17908 }, { "epoch": 58.718032786885246, "grad_norm": 6.353867053985596, "learning_rate": 7.683490828185615e-06, "loss": 0.4413, "step": 17909 }, { "epoch": 58.721311475409834, "grad_norm": 6.277824401855469, "learning_rate": 7.682457838671006e-06, "loss": 0.8396, "step": 17910 }, { "epoch": 58.72459016393443, "grad_norm": 7.863775253295898, "learning_rate": 7.681424875289097e-06, "loss": 0.3887, "step": 17911 }, { "epoch": 58.72786885245902, "grad_norm": 7.818727016448975, "learning_rate": 7.680391938051534e-06, "loss": 0.29, "step": 17912 }, { "epoch": 58.731147540983606, "grad_norm": 9.20755386352539, "learning_rate": 7.679359026969959e-06, "loss": 0.5154, "step": 17913 }, { "epoch": 58.734426229508195, "grad_norm": 4.722561836242676, "learning_rate": 7.678326142056028e-06, "loss": 0.4638, "step": 17914 }, { "epoch": 58.73770491803279, "grad_norm": 12.331340789794922, "learning_rate": 7.677293283321383e-06, "loss": 0.5977, "step": 17915 }, { "epoch": 58.74098360655738, "grad_norm": 6.507721424102783, "learning_rate": 7.67626045077767e-06, "loss": 0.6976, "step": 17916 }, { "epoch": 58.74426229508197, "grad_norm": 5.869348049163818, "learning_rate": 7.675227644436538e-06, "loss": 0.4276, "step": 17917 }, { "epoch": 58.747540983606555, "grad_norm": 4.793578147888184, "learning_rate": 7.674194864309628e-06, "loss": 0.4599, "step": 17918 }, { "epoch": 58.75081967213115, "grad_norm": 5.81561803817749, "learning_rate": 7.673162110408592e-06, "loss": 0.6703, "step": 17919 }, { "epoch": 58.75409836065574, "grad_norm": 8.556401252746582, "learning_rate": 7.672129382745075e-06, "loss": 0.5625, "step": 17920 }, { "epoch": 58.75737704918033, "grad_norm": 5.024188995361328, "learning_rate": 7.671096681330717e-06, "loss": 0.6842, "step": 17921 }, { "epoch": 58.760655737704916, "grad_norm": 7.86615514755249, "learning_rate": 7.670064006177162e-06, "loss": 0.5185, "step": 17922 }, { "epoch": 58.76393442622951, "grad_norm": 5.430068492889404, "learning_rate": 7.669031357296062e-06, "loss": 0.4056, "step": 17923 }, { "epoch": 58.7672131147541, "grad_norm": 5.597435474395752, "learning_rate": 7.667998734699058e-06, "loss": 0.5338, "step": 17924 }, { "epoch": 58.77049180327869, "grad_norm": 5.665971279144287, "learning_rate": 7.66696613839779e-06, "loss": 0.2901, "step": 17925 }, { "epoch": 58.773770491803276, "grad_norm": 6.6520867347717285, "learning_rate": 7.665933568403903e-06, "loss": 0.529, "step": 17926 }, { "epoch": 58.77704918032787, "grad_norm": 6.227785110473633, "learning_rate": 7.664901024729047e-06, "loss": 0.3829, "step": 17927 }, { "epoch": 58.78032786885246, "grad_norm": 7.322120666503906, "learning_rate": 7.663868507384857e-06, "loss": 0.5989, "step": 17928 }, { "epoch": 58.78360655737705, "grad_norm": 5.643197059631348, "learning_rate": 7.66283601638298e-06, "loss": 0.6324, "step": 17929 }, { "epoch": 58.78688524590164, "grad_norm": 5.141270637512207, "learning_rate": 7.661803551735056e-06, "loss": 0.649, "step": 17930 }, { "epoch": 58.79016393442623, "grad_norm": 5.544393539428711, "learning_rate": 7.660771113452725e-06, "loss": 0.3423, "step": 17931 }, { "epoch": 58.79344262295082, "grad_norm": 6.588715076446533, "learning_rate": 7.659738701547637e-06, "loss": 0.466, "step": 17932 }, { "epoch": 58.79672131147541, "grad_norm": 8.554610252380371, "learning_rate": 7.658706316031425e-06, "loss": 0.4138, "step": 17933 }, { "epoch": 58.8, "grad_norm": 6.9976983070373535, "learning_rate": 7.657673956915735e-06, "loss": 0.6206, "step": 17934 }, { "epoch": 58.80327868852459, "grad_norm": 6.053384304046631, "learning_rate": 7.656641624212205e-06, "loss": 0.3626, "step": 17935 }, { "epoch": 58.80655737704918, "grad_norm": 15.528139114379883, "learning_rate": 7.655609317932478e-06, "loss": 0.3784, "step": 17936 }, { "epoch": 58.80983606557377, "grad_norm": 7.423750400543213, "learning_rate": 7.654577038088195e-06, "loss": 0.6084, "step": 17937 }, { "epoch": 58.81311475409836, "grad_norm": 6.642483234405518, "learning_rate": 7.653544784690995e-06, "loss": 0.4223, "step": 17938 }, { "epoch": 58.81639344262295, "grad_norm": 11.37791633605957, "learning_rate": 7.652512557752513e-06, "loss": 0.6237, "step": 17939 }, { "epoch": 58.81967213114754, "grad_norm": 4.751214981079102, "learning_rate": 7.651480357284396e-06, "loss": 0.532, "step": 17940 }, { "epoch": 58.82295081967213, "grad_norm": 5.634212493896484, "learning_rate": 7.650448183298279e-06, "loss": 0.7378, "step": 17941 }, { "epoch": 58.82622950819672, "grad_norm": 7.05553674697876, "learning_rate": 7.649416035805803e-06, "loss": 0.4114, "step": 17942 }, { "epoch": 58.829508196721314, "grad_norm": 8.099528312683105, "learning_rate": 7.648383914818605e-06, "loss": 0.6073, "step": 17943 }, { "epoch": 58.8327868852459, "grad_norm": 5.025986671447754, "learning_rate": 7.64735182034832e-06, "loss": 0.2488, "step": 17944 }, { "epoch": 58.83606557377049, "grad_norm": 5.975917339324951, "learning_rate": 7.646319752406592e-06, "loss": 0.3574, "step": 17945 }, { "epoch": 58.83934426229508, "grad_norm": 5.404271602630615, "learning_rate": 7.645287711005057e-06, "loss": 0.6178, "step": 17946 }, { "epoch": 58.842622950819674, "grad_norm": 5.065099716186523, "learning_rate": 7.644255696155352e-06, "loss": 0.6275, "step": 17947 }, { "epoch": 58.84590163934426, "grad_norm": 10.63205337524414, "learning_rate": 7.643223707869108e-06, "loss": 0.6422, "step": 17948 }, { "epoch": 58.84918032786885, "grad_norm": 5.5089240074157715, "learning_rate": 7.642191746157972e-06, "loss": 0.4073, "step": 17949 }, { "epoch": 58.85245901639344, "grad_norm": 5.8864264488220215, "learning_rate": 7.641159811033574e-06, "loss": 0.5611, "step": 17950 }, { "epoch": 58.855737704918035, "grad_norm": 6.485363483428955, "learning_rate": 7.640127902507553e-06, "loss": 0.3693, "step": 17951 }, { "epoch": 58.85901639344262, "grad_norm": 6.289712905883789, "learning_rate": 7.63909602059154e-06, "loss": 0.4724, "step": 17952 }, { "epoch": 58.86229508196721, "grad_norm": 5.690564155578613, "learning_rate": 7.638064165297177e-06, "loss": 0.2976, "step": 17953 }, { "epoch": 58.86557377049181, "grad_norm": 6.1320695877075195, "learning_rate": 7.637032336636098e-06, "loss": 0.5041, "step": 17954 }, { "epoch": 58.868852459016395, "grad_norm": 5.741088390350342, "learning_rate": 7.636000534619935e-06, "loss": 0.4842, "step": 17955 }, { "epoch": 58.87213114754098, "grad_norm": 8.413436889648438, "learning_rate": 7.634968759260322e-06, "loss": 0.5986, "step": 17956 }, { "epoch": 58.87540983606557, "grad_norm": 21.491313934326172, "learning_rate": 7.633937010568895e-06, "loss": 0.4161, "step": 17957 }, { "epoch": 58.87868852459017, "grad_norm": 6.214447975158691, "learning_rate": 7.632905288557291e-06, "loss": 0.4285, "step": 17958 }, { "epoch": 58.881967213114756, "grad_norm": 8.185519218444824, "learning_rate": 7.631873593237141e-06, "loss": 0.4962, "step": 17959 }, { "epoch": 58.885245901639344, "grad_norm": 7.796353816986084, "learning_rate": 7.630841924620076e-06, "loss": 0.7612, "step": 17960 }, { "epoch": 58.88852459016393, "grad_norm": 4.7092695236206055, "learning_rate": 7.629810282717733e-06, "loss": 0.6132, "step": 17961 }, { "epoch": 58.89180327868853, "grad_norm": 5.528362274169922, "learning_rate": 7.628778667541743e-06, "loss": 0.5808, "step": 17962 }, { "epoch": 58.895081967213116, "grad_norm": 4.858065605163574, "learning_rate": 7.627747079103738e-06, "loss": 0.2043, "step": 17963 }, { "epoch": 58.898360655737704, "grad_norm": 6.524773120880127, "learning_rate": 7.626715517415354e-06, "loss": 0.376, "step": 17964 }, { "epoch": 58.90163934426229, "grad_norm": 5.602194309234619, "learning_rate": 7.6256839824882165e-06, "loss": 0.7219, "step": 17965 }, { "epoch": 58.90491803278689, "grad_norm": 4.892178535461426, "learning_rate": 7.6246524743339624e-06, "loss": 0.5227, "step": 17966 }, { "epoch": 58.90819672131148, "grad_norm": 5.4192328453063965, "learning_rate": 7.6236209929642226e-06, "loss": 0.4483, "step": 17967 }, { "epoch": 58.911475409836065, "grad_norm": 5.86264705657959, "learning_rate": 7.6225895383906275e-06, "loss": 0.5918, "step": 17968 }, { "epoch": 58.91475409836065, "grad_norm": 5.774790287017822, "learning_rate": 7.621558110624807e-06, "loss": 0.4391, "step": 17969 }, { "epoch": 58.91803278688525, "grad_norm": 5.384639739990234, "learning_rate": 7.620526709678387e-06, "loss": 0.7292, "step": 17970 }, { "epoch": 58.92131147540984, "grad_norm": 7.565809726715088, "learning_rate": 7.619495335563008e-06, "loss": 0.3377, "step": 17971 }, { "epoch": 58.924590163934425, "grad_norm": 7.971530437469482, "learning_rate": 7.618463988290292e-06, "loss": 0.6932, "step": 17972 }, { "epoch": 58.927868852459014, "grad_norm": 8.578234672546387, "learning_rate": 7.617432667871873e-06, "loss": 0.5094, "step": 17973 }, { "epoch": 58.93114754098361, "grad_norm": 5.887557029724121, "learning_rate": 7.616401374319372e-06, "loss": 0.4805, "step": 17974 }, { "epoch": 58.9344262295082, "grad_norm": 5.486734867095947, "learning_rate": 7.615370107644429e-06, "loss": 0.22, "step": 17975 }, { "epoch": 58.937704918032786, "grad_norm": 5.208939552307129, "learning_rate": 7.6143388678586675e-06, "loss": 0.5577, "step": 17976 }, { "epoch": 58.940983606557374, "grad_norm": 5.436453819274902, "learning_rate": 7.613307654973715e-06, "loss": 0.6109, "step": 17977 }, { "epoch": 58.94426229508197, "grad_norm": 10.235306739807129, "learning_rate": 7.612276469001196e-06, "loss": 0.6196, "step": 17978 }, { "epoch": 58.94754098360656, "grad_norm": 6.288056373596191, "learning_rate": 7.611245309952747e-06, "loss": 0.6384, "step": 17979 }, { "epoch": 58.950819672131146, "grad_norm": 5.791390419006348, "learning_rate": 7.610214177839992e-06, "loss": 0.4089, "step": 17980 }, { "epoch": 58.954098360655735, "grad_norm": 6.940154075622559, "learning_rate": 7.609183072674555e-06, "loss": 0.4404, "step": 17981 }, { "epoch": 58.95737704918033, "grad_norm": 6.943901538848877, "learning_rate": 7.608151994468066e-06, "loss": 0.4784, "step": 17982 }, { "epoch": 58.96065573770492, "grad_norm": 6.360527038574219, "learning_rate": 7.6071209432321444e-06, "loss": 0.4766, "step": 17983 }, { "epoch": 58.96393442622951, "grad_norm": 7.172450065612793, "learning_rate": 7.606089918978428e-06, "loss": 0.5065, "step": 17984 }, { "epoch": 58.967213114754095, "grad_norm": 9.810664176940918, "learning_rate": 7.605058921718535e-06, "loss": 0.5792, "step": 17985 }, { "epoch": 58.97049180327869, "grad_norm": 5.189474105834961, "learning_rate": 7.6040279514640926e-06, "loss": 0.533, "step": 17986 }, { "epoch": 58.97377049180328, "grad_norm": 5.141684055328369, "learning_rate": 7.602997008226725e-06, "loss": 0.4959, "step": 17987 }, { "epoch": 58.97704918032787, "grad_norm": 6.352732181549072, "learning_rate": 7.601966092018062e-06, "loss": 0.3937, "step": 17988 }, { "epoch": 58.980327868852456, "grad_norm": 5.751543045043945, "learning_rate": 7.6009352028497205e-06, "loss": 0.4794, "step": 17989 }, { "epoch": 58.98360655737705, "grad_norm": 7.244879722595215, "learning_rate": 7.599904340733333e-06, "loss": 0.5099, "step": 17990 }, { "epoch": 58.98688524590164, "grad_norm": 6.485562801361084, "learning_rate": 7.5988735056805166e-06, "loss": 0.5042, "step": 17991 }, { "epoch": 58.99016393442623, "grad_norm": 5.233864784240723, "learning_rate": 7.597842697702897e-06, "loss": 0.2079, "step": 17992 }, { "epoch": 58.993442622950816, "grad_norm": 6.464066982269287, "learning_rate": 7.596811916812102e-06, "loss": 0.3141, "step": 17993 }, { "epoch": 58.99672131147541, "grad_norm": 11.381659507751465, "learning_rate": 7.595781163019751e-06, "loss": 0.4118, "step": 17994 }, { "epoch": 59.0, "grad_norm": 6.295008182525635, "learning_rate": 7.594750436337467e-06, "loss": 0.4184, "step": 17995 }, { "epoch": 59.00327868852459, "grad_norm": 7.10610294342041, "learning_rate": 7.5937197367768675e-06, "loss": 0.4575, "step": 17996 }, { "epoch": 59.006557377049184, "grad_norm": 5.200035572052002, "learning_rate": 7.592689064349585e-06, "loss": 0.3729, "step": 17997 }, { "epoch": 59.00983606557377, "grad_norm": 5.522238254547119, "learning_rate": 7.591658419067237e-06, "loss": 0.6762, "step": 17998 }, { "epoch": 59.01311475409836, "grad_norm": 5.723299026489258, "learning_rate": 7.590627800941444e-06, "loss": 0.5328, "step": 17999 }, { "epoch": 59.01639344262295, "grad_norm": 5.306302070617676, "learning_rate": 7.589597209983823e-06, "loss": 0.4405, "step": 18000 }, { "epoch": 59.019672131147544, "grad_norm": 6.335328578948975, "learning_rate": 7.5885666462060035e-06, "loss": 0.6256, "step": 18001 }, { "epoch": 59.02295081967213, "grad_norm": 6.588009834289551, "learning_rate": 7.587536109619603e-06, "loss": 0.4947, "step": 18002 }, { "epoch": 59.02622950819672, "grad_norm": 8.149227142333984, "learning_rate": 7.586505600236241e-06, "loss": 0.3097, "step": 18003 }, { "epoch": 59.02950819672131, "grad_norm": 6.698704719543457, "learning_rate": 7.585475118067533e-06, "loss": 0.4636, "step": 18004 }, { "epoch": 59.032786885245905, "grad_norm": 8.694586753845215, "learning_rate": 7.584444663125109e-06, "loss": 0.6917, "step": 18005 }, { "epoch": 59.03606557377049, "grad_norm": 21.097881317138672, "learning_rate": 7.583414235420583e-06, "loss": 0.5704, "step": 18006 }, { "epoch": 59.03934426229508, "grad_norm": 6.888615131378174, "learning_rate": 7.5823838349655736e-06, "loss": 0.2962, "step": 18007 }, { "epoch": 59.04262295081967, "grad_norm": 4.96084451675415, "learning_rate": 7.581353461771699e-06, "loss": 0.7018, "step": 18008 }, { "epoch": 59.045901639344265, "grad_norm": 4.9811787605285645, "learning_rate": 7.580323115850576e-06, "loss": 0.2464, "step": 18009 }, { "epoch": 59.049180327868854, "grad_norm": 18.316648483276367, "learning_rate": 7.57929279721383e-06, "loss": 0.6895, "step": 18010 }, { "epoch": 59.05245901639344, "grad_norm": 6.777124881744385, "learning_rate": 7.578262505873074e-06, "loss": 0.5259, "step": 18011 }, { "epoch": 59.05573770491803, "grad_norm": 6.879733085632324, "learning_rate": 7.577232241839923e-06, "loss": 0.4944, "step": 18012 }, { "epoch": 59.059016393442626, "grad_norm": 5.096394062042236, "learning_rate": 7.5762020051259995e-06, "loss": 0.3774, "step": 18013 }, { "epoch": 59.062295081967214, "grad_norm": 6.7285027503967285, "learning_rate": 7.57517179574292e-06, "loss": 0.5511, "step": 18014 }, { "epoch": 59.0655737704918, "grad_norm": 4.446370601654053, "learning_rate": 7.574141613702297e-06, "loss": 0.279, "step": 18015 }, { "epoch": 59.06885245901639, "grad_norm": 12.155115127563477, "learning_rate": 7.573111459015753e-06, "loss": 0.5926, "step": 18016 }, { "epoch": 59.072131147540986, "grad_norm": 5.249619007110596, "learning_rate": 7.572081331694897e-06, "loss": 0.4823, "step": 18017 }, { "epoch": 59.075409836065575, "grad_norm": 5.586396217346191, "learning_rate": 7.571051231751352e-06, "loss": 0.5501, "step": 18018 }, { "epoch": 59.07868852459016, "grad_norm": 4.508670330047607, "learning_rate": 7.5700211591967265e-06, "loss": 0.2766, "step": 18019 }, { "epoch": 59.08196721311475, "grad_norm": 7.514678478240967, "learning_rate": 7.568991114042642e-06, "loss": 0.6177, "step": 18020 }, { "epoch": 59.08524590163935, "grad_norm": 5.746631145477295, "learning_rate": 7.567961096300709e-06, "loss": 0.3451, "step": 18021 }, { "epoch": 59.088524590163935, "grad_norm": 5.497029781341553, "learning_rate": 7.5669311059825404e-06, "loss": 0.4011, "step": 18022 }, { "epoch": 59.09180327868852, "grad_norm": 5.998776435852051, "learning_rate": 7.565901143099756e-06, "loss": 0.5788, "step": 18023 }, { "epoch": 59.09508196721311, "grad_norm": 9.181697845458984, "learning_rate": 7.564871207663968e-06, "loss": 0.5004, "step": 18024 }, { "epoch": 59.09836065573771, "grad_norm": 5.246798992156982, "learning_rate": 7.563841299686791e-06, "loss": 0.4025, "step": 18025 }, { "epoch": 59.101639344262296, "grad_norm": 6.312819480895996, "learning_rate": 7.56281141917983e-06, "loss": 0.4647, "step": 18026 }, { "epoch": 59.104918032786884, "grad_norm": 9.023597717285156, "learning_rate": 7.561781566154709e-06, "loss": 0.3989, "step": 18027 }, { "epoch": 59.10819672131147, "grad_norm": 10.10240650177002, "learning_rate": 7.560751740623035e-06, "loss": 0.3705, "step": 18028 }, { "epoch": 59.11147540983607, "grad_norm": 8.24223518371582, "learning_rate": 7.559721942596422e-06, "loss": 0.89, "step": 18029 }, { "epoch": 59.114754098360656, "grad_norm": 7.251692295074463, "learning_rate": 7.558692172086477e-06, "loss": 0.5228, "step": 18030 }, { "epoch": 59.118032786885244, "grad_norm": 6.331822872161865, "learning_rate": 7.557662429104821e-06, "loss": 0.5138, "step": 18031 }, { "epoch": 59.12131147540983, "grad_norm": 7.120110511779785, "learning_rate": 7.556632713663059e-06, "loss": 0.7536, "step": 18032 }, { "epoch": 59.12459016393443, "grad_norm": 5.880123138427734, "learning_rate": 7.555603025772805e-06, "loss": 0.547, "step": 18033 }, { "epoch": 59.12786885245902, "grad_norm": 6.050203323364258, "learning_rate": 7.5545733654456676e-06, "loss": 0.4322, "step": 18034 }, { "epoch": 59.131147540983605, "grad_norm": 6.880124568939209, "learning_rate": 7.5535437326932545e-06, "loss": 0.4436, "step": 18035 }, { "epoch": 59.13442622950819, "grad_norm": 6.478512763977051, "learning_rate": 7.552514127527184e-06, "loss": 0.4787, "step": 18036 }, { "epoch": 59.13770491803279, "grad_norm": 4.852758407592773, "learning_rate": 7.551484549959061e-06, "loss": 0.7379, "step": 18037 }, { "epoch": 59.14098360655738, "grad_norm": 6.935403823852539, "learning_rate": 7.550455000000493e-06, "loss": 0.3808, "step": 18038 }, { "epoch": 59.144262295081965, "grad_norm": 6.81744909286499, "learning_rate": 7.549425477663093e-06, "loss": 0.4662, "step": 18039 }, { "epoch": 59.14754098360656, "grad_norm": 5.632108211517334, "learning_rate": 7.54839598295847e-06, "loss": 0.6101, "step": 18040 }, { "epoch": 59.15081967213115, "grad_norm": 5.567182540893555, "learning_rate": 7.54736651589823e-06, "loss": 0.4284, "step": 18041 }, { "epoch": 59.15409836065574, "grad_norm": 5.186421871185303, "learning_rate": 7.5463370764939815e-06, "loss": 0.4813, "step": 18042 }, { "epoch": 59.157377049180326, "grad_norm": 6.215610504150391, "learning_rate": 7.545307664757334e-06, "loss": 0.5181, "step": 18043 }, { "epoch": 59.16065573770492, "grad_norm": 5.823329448699951, "learning_rate": 7.544278280699897e-06, "loss": 0.3421, "step": 18044 }, { "epoch": 59.16393442622951, "grad_norm": 11.988067626953125, "learning_rate": 7.543248924333274e-06, "loss": 0.4543, "step": 18045 }, { "epoch": 59.1672131147541, "grad_norm": 6.250500679016113, "learning_rate": 7.542219595669074e-06, "loss": 0.595, "step": 18046 }, { "epoch": 59.170491803278686, "grad_norm": 6.362295150756836, "learning_rate": 7.541190294718905e-06, "loss": 0.3962, "step": 18047 }, { "epoch": 59.17377049180328, "grad_norm": 6.949495792388916, "learning_rate": 7.540161021494368e-06, "loss": 0.4614, "step": 18048 }, { "epoch": 59.17704918032787, "grad_norm": 4.744352340698242, "learning_rate": 7.539131776007077e-06, "loss": 0.3561, "step": 18049 }, { "epoch": 59.18032786885246, "grad_norm": 5.49688720703125, "learning_rate": 7.538102558268634e-06, "loss": 0.4253, "step": 18050 }, { "epoch": 59.18360655737705, "grad_norm": 5.505434513092041, "learning_rate": 7.5370733682906434e-06, "loss": 0.2773, "step": 18051 }, { "epoch": 59.18688524590164, "grad_norm": 6.632933139801025, "learning_rate": 7.536044206084708e-06, "loss": 0.5395, "step": 18052 }, { "epoch": 59.19016393442623, "grad_norm": 5.854467868804932, "learning_rate": 7.535015071662441e-06, "loss": 0.6241, "step": 18053 }, { "epoch": 59.19344262295082, "grad_norm": 6.356134414672852, "learning_rate": 7.533985965035441e-06, "loss": 0.304, "step": 18054 }, { "epoch": 59.19672131147541, "grad_norm": 7.100595474243164, "learning_rate": 7.532956886215313e-06, "loss": 0.5612, "step": 18055 }, { "epoch": 59.2, "grad_norm": 8.40214729309082, "learning_rate": 7.531927835213657e-06, "loss": 0.3825, "step": 18056 }, { "epoch": 59.20327868852459, "grad_norm": 7.95596170425415, "learning_rate": 7.530898812042085e-06, "loss": 0.2939, "step": 18057 }, { "epoch": 59.20655737704918, "grad_norm": 5.307715892791748, "learning_rate": 7.5298698167121975e-06, "loss": 0.5199, "step": 18058 }, { "epoch": 59.20983606557377, "grad_norm": 5.418290615081787, "learning_rate": 7.528840849235595e-06, "loss": 0.6195, "step": 18059 }, { "epoch": 59.21311475409836, "grad_norm": 5.031886100769043, "learning_rate": 7.527811909623881e-06, "loss": 0.3182, "step": 18060 }, { "epoch": 59.21639344262295, "grad_norm": 8.62543773651123, "learning_rate": 7.526782997888654e-06, "loss": 0.58, "step": 18061 }, { "epoch": 59.21967213114754, "grad_norm": 6.0978240966796875, "learning_rate": 7.525754114041526e-06, "loss": 0.4171, "step": 18062 }, { "epoch": 59.22295081967213, "grad_norm": 6.81887674331665, "learning_rate": 7.524725258094091e-06, "loss": 0.4035, "step": 18063 }, { "epoch": 59.226229508196724, "grad_norm": 9.741868019104004, "learning_rate": 7.523696430057953e-06, "loss": 0.3774, "step": 18064 }, { "epoch": 59.22950819672131, "grad_norm": 6.221596717834473, "learning_rate": 7.522667629944709e-06, "loss": 0.3781, "step": 18065 }, { "epoch": 59.2327868852459, "grad_norm": 6.504371166229248, "learning_rate": 7.521638857765966e-06, "loss": 0.5454, "step": 18066 }, { "epoch": 59.23606557377049, "grad_norm": 7.834141254425049, "learning_rate": 7.520610113533322e-06, "loss": 0.4329, "step": 18067 }, { "epoch": 59.239344262295084, "grad_norm": 6.3406758308410645, "learning_rate": 7.519581397258376e-06, "loss": 0.9038, "step": 18068 }, { "epoch": 59.24262295081967, "grad_norm": 6.104572772979736, "learning_rate": 7.518552708952728e-06, "loss": 0.3931, "step": 18069 }, { "epoch": 59.24590163934426, "grad_norm": 3.993776321411133, "learning_rate": 7.51752404862798e-06, "loss": 0.3714, "step": 18070 }, { "epoch": 59.24918032786885, "grad_norm": 5.979124546051025, "learning_rate": 7.516495416295728e-06, "loss": 0.3054, "step": 18071 }, { "epoch": 59.252459016393445, "grad_norm": 6.352170467376709, "learning_rate": 7.515466811967574e-06, "loss": 0.7989, "step": 18072 }, { "epoch": 59.25573770491803, "grad_norm": 15.726851463317871, "learning_rate": 7.514438235655115e-06, "loss": 0.481, "step": 18073 }, { "epoch": 59.25901639344262, "grad_norm": 7.448095798492432, "learning_rate": 7.513409687369946e-06, "loss": 0.5844, "step": 18074 }, { "epoch": 59.26229508196721, "grad_norm": 7.926487922668457, "learning_rate": 7.512381167123671e-06, "loss": 0.6024, "step": 18075 }, { "epoch": 59.265573770491805, "grad_norm": 7.614421367645264, "learning_rate": 7.5113526749278855e-06, "loss": 0.4896, "step": 18076 }, { "epoch": 59.268852459016394, "grad_norm": 5.810935020446777, "learning_rate": 7.510324210794187e-06, "loss": 0.5801, "step": 18077 }, { "epoch": 59.27213114754098, "grad_norm": 7.0561065673828125, "learning_rate": 7.509295774734165e-06, "loss": 0.6362, "step": 18078 }, { "epoch": 59.27540983606557, "grad_norm": 6.164111614227295, "learning_rate": 7.508267366759429e-06, "loss": 0.3147, "step": 18079 }, { "epoch": 59.278688524590166, "grad_norm": 6.915841102600098, "learning_rate": 7.507238986881569e-06, "loss": 0.2554, "step": 18080 }, { "epoch": 59.281967213114754, "grad_norm": 6.098780632019043, "learning_rate": 7.506210635112181e-06, "loss": 0.5702, "step": 18081 }, { "epoch": 59.28524590163934, "grad_norm": 12.410914421081543, "learning_rate": 7.5051823114628565e-06, "loss": 0.5343, "step": 18082 }, { "epoch": 59.28852459016394, "grad_norm": 6.813591957092285, "learning_rate": 7.5041540159452e-06, "loss": 0.3806, "step": 18083 }, { "epoch": 59.291803278688526, "grad_norm": 6.117751598358154, "learning_rate": 7.503125748570801e-06, "loss": 0.377, "step": 18084 }, { "epoch": 59.295081967213115, "grad_norm": 8.706225395202637, "learning_rate": 7.502097509351256e-06, "loss": 0.4756, "step": 18085 }, { "epoch": 59.2983606557377, "grad_norm": 5.530850887298584, "learning_rate": 7.501069298298159e-06, "loss": 0.7222, "step": 18086 }, { "epoch": 59.3016393442623, "grad_norm": 7.541712284088135, "learning_rate": 7.5000411154231e-06, "loss": 0.6774, "step": 18087 }, { "epoch": 59.30491803278689, "grad_norm": 6.0418596267700195, "learning_rate": 7.499012960737679e-06, "loss": 0.557, "step": 18088 }, { "epoch": 59.308196721311475, "grad_norm": 5.306676387786865, "learning_rate": 7.49798483425349e-06, "loss": 0.4775, "step": 18089 }, { "epoch": 59.31147540983606, "grad_norm": 5.507652759552002, "learning_rate": 7.496956735982122e-06, "loss": 0.4178, "step": 18090 }, { "epoch": 59.31475409836066, "grad_norm": 5.096678256988525, "learning_rate": 7.495928665935166e-06, "loss": 0.7179, "step": 18091 }, { "epoch": 59.31803278688525, "grad_norm": 7.4543585777282715, "learning_rate": 7.4949006241242205e-06, "loss": 0.7015, "step": 18092 }, { "epoch": 59.321311475409836, "grad_norm": 9.819916725158691, "learning_rate": 7.493872610560876e-06, "loss": 0.5578, "step": 18093 }, { "epoch": 59.324590163934424, "grad_norm": 7.409583568572998, "learning_rate": 7.492844625256721e-06, "loss": 0.5161, "step": 18094 }, { "epoch": 59.32786885245902, "grad_norm": 5.5502471923828125, "learning_rate": 7.491816668223351e-06, "loss": 0.4578, "step": 18095 }, { "epoch": 59.33114754098361, "grad_norm": 5.6492109298706055, "learning_rate": 7.490788739472357e-06, "loss": 0.4505, "step": 18096 }, { "epoch": 59.334426229508196, "grad_norm": 6.014265537261963, "learning_rate": 7.489760839015327e-06, "loss": 0.473, "step": 18097 }, { "epoch": 59.337704918032784, "grad_norm": 6.785030364990234, "learning_rate": 7.4887329668638545e-06, "loss": 0.2405, "step": 18098 }, { "epoch": 59.34098360655738, "grad_norm": 6.187463283538818, "learning_rate": 7.48770512302953e-06, "loss": 0.5786, "step": 18099 }, { "epoch": 59.34426229508197, "grad_norm": 19.07294273376465, "learning_rate": 7.48667730752394e-06, "loss": 0.303, "step": 18100 }, { "epoch": 59.34754098360656, "grad_norm": 6.616947174072266, "learning_rate": 7.485649520358678e-06, "loss": 0.5591, "step": 18101 }, { "epoch": 59.350819672131145, "grad_norm": 6.100866317749023, "learning_rate": 7.4846217615453325e-06, "loss": 0.2695, "step": 18102 }, { "epoch": 59.35409836065574, "grad_norm": 6.8131632804870605, "learning_rate": 7.483594031095491e-06, "loss": 0.5896, "step": 18103 }, { "epoch": 59.35737704918033, "grad_norm": 8.434727668762207, "learning_rate": 7.482566329020741e-06, "loss": 0.467, "step": 18104 }, { "epoch": 59.36065573770492, "grad_norm": 6.062759876251221, "learning_rate": 7.481538655332676e-06, "loss": 0.5808, "step": 18105 }, { "epoch": 59.363934426229505, "grad_norm": 5.656887531280518, "learning_rate": 7.480511010042882e-06, "loss": 0.6049, "step": 18106 }, { "epoch": 59.3672131147541, "grad_norm": 6.719060897827148, "learning_rate": 7.479483393162945e-06, "loss": 0.5514, "step": 18107 }, { "epoch": 59.37049180327869, "grad_norm": 9.79804801940918, "learning_rate": 7.47845580470445e-06, "loss": 0.4909, "step": 18108 }, { "epoch": 59.37377049180328, "grad_norm": 5.4609832763671875, "learning_rate": 7.477428244678993e-06, "loss": 0.2726, "step": 18109 }, { "epoch": 59.377049180327866, "grad_norm": 6.0365471839904785, "learning_rate": 7.476400713098153e-06, "loss": 0.3667, "step": 18110 }, { "epoch": 59.38032786885246, "grad_norm": 6.664258003234863, "learning_rate": 7.47537320997352e-06, "loss": 0.3308, "step": 18111 }, { "epoch": 59.38360655737705, "grad_norm": 7.502882957458496, "learning_rate": 7.4743457353166795e-06, "loss": 0.4081, "step": 18112 }, { "epoch": 59.38688524590164, "grad_norm": 4.9949116706848145, "learning_rate": 7.4733182891392105e-06, "loss": 0.4257, "step": 18113 }, { "epoch": 59.390163934426226, "grad_norm": 7.023400783538818, "learning_rate": 7.472290871452711e-06, "loss": 0.4958, "step": 18114 }, { "epoch": 59.39344262295082, "grad_norm": 6.431552410125732, "learning_rate": 7.47126348226876e-06, "loss": 0.4137, "step": 18115 }, { "epoch": 59.39672131147541, "grad_norm": 5.049530029296875, "learning_rate": 7.470236121598941e-06, "loss": 0.4957, "step": 18116 }, { "epoch": 59.4, "grad_norm": 9.207192420959473, "learning_rate": 7.469208789454838e-06, "loss": 0.4941, "step": 18117 }, { "epoch": 59.40327868852459, "grad_norm": 11.788863182067871, "learning_rate": 7.4681814858480405e-06, "loss": 0.5226, "step": 18118 }, { "epoch": 59.40655737704918, "grad_norm": 8.018219947814941, "learning_rate": 7.467154210790128e-06, "loss": 0.3376, "step": 18119 }, { "epoch": 59.40983606557377, "grad_norm": 6.777156352996826, "learning_rate": 7.466126964292685e-06, "loss": 0.2836, "step": 18120 }, { "epoch": 59.41311475409836, "grad_norm": 21.512754440307617, "learning_rate": 7.465099746367295e-06, "loss": 0.4667, "step": 18121 }, { "epoch": 59.41639344262295, "grad_norm": 6.006605625152588, "learning_rate": 7.464072557025541e-06, "loss": 0.6092, "step": 18122 }, { "epoch": 59.41967213114754, "grad_norm": 5.900323390960693, "learning_rate": 7.463045396279007e-06, "loss": 0.4049, "step": 18123 }, { "epoch": 59.42295081967213, "grad_norm": 6.114371299743652, "learning_rate": 7.462018264139273e-06, "loss": 0.2848, "step": 18124 }, { "epoch": 59.42622950819672, "grad_norm": 5.70488977432251, "learning_rate": 7.460991160617923e-06, "loss": 0.293, "step": 18125 }, { "epoch": 59.429508196721315, "grad_norm": 7.370985984802246, "learning_rate": 7.459964085726535e-06, "loss": 0.5, "step": 18126 }, { "epoch": 59.4327868852459, "grad_norm": 5.168221473693848, "learning_rate": 7.458937039476692e-06, "loss": 0.3964, "step": 18127 }, { "epoch": 59.43606557377049, "grad_norm": 5.918875694274902, "learning_rate": 7.457910021879981e-06, "loss": 0.4292, "step": 18128 }, { "epoch": 59.43934426229508, "grad_norm": 5.21673583984375, "learning_rate": 7.456883032947975e-06, "loss": 0.386, "step": 18129 }, { "epoch": 59.442622950819676, "grad_norm": 5.454895973205566, "learning_rate": 7.455856072692254e-06, "loss": 0.5389, "step": 18130 }, { "epoch": 59.445901639344264, "grad_norm": 5.029868125915527, "learning_rate": 7.4548291411244045e-06, "loss": 0.3468, "step": 18131 }, { "epoch": 59.44918032786885, "grad_norm": 5.938611030578613, "learning_rate": 7.4538022382560025e-06, "loss": 0.4247, "step": 18132 }, { "epoch": 59.45245901639344, "grad_norm": 10.558710098266602, "learning_rate": 7.45277536409863e-06, "loss": 0.4573, "step": 18133 }, { "epoch": 59.455737704918036, "grad_norm": 5.40260648727417, "learning_rate": 7.451748518663857e-06, "loss": 0.5807, "step": 18134 }, { "epoch": 59.459016393442624, "grad_norm": 5.988015174865723, "learning_rate": 7.450721701963274e-06, "loss": 0.4247, "step": 18135 }, { "epoch": 59.46229508196721, "grad_norm": 5.121267318725586, "learning_rate": 7.449694914008454e-06, "loss": 0.5358, "step": 18136 }, { "epoch": 59.4655737704918, "grad_norm": 5.207005500793457, "learning_rate": 7.448668154810976e-06, "loss": 0.5561, "step": 18137 }, { "epoch": 59.4688524590164, "grad_norm": 4.829812526702881, "learning_rate": 7.447641424382417e-06, "loss": 0.4593, "step": 18138 }, { "epoch": 59.472131147540985, "grad_norm": 8.112963676452637, "learning_rate": 7.446614722734351e-06, "loss": 0.5122, "step": 18139 }, { "epoch": 59.47540983606557, "grad_norm": 10.617671966552734, "learning_rate": 7.445588049878363e-06, "loss": 0.1819, "step": 18140 }, { "epoch": 59.47868852459016, "grad_norm": 5.151144504547119, "learning_rate": 7.444561405826025e-06, "loss": 0.4481, "step": 18141 }, { "epoch": 59.48196721311476, "grad_norm": 6.191043376922607, "learning_rate": 7.443534790588915e-06, "loss": 0.4268, "step": 18142 }, { "epoch": 59.485245901639345, "grad_norm": 5.7608489990234375, "learning_rate": 7.442508204178604e-06, "loss": 0.5487, "step": 18143 }, { "epoch": 59.488524590163934, "grad_norm": 10.938326835632324, "learning_rate": 7.441481646606675e-06, "loss": 0.5502, "step": 18144 }, { "epoch": 59.49180327868852, "grad_norm": 10.686399459838867, "learning_rate": 7.440455117884702e-06, "loss": 0.5135, "step": 18145 }, { "epoch": 59.49508196721312, "grad_norm": 10.52600383758545, "learning_rate": 7.439428618024258e-06, "loss": 0.6952, "step": 18146 }, { "epoch": 59.498360655737706, "grad_norm": 4.676273822784424, "learning_rate": 7.438402147036915e-06, "loss": 0.4304, "step": 18147 }, { "epoch": 59.501639344262294, "grad_norm": 4.833675384521484, "learning_rate": 7.437375704934255e-06, "loss": 0.4701, "step": 18148 }, { "epoch": 59.50491803278688, "grad_norm": 5.01988410949707, "learning_rate": 7.436349291727849e-06, "loss": 0.5072, "step": 18149 }, { "epoch": 59.50819672131148, "grad_norm": 4.788185119628906, "learning_rate": 7.435322907429268e-06, "loss": 0.5555, "step": 18150 }, { "epoch": 59.511475409836066, "grad_norm": 6.870795726776123, "learning_rate": 7.434296552050089e-06, "loss": 0.5717, "step": 18151 }, { "epoch": 59.514754098360655, "grad_norm": 3.996731758117676, "learning_rate": 7.433270225601883e-06, "loss": 0.5225, "step": 18152 }, { "epoch": 59.51803278688524, "grad_norm": 5.5215301513671875, "learning_rate": 7.432243928096224e-06, "loss": 0.5991, "step": 18153 }, { "epoch": 59.52131147540984, "grad_norm": 7.426488876342773, "learning_rate": 7.431217659544685e-06, "loss": 0.52, "step": 18154 }, { "epoch": 59.52459016393443, "grad_norm": 5.224482536315918, "learning_rate": 7.4301914199588395e-06, "loss": 0.5129, "step": 18155 }, { "epoch": 59.527868852459015, "grad_norm": 5.802313327789307, "learning_rate": 7.4291652093502535e-06, "loss": 0.4881, "step": 18156 }, { "epoch": 59.5311475409836, "grad_norm": 4.369580268859863, "learning_rate": 7.428139027730505e-06, "loss": 0.5083, "step": 18157 }, { "epoch": 59.5344262295082, "grad_norm": 6.855970859527588, "learning_rate": 7.427112875111165e-06, "loss": 0.4688, "step": 18158 }, { "epoch": 59.53770491803279, "grad_norm": 13.02380657196045, "learning_rate": 7.426086751503802e-06, "loss": 0.4984, "step": 18159 }, { "epoch": 59.540983606557376, "grad_norm": 8.979796409606934, "learning_rate": 7.425060656919983e-06, "loss": 0.5437, "step": 18160 }, { "epoch": 59.544262295081964, "grad_norm": 4.658001899719238, "learning_rate": 7.424034591371285e-06, "loss": 0.3892, "step": 18161 }, { "epoch": 59.54754098360656, "grad_norm": 4.337292671203613, "learning_rate": 7.423008554869278e-06, "loss": 0.5114, "step": 18162 }, { "epoch": 59.55081967213115, "grad_norm": 5.35179328918457, "learning_rate": 7.421982547425528e-06, "loss": 0.651, "step": 18163 }, { "epoch": 59.554098360655736, "grad_norm": 8.433292388916016, "learning_rate": 7.420956569051604e-06, "loss": 0.6107, "step": 18164 }, { "epoch": 59.557377049180324, "grad_norm": 4.841742038726807, "learning_rate": 7.419930619759073e-06, "loss": 0.5305, "step": 18165 }, { "epoch": 59.56065573770492, "grad_norm": 4.879771709442139, "learning_rate": 7.418904699559511e-06, "loss": 0.6526, "step": 18166 }, { "epoch": 59.56393442622951, "grad_norm": 5.266697883605957, "learning_rate": 7.4178788084644815e-06, "loss": 0.4447, "step": 18167 }, { "epoch": 59.5672131147541, "grad_norm": 5.606625556945801, "learning_rate": 7.4168529464855535e-06, "loss": 0.4494, "step": 18168 }, { "epoch": 59.570491803278685, "grad_norm": 4.9054718017578125, "learning_rate": 7.415827113634291e-06, "loss": 0.425, "step": 18169 }, { "epoch": 59.57377049180328, "grad_norm": 5.116751194000244, "learning_rate": 7.414801309922268e-06, "loss": 0.6226, "step": 18170 }, { "epoch": 59.57704918032787, "grad_norm": 6.503231048583984, "learning_rate": 7.413775535361049e-06, "loss": 0.4397, "step": 18171 }, { "epoch": 59.58032786885246, "grad_norm": 8.150128364562988, "learning_rate": 7.412749789962199e-06, "loss": 0.463, "step": 18172 }, { "epoch": 59.58360655737705, "grad_norm": 5.48211145401001, "learning_rate": 7.411724073737281e-06, "loss": 0.5391, "step": 18173 }, { "epoch": 59.58688524590164, "grad_norm": 4.842531204223633, "learning_rate": 7.4106983866978695e-06, "loss": 0.6259, "step": 18174 }, { "epoch": 59.59016393442623, "grad_norm": 5.818932056427002, "learning_rate": 7.409672728855526e-06, "loss": 0.5106, "step": 18175 }, { "epoch": 59.59344262295082, "grad_norm": 6.225547790527344, "learning_rate": 7.408647100221813e-06, "loss": 0.6697, "step": 18176 }, { "epoch": 59.59672131147541, "grad_norm": 5.909910678863525, "learning_rate": 7.407621500808302e-06, "loss": 0.6118, "step": 18177 }, { "epoch": 59.6, "grad_norm": 7.058380603790283, "learning_rate": 7.40659593062655e-06, "loss": 0.6702, "step": 18178 }, { "epoch": 59.60327868852459, "grad_norm": 10.143940925598145, "learning_rate": 7.405570389688126e-06, "loss": 0.5678, "step": 18179 }, { "epoch": 59.60655737704918, "grad_norm": 6.084280014038086, "learning_rate": 7.4045448780045955e-06, "loss": 0.4307, "step": 18180 }, { "epoch": 59.609836065573774, "grad_norm": 17.286518096923828, "learning_rate": 7.403519395587522e-06, "loss": 0.6207, "step": 18181 }, { "epoch": 59.61311475409836, "grad_norm": 6.666109085083008, "learning_rate": 7.402493942448462e-06, "loss": 0.4344, "step": 18182 }, { "epoch": 59.61639344262295, "grad_norm": 5.583950042724609, "learning_rate": 7.401468518598984e-06, "loss": 0.6844, "step": 18183 }, { "epoch": 59.61967213114754, "grad_norm": 8.030562400817871, "learning_rate": 7.400443124050654e-06, "loss": 0.7587, "step": 18184 }, { "epoch": 59.622950819672134, "grad_norm": 5.6029133796691895, "learning_rate": 7.39941775881503e-06, "loss": 0.6397, "step": 18185 }, { "epoch": 59.62622950819672, "grad_norm": 5.775704860687256, "learning_rate": 7.39839242290367e-06, "loss": 0.652, "step": 18186 }, { "epoch": 59.62950819672131, "grad_norm": 8.189724922180176, "learning_rate": 7.397367116328145e-06, "loss": 0.4585, "step": 18187 }, { "epoch": 59.6327868852459, "grad_norm": 5.8873748779296875, "learning_rate": 7.396341839100012e-06, "loss": 0.5391, "step": 18188 }, { "epoch": 59.636065573770495, "grad_norm": 5.527717590332031, "learning_rate": 7.39531659123083e-06, "loss": 0.4943, "step": 18189 }, { "epoch": 59.63934426229508, "grad_norm": 6.4006028175354, "learning_rate": 7.394291372732164e-06, "loss": 0.5031, "step": 18190 }, { "epoch": 59.64262295081967, "grad_norm": 8.679915428161621, "learning_rate": 7.393266183615566e-06, "loss": 0.689, "step": 18191 }, { "epoch": 59.64590163934426, "grad_norm": 6.368305206298828, "learning_rate": 7.392241023892608e-06, "loss": 0.515, "step": 18192 }, { "epoch": 59.649180327868855, "grad_norm": 7.182920455932617, "learning_rate": 7.391215893574844e-06, "loss": 0.2552, "step": 18193 }, { "epoch": 59.65245901639344, "grad_norm": 5.359612464904785, "learning_rate": 7.390190792673831e-06, "loss": 0.4377, "step": 18194 }, { "epoch": 59.65573770491803, "grad_norm": 7.743608474731445, "learning_rate": 7.389165721201128e-06, "loss": 0.5098, "step": 18195 }, { "epoch": 59.65901639344262, "grad_norm": 5.2388176918029785, "learning_rate": 7.3881406791683e-06, "loss": 0.6294, "step": 18196 }, { "epoch": 59.662295081967216, "grad_norm": 4.997123718261719, "learning_rate": 7.387115666586901e-06, "loss": 0.5672, "step": 18197 }, { "epoch": 59.665573770491804, "grad_norm": 5.185101509094238, "learning_rate": 7.38609068346849e-06, "loss": 0.3885, "step": 18198 }, { "epoch": 59.66885245901639, "grad_norm": 4.884692668914795, "learning_rate": 7.385065729824621e-06, "loss": 0.5017, "step": 18199 }, { "epoch": 59.67213114754098, "grad_norm": 8.046870231628418, "learning_rate": 7.384040805666857e-06, "loss": 0.6174, "step": 18200 }, { "epoch": 59.675409836065576, "grad_norm": 11.1668062210083, "learning_rate": 7.383015911006754e-06, "loss": 0.4588, "step": 18201 }, { "epoch": 59.678688524590164, "grad_norm": 5.8800859451293945, "learning_rate": 7.381991045855868e-06, "loss": 0.5326, "step": 18202 }, { "epoch": 59.68196721311475, "grad_norm": 16.17552947998047, "learning_rate": 7.380966210225751e-06, "loss": 0.4687, "step": 18203 }, { "epoch": 59.68524590163934, "grad_norm": 7.556417465209961, "learning_rate": 7.379941404127965e-06, "loss": 0.491, "step": 18204 }, { "epoch": 59.68852459016394, "grad_norm": 6.617776393890381, "learning_rate": 7.378916627574066e-06, "loss": 0.3409, "step": 18205 }, { "epoch": 59.691803278688525, "grad_norm": 5.9651265144348145, "learning_rate": 7.377891880575604e-06, "loss": 0.3554, "step": 18206 }, { "epoch": 59.69508196721311, "grad_norm": 6.435739040374756, "learning_rate": 7.376867163144139e-06, "loss": 0.6368, "step": 18207 }, { "epoch": 59.6983606557377, "grad_norm": 5.267922401428223, "learning_rate": 7.375842475291224e-06, "loss": 0.48, "step": 18208 }, { "epoch": 59.7016393442623, "grad_norm": 10.430866241455078, "learning_rate": 7.374817817028413e-06, "loss": 0.3326, "step": 18209 }, { "epoch": 59.704918032786885, "grad_norm": 8.245409965515137, "learning_rate": 7.373793188367262e-06, "loss": 0.4077, "step": 18210 }, { "epoch": 59.708196721311474, "grad_norm": 19.830211639404297, "learning_rate": 7.372768589319323e-06, "loss": 0.5606, "step": 18211 }, { "epoch": 59.71147540983607, "grad_norm": 6.810337543487549, "learning_rate": 7.371744019896145e-06, "loss": 0.5759, "step": 18212 }, { "epoch": 59.71475409836066, "grad_norm": 5.623940944671631, "learning_rate": 7.370719480109292e-06, "loss": 0.5587, "step": 18213 }, { "epoch": 59.718032786885246, "grad_norm": 6.424192428588867, "learning_rate": 7.36969496997031e-06, "loss": 0.3485, "step": 18214 }, { "epoch": 59.721311475409834, "grad_norm": 6.59706974029541, "learning_rate": 7.3686704894907525e-06, "loss": 0.6152, "step": 18215 }, { "epoch": 59.72459016393443, "grad_norm": 5.352814674377441, "learning_rate": 7.367646038682171e-06, "loss": 0.6808, "step": 18216 }, { "epoch": 59.72786885245902, "grad_norm": 7.739981651306152, "learning_rate": 7.366621617556111e-06, "loss": 0.3793, "step": 18217 }, { "epoch": 59.731147540983606, "grad_norm": 5.560703277587891, "learning_rate": 7.365597226124137e-06, "loss": 0.2752, "step": 18218 }, { "epoch": 59.734426229508195, "grad_norm": 4.692548751831055, "learning_rate": 7.364572864397792e-06, "loss": 0.5896, "step": 18219 }, { "epoch": 59.73770491803279, "grad_norm": 4.826564788818359, "learning_rate": 7.363548532388629e-06, "loss": 0.4665, "step": 18220 }, { "epoch": 59.74098360655738, "grad_norm": 6.501272201538086, "learning_rate": 7.362524230108193e-06, "loss": 0.4383, "step": 18221 }, { "epoch": 59.74426229508197, "grad_norm": 5.694631099700928, "learning_rate": 7.3614999575680435e-06, "loss": 0.5529, "step": 18222 }, { "epoch": 59.747540983606555, "grad_norm": 5.70945405960083, "learning_rate": 7.360475714779724e-06, "loss": 0.3566, "step": 18223 }, { "epoch": 59.75081967213115, "grad_norm": 4.895397663116455, "learning_rate": 7.3594515017547866e-06, "loss": 0.7959, "step": 18224 }, { "epoch": 59.75409836065574, "grad_norm": 7.593491554260254, "learning_rate": 7.358427318504773e-06, "loss": 0.4327, "step": 18225 }, { "epoch": 59.75737704918033, "grad_norm": 6.640545845031738, "learning_rate": 7.357403165041243e-06, "loss": 0.6362, "step": 18226 }, { "epoch": 59.760655737704916, "grad_norm": 20.0477352142334, "learning_rate": 7.3563790413757405e-06, "loss": 0.4575, "step": 18227 }, { "epoch": 59.76393442622951, "grad_norm": 5.204133033752441, "learning_rate": 7.355354947519812e-06, "loss": 0.5408, "step": 18228 }, { "epoch": 59.7672131147541, "grad_norm": 6.018953323364258, "learning_rate": 7.3543308834850055e-06, "loss": 0.4052, "step": 18229 }, { "epoch": 59.77049180327869, "grad_norm": 5.156673431396484, "learning_rate": 7.3533068492828685e-06, "loss": 0.5954, "step": 18230 }, { "epoch": 59.773770491803276, "grad_norm": 5.522793292999268, "learning_rate": 7.35228284492495e-06, "loss": 0.4052, "step": 18231 }, { "epoch": 59.77704918032787, "grad_norm": 6.804337978363037, "learning_rate": 7.3512588704227934e-06, "loss": 0.574, "step": 18232 }, { "epoch": 59.78032786885246, "grad_norm": 4.6522603034973145, "learning_rate": 7.35023492578795e-06, "loss": 0.3378, "step": 18233 }, { "epoch": 59.78360655737705, "grad_norm": 11.148625373840332, "learning_rate": 7.34921101103196e-06, "loss": 0.5564, "step": 18234 }, { "epoch": 59.78688524590164, "grad_norm": 10.983508110046387, "learning_rate": 7.348187126166373e-06, "loss": 0.6137, "step": 18235 }, { "epoch": 59.79016393442623, "grad_norm": 4.534161567687988, "learning_rate": 7.347163271202735e-06, "loss": 0.4501, "step": 18236 }, { "epoch": 59.79344262295082, "grad_norm": 21.399642944335938, "learning_rate": 7.34613944615259e-06, "loss": 0.5873, "step": 18237 }, { "epoch": 59.79672131147541, "grad_norm": 5.923981666564941, "learning_rate": 7.3451156510274755e-06, "loss": 0.4656, "step": 18238 }, { "epoch": 59.8, "grad_norm": 6.386430263519287, "learning_rate": 7.344091885838949e-06, "loss": 0.5275, "step": 18239 }, { "epoch": 59.80327868852459, "grad_norm": 7.409054756164551, "learning_rate": 7.3430681505985464e-06, "loss": 0.5462, "step": 18240 }, { "epoch": 59.80655737704918, "grad_norm": 6.2303786277771, "learning_rate": 7.342044445317813e-06, "loss": 0.5385, "step": 18241 }, { "epoch": 59.80983606557377, "grad_norm": 5.516757011413574, "learning_rate": 7.3410207700082936e-06, "loss": 0.5921, "step": 18242 }, { "epoch": 59.81311475409836, "grad_norm": 11.718676567077637, "learning_rate": 7.339997124681524e-06, "loss": 0.1768, "step": 18243 }, { "epoch": 59.81639344262295, "grad_norm": 9.813600540161133, "learning_rate": 7.338973509349056e-06, "loss": 0.4961, "step": 18244 }, { "epoch": 59.81967213114754, "grad_norm": 16.282312393188477, "learning_rate": 7.33794992402243e-06, "loss": 0.2387, "step": 18245 }, { "epoch": 59.82295081967213, "grad_norm": 5.574871063232422, "learning_rate": 7.3369263687131864e-06, "loss": 0.6688, "step": 18246 }, { "epoch": 59.82622950819672, "grad_norm": 6.128056526184082, "learning_rate": 7.335902843432862e-06, "loss": 0.7977, "step": 18247 }, { "epoch": 59.829508196721314, "grad_norm": 7.361967086791992, "learning_rate": 7.334879348193009e-06, "loss": 0.597, "step": 18248 }, { "epoch": 59.8327868852459, "grad_norm": 7.106823921203613, "learning_rate": 7.333855883005161e-06, "loss": 0.5699, "step": 18249 }, { "epoch": 59.83606557377049, "grad_norm": 11.6441011428833, "learning_rate": 7.33283244788086e-06, "loss": 0.4475, "step": 18250 }, { "epoch": 59.83934426229508, "grad_norm": 19.797143936157227, "learning_rate": 7.331809042831641e-06, "loss": 0.4751, "step": 18251 }, { "epoch": 59.842622950819674, "grad_norm": 5.334736347198486, "learning_rate": 7.330785667869056e-06, "loss": 0.5351, "step": 18252 }, { "epoch": 59.84590163934426, "grad_norm": 9.979217529296875, "learning_rate": 7.329762323004635e-06, "loss": 0.4628, "step": 18253 }, { "epoch": 59.84918032786885, "grad_norm": 7.198431968688965, "learning_rate": 7.328739008249922e-06, "loss": 0.5643, "step": 18254 }, { "epoch": 59.85245901639344, "grad_norm": 7.823173999786377, "learning_rate": 7.327715723616452e-06, "loss": 0.6684, "step": 18255 }, { "epoch": 59.855737704918035, "grad_norm": 9.872672080993652, "learning_rate": 7.326692469115766e-06, "loss": 0.7072, "step": 18256 }, { "epoch": 59.85901639344262, "grad_norm": 4.909833908081055, "learning_rate": 7.325669244759402e-06, "loss": 0.4915, "step": 18257 }, { "epoch": 59.86229508196721, "grad_norm": 7.030871868133545, "learning_rate": 7.324646050558898e-06, "loss": 0.4479, "step": 18258 }, { "epoch": 59.86557377049181, "grad_norm": 7.836763858795166, "learning_rate": 7.323622886525792e-06, "loss": 0.5962, "step": 18259 }, { "epoch": 59.868852459016395, "grad_norm": 5.354595184326172, "learning_rate": 7.32259975267162e-06, "loss": 0.3541, "step": 18260 }, { "epoch": 59.87213114754098, "grad_norm": 6.162463665008545, "learning_rate": 7.321576649007919e-06, "loss": 0.401, "step": 18261 }, { "epoch": 59.87540983606557, "grad_norm": 6.1170830726623535, "learning_rate": 7.3205535755462275e-06, "loss": 0.4102, "step": 18262 }, { "epoch": 59.87868852459017, "grad_norm": 6.820425987243652, "learning_rate": 7.319530532298081e-06, "loss": 0.3702, "step": 18263 }, { "epoch": 59.881967213114756, "grad_norm": 6.808528900146484, "learning_rate": 7.318507519275013e-06, "loss": 0.3853, "step": 18264 }, { "epoch": 59.885245901639344, "grad_norm": 4.703642845153809, "learning_rate": 7.31748453648856e-06, "loss": 0.4084, "step": 18265 }, { "epoch": 59.88852459016393, "grad_norm": 6.814476490020752, "learning_rate": 7.31646158395026e-06, "loss": 0.5238, "step": 18266 }, { "epoch": 59.89180327868853, "grad_norm": 11.667115211486816, "learning_rate": 7.315438661671648e-06, "loss": 0.277, "step": 18267 }, { "epoch": 59.895081967213116, "grad_norm": 6.047363758087158, "learning_rate": 7.314415769664254e-06, "loss": 0.5838, "step": 18268 }, { "epoch": 59.898360655737704, "grad_norm": 6.566939830780029, "learning_rate": 7.31339290793961e-06, "loss": 0.7508, "step": 18269 }, { "epoch": 59.90163934426229, "grad_norm": 6.796633720397949, "learning_rate": 7.31237007650926e-06, "loss": 0.4894, "step": 18270 }, { "epoch": 59.90491803278689, "grad_norm": 11.033044815063477, "learning_rate": 7.311347275384731e-06, "loss": 0.3792, "step": 18271 }, { "epoch": 59.90819672131148, "grad_norm": 7.701704978942871, "learning_rate": 7.310324504577557e-06, "loss": 0.6195, "step": 18272 }, { "epoch": 59.911475409836065, "grad_norm": 7.5268073081970215, "learning_rate": 7.309301764099267e-06, "loss": 0.6113, "step": 18273 }, { "epoch": 59.91475409836065, "grad_norm": 8.882464408874512, "learning_rate": 7.308279053961401e-06, "loss": 0.4785, "step": 18274 }, { "epoch": 59.91803278688525, "grad_norm": 9.060686111450195, "learning_rate": 7.307256374175489e-06, "loss": 0.6333, "step": 18275 }, { "epoch": 59.92131147540984, "grad_norm": 6.756965160369873, "learning_rate": 7.306233724753059e-06, "loss": 0.5683, "step": 18276 }, { "epoch": 59.924590163934425, "grad_norm": 5.9981489181518555, "learning_rate": 7.305211105705641e-06, "loss": 0.8331, "step": 18277 }, { "epoch": 59.927868852459014, "grad_norm": 5.77836275100708, "learning_rate": 7.304188517044774e-06, "loss": 0.4321, "step": 18278 }, { "epoch": 59.93114754098361, "grad_norm": 8.329638481140137, "learning_rate": 7.3031659587819836e-06, "loss": 0.601, "step": 18279 }, { "epoch": 59.9344262295082, "grad_norm": 4.8712615966796875, "learning_rate": 7.302143430928803e-06, "loss": 0.6452, "step": 18280 }, { "epoch": 59.937704918032786, "grad_norm": 8.461082458496094, "learning_rate": 7.301120933496757e-06, "loss": 0.7143, "step": 18281 }, { "epoch": 59.940983606557374, "grad_norm": 10.704648971557617, "learning_rate": 7.3000984664973785e-06, "loss": 0.7277, "step": 18282 }, { "epoch": 59.94426229508197, "grad_norm": 6.677211761474609, "learning_rate": 7.299076029942198e-06, "loss": 0.5209, "step": 18283 }, { "epoch": 59.94754098360656, "grad_norm": 4.528295040130615, "learning_rate": 7.298053623842745e-06, "loss": 0.4653, "step": 18284 }, { "epoch": 59.950819672131146, "grad_norm": 5.532732009887695, "learning_rate": 7.297031248210544e-06, "loss": 0.8176, "step": 18285 }, { "epoch": 59.954098360655735, "grad_norm": 6.362600326538086, "learning_rate": 7.296008903057127e-06, "loss": 0.6555, "step": 18286 }, { "epoch": 59.95737704918033, "grad_norm": 5.70570707321167, "learning_rate": 7.294986588394022e-06, "loss": 0.5996, "step": 18287 }, { "epoch": 59.96065573770492, "grad_norm": 5.9070916175842285, "learning_rate": 7.2939643042327546e-06, "loss": 0.55, "step": 18288 }, { "epoch": 59.96393442622951, "grad_norm": 5.1695556640625, "learning_rate": 7.292942050584855e-06, "loss": 0.3395, "step": 18289 }, { "epoch": 59.967213114754095, "grad_norm": 6.506582736968994, "learning_rate": 7.2919198274618486e-06, "loss": 0.4901, "step": 18290 }, { "epoch": 59.97049180327869, "grad_norm": 5.694591045379639, "learning_rate": 7.2908976348752565e-06, "loss": 0.5907, "step": 18291 }, { "epoch": 59.97377049180328, "grad_norm": 6.4729838371276855, "learning_rate": 7.289875472836616e-06, "loss": 0.3865, "step": 18292 }, { "epoch": 59.97704918032787, "grad_norm": 5.750102996826172, "learning_rate": 7.288853341357447e-06, "loss": 0.4797, "step": 18293 }, { "epoch": 59.980327868852456, "grad_norm": 9.033018112182617, "learning_rate": 7.2878312404492745e-06, "loss": 0.5416, "step": 18294 }, { "epoch": 59.98360655737705, "grad_norm": 7.089393615722656, "learning_rate": 7.286809170123621e-06, "loss": 0.486, "step": 18295 }, { "epoch": 59.98688524590164, "grad_norm": 5.609871864318848, "learning_rate": 7.28578713039202e-06, "loss": 0.401, "step": 18296 }, { "epoch": 59.99016393442623, "grad_norm": 5.314874172210693, "learning_rate": 7.2847651212659905e-06, "loss": 0.5458, "step": 18297 }, { "epoch": 59.993442622950816, "grad_norm": 5.125736236572266, "learning_rate": 7.283743142757058e-06, "loss": 0.4353, "step": 18298 }, { "epoch": 59.99672131147541, "grad_norm": 8.274686813354492, "learning_rate": 7.28272119487674e-06, "loss": 0.6613, "step": 18299 }, { "epoch": 60.0, "grad_norm": 27.95447540283203, "learning_rate": 7.2816992776365714e-06, "loss": 0.5726, "step": 18300 }, { "epoch": 60.00327868852459, "grad_norm": 6.287128925323486, "learning_rate": 7.28067739104807e-06, "loss": 0.5051, "step": 18301 }, { "epoch": 60.006557377049184, "grad_norm": 6.524101734161377, "learning_rate": 7.279655535122758e-06, "loss": 0.4748, "step": 18302 }, { "epoch": 60.00983606557377, "grad_norm": 13.395689010620117, "learning_rate": 7.278633709872158e-06, "loss": 0.4833, "step": 18303 }, { "epoch": 60.01311475409836, "grad_norm": 5.020880699157715, "learning_rate": 7.2776119153077895e-06, "loss": 0.4344, "step": 18304 }, { "epoch": 60.01639344262295, "grad_norm": 6.331357479095459, "learning_rate": 7.27659015144118e-06, "loss": 0.4754, "step": 18305 }, { "epoch": 60.019672131147544, "grad_norm": 65.7576675415039, "learning_rate": 7.275568418283848e-06, "loss": 0.3788, "step": 18306 }, { "epoch": 60.02295081967213, "grad_norm": 8.032896041870117, "learning_rate": 7.274546715847315e-06, "loss": 0.6682, "step": 18307 }, { "epoch": 60.02622950819672, "grad_norm": 5.977480888366699, "learning_rate": 7.273525044143098e-06, "loss": 0.3942, "step": 18308 }, { "epoch": 60.02950819672131, "grad_norm": 7.123363018035889, "learning_rate": 7.2725034031827245e-06, "loss": 0.3603, "step": 18309 }, { "epoch": 60.032786885245905, "grad_norm": 7.252102375030518, "learning_rate": 7.271481792977713e-06, "loss": 0.5137, "step": 18310 }, { "epoch": 60.03606557377049, "grad_norm": 6.162479400634766, "learning_rate": 7.270460213539577e-06, "loss": 0.4708, "step": 18311 }, { "epoch": 60.03934426229508, "grad_norm": 5.583600044250488, "learning_rate": 7.26943866487984e-06, "loss": 0.5717, "step": 18312 }, { "epoch": 60.04262295081967, "grad_norm": 11.856842994689941, "learning_rate": 7.268417147010024e-06, "loss": 0.4317, "step": 18313 }, { "epoch": 60.045901639344265, "grad_norm": 9.329724311828613, "learning_rate": 7.267395659941643e-06, "loss": 0.5344, "step": 18314 }, { "epoch": 60.049180327868854, "grad_norm": 24.681875228881836, "learning_rate": 7.266374203686217e-06, "loss": 0.4196, "step": 18315 }, { "epoch": 60.05245901639344, "grad_norm": 4.952592372894287, "learning_rate": 7.2653527782552665e-06, "loss": 0.5102, "step": 18316 }, { "epoch": 60.05573770491803, "grad_norm": 9.42237663269043, "learning_rate": 7.264331383660302e-06, "loss": 0.6677, "step": 18317 }, { "epoch": 60.059016393442626, "grad_norm": 9.457701683044434, "learning_rate": 7.26331001991285e-06, "loss": 0.6084, "step": 18318 }, { "epoch": 60.062295081967214, "grad_norm": 7.762451171875, "learning_rate": 7.2622886870244226e-06, "loss": 0.6647, "step": 18319 }, { "epoch": 60.0655737704918, "grad_norm": 17.889041900634766, "learning_rate": 7.2612673850065366e-06, "loss": 0.4819, "step": 18320 }, { "epoch": 60.06885245901639, "grad_norm": 6.011801242828369, "learning_rate": 7.2602461138707035e-06, "loss": 0.6118, "step": 18321 }, { "epoch": 60.072131147540986, "grad_norm": 6.308102607727051, "learning_rate": 7.25922487362845e-06, "loss": 0.4233, "step": 18322 }, { "epoch": 60.075409836065575, "grad_norm": 5.989294528961182, "learning_rate": 7.258203664291285e-06, "loss": 0.3611, "step": 18323 }, { "epoch": 60.07868852459016, "grad_norm": 6.384429454803467, "learning_rate": 7.257182485870724e-06, "loss": 0.4087, "step": 18324 }, { "epoch": 60.08196721311475, "grad_norm": 6.066230297088623, "learning_rate": 7.256161338378278e-06, "loss": 0.3276, "step": 18325 }, { "epoch": 60.08524590163935, "grad_norm": 7.530919551849365, "learning_rate": 7.255140221825472e-06, "loss": 0.4474, "step": 18326 }, { "epoch": 60.088524590163935, "grad_norm": 5.797288417816162, "learning_rate": 7.254119136223812e-06, "loss": 0.3657, "step": 18327 }, { "epoch": 60.09180327868852, "grad_norm": 10.755661010742188, "learning_rate": 7.253098081584813e-06, "loss": 0.5012, "step": 18328 }, { "epoch": 60.09508196721311, "grad_norm": 12.540244102478027, "learning_rate": 7.252077057919991e-06, "loss": 0.529, "step": 18329 }, { "epoch": 60.09836065573771, "grad_norm": 7.083999156951904, "learning_rate": 7.251056065240852e-06, "loss": 0.5172, "step": 18330 }, { "epoch": 60.101639344262296, "grad_norm": 5.7731170654296875, "learning_rate": 7.250035103558919e-06, "loss": 0.4038, "step": 18331 }, { "epoch": 60.104918032786884, "grad_norm": 7.9162397384643555, "learning_rate": 7.249014172885699e-06, "loss": 0.3315, "step": 18332 }, { "epoch": 60.10819672131147, "grad_norm": 7.723625183105469, "learning_rate": 7.247993273232705e-06, "loss": 0.4249, "step": 18333 }, { "epoch": 60.11147540983607, "grad_norm": 6.438400745391846, "learning_rate": 7.246972404611443e-06, "loss": 0.4065, "step": 18334 }, { "epoch": 60.114754098360656, "grad_norm": 6.923949241638184, "learning_rate": 7.245951567033435e-06, "loss": 0.6302, "step": 18335 }, { "epoch": 60.118032786885244, "grad_norm": 5.961907386779785, "learning_rate": 7.2449307605101845e-06, "loss": 0.3862, "step": 18336 }, { "epoch": 60.12131147540983, "grad_norm": 19.07215118408203, "learning_rate": 7.243909985053205e-06, "loss": 0.6241, "step": 18337 }, { "epoch": 60.12459016393443, "grad_norm": 5.813098430633545, "learning_rate": 7.242889240674005e-06, "loss": 0.3657, "step": 18338 }, { "epoch": 60.12786885245902, "grad_norm": 5.457507133483887, "learning_rate": 7.241868527384096e-06, "loss": 0.3489, "step": 18339 }, { "epoch": 60.131147540983605, "grad_norm": 8.828726768493652, "learning_rate": 7.240847845194987e-06, "loss": 0.672, "step": 18340 }, { "epoch": 60.13442622950819, "grad_norm": 14.644115447998047, "learning_rate": 7.239827194118187e-06, "loss": 0.7419, "step": 18341 }, { "epoch": 60.13770491803279, "grad_norm": 6.113751411437988, "learning_rate": 7.238806574165208e-06, "loss": 0.42, "step": 18342 }, { "epoch": 60.14098360655738, "grad_norm": 8.47494888305664, "learning_rate": 7.237785985347549e-06, "loss": 0.5789, "step": 18343 }, { "epoch": 60.144262295081965, "grad_norm": 9.809698104858398, "learning_rate": 7.236765427676732e-06, "loss": 0.4473, "step": 18344 }, { "epoch": 60.14754098360656, "grad_norm": 5.89670991897583, "learning_rate": 7.235744901164257e-06, "loss": 0.4949, "step": 18345 }, { "epoch": 60.15081967213115, "grad_norm": 5.686342716217041, "learning_rate": 7.234724405821631e-06, "loss": 0.5873, "step": 18346 }, { "epoch": 60.15409836065574, "grad_norm": 5.974398136138916, "learning_rate": 7.233703941660359e-06, "loss": 0.5256, "step": 18347 }, { "epoch": 60.157377049180326, "grad_norm": 6.403046131134033, "learning_rate": 7.232683508691956e-06, "loss": 0.6531, "step": 18348 }, { "epoch": 60.16065573770492, "grad_norm": 5.1436262130737305, "learning_rate": 7.231663106927924e-06, "loss": 0.4755, "step": 18349 }, { "epoch": 60.16393442622951, "grad_norm": 7.635043144226074, "learning_rate": 7.230642736379767e-06, "loss": 0.331, "step": 18350 }, { "epoch": 60.1672131147541, "grad_norm": 7.47966194152832, "learning_rate": 7.229622397058991e-06, "loss": 0.3808, "step": 18351 }, { "epoch": 60.170491803278686, "grad_norm": 4.457812786102295, "learning_rate": 7.2286020889771055e-06, "loss": 0.6035, "step": 18352 }, { "epoch": 60.17377049180328, "grad_norm": 4.959975242614746, "learning_rate": 7.227581812145612e-06, "loss": 0.4347, "step": 18353 }, { "epoch": 60.17704918032787, "grad_norm": 6.975470542907715, "learning_rate": 7.226561566576018e-06, "loss": 0.5329, "step": 18354 }, { "epoch": 60.18032786885246, "grad_norm": 5.679511547088623, "learning_rate": 7.2255413522798256e-06, "loss": 0.675, "step": 18355 }, { "epoch": 60.18360655737705, "grad_norm": 15.548099517822266, "learning_rate": 7.224521169268535e-06, "loss": 0.4302, "step": 18356 }, { "epoch": 60.18688524590164, "grad_norm": 5.844682693481445, "learning_rate": 7.223501017553658e-06, "loss": 0.425, "step": 18357 }, { "epoch": 60.19016393442623, "grad_norm": 9.343491554260254, "learning_rate": 7.222480897146693e-06, "loss": 0.3887, "step": 18358 }, { "epoch": 60.19344262295082, "grad_norm": 4.931613922119141, "learning_rate": 7.2214608080591444e-06, "loss": 0.5636, "step": 18359 }, { "epoch": 60.19672131147541, "grad_norm": 5.379639625549316, "learning_rate": 7.220440750302511e-06, "loss": 0.5858, "step": 18360 }, { "epoch": 60.2, "grad_norm": 7.993409156799316, "learning_rate": 7.219420723888301e-06, "loss": 0.2617, "step": 18361 }, { "epoch": 60.20327868852459, "grad_norm": 6.167775630950928, "learning_rate": 7.218400728828013e-06, "loss": 0.4399, "step": 18362 }, { "epoch": 60.20655737704918, "grad_norm": 6.823301792144775, "learning_rate": 7.217380765133149e-06, "loss": 0.7633, "step": 18363 }, { "epoch": 60.20983606557377, "grad_norm": 5.973438739776611, "learning_rate": 7.216360832815208e-06, "loss": 0.5075, "step": 18364 }, { "epoch": 60.21311475409836, "grad_norm": 5.107072830200195, "learning_rate": 7.215340931885695e-06, "loss": 0.3879, "step": 18365 }, { "epoch": 60.21639344262295, "grad_norm": 7.135645866394043, "learning_rate": 7.214321062356109e-06, "loss": 0.6648, "step": 18366 }, { "epoch": 60.21967213114754, "grad_norm": 7.744473457336426, "learning_rate": 7.213301224237947e-06, "loss": 0.3871, "step": 18367 }, { "epoch": 60.22295081967213, "grad_norm": 6.808117389678955, "learning_rate": 7.2122814175427145e-06, "loss": 0.6385, "step": 18368 }, { "epoch": 60.226229508196724, "grad_norm": 5.4221577644348145, "learning_rate": 7.211261642281904e-06, "loss": 0.5322, "step": 18369 }, { "epoch": 60.22950819672131, "grad_norm": 5.893610954284668, "learning_rate": 7.2102418984670186e-06, "loss": 0.4781, "step": 18370 }, { "epoch": 60.2327868852459, "grad_norm": 6.526658058166504, "learning_rate": 7.209222186109559e-06, "loss": 0.5832, "step": 18371 }, { "epoch": 60.23606557377049, "grad_norm": 5.754510402679443, "learning_rate": 7.2082025052210205e-06, "loss": 0.5411, "step": 18372 }, { "epoch": 60.239344262295084, "grad_norm": 4.143344402313232, "learning_rate": 7.207182855812896e-06, "loss": 0.2654, "step": 18373 }, { "epoch": 60.24262295081967, "grad_norm": 11.0397367477417, "learning_rate": 7.206163237896695e-06, "loss": 0.5104, "step": 18374 }, { "epoch": 60.24590163934426, "grad_norm": 8.394732475280762, "learning_rate": 7.2051436514839064e-06, "loss": 0.4095, "step": 18375 }, { "epoch": 60.24918032786885, "grad_norm": 5.71002197265625, "learning_rate": 7.20412409658603e-06, "loss": 0.511, "step": 18376 }, { "epoch": 60.252459016393445, "grad_norm": 6.2499613761901855, "learning_rate": 7.203104573214557e-06, "loss": 0.3286, "step": 18377 }, { "epoch": 60.25573770491803, "grad_norm": 6.998089790344238, "learning_rate": 7.2020850813809925e-06, "loss": 0.6248, "step": 18378 }, { "epoch": 60.25901639344262, "grad_norm": 5.975849151611328, "learning_rate": 7.201065621096828e-06, "loss": 0.4415, "step": 18379 }, { "epoch": 60.26229508196721, "grad_norm": 7.087259769439697, "learning_rate": 7.200046192373558e-06, "loss": 0.501, "step": 18380 }, { "epoch": 60.265573770491805, "grad_norm": 6.813600540161133, "learning_rate": 7.199026795222679e-06, "loss": 0.4731, "step": 18381 }, { "epoch": 60.268852459016394, "grad_norm": 7.020580768585205, "learning_rate": 7.19800742965568e-06, "loss": 0.5988, "step": 18382 }, { "epoch": 60.27213114754098, "grad_norm": 6.230724811553955, "learning_rate": 7.196988095684067e-06, "loss": 0.7015, "step": 18383 }, { "epoch": 60.27540983606557, "grad_norm": 4.974668025970459, "learning_rate": 7.1959687933193255e-06, "loss": 0.7952, "step": 18384 }, { "epoch": 60.278688524590166, "grad_norm": 6.271519660949707, "learning_rate": 7.194949522572952e-06, "loss": 0.3841, "step": 18385 }, { "epoch": 60.281967213114754, "grad_norm": 5.738349914550781, "learning_rate": 7.193930283456435e-06, "loss": 0.5725, "step": 18386 }, { "epoch": 60.28524590163934, "grad_norm": 5.102591037750244, "learning_rate": 7.192911075981276e-06, "loss": 0.3807, "step": 18387 }, { "epoch": 60.28852459016394, "grad_norm": 9.42128849029541, "learning_rate": 7.191891900158963e-06, "loss": 0.394, "step": 18388 }, { "epoch": 60.291803278688526, "grad_norm": 7.2022576332092285, "learning_rate": 7.190872756000988e-06, "loss": 0.4283, "step": 18389 }, { "epoch": 60.295081967213115, "grad_norm": 5.982369422912598, "learning_rate": 7.189853643518841e-06, "loss": 0.3705, "step": 18390 }, { "epoch": 60.2983606557377, "grad_norm": 5.380509376525879, "learning_rate": 7.188834562724019e-06, "loss": 0.413, "step": 18391 }, { "epoch": 60.3016393442623, "grad_norm": 6.061769008636475, "learning_rate": 7.18781551362801e-06, "loss": 0.7758, "step": 18392 }, { "epoch": 60.30491803278689, "grad_norm": 7.7548065185546875, "learning_rate": 7.1867964962423035e-06, "loss": 0.3411, "step": 18393 }, { "epoch": 60.308196721311475, "grad_norm": 6.135356426239014, "learning_rate": 7.185777510578392e-06, "loss": 0.5917, "step": 18394 }, { "epoch": 60.31147540983606, "grad_norm": 6.5195770263671875, "learning_rate": 7.184758556647764e-06, "loss": 0.5013, "step": 18395 }, { "epoch": 60.31475409836066, "grad_norm": 5.693729400634766, "learning_rate": 7.1837396344619116e-06, "loss": 0.5735, "step": 18396 }, { "epoch": 60.31803278688525, "grad_norm": 6.361696243286133, "learning_rate": 7.182720744032323e-06, "loss": 0.7897, "step": 18397 }, { "epoch": 60.321311475409836, "grad_norm": 5.514211654663086, "learning_rate": 7.181701885370489e-06, "loss": 0.58, "step": 18398 }, { "epoch": 60.324590163934424, "grad_norm": 5.747169017791748, "learning_rate": 7.180683058487892e-06, "loss": 0.5106, "step": 18399 }, { "epoch": 60.32786885245902, "grad_norm": 5.15228271484375, "learning_rate": 7.179664263396029e-06, "loss": 0.3655, "step": 18400 }, { "epoch": 60.33114754098361, "grad_norm": 6.5312910079956055, "learning_rate": 7.178645500106383e-06, "loss": 0.5788, "step": 18401 }, { "epoch": 60.334426229508196, "grad_norm": 5.114316463470459, "learning_rate": 7.1776267686304435e-06, "loss": 0.4214, "step": 18402 }, { "epoch": 60.337704918032784, "grad_norm": 7.025761604309082, "learning_rate": 7.176608068979691e-06, "loss": 0.6642, "step": 18403 }, { "epoch": 60.34098360655738, "grad_norm": 5.063161849975586, "learning_rate": 7.175589401165623e-06, "loss": 0.6971, "step": 18404 }, { "epoch": 60.34426229508197, "grad_norm": 5.360881328582764, "learning_rate": 7.174570765199722e-06, "loss": 0.5151, "step": 18405 }, { "epoch": 60.34754098360656, "grad_norm": 11.352231979370117, "learning_rate": 7.173552161093473e-06, "loss": 0.5037, "step": 18406 }, { "epoch": 60.350819672131145, "grad_norm": 5.4061689376831055, "learning_rate": 7.1725335888583635e-06, "loss": 0.439, "step": 18407 }, { "epoch": 60.35409836065574, "grad_norm": 5.573762893676758, "learning_rate": 7.171515048505871e-06, "loss": 0.502, "step": 18408 }, { "epoch": 60.35737704918033, "grad_norm": 8.99824047088623, "learning_rate": 7.170496540047492e-06, "loss": 0.769, "step": 18409 }, { "epoch": 60.36065573770492, "grad_norm": 7.662112236022949, "learning_rate": 7.169478063494708e-06, "loss": 0.5267, "step": 18410 }, { "epoch": 60.363934426229505, "grad_norm": 7.748769760131836, "learning_rate": 7.1684596188590006e-06, "loss": 0.2719, "step": 18411 }, { "epoch": 60.3672131147541, "grad_norm": 5.054444313049316, "learning_rate": 7.167441206151851e-06, "loss": 0.4472, "step": 18412 }, { "epoch": 60.37049180327869, "grad_norm": 5.817728519439697, "learning_rate": 7.166422825384751e-06, "loss": 0.5721, "step": 18413 }, { "epoch": 60.37377049180328, "grad_norm": 4.498892784118652, "learning_rate": 7.16540447656918e-06, "loss": 0.4917, "step": 18414 }, { "epoch": 60.377049180327866, "grad_norm": 5.090480804443359, "learning_rate": 7.164386159716621e-06, "loss": 0.7004, "step": 18415 }, { "epoch": 60.38032786885246, "grad_norm": 5.104236125946045, "learning_rate": 7.163367874838552e-06, "loss": 0.5165, "step": 18416 }, { "epoch": 60.38360655737705, "grad_norm": 8.119161605834961, "learning_rate": 7.162349621946463e-06, "loss": 0.3343, "step": 18417 }, { "epoch": 60.38688524590164, "grad_norm": 5.610811710357666, "learning_rate": 7.161331401051832e-06, "loss": 0.3844, "step": 18418 }, { "epoch": 60.390163934426226, "grad_norm": 8.083191871643066, "learning_rate": 7.16031321216614e-06, "loss": 0.4647, "step": 18419 }, { "epoch": 60.39344262295082, "grad_norm": 5.75972318649292, "learning_rate": 7.159295055300871e-06, "loss": 0.4914, "step": 18420 }, { "epoch": 60.39672131147541, "grad_norm": 14.503113746643066, "learning_rate": 7.1582769304675026e-06, "loss": 0.4034, "step": 18421 }, { "epoch": 60.4, "grad_norm": 7.331011772155762, "learning_rate": 7.157258837677514e-06, "loss": 0.3247, "step": 18422 }, { "epoch": 60.40327868852459, "grad_norm": 5.664118766784668, "learning_rate": 7.156240776942393e-06, "loss": 0.6884, "step": 18423 }, { "epoch": 60.40655737704918, "grad_norm": 5.395394802093506, "learning_rate": 7.155222748273612e-06, "loss": 0.3072, "step": 18424 }, { "epoch": 60.40983606557377, "grad_norm": 4.6475019454956055, "learning_rate": 7.1542047516826484e-06, "loss": 0.4677, "step": 18425 }, { "epoch": 60.41311475409836, "grad_norm": 6.60302209854126, "learning_rate": 7.15318678718099e-06, "loss": 0.5121, "step": 18426 }, { "epoch": 60.41639344262295, "grad_norm": 5.548412322998047, "learning_rate": 7.1521688547801095e-06, "loss": 0.4486, "step": 18427 }, { "epoch": 60.41967213114754, "grad_norm": 7.185670852661133, "learning_rate": 7.151150954491486e-06, "loss": 0.4366, "step": 18428 }, { "epoch": 60.42295081967213, "grad_norm": 5.357021331787109, "learning_rate": 7.150133086326594e-06, "loss": 0.5148, "step": 18429 }, { "epoch": 60.42622950819672, "grad_norm": 7.235208034515381, "learning_rate": 7.14911525029692e-06, "loss": 0.5992, "step": 18430 }, { "epoch": 60.429508196721315, "grad_norm": 6.0771894454956055, "learning_rate": 7.1480974464139345e-06, "loss": 0.5178, "step": 18431 }, { "epoch": 60.4327868852459, "grad_norm": 5.746613025665283, "learning_rate": 7.147079674689115e-06, "loss": 0.5855, "step": 18432 }, { "epoch": 60.43606557377049, "grad_norm": 4.873105525970459, "learning_rate": 7.1460619351339385e-06, "loss": 0.471, "step": 18433 }, { "epoch": 60.43934426229508, "grad_norm": 5.806019306182861, "learning_rate": 7.145044227759879e-06, "loss": 0.51, "step": 18434 }, { "epoch": 60.442622950819676, "grad_norm": 8.623343467712402, "learning_rate": 7.144026552578416e-06, "loss": 0.3635, "step": 18435 }, { "epoch": 60.445901639344264, "grad_norm": 7.6855974197387695, "learning_rate": 7.143008909601023e-06, "loss": 0.4542, "step": 18436 }, { "epoch": 60.44918032786885, "grad_norm": 23.143760681152344, "learning_rate": 7.141991298839177e-06, "loss": 0.3521, "step": 18437 }, { "epoch": 60.45245901639344, "grad_norm": 7.498063087463379, "learning_rate": 7.140973720304345e-06, "loss": 0.4047, "step": 18438 }, { "epoch": 60.455737704918036, "grad_norm": 7.985072135925293, "learning_rate": 7.139956174008011e-06, "loss": 0.6188, "step": 18439 }, { "epoch": 60.459016393442624, "grad_norm": 7.927491188049316, "learning_rate": 7.138938659961645e-06, "loss": 0.6676, "step": 18440 }, { "epoch": 60.46229508196721, "grad_norm": 5.586878299713135, "learning_rate": 7.137921178176721e-06, "loss": 0.6351, "step": 18441 }, { "epoch": 60.4655737704918, "grad_norm": 4.8071608543396, "learning_rate": 7.1369037286647085e-06, "loss": 0.3781, "step": 18442 }, { "epoch": 60.4688524590164, "grad_norm": 11.980940818786621, "learning_rate": 7.135886311437086e-06, "loss": 0.457, "step": 18443 }, { "epoch": 60.472131147540985, "grad_norm": 7.753899097442627, "learning_rate": 7.134868926505323e-06, "loss": 0.5556, "step": 18444 }, { "epoch": 60.47540983606557, "grad_norm": 4.813102722167969, "learning_rate": 7.13385157388089e-06, "loss": 0.3458, "step": 18445 }, { "epoch": 60.47868852459016, "grad_norm": 6.975668907165527, "learning_rate": 7.132834253575263e-06, "loss": 0.515, "step": 18446 }, { "epoch": 60.48196721311476, "grad_norm": 5.003784656524658, "learning_rate": 7.131816965599908e-06, "loss": 0.4879, "step": 18447 }, { "epoch": 60.485245901639345, "grad_norm": 6.233222484588623, "learning_rate": 7.130799709966302e-06, "loss": 0.6107, "step": 18448 }, { "epoch": 60.488524590163934, "grad_norm": 5.682703971862793, "learning_rate": 7.12978248668591e-06, "loss": 0.7502, "step": 18449 }, { "epoch": 60.49180327868852, "grad_norm": 7.008433818817139, "learning_rate": 7.128765295770206e-06, "loss": 0.355, "step": 18450 }, { "epoch": 60.49508196721312, "grad_norm": 7.881011962890625, "learning_rate": 7.127748137230658e-06, "loss": 0.4264, "step": 18451 }, { "epoch": 60.498360655737706, "grad_norm": 5.668242454528809, "learning_rate": 7.126731011078736e-06, "loss": 0.645, "step": 18452 }, { "epoch": 60.501639344262294, "grad_norm": 5.582984447479248, "learning_rate": 7.125713917325909e-06, "loss": 0.2561, "step": 18453 }, { "epoch": 60.50491803278688, "grad_norm": 6.534367084503174, "learning_rate": 7.124696855983648e-06, "loss": 0.6778, "step": 18454 }, { "epoch": 60.50819672131148, "grad_norm": 5.989309787750244, "learning_rate": 7.123679827063416e-06, "loss": 0.5437, "step": 18455 }, { "epoch": 60.511475409836066, "grad_norm": 8.142800331115723, "learning_rate": 7.122662830576688e-06, "loss": 0.4555, "step": 18456 }, { "epoch": 60.514754098360655, "grad_norm": 5.824881076812744, "learning_rate": 7.121645866534928e-06, "loss": 0.2924, "step": 18457 }, { "epoch": 60.51803278688524, "grad_norm": 5.392277240753174, "learning_rate": 7.120628934949605e-06, "loss": 0.5552, "step": 18458 }, { "epoch": 60.52131147540984, "grad_norm": 6.769390106201172, "learning_rate": 7.119612035832183e-06, "loss": 0.3364, "step": 18459 }, { "epoch": 60.52459016393443, "grad_norm": 5.782087802886963, "learning_rate": 7.118595169194127e-06, "loss": 0.4827, "step": 18460 }, { "epoch": 60.527868852459015, "grad_norm": 8.51606273651123, "learning_rate": 7.11757833504691e-06, "loss": 0.2737, "step": 18461 }, { "epoch": 60.5311475409836, "grad_norm": 6.3970465660095215, "learning_rate": 7.116561533401993e-06, "loss": 0.3214, "step": 18462 }, { "epoch": 60.5344262295082, "grad_norm": 9.19316291809082, "learning_rate": 7.115544764270846e-06, "loss": 0.4252, "step": 18463 }, { "epoch": 60.53770491803279, "grad_norm": 5.890507698059082, "learning_rate": 7.114528027664923e-06, "loss": 0.4706, "step": 18464 }, { "epoch": 60.540983606557376, "grad_norm": 6.5562052726745605, "learning_rate": 7.113511323595703e-06, "loss": 0.6751, "step": 18465 }, { "epoch": 60.544262295081964, "grad_norm": 6.834248065948486, "learning_rate": 7.112494652074643e-06, "loss": 0.4566, "step": 18466 }, { "epoch": 60.54754098360656, "grad_norm": 8.062845230102539, "learning_rate": 7.111478013113209e-06, "loss": 0.5089, "step": 18467 }, { "epoch": 60.55081967213115, "grad_norm": 6.864516258239746, "learning_rate": 7.1104614067228595e-06, "loss": 0.4247, "step": 18468 }, { "epoch": 60.554098360655736, "grad_norm": 5.281577110290527, "learning_rate": 7.109444832915064e-06, "loss": 0.6743, "step": 18469 }, { "epoch": 60.557377049180324, "grad_norm": 5.404684066772461, "learning_rate": 7.1084282917012855e-06, "loss": 0.3414, "step": 18470 }, { "epoch": 60.56065573770492, "grad_norm": 5.998877048492432, "learning_rate": 7.1074117830929856e-06, "loss": 0.3781, "step": 18471 }, { "epoch": 60.56393442622951, "grad_norm": 7.059334754943848, "learning_rate": 7.106395307101621e-06, "loss": 0.4569, "step": 18472 }, { "epoch": 60.5672131147541, "grad_norm": 14.64732837677002, "learning_rate": 7.105378863738659e-06, "loss": 0.4772, "step": 18473 }, { "epoch": 60.570491803278685, "grad_norm": 5.234104156494141, "learning_rate": 7.104362453015561e-06, "loss": 0.4861, "step": 18474 }, { "epoch": 60.57377049180328, "grad_norm": 5.320705413818359, "learning_rate": 7.1033460749437864e-06, "loss": 0.4336, "step": 18475 }, { "epoch": 60.57704918032787, "grad_norm": 4.947061538696289, "learning_rate": 7.102329729534798e-06, "loss": 0.8117, "step": 18476 }, { "epoch": 60.58032786885246, "grad_norm": 4.755163669586182, "learning_rate": 7.101313416800053e-06, "loss": 0.4026, "step": 18477 }, { "epoch": 60.58360655737705, "grad_norm": 5.08998441696167, "learning_rate": 7.1002971367510135e-06, "loss": 0.5779, "step": 18478 }, { "epoch": 60.58688524590164, "grad_norm": 4.647351264953613, "learning_rate": 7.099280889399138e-06, "loss": 0.3175, "step": 18479 }, { "epoch": 60.59016393442623, "grad_norm": 5.414317607879639, "learning_rate": 7.09826467475589e-06, "loss": 0.5559, "step": 18480 }, { "epoch": 60.59344262295082, "grad_norm": 10.094792366027832, "learning_rate": 7.097248492832719e-06, "loss": 0.4182, "step": 18481 }, { "epoch": 60.59672131147541, "grad_norm": 5.8816447257995605, "learning_rate": 7.096232343641094e-06, "loss": 0.3009, "step": 18482 }, { "epoch": 60.6, "grad_norm": 12.012073516845703, "learning_rate": 7.095216227192467e-06, "loss": 0.5211, "step": 18483 }, { "epoch": 60.60327868852459, "grad_norm": 6.427789688110352, "learning_rate": 7.0942001434983e-06, "loss": 0.4424, "step": 18484 }, { "epoch": 60.60655737704918, "grad_norm": 5.2946553230285645, "learning_rate": 7.093184092570044e-06, "loss": 0.4739, "step": 18485 }, { "epoch": 60.609836065573774, "grad_norm": 5.060964584350586, "learning_rate": 7.092168074419159e-06, "loss": 0.6219, "step": 18486 }, { "epoch": 60.61311475409836, "grad_norm": 4.488432884216309, "learning_rate": 7.091152089057105e-06, "loss": 0.635, "step": 18487 }, { "epoch": 60.61639344262295, "grad_norm": 5.925988674163818, "learning_rate": 7.0901361364953354e-06, "loss": 0.3888, "step": 18488 }, { "epoch": 60.61967213114754, "grad_norm": 4.568512916564941, "learning_rate": 7.089120216745307e-06, "loss": 0.5765, "step": 18489 }, { "epoch": 60.622950819672134, "grad_norm": 6.030982971191406, "learning_rate": 7.08810432981847e-06, "loss": 0.5481, "step": 18490 }, { "epoch": 60.62622950819672, "grad_norm": 6.592434406280518, "learning_rate": 7.087088475726289e-06, "loss": 0.4114, "step": 18491 }, { "epoch": 60.62950819672131, "grad_norm": 5.458046913146973, "learning_rate": 7.086072654480214e-06, "loss": 0.4432, "step": 18492 }, { "epoch": 60.6327868852459, "grad_norm": 6.40226411819458, "learning_rate": 7.0850568660917e-06, "loss": 0.7376, "step": 18493 }, { "epoch": 60.636065573770495, "grad_norm": 13.399742126464844, "learning_rate": 7.084041110572195e-06, "loss": 0.6028, "step": 18494 }, { "epoch": 60.63934426229508, "grad_norm": 7.611358642578125, "learning_rate": 7.083025387933165e-06, "loss": 0.4462, "step": 18495 }, { "epoch": 60.64262295081967, "grad_norm": 5.644845962524414, "learning_rate": 7.0820096981860545e-06, "loss": 0.4345, "step": 18496 }, { "epoch": 60.64590163934426, "grad_norm": 7.503567218780518, "learning_rate": 7.0809940413423186e-06, "loss": 0.4305, "step": 18497 }, { "epoch": 60.649180327868855, "grad_norm": 5.345452785491943, "learning_rate": 7.079978417413409e-06, "loss": 0.5484, "step": 18498 }, { "epoch": 60.65245901639344, "grad_norm": 5.9558939933776855, "learning_rate": 7.078962826410778e-06, "loss": 0.5106, "step": 18499 }, { "epoch": 60.65573770491803, "grad_norm": 6.35484504699707, "learning_rate": 7.0779472683458795e-06, "loss": 0.4297, "step": 18500 }, { "epoch": 60.65901639344262, "grad_norm": 7.434081554412842, "learning_rate": 7.076931743230162e-06, "loss": 0.5963, "step": 18501 }, { "epoch": 60.662295081967216, "grad_norm": 5.480169296264648, "learning_rate": 7.075916251075081e-06, "loss": 0.5419, "step": 18502 }, { "epoch": 60.665573770491804, "grad_norm": 7.603070259094238, "learning_rate": 7.074900791892083e-06, "loss": 0.3661, "step": 18503 }, { "epoch": 60.66885245901639, "grad_norm": 14.288747787475586, "learning_rate": 7.07388536569262e-06, "loss": 0.3243, "step": 18504 }, { "epoch": 60.67213114754098, "grad_norm": 6.781064510345459, "learning_rate": 7.072869972488143e-06, "loss": 0.3554, "step": 18505 }, { "epoch": 60.675409836065576, "grad_norm": 6.054487705230713, "learning_rate": 7.071854612290101e-06, "loss": 0.3924, "step": 18506 }, { "epoch": 60.678688524590164, "grad_norm": 6.842104911804199, "learning_rate": 7.070839285109938e-06, "loss": 0.4896, "step": 18507 }, { "epoch": 60.68196721311475, "grad_norm": 5.254290580749512, "learning_rate": 7.069823990959113e-06, "loss": 0.2958, "step": 18508 }, { "epoch": 60.68524590163934, "grad_norm": 10.001884460449219, "learning_rate": 7.068808729849068e-06, "loss": 0.3919, "step": 18509 }, { "epoch": 60.68852459016394, "grad_norm": 7.163288116455078, "learning_rate": 7.067793501791253e-06, "loss": 0.3837, "step": 18510 }, { "epoch": 60.691803278688525, "grad_norm": 6.20015811920166, "learning_rate": 7.0667783067971145e-06, "loss": 0.5619, "step": 18511 }, { "epoch": 60.69508196721311, "grad_norm": 7.207054138183594, "learning_rate": 7.0657631448780965e-06, "loss": 0.6513, "step": 18512 }, { "epoch": 60.6983606557377, "grad_norm": 5.849660396575928, "learning_rate": 7.0647480160456525e-06, "loss": 0.3216, "step": 18513 }, { "epoch": 60.7016393442623, "grad_norm": 5.3280463218688965, "learning_rate": 7.0637329203112285e-06, "loss": 0.4282, "step": 18514 }, { "epoch": 60.704918032786885, "grad_norm": 4.906342506408691, "learning_rate": 7.062717857686268e-06, "loss": 0.5158, "step": 18515 }, { "epoch": 60.708196721311474, "grad_norm": 4.304863452911377, "learning_rate": 7.061702828182214e-06, "loss": 0.5766, "step": 18516 }, { "epoch": 60.71147540983607, "grad_norm": 5.147524356842041, "learning_rate": 7.0606878318105195e-06, "loss": 0.5634, "step": 18517 }, { "epoch": 60.71475409836066, "grad_norm": 6.623865127563477, "learning_rate": 7.0596728685826255e-06, "loss": 0.3727, "step": 18518 }, { "epoch": 60.718032786885246, "grad_norm": 5.043946743011475, "learning_rate": 7.058657938509979e-06, "loss": 0.5372, "step": 18519 }, { "epoch": 60.721311475409834, "grad_norm": 4.370885848999023, "learning_rate": 7.0576430416040155e-06, "loss": 0.724, "step": 18520 }, { "epoch": 60.72459016393443, "grad_norm": 7.996795177459717, "learning_rate": 7.056628177876192e-06, "loss": 0.2954, "step": 18521 }, { "epoch": 60.72786885245902, "grad_norm": 7.247392177581787, "learning_rate": 7.055613347337944e-06, "loss": 0.5326, "step": 18522 }, { "epoch": 60.731147540983606, "grad_norm": 5.7885355949401855, "learning_rate": 7.054598550000719e-06, "loss": 0.4714, "step": 18523 }, { "epoch": 60.734426229508195, "grad_norm": 6.342527866363525, "learning_rate": 7.0535837858759545e-06, "loss": 0.4229, "step": 18524 }, { "epoch": 60.73770491803279, "grad_norm": 4.88975715637207, "learning_rate": 7.052569054975098e-06, "loss": 0.3744, "step": 18525 }, { "epoch": 60.74098360655738, "grad_norm": 6.5582194328308105, "learning_rate": 7.051554357309591e-06, "loss": 0.4677, "step": 18526 }, { "epoch": 60.74426229508197, "grad_norm": 5.838510513305664, "learning_rate": 7.050539692890872e-06, "loss": 0.6386, "step": 18527 }, { "epoch": 60.747540983606555, "grad_norm": 5.274491786956787, "learning_rate": 7.0495250617303865e-06, "loss": 0.4278, "step": 18528 }, { "epoch": 60.75081967213115, "grad_norm": 6.721029281616211, "learning_rate": 7.048510463839572e-06, "loss": 0.2598, "step": 18529 }, { "epoch": 60.75409836065574, "grad_norm": 7.571269989013672, "learning_rate": 7.047495899229872e-06, "loss": 0.5554, "step": 18530 }, { "epoch": 60.75737704918033, "grad_norm": 5.640737056732178, "learning_rate": 7.0464813679127255e-06, "loss": 0.4615, "step": 18531 }, { "epoch": 60.760655737704916, "grad_norm": 6.525398254394531, "learning_rate": 7.045466869899573e-06, "loss": 0.4161, "step": 18532 }, { "epoch": 60.76393442622951, "grad_norm": 5.001621246337891, "learning_rate": 7.0444524052018514e-06, "loss": 0.3902, "step": 18533 }, { "epoch": 60.7672131147541, "grad_norm": 11.238823890686035, "learning_rate": 7.043437973831002e-06, "loss": 0.3353, "step": 18534 }, { "epoch": 60.77049180327869, "grad_norm": 5.48768949508667, "learning_rate": 7.042423575798466e-06, "loss": 0.3743, "step": 18535 }, { "epoch": 60.773770491803276, "grad_norm": 5.419964790344238, "learning_rate": 7.04140921111568e-06, "loss": 0.4492, "step": 18536 }, { "epoch": 60.77704918032787, "grad_norm": 5.4688825607299805, "learning_rate": 7.040394879794079e-06, "loss": 0.5668, "step": 18537 }, { "epoch": 60.78032786885246, "grad_norm": 4.637360095977783, "learning_rate": 7.039380581845101e-06, "loss": 0.3895, "step": 18538 }, { "epoch": 60.78360655737705, "grad_norm": 5.680313587188721, "learning_rate": 7.038366317280188e-06, "loss": 0.4223, "step": 18539 }, { "epoch": 60.78688524590164, "grad_norm": 5.265122890472412, "learning_rate": 7.037352086110776e-06, "loss": 0.4608, "step": 18540 }, { "epoch": 60.79016393442623, "grad_norm": 5.934937000274658, "learning_rate": 7.036337888348298e-06, "loss": 0.5302, "step": 18541 }, { "epoch": 60.79344262295082, "grad_norm": 5.408741474151611, "learning_rate": 7.035323724004188e-06, "loss": 0.409, "step": 18542 }, { "epoch": 60.79672131147541, "grad_norm": 5.131235122680664, "learning_rate": 7.03430959308989e-06, "loss": 0.3256, "step": 18543 }, { "epoch": 60.8, "grad_norm": 8.166781425476074, "learning_rate": 7.033295495616834e-06, "loss": 0.5215, "step": 18544 }, { "epoch": 60.80327868852459, "grad_norm": 5.7177414894104, "learning_rate": 7.032281431596456e-06, "loss": 0.5009, "step": 18545 }, { "epoch": 60.80655737704918, "grad_norm": 6.11515474319458, "learning_rate": 7.031267401040187e-06, "loss": 0.4381, "step": 18546 }, { "epoch": 60.80983606557377, "grad_norm": 8.078348159790039, "learning_rate": 7.030253403959468e-06, "loss": 0.4084, "step": 18547 }, { "epoch": 60.81311475409836, "grad_norm": 6.516199588775635, "learning_rate": 7.0292394403657305e-06, "loss": 0.3634, "step": 18548 }, { "epoch": 60.81639344262295, "grad_norm": 4.986730098724365, "learning_rate": 7.028225510270406e-06, "loss": 0.3212, "step": 18549 }, { "epoch": 60.81967213114754, "grad_norm": 9.832573890686035, "learning_rate": 7.02721161368493e-06, "loss": 0.434, "step": 18550 }, { "epoch": 60.82295081967213, "grad_norm": 6.232848644256592, "learning_rate": 7.026197750620729e-06, "loss": 0.5817, "step": 18551 }, { "epoch": 60.82622950819672, "grad_norm": 6.427237033843994, "learning_rate": 7.025183921089246e-06, "loss": 0.4457, "step": 18552 }, { "epoch": 60.829508196721314, "grad_norm": 7.470165252685547, "learning_rate": 7.024170125101906e-06, "loss": 0.4163, "step": 18553 }, { "epoch": 60.8327868852459, "grad_norm": 5.2681884765625, "learning_rate": 7.02315636267014e-06, "loss": 0.2914, "step": 18554 }, { "epoch": 60.83606557377049, "grad_norm": 5.804135799407959, "learning_rate": 7.022142633805382e-06, "loss": 0.4783, "step": 18555 }, { "epoch": 60.83934426229508, "grad_norm": 5.184676647186279, "learning_rate": 7.021128938519063e-06, "loss": 0.5125, "step": 18556 }, { "epoch": 60.842622950819674, "grad_norm": 7.174688339233398, "learning_rate": 7.02011527682261e-06, "loss": 0.4561, "step": 18557 }, { "epoch": 60.84590163934426, "grad_norm": 8.667317390441895, "learning_rate": 7.0191016487274585e-06, "loss": 0.4476, "step": 18558 }, { "epoch": 60.84918032786885, "grad_norm": 6.276975631713867, "learning_rate": 7.018088054245034e-06, "loss": 0.265, "step": 18559 }, { "epoch": 60.85245901639344, "grad_norm": 6.433574676513672, "learning_rate": 7.017074493386765e-06, "loss": 0.9138, "step": 18560 }, { "epoch": 60.855737704918035, "grad_norm": 5.836852073669434, "learning_rate": 7.016060966164087e-06, "loss": 0.4006, "step": 18561 }, { "epoch": 60.85901639344262, "grad_norm": 9.112305641174316, "learning_rate": 7.015047472588422e-06, "loss": 0.6792, "step": 18562 }, { "epoch": 60.86229508196721, "grad_norm": 8.06220531463623, "learning_rate": 7.014034012671202e-06, "loss": 0.5257, "step": 18563 }, { "epoch": 60.86557377049181, "grad_norm": 4.886238098144531, "learning_rate": 7.013020586423848e-06, "loss": 0.4633, "step": 18564 }, { "epoch": 60.868852459016395, "grad_norm": 4.714631080627441, "learning_rate": 7.012007193857797e-06, "loss": 0.4997, "step": 18565 }, { "epoch": 60.87213114754098, "grad_norm": 4.334720134735107, "learning_rate": 7.0109938349844706e-06, "loss": 0.4849, "step": 18566 }, { "epoch": 60.87540983606557, "grad_norm": 7.349946022033691, "learning_rate": 7.009980509815297e-06, "loss": 0.4446, "step": 18567 }, { "epoch": 60.87868852459017, "grad_norm": 6.074704647064209, "learning_rate": 7.0089672183617e-06, "loss": 0.6524, "step": 18568 }, { "epoch": 60.881967213114756, "grad_norm": 9.445035934448242, "learning_rate": 7.007953960635109e-06, "loss": 0.5347, "step": 18569 }, { "epoch": 60.885245901639344, "grad_norm": 7.256669044494629, "learning_rate": 7.006940736646949e-06, "loss": 0.3947, "step": 18570 }, { "epoch": 60.88852459016393, "grad_norm": 7.14434289932251, "learning_rate": 7.005927546408644e-06, "loss": 0.5379, "step": 18571 }, { "epoch": 60.89180327868853, "grad_norm": 5.540091037750244, "learning_rate": 7.004914389931615e-06, "loss": 0.5305, "step": 18572 }, { "epoch": 60.895081967213116, "grad_norm": 7.727910995483398, "learning_rate": 7.003901267227296e-06, "loss": 0.553, "step": 18573 }, { "epoch": 60.898360655737704, "grad_norm": 4.586578845977783, "learning_rate": 7.002888178307102e-06, "loss": 0.7114, "step": 18574 }, { "epoch": 60.90163934426229, "grad_norm": 4.458915710449219, "learning_rate": 7.001875123182462e-06, "loss": 0.5443, "step": 18575 }, { "epoch": 60.90491803278689, "grad_norm": 4.080703258514404, "learning_rate": 7.0008621018647984e-06, "loss": 0.6109, "step": 18576 }, { "epoch": 60.90819672131148, "grad_norm": 7.776191711425781, "learning_rate": 6.9998491143655264e-06, "loss": 0.6073, "step": 18577 }, { "epoch": 60.911475409836065, "grad_norm": 6.047122478485107, "learning_rate": 6.998836160696081e-06, "loss": 0.3917, "step": 18578 }, { "epoch": 60.91475409836065, "grad_norm": 7.61574125289917, "learning_rate": 6.997823240867877e-06, "loss": 0.6239, "step": 18579 }, { "epoch": 60.91803278688525, "grad_norm": 7.186073303222656, "learning_rate": 6.996810354892336e-06, "loss": 0.8657, "step": 18580 }, { "epoch": 60.92131147540984, "grad_norm": 5.966784954071045, "learning_rate": 6.995797502780881e-06, "loss": 0.6857, "step": 18581 }, { "epoch": 60.924590163934425, "grad_norm": 5.484071254730225, "learning_rate": 6.9947846845449355e-06, "loss": 0.4269, "step": 18582 }, { "epoch": 60.927868852459014, "grad_norm": 5.242921352386475, "learning_rate": 6.993771900195914e-06, "loss": 0.3858, "step": 18583 }, { "epoch": 60.93114754098361, "grad_norm": 5.460531711578369, "learning_rate": 6.992759149745242e-06, "loss": 0.6011, "step": 18584 }, { "epoch": 60.9344262295082, "grad_norm": 5.9689555168151855, "learning_rate": 6.991746433204335e-06, "loss": 0.3062, "step": 18585 }, { "epoch": 60.937704918032786, "grad_norm": 5.398545742034912, "learning_rate": 6.990733750584615e-06, "loss": 0.4572, "step": 18586 }, { "epoch": 60.940983606557374, "grad_norm": 4.539023399353027, "learning_rate": 6.989721101897504e-06, "loss": 0.5293, "step": 18587 }, { "epoch": 60.94426229508197, "grad_norm": 8.753783226013184, "learning_rate": 6.988708487154416e-06, "loss": 0.5644, "step": 18588 }, { "epoch": 60.94754098360656, "grad_norm": 4.659858703613281, "learning_rate": 6.987695906366771e-06, "loss": 0.3219, "step": 18589 }, { "epoch": 60.950819672131146, "grad_norm": 4.5949602127075195, "learning_rate": 6.9866833595459846e-06, "loss": 0.563, "step": 18590 }, { "epoch": 60.954098360655735, "grad_norm": 5.6830525398254395, "learning_rate": 6.985670846703478e-06, "loss": 0.4841, "step": 18591 }, { "epoch": 60.95737704918033, "grad_norm": 10.904850006103516, "learning_rate": 6.984658367850669e-06, "loss": 0.4512, "step": 18592 }, { "epoch": 60.96065573770492, "grad_norm": 7.4491963386535645, "learning_rate": 6.983645922998969e-06, "loss": 0.5811, "step": 18593 }, { "epoch": 60.96393442622951, "grad_norm": 9.282596588134766, "learning_rate": 6.9826335121597965e-06, "loss": 0.6168, "step": 18594 }, { "epoch": 60.967213114754095, "grad_norm": 10.71690845489502, "learning_rate": 6.981621135344572e-06, "loss": 0.5716, "step": 18595 }, { "epoch": 60.97049180327869, "grad_norm": 5.59208869934082, "learning_rate": 6.980608792564706e-06, "loss": 0.7349, "step": 18596 }, { "epoch": 60.97377049180328, "grad_norm": 7.274704456329346, "learning_rate": 6.979596483831615e-06, "loss": 0.4506, "step": 18597 }, { "epoch": 60.97704918032787, "grad_norm": 6.697422504425049, "learning_rate": 6.9785842091567115e-06, "loss": 0.3826, "step": 18598 }, { "epoch": 60.980327868852456, "grad_norm": 5.822083950042725, "learning_rate": 6.977571968551416e-06, "loss": 0.5977, "step": 18599 }, { "epoch": 60.98360655737705, "grad_norm": 6.184313774108887, "learning_rate": 6.9765597620271396e-06, "loss": 0.4813, "step": 18600 }, { "epoch": 60.98688524590164, "grad_norm": 6.02006721496582, "learning_rate": 6.975547589595295e-06, "loss": 0.793, "step": 18601 }, { "epoch": 60.99016393442623, "grad_norm": 6.7987589836120605, "learning_rate": 6.9745354512672955e-06, "loss": 0.7721, "step": 18602 }, { "epoch": 60.993442622950816, "grad_norm": 5.553346157073975, "learning_rate": 6.973523347054552e-06, "loss": 0.501, "step": 18603 }, { "epoch": 60.99672131147541, "grad_norm": 6.972323894500732, "learning_rate": 6.972511276968481e-06, "loss": 0.3237, "step": 18604 }, { "epoch": 61.0, "grad_norm": 8.617061614990234, "learning_rate": 6.971499241020495e-06, "loss": 0.4364, "step": 18605 }, { "epoch": 61.00327868852459, "grad_norm": 5.950150489807129, "learning_rate": 6.970487239222001e-06, "loss": 0.4244, "step": 18606 }, { "epoch": 61.006557377049184, "grad_norm": 15.544989585876465, "learning_rate": 6.9694752715844135e-06, "loss": 0.4575, "step": 18607 }, { "epoch": 61.00983606557377, "grad_norm": 5.463570594787598, "learning_rate": 6.968463338119147e-06, "loss": 0.5996, "step": 18608 }, { "epoch": 61.01311475409836, "grad_norm": 5.672248363494873, "learning_rate": 6.967451438837605e-06, "loss": 0.5512, "step": 18609 }, { "epoch": 61.01639344262295, "grad_norm": 5.355010509490967, "learning_rate": 6.966439573751202e-06, "loss": 0.4104, "step": 18610 }, { "epoch": 61.019672131147544, "grad_norm": 10.685994148254395, "learning_rate": 6.965427742871346e-06, "loss": 0.5147, "step": 18611 }, { "epoch": 61.02295081967213, "grad_norm": 17.589157104492188, "learning_rate": 6.964415946209448e-06, "loss": 0.374, "step": 18612 }, { "epoch": 61.02622950819672, "grad_norm": 7.1154913902282715, "learning_rate": 6.963404183776916e-06, "loss": 0.3163, "step": 18613 }, { "epoch": 61.02950819672131, "grad_norm": 6.944863319396973, "learning_rate": 6.9623924555851616e-06, "loss": 0.357, "step": 18614 }, { "epoch": 61.032786885245905, "grad_norm": 5.964659214019775, "learning_rate": 6.96138076164559e-06, "loss": 0.4836, "step": 18615 }, { "epoch": 61.03606557377049, "grad_norm": 5.72276496887207, "learning_rate": 6.960369101969605e-06, "loss": 0.4546, "step": 18616 }, { "epoch": 61.03934426229508, "grad_norm": 6.393891334533691, "learning_rate": 6.9593574765686235e-06, "loss": 0.2615, "step": 18617 }, { "epoch": 61.04262295081967, "grad_norm": 6.691226005554199, "learning_rate": 6.958345885454047e-06, "loss": 0.4491, "step": 18618 }, { "epoch": 61.045901639344265, "grad_norm": 6.711193561553955, "learning_rate": 6.957334328637284e-06, "loss": 0.3901, "step": 18619 }, { "epoch": 61.049180327868854, "grad_norm": 4.847066879272461, "learning_rate": 6.956322806129736e-06, "loss": 0.7129, "step": 18620 }, { "epoch": 61.05245901639344, "grad_norm": 5.581048488616943, "learning_rate": 6.955311317942817e-06, "loss": 0.4073, "step": 18621 }, { "epoch": 61.05573770491803, "grad_norm": 5.935662269592285, "learning_rate": 6.954299864087929e-06, "loss": 0.4813, "step": 18622 }, { "epoch": 61.059016393442626, "grad_norm": 8.118130683898926, "learning_rate": 6.953288444576476e-06, "loss": 0.5542, "step": 18623 }, { "epoch": 61.062295081967214, "grad_norm": 6.697980880737305, "learning_rate": 6.95227705941986e-06, "loss": 0.5024, "step": 18624 }, { "epoch": 61.0655737704918, "grad_norm": 4.839078903198242, "learning_rate": 6.951265708629493e-06, "loss": 0.3676, "step": 18625 }, { "epoch": 61.06885245901639, "grad_norm": 5.746207237243652, "learning_rate": 6.950254392216774e-06, "loss": 0.4661, "step": 18626 }, { "epoch": 61.072131147540986, "grad_norm": 6.295750141143799, "learning_rate": 6.949243110193109e-06, "loss": 0.3528, "step": 18627 }, { "epoch": 61.075409836065575, "grad_norm": 6.694079875946045, "learning_rate": 6.9482318625698986e-06, "loss": 0.7019, "step": 18628 }, { "epoch": 61.07868852459016, "grad_norm": 9.07143783569336, "learning_rate": 6.947220649358544e-06, "loss": 0.4451, "step": 18629 }, { "epoch": 61.08196721311475, "grad_norm": 4.89586067199707, "learning_rate": 6.946209470570455e-06, "loss": 0.3744, "step": 18630 }, { "epoch": 61.08524590163935, "grad_norm": 6.739912509918213, "learning_rate": 6.9451983262170286e-06, "loss": 0.623, "step": 18631 }, { "epoch": 61.088524590163935, "grad_norm": 6.208187580108643, "learning_rate": 6.944187216309668e-06, "loss": 0.3498, "step": 18632 }, { "epoch": 61.09180327868852, "grad_norm": 5.412003993988037, "learning_rate": 6.943176140859769e-06, "loss": 0.5941, "step": 18633 }, { "epoch": 61.09508196721311, "grad_norm": 5.783820629119873, "learning_rate": 6.9421650998787415e-06, "loss": 0.5241, "step": 18634 }, { "epoch": 61.09836065573771, "grad_norm": 5.199333667755127, "learning_rate": 6.941154093377982e-06, "loss": 0.4407, "step": 18635 }, { "epoch": 61.101639344262296, "grad_norm": 5.608132362365723, "learning_rate": 6.940143121368888e-06, "loss": 0.7031, "step": 18636 }, { "epoch": 61.104918032786884, "grad_norm": 5.07982873916626, "learning_rate": 6.939132183862862e-06, "loss": 0.6557, "step": 18637 }, { "epoch": 61.10819672131147, "grad_norm": 6.377830982208252, "learning_rate": 6.938121280871306e-06, "loss": 0.5202, "step": 18638 }, { "epoch": 61.11147540983607, "grad_norm": 5.450130462646484, "learning_rate": 6.937110412405613e-06, "loss": 0.4543, "step": 18639 }, { "epoch": 61.114754098360656, "grad_norm": 6.679447650909424, "learning_rate": 6.9360995784771865e-06, "loss": 0.5432, "step": 18640 }, { "epoch": 61.118032786885244, "grad_norm": 4.784732818603516, "learning_rate": 6.935088779097426e-06, "loss": 0.6323, "step": 18641 }, { "epoch": 61.12131147540983, "grad_norm": 5.123453617095947, "learning_rate": 6.934078014277719e-06, "loss": 0.7477, "step": 18642 }, { "epoch": 61.12459016393443, "grad_norm": 6.142219066619873, "learning_rate": 6.933067284029474e-06, "loss": 0.3, "step": 18643 }, { "epoch": 61.12786885245902, "grad_norm": 6.6757001876831055, "learning_rate": 6.932056588364084e-06, "loss": 0.5175, "step": 18644 }, { "epoch": 61.131147540983605, "grad_norm": 5.4358625411987305, "learning_rate": 6.931045927292948e-06, "loss": 0.3743, "step": 18645 }, { "epoch": 61.13442622950819, "grad_norm": 33.054656982421875, "learning_rate": 6.930035300827456e-06, "loss": 0.3577, "step": 18646 }, { "epoch": 61.13770491803279, "grad_norm": 6.849374294281006, "learning_rate": 6.929024708979011e-06, "loss": 0.2882, "step": 18647 }, { "epoch": 61.14098360655738, "grad_norm": 7.678847312927246, "learning_rate": 6.928014151759004e-06, "loss": 0.5371, "step": 18648 }, { "epoch": 61.144262295081965, "grad_norm": 4.895730495452881, "learning_rate": 6.9270036291788335e-06, "loss": 0.4262, "step": 18649 }, { "epoch": 61.14754098360656, "grad_norm": 4.9083709716796875, "learning_rate": 6.925993141249887e-06, "loss": 0.4831, "step": 18650 }, { "epoch": 61.15081967213115, "grad_norm": 8.445356369018555, "learning_rate": 6.924982687983569e-06, "loss": 0.4611, "step": 18651 }, { "epoch": 61.15409836065574, "grad_norm": 5.605813980102539, "learning_rate": 6.9239722693912684e-06, "loss": 0.3532, "step": 18652 }, { "epoch": 61.157377049180326, "grad_norm": 7.205173969268799, "learning_rate": 6.9229618854843785e-06, "loss": 0.3591, "step": 18653 }, { "epoch": 61.16065573770492, "grad_norm": 10.27800178527832, "learning_rate": 6.921951536274292e-06, "loss": 0.6248, "step": 18654 }, { "epoch": 61.16393442622951, "grad_norm": 7.21998405456543, "learning_rate": 6.9209412217724e-06, "loss": 0.3011, "step": 18655 }, { "epoch": 61.1672131147541, "grad_norm": 5.551111698150635, "learning_rate": 6.9199309419900984e-06, "loss": 0.6871, "step": 18656 }, { "epoch": 61.170491803278686, "grad_norm": 9.948816299438477, "learning_rate": 6.91892069693878e-06, "loss": 0.3457, "step": 18657 }, { "epoch": 61.17377049180328, "grad_norm": 6.916080474853516, "learning_rate": 6.917910486629833e-06, "loss": 0.5027, "step": 18658 }, { "epoch": 61.17704918032787, "grad_norm": 4.890048503875732, "learning_rate": 6.916900311074647e-06, "loss": 0.4205, "step": 18659 }, { "epoch": 61.18032786885246, "grad_norm": 6.727169036865234, "learning_rate": 6.915890170284619e-06, "loss": 0.4903, "step": 18660 }, { "epoch": 61.18360655737705, "grad_norm": 7.164816379547119, "learning_rate": 6.914880064271136e-06, "loss": 0.3475, "step": 18661 }, { "epoch": 61.18688524590164, "grad_norm": 8.574438095092773, "learning_rate": 6.913869993045586e-06, "loss": 0.5417, "step": 18662 }, { "epoch": 61.19016393442623, "grad_norm": 5.767229080200195, "learning_rate": 6.912859956619361e-06, "loss": 0.5256, "step": 18663 }, { "epoch": 61.19344262295082, "grad_norm": 6.870741367340088, "learning_rate": 6.911849955003851e-06, "loss": 0.5506, "step": 18664 }, { "epoch": 61.19672131147541, "grad_norm": 7.224279403686523, "learning_rate": 6.9108399882104426e-06, "loss": 0.5525, "step": 18665 }, { "epoch": 61.2, "grad_norm": 4.898059844970703, "learning_rate": 6.909830056250527e-06, "loss": 0.5932, "step": 18666 }, { "epoch": 61.20327868852459, "grad_norm": 7.217142581939697, "learning_rate": 6.9088201591354895e-06, "loss": 0.4889, "step": 18667 }, { "epoch": 61.20655737704918, "grad_norm": 8.629301071166992, "learning_rate": 6.907810296876716e-06, "loss": 0.4614, "step": 18668 }, { "epoch": 61.20983606557377, "grad_norm": 8.18376636505127, "learning_rate": 6.9068004694856e-06, "loss": 0.4112, "step": 18669 }, { "epoch": 61.21311475409836, "grad_norm": 7.875329971313477, "learning_rate": 6.905790676973525e-06, "loss": 0.4782, "step": 18670 }, { "epoch": 61.21639344262295, "grad_norm": 5.452026844024658, "learning_rate": 6.904780919351876e-06, "loss": 0.5622, "step": 18671 }, { "epoch": 61.21967213114754, "grad_norm": 7.543729782104492, "learning_rate": 6.903771196632039e-06, "loss": 0.4139, "step": 18672 }, { "epoch": 61.22295081967213, "grad_norm": 6.508970260620117, "learning_rate": 6.902761508825403e-06, "loss": 0.5543, "step": 18673 }, { "epoch": 61.226229508196724, "grad_norm": 5.139227390289307, "learning_rate": 6.9017518559433525e-06, "loss": 0.3757, "step": 18674 }, { "epoch": 61.22950819672131, "grad_norm": 6.494587421417236, "learning_rate": 6.90074223799727e-06, "loss": 0.3653, "step": 18675 }, { "epoch": 61.2327868852459, "grad_norm": 11.078182220458984, "learning_rate": 6.899732654998538e-06, "loss": 0.6629, "step": 18676 }, { "epoch": 61.23606557377049, "grad_norm": 5.4892144203186035, "learning_rate": 6.898723106958547e-06, "loss": 0.6827, "step": 18677 }, { "epoch": 61.239344262295084, "grad_norm": 7.344062805175781, "learning_rate": 6.897713593888678e-06, "loss": 0.578, "step": 18678 }, { "epoch": 61.24262295081967, "grad_norm": 12.27878189086914, "learning_rate": 6.896704115800314e-06, "loss": 0.5699, "step": 18679 }, { "epoch": 61.24590163934426, "grad_norm": 5.03525447845459, "learning_rate": 6.895694672704838e-06, "loss": 0.4438, "step": 18680 }, { "epoch": 61.24918032786885, "grad_norm": 8.836709976196289, "learning_rate": 6.894685264613627e-06, "loss": 0.5254, "step": 18681 }, { "epoch": 61.252459016393445, "grad_norm": 5.16627311706543, "learning_rate": 6.893675891538073e-06, "loss": 0.6667, "step": 18682 }, { "epoch": 61.25573770491803, "grad_norm": 7.187644958496094, "learning_rate": 6.892666553489553e-06, "loss": 0.383, "step": 18683 }, { "epoch": 61.25901639344262, "grad_norm": 6.591732501983643, "learning_rate": 6.891657250479449e-06, "loss": 0.5821, "step": 18684 }, { "epoch": 61.26229508196721, "grad_norm": 7.3801350593566895, "learning_rate": 6.890647982519137e-06, "loss": 0.4269, "step": 18685 }, { "epoch": 61.265573770491805, "grad_norm": 4.869991302490234, "learning_rate": 6.889638749620006e-06, "loss": 0.5851, "step": 18686 }, { "epoch": 61.268852459016394, "grad_norm": 8.863504409790039, "learning_rate": 6.8886295517934324e-06, "loss": 0.5232, "step": 18687 }, { "epoch": 61.27213114754098, "grad_norm": 34.21745681762695, "learning_rate": 6.887620389050793e-06, "loss": 0.4439, "step": 18688 }, { "epoch": 61.27540983606557, "grad_norm": 21.16828727722168, "learning_rate": 6.886611261403468e-06, "loss": 0.429, "step": 18689 }, { "epoch": 61.278688524590166, "grad_norm": 10.003475189208984, "learning_rate": 6.885602168862844e-06, "loss": 0.4342, "step": 18690 }, { "epoch": 61.281967213114754, "grad_norm": 7.957633972167969, "learning_rate": 6.88459311144029e-06, "loss": 0.4877, "step": 18691 }, { "epoch": 61.28524590163934, "grad_norm": 6.135322570800781, "learning_rate": 6.883584089147189e-06, "loss": 0.4424, "step": 18692 }, { "epoch": 61.28852459016394, "grad_norm": 4.770648956298828, "learning_rate": 6.882575101994918e-06, "loss": 0.4556, "step": 18693 }, { "epoch": 61.291803278688526, "grad_norm": 9.883115768432617, "learning_rate": 6.881566149994853e-06, "loss": 0.2187, "step": 18694 }, { "epoch": 61.295081967213115, "grad_norm": 5.381983757019043, "learning_rate": 6.8805572331583715e-06, "loss": 0.4206, "step": 18695 }, { "epoch": 61.2983606557377, "grad_norm": 8.42142391204834, "learning_rate": 6.879548351496852e-06, "loss": 0.6537, "step": 18696 }, { "epoch": 61.3016393442623, "grad_norm": 6.758634090423584, "learning_rate": 6.8785395050216696e-06, "loss": 0.4322, "step": 18697 }, { "epoch": 61.30491803278689, "grad_norm": 5.149003982543945, "learning_rate": 6.8775306937441964e-06, "loss": 0.5706, "step": 18698 }, { "epoch": 61.308196721311475, "grad_norm": 4.867870330810547, "learning_rate": 6.876521917675814e-06, "loss": 0.4808, "step": 18699 }, { "epoch": 61.31147540983606, "grad_norm": 7.161802768707275, "learning_rate": 6.875513176827895e-06, "loss": 0.7322, "step": 18700 }, { "epoch": 61.31475409836066, "grad_norm": 6.19511604309082, "learning_rate": 6.874504471211815e-06, "loss": 0.5604, "step": 18701 }, { "epoch": 61.31803278688525, "grad_norm": 4.836481094360352, "learning_rate": 6.873495800838941e-06, "loss": 0.5737, "step": 18702 }, { "epoch": 61.321311475409836, "grad_norm": 5.322042465209961, "learning_rate": 6.872487165720658e-06, "loss": 0.439, "step": 18703 }, { "epoch": 61.324590163934424, "grad_norm": 5.5308451652526855, "learning_rate": 6.871478565868333e-06, "loss": 0.4049, "step": 18704 }, { "epoch": 61.32786885245902, "grad_norm": 5.93025016784668, "learning_rate": 6.87047000129334e-06, "loss": 0.5231, "step": 18705 }, { "epoch": 61.33114754098361, "grad_norm": 4.586895942687988, "learning_rate": 6.869461472007052e-06, "loss": 0.4085, "step": 18706 }, { "epoch": 61.334426229508196, "grad_norm": 6.042543411254883, "learning_rate": 6.868452978020836e-06, "loss": 0.5191, "step": 18707 }, { "epoch": 61.337704918032784, "grad_norm": 32.56474304199219, "learning_rate": 6.867444519346074e-06, "loss": 0.4843, "step": 18708 }, { "epoch": 61.34098360655738, "grad_norm": 4.403741836547852, "learning_rate": 6.8664360959941314e-06, "loss": 0.3724, "step": 18709 }, { "epoch": 61.34426229508197, "grad_norm": 7.184848785400391, "learning_rate": 6.86542770797638e-06, "loss": 0.3443, "step": 18710 }, { "epoch": 61.34754098360656, "grad_norm": 5.353841781616211, "learning_rate": 6.864419355304186e-06, "loss": 0.5977, "step": 18711 }, { "epoch": 61.350819672131145, "grad_norm": 7.63667106628418, "learning_rate": 6.86341103798893e-06, "loss": 0.2886, "step": 18712 }, { "epoch": 61.35409836065574, "grad_norm": 5.306610584259033, "learning_rate": 6.862402756041973e-06, "loss": 0.3788, "step": 18713 }, { "epoch": 61.35737704918033, "grad_norm": 13.648733139038086, "learning_rate": 6.861394509474688e-06, "loss": 0.4776, "step": 18714 }, { "epoch": 61.36065573770492, "grad_norm": 7.136877059936523, "learning_rate": 6.860386298298442e-06, "loss": 0.6112, "step": 18715 }, { "epoch": 61.363934426229505, "grad_norm": 15.042476654052734, "learning_rate": 6.859378122524605e-06, "loss": 0.4889, "step": 18716 }, { "epoch": 61.3672131147541, "grad_norm": 8.245381355285645, "learning_rate": 6.8583699821645455e-06, "loss": 0.4953, "step": 18717 }, { "epoch": 61.37049180327869, "grad_norm": 4.270979404449463, "learning_rate": 6.85736187722963e-06, "loss": 0.3733, "step": 18718 }, { "epoch": 61.37377049180328, "grad_norm": 4.941294193267822, "learning_rate": 6.856353807731228e-06, "loss": 0.3249, "step": 18719 }, { "epoch": 61.377049180327866, "grad_norm": 6.679074287414551, "learning_rate": 6.855345773680705e-06, "loss": 0.4737, "step": 18720 }, { "epoch": 61.38032786885246, "grad_norm": 4.556170463562012, "learning_rate": 6.854337775089428e-06, "loss": 0.3914, "step": 18721 }, { "epoch": 61.38360655737705, "grad_norm": 6.83312463760376, "learning_rate": 6.853329811968764e-06, "loss": 0.7305, "step": 18722 }, { "epoch": 61.38688524590164, "grad_norm": 5.7344818115234375, "learning_rate": 6.852321884330078e-06, "loss": 0.1625, "step": 18723 }, { "epoch": 61.390163934426226, "grad_norm": 5.491135597229004, "learning_rate": 6.851313992184732e-06, "loss": 0.4894, "step": 18724 }, { "epoch": 61.39344262295082, "grad_norm": 6.738638877868652, "learning_rate": 6.8503061355440994e-06, "loss": 0.576, "step": 18725 }, { "epoch": 61.39672131147541, "grad_norm": 5.18238639831543, "learning_rate": 6.849298314419539e-06, "loss": 0.5556, "step": 18726 }, { "epoch": 61.4, "grad_norm": 5.583690166473389, "learning_rate": 6.848290528822417e-06, "loss": 0.5427, "step": 18727 }, { "epoch": 61.40327868852459, "grad_norm": 7.391262531280518, "learning_rate": 6.847282778764095e-06, "loss": 0.3443, "step": 18728 }, { "epoch": 61.40655737704918, "grad_norm": 5.842179775238037, "learning_rate": 6.846275064255934e-06, "loss": 0.3213, "step": 18729 }, { "epoch": 61.40983606557377, "grad_norm": 5.168333530426025, "learning_rate": 6.845267385309304e-06, "loss": 0.2887, "step": 18730 }, { "epoch": 61.41311475409836, "grad_norm": 5.723628997802734, "learning_rate": 6.844259741935566e-06, "loss": 0.4252, "step": 18731 }, { "epoch": 61.41639344262295, "grad_norm": 20.403074264526367, "learning_rate": 6.843252134146079e-06, "loss": 0.567, "step": 18732 }, { "epoch": 61.41967213114754, "grad_norm": 46.516151428222656, "learning_rate": 6.842244561952203e-06, "loss": 0.6292, "step": 18733 }, { "epoch": 61.42295081967213, "grad_norm": 5.8936638832092285, "learning_rate": 6.841237025365308e-06, "loss": 0.6415, "step": 18734 }, { "epoch": 61.42622950819672, "grad_norm": 6.011394023895264, "learning_rate": 6.840229524396749e-06, "loss": 0.3547, "step": 18735 }, { "epoch": 61.429508196721315, "grad_norm": 25.0404109954834, "learning_rate": 6.839222059057886e-06, "loss": 0.7086, "step": 18736 }, { "epoch": 61.4327868852459, "grad_norm": 5.875582218170166, "learning_rate": 6.838214629360078e-06, "loss": 0.3747, "step": 18737 }, { "epoch": 61.43606557377049, "grad_norm": 6.131124019622803, "learning_rate": 6.837207235314691e-06, "loss": 0.4696, "step": 18738 }, { "epoch": 61.43934426229508, "grad_norm": 5.681937217712402, "learning_rate": 6.836199876933082e-06, "loss": 0.4632, "step": 18739 }, { "epoch": 61.442622950819676, "grad_norm": 4.578794479370117, "learning_rate": 6.8351925542266085e-06, "loss": 0.4763, "step": 18740 }, { "epoch": 61.445901639344264, "grad_norm": 5.733302116394043, "learning_rate": 6.834185267206627e-06, "loss": 0.6912, "step": 18741 }, { "epoch": 61.44918032786885, "grad_norm": 6.790212631225586, "learning_rate": 6.8331780158844985e-06, "loss": 0.5016, "step": 18742 }, { "epoch": 61.45245901639344, "grad_norm": 8.122361183166504, "learning_rate": 6.832170800271583e-06, "loss": 0.4504, "step": 18743 }, { "epoch": 61.455737704918036, "grad_norm": 5.7383270263671875, "learning_rate": 6.8311636203792316e-06, "loss": 0.381, "step": 18744 }, { "epoch": 61.459016393442624, "grad_norm": 6.194742679595947, "learning_rate": 6.8301564762188084e-06, "loss": 0.7248, "step": 18745 }, { "epoch": 61.46229508196721, "grad_norm": 7.053046226501465, "learning_rate": 6.8291493678016654e-06, "loss": 0.4265, "step": 18746 }, { "epoch": 61.4655737704918, "grad_norm": 5.396124839782715, "learning_rate": 6.82814229513916e-06, "loss": 0.5171, "step": 18747 }, { "epoch": 61.4688524590164, "grad_norm": 6.1119608879089355, "learning_rate": 6.827135258242649e-06, "loss": 0.5561, "step": 18748 }, { "epoch": 61.472131147540985, "grad_norm": 6.9644575119018555, "learning_rate": 6.826128257123487e-06, "loss": 0.4817, "step": 18749 }, { "epoch": 61.47540983606557, "grad_norm": 9.16081428527832, "learning_rate": 6.8251212917930265e-06, "loss": 0.6518, "step": 18750 }, { "epoch": 61.47868852459016, "grad_norm": 6.499769687652588, "learning_rate": 6.824114362262626e-06, "loss": 0.3855, "step": 18751 }, { "epoch": 61.48196721311476, "grad_norm": 6.057611465454102, "learning_rate": 6.8231074685436405e-06, "loss": 0.3794, "step": 18752 }, { "epoch": 61.485245901639345, "grad_norm": 7.6844162940979, "learning_rate": 6.822100610647421e-06, "loss": 0.5497, "step": 18753 }, { "epoch": 61.488524590163934, "grad_norm": 5.114741325378418, "learning_rate": 6.82109378858532e-06, "loss": 0.6401, "step": 18754 }, { "epoch": 61.49180327868852, "grad_norm": 7.193020820617676, "learning_rate": 6.820087002368689e-06, "loss": 0.6032, "step": 18755 }, { "epoch": 61.49508196721312, "grad_norm": 5.6750640869140625, "learning_rate": 6.8190802520088875e-06, "loss": 0.4528, "step": 18756 }, { "epoch": 61.498360655737706, "grad_norm": 6.545973777770996, "learning_rate": 6.818073537517263e-06, "loss": 0.4521, "step": 18757 }, { "epoch": 61.501639344262294, "grad_norm": 5.606899261474609, "learning_rate": 6.817066858905167e-06, "loss": 0.5297, "step": 18758 }, { "epoch": 61.50491803278688, "grad_norm": 36.321876525878906, "learning_rate": 6.816060216183949e-06, "loss": 0.5196, "step": 18759 }, { "epoch": 61.50819672131148, "grad_norm": 6.000838756561279, "learning_rate": 6.815053609364965e-06, "loss": 0.512, "step": 18760 }, { "epoch": 61.511475409836066, "grad_norm": 5.092351913452148, "learning_rate": 6.814047038459564e-06, "loss": 0.3661, "step": 18761 }, { "epoch": 61.514754098360655, "grad_norm": 6.401322364807129, "learning_rate": 6.8130405034790944e-06, "loss": 0.4102, "step": 18762 }, { "epoch": 61.51803278688524, "grad_norm": 8.902664184570312, "learning_rate": 6.812034004434904e-06, "loss": 0.4033, "step": 18763 }, { "epoch": 61.52131147540984, "grad_norm": 5.787964344024658, "learning_rate": 6.8110275413383485e-06, "loss": 0.305, "step": 18764 }, { "epoch": 61.52459016393443, "grad_norm": 6.128907680511475, "learning_rate": 6.810021114200773e-06, "loss": 0.3939, "step": 18765 }, { "epoch": 61.527868852459015, "grad_norm": 5.169722080230713, "learning_rate": 6.809014723033526e-06, "loss": 0.447, "step": 18766 }, { "epoch": 61.5311475409836, "grad_norm": 8.122986793518066, "learning_rate": 6.808008367847954e-06, "loss": 0.5337, "step": 18767 }, { "epoch": 61.5344262295082, "grad_norm": 5.739190101623535, "learning_rate": 6.807002048655405e-06, "loss": 0.5621, "step": 18768 }, { "epoch": 61.53770491803279, "grad_norm": 7.057145118713379, "learning_rate": 6.805995765467233e-06, "loss": 0.4453, "step": 18769 }, { "epoch": 61.540983606557376, "grad_norm": 6.49885892868042, "learning_rate": 6.804989518294775e-06, "loss": 0.6573, "step": 18770 }, { "epoch": 61.544262295081964, "grad_norm": 9.694037437438965, "learning_rate": 6.8039833071493845e-06, "loss": 0.4353, "step": 18771 }, { "epoch": 61.54754098360656, "grad_norm": 8.218618392944336, "learning_rate": 6.802977132042403e-06, "loss": 0.3855, "step": 18772 }, { "epoch": 61.55081967213115, "grad_norm": 6.89317512512207, "learning_rate": 6.801970992985181e-06, "loss": 0.4806, "step": 18773 }, { "epoch": 61.554098360655736, "grad_norm": 6.851690769195557, "learning_rate": 6.8009648899890615e-06, "loss": 0.5304, "step": 18774 }, { "epoch": 61.557377049180324, "grad_norm": 5.359865665435791, "learning_rate": 6.799958823065388e-06, "loss": 0.5961, "step": 18775 }, { "epoch": 61.56065573770492, "grad_norm": 6.112198352813721, "learning_rate": 6.798952792225504e-06, "loss": 0.5364, "step": 18776 }, { "epoch": 61.56393442622951, "grad_norm": 6.618502140045166, "learning_rate": 6.797946797480756e-06, "loss": 0.511, "step": 18777 }, { "epoch": 61.5672131147541, "grad_norm": 10.770336151123047, "learning_rate": 6.79694083884249e-06, "loss": 0.3968, "step": 18778 }, { "epoch": 61.570491803278685, "grad_norm": 7.7763895988464355, "learning_rate": 6.795934916322047e-06, "loss": 0.5719, "step": 18779 }, { "epoch": 61.57377049180328, "grad_norm": 6.3418474197387695, "learning_rate": 6.794929029930768e-06, "loss": 0.592, "step": 18780 }, { "epoch": 61.57704918032787, "grad_norm": 5.483340740203857, "learning_rate": 6.793923179679992e-06, "loss": 0.5054, "step": 18781 }, { "epoch": 61.58032786885246, "grad_norm": 5.846269130706787, "learning_rate": 6.792917365581071e-06, "loss": 0.4554, "step": 18782 }, { "epoch": 61.58360655737705, "grad_norm": 5.230118751525879, "learning_rate": 6.79191158764534e-06, "loss": 0.3065, "step": 18783 }, { "epoch": 61.58688524590164, "grad_norm": 5.479377746582031, "learning_rate": 6.790905845884142e-06, "loss": 0.3814, "step": 18784 }, { "epoch": 61.59016393442623, "grad_norm": 8.761768341064453, "learning_rate": 6.789900140308814e-06, "loss": 0.4388, "step": 18785 }, { "epoch": 61.59344262295082, "grad_norm": 5.437146186828613, "learning_rate": 6.788894470930703e-06, "loss": 0.6056, "step": 18786 }, { "epoch": 61.59672131147541, "grad_norm": 6.563270092010498, "learning_rate": 6.787888837761145e-06, "loss": 0.7395, "step": 18787 }, { "epoch": 61.6, "grad_norm": 8.37049674987793, "learning_rate": 6.786883240811479e-06, "loss": 0.5188, "step": 18788 }, { "epoch": 61.60327868852459, "grad_norm": 6.00824499130249, "learning_rate": 6.785877680093044e-06, "loss": 0.6687, "step": 18789 }, { "epoch": 61.60655737704918, "grad_norm": 7.035928726196289, "learning_rate": 6.784872155617181e-06, "loss": 0.4049, "step": 18790 }, { "epoch": 61.609836065573774, "grad_norm": 9.509645462036133, "learning_rate": 6.7838666673952305e-06, "loss": 0.4694, "step": 18791 }, { "epoch": 61.61311475409836, "grad_norm": 5.791297435760498, "learning_rate": 6.782861215438525e-06, "loss": 0.4972, "step": 18792 }, { "epoch": 61.61639344262295, "grad_norm": 5.314696311950684, "learning_rate": 6.781855799758403e-06, "loss": 0.6226, "step": 18793 }, { "epoch": 61.61967213114754, "grad_norm": 10.11699104309082, "learning_rate": 6.780850420366204e-06, "loss": 0.6696, "step": 18794 }, { "epoch": 61.622950819672134, "grad_norm": 5.447444915771484, "learning_rate": 6.779845077273264e-06, "loss": 0.3375, "step": 18795 }, { "epoch": 61.62622950819672, "grad_norm": 6.143112659454346, "learning_rate": 6.778839770490919e-06, "loss": 0.4953, "step": 18796 }, { "epoch": 61.62950819672131, "grad_norm": 6.977324485778809, "learning_rate": 6.777834500030505e-06, "loss": 0.6091, "step": 18797 }, { "epoch": 61.6327868852459, "grad_norm": 4.954934597015381, "learning_rate": 6.776829265903355e-06, "loss": 0.501, "step": 18798 }, { "epoch": 61.636065573770495, "grad_norm": 5.469021320343018, "learning_rate": 6.775824068120809e-06, "loss": 0.273, "step": 18799 }, { "epoch": 61.63934426229508, "grad_norm": 7.744697093963623, "learning_rate": 6.774818906694196e-06, "loss": 0.621, "step": 18800 }, { "epoch": 61.64262295081967, "grad_norm": 6.800828456878662, "learning_rate": 6.773813781634856e-06, "loss": 0.5551, "step": 18801 }, { "epoch": 61.64590163934426, "grad_norm": 7.65367317199707, "learning_rate": 6.772808692954118e-06, "loss": 0.3398, "step": 18802 }, { "epoch": 61.649180327868855, "grad_norm": 5.646847248077393, "learning_rate": 6.771803640663317e-06, "loss": 0.5981, "step": 18803 }, { "epoch": 61.65245901639344, "grad_norm": 5.155401229858398, "learning_rate": 6.770798624773789e-06, "loss": 0.5668, "step": 18804 }, { "epoch": 61.65573770491803, "grad_norm": 7.9650373458862305, "learning_rate": 6.769793645296863e-06, "loss": 0.3402, "step": 18805 }, { "epoch": 61.65901639344262, "grad_norm": 5.098432540893555, "learning_rate": 6.768788702243874e-06, "loss": 0.3307, "step": 18806 }, { "epoch": 61.662295081967216, "grad_norm": 6.133101940155029, "learning_rate": 6.767783795626146e-06, "loss": 0.5522, "step": 18807 }, { "epoch": 61.665573770491804, "grad_norm": 9.845747947692871, "learning_rate": 6.766778925455022e-06, "loss": 0.2528, "step": 18808 }, { "epoch": 61.66885245901639, "grad_norm": 4.96187162399292, "learning_rate": 6.765774091741826e-06, "loss": 0.2455, "step": 18809 }, { "epoch": 61.67213114754098, "grad_norm": 5.91251802444458, "learning_rate": 6.764769294497891e-06, "loss": 0.4452, "step": 18810 }, { "epoch": 61.675409836065576, "grad_norm": 6.178462982177734, "learning_rate": 6.763764533734541e-06, "loss": 0.7148, "step": 18811 }, { "epoch": 61.678688524590164, "grad_norm": 6.418684959411621, "learning_rate": 6.762759809463115e-06, "loss": 0.4864, "step": 18812 }, { "epoch": 61.68196721311475, "grad_norm": 9.258338928222656, "learning_rate": 6.761755121694939e-06, "loss": 0.4777, "step": 18813 }, { "epoch": 61.68524590163934, "grad_norm": 6.390157699584961, "learning_rate": 6.7607504704413396e-06, "loss": 0.4349, "step": 18814 }, { "epoch": 61.68852459016394, "grad_norm": 5.979433536529541, "learning_rate": 6.759745855713645e-06, "loss": 0.4178, "step": 18815 }, { "epoch": 61.691803278688525, "grad_norm": 5.543521404266357, "learning_rate": 6.758741277523187e-06, "loss": 0.763, "step": 18816 }, { "epoch": 61.69508196721311, "grad_norm": 8.523181915283203, "learning_rate": 6.757736735881291e-06, "loss": 0.5693, "step": 18817 }, { "epoch": 61.6983606557377, "grad_norm": 8.576324462890625, "learning_rate": 6.7567322307992834e-06, "loss": 0.8635, "step": 18818 }, { "epoch": 61.7016393442623, "grad_norm": 13.197797775268555, "learning_rate": 6.7557277622884944e-06, "loss": 0.4669, "step": 18819 }, { "epoch": 61.704918032786885, "grad_norm": 5.230228424072266, "learning_rate": 6.754723330360242e-06, "loss": 0.638, "step": 18820 }, { "epoch": 61.708196721311474, "grad_norm": 5.920011520385742, "learning_rate": 6.753718935025863e-06, "loss": 0.6624, "step": 18821 }, { "epoch": 61.71147540983607, "grad_norm": 5.648591995239258, "learning_rate": 6.752714576296679e-06, "loss": 0.6222, "step": 18822 }, { "epoch": 61.71475409836066, "grad_norm": 9.239394187927246, "learning_rate": 6.751710254184012e-06, "loss": 0.3635, "step": 18823 }, { "epoch": 61.718032786885246, "grad_norm": 5.194358825683594, "learning_rate": 6.750705968699189e-06, "loss": 0.6492, "step": 18824 }, { "epoch": 61.721311475409834, "grad_norm": 7.683828353881836, "learning_rate": 6.749701719853539e-06, "loss": 0.7744, "step": 18825 }, { "epoch": 61.72459016393443, "grad_norm": 14.136263847351074, "learning_rate": 6.748697507658377e-06, "loss": 0.6266, "step": 18826 }, { "epoch": 61.72786885245902, "grad_norm": 6.152515411376953, "learning_rate": 6.747693332125034e-06, "loss": 0.5471, "step": 18827 }, { "epoch": 61.731147540983606, "grad_norm": 5.329272747039795, "learning_rate": 6.746689193264827e-06, "loss": 0.336, "step": 18828 }, { "epoch": 61.734426229508195, "grad_norm": 7.127070426940918, "learning_rate": 6.745685091089082e-06, "loss": 0.5158, "step": 18829 }, { "epoch": 61.73770491803279, "grad_norm": 16.598344802856445, "learning_rate": 6.744681025609125e-06, "loss": 0.4334, "step": 18830 }, { "epoch": 61.74098360655738, "grad_norm": 5.3717474937438965, "learning_rate": 6.7436769968362726e-06, "loss": 0.4573, "step": 18831 }, { "epoch": 61.74426229508197, "grad_norm": 4.972967147827148, "learning_rate": 6.742673004781849e-06, "loss": 0.4991, "step": 18832 }, { "epoch": 61.747540983606555, "grad_norm": 5.669678211212158, "learning_rate": 6.741669049457169e-06, "loss": 0.2389, "step": 18833 }, { "epoch": 61.75081967213115, "grad_norm": 5.553776264190674, "learning_rate": 6.740665130873563e-06, "loss": 0.5126, "step": 18834 }, { "epoch": 61.75409836065574, "grad_norm": 5.536866664886475, "learning_rate": 6.739661249042346e-06, "loss": 0.6585, "step": 18835 }, { "epoch": 61.75737704918033, "grad_norm": 5.317737102508545, "learning_rate": 6.7386574039748385e-06, "loss": 0.2594, "step": 18836 }, { "epoch": 61.760655737704916, "grad_norm": 5.888546466827393, "learning_rate": 6.737653595682356e-06, "loss": 0.3413, "step": 18837 }, { "epoch": 61.76393442622951, "grad_norm": 5.861685276031494, "learning_rate": 6.736649824176225e-06, "loss": 0.7407, "step": 18838 }, { "epoch": 61.7672131147541, "grad_norm": 8.534449577331543, "learning_rate": 6.73564608946776e-06, "loss": 0.4436, "step": 18839 }, { "epoch": 61.77049180327869, "grad_norm": 4.962850570678711, "learning_rate": 6.73464239156828e-06, "loss": 0.2236, "step": 18840 }, { "epoch": 61.773770491803276, "grad_norm": 5.054999351501465, "learning_rate": 6.733638730489098e-06, "loss": 0.3123, "step": 18841 }, { "epoch": 61.77704918032787, "grad_norm": 5.772908687591553, "learning_rate": 6.732635106241539e-06, "loss": 0.3724, "step": 18842 }, { "epoch": 61.78032786885246, "grad_norm": 5.126317024230957, "learning_rate": 6.731631518836916e-06, "loss": 0.4839, "step": 18843 }, { "epoch": 61.78360655737705, "grad_norm": 5.604670524597168, "learning_rate": 6.730627968286547e-06, "loss": 0.4542, "step": 18844 }, { "epoch": 61.78688524590164, "grad_norm": 10.924942970275879, "learning_rate": 6.7296244546017465e-06, "loss": 0.3766, "step": 18845 }, { "epoch": 61.79016393442623, "grad_norm": 4.777414798736572, "learning_rate": 6.728620977793827e-06, "loss": 0.4583, "step": 18846 }, { "epoch": 61.79344262295082, "grad_norm": 4.715536117553711, "learning_rate": 6.72761753787411e-06, "loss": 0.5284, "step": 18847 }, { "epoch": 61.79672131147541, "grad_norm": 5.03481912612915, "learning_rate": 6.726614134853909e-06, "loss": 0.4858, "step": 18848 }, { "epoch": 61.8, "grad_norm": 5.961306571960449, "learning_rate": 6.725610768744535e-06, "loss": 0.5231, "step": 18849 }, { "epoch": 61.80327868852459, "grad_norm": 4.970012187957764, "learning_rate": 6.724607439557303e-06, "loss": 0.3698, "step": 18850 }, { "epoch": 61.80655737704918, "grad_norm": 6.252862930297852, "learning_rate": 6.723604147303531e-06, "loss": 0.5451, "step": 18851 }, { "epoch": 61.80983606557377, "grad_norm": 4.9751129150390625, "learning_rate": 6.7226008919945266e-06, "loss": 0.366, "step": 18852 }, { "epoch": 61.81311475409836, "grad_norm": 5.6317243576049805, "learning_rate": 6.721597673641607e-06, "loss": 0.4467, "step": 18853 }, { "epoch": 61.81639344262295, "grad_norm": 5.5281500816345215, "learning_rate": 6.720594492256079e-06, "loss": 0.371, "step": 18854 }, { "epoch": 61.81967213114754, "grad_norm": 4.950811386108398, "learning_rate": 6.719591347849259e-06, "loss": 0.4583, "step": 18855 }, { "epoch": 61.82295081967213, "grad_norm": 5.123846530914307, "learning_rate": 6.718588240432459e-06, "loss": 0.338, "step": 18856 }, { "epoch": 61.82622950819672, "grad_norm": 5.39591121673584, "learning_rate": 6.717585170016988e-06, "loss": 0.8081, "step": 18857 }, { "epoch": 61.829508196721314, "grad_norm": 7.184122562408447, "learning_rate": 6.716582136614158e-06, "loss": 0.631, "step": 18858 }, { "epoch": 61.8327868852459, "grad_norm": 6.670955657958984, "learning_rate": 6.715579140235273e-06, "loss": 0.5553, "step": 18859 }, { "epoch": 61.83606557377049, "grad_norm": 4.7343854904174805, "learning_rate": 6.714576180891653e-06, "loss": 0.4836, "step": 18860 }, { "epoch": 61.83934426229508, "grad_norm": 5.555050373077393, "learning_rate": 6.713573258594603e-06, "loss": 0.5778, "step": 18861 }, { "epoch": 61.842622950819674, "grad_norm": 5.630875110626221, "learning_rate": 6.712570373355432e-06, "loss": 0.504, "step": 18862 }, { "epoch": 61.84590163934426, "grad_norm": 6.451164245605469, "learning_rate": 6.711567525185442e-06, "loss": 0.435, "step": 18863 }, { "epoch": 61.84918032786885, "grad_norm": 4.682004928588867, "learning_rate": 6.710564714095952e-06, "loss": 0.615, "step": 18864 }, { "epoch": 61.85245901639344, "grad_norm": 6.064910888671875, "learning_rate": 6.709561940098266e-06, "loss": 0.3358, "step": 18865 }, { "epoch": 61.855737704918035, "grad_norm": 5.845155239105225, "learning_rate": 6.7085592032036885e-06, "loss": 0.3059, "step": 18866 }, { "epoch": 61.85901639344262, "grad_norm": 5.510107040405273, "learning_rate": 6.707556503423526e-06, "loss": 0.5747, "step": 18867 }, { "epoch": 61.86229508196721, "grad_norm": 5.412928104400635, "learning_rate": 6.70655384076909e-06, "loss": 0.5247, "step": 18868 }, { "epoch": 61.86557377049181, "grad_norm": 5.945957183837891, "learning_rate": 6.705551215251684e-06, "loss": 0.6809, "step": 18869 }, { "epoch": 61.868852459016395, "grad_norm": 5.815785884857178, "learning_rate": 6.704548626882614e-06, "loss": 0.5715, "step": 18870 }, { "epoch": 61.87213114754098, "grad_norm": 4.641343116760254, "learning_rate": 6.703546075673185e-06, "loss": 0.3811, "step": 18871 }, { "epoch": 61.87540983606557, "grad_norm": 5.3006181716918945, "learning_rate": 6.702543561634697e-06, "loss": 0.6223, "step": 18872 }, { "epoch": 61.87868852459017, "grad_norm": 4.9853668212890625, "learning_rate": 6.701541084778463e-06, "loss": 0.3782, "step": 18873 }, { "epoch": 61.881967213114756, "grad_norm": 4.797996997833252, "learning_rate": 6.700538645115782e-06, "loss": 0.4984, "step": 18874 }, { "epoch": 61.885245901639344, "grad_norm": 6.771423816680908, "learning_rate": 6.6995362426579565e-06, "loss": 0.5259, "step": 18875 }, { "epoch": 61.88852459016393, "grad_norm": 44.303619384765625, "learning_rate": 6.698533877416292e-06, "loss": 0.335, "step": 18876 }, { "epoch": 61.89180327868853, "grad_norm": 6.191349506378174, "learning_rate": 6.697531549402093e-06, "loss": 0.3852, "step": 18877 }, { "epoch": 61.895081967213116, "grad_norm": 5.742753982543945, "learning_rate": 6.696529258626658e-06, "loss": 0.5743, "step": 18878 }, { "epoch": 61.898360655737704, "grad_norm": 4.730785846710205, "learning_rate": 6.69552700510129e-06, "loss": 0.2304, "step": 18879 }, { "epoch": 61.90163934426229, "grad_norm": 5.971762180328369, "learning_rate": 6.694524788837291e-06, "loss": 0.5146, "step": 18880 }, { "epoch": 61.90491803278689, "grad_norm": 7.739920139312744, "learning_rate": 6.693522609845963e-06, "loss": 0.6274, "step": 18881 }, { "epoch": 61.90819672131148, "grad_norm": 6.65142297744751, "learning_rate": 6.692520468138603e-06, "loss": 0.5051, "step": 18882 }, { "epoch": 61.911475409836065, "grad_norm": 6.62681770324707, "learning_rate": 6.691518363726517e-06, "loss": 0.4383, "step": 18883 }, { "epoch": 61.91475409836065, "grad_norm": 6.9371442794799805, "learning_rate": 6.690516296621e-06, "loss": 0.4842, "step": 18884 }, { "epoch": 61.91803278688525, "grad_norm": 4.52512788772583, "learning_rate": 6.689514266833349e-06, "loss": 0.4027, "step": 18885 }, { "epoch": 61.92131147540984, "grad_norm": 5.1234893798828125, "learning_rate": 6.688512274374871e-06, "loss": 0.3911, "step": 18886 }, { "epoch": 61.924590163934425, "grad_norm": 8.88949966430664, "learning_rate": 6.68751031925686e-06, "loss": 0.316, "step": 18887 }, { "epoch": 61.927868852459014, "grad_norm": 6.944571018218994, "learning_rate": 6.686508401490615e-06, "loss": 0.4229, "step": 18888 }, { "epoch": 61.93114754098361, "grad_norm": 5.322226524353027, "learning_rate": 6.685506521087428e-06, "loss": 0.3506, "step": 18889 }, { "epoch": 61.9344262295082, "grad_norm": 8.198110580444336, "learning_rate": 6.684504678058606e-06, "loss": 0.5501, "step": 18890 }, { "epoch": 61.937704918032786, "grad_norm": 7.20501184463501, "learning_rate": 6.683502872415441e-06, "loss": 0.461, "step": 18891 }, { "epoch": 61.940983606557374, "grad_norm": 5.565701484680176, "learning_rate": 6.682501104169228e-06, "loss": 0.6541, "step": 18892 }, { "epoch": 61.94426229508197, "grad_norm": 5.570834636688232, "learning_rate": 6.6814993733312615e-06, "loss": 0.4836, "step": 18893 }, { "epoch": 61.94754098360656, "grad_norm": 5.448910236358643, "learning_rate": 6.680497679912844e-06, "loss": 0.3207, "step": 18894 }, { "epoch": 61.950819672131146, "grad_norm": 6.857260227203369, "learning_rate": 6.679496023925267e-06, "loss": 0.6071, "step": 18895 }, { "epoch": 61.954098360655735, "grad_norm": 5.647094249725342, "learning_rate": 6.6784944053798236e-06, "loss": 0.4325, "step": 18896 }, { "epoch": 61.95737704918033, "grad_norm": 5.91330623626709, "learning_rate": 6.67749282428781e-06, "loss": 0.3772, "step": 18897 }, { "epoch": 61.96065573770492, "grad_norm": 5.172360897064209, "learning_rate": 6.6764912806605155e-06, "loss": 0.4045, "step": 18898 }, { "epoch": 61.96393442622951, "grad_norm": 9.7217435836792, "learning_rate": 6.675489774509241e-06, "loss": 0.5499, "step": 18899 }, { "epoch": 61.967213114754095, "grad_norm": 6.432253360748291, "learning_rate": 6.674488305845276e-06, "loss": 0.458, "step": 18900 }, { "epoch": 61.97049180327869, "grad_norm": 5.3850274085998535, "learning_rate": 6.673486874679913e-06, "loss": 0.4165, "step": 18901 }, { "epoch": 61.97377049180328, "grad_norm": 4.5986528396606445, "learning_rate": 6.67248548102444e-06, "loss": 0.4047, "step": 18902 }, { "epoch": 61.97704918032787, "grad_norm": 5.559268951416016, "learning_rate": 6.671484124890157e-06, "loss": 0.4815, "step": 18903 }, { "epoch": 61.980327868852456, "grad_norm": 6.018940448760986, "learning_rate": 6.670482806288352e-06, "loss": 0.5562, "step": 18904 }, { "epoch": 61.98360655737705, "grad_norm": 6.516745090484619, "learning_rate": 6.6694815252303125e-06, "loss": 0.3458, "step": 18905 }, { "epoch": 61.98688524590164, "grad_norm": 12.195235252380371, "learning_rate": 6.668480281727332e-06, "loss": 0.728, "step": 18906 }, { "epoch": 61.99016393442623, "grad_norm": 5.420645236968994, "learning_rate": 6.6674790757907016e-06, "loss": 0.6293, "step": 18907 }, { "epoch": 61.993442622950816, "grad_norm": 5.244666576385498, "learning_rate": 6.666477907431707e-06, "loss": 0.5361, "step": 18908 }, { "epoch": 61.99672131147541, "grad_norm": 6.5230326652526855, "learning_rate": 6.665476776661643e-06, "loss": 0.3712, "step": 18909 }, { "epoch": 62.0, "grad_norm": 5.192956447601318, "learning_rate": 6.664475683491797e-06, "loss": 0.4265, "step": 18910 }, { "epoch": 62.00327868852459, "grad_norm": 5.017729759216309, "learning_rate": 6.663474627933449e-06, "loss": 0.3446, "step": 18911 }, { "epoch": 62.006557377049184, "grad_norm": 4.69301176071167, "learning_rate": 6.662473609997899e-06, "loss": 0.3176, "step": 18912 }, { "epoch": 62.00983606557377, "grad_norm": 6.320192813873291, "learning_rate": 6.661472629696429e-06, "loss": 0.5807, "step": 18913 }, { "epoch": 62.01311475409836, "grad_norm": 30.620532989501953, "learning_rate": 6.660471687040327e-06, "loss": 0.4112, "step": 18914 }, { "epoch": 62.01639344262295, "grad_norm": 5.694930553436279, "learning_rate": 6.659470782040873e-06, "loss": 0.4307, "step": 18915 }, { "epoch": 62.019672131147544, "grad_norm": 5.0534563064575195, "learning_rate": 6.658469914709365e-06, "loss": 0.4054, "step": 18916 }, { "epoch": 62.02295081967213, "grad_norm": 9.363202095031738, "learning_rate": 6.657469085057083e-06, "loss": 0.472, "step": 18917 }, { "epoch": 62.02622950819672, "grad_norm": 15.193269729614258, "learning_rate": 6.656468293095313e-06, "loss": 0.4861, "step": 18918 }, { "epoch": 62.02950819672131, "grad_norm": 6.492856979370117, "learning_rate": 6.655467538835334e-06, "loss": 0.4738, "step": 18919 }, { "epoch": 62.032786885245905, "grad_norm": 5.7305684089660645, "learning_rate": 6.654466822288439e-06, "loss": 0.3549, "step": 18920 }, { "epoch": 62.03606557377049, "grad_norm": 14.155501365661621, "learning_rate": 6.653466143465911e-06, "loss": 0.334, "step": 18921 }, { "epoch": 62.03934426229508, "grad_norm": 8.516512870788574, "learning_rate": 6.652465502379031e-06, "loss": 0.4224, "step": 18922 }, { "epoch": 62.04262295081967, "grad_norm": 7.007702350616455, "learning_rate": 6.651464899039084e-06, "loss": 0.4023, "step": 18923 }, { "epoch": 62.045901639344265, "grad_norm": 5.511355876922607, "learning_rate": 6.650464333457347e-06, "loss": 0.4369, "step": 18924 }, { "epoch": 62.049180327868854, "grad_norm": 8.037418365478516, "learning_rate": 6.649463805645111e-06, "loss": 0.4383, "step": 18925 }, { "epoch": 62.05245901639344, "grad_norm": 11.63637924194336, "learning_rate": 6.648463315613653e-06, "loss": 0.5025, "step": 18926 }, { "epoch": 62.05573770491803, "grad_norm": 5.3513665199279785, "learning_rate": 6.647462863374259e-06, "loss": 0.4525, "step": 18927 }, { "epoch": 62.059016393442626, "grad_norm": 5.425698280334473, "learning_rate": 6.646462448938202e-06, "loss": 0.6515, "step": 18928 }, { "epoch": 62.062295081967214, "grad_norm": 6.521395206451416, "learning_rate": 6.645462072316771e-06, "loss": 0.4027, "step": 18929 }, { "epoch": 62.0655737704918, "grad_norm": 7.986061096191406, "learning_rate": 6.6444617335212426e-06, "loss": 0.3997, "step": 18930 }, { "epoch": 62.06885245901639, "grad_norm": 9.749676704406738, "learning_rate": 6.643461432562894e-06, "loss": 0.3154, "step": 18931 }, { "epoch": 62.072131147540986, "grad_norm": 4.574517726898193, "learning_rate": 6.64246116945301e-06, "loss": 0.522, "step": 18932 }, { "epoch": 62.075409836065575, "grad_norm": 5.75759220123291, "learning_rate": 6.641460944202869e-06, "loss": 0.8813, "step": 18933 }, { "epoch": 62.07868852459016, "grad_norm": 5.349134922027588, "learning_rate": 6.640460756823745e-06, "loss": 0.3396, "step": 18934 }, { "epoch": 62.08196721311475, "grad_norm": 53.57859802246094, "learning_rate": 6.639460607326922e-06, "loss": 0.4001, "step": 18935 }, { "epoch": 62.08524590163935, "grad_norm": 5.7804694175720215, "learning_rate": 6.638460495723673e-06, "loss": 0.3656, "step": 18936 }, { "epoch": 62.088524590163935, "grad_norm": 6.881038665771484, "learning_rate": 6.637460422025275e-06, "loss": 0.5038, "step": 18937 }, { "epoch": 62.09180327868852, "grad_norm": 5.617045879364014, "learning_rate": 6.636460386243009e-06, "loss": 0.5011, "step": 18938 }, { "epoch": 62.09508196721311, "grad_norm": 8.691168785095215, "learning_rate": 6.6354603883881504e-06, "loss": 0.5298, "step": 18939 }, { "epoch": 62.09836065573771, "grad_norm": 7.406665802001953, "learning_rate": 6.634460428471975e-06, "loss": 0.5897, "step": 18940 }, { "epoch": 62.101639344262296, "grad_norm": 5.192136764526367, "learning_rate": 6.633460506505752e-06, "loss": 0.6266, "step": 18941 }, { "epoch": 62.104918032786884, "grad_norm": 7.417356967926025, "learning_rate": 6.632460622500767e-06, "loss": 0.3526, "step": 18942 }, { "epoch": 62.10819672131147, "grad_norm": 9.368982315063477, "learning_rate": 6.631460776468291e-06, "loss": 0.5377, "step": 18943 }, { "epoch": 62.11147540983607, "grad_norm": 5.658158779144287, "learning_rate": 6.630460968419594e-06, "loss": 0.3985, "step": 18944 }, { "epoch": 62.114754098360656, "grad_norm": 5.638621807098389, "learning_rate": 6.629461198365952e-06, "loss": 0.2581, "step": 18945 }, { "epoch": 62.118032786885244, "grad_norm": 6.150169372558594, "learning_rate": 6.628461466318641e-06, "loss": 0.6342, "step": 18946 }, { "epoch": 62.12131147540983, "grad_norm": 6.651447772979736, "learning_rate": 6.627461772288934e-06, "loss": 0.4782, "step": 18947 }, { "epoch": 62.12459016393443, "grad_norm": 5.943185806274414, "learning_rate": 6.6264621162881015e-06, "loss": 0.4586, "step": 18948 }, { "epoch": 62.12786885245902, "grad_norm": 6.110888957977295, "learning_rate": 6.625462498327418e-06, "loss": 0.4485, "step": 18949 }, { "epoch": 62.131147540983605, "grad_norm": 5.082983493804932, "learning_rate": 6.624462918418147e-06, "loss": 0.3139, "step": 18950 }, { "epoch": 62.13442622950819, "grad_norm": 4.989294052124023, "learning_rate": 6.623463376571571e-06, "loss": 0.3377, "step": 18951 }, { "epoch": 62.13770491803279, "grad_norm": 5.777947425842285, "learning_rate": 6.622463872798956e-06, "loss": 0.2884, "step": 18952 }, { "epoch": 62.14098360655738, "grad_norm": 5.447143077850342, "learning_rate": 6.621464407111574e-06, "loss": 0.294, "step": 18953 }, { "epoch": 62.144262295081965, "grad_norm": 4.300683498382568, "learning_rate": 6.620464979520689e-06, "loss": 0.4988, "step": 18954 }, { "epoch": 62.14754098360656, "grad_norm": 6.898970127105713, "learning_rate": 6.619465590037579e-06, "loss": 0.729, "step": 18955 }, { "epoch": 62.15081967213115, "grad_norm": 5.117136478424072, "learning_rate": 6.618466238673509e-06, "loss": 0.6169, "step": 18956 }, { "epoch": 62.15409836065574, "grad_norm": 8.588774681091309, "learning_rate": 6.617466925439746e-06, "loss": 0.6633, "step": 18957 }, { "epoch": 62.157377049180326, "grad_norm": 6.353798866271973, "learning_rate": 6.61646765034756e-06, "loss": 0.3811, "step": 18958 }, { "epoch": 62.16065573770492, "grad_norm": 6.625107288360596, "learning_rate": 6.6154684134082226e-06, "loss": 0.4829, "step": 18959 }, { "epoch": 62.16393442622951, "grad_norm": 32.925987243652344, "learning_rate": 6.614469214632997e-06, "loss": 0.4809, "step": 18960 }, { "epoch": 62.1672131147541, "grad_norm": 4.030432224273682, "learning_rate": 6.613470054033149e-06, "loss": 0.5969, "step": 18961 }, { "epoch": 62.170491803278686, "grad_norm": 4.785466194152832, "learning_rate": 6.61247093161995e-06, "loss": 0.4492, "step": 18962 }, { "epoch": 62.17377049180328, "grad_norm": 7.230980396270752, "learning_rate": 6.61147184740466e-06, "loss": 0.4766, "step": 18963 }, { "epoch": 62.17704918032787, "grad_norm": 5.6031341552734375, "learning_rate": 6.61047280139855e-06, "loss": 0.3448, "step": 18964 }, { "epoch": 62.18032786885246, "grad_norm": 8.42889404296875, "learning_rate": 6.6094737936128835e-06, "loss": 0.6804, "step": 18965 }, { "epoch": 62.18360655737705, "grad_norm": 11.716437339782715, "learning_rate": 6.608474824058927e-06, "loss": 0.4591, "step": 18966 }, { "epoch": 62.18688524590164, "grad_norm": 21.669523239135742, "learning_rate": 6.607475892747938e-06, "loss": 0.4324, "step": 18967 }, { "epoch": 62.19016393442623, "grad_norm": 5.144531726837158, "learning_rate": 6.606476999691189e-06, "loss": 0.5662, "step": 18968 }, { "epoch": 62.19344262295082, "grad_norm": 5.751857280731201, "learning_rate": 6.605478144899942e-06, "loss": 0.6947, "step": 18969 }, { "epoch": 62.19672131147541, "grad_norm": 11.237775802612305, "learning_rate": 6.604479328385458e-06, "loss": 0.1939, "step": 18970 }, { "epoch": 62.2, "grad_norm": 5.391320705413818, "learning_rate": 6.603480550158995e-06, "loss": 0.3507, "step": 18971 }, { "epoch": 62.20327868852459, "grad_norm": 5.082678318023682, "learning_rate": 6.602481810231824e-06, "loss": 0.688, "step": 18972 }, { "epoch": 62.20655737704918, "grad_norm": 7.864218711853027, "learning_rate": 6.6014831086152055e-06, "loss": 0.2865, "step": 18973 }, { "epoch": 62.20983606557377, "grad_norm": 6.263219833374023, "learning_rate": 6.600484445320396e-06, "loss": 0.2846, "step": 18974 }, { "epoch": 62.21311475409836, "grad_norm": 5.6087327003479, "learning_rate": 6.59948582035866e-06, "loss": 0.2719, "step": 18975 }, { "epoch": 62.21639344262295, "grad_norm": 5.449955940246582, "learning_rate": 6.598487233741253e-06, "loss": 0.3883, "step": 18976 }, { "epoch": 62.21967213114754, "grad_norm": 4.931570053100586, "learning_rate": 6.597488685479443e-06, "loss": 0.3009, "step": 18977 }, { "epoch": 62.22295081967213, "grad_norm": 9.409542083740234, "learning_rate": 6.596490175584486e-06, "loss": 0.3467, "step": 18978 }, { "epoch": 62.226229508196724, "grad_norm": 5.591654300689697, "learning_rate": 6.59549170406764e-06, "loss": 0.5164, "step": 18979 }, { "epoch": 62.22950819672131, "grad_norm": 5.922577857971191, "learning_rate": 6.594493270940162e-06, "loss": 0.6326, "step": 18980 }, { "epoch": 62.2327868852459, "grad_norm": 8.046667098999023, "learning_rate": 6.593494876213318e-06, "loss": 0.5159, "step": 18981 }, { "epoch": 62.23606557377049, "grad_norm": 5.122015953063965, "learning_rate": 6.5924965198983595e-06, "loss": 0.4113, "step": 18982 }, { "epoch": 62.239344262295084, "grad_norm": 5.875826358795166, "learning_rate": 6.591498202006545e-06, "loss": 0.349, "step": 18983 }, { "epoch": 62.24262295081967, "grad_norm": 4.951845645904541, "learning_rate": 6.590499922549131e-06, "loss": 0.4468, "step": 18984 }, { "epoch": 62.24590163934426, "grad_norm": 7.001489639282227, "learning_rate": 6.589501681537378e-06, "loss": 0.6017, "step": 18985 }, { "epoch": 62.24918032786885, "grad_norm": 5.3334150314331055, "learning_rate": 6.5885034789825385e-06, "loss": 0.4528, "step": 18986 }, { "epoch": 62.252459016393445, "grad_norm": 4.874631881713867, "learning_rate": 6.587505314895868e-06, "loss": 0.4882, "step": 18987 }, { "epoch": 62.25573770491803, "grad_norm": 5.728114604949951, "learning_rate": 6.5865071892886266e-06, "loss": 0.7719, "step": 18988 }, { "epoch": 62.25901639344262, "grad_norm": 5.526301383972168, "learning_rate": 6.585509102172062e-06, "loss": 0.3169, "step": 18989 }, { "epoch": 62.26229508196721, "grad_norm": 6.0193023681640625, "learning_rate": 6.584511053557433e-06, "loss": 0.2586, "step": 18990 }, { "epoch": 62.265573770491805, "grad_norm": 11.928086280822754, "learning_rate": 6.583513043455996e-06, "loss": 0.2904, "step": 18991 }, { "epoch": 62.268852459016394, "grad_norm": 17.362300872802734, "learning_rate": 6.582515071879e-06, "loss": 0.3498, "step": 18992 }, { "epoch": 62.27213114754098, "grad_norm": 8.70138931274414, "learning_rate": 6.581517138837697e-06, "loss": 0.3845, "step": 18993 }, { "epoch": 62.27540983606557, "grad_norm": 6.424413681030273, "learning_rate": 6.580519244343346e-06, "loss": 0.7417, "step": 18994 }, { "epoch": 62.278688524590166, "grad_norm": 5.467837333679199, "learning_rate": 6.579521388407196e-06, "loss": 0.5516, "step": 18995 }, { "epoch": 62.281967213114754, "grad_norm": 5.801802635192871, "learning_rate": 6.578523571040499e-06, "loss": 0.5285, "step": 18996 }, { "epoch": 62.28524590163934, "grad_norm": 5.5088911056518555, "learning_rate": 6.5775257922545026e-06, "loss": 0.3987, "step": 18997 }, { "epoch": 62.28852459016394, "grad_norm": 4.5620341300964355, "learning_rate": 6.576528052060465e-06, "loss": 0.2584, "step": 18998 }, { "epoch": 62.291803278688526, "grad_norm": 5.469786167144775, "learning_rate": 6.5755303504696325e-06, "loss": 0.3256, "step": 18999 }, { "epoch": 62.295081967213115, "grad_norm": 6.49851655960083, "learning_rate": 6.574532687493256e-06, "loss": 0.4973, "step": 19000 }, { "epoch": 62.2983606557377, "grad_norm": 5.723092079162598, "learning_rate": 6.573535063142587e-06, "loss": 0.4944, "step": 19001 }, { "epoch": 62.3016393442623, "grad_norm": 5.548248767852783, "learning_rate": 6.572537477428866e-06, "loss": 0.5147, "step": 19002 }, { "epoch": 62.30491803278689, "grad_norm": 5.094974040985107, "learning_rate": 6.571539930363353e-06, "loss": 0.5575, "step": 19003 }, { "epoch": 62.308196721311475, "grad_norm": 5.477262020111084, "learning_rate": 6.570542421957293e-06, "loss": 0.5533, "step": 19004 }, { "epoch": 62.31147540983606, "grad_norm": 10.681842803955078, "learning_rate": 6.569544952221934e-06, "loss": 0.5359, "step": 19005 }, { "epoch": 62.31475409836066, "grad_norm": 5.940280914306641, "learning_rate": 6.568547521168515e-06, "loss": 0.5207, "step": 19006 }, { "epoch": 62.31803278688525, "grad_norm": 6.424545764923096, "learning_rate": 6.567550128808298e-06, "loss": 0.3865, "step": 19007 }, { "epoch": 62.321311475409836, "grad_norm": 6.136033535003662, "learning_rate": 6.566552775152521e-06, "loss": 0.4145, "step": 19008 }, { "epoch": 62.324590163934424, "grad_norm": 5.245865345001221, "learning_rate": 6.565555460212432e-06, "loss": 0.435, "step": 19009 }, { "epoch": 62.32786885245902, "grad_norm": 7.068316459655762, "learning_rate": 6.5645581839992704e-06, "loss": 0.4516, "step": 19010 }, { "epoch": 62.33114754098361, "grad_norm": 6.928122520446777, "learning_rate": 6.563560946524292e-06, "loss": 0.4208, "step": 19011 }, { "epoch": 62.334426229508196, "grad_norm": 4.535867214202881, "learning_rate": 6.562563747798737e-06, "loss": 0.4568, "step": 19012 }, { "epoch": 62.337704918032784, "grad_norm": 7.199830055236816, "learning_rate": 6.561566587833847e-06, "loss": 0.2813, "step": 19013 }, { "epoch": 62.34098360655738, "grad_norm": 7.617714881896973, "learning_rate": 6.560569466640872e-06, "loss": 0.5164, "step": 19014 }, { "epoch": 62.34426229508197, "grad_norm": 6.509796142578125, "learning_rate": 6.55957238423105e-06, "loss": 0.4941, "step": 19015 }, { "epoch": 62.34754098360656, "grad_norm": 5.797389507293701, "learning_rate": 6.558575340615627e-06, "loss": 0.6035, "step": 19016 }, { "epoch": 62.350819672131145, "grad_norm": 6.603235721588135, "learning_rate": 6.557578335805847e-06, "loss": 0.3728, "step": 19017 }, { "epoch": 62.35409836065574, "grad_norm": 5.757213592529297, "learning_rate": 6.556581369812951e-06, "loss": 0.5926, "step": 19018 }, { "epoch": 62.35737704918033, "grad_norm": 7.545641899108887, "learning_rate": 6.5555844426481755e-06, "loss": 0.5574, "step": 19019 }, { "epoch": 62.36065573770492, "grad_norm": 7.5937395095825195, "learning_rate": 6.55458755432277e-06, "loss": 0.4629, "step": 19020 }, { "epoch": 62.363934426229505, "grad_norm": 7.457666873931885, "learning_rate": 6.553590704847973e-06, "loss": 0.5003, "step": 19021 }, { "epoch": 62.3672131147541, "grad_norm": 5.70643949508667, "learning_rate": 6.552593894235024e-06, "loss": 0.2659, "step": 19022 }, { "epoch": 62.37049180327869, "grad_norm": 8.541374206542969, "learning_rate": 6.551597122495159e-06, "loss": 0.5406, "step": 19023 }, { "epoch": 62.37377049180328, "grad_norm": 5.30429220199585, "learning_rate": 6.550600389639627e-06, "loss": 0.554, "step": 19024 }, { "epoch": 62.377049180327866, "grad_norm": 6.183071613311768, "learning_rate": 6.54960369567966e-06, "loss": 0.6549, "step": 19025 }, { "epoch": 62.38032786885246, "grad_norm": 16.974029541015625, "learning_rate": 6.5486070406265016e-06, "loss": 0.2695, "step": 19026 }, { "epoch": 62.38360655737705, "grad_norm": 5.144045352935791, "learning_rate": 6.547610424491385e-06, "loss": 0.4886, "step": 19027 }, { "epoch": 62.38688524590164, "grad_norm": 5.314111709594727, "learning_rate": 6.546613847285548e-06, "loss": 0.6561, "step": 19028 }, { "epoch": 62.390163934426226, "grad_norm": 5.845248699188232, "learning_rate": 6.5456173090202336e-06, "loss": 0.5577, "step": 19029 }, { "epoch": 62.39344262295082, "grad_norm": 5.720242023468018, "learning_rate": 6.544620809706677e-06, "loss": 0.6201, "step": 19030 }, { "epoch": 62.39672131147541, "grad_norm": 7.319105625152588, "learning_rate": 6.543624349356112e-06, "loss": 0.6047, "step": 19031 }, { "epoch": 62.4, "grad_norm": 5.068685054779053, "learning_rate": 6.542627927979772e-06, "loss": 0.5128, "step": 19032 }, { "epoch": 62.40327868852459, "grad_norm": 10.596269607543945, "learning_rate": 6.541631545588901e-06, "loss": 0.3804, "step": 19033 }, { "epoch": 62.40655737704918, "grad_norm": 5.533938884735107, "learning_rate": 6.540635202194731e-06, "loss": 0.4855, "step": 19034 }, { "epoch": 62.40983606557377, "grad_norm": 6.390635013580322, "learning_rate": 6.5396388978084955e-06, "loss": 0.5935, "step": 19035 }, { "epoch": 62.41311475409836, "grad_norm": 6.81550931930542, "learning_rate": 6.538642632441425e-06, "loss": 0.4202, "step": 19036 }, { "epoch": 62.41639344262295, "grad_norm": 5.335574150085449, "learning_rate": 6.537646406104763e-06, "loss": 0.3627, "step": 19037 }, { "epoch": 62.41967213114754, "grad_norm": 5.726591110229492, "learning_rate": 6.536650218809737e-06, "loss": 0.6839, "step": 19038 }, { "epoch": 62.42295081967213, "grad_norm": 6.934732913970947, "learning_rate": 6.535654070567578e-06, "loss": 0.4701, "step": 19039 }, { "epoch": 62.42622950819672, "grad_norm": 4.819119930267334, "learning_rate": 6.534657961389523e-06, "loss": 0.4734, "step": 19040 }, { "epoch": 62.429508196721315, "grad_norm": 12.187729835510254, "learning_rate": 6.5336618912868025e-06, "loss": 0.406, "step": 19041 }, { "epoch": 62.4327868852459, "grad_norm": 6.558437347412109, "learning_rate": 6.532665860270649e-06, "loss": 0.6304, "step": 19042 }, { "epoch": 62.43606557377049, "grad_norm": 4.968197345733643, "learning_rate": 6.53166986835229e-06, "loss": 0.4934, "step": 19043 }, { "epoch": 62.43934426229508, "grad_norm": 12.69111442565918, "learning_rate": 6.5306739155429646e-06, "loss": 0.4253, "step": 19044 }, { "epoch": 62.442622950819676, "grad_norm": 5.866832733154297, "learning_rate": 6.529678001853893e-06, "loss": 0.5479, "step": 19045 }, { "epoch": 62.445901639344264, "grad_norm": 5.405961513519287, "learning_rate": 6.528682127296311e-06, "loss": 0.3468, "step": 19046 }, { "epoch": 62.44918032786885, "grad_norm": 6.279550075531006, "learning_rate": 6.52768629188145e-06, "loss": 0.3923, "step": 19047 }, { "epoch": 62.45245901639344, "grad_norm": 5.052774429321289, "learning_rate": 6.526690495620536e-06, "loss": 0.3959, "step": 19048 }, { "epoch": 62.455737704918036, "grad_norm": 8.863645553588867, "learning_rate": 6.525694738524794e-06, "loss": 0.4543, "step": 19049 }, { "epoch": 62.459016393442624, "grad_norm": 4.026221752166748, "learning_rate": 6.52469902060546e-06, "loss": 0.584, "step": 19050 }, { "epoch": 62.46229508196721, "grad_norm": 5.056404113769531, "learning_rate": 6.523703341873757e-06, "loss": 0.3774, "step": 19051 }, { "epoch": 62.4655737704918, "grad_norm": 5.712545394897461, "learning_rate": 6.522707702340916e-06, "loss": 0.4051, "step": 19052 }, { "epoch": 62.4688524590164, "grad_norm": 6.586441516876221, "learning_rate": 6.5217121020181585e-06, "loss": 0.3715, "step": 19053 }, { "epoch": 62.472131147540985, "grad_norm": 4.356700897216797, "learning_rate": 6.520716540916709e-06, "loss": 0.5584, "step": 19054 }, { "epoch": 62.47540983606557, "grad_norm": 5.371958255767822, "learning_rate": 6.5197210190478046e-06, "loss": 0.4704, "step": 19055 }, { "epoch": 62.47868852459016, "grad_norm": 5.050052642822266, "learning_rate": 6.518725536422663e-06, "loss": 0.4403, "step": 19056 }, { "epoch": 62.48196721311476, "grad_norm": 6.142199993133545, "learning_rate": 6.517730093052511e-06, "loss": 0.6284, "step": 19057 }, { "epoch": 62.485245901639345, "grad_norm": 5.945675849914551, "learning_rate": 6.51673468894857e-06, "loss": 0.3043, "step": 19058 }, { "epoch": 62.488524590163934, "grad_norm": 4.999321937561035, "learning_rate": 6.515739324122068e-06, "loss": 0.4662, "step": 19059 }, { "epoch": 62.49180327868852, "grad_norm": 7.105560779571533, "learning_rate": 6.51474399858423e-06, "loss": 0.5395, "step": 19060 }, { "epoch": 62.49508196721312, "grad_norm": 6.077880382537842, "learning_rate": 6.513748712346277e-06, "loss": 0.4537, "step": 19061 }, { "epoch": 62.498360655737706, "grad_norm": 5.667394161224365, "learning_rate": 6.512753465419428e-06, "loss": 0.4985, "step": 19062 }, { "epoch": 62.501639344262294, "grad_norm": 5.705316066741943, "learning_rate": 6.511758257814913e-06, "loss": 0.4562, "step": 19063 }, { "epoch": 62.50491803278688, "grad_norm": 5.566810607910156, "learning_rate": 6.510763089543951e-06, "loss": 0.3454, "step": 19064 }, { "epoch": 62.50819672131148, "grad_norm": 4.585686683654785, "learning_rate": 6.509767960617763e-06, "loss": 0.4143, "step": 19065 }, { "epoch": 62.511475409836066, "grad_norm": 4.6578850746154785, "learning_rate": 6.508772871047568e-06, "loss": 0.3465, "step": 19066 }, { "epoch": 62.514754098360655, "grad_norm": 4.497910499572754, "learning_rate": 6.507777820844589e-06, "loss": 0.365, "step": 19067 }, { "epoch": 62.51803278688524, "grad_norm": 6.558767795562744, "learning_rate": 6.506782810020047e-06, "loss": 0.3874, "step": 19068 }, { "epoch": 62.52131147540984, "grad_norm": 4.534986972808838, "learning_rate": 6.505787838585161e-06, "loss": 0.3952, "step": 19069 }, { "epoch": 62.52459016393443, "grad_norm": 6.242035865783691, "learning_rate": 6.50479290655115e-06, "loss": 0.5132, "step": 19070 }, { "epoch": 62.527868852459015, "grad_norm": 6.602100849151611, "learning_rate": 6.503798013929232e-06, "loss": 0.5456, "step": 19071 }, { "epoch": 62.5311475409836, "grad_norm": 8.026644706726074, "learning_rate": 6.502803160730626e-06, "loss": 0.422, "step": 19072 }, { "epoch": 62.5344262295082, "grad_norm": 4.609890460968018, "learning_rate": 6.501808346966554e-06, "loss": 0.4726, "step": 19073 }, { "epoch": 62.53770491803279, "grad_norm": 5.6265058517456055, "learning_rate": 6.50081357264823e-06, "loss": 0.2346, "step": 19074 }, { "epoch": 62.540983606557376, "grad_norm": 4.979935646057129, "learning_rate": 6.499818837786866e-06, "loss": 0.541, "step": 19075 }, { "epoch": 62.544262295081964, "grad_norm": 4.86780309677124, "learning_rate": 6.498824142393688e-06, "loss": 0.279, "step": 19076 }, { "epoch": 62.54754098360656, "grad_norm": 9.138489723205566, "learning_rate": 6.497829486479909e-06, "loss": 0.6397, "step": 19077 }, { "epoch": 62.55081967213115, "grad_norm": 7.205055236816406, "learning_rate": 6.496834870056743e-06, "loss": 0.5219, "step": 19078 }, { "epoch": 62.554098360655736, "grad_norm": 4.942074775695801, "learning_rate": 6.495840293135407e-06, "loss": 0.7622, "step": 19079 }, { "epoch": 62.557377049180324, "grad_norm": 6.144079208374023, "learning_rate": 6.49484575572711e-06, "loss": 0.6153, "step": 19080 }, { "epoch": 62.56065573770492, "grad_norm": 5.884538650512695, "learning_rate": 6.493851257843076e-06, "loss": 0.5482, "step": 19081 }, { "epoch": 62.56393442622951, "grad_norm": 5.206638813018799, "learning_rate": 6.492856799494515e-06, "loss": 0.3283, "step": 19082 }, { "epoch": 62.5672131147541, "grad_norm": 4.806797504425049, "learning_rate": 6.491862380692639e-06, "loss": 0.6296, "step": 19083 }, { "epoch": 62.570491803278685, "grad_norm": 6.2249579429626465, "learning_rate": 6.49086800144866e-06, "loss": 0.5136, "step": 19084 }, { "epoch": 62.57377049180328, "grad_norm": 6.475386619567871, "learning_rate": 6.4898736617737955e-06, "loss": 0.4678, "step": 19085 }, { "epoch": 62.57704918032787, "grad_norm": 5.0503950119018555, "learning_rate": 6.488879361679257e-06, "loss": 0.5273, "step": 19086 }, { "epoch": 62.58032786885246, "grad_norm": 5.548984527587891, "learning_rate": 6.4878851011762525e-06, "loss": 0.5827, "step": 19087 }, { "epoch": 62.58360655737705, "grad_norm": 7.234115123748779, "learning_rate": 6.486890880275991e-06, "loss": 0.3949, "step": 19088 }, { "epoch": 62.58688524590164, "grad_norm": 6.046258449554443, "learning_rate": 6.4858966989896925e-06, "loss": 0.5161, "step": 19089 }, { "epoch": 62.59016393442623, "grad_norm": 6.720648765563965, "learning_rate": 6.484902557328562e-06, "loss": 0.4473, "step": 19090 }, { "epoch": 62.59344262295082, "grad_norm": 5.330162048339844, "learning_rate": 6.483908455303809e-06, "loss": 0.2669, "step": 19091 }, { "epoch": 62.59672131147541, "grad_norm": 14.742128372192383, "learning_rate": 6.482914392926644e-06, "loss": 0.3488, "step": 19092 }, { "epoch": 62.6, "grad_norm": 7.6778178215026855, "learning_rate": 6.481920370208274e-06, "loss": 0.2356, "step": 19093 }, { "epoch": 62.60327868852459, "grad_norm": 5.142500400543213, "learning_rate": 6.480926387159914e-06, "loss": 0.397, "step": 19094 }, { "epoch": 62.60655737704918, "grad_norm": 5.09289026260376, "learning_rate": 6.479932443792765e-06, "loss": 0.5624, "step": 19095 }, { "epoch": 62.609836065573774, "grad_norm": 4.784373760223389, "learning_rate": 6.478938540118039e-06, "loss": 0.3697, "step": 19096 }, { "epoch": 62.61311475409836, "grad_norm": 5.939155578613281, "learning_rate": 6.477944676146939e-06, "loss": 0.4374, "step": 19097 }, { "epoch": 62.61639344262295, "grad_norm": 6.957935810089111, "learning_rate": 6.476950851890677e-06, "loss": 0.3935, "step": 19098 }, { "epoch": 62.61967213114754, "grad_norm": 5.361697196960449, "learning_rate": 6.475957067360458e-06, "loss": 0.3148, "step": 19099 }, { "epoch": 62.622950819672134, "grad_norm": 6.547206401824951, "learning_rate": 6.474963322567487e-06, "loss": 0.5278, "step": 19100 }, { "epoch": 62.62622950819672, "grad_norm": 4.706331253051758, "learning_rate": 6.473969617522966e-06, "loss": 0.5625, "step": 19101 }, { "epoch": 62.62950819672131, "grad_norm": 5.532144546508789, "learning_rate": 6.4729759522381074e-06, "loss": 0.5295, "step": 19102 }, { "epoch": 62.6327868852459, "grad_norm": 6.35631799697876, "learning_rate": 6.471982326724113e-06, "loss": 0.431, "step": 19103 }, { "epoch": 62.636065573770495, "grad_norm": 5.411264419555664, "learning_rate": 6.470988740992185e-06, "loss": 0.4883, "step": 19104 }, { "epoch": 62.63934426229508, "grad_norm": 6.31894063949585, "learning_rate": 6.469995195053529e-06, "loss": 0.5415, "step": 19105 }, { "epoch": 62.64262295081967, "grad_norm": 4.703019142150879, "learning_rate": 6.469001688919343e-06, "loss": 0.4304, "step": 19106 }, { "epoch": 62.64590163934426, "grad_norm": 4.798712730407715, "learning_rate": 6.468008222600839e-06, "loss": 0.572, "step": 19107 }, { "epoch": 62.649180327868855, "grad_norm": 5.430003643035889, "learning_rate": 6.467014796109215e-06, "loss": 0.4939, "step": 19108 }, { "epoch": 62.65245901639344, "grad_norm": 5.561277866363525, "learning_rate": 6.466021409455672e-06, "loss": 0.6555, "step": 19109 }, { "epoch": 62.65573770491803, "grad_norm": 4.970428943634033, "learning_rate": 6.465028062651406e-06, "loss": 0.5157, "step": 19110 }, { "epoch": 62.65901639344262, "grad_norm": 6.27777624130249, "learning_rate": 6.464034755707631e-06, "loss": 0.7437, "step": 19111 }, { "epoch": 62.662295081967216, "grad_norm": 5.7200164794921875, "learning_rate": 6.463041488635539e-06, "loss": 0.4984, "step": 19112 }, { "epoch": 62.665573770491804, "grad_norm": 4.356764793395996, "learning_rate": 6.462048261446332e-06, "loss": 0.5315, "step": 19113 }, { "epoch": 62.66885245901639, "grad_norm": 5.229246616363525, "learning_rate": 6.461055074151206e-06, "loss": 0.4833, "step": 19114 }, { "epoch": 62.67213114754098, "grad_norm": 4.536275863647461, "learning_rate": 6.460061926761367e-06, "loss": 0.4456, "step": 19115 }, { "epoch": 62.675409836065576, "grad_norm": 5.409986972808838, "learning_rate": 6.45906881928801e-06, "loss": 0.3314, "step": 19116 }, { "epoch": 62.678688524590164, "grad_norm": 4.656509876251221, "learning_rate": 6.4580757517423345e-06, "loss": 0.507, "step": 19117 }, { "epoch": 62.68196721311475, "grad_norm": 7.167966842651367, "learning_rate": 6.457082724135535e-06, "loss": 0.4975, "step": 19118 }, { "epoch": 62.68524590163934, "grad_norm": 5.0011115074157715, "learning_rate": 6.456089736478812e-06, "loss": 0.3629, "step": 19119 }, { "epoch": 62.68852459016394, "grad_norm": 4.4201507568359375, "learning_rate": 6.455096788783364e-06, "loss": 0.2336, "step": 19120 }, { "epoch": 62.691803278688525, "grad_norm": 6.448601722717285, "learning_rate": 6.454103881060383e-06, "loss": 0.394, "step": 19121 }, { "epoch": 62.69508196721311, "grad_norm": 8.231487274169922, "learning_rate": 6.45311101332107e-06, "loss": 0.4643, "step": 19122 }, { "epoch": 62.6983606557377, "grad_norm": 5.095358848571777, "learning_rate": 6.452118185576616e-06, "loss": 0.5287, "step": 19123 }, { "epoch": 62.7016393442623, "grad_norm": 4.2613844871521, "learning_rate": 6.45112539783822e-06, "loss": 0.4635, "step": 19124 }, { "epoch": 62.704918032786885, "grad_norm": 7.765599250793457, "learning_rate": 6.4501326501170734e-06, "loss": 0.52, "step": 19125 }, { "epoch": 62.708196721311474, "grad_norm": 5.01884651184082, "learning_rate": 6.449139942424374e-06, "loss": 0.475, "step": 19126 }, { "epoch": 62.71147540983607, "grad_norm": 5.706582069396973, "learning_rate": 6.448147274771313e-06, "loss": 0.5058, "step": 19127 }, { "epoch": 62.71475409836066, "grad_norm": 5.054599761962891, "learning_rate": 6.447154647169082e-06, "loss": 0.5111, "step": 19128 }, { "epoch": 62.718032786885246, "grad_norm": 7.169543266296387, "learning_rate": 6.4461620596288795e-06, "loss": 0.4045, "step": 19129 }, { "epoch": 62.721311475409834, "grad_norm": 5.881169319152832, "learning_rate": 6.445169512161895e-06, "loss": 0.3382, "step": 19130 }, { "epoch": 62.72459016393443, "grad_norm": 6.972024440765381, "learning_rate": 6.44417700477932e-06, "loss": 0.5415, "step": 19131 }, { "epoch": 62.72786885245902, "grad_norm": 4.957470417022705, "learning_rate": 6.443184537492344e-06, "loss": 0.5868, "step": 19132 }, { "epoch": 62.731147540983606, "grad_norm": 5.121526718139648, "learning_rate": 6.4421921103121624e-06, "loss": 0.6171, "step": 19133 }, { "epoch": 62.734426229508195, "grad_norm": 6.203983783721924, "learning_rate": 6.441199723249965e-06, "loss": 0.4644, "step": 19134 }, { "epoch": 62.73770491803279, "grad_norm": 5.093392372131348, "learning_rate": 6.4402073763169405e-06, "loss": 0.7404, "step": 19135 }, { "epoch": 62.74098360655738, "grad_norm": 5.717812538146973, "learning_rate": 6.439215069524276e-06, "loss": 0.6226, "step": 19136 }, { "epoch": 62.74426229508197, "grad_norm": 6.533528804779053, "learning_rate": 6.438222802883167e-06, "loss": 0.366, "step": 19137 }, { "epoch": 62.747540983606555, "grad_norm": 6.831751346588135, "learning_rate": 6.4372305764047995e-06, "loss": 0.2813, "step": 19138 }, { "epoch": 62.75081967213115, "grad_norm": 5.3236918449401855, "learning_rate": 6.436238390100361e-06, "loss": 0.5812, "step": 19139 }, { "epoch": 62.75409836065574, "grad_norm": 4.768003463745117, "learning_rate": 6.435246243981041e-06, "loss": 0.6043, "step": 19140 }, { "epoch": 62.75737704918033, "grad_norm": 5.172018527984619, "learning_rate": 6.434254138058021e-06, "loss": 0.5717, "step": 19141 }, { "epoch": 62.760655737704916, "grad_norm": 4.816915988922119, "learning_rate": 6.433262072342497e-06, "loss": 0.7628, "step": 19142 }, { "epoch": 62.76393442622951, "grad_norm": 8.037229537963867, "learning_rate": 6.4322700468456525e-06, "loss": 0.5289, "step": 19143 }, { "epoch": 62.7672131147541, "grad_norm": 5.536598205566406, "learning_rate": 6.431278061578673e-06, "loss": 0.4519, "step": 19144 }, { "epoch": 62.77049180327869, "grad_norm": 5.9484148025512695, "learning_rate": 6.43028611655274e-06, "loss": 0.5793, "step": 19145 }, { "epoch": 62.773770491803276, "grad_norm": 5.7981343269348145, "learning_rate": 6.429294211779045e-06, "loss": 0.4012, "step": 19146 }, { "epoch": 62.77704918032787, "grad_norm": 5.008084297180176, "learning_rate": 6.428302347268771e-06, "loss": 0.5348, "step": 19147 }, { "epoch": 62.78032786885246, "grad_norm": 5.490179538726807, "learning_rate": 6.4273105230331e-06, "loss": 0.4611, "step": 19148 }, { "epoch": 62.78360655737705, "grad_norm": 6.832577228546143, "learning_rate": 6.426318739083217e-06, "loss": 0.4017, "step": 19149 }, { "epoch": 62.78688524590164, "grad_norm": 6.704050064086914, "learning_rate": 6.42532699543031e-06, "loss": 0.6148, "step": 19150 }, { "epoch": 62.79016393442623, "grad_norm": 5.180011749267578, "learning_rate": 6.424335292085553e-06, "loss": 0.3339, "step": 19151 }, { "epoch": 62.79344262295082, "grad_norm": 5.916033744812012, "learning_rate": 6.423343629060137e-06, "loss": 0.4066, "step": 19152 }, { "epoch": 62.79672131147541, "grad_norm": 7.086813926696777, "learning_rate": 6.42235200636524e-06, "loss": 0.5425, "step": 19153 }, { "epoch": 62.8, "grad_norm": 5.242716312408447, "learning_rate": 6.421360424012039e-06, "loss": 0.3445, "step": 19154 }, { "epoch": 62.80327868852459, "grad_norm": 4.5190510749816895, "learning_rate": 6.420368882011726e-06, "loss": 0.6643, "step": 19155 }, { "epoch": 62.80655737704918, "grad_norm": 13.047707557678223, "learning_rate": 6.419377380375476e-06, "loss": 0.4805, "step": 19156 }, { "epoch": 62.80983606557377, "grad_norm": 4.6534905433654785, "learning_rate": 6.418385919114467e-06, "loss": 0.4422, "step": 19157 }, { "epoch": 62.81311475409836, "grad_norm": 6.668212413787842, "learning_rate": 6.417394498239877e-06, "loss": 0.6071, "step": 19158 }, { "epoch": 62.81639344262295, "grad_norm": 8.240099906921387, "learning_rate": 6.416403117762892e-06, "loss": 0.3082, "step": 19159 }, { "epoch": 62.81967213114754, "grad_norm": 16.60336685180664, "learning_rate": 6.415411777694691e-06, "loss": 0.7636, "step": 19160 }, { "epoch": 62.82295081967213, "grad_norm": 5.494783878326416, "learning_rate": 6.414420478046447e-06, "loss": 0.3421, "step": 19161 }, { "epoch": 62.82622950819672, "grad_norm": 5.748664855957031, "learning_rate": 6.413429218829337e-06, "loss": 0.7131, "step": 19162 }, { "epoch": 62.829508196721314, "grad_norm": 6.582952976226807, "learning_rate": 6.412438000054545e-06, "loss": 0.6694, "step": 19163 }, { "epoch": 62.8327868852459, "grad_norm": 4.016868591308594, "learning_rate": 6.411446821733245e-06, "loss": 0.2477, "step": 19164 }, { "epoch": 62.83606557377049, "grad_norm": 5.081075191497803, "learning_rate": 6.410455683876613e-06, "loss": 0.5723, "step": 19165 }, { "epoch": 62.83934426229508, "grad_norm": 5.256467819213867, "learning_rate": 6.409464586495826e-06, "loss": 0.4828, "step": 19166 }, { "epoch": 62.842622950819674, "grad_norm": 5.009435653686523, "learning_rate": 6.408473529602055e-06, "loss": 0.3265, "step": 19167 }, { "epoch": 62.84590163934426, "grad_norm": 6.629855155944824, "learning_rate": 6.4074825132064825e-06, "loss": 0.4919, "step": 19168 }, { "epoch": 62.84918032786885, "grad_norm": 5.842901229858398, "learning_rate": 6.406491537320281e-06, "loss": 0.4712, "step": 19169 }, { "epoch": 62.85245901639344, "grad_norm": 6.487308502197266, "learning_rate": 6.405500601954622e-06, "loss": 0.4418, "step": 19170 }, { "epoch": 62.855737704918035, "grad_norm": 4.4161272048950195, "learning_rate": 6.4045097071206785e-06, "loss": 0.3046, "step": 19171 }, { "epoch": 62.85901639344262, "grad_norm": 4.354671955108643, "learning_rate": 6.403518852829629e-06, "loss": 0.3726, "step": 19172 }, { "epoch": 62.86229508196721, "grad_norm": 4.8802409172058105, "learning_rate": 6.402528039092646e-06, "loss": 0.3836, "step": 19173 }, { "epoch": 62.86557377049181, "grad_norm": 6.161806106567383, "learning_rate": 6.401537265920896e-06, "loss": 0.4428, "step": 19174 }, { "epoch": 62.868852459016395, "grad_norm": 4.673542499542236, "learning_rate": 6.4005465333255555e-06, "loss": 0.4269, "step": 19175 }, { "epoch": 62.87213114754098, "grad_norm": 5.067198753356934, "learning_rate": 6.399555841317797e-06, "loss": 0.4323, "step": 19176 }, { "epoch": 62.87540983606557, "grad_norm": 5.164965629577637, "learning_rate": 6.3985651899087874e-06, "loss": 0.5121, "step": 19177 }, { "epoch": 62.87868852459017, "grad_norm": 5.731149196624756, "learning_rate": 6.397574579109703e-06, "loss": 0.5753, "step": 19178 }, { "epoch": 62.881967213114756, "grad_norm": 7.190459728240967, "learning_rate": 6.39658400893171e-06, "loss": 0.5553, "step": 19179 }, { "epoch": 62.885245901639344, "grad_norm": 5.242973327636719, "learning_rate": 6.395593479385975e-06, "loss": 0.3991, "step": 19180 }, { "epoch": 62.88852459016393, "grad_norm": 6.172014236450195, "learning_rate": 6.394602990483676e-06, "loss": 0.5261, "step": 19181 }, { "epoch": 62.89180327868853, "grad_norm": 4.727298736572266, "learning_rate": 6.393612542235977e-06, "loss": 0.5054, "step": 19182 }, { "epoch": 62.895081967213116, "grad_norm": 5.007772922515869, "learning_rate": 6.392622134654045e-06, "loss": 0.3207, "step": 19183 }, { "epoch": 62.898360655737704, "grad_norm": 4.981260776519775, "learning_rate": 6.391631767749047e-06, "loss": 0.6662, "step": 19184 }, { "epoch": 62.90163934426229, "grad_norm": 12.836238861083984, "learning_rate": 6.3906414415321545e-06, "loss": 0.5306, "step": 19185 }, { "epoch": 62.90491803278689, "grad_norm": 5.90874719619751, "learning_rate": 6.389651156014534e-06, "loss": 0.5531, "step": 19186 }, { "epoch": 62.90819672131148, "grad_norm": 4.995168685913086, "learning_rate": 6.388660911207349e-06, "loss": 0.6342, "step": 19187 }, { "epoch": 62.911475409836065, "grad_norm": 5.840649604797363, "learning_rate": 6.387670707121765e-06, "loss": 0.4669, "step": 19188 }, { "epoch": 62.91475409836065, "grad_norm": 5.887513160705566, "learning_rate": 6.386680543768953e-06, "loss": 0.619, "step": 19189 }, { "epoch": 62.91803278688525, "grad_norm": 5.540610313415527, "learning_rate": 6.3856904211600735e-06, "loss": 0.4794, "step": 19190 }, { "epoch": 62.92131147540984, "grad_norm": 16.89153480529785, "learning_rate": 6.3847003393062936e-06, "loss": 0.7414, "step": 19191 }, { "epoch": 62.924590163934425, "grad_norm": 9.146482467651367, "learning_rate": 6.383710298218777e-06, "loss": 0.6017, "step": 19192 }, { "epoch": 62.927868852459014, "grad_norm": 4.873529434204102, "learning_rate": 6.382720297908682e-06, "loss": 0.3799, "step": 19193 }, { "epoch": 62.93114754098361, "grad_norm": 5.127469539642334, "learning_rate": 6.3817303383871796e-06, "loss": 0.3375, "step": 19194 }, { "epoch": 62.9344262295082, "grad_norm": 7.345698833465576, "learning_rate": 6.380740419665432e-06, "loss": 0.3484, "step": 19195 }, { "epoch": 62.937704918032786, "grad_norm": 5.165151596069336, "learning_rate": 6.379750541754598e-06, "loss": 0.3718, "step": 19196 }, { "epoch": 62.940983606557374, "grad_norm": 4.655308246612549, "learning_rate": 6.378760704665836e-06, "loss": 0.2644, "step": 19197 }, { "epoch": 62.94426229508197, "grad_norm": 5.191847324371338, "learning_rate": 6.377770908410316e-06, "loss": 0.5939, "step": 19198 }, { "epoch": 62.94754098360656, "grad_norm": 4.848532676696777, "learning_rate": 6.376781152999197e-06, "loss": 0.3806, "step": 19199 }, { "epoch": 62.950819672131146, "grad_norm": 26.967721939086914, "learning_rate": 6.375791438443635e-06, "loss": 0.451, "step": 19200 }, { "epoch": 62.954098360655735, "grad_norm": 5.769126892089844, "learning_rate": 6.374801764754792e-06, "loss": 0.569, "step": 19201 }, { "epoch": 62.95737704918033, "grad_norm": 9.097774505615234, "learning_rate": 6.373812131943832e-06, "loss": 0.4834, "step": 19202 }, { "epoch": 62.96065573770492, "grad_norm": 7.765484809875488, "learning_rate": 6.372822540021908e-06, "loss": 0.5118, "step": 19203 }, { "epoch": 62.96393442622951, "grad_norm": 4.573898792266846, "learning_rate": 6.371832989000182e-06, "loss": 0.5186, "step": 19204 }, { "epoch": 62.967213114754095, "grad_norm": 5.218558311462402, "learning_rate": 6.370843478889812e-06, "loss": 0.6297, "step": 19205 }, { "epoch": 62.97049180327869, "grad_norm": 6.071234226226807, "learning_rate": 6.369854009701953e-06, "loss": 0.4814, "step": 19206 }, { "epoch": 62.97377049180328, "grad_norm": 5.017379283905029, "learning_rate": 6.368864581447765e-06, "loss": 0.3088, "step": 19207 }, { "epoch": 62.97704918032787, "grad_norm": 4.539848804473877, "learning_rate": 6.367875194138406e-06, "loss": 0.5037, "step": 19208 }, { "epoch": 62.980327868852456, "grad_norm": 8.071182250976562, "learning_rate": 6.36688584778503e-06, "loss": 0.592, "step": 19209 }, { "epoch": 62.98360655737705, "grad_norm": 15.05698013305664, "learning_rate": 6.365896542398791e-06, "loss": 0.4631, "step": 19210 }, { "epoch": 62.98688524590164, "grad_norm": 8.541091918945312, "learning_rate": 6.3649072779908505e-06, "loss": 0.6077, "step": 19211 }, { "epoch": 62.99016393442623, "grad_norm": 5.887753009796143, "learning_rate": 6.363918054572359e-06, "loss": 0.6057, "step": 19212 }, { "epoch": 62.993442622950816, "grad_norm": 5.699806213378906, "learning_rate": 6.362928872154473e-06, "loss": 0.3425, "step": 19213 }, { "epoch": 62.99672131147541, "grad_norm": 11.061002731323242, "learning_rate": 6.361939730748341e-06, "loss": 0.6195, "step": 19214 }, { "epoch": 63.0, "grad_norm": 4.828025817871094, "learning_rate": 6.360950630365126e-06, "loss": 0.2991, "step": 19215 }, { "epoch": 63.00327868852459, "grad_norm": 5.982668876647949, "learning_rate": 6.359961571015976e-06, "loss": 0.5475, "step": 19216 }, { "epoch": 63.006557377049184, "grad_norm": 5.2019243240356445, "learning_rate": 6.358972552712043e-06, "loss": 0.7005, "step": 19217 }, { "epoch": 63.00983606557377, "grad_norm": 4.247687816619873, "learning_rate": 6.357983575464482e-06, "loss": 0.5476, "step": 19218 }, { "epoch": 63.01311475409836, "grad_norm": 5.142889976501465, "learning_rate": 6.356994639284436e-06, "loss": 0.4246, "step": 19219 }, { "epoch": 63.01639344262295, "grad_norm": 5.483973503112793, "learning_rate": 6.3560057441830695e-06, "loss": 0.4051, "step": 19220 }, { "epoch": 63.019672131147544, "grad_norm": 7.0180253982543945, "learning_rate": 6.355016890171526e-06, "loss": 0.6134, "step": 19221 }, { "epoch": 63.02295081967213, "grad_norm": 5.172515392303467, "learning_rate": 6.354028077260959e-06, "loss": 0.3772, "step": 19222 }, { "epoch": 63.02622950819672, "grad_norm": 4.842433452606201, "learning_rate": 6.353039305462509e-06, "loss": 0.5176, "step": 19223 }, { "epoch": 63.02950819672131, "grad_norm": 4.919274806976318, "learning_rate": 6.3520505747873385e-06, "loss": 0.4677, "step": 19224 }, { "epoch": 63.032786885245905, "grad_norm": 7.551237106323242, "learning_rate": 6.351061885246591e-06, "loss": 0.586, "step": 19225 }, { "epoch": 63.03606557377049, "grad_norm": 4.9258904457092285, "learning_rate": 6.350073236851415e-06, "loss": 0.3862, "step": 19226 }, { "epoch": 63.03934426229508, "grad_norm": 5.188631534576416, "learning_rate": 6.349084629612954e-06, "loss": 0.5068, "step": 19227 }, { "epoch": 63.04262295081967, "grad_norm": 7.97538948059082, "learning_rate": 6.348096063542363e-06, "loss": 0.3879, "step": 19228 }, { "epoch": 63.045901639344265, "grad_norm": 7.682197570800781, "learning_rate": 6.347107538650785e-06, "loss": 0.494, "step": 19229 }, { "epoch": 63.049180327868854, "grad_norm": 4.516916751861572, "learning_rate": 6.346119054949368e-06, "loss": 0.319, "step": 19230 }, { "epoch": 63.05245901639344, "grad_norm": 6.145565986633301, "learning_rate": 6.345130612449259e-06, "loss": 0.2876, "step": 19231 }, { "epoch": 63.05573770491803, "grad_norm": 6.483330249786377, "learning_rate": 6.344142211161599e-06, "loss": 0.5629, "step": 19232 }, { "epoch": 63.059016393442626, "grad_norm": 7.820570468902588, "learning_rate": 6.3431538510975385e-06, "loss": 0.4061, "step": 19233 }, { "epoch": 63.062295081967214, "grad_norm": 5.595228672027588, "learning_rate": 6.3421655322682205e-06, "loss": 0.3332, "step": 19234 }, { "epoch": 63.0655737704918, "grad_norm": 6.554577350616455, "learning_rate": 6.341177254684792e-06, "loss": 0.6308, "step": 19235 }, { "epoch": 63.06885245901639, "grad_norm": 7.071450710296631, "learning_rate": 6.340189018358388e-06, "loss": 0.4003, "step": 19236 }, { "epoch": 63.072131147540986, "grad_norm": 5.927217960357666, "learning_rate": 6.3392008233001626e-06, "loss": 0.5708, "step": 19237 }, { "epoch": 63.075409836065575, "grad_norm": 6.122169494628906, "learning_rate": 6.3382126695212554e-06, "loss": 0.6423, "step": 19238 }, { "epoch": 63.07868852459016, "grad_norm": 6.869910717010498, "learning_rate": 6.3372245570328065e-06, "loss": 0.4382, "step": 19239 }, { "epoch": 63.08196721311475, "grad_norm": 32.33851623535156, "learning_rate": 6.336236485845954e-06, "loss": 0.4868, "step": 19240 }, { "epoch": 63.08524590163935, "grad_norm": 3.8537046909332275, "learning_rate": 6.33524845597185e-06, "loss": 0.4478, "step": 19241 }, { "epoch": 63.088524590163935, "grad_norm": 5.669567108154297, "learning_rate": 6.3342604674216305e-06, "loss": 0.6889, "step": 19242 }, { "epoch": 63.09180327868852, "grad_norm": 4.977323055267334, "learning_rate": 6.333272520206435e-06, "loss": 0.5503, "step": 19243 }, { "epoch": 63.09508196721311, "grad_norm": 5.985143184661865, "learning_rate": 6.332284614337403e-06, "loss": 0.6812, "step": 19244 }, { "epoch": 63.09836065573771, "grad_norm": 9.478468894958496, "learning_rate": 6.331296749825673e-06, "loss": 0.4046, "step": 19245 }, { "epoch": 63.101639344262296, "grad_norm": 5.179092884063721, "learning_rate": 6.330308926682391e-06, "loss": 0.4647, "step": 19246 }, { "epoch": 63.104918032786884, "grad_norm": 4.704674243927002, "learning_rate": 6.3293211449186896e-06, "loss": 0.4598, "step": 19247 }, { "epoch": 63.10819672131147, "grad_norm": 4.864034652709961, "learning_rate": 6.32833340454571e-06, "loss": 0.5576, "step": 19248 }, { "epoch": 63.11147540983607, "grad_norm": 5.052605628967285, "learning_rate": 6.3273457055745844e-06, "loss": 0.3226, "step": 19249 }, { "epoch": 63.114754098360656, "grad_norm": 6.292104244232178, "learning_rate": 6.326358048016459e-06, "loss": 0.528, "step": 19250 }, { "epoch": 63.118032786885244, "grad_norm": 4.643085479736328, "learning_rate": 6.325370431882467e-06, "loss": 0.2968, "step": 19251 }, { "epoch": 63.12131147540983, "grad_norm": 10.181221961975098, "learning_rate": 6.324382857183742e-06, "loss": 0.5303, "step": 19252 }, { "epoch": 63.12459016393443, "grad_norm": 6.708831787109375, "learning_rate": 6.323395323931419e-06, "loss": 0.4502, "step": 19253 }, { "epoch": 63.12786885245902, "grad_norm": 5.639472007751465, "learning_rate": 6.322407832136641e-06, "loss": 0.3241, "step": 19254 }, { "epoch": 63.131147540983605, "grad_norm": 4.817257881164551, "learning_rate": 6.321420381810538e-06, "loss": 0.4764, "step": 19255 }, { "epoch": 63.13442622950819, "grad_norm": 5.766231060028076, "learning_rate": 6.320432972964243e-06, "loss": 0.4604, "step": 19256 }, { "epoch": 63.13770491803279, "grad_norm": 5.572198867797852, "learning_rate": 6.319445605608894e-06, "loss": 0.4191, "step": 19257 }, { "epoch": 63.14098360655738, "grad_norm": 5.20519495010376, "learning_rate": 6.318458279755621e-06, "loss": 0.3789, "step": 19258 }, { "epoch": 63.144262295081965, "grad_norm": 5.741410255432129, "learning_rate": 6.317470995415558e-06, "loss": 0.3325, "step": 19259 }, { "epoch": 63.14754098360656, "grad_norm": 7.208306312561035, "learning_rate": 6.31648375259984e-06, "loss": 0.5208, "step": 19260 }, { "epoch": 63.15081967213115, "grad_norm": 6.788680553436279, "learning_rate": 6.315496551319599e-06, "loss": 0.5172, "step": 19261 }, { "epoch": 63.15409836065574, "grad_norm": 6.085826873779297, "learning_rate": 6.31450939158596e-06, "loss": 0.2898, "step": 19262 }, { "epoch": 63.157377049180326, "grad_norm": 6.9499640464782715, "learning_rate": 6.313522273410064e-06, "loss": 0.8315, "step": 19263 }, { "epoch": 63.16065573770492, "grad_norm": 6.063748836517334, "learning_rate": 6.3125351968030375e-06, "loss": 0.4308, "step": 19264 }, { "epoch": 63.16393442622951, "grad_norm": 6.7292351722717285, "learning_rate": 6.311548161776011e-06, "loss": 0.2357, "step": 19265 }, { "epoch": 63.1672131147541, "grad_norm": 5.144380569458008, "learning_rate": 6.31056116834011e-06, "loss": 0.2864, "step": 19266 }, { "epoch": 63.170491803278686, "grad_norm": 6.325994968414307, "learning_rate": 6.309574216506471e-06, "loss": 0.5468, "step": 19267 }, { "epoch": 63.17377049180328, "grad_norm": 5.750614166259766, "learning_rate": 6.3085873062862204e-06, "loss": 0.4192, "step": 19268 }, { "epoch": 63.17704918032787, "grad_norm": 5.886579513549805, "learning_rate": 6.307600437690486e-06, "loss": 0.6938, "step": 19269 }, { "epoch": 63.18032786885246, "grad_norm": 5.464720249176025, "learning_rate": 6.3066136107303964e-06, "loss": 0.2939, "step": 19270 }, { "epoch": 63.18360655737705, "grad_norm": 5.076376438140869, "learning_rate": 6.305626825417073e-06, "loss": 0.4133, "step": 19271 }, { "epoch": 63.18688524590164, "grad_norm": 5.303369998931885, "learning_rate": 6.304640081761653e-06, "loss": 0.3752, "step": 19272 }, { "epoch": 63.19016393442623, "grad_norm": 5.576560020446777, "learning_rate": 6.303653379775259e-06, "loss": 0.4442, "step": 19273 }, { "epoch": 63.19344262295082, "grad_norm": 5.279451370239258, "learning_rate": 6.302666719469015e-06, "loss": 0.3874, "step": 19274 }, { "epoch": 63.19672131147541, "grad_norm": 6.303448677062988, "learning_rate": 6.301680100854046e-06, "loss": 0.5879, "step": 19275 }, { "epoch": 63.2, "grad_norm": 5.594088077545166, "learning_rate": 6.300693523941481e-06, "loss": 0.4802, "step": 19276 }, { "epoch": 63.20327868852459, "grad_norm": 5.740756988525391, "learning_rate": 6.299706988742444e-06, "loss": 0.7069, "step": 19277 }, { "epoch": 63.20655737704918, "grad_norm": 5.2384352684021, "learning_rate": 6.298720495268058e-06, "loss": 0.6106, "step": 19278 }, { "epoch": 63.20983606557377, "grad_norm": 5.179543972015381, "learning_rate": 6.297734043529442e-06, "loss": 0.3829, "step": 19279 }, { "epoch": 63.21311475409836, "grad_norm": 7.865764617919922, "learning_rate": 6.29674763353773e-06, "loss": 0.4251, "step": 19280 }, { "epoch": 63.21639344262295, "grad_norm": 5.014914035797119, "learning_rate": 6.295761265304037e-06, "loss": 0.3324, "step": 19281 }, { "epoch": 63.21967213114754, "grad_norm": 5.388189792633057, "learning_rate": 6.294774938839485e-06, "loss": 0.5084, "step": 19282 }, { "epoch": 63.22295081967213, "grad_norm": 4.7378411293029785, "learning_rate": 6.293788654155202e-06, "loss": 0.362, "step": 19283 }, { "epoch": 63.226229508196724, "grad_norm": 4.669742584228516, "learning_rate": 6.292802411262302e-06, "loss": 0.3629, "step": 19284 }, { "epoch": 63.22950819672131, "grad_norm": 18.11126708984375, "learning_rate": 6.291816210171909e-06, "loss": 0.4166, "step": 19285 }, { "epoch": 63.2327868852459, "grad_norm": 4.887505531311035, "learning_rate": 6.290830050895147e-06, "loss": 0.5186, "step": 19286 }, { "epoch": 63.23606557377049, "grad_norm": 10.154803276062012, "learning_rate": 6.2898439334431316e-06, "loss": 0.3666, "step": 19287 }, { "epoch": 63.239344262295084, "grad_norm": 5.097113609313965, "learning_rate": 6.288857857826982e-06, "loss": 0.5236, "step": 19288 }, { "epoch": 63.24262295081967, "grad_norm": 4.616393089294434, "learning_rate": 6.287871824057818e-06, "loss": 0.3771, "step": 19289 }, { "epoch": 63.24590163934426, "grad_norm": 5.664294719696045, "learning_rate": 6.286885832146762e-06, "loss": 0.4394, "step": 19290 }, { "epoch": 63.24918032786885, "grad_norm": 5.334412574768066, "learning_rate": 6.2858998821049275e-06, "loss": 0.3853, "step": 19291 }, { "epoch": 63.252459016393445, "grad_norm": 4.708053112030029, "learning_rate": 6.284913973943431e-06, "loss": 0.4497, "step": 19292 }, { "epoch": 63.25573770491803, "grad_norm": 5.249453544616699, "learning_rate": 6.283928107673395e-06, "loss": 0.7017, "step": 19293 }, { "epoch": 63.25901639344262, "grad_norm": 6.035428524017334, "learning_rate": 6.282942283305934e-06, "loss": 0.4478, "step": 19294 }, { "epoch": 63.26229508196721, "grad_norm": 5.923666000366211, "learning_rate": 6.281956500852163e-06, "loss": 0.3562, "step": 19295 }, { "epoch": 63.265573770491805, "grad_norm": 6.037125110626221, "learning_rate": 6.280970760323199e-06, "loss": 0.6614, "step": 19296 }, { "epoch": 63.268852459016394, "grad_norm": 6.693667411804199, "learning_rate": 6.279985061730152e-06, "loss": 0.5633, "step": 19297 }, { "epoch": 63.27213114754098, "grad_norm": 5.578596591949463, "learning_rate": 6.278999405084145e-06, "loss": 0.5197, "step": 19298 }, { "epoch": 63.27540983606557, "grad_norm": 5.864475727081299, "learning_rate": 6.278013790396289e-06, "loss": 0.4975, "step": 19299 }, { "epoch": 63.278688524590166, "grad_norm": 4.872368812561035, "learning_rate": 6.277028217677698e-06, "loss": 0.4139, "step": 19300 }, { "epoch": 63.281967213114754, "grad_norm": 6.878281116485596, "learning_rate": 6.27604268693948e-06, "loss": 0.4221, "step": 19301 }, { "epoch": 63.28524590163934, "grad_norm": 4.864138603210449, "learning_rate": 6.275057198192757e-06, "loss": 0.3079, "step": 19302 }, { "epoch": 63.28852459016394, "grad_norm": 5.0362467765808105, "learning_rate": 6.274071751448636e-06, "loss": 0.5174, "step": 19303 }, { "epoch": 63.291803278688526, "grad_norm": 4.921227931976318, "learning_rate": 6.273086346718231e-06, "loss": 0.5192, "step": 19304 }, { "epoch": 63.295081967213115, "grad_norm": 8.644425392150879, "learning_rate": 6.272100984012648e-06, "loss": 0.7285, "step": 19305 }, { "epoch": 63.2983606557377, "grad_norm": 5.41129207611084, "learning_rate": 6.271115663343007e-06, "loss": 0.3992, "step": 19306 }, { "epoch": 63.3016393442623, "grad_norm": 5.600493431091309, "learning_rate": 6.270130384720415e-06, "loss": 0.4249, "step": 19307 }, { "epoch": 63.30491803278689, "grad_norm": 6.476739883422852, "learning_rate": 6.269145148155978e-06, "loss": 0.3618, "step": 19308 }, { "epoch": 63.308196721311475, "grad_norm": 6.149216175079346, "learning_rate": 6.268159953660809e-06, "loss": 0.5427, "step": 19309 }, { "epoch": 63.31147540983606, "grad_norm": 4.870804786682129, "learning_rate": 6.267174801246015e-06, "loss": 0.2091, "step": 19310 }, { "epoch": 63.31475409836066, "grad_norm": 4.414190292358398, "learning_rate": 6.26618969092271e-06, "loss": 0.5269, "step": 19311 }, { "epoch": 63.31803278688525, "grad_norm": 5.391222953796387, "learning_rate": 6.2652046227019944e-06, "loss": 0.5488, "step": 19312 }, { "epoch": 63.321311475409836, "grad_norm": 7.6935505867004395, "learning_rate": 6.264219596594985e-06, "loss": 0.45, "step": 19313 }, { "epoch": 63.324590163934424, "grad_norm": 5.405896186828613, "learning_rate": 6.263234612612779e-06, "loss": 0.505, "step": 19314 }, { "epoch": 63.32786885245902, "grad_norm": 7.008880615234375, "learning_rate": 6.262249670766489e-06, "loss": 0.6299, "step": 19315 }, { "epoch": 63.33114754098361, "grad_norm": 13.704119682312012, "learning_rate": 6.261264771067222e-06, "loss": 0.5323, "step": 19316 }, { "epoch": 63.334426229508196, "grad_norm": 6.274504661560059, "learning_rate": 6.260279913526083e-06, "loss": 0.4053, "step": 19317 }, { "epoch": 63.337704918032784, "grad_norm": 8.87714672088623, "learning_rate": 6.259295098154171e-06, "loss": 0.7915, "step": 19318 }, { "epoch": 63.34098360655738, "grad_norm": 6.338038921356201, "learning_rate": 6.258310324962601e-06, "loss": 0.3728, "step": 19319 }, { "epoch": 63.34426229508197, "grad_norm": 7.094759464263916, "learning_rate": 6.257325593962472e-06, "loss": 0.4081, "step": 19320 }, { "epoch": 63.34754098360656, "grad_norm": 5.342562198638916, "learning_rate": 6.256340905164888e-06, "loss": 0.5397, "step": 19321 }, { "epoch": 63.350819672131145, "grad_norm": 8.941252708435059, "learning_rate": 6.2553562585809534e-06, "loss": 0.4282, "step": 19322 }, { "epoch": 63.35409836065574, "grad_norm": 5.026102542877197, "learning_rate": 6.254371654221767e-06, "loss": 0.4767, "step": 19323 }, { "epoch": 63.35737704918033, "grad_norm": 4.20045280456543, "learning_rate": 6.253387092098437e-06, "loss": 0.4347, "step": 19324 }, { "epoch": 63.36065573770492, "grad_norm": 5.983660697937012, "learning_rate": 6.252402572222065e-06, "loss": 0.4839, "step": 19325 }, { "epoch": 63.363934426229505, "grad_norm": 4.870532035827637, "learning_rate": 6.25141809460375e-06, "loss": 0.4485, "step": 19326 }, { "epoch": 63.3672131147541, "grad_norm": 7.247769355773926, "learning_rate": 6.250433659254589e-06, "loss": 0.411, "step": 19327 }, { "epoch": 63.37049180327869, "grad_norm": 5.927321434020996, "learning_rate": 6.2494492661856896e-06, "loss": 0.3699, "step": 19328 }, { "epoch": 63.37377049180328, "grad_norm": 5.303178787231445, "learning_rate": 6.2484649154081524e-06, "loss": 0.319, "step": 19329 }, { "epoch": 63.377049180327866, "grad_norm": 5.880183696746826, "learning_rate": 6.247480606933072e-06, "loss": 0.4528, "step": 19330 }, { "epoch": 63.38032786885246, "grad_norm": 5.53444242477417, "learning_rate": 6.2464963407715455e-06, "loss": 0.2936, "step": 19331 }, { "epoch": 63.38360655737705, "grad_norm": 6.232907772064209, "learning_rate": 6.24551211693468e-06, "loss": 0.487, "step": 19332 }, { "epoch": 63.38688524590164, "grad_norm": 4.759633541107178, "learning_rate": 6.2445279354335685e-06, "loss": 0.5308, "step": 19333 }, { "epoch": 63.390163934426226, "grad_norm": 6.230350494384766, "learning_rate": 6.24354379627931e-06, "loss": 0.3778, "step": 19334 }, { "epoch": 63.39344262295082, "grad_norm": 6.591496467590332, "learning_rate": 6.2425596994829974e-06, "loss": 0.5404, "step": 19335 }, { "epoch": 63.39672131147541, "grad_norm": 4.702265739440918, "learning_rate": 6.2415756450557315e-06, "loss": 0.47, "step": 19336 }, { "epoch": 63.4, "grad_norm": 8.57014274597168, "learning_rate": 6.2405916330086106e-06, "loss": 0.4591, "step": 19337 }, { "epoch": 63.40327868852459, "grad_norm": 4.875648021697998, "learning_rate": 6.239607663352726e-06, "loss": 0.4395, "step": 19338 }, { "epoch": 63.40655737704918, "grad_norm": 5.3529462814331055, "learning_rate": 6.238623736099176e-06, "loss": 0.4558, "step": 19339 }, { "epoch": 63.40983606557377, "grad_norm": 5.958291530609131, "learning_rate": 6.237639851259053e-06, "loss": 0.5414, "step": 19340 }, { "epoch": 63.41311475409836, "grad_norm": 7.466793537139893, "learning_rate": 6.236656008843452e-06, "loss": 0.5365, "step": 19341 }, { "epoch": 63.41639344262295, "grad_norm": 5.595611095428467, "learning_rate": 6.23567220886347e-06, "loss": 0.7453, "step": 19342 }, { "epoch": 63.41967213114754, "grad_norm": 4.82017707824707, "learning_rate": 6.2346884513301975e-06, "loss": 0.3505, "step": 19343 }, { "epoch": 63.42295081967213, "grad_norm": 4.270644187927246, "learning_rate": 6.233704736254724e-06, "loss": 0.4093, "step": 19344 }, { "epoch": 63.42622950819672, "grad_norm": 5.10321044921875, "learning_rate": 6.232721063648148e-06, "loss": 0.2265, "step": 19345 }, { "epoch": 63.429508196721315, "grad_norm": 4.95827579498291, "learning_rate": 6.231737433521561e-06, "loss": 0.2883, "step": 19346 }, { "epoch": 63.4327868852459, "grad_norm": 8.660544395446777, "learning_rate": 6.230753845886051e-06, "loss": 0.4456, "step": 19347 }, { "epoch": 63.43606557377049, "grad_norm": 5.763199329376221, "learning_rate": 6.229770300752711e-06, "loss": 0.6991, "step": 19348 }, { "epoch": 63.43934426229508, "grad_norm": 4.641099452972412, "learning_rate": 6.228786798132627e-06, "loss": 0.5579, "step": 19349 }, { "epoch": 63.442622950819676, "grad_norm": 6.7942423820495605, "learning_rate": 6.2278033380368975e-06, "loss": 0.4203, "step": 19350 }, { "epoch": 63.445901639344264, "grad_norm": 5.812424182891846, "learning_rate": 6.2268199204766065e-06, "loss": 0.4027, "step": 19351 }, { "epoch": 63.44918032786885, "grad_norm": 7.983433723449707, "learning_rate": 6.225836545462845e-06, "loss": 0.5048, "step": 19352 }, { "epoch": 63.45245901639344, "grad_norm": 5.664939880371094, "learning_rate": 6.224853213006695e-06, "loss": 0.7242, "step": 19353 }, { "epoch": 63.455737704918036, "grad_norm": 5.760021686553955, "learning_rate": 6.2238699231192556e-06, "loss": 0.473, "step": 19354 }, { "epoch": 63.459016393442624, "grad_norm": 4.977642059326172, "learning_rate": 6.222886675811608e-06, "loss": 0.499, "step": 19355 }, { "epoch": 63.46229508196721, "grad_norm": 6.682328224182129, "learning_rate": 6.2219034710948415e-06, "loss": 0.4435, "step": 19356 }, { "epoch": 63.4655737704918, "grad_norm": 4.594069957733154, "learning_rate": 6.220920308980036e-06, "loss": 0.5907, "step": 19357 }, { "epoch": 63.4688524590164, "grad_norm": 5.11760950088501, "learning_rate": 6.219937189478289e-06, "loss": 0.3488, "step": 19358 }, { "epoch": 63.472131147540985, "grad_norm": 5.72029447555542, "learning_rate": 6.218954112600679e-06, "loss": 0.5647, "step": 19359 }, { "epoch": 63.47540983606557, "grad_norm": 6.216772079467773, "learning_rate": 6.217971078358294e-06, "loss": 0.4241, "step": 19360 }, { "epoch": 63.47868852459016, "grad_norm": 5.198127746582031, "learning_rate": 6.216988086762215e-06, "loss": 0.469, "step": 19361 }, { "epoch": 63.48196721311476, "grad_norm": 4.167154312133789, "learning_rate": 6.216005137823528e-06, "loss": 0.5502, "step": 19362 }, { "epoch": 63.485245901639345, "grad_norm": 5.673473358154297, "learning_rate": 6.2150222315533205e-06, "loss": 0.5, "step": 19363 }, { "epoch": 63.488524590163934, "grad_norm": 4.3898396492004395, "learning_rate": 6.21403936796267e-06, "loss": 0.4255, "step": 19364 }, { "epoch": 63.49180327868852, "grad_norm": 5.4754157066345215, "learning_rate": 6.213056547062663e-06, "loss": 0.5257, "step": 19365 }, { "epoch": 63.49508196721312, "grad_norm": 5.512045383453369, "learning_rate": 6.212073768864382e-06, "loss": 0.39, "step": 19366 }, { "epoch": 63.498360655737706, "grad_norm": 11.505411148071289, "learning_rate": 6.211091033378905e-06, "loss": 0.6091, "step": 19367 }, { "epoch": 63.501639344262294, "grad_norm": 4.267329216003418, "learning_rate": 6.210108340617319e-06, "loss": 0.4968, "step": 19368 }, { "epoch": 63.50491803278688, "grad_norm": 5.400529861450195, "learning_rate": 6.2091256905907015e-06, "loss": 0.6474, "step": 19369 }, { "epoch": 63.50819672131148, "grad_norm": 4.463533878326416, "learning_rate": 6.208143083310132e-06, "loss": 0.4674, "step": 19370 }, { "epoch": 63.511475409836066, "grad_norm": 8.804211616516113, "learning_rate": 6.207160518786691e-06, "loss": 0.4952, "step": 19371 }, { "epoch": 63.514754098360655, "grad_norm": 6.318220615386963, "learning_rate": 6.206177997031462e-06, "loss": 0.5134, "step": 19372 }, { "epoch": 63.51803278688524, "grad_norm": 4.73621129989624, "learning_rate": 6.205195518055519e-06, "loss": 0.5466, "step": 19373 }, { "epoch": 63.52131147540984, "grad_norm": 5.667174816131592, "learning_rate": 6.204213081869943e-06, "loss": 0.5511, "step": 19374 }, { "epoch": 63.52459016393443, "grad_norm": 9.597541809082031, "learning_rate": 6.203230688485807e-06, "loss": 0.4114, "step": 19375 }, { "epoch": 63.527868852459015, "grad_norm": 7.655783176422119, "learning_rate": 6.202248337914196e-06, "loss": 0.3884, "step": 19376 }, { "epoch": 63.5311475409836, "grad_norm": 6.586922645568848, "learning_rate": 6.201266030166185e-06, "loss": 0.4652, "step": 19377 }, { "epoch": 63.5344262295082, "grad_norm": 5.346607208251953, "learning_rate": 6.2002837652528495e-06, "loss": 0.3121, "step": 19378 }, { "epoch": 63.53770491803279, "grad_norm": 4.5646138191223145, "learning_rate": 6.19930154318526e-06, "loss": 0.4227, "step": 19379 }, { "epoch": 63.540983606557376, "grad_norm": 7.544699192047119, "learning_rate": 6.198319363974501e-06, "loss": 0.5752, "step": 19380 }, { "epoch": 63.544262295081964, "grad_norm": 16.32611846923828, "learning_rate": 6.197337227631646e-06, "loss": 0.3295, "step": 19381 }, { "epoch": 63.54754098360656, "grad_norm": 4.6178879737854, "learning_rate": 6.196355134167766e-06, "loss": 0.4876, "step": 19382 }, { "epoch": 63.55081967213115, "grad_norm": 5.199400901794434, "learning_rate": 6.195373083593932e-06, "loss": 0.3465, "step": 19383 }, { "epoch": 63.554098360655736, "grad_norm": 7.46492862701416, "learning_rate": 6.1943910759212265e-06, "loss": 0.6058, "step": 19384 }, { "epoch": 63.557377049180324, "grad_norm": 9.430770874023438, "learning_rate": 6.19340911116072e-06, "loss": 0.248, "step": 19385 }, { "epoch": 63.56065573770492, "grad_norm": 4.844975471496582, "learning_rate": 6.192427189323482e-06, "loss": 0.4537, "step": 19386 }, { "epoch": 63.56393442622951, "grad_norm": 5.9720892906188965, "learning_rate": 6.191445310420585e-06, "loss": 0.365, "step": 19387 }, { "epoch": 63.5672131147541, "grad_norm": 6.127199172973633, "learning_rate": 6.190463474463102e-06, "loss": 0.3522, "step": 19388 }, { "epoch": 63.570491803278685, "grad_norm": 4.974292278289795, "learning_rate": 6.189481681462106e-06, "loss": 0.4002, "step": 19389 }, { "epoch": 63.57377049180328, "grad_norm": 5.2689619064331055, "learning_rate": 6.188499931428666e-06, "loss": 0.5067, "step": 19390 }, { "epoch": 63.57704918032787, "grad_norm": 4.764227390289307, "learning_rate": 6.187518224373852e-06, "loss": 0.4412, "step": 19391 }, { "epoch": 63.58032786885246, "grad_norm": 4.636425971984863, "learning_rate": 6.186536560308732e-06, "loss": 0.5341, "step": 19392 }, { "epoch": 63.58360655737705, "grad_norm": 4.647479057312012, "learning_rate": 6.185554939244381e-06, "loss": 0.2344, "step": 19393 }, { "epoch": 63.58688524590164, "grad_norm": 6.5964202880859375, "learning_rate": 6.18457336119186e-06, "loss": 0.5512, "step": 19394 }, { "epoch": 63.59016393442623, "grad_norm": 6.95267391204834, "learning_rate": 6.183591826162245e-06, "loss": 0.5902, "step": 19395 }, { "epoch": 63.59344262295082, "grad_norm": 6.457531452178955, "learning_rate": 6.1826103341665965e-06, "loss": 0.5672, "step": 19396 }, { "epoch": 63.59672131147541, "grad_norm": 10.23141098022461, "learning_rate": 6.1816288852159865e-06, "loss": 0.4051, "step": 19397 }, { "epoch": 63.6, "grad_norm": 4.4483137130737305, "learning_rate": 6.180647479321484e-06, "loss": 0.5991, "step": 19398 }, { "epoch": 63.60327868852459, "grad_norm": 5.244459629058838, "learning_rate": 6.179666116494152e-06, "loss": 0.2215, "step": 19399 }, { "epoch": 63.60655737704918, "grad_norm": 5.8572211265563965, "learning_rate": 6.178684796745056e-06, "loss": 0.5545, "step": 19400 }, { "epoch": 63.609836065573774, "grad_norm": 4.574770927429199, "learning_rate": 6.177703520085257e-06, "loss": 0.6406, "step": 19401 }, { "epoch": 63.61311475409836, "grad_norm": 7.336895942687988, "learning_rate": 6.176722286525831e-06, "loss": 0.3788, "step": 19402 }, { "epoch": 63.61639344262295, "grad_norm": 6.0685601234436035, "learning_rate": 6.1757410960778356e-06, "loss": 0.5029, "step": 19403 }, { "epoch": 63.61967213114754, "grad_norm": 6.916145324707031, "learning_rate": 6.174759948752336e-06, "loss": 0.4017, "step": 19404 }, { "epoch": 63.622950819672134, "grad_norm": 7.018264293670654, "learning_rate": 6.173778844560389e-06, "loss": 0.4724, "step": 19405 }, { "epoch": 63.62622950819672, "grad_norm": 5.2485504150390625, "learning_rate": 6.17279778351307e-06, "loss": 0.6131, "step": 19406 }, { "epoch": 63.62950819672131, "grad_norm": 4.999108791351318, "learning_rate": 6.171816765621435e-06, "loss": 0.3305, "step": 19407 }, { "epoch": 63.6327868852459, "grad_norm": 7.870413303375244, "learning_rate": 6.170835790896547e-06, "loss": 0.3418, "step": 19408 }, { "epoch": 63.636065573770495, "grad_norm": 4.986876010894775, "learning_rate": 6.169854859349461e-06, "loss": 0.5396, "step": 19409 }, { "epoch": 63.63934426229508, "grad_norm": 6.159591197967529, "learning_rate": 6.168873970991249e-06, "loss": 0.4921, "step": 19410 }, { "epoch": 63.64262295081967, "grad_norm": 7.000304698944092, "learning_rate": 6.1678931258329665e-06, "loss": 0.4892, "step": 19411 }, { "epoch": 63.64590163934426, "grad_norm": 4.452494144439697, "learning_rate": 6.166912323885672e-06, "loss": 0.4312, "step": 19412 }, { "epoch": 63.649180327868855, "grad_norm": 6.185589790344238, "learning_rate": 6.165931565160428e-06, "loss": 0.4412, "step": 19413 }, { "epoch": 63.65245901639344, "grad_norm": 5.634472370147705, "learning_rate": 6.164950849668287e-06, "loss": 0.4811, "step": 19414 }, { "epoch": 63.65573770491803, "grad_norm": 5.288305282592773, "learning_rate": 6.1639701774203176e-06, "loss": 0.5475, "step": 19415 }, { "epoch": 63.65901639344262, "grad_norm": 8.123947143554688, "learning_rate": 6.162989548427573e-06, "loss": 0.353, "step": 19416 }, { "epoch": 63.662295081967216, "grad_norm": 27.146896362304688, "learning_rate": 6.162008962701107e-06, "loss": 0.5303, "step": 19417 }, { "epoch": 63.665573770491804, "grad_norm": 21.111909866333008, "learning_rate": 6.161028420251982e-06, "loss": 0.4505, "step": 19418 }, { "epoch": 63.66885245901639, "grad_norm": 5.247369289398193, "learning_rate": 6.160047921091254e-06, "loss": 0.4598, "step": 19419 }, { "epoch": 63.67213114754098, "grad_norm": 5.096258163452148, "learning_rate": 6.159067465229977e-06, "loss": 0.3834, "step": 19420 }, { "epoch": 63.675409836065576, "grad_norm": 6.6137189865112305, "learning_rate": 6.158087052679212e-06, "loss": 0.5607, "step": 19421 }, { "epoch": 63.678688524590164, "grad_norm": 4.899382591247559, "learning_rate": 6.1571066834500046e-06, "loss": 0.5529, "step": 19422 }, { "epoch": 63.68196721311475, "grad_norm": 5.385373115539551, "learning_rate": 6.156126357553416e-06, "loss": 0.3044, "step": 19423 }, { "epoch": 63.68524590163934, "grad_norm": 5.699224948883057, "learning_rate": 6.155146075000502e-06, "loss": 0.6396, "step": 19424 }, { "epoch": 63.68852459016394, "grad_norm": 4.673335552215576, "learning_rate": 6.154165835802312e-06, "loss": 0.5171, "step": 19425 }, { "epoch": 63.691803278688525, "grad_norm": 7.349233150482178, "learning_rate": 6.1531856399699035e-06, "loss": 0.547, "step": 19426 }, { "epoch": 63.69508196721311, "grad_norm": 5.358745574951172, "learning_rate": 6.152205487514321e-06, "loss": 0.6985, "step": 19427 }, { "epoch": 63.6983606557377, "grad_norm": 4.999501705169678, "learning_rate": 6.151225378446626e-06, "loss": 0.6297, "step": 19428 }, { "epoch": 63.7016393442623, "grad_norm": 5.0640435218811035, "learning_rate": 6.150245312777867e-06, "loss": 0.5087, "step": 19429 }, { "epoch": 63.704918032786885, "grad_norm": 4.1518754959106445, "learning_rate": 6.1492652905190955e-06, "loss": 0.255, "step": 19430 }, { "epoch": 63.708196721311474, "grad_norm": 13.77982234954834, "learning_rate": 6.1482853116813565e-06, "loss": 0.3282, "step": 19431 }, { "epoch": 63.71147540983607, "grad_norm": 5.076145172119141, "learning_rate": 6.1473053762757105e-06, "loss": 0.4502, "step": 19432 }, { "epoch": 63.71475409836066, "grad_norm": 5.254865646362305, "learning_rate": 6.146325484313203e-06, "loss": 0.4323, "step": 19433 }, { "epoch": 63.718032786885246, "grad_norm": 38.160133361816406, "learning_rate": 6.14534563580488e-06, "loss": 0.4453, "step": 19434 }, { "epoch": 63.721311475409834, "grad_norm": 8.151965141296387, "learning_rate": 6.144365830761789e-06, "loss": 0.3881, "step": 19435 }, { "epoch": 63.72459016393443, "grad_norm": 6.945479869842529, "learning_rate": 6.1433860691949875e-06, "loss": 0.4869, "step": 19436 }, { "epoch": 63.72786885245902, "grad_norm": 5.452750205993652, "learning_rate": 6.142406351115517e-06, "loss": 0.4855, "step": 19437 }, { "epoch": 63.731147540983606, "grad_norm": 6.097863674163818, "learning_rate": 6.141426676534428e-06, "loss": 0.4215, "step": 19438 }, { "epoch": 63.734426229508195, "grad_norm": 6.0167975425720215, "learning_rate": 6.140447045462763e-06, "loss": 0.4846, "step": 19439 }, { "epoch": 63.73770491803279, "grad_norm": 5.002938747406006, "learning_rate": 6.139467457911566e-06, "loss": 0.5347, "step": 19440 }, { "epoch": 63.74098360655738, "grad_norm": 5.358938217163086, "learning_rate": 6.138487913891893e-06, "loss": 0.5166, "step": 19441 }, { "epoch": 63.74426229508197, "grad_norm": 7.1054534912109375, "learning_rate": 6.137508413414784e-06, "loss": 0.3968, "step": 19442 }, { "epoch": 63.747540983606555, "grad_norm": 4.718687534332275, "learning_rate": 6.136528956491279e-06, "loss": 0.3072, "step": 19443 }, { "epoch": 63.75081967213115, "grad_norm": 5.690255165100098, "learning_rate": 6.135549543132428e-06, "loss": 0.5923, "step": 19444 }, { "epoch": 63.75409836065574, "grad_norm": 4.546443939208984, "learning_rate": 6.134570173349277e-06, "loss": 0.3583, "step": 19445 }, { "epoch": 63.75737704918033, "grad_norm": 6.319768905639648, "learning_rate": 6.1335908471528636e-06, "loss": 0.6432, "step": 19446 }, { "epoch": 63.760655737704916, "grad_norm": 5.2255778312683105, "learning_rate": 6.132611564554236e-06, "loss": 0.4422, "step": 19447 }, { "epoch": 63.76393442622951, "grad_norm": 7.465662002563477, "learning_rate": 6.1316323255644316e-06, "loss": 0.2653, "step": 19448 }, { "epoch": 63.7672131147541, "grad_norm": 6.432897090911865, "learning_rate": 6.130653130194496e-06, "loss": 0.4429, "step": 19449 }, { "epoch": 63.77049180327869, "grad_norm": 6.711737632751465, "learning_rate": 6.12967397845547e-06, "loss": 0.3475, "step": 19450 }, { "epoch": 63.773770491803276, "grad_norm": 5.2005438804626465, "learning_rate": 6.1286948703583946e-06, "loss": 0.3189, "step": 19451 }, { "epoch": 63.77704918032787, "grad_norm": 9.227823257446289, "learning_rate": 6.12771580591431e-06, "loss": 0.4727, "step": 19452 }, { "epoch": 63.78032786885246, "grad_norm": 5.512507438659668, "learning_rate": 6.1267367851342506e-06, "loss": 0.5045, "step": 19453 }, { "epoch": 63.78360655737705, "grad_norm": 5.818576335906982, "learning_rate": 6.125757808029267e-06, "loss": 0.5191, "step": 19454 }, { "epoch": 63.78688524590164, "grad_norm": 11.891851425170898, "learning_rate": 6.124778874610391e-06, "loss": 0.4619, "step": 19455 }, { "epoch": 63.79016393442623, "grad_norm": 4.707231044769287, "learning_rate": 6.1237999848886634e-06, "loss": 0.2232, "step": 19456 }, { "epoch": 63.79344262295082, "grad_norm": 5.144631385803223, "learning_rate": 6.122821138875117e-06, "loss": 0.2789, "step": 19457 }, { "epoch": 63.79672131147541, "grad_norm": 5.574187755584717, "learning_rate": 6.121842336580797e-06, "loss": 0.7808, "step": 19458 }, { "epoch": 63.8, "grad_norm": 4.060417175292969, "learning_rate": 6.120863578016736e-06, "loss": 0.4596, "step": 19459 }, { "epoch": 63.80327868852459, "grad_norm": 6.241719722747803, "learning_rate": 6.1198848631939725e-06, "loss": 0.5835, "step": 19460 }, { "epoch": 63.80655737704918, "grad_norm": 6.318624496459961, "learning_rate": 6.118906192123537e-06, "loss": 0.4633, "step": 19461 }, { "epoch": 63.80983606557377, "grad_norm": 5.367182731628418, "learning_rate": 6.1179275648164745e-06, "loss": 0.4599, "step": 19462 }, { "epoch": 63.81311475409836, "grad_norm": 6.528014659881592, "learning_rate": 6.116948981283815e-06, "loss": 0.4108, "step": 19463 }, { "epoch": 63.81639344262295, "grad_norm": 5.223431587219238, "learning_rate": 6.115970441536591e-06, "loss": 0.484, "step": 19464 }, { "epoch": 63.81967213114754, "grad_norm": 5.256823539733887, "learning_rate": 6.114991945585842e-06, "loss": 0.2351, "step": 19465 }, { "epoch": 63.82295081967213, "grad_norm": 4.580591201782227, "learning_rate": 6.1140134934425925e-06, "loss": 0.4281, "step": 19466 }, { "epoch": 63.82622950819672, "grad_norm": 5.256187438964844, "learning_rate": 6.113035085117884e-06, "loss": 0.4648, "step": 19467 }, { "epoch": 63.829508196721314, "grad_norm": 5.074652671813965, "learning_rate": 6.112056720622748e-06, "loss": 0.6301, "step": 19468 }, { "epoch": 63.8327868852459, "grad_norm": 5.679352760314941, "learning_rate": 6.111078399968212e-06, "loss": 0.6592, "step": 19469 }, { "epoch": 63.83606557377049, "grad_norm": 5.507526397705078, "learning_rate": 6.110100123165311e-06, "loss": 0.5266, "step": 19470 }, { "epoch": 63.83934426229508, "grad_norm": 5.1635236740112305, "learning_rate": 6.109121890225078e-06, "loss": 0.2052, "step": 19471 }, { "epoch": 63.842622950819674, "grad_norm": 5.138909339904785, "learning_rate": 6.108143701158541e-06, "loss": 0.3362, "step": 19472 }, { "epoch": 63.84590163934426, "grad_norm": 5.669534683227539, "learning_rate": 6.107165555976728e-06, "loss": 0.5009, "step": 19473 }, { "epoch": 63.84918032786885, "grad_norm": 10.778793334960938, "learning_rate": 6.10618745469067e-06, "loss": 0.3562, "step": 19474 }, { "epoch": 63.85245901639344, "grad_norm": 5.972937107086182, "learning_rate": 6.1052093973114e-06, "loss": 0.5225, "step": 19475 }, { "epoch": 63.855737704918035, "grad_norm": 5.5437188148498535, "learning_rate": 6.1042313838499425e-06, "loss": 0.2082, "step": 19476 }, { "epoch": 63.85901639344262, "grad_norm": 5.203896522521973, "learning_rate": 6.103253414317328e-06, "loss": 0.2577, "step": 19477 }, { "epoch": 63.86229508196721, "grad_norm": 4.779775619506836, "learning_rate": 6.1022754887245825e-06, "loss": 0.5344, "step": 19478 }, { "epoch": 63.86557377049181, "grad_norm": 6.905849456787109, "learning_rate": 6.101297607082729e-06, "loss": 0.3109, "step": 19479 }, { "epoch": 63.868852459016395, "grad_norm": 5.852219104766846, "learning_rate": 6.100319769402803e-06, "loss": 0.3502, "step": 19480 }, { "epoch": 63.87213114754098, "grad_norm": 6.325062274932861, "learning_rate": 6.099341975695827e-06, "loss": 0.457, "step": 19481 }, { "epoch": 63.87540983606557, "grad_norm": 5.35161828994751, "learning_rate": 6.098364225972826e-06, "loss": 0.3069, "step": 19482 }, { "epoch": 63.87868852459017, "grad_norm": 7.481870174407959, "learning_rate": 6.097386520244819e-06, "loss": 0.6325, "step": 19483 }, { "epoch": 63.881967213114756, "grad_norm": 16.583951950073242, "learning_rate": 6.0964088585228424e-06, "loss": 0.4431, "step": 19484 }, { "epoch": 63.885245901639344, "grad_norm": 5.833547115325928, "learning_rate": 6.095431240817915e-06, "loss": 0.3341, "step": 19485 }, { "epoch": 63.88852459016393, "grad_norm": 5.187015056610107, "learning_rate": 6.094453667141059e-06, "loss": 0.3987, "step": 19486 }, { "epoch": 63.89180327868853, "grad_norm": 6.053070068359375, "learning_rate": 6.0934761375032955e-06, "loss": 0.369, "step": 19487 }, { "epoch": 63.895081967213116, "grad_norm": 8.280373573303223, "learning_rate": 6.092498651915654e-06, "loss": 0.6452, "step": 19488 }, { "epoch": 63.898360655737704, "grad_norm": 4.806391716003418, "learning_rate": 6.091521210389153e-06, "loss": 0.4669, "step": 19489 }, { "epoch": 63.90163934426229, "grad_norm": 6.119194030761719, "learning_rate": 6.090543812934812e-06, "loss": 0.4273, "step": 19490 }, { "epoch": 63.90491803278689, "grad_norm": 5.835601806640625, "learning_rate": 6.089566459563659e-06, "loss": 0.6465, "step": 19491 }, { "epoch": 63.90819672131148, "grad_norm": 5.618711471557617, "learning_rate": 6.088589150286702e-06, "loss": 0.3781, "step": 19492 }, { "epoch": 63.911475409836065, "grad_norm": 5.824202537536621, "learning_rate": 6.087611885114974e-06, "loss": 0.3932, "step": 19493 }, { "epoch": 63.91475409836065, "grad_norm": 6.8491034507751465, "learning_rate": 6.086634664059491e-06, "loss": 0.4547, "step": 19494 }, { "epoch": 63.91803278688525, "grad_norm": 5.064292907714844, "learning_rate": 6.085657487131271e-06, "loss": 0.4705, "step": 19495 }, { "epoch": 63.92131147540984, "grad_norm": 5.863269805908203, "learning_rate": 6.084680354341328e-06, "loss": 0.4782, "step": 19496 }, { "epoch": 63.924590163934425, "grad_norm": 6.490723133087158, "learning_rate": 6.083703265700689e-06, "loss": 0.3241, "step": 19497 }, { "epoch": 63.927868852459014, "grad_norm": 7.115593910217285, "learning_rate": 6.082726221220368e-06, "loss": 0.5871, "step": 19498 }, { "epoch": 63.93114754098361, "grad_norm": 7.861824989318848, "learning_rate": 6.0817492209113795e-06, "loss": 0.5207, "step": 19499 }, { "epoch": 63.9344262295082, "grad_norm": 9.424386978149414, "learning_rate": 6.080772264784743e-06, "loss": 0.5865, "step": 19500 }, { "epoch": 63.937704918032786, "grad_norm": 5.6909074783325195, "learning_rate": 6.079795352851474e-06, "loss": 0.4503, "step": 19501 }, { "epoch": 63.940983606557374, "grad_norm": 6.52217435836792, "learning_rate": 6.078818485122589e-06, "loss": 0.4556, "step": 19502 }, { "epoch": 63.94426229508197, "grad_norm": 8.789372444152832, "learning_rate": 6.077841661609103e-06, "loss": 0.3879, "step": 19503 }, { "epoch": 63.94754098360656, "grad_norm": 5.801815986633301, "learning_rate": 6.0768648823220314e-06, "loss": 0.3564, "step": 19504 }, { "epoch": 63.950819672131146, "grad_norm": 4.436236381530762, "learning_rate": 6.075888147272382e-06, "loss": 0.4197, "step": 19505 }, { "epoch": 63.954098360655735, "grad_norm": 8.916581153869629, "learning_rate": 6.07491145647118e-06, "loss": 0.643, "step": 19506 }, { "epoch": 63.95737704918033, "grad_norm": 5.944423675537109, "learning_rate": 6.07393480992943e-06, "loss": 0.4832, "step": 19507 }, { "epoch": 63.96065573770492, "grad_norm": 5.8529438972473145, "learning_rate": 6.072958207658149e-06, "loss": 0.3337, "step": 19508 }, { "epoch": 63.96393442622951, "grad_norm": 6.103756427764893, "learning_rate": 6.0719816496683415e-06, "loss": 0.3347, "step": 19509 }, { "epoch": 63.967213114754095, "grad_norm": 6.251338481903076, "learning_rate": 6.07100513597103e-06, "loss": 0.6046, "step": 19510 }, { "epoch": 63.97049180327869, "grad_norm": 4.1463093757629395, "learning_rate": 6.0700286665772215e-06, "loss": 0.2742, "step": 19511 }, { "epoch": 63.97377049180328, "grad_norm": 6.6977081298828125, "learning_rate": 6.069052241497924e-06, "loss": 0.6315, "step": 19512 }, { "epoch": 63.97704918032787, "grad_norm": 5.339644908905029, "learning_rate": 6.068075860744147e-06, "loss": 0.4961, "step": 19513 }, { "epoch": 63.980327868852456, "grad_norm": 5.485395908355713, "learning_rate": 6.067099524326907e-06, "loss": 0.4136, "step": 19514 }, { "epoch": 63.98360655737705, "grad_norm": 8.92495346069336, "learning_rate": 6.066123232257208e-06, "loss": 0.5087, "step": 19515 }, { "epoch": 63.98688524590164, "grad_norm": 4.62190580368042, "learning_rate": 6.06514698454606e-06, "loss": 0.446, "step": 19516 }, { "epoch": 63.99016393442623, "grad_norm": 4.93229866027832, "learning_rate": 6.064170781204472e-06, "loss": 0.3875, "step": 19517 }, { "epoch": 63.993442622950816, "grad_norm": 7.273016452789307, "learning_rate": 6.063194622243446e-06, "loss": 0.4671, "step": 19518 }, { "epoch": 63.99672131147541, "grad_norm": 10.008614540100098, "learning_rate": 6.062218507673998e-06, "loss": 0.4433, "step": 19519 }, { "epoch": 64.0, "grad_norm": 5.323795318603516, "learning_rate": 6.061242437507131e-06, "loss": 0.5894, "step": 19520 }, { "epoch": 64.00327868852459, "grad_norm": 6.466884136199951, "learning_rate": 6.0602664117538505e-06, "loss": 0.3368, "step": 19521 }, { "epoch": 64.00655737704918, "grad_norm": 5.202090740203857, "learning_rate": 6.059290430425158e-06, "loss": 0.3472, "step": 19522 }, { "epoch": 64.00983606557377, "grad_norm": 5.571657657623291, "learning_rate": 6.058314493532067e-06, "loss": 0.4931, "step": 19523 }, { "epoch": 64.01311475409837, "grad_norm": 5.9139790534973145, "learning_rate": 6.0573386010855784e-06, "loss": 0.3976, "step": 19524 }, { "epoch": 64.01639344262296, "grad_norm": 6.3126702308654785, "learning_rate": 6.056362753096695e-06, "loss": 0.5112, "step": 19525 }, { "epoch": 64.01967213114754, "grad_norm": 9.674773216247559, "learning_rate": 6.0553869495764225e-06, "loss": 0.3702, "step": 19526 }, { "epoch": 64.02295081967213, "grad_norm": 5.138742446899414, "learning_rate": 6.054411190535765e-06, "loss": 0.4475, "step": 19527 }, { "epoch": 64.02622950819672, "grad_norm": 5.9776716232299805, "learning_rate": 6.053435475985721e-06, "loss": 0.3998, "step": 19528 }, { "epoch": 64.02950819672131, "grad_norm": 5.490751266479492, "learning_rate": 6.0524598059372986e-06, "loss": 0.536, "step": 19529 }, { "epoch": 64.0327868852459, "grad_norm": 11.151274681091309, "learning_rate": 6.051484180401495e-06, "loss": 0.362, "step": 19530 }, { "epoch": 64.03606557377049, "grad_norm": 4.917835712432861, "learning_rate": 6.050508599389311e-06, "loss": 0.5296, "step": 19531 }, { "epoch": 64.03934426229509, "grad_norm": 7.165524959564209, "learning_rate": 6.04953306291175e-06, "loss": 0.5244, "step": 19532 }, { "epoch": 64.04262295081968, "grad_norm": 15.96894359588623, "learning_rate": 6.048557570979811e-06, "loss": 0.6284, "step": 19533 }, { "epoch": 64.04590163934427, "grad_norm": 6.158575057983398, "learning_rate": 6.047582123604496e-06, "loss": 0.7177, "step": 19534 }, { "epoch": 64.04918032786885, "grad_norm": 7.5828857421875, "learning_rate": 6.0466067207967985e-06, "loss": 0.32, "step": 19535 }, { "epoch": 64.05245901639344, "grad_norm": 5.478557586669922, "learning_rate": 6.045631362567723e-06, "loss": 0.3021, "step": 19536 }, { "epoch": 64.05573770491803, "grad_norm": 5.963372707366943, "learning_rate": 6.044656048928266e-06, "loss": 0.5165, "step": 19537 }, { "epoch": 64.05901639344262, "grad_norm": 6.394724369049072, "learning_rate": 6.0436807798894245e-06, "loss": 0.5193, "step": 19538 }, { "epoch": 64.0622950819672, "grad_norm": 5.376319885253906, "learning_rate": 6.042705555462192e-06, "loss": 0.6182, "step": 19539 }, { "epoch": 64.06557377049181, "grad_norm": 4.530162334442139, "learning_rate": 6.041730375657573e-06, "loss": 0.3308, "step": 19540 }, { "epoch": 64.0688524590164, "grad_norm": 5.106815338134766, "learning_rate": 6.0407552404865595e-06, "loss": 0.431, "step": 19541 }, { "epoch": 64.07213114754099, "grad_norm": 4.829690456390381, "learning_rate": 6.0397801499601475e-06, "loss": 0.4985, "step": 19542 }, { "epoch": 64.07540983606557, "grad_norm": 4.551390171051025, "learning_rate": 6.038805104089331e-06, "loss": 0.4291, "step": 19543 }, { "epoch": 64.07868852459016, "grad_norm": 7.97532320022583, "learning_rate": 6.037830102885103e-06, "loss": 0.4821, "step": 19544 }, { "epoch": 64.08196721311475, "grad_norm": 7.985882759094238, "learning_rate": 6.036855146358462e-06, "loss": 0.5302, "step": 19545 }, { "epoch": 64.08524590163934, "grad_norm": 7.8140692710876465, "learning_rate": 6.035880234520401e-06, "loss": 0.485, "step": 19546 }, { "epoch": 64.08852459016393, "grad_norm": 4.5938239097595215, "learning_rate": 6.034905367381914e-06, "loss": 0.6931, "step": 19547 }, { "epoch": 64.09180327868853, "grad_norm": 5.7626543045043945, "learning_rate": 6.033930544953985e-06, "loss": 0.3132, "step": 19548 }, { "epoch": 64.09508196721312, "grad_norm": 5.855908393859863, "learning_rate": 6.032955767247617e-06, "loss": 0.3045, "step": 19549 }, { "epoch": 64.09836065573771, "grad_norm": 7.708423614501953, "learning_rate": 6.031981034273798e-06, "loss": 0.36, "step": 19550 }, { "epoch": 64.1016393442623, "grad_norm": 5.986908435821533, "learning_rate": 6.0310063460435175e-06, "loss": 0.3457, "step": 19551 }, { "epoch": 64.10491803278688, "grad_norm": 5.435356140136719, "learning_rate": 6.030031702567766e-06, "loss": 0.429, "step": 19552 }, { "epoch": 64.10819672131147, "grad_norm": 6.656430721282959, "learning_rate": 6.0290571038575365e-06, "loss": 0.6184, "step": 19553 }, { "epoch": 64.11147540983606, "grad_norm": 5.247680187225342, "learning_rate": 6.028082549923817e-06, "loss": 0.3421, "step": 19554 }, { "epoch": 64.11475409836065, "grad_norm": 5.349966526031494, "learning_rate": 6.027108040777596e-06, "loss": 0.6284, "step": 19555 }, { "epoch": 64.11803278688525, "grad_norm": 5.288865089416504, "learning_rate": 6.026133576429862e-06, "loss": 0.3706, "step": 19556 }, { "epoch": 64.12131147540984, "grad_norm": 5.756563663482666, "learning_rate": 6.025159156891605e-06, "loss": 0.5632, "step": 19557 }, { "epoch": 64.12459016393443, "grad_norm": 9.518729209899902, "learning_rate": 6.0241847821738085e-06, "loss": 0.3805, "step": 19558 }, { "epoch": 64.12786885245902, "grad_norm": 5.482675552368164, "learning_rate": 6.023210452287466e-06, "loss": 0.2691, "step": 19559 }, { "epoch": 64.1311475409836, "grad_norm": 6.59611701965332, "learning_rate": 6.02223616724356e-06, "loss": 0.4301, "step": 19560 }, { "epoch": 64.1344262295082, "grad_norm": 9.364850044250488, "learning_rate": 6.0212619270530725e-06, "loss": 0.6251, "step": 19561 }, { "epoch": 64.13770491803278, "grad_norm": 4.741506576538086, "learning_rate": 6.0202877317269985e-06, "loss": 0.422, "step": 19562 }, { "epoch": 64.14098360655737, "grad_norm": 6.035020351409912, "learning_rate": 6.019313581276318e-06, "loss": 0.6178, "step": 19563 }, { "epoch": 64.14426229508197, "grad_norm": 4.896277904510498, "learning_rate": 6.018339475712015e-06, "loss": 0.5652, "step": 19564 }, { "epoch": 64.14754098360656, "grad_norm": 6.281496524810791, "learning_rate": 6.017365415045071e-06, "loss": 0.3387, "step": 19565 }, { "epoch": 64.15081967213115, "grad_norm": 7.247679233551025, "learning_rate": 6.016391399286476e-06, "loss": 0.3871, "step": 19566 }, { "epoch": 64.15409836065574, "grad_norm": 6.156870365142822, "learning_rate": 6.0154174284472115e-06, "loss": 0.6047, "step": 19567 }, { "epoch": 64.15737704918033, "grad_norm": 6.312503337860107, "learning_rate": 6.014443502538256e-06, "loss": 0.7768, "step": 19568 }, { "epoch": 64.16065573770491, "grad_norm": 5.706517696380615, "learning_rate": 6.013469621570597e-06, "loss": 0.2871, "step": 19569 }, { "epoch": 64.1639344262295, "grad_norm": 6.224005699157715, "learning_rate": 6.012495785555207e-06, "loss": 0.3587, "step": 19570 }, { "epoch": 64.1672131147541, "grad_norm": 5.217976093292236, "learning_rate": 6.011521994503078e-06, "loss": 0.3845, "step": 19571 }, { "epoch": 64.1704918032787, "grad_norm": 4.471908092498779, "learning_rate": 6.010548248425184e-06, "loss": 0.3947, "step": 19572 }, { "epoch": 64.17377049180328, "grad_norm": 4.753155708312988, "learning_rate": 6.009574547332507e-06, "loss": 0.4864, "step": 19573 }, { "epoch": 64.17704918032787, "grad_norm": 5.404268264770508, "learning_rate": 6.008600891236023e-06, "loss": 0.463, "step": 19574 }, { "epoch": 64.18032786885246, "grad_norm": 5.569519519805908, "learning_rate": 6.007627280146717e-06, "loss": 0.4566, "step": 19575 }, { "epoch": 64.18360655737705, "grad_norm": 5.068517684936523, "learning_rate": 6.0066537140755655e-06, "loss": 0.6209, "step": 19576 }, { "epoch": 64.18688524590164, "grad_norm": 6.185604572296143, "learning_rate": 6.005680193033544e-06, "loss": 0.6571, "step": 19577 }, { "epoch": 64.19016393442622, "grad_norm": 5.325141906738281, "learning_rate": 6.00470671703163e-06, "loss": 0.377, "step": 19578 }, { "epoch": 64.19344262295083, "grad_norm": 5.267350673675537, "learning_rate": 6.0037332860808015e-06, "loss": 0.4645, "step": 19579 }, { "epoch": 64.19672131147541, "grad_norm": 4.723959445953369, "learning_rate": 6.002759900192039e-06, "loss": 0.3588, "step": 19580 }, { "epoch": 64.2, "grad_norm": 9.82856273651123, "learning_rate": 6.00178655937631e-06, "loss": 0.2578, "step": 19581 }, { "epoch": 64.20327868852459, "grad_norm": 5.036617279052734, "learning_rate": 6.0008132636445986e-06, "loss": 0.295, "step": 19582 }, { "epoch": 64.20655737704918, "grad_norm": 6.5530314445495605, "learning_rate": 5.999840013007874e-06, "loss": 0.3344, "step": 19583 }, { "epoch": 64.20983606557377, "grad_norm": 5.710973739624023, "learning_rate": 5.998866807477111e-06, "loss": 0.4512, "step": 19584 }, { "epoch": 64.21311475409836, "grad_norm": 6.064938545227051, "learning_rate": 5.997893647063286e-06, "loss": 0.473, "step": 19585 }, { "epoch": 64.21639344262294, "grad_norm": 5.082958698272705, "learning_rate": 5.996920531777374e-06, "loss": 0.3482, "step": 19586 }, { "epoch": 64.21967213114755, "grad_norm": 4.818122863769531, "learning_rate": 5.99594746163034e-06, "loss": 0.3747, "step": 19587 }, { "epoch": 64.22295081967214, "grad_norm": 24.50746726989746, "learning_rate": 5.994974436633165e-06, "loss": 0.4549, "step": 19588 }, { "epoch": 64.22622950819672, "grad_norm": 6.799044132232666, "learning_rate": 5.994001456796818e-06, "loss": 0.6124, "step": 19589 }, { "epoch": 64.22950819672131, "grad_norm": 5.76724100112915, "learning_rate": 5.993028522132269e-06, "loss": 0.5678, "step": 19590 }, { "epoch": 64.2327868852459, "grad_norm": 6.228525161743164, "learning_rate": 5.9920556326504906e-06, "loss": 0.3107, "step": 19591 }, { "epoch": 64.23606557377049, "grad_norm": 5.248135566711426, "learning_rate": 5.991082788362448e-06, "loss": 0.3329, "step": 19592 }, { "epoch": 64.23934426229508, "grad_norm": 6.21965217590332, "learning_rate": 5.990109989279118e-06, "loss": 0.3771, "step": 19593 }, { "epoch": 64.24262295081967, "grad_norm": 9.112056732177734, "learning_rate": 5.989137235411469e-06, "loss": 0.4414, "step": 19594 }, { "epoch": 64.24590163934427, "grad_norm": 5.995686054229736, "learning_rate": 5.988164526770467e-06, "loss": 0.4568, "step": 19595 }, { "epoch": 64.24918032786886, "grad_norm": 6.677524089813232, "learning_rate": 5.987191863367077e-06, "loss": 0.683, "step": 19596 }, { "epoch": 64.25245901639344, "grad_norm": 4.008650779724121, "learning_rate": 5.986219245212275e-06, "loss": 0.4336, "step": 19597 }, { "epoch": 64.25573770491803, "grad_norm": 5.560066223144531, "learning_rate": 5.985246672317025e-06, "loss": 0.5567, "step": 19598 }, { "epoch": 64.25901639344262, "grad_norm": 5.650545120239258, "learning_rate": 5.9842741446922926e-06, "loss": 0.5923, "step": 19599 }, { "epoch": 64.26229508196721, "grad_norm": 5.323775768280029, "learning_rate": 5.9833016623490405e-06, "loss": 0.3429, "step": 19600 }, { "epoch": 64.2655737704918, "grad_norm": 5.798598766326904, "learning_rate": 5.982329225298243e-06, "loss": 0.4928, "step": 19601 }, { "epoch": 64.26885245901639, "grad_norm": 5.568763256072998, "learning_rate": 5.98135683355086e-06, "loss": 0.394, "step": 19602 }, { "epoch": 64.27213114754099, "grad_norm": 4.9525628089904785, "learning_rate": 5.980384487117856e-06, "loss": 0.6523, "step": 19603 }, { "epoch": 64.27540983606558, "grad_norm": 5.886294841766357, "learning_rate": 5.979412186010196e-06, "loss": 0.4872, "step": 19604 }, { "epoch": 64.27868852459017, "grad_norm": 9.000330924987793, "learning_rate": 5.978439930238844e-06, "loss": 0.4772, "step": 19605 }, { "epoch": 64.28196721311475, "grad_norm": 7.937823295593262, "learning_rate": 5.977467719814765e-06, "loss": 0.4322, "step": 19606 }, { "epoch": 64.28524590163934, "grad_norm": 7.2660298347473145, "learning_rate": 5.976495554748918e-06, "loss": 0.3865, "step": 19607 }, { "epoch": 64.28852459016393, "grad_norm": 5.8690266609191895, "learning_rate": 5.975523435052268e-06, "loss": 0.5425, "step": 19608 }, { "epoch": 64.29180327868852, "grad_norm": 4.083981037139893, "learning_rate": 5.9745513607357744e-06, "loss": 0.304, "step": 19609 }, { "epoch": 64.29508196721312, "grad_norm": 5.572854042053223, "learning_rate": 5.9735793318104e-06, "loss": 0.3078, "step": 19610 }, { "epoch": 64.29836065573771, "grad_norm": 5.62203311920166, "learning_rate": 5.972607348287105e-06, "loss": 0.413, "step": 19611 }, { "epoch": 64.3016393442623, "grad_norm": 7.600523948669434, "learning_rate": 5.971635410176853e-06, "loss": 0.3698, "step": 19612 }, { "epoch": 64.30491803278689, "grad_norm": 9.815101623535156, "learning_rate": 5.9706635174905955e-06, "loss": 0.3322, "step": 19613 }, { "epoch": 64.30819672131148, "grad_norm": 6.301026344299316, "learning_rate": 5.969691670239297e-06, "loss": 0.4945, "step": 19614 }, { "epoch": 64.31147540983606, "grad_norm": 6.137999534606934, "learning_rate": 5.968719868433916e-06, "loss": 0.679, "step": 19615 }, { "epoch": 64.31475409836065, "grad_norm": 5.357300758361816, "learning_rate": 5.967748112085413e-06, "loss": 0.4749, "step": 19616 }, { "epoch": 64.31803278688524, "grad_norm": 5.769832611083984, "learning_rate": 5.966776401204741e-06, "loss": 0.2916, "step": 19617 }, { "epoch": 64.32131147540984, "grad_norm": 7.316508769989014, "learning_rate": 5.965804735802854e-06, "loss": 0.3427, "step": 19618 }, { "epoch": 64.32459016393443, "grad_norm": 9.412080764770508, "learning_rate": 5.964833115890718e-06, "loss": 0.5393, "step": 19619 }, { "epoch": 64.32786885245902, "grad_norm": 4.842799663543701, "learning_rate": 5.963861541479283e-06, "loss": 0.6866, "step": 19620 }, { "epoch": 64.33114754098361, "grad_norm": 5.289743423461914, "learning_rate": 5.962890012579508e-06, "loss": 0.5784, "step": 19621 }, { "epoch": 64.3344262295082, "grad_norm": 8.650407791137695, "learning_rate": 5.9619185292023395e-06, "loss": 0.7983, "step": 19622 }, { "epoch": 64.33770491803278, "grad_norm": 4.589821815490723, "learning_rate": 5.960947091358743e-06, "loss": 0.5682, "step": 19623 }, { "epoch": 64.34098360655737, "grad_norm": 6.319149494171143, "learning_rate": 5.959975699059668e-06, "loss": 0.4077, "step": 19624 }, { "epoch": 64.34426229508196, "grad_norm": 5.522919178009033, "learning_rate": 5.959004352316067e-06, "loss": 0.4096, "step": 19625 }, { "epoch": 64.34754098360656, "grad_norm": 7.533610820770264, "learning_rate": 5.958033051138888e-06, "loss": 0.6568, "step": 19626 }, { "epoch": 64.35081967213115, "grad_norm": 6.826817512512207, "learning_rate": 5.9570617955390955e-06, "loss": 0.6614, "step": 19627 }, { "epoch": 64.35409836065574, "grad_norm": 7.0866475105285645, "learning_rate": 5.956090585527634e-06, "loss": 0.3337, "step": 19628 }, { "epoch": 64.35737704918033, "grad_norm": 7.750503063201904, "learning_rate": 5.955119421115455e-06, "loss": 0.4458, "step": 19629 }, { "epoch": 64.36065573770492, "grad_norm": 4.806023120880127, "learning_rate": 5.954148302313509e-06, "loss": 0.2251, "step": 19630 }, { "epoch": 64.3639344262295, "grad_norm": 9.819087982177734, "learning_rate": 5.953177229132747e-06, "loss": 0.4607, "step": 19631 }, { "epoch": 64.3672131147541, "grad_norm": 5.6758880615234375, "learning_rate": 5.952206201584121e-06, "loss": 0.332, "step": 19632 }, { "epoch": 64.37049180327868, "grad_norm": 4.9386305809021, "learning_rate": 5.951235219678577e-06, "loss": 0.3509, "step": 19633 }, { "epoch": 64.37377049180328, "grad_norm": 5.519867420196533, "learning_rate": 5.950264283427069e-06, "loss": 0.3618, "step": 19634 }, { "epoch": 64.37704918032787, "grad_norm": 6.843827724456787, "learning_rate": 5.9492933928405384e-06, "loss": 0.4954, "step": 19635 }, { "epoch": 64.38032786885246, "grad_norm": 4.85128116607666, "learning_rate": 5.948322547929939e-06, "loss": 0.3289, "step": 19636 }, { "epoch": 64.38360655737705, "grad_norm": 6.658947467803955, "learning_rate": 5.947351748706214e-06, "loss": 0.5041, "step": 19637 }, { "epoch": 64.38688524590164, "grad_norm": 6.198559284210205, "learning_rate": 5.946380995180312e-06, "loss": 0.4818, "step": 19638 }, { "epoch": 64.39016393442623, "grad_norm": 5.939760684967041, "learning_rate": 5.945410287363178e-06, "loss": 0.2955, "step": 19639 }, { "epoch": 64.39344262295081, "grad_norm": 5.76837682723999, "learning_rate": 5.9444396252657586e-06, "loss": 0.5921, "step": 19640 }, { "epoch": 64.3967213114754, "grad_norm": 4.9788079261779785, "learning_rate": 5.943469008899001e-06, "loss": 0.4879, "step": 19641 }, { "epoch": 64.4, "grad_norm": 7.97413969039917, "learning_rate": 5.942498438273849e-06, "loss": 0.4411, "step": 19642 }, { "epoch": 64.4032786885246, "grad_norm": 3.8095104694366455, "learning_rate": 5.941527913401246e-06, "loss": 0.3328, "step": 19643 }, { "epoch": 64.40655737704918, "grad_norm": 6.152879238128662, "learning_rate": 5.9405574342921305e-06, "loss": 0.2654, "step": 19644 }, { "epoch": 64.40983606557377, "grad_norm": 5.459219932556152, "learning_rate": 5.939587000957454e-06, "loss": 0.4174, "step": 19645 }, { "epoch": 64.41311475409836, "grad_norm": 4.698117733001709, "learning_rate": 5.938616613408157e-06, "loss": 0.3364, "step": 19646 }, { "epoch": 64.41639344262295, "grad_norm": 5.873323440551758, "learning_rate": 5.937646271655181e-06, "loss": 0.3532, "step": 19647 }, { "epoch": 64.41967213114754, "grad_norm": 5.248401165008545, "learning_rate": 5.936675975709463e-06, "loss": 0.3133, "step": 19648 }, { "epoch": 64.42295081967212, "grad_norm": 5.137565612792969, "learning_rate": 5.935705725581951e-06, "loss": 0.4359, "step": 19649 }, { "epoch": 64.42622950819673, "grad_norm": 4.657776355743408, "learning_rate": 5.934735521283583e-06, "loss": 0.6251, "step": 19650 }, { "epoch": 64.42950819672132, "grad_norm": 4.311758995056152, "learning_rate": 5.933765362825299e-06, "loss": 0.4756, "step": 19651 }, { "epoch": 64.4327868852459, "grad_norm": 5.0102338790893555, "learning_rate": 5.932795250218035e-06, "loss": 0.3793, "step": 19652 }, { "epoch": 64.43606557377049, "grad_norm": 4.805154800415039, "learning_rate": 5.931825183472737e-06, "loss": 0.3206, "step": 19653 }, { "epoch": 64.43934426229508, "grad_norm": 5.528106689453125, "learning_rate": 5.93085516260034e-06, "loss": 0.4118, "step": 19654 }, { "epoch": 64.44262295081967, "grad_norm": 5.411589622497559, "learning_rate": 5.929885187611782e-06, "loss": 0.2631, "step": 19655 }, { "epoch": 64.44590163934426, "grad_norm": 6.462667942047119, "learning_rate": 5.928915258517999e-06, "loss": 0.2367, "step": 19656 }, { "epoch": 64.44918032786886, "grad_norm": 4.8011932373046875, "learning_rate": 5.9279453753299265e-06, "loss": 0.4004, "step": 19657 }, { "epoch": 64.45245901639345, "grad_norm": 14.954221725463867, "learning_rate": 5.926975538058508e-06, "loss": 0.4503, "step": 19658 }, { "epoch": 64.45573770491804, "grad_norm": 8.818185806274414, "learning_rate": 5.926005746714674e-06, "loss": 0.5294, "step": 19659 }, { "epoch": 64.45901639344262, "grad_norm": 4.770228385925293, "learning_rate": 5.92503600130936e-06, "loss": 0.8232, "step": 19660 }, { "epoch": 64.46229508196721, "grad_norm": 8.324336051940918, "learning_rate": 5.924066301853502e-06, "loss": 0.5137, "step": 19661 }, { "epoch": 64.4655737704918, "grad_norm": 7.621884346008301, "learning_rate": 5.9230966483580355e-06, "loss": 0.4255, "step": 19662 }, { "epoch": 64.46885245901639, "grad_norm": 6.468703269958496, "learning_rate": 5.922127040833892e-06, "loss": 0.3231, "step": 19663 }, { "epoch": 64.47213114754098, "grad_norm": 5.08537483215332, "learning_rate": 5.921157479292008e-06, "loss": 0.2777, "step": 19664 }, { "epoch": 64.47540983606558, "grad_norm": 6.473144054412842, "learning_rate": 5.920187963743314e-06, "loss": 0.4672, "step": 19665 }, { "epoch": 64.47868852459017, "grad_norm": 6.446266174316406, "learning_rate": 5.91921849419874e-06, "loss": 0.3222, "step": 19666 }, { "epoch": 64.48196721311476, "grad_norm": 5.788148403167725, "learning_rate": 5.9182490706692245e-06, "loss": 0.3532, "step": 19667 }, { "epoch": 64.48524590163935, "grad_norm": 7.258866310119629, "learning_rate": 5.9172796931656915e-06, "loss": 0.4408, "step": 19668 }, { "epoch": 64.48852459016393, "grad_norm": 4.786023139953613, "learning_rate": 5.916310361699078e-06, "loss": 0.4019, "step": 19669 }, { "epoch": 64.49180327868852, "grad_norm": 5.8562235832214355, "learning_rate": 5.9153410762803055e-06, "loss": 0.6734, "step": 19670 }, { "epoch": 64.49508196721311, "grad_norm": 5.60610818862915, "learning_rate": 5.914371836920314e-06, "loss": 0.588, "step": 19671 }, { "epoch": 64.4983606557377, "grad_norm": 6.844119071960449, "learning_rate": 5.9134026436300286e-06, "loss": 0.4893, "step": 19672 }, { "epoch": 64.5016393442623, "grad_norm": 5.576107501983643, "learning_rate": 5.912433496420376e-06, "loss": 0.3642, "step": 19673 }, { "epoch": 64.50491803278689, "grad_norm": 5.518764972686768, "learning_rate": 5.911464395302283e-06, "loss": 0.6308, "step": 19674 }, { "epoch": 64.50819672131148, "grad_norm": 5.826501846313477, "learning_rate": 5.910495340286683e-06, "loss": 0.4188, "step": 19675 }, { "epoch": 64.51147540983607, "grad_norm": 4.624368667602539, "learning_rate": 5.909526331384501e-06, "loss": 0.6534, "step": 19676 }, { "epoch": 64.51475409836065, "grad_norm": 10.122257232666016, "learning_rate": 5.908557368606662e-06, "loss": 0.5155, "step": 19677 }, { "epoch": 64.51803278688524, "grad_norm": 4.44560432434082, "learning_rate": 5.90758845196409e-06, "loss": 0.6496, "step": 19678 }, { "epoch": 64.52131147540983, "grad_norm": 4.887315273284912, "learning_rate": 5.9066195814677165e-06, "loss": 0.3969, "step": 19679 }, { "epoch": 64.52459016393442, "grad_norm": 6.225890159606934, "learning_rate": 5.905650757128463e-06, "loss": 0.3776, "step": 19680 }, { "epoch": 64.52786885245902, "grad_norm": 7.1178765296936035, "learning_rate": 5.904681978957254e-06, "loss": 0.3982, "step": 19681 }, { "epoch": 64.53114754098361, "grad_norm": 5.6260786056518555, "learning_rate": 5.903713246965013e-06, "loss": 0.2138, "step": 19682 }, { "epoch": 64.5344262295082, "grad_norm": 5.7213897705078125, "learning_rate": 5.902744561162661e-06, "loss": 0.4367, "step": 19683 }, { "epoch": 64.53770491803279, "grad_norm": 5.741661071777344, "learning_rate": 5.901775921561128e-06, "loss": 0.5327, "step": 19684 }, { "epoch": 64.54098360655738, "grad_norm": 6.092578887939453, "learning_rate": 5.900807328171334e-06, "loss": 0.5366, "step": 19685 }, { "epoch": 64.54426229508196, "grad_norm": 6.822015762329102, "learning_rate": 5.899838781004197e-06, "loss": 0.5321, "step": 19686 }, { "epoch": 64.54754098360655, "grad_norm": 8.913352012634277, "learning_rate": 5.8988702800706385e-06, "loss": 0.4669, "step": 19687 }, { "epoch": 64.55081967213114, "grad_norm": 7.810384750366211, "learning_rate": 5.897901825381587e-06, "loss": 0.6338, "step": 19688 }, { "epoch": 64.55409836065574, "grad_norm": 5.083075523376465, "learning_rate": 5.896933416947953e-06, "loss": 0.2229, "step": 19689 }, { "epoch": 64.55737704918033, "grad_norm": 7.926944255828857, "learning_rate": 5.895965054780663e-06, "loss": 0.7046, "step": 19690 }, { "epoch": 64.56065573770492, "grad_norm": 6.98657751083374, "learning_rate": 5.894996738890634e-06, "loss": 0.5607, "step": 19691 }, { "epoch": 64.56393442622951, "grad_norm": 4.740018844604492, "learning_rate": 5.894028469288784e-06, "loss": 0.6188, "step": 19692 }, { "epoch": 64.5672131147541, "grad_norm": 5.598658084869385, "learning_rate": 5.893060245986033e-06, "loss": 0.4983, "step": 19693 }, { "epoch": 64.57049180327868, "grad_norm": 6.251491069793701, "learning_rate": 5.892092068993299e-06, "loss": 0.4592, "step": 19694 }, { "epoch": 64.57377049180327, "grad_norm": 5.6216511726379395, "learning_rate": 5.891123938321498e-06, "loss": 0.5252, "step": 19695 }, { "epoch": 64.57704918032788, "grad_norm": 6.9904279708862305, "learning_rate": 5.890155853981543e-06, "loss": 0.4372, "step": 19696 }, { "epoch": 64.58032786885246, "grad_norm": 4.8129472732543945, "learning_rate": 5.889187815984357e-06, "loss": 0.654, "step": 19697 }, { "epoch": 64.58360655737705, "grad_norm": 5.818148136138916, "learning_rate": 5.8882198243408525e-06, "loss": 0.369, "step": 19698 }, { "epoch": 64.58688524590164, "grad_norm": 5.324487209320068, "learning_rate": 5.887251879061946e-06, "loss": 0.5839, "step": 19699 }, { "epoch": 64.59016393442623, "grad_norm": 8.165092468261719, "learning_rate": 5.8862839801585466e-06, "loss": 0.4501, "step": 19700 }, { "epoch": 64.59344262295082, "grad_norm": 7.167387008666992, "learning_rate": 5.885316127641576e-06, "loss": 0.6259, "step": 19701 }, { "epoch": 64.5967213114754, "grad_norm": 5.831766128540039, "learning_rate": 5.884348321521944e-06, "loss": 0.6438, "step": 19702 }, { "epoch": 64.6, "grad_norm": 6.244561672210693, "learning_rate": 5.8833805618105635e-06, "loss": 0.5041, "step": 19703 }, { "epoch": 64.6032786885246, "grad_norm": 16.320951461791992, "learning_rate": 5.882412848518344e-06, "loss": 0.4823, "step": 19704 }, { "epoch": 64.60655737704919, "grad_norm": 4.458797454833984, "learning_rate": 5.8814451816562045e-06, "loss": 0.5813, "step": 19705 }, { "epoch": 64.60983606557377, "grad_norm": 5.22843599319458, "learning_rate": 5.880477561235054e-06, "loss": 0.3636, "step": 19706 }, { "epoch": 64.61311475409836, "grad_norm": 5.494943618774414, "learning_rate": 5.879509987265802e-06, "loss": 0.4382, "step": 19707 }, { "epoch": 64.61639344262295, "grad_norm": 6.779026985168457, "learning_rate": 5.878542459759358e-06, "loss": 0.2843, "step": 19708 }, { "epoch": 64.61967213114754, "grad_norm": 9.243748664855957, "learning_rate": 5.877574978726629e-06, "loss": 0.6485, "step": 19709 }, { "epoch": 64.62295081967213, "grad_norm": 8.513199806213379, "learning_rate": 5.876607544178532e-06, "loss": 0.2958, "step": 19710 }, { "epoch": 64.62622950819672, "grad_norm": 4.77383279800415, "learning_rate": 5.875640156125975e-06, "loss": 0.4146, "step": 19711 }, { "epoch": 64.62950819672132, "grad_norm": 5.441247463226318, "learning_rate": 5.874672814579858e-06, "loss": 0.2683, "step": 19712 }, { "epoch": 64.6327868852459, "grad_norm": 8.156519889831543, "learning_rate": 5.873705519551096e-06, "loss": 0.4045, "step": 19713 }, { "epoch": 64.6360655737705, "grad_norm": 4.992990016937256, "learning_rate": 5.872738271050597e-06, "loss": 0.5357, "step": 19714 }, { "epoch": 64.63934426229508, "grad_norm": 5.498623371124268, "learning_rate": 5.871771069089264e-06, "loss": 0.327, "step": 19715 }, { "epoch": 64.64262295081967, "grad_norm": 5.787059307098389, "learning_rate": 5.870803913678002e-06, "loss": 0.4272, "step": 19716 }, { "epoch": 64.64590163934426, "grad_norm": 5.128597736358643, "learning_rate": 5.8698368048277225e-06, "loss": 0.3991, "step": 19717 }, { "epoch": 64.64918032786885, "grad_norm": 5.8517632484436035, "learning_rate": 5.868869742549326e-06, "loss": 0.7482, "step": 19718 }, { "epoch": 64.65245901639344, "grad_norm": 4.449399948120117, "learning_rate": 5.867902726853718e-06, "loss": 0.5052, "step": 19719 }, { "epoch": 64.65573770491804, "grad_norm": 5.597986698150635, "learning_rate": 5.866935757751804e-06, "loss": 0.4076, "step": 19720 }, { "epoch": 64.65901639344263, "grad_norm": 5.223006248474121, "learning_rate": 5.865968835254488e-06, "loss": 0.4481, "step": 19721 }, { "epoch": 64.66229508196722, "grad_norm": 7.363903999328613, "learning_rate": 5.865001959372666e-06, "loss": 0.4448, "step": 19722 }, { "epoch": 64.6655737704918, "grad_norm": 5.183190822601318, "learning_rate": 5.864035130117252e-06, "loss": 0.4996, "step": 19723 }, { "epoch": 64.66885245901639, "grad_norm": 5.355589866638184, "learning_rate": 5.8630683474991416e-06, "loss": 0.5332, "step": 19724 }, { "epoch": 64.67213114754098, "grad_norm": 5.529470443725586, "learning_rate": 5.8621016115292365e-06, "loss": 0.5483, "step": 19725 }, { "epoch": 64.67540983606557, "grad_norm": 6.604003429412842, "learning_rate": 5.861134922218434e-06, "loss": 0.6632, "step": 19726 }, { "epoch": 64.67868852459016, "grad_norm": 5.210387706756592, "learning_rate": 5.860168279577643e-06, "loss": 0.6371, "step": 19727 }, { "epoch": 64.68196721311476, "grad_norm": 5.739406108856201, "learning_rate": 5.859201683617758e-06, "loss": 0.5983, "step": 19728 }, { "epoch": 64.68524590163935, "grad_norm": 4.377575397491455, "learning_rate": 5.8582351343496805e-06, "loss": 0.3735, "step": 19729 }, { "epoch": 64.68852459016394, "grad_norm": 5.847423553466797, "learning_rate": 5.8572686317843034e-06, "loss": 0.543, "step": 19730 }, { "epoch": 64.69180327868852, "grad_norm": 5.8709025382995605, "learning_rate": 5.856302175932534e-06, "loss": 0.635, "step": 19731 }, { "epoch": 64.69508196721311, "grad_norm": 5.355490207672119, "learning_rate": 5.855335766805267e-06, "loss": 0.3968, "step": 19732 }, { "epoch": 64.6983606557377, "grad_norm": 6.602545261383057, "learning_rate": 5.8543694044133984e-06, "loss": 0.3209, "step": 19733 }, { "epoch": 64.70163934426229, "grad_norm": 6.155492305755615, "learning_rate": 5.853403088767824e-06, "loss": 0.5618, "step": 19734 }, { "epoch": 64.70491803278688, "grad_norm": 6.089819431304932, "learning_rate": 5.852436819879438e-06, "loss": 0.4503, "step": 19735 }, { "epoch": 64.70819672131148, "grad_norm": 5.7428436279296875, "learning_rate": 5.851470597759144e-06, "loss": 0.5856, "step": 19736 }, { "epoch": 64.71147540983607, "grad_norm": 7.978256702423096, "learning_rate": 5.850504422417831e-06, "loss": 0.5198, "step": 19737 }, { "epoch": 64.71475409836066, "grad_norm": 5.494292736053467, "learning_rate": 5.849538293866395e-06, "loss": 0.5009, "step": 19738 }, { "epoch": 64.71803278688525, "grad_norm": 5.286932945251465, "learning_rate": 5.8485722121157265e-06, "loss": 0.2819, "step": 19739 }, { "epoch": 64.72131147540983, "grad_norm": 8.25719928741455, "learning_rate": 5.847606177176727e-06, "loss": 0.4087, "step": 19740 }, { "epoch": 64.72459016393442, "grad_norm": 4.684027671813965, "learning_rate": 5.846640189060284e-06, "loss": 0.2916, "step": 19741 }, { "epoch": 64.72786885245901, "grad_norm": 5.858130931854248, "learning_rate": 5.845674247777291e-06, "loss": 0.3075, "step": 19742 }, { "epoch": 64.73114754098361, "grad_norm": 5.3141584396362305, "learning_rate": 5.84470835333864e-06, "loss": 0.4296, "step": 19743 }, { "epoch": 64.7344262295082, "grad_norm": 5.959165096282959, "learning_rate": 5.843742505755223e-06, "loss": 0.3596, "step": 19744 }, { "epoch": 64.73770491803279, "grad_norm": 8.023412704467773, "learning_rate": 5.842776705037932e-06, "loss": 0.3884, "step": 19745 }, { "epoch": 64.74098360655738, "grad_norm": 6.899608612060547, "learning_rate": 5.841810951197655e-06, "loss": 0.4004, "step": 19746 }, { "epoch": 64.74426229508197, "grad_norm": 4.08351993560791, "learning_rate": 5.840845244245283e-06, "loss": 0.3708, "step": 19747 }, { "epoch": 64.74754098360656, "grad_norm": 5.182743072509766, "learning_rate": 5.839879584191703e-06, "loss": 0.5135, "step": 19748 }, { "epoch": 64.75081967213114, "grad_norm": 5.337594985961914, "learning_rate": 5.838913971047811e-06, "loss": 0.5503, "step": 19749 }, { "epoch": 64.75409836065573, "grad_norm": 6.252325534820557, "learning_rate": 5.837948404824487e-06, "loss": 0.3362, "step": 19750 }, { "epoch": 64.75737704918033, "grad_norm": 10.342175483703613, "learning_rate": 5.836982885532625e-06, "loss": 0.5337, "step": 19751 }, { "epoch": 64.76065573770492, "grad_norm": 5.318865776062012, "learning_rate": 5.836017413183104e-06, "loss": 0.429, "step": 19752 }, { "epoch": 64.76393442622951, "grad_norm": 4.3087615966796875, "learning_rate": 5.8350519877868214e-06, "loss": 0.3824, "step": 19753 }, { "epoch": 64.7672131147541, "grad_norm": 7.673742771148682, "learning_rate": 5.834086609354657e-06, "loss": 0.4398, "step": 19754 }, { "epoch": 64.77049180327869, "grad_norm": 5.483654499053955, "learning_rate": 5.833121277897498e-06, "loss": 0.5311, "step": 19755 }, { "epoch": 64.77377049180328, "grad_norm": 4.599042892456055, "learning_rate": 5.832155993426225e-06, "loss": 0.3573, "step": 19756 }, { "epoch": 64.77704918032786, "grad_norm": 12.342629432678223, "learning_rate": 5.83119075595173e-06, "loss": 0.5098, "step": 19757 }, { "epoch": 64.78032786885245, "grad_norm": 5.79620885848999, "learning_rate": 5.830225565484895e-06, "loss": 0.4379, "step": 19758 }, { "epoch": 64.78360655737706, "grad_norm": 6.477372646331787, "learning_rate": 5.829260422036601e-06, "loss": 0.4384, "step": 19759 }, { "epoch": 64.78688524590164, "grad_norm": 6.0955281257629395, "learning_rate": 5.828295325617733e-06, "loss": 0.5624, "step": 19760 }, { "epoch": 64.79016393442623, "grad_norm": 5.4819207191467285, "learning_rate": 5.827330276239169e-06, "loss": 0.5298, "step": 19761 }, { "epoch": 64.79344262295082, "grad_norm": 5.4101481437683105, "learning_rate": 5.826365273911798e-06, "loss": 0.4834, "step": 19762 }, { "epoch": 64.79672131147541, "grad_norm": 4.504267692565918, "learning_rate": 5.825400318646497e-06, "loss": 0.2808, "step": 19763 }, { "epoch": 64.8, "grad_norm": 32.48923873901367, "learning_rate": 5.82443541045415e-06, "loss": 0.3584, "step": 19764 }, { "epoch": 64.80327868852459, "grad_norm": 6.683239936828613, "learning_rate": 5.82347054934563e-06, "loss": 0.3129, "step": 19765 }, { "epoch": 64.80655737704917, "grad_norm": 5.1296281814575195, "learning_rate": 5.822505735331826e-06, "loss": 0.4133, "step": 19766 }, { "epoch": 64.80983606557378, "grad_norm": 6.91019344329834, "learning_rate": 5.8215409684236135e-06, "loss": 0.3596, "step": 19767 }, { "epoch": 64.81311475409836, "grad_norm": 5.193314075469971, "learning_rate": 5.8205762486318705e-06, "loss": 0.4113, "step": 19768 }, { "epoch": 64.81639344262295, "grad_norm": 14.23112678527832, "learning_rate": 5.819611575967473e-06, "loss": 0.4435, "step": 19769 }, { "epoch": 64.81967213114754, "grad_norm": 9.180514335632324, "learning_rate": 5.818646950441306e-06, "loss": 0.3911, "step": 19770 }, { "epoch": 64.82295081967213, "grad_norm": 5.368328094482422, "learning_rate": 5.817682372064241e-06, "loss": 0.6255, "step": 19771 }, { "epoch": 64.82622950819672, "grad_norm": 6.904816150665283, "learning_rate": 5.816717840847156e-06, "loss": 0.4639, "step": 19772 }, { "epoch": 64.8295081967213, "grad_norm": 5.4496965408325195, "learning_rate": 5.8157533568009265e-06, "loss": 0.5091, "step": 19773 }, { "epoch": 64.8327868852459, "grad_norm": 6.163214683532715, "learning_rate": 5.8147889199364245e-06, "loss": 0.4348, "step": 19774 }, { "epoch": 64.8360655737705, "grad_norm": 5.458466053009033, "learning_rate": 5.8138245302645334e-06, "loss": 0.2798, "step": 19775 }, { "epoch": 64.83934426229509, "grad_norm": 6.291550159454346, "learning_rate": 5.812860187796123e-06, "loss": 0.6733, "step": 19776 }, { "epoch": 64.84262295081967, "grad_norm": 4.410654544830322, "learning_rate": 5.811895892542067e-06, "loss": 0.2868, "step": 19777 }, { "epoch": 64.84590163934426, "grad_norm": 6.521479606628418, "learning_rate": 5.810931644513236e-06, "loss": 0.457, "step": 19778 }, { "epoch": 64.84918032786885, "grad_norm": 9.622788429260254, "learning_rate": 5.80996744372051e-06, "loss": 0.5834, "step": 19779 }, { "epoch": 64.85245901639344, "grad_norm": 5.475706100463867, "learning_rate": 5.809003290174759e-06, "loss": 0.4888, "step": 19780 }, { "epoch": 64.85573770491803, "grad_norm": 4.421565532684326, "learning_rate": 5.808039183886847e-06, "loss": 0.3557, "step": 19781 }, { "epoch": 64.85901639344263, "grad_norm": 5.480216026306152, "learning_rate": 5.807075124867658e-06, "loss": 0.6084, "step": 19782 }, { "epoch": 64.86229508196722, "grad_norm": 6.7621941566467285, "learning_rate": 5.806111113128055e-06, "loss": 0.3072, "step": 19783 }, { "epoch": 64.8655737704918, "grad_norm": 5.770972728729248, "learning_rate": 5.805147148678907e-06, "loss": 0.6092, "step": 19784 }, { "epoch": 64.8688524590164, "grad_norm": 5.391505241394043, "learning_rate": 5.804183231531089e-06, "loss": 0.4948, "step": 19785 }, { "epoch": 64.87213114754098, "grad_norm": 5.714118003845215, "learning_rate": 5.803219361695469e-06, "loss": 0.6056, "step": 19786 }, { "epoch": 64.87540983606557, "grad_norm": 5.689597129821777, "learning_rate": 5.802255539182913e-06, "loss": 0.3879, "step": 19787 }, { "epoch": 64.87868852459016, "grad_norm": 5.4529008865356445, "learning_rate": 5.801291764004287e-06, "loss": 0.4961, "step": 19788 }, { "epoch": 64.88196721311475, "grad_norm": 5.559029579162598, "learning_rate": 5.800328036170466e-06, "loss": 0.3316, "step": 19789 }, { "epoch": 64.88524590163935, "grad_norm": 5.271095275878906, "learning_rate": 5.799364355692312e-06, "loss": 0.3013, "step": 19790 }, { "epoch": 64.88852459016394, "grad_norm": 5.073611736297607, "learning_rate": 5.7984007225806895e-06, "loss": 0.2584, "step": 19791 }, { "epoch": 64.89180327868853, "grad_norm": 5.240966320037842, "learning_rate": 5.797437136846471e-06, "loss": 0.6034, "step": 19792 }, { "epoch": 64.89508196721312, "grad_norm": 5.533940315246582, "learning_rate": 5.796473598500518e-06, "loss": 0.6692, "step": 19793 }, { "epoch": 64.8983606557377, "grad_norm": 5.07332181930542, "learning_rate": 5.795510107553697e-06, "loss": 0.3616, "step": 19794 }, { "epoch": 64.90163934426229, "grad_norm": 6.3418192863464355, "learning_rate": 5.794546664016867e-06, "loss": 0.5629, "step": 19795 }, { "epoch": 64.90491803278688, "grad_norm": 5.35438871383667, "learning_rate": 5.793583267900899e-06, "loss": 0.3203, "step": 19796 }, { "epoch": 64.90819672131147, "grad_norm": 6.569352626800537, "learning_rate": 5.792619919216653e-06, "loss": 0.4541, "step": 19797 }, { "epoch": 64.91147540983607, "grad_norm": 4.790137767791748, "learning_rate": 5.791656617974992e-06, "loss": 0.3681, "step": 19798 }, { "epoch": 64.91475409836066, "grad_norm": 5.495965480804443, "learning_rate": 5.790693364186779e-06, "loss": 0.3479, "step": 19799 }, { "epoch": 64.91803278688525, "grad_norm": 5.79541540145874, "learning_rate": 5.7897301578628714e-06, "loss": 0.3108, "step": 19800 }, { "epoch": 64.92131147540984, "grad_norm": 4.533618927001953, "learning_rate": 5.788766999014137e-06, "loss": 0.3488, "step": 19801 }, { "epoch": 64.92459016393443, "grad_norm": 5.080368518829346, "learning_rate": 5.787803887651433e-06, "loss": 0.7174, "step": 19802 }, { "epoch": 64.92786885245901, "grad_norm": 6.733427047729492, "learning_rate": 5.78684082378562e-06, "loss": 0.6053, "step": 19803 }, { "epoch": 64.9311475409836, "grad_norm": 5.920557975769043, "learning_rate": 5.785877807427553e-06, "loss": 0.4576, "step": 19804 }, { "epoch": 64.93442622950819, "grad_norm": 4.742975234985352, "learning_rate": 5.7849148385881e-06, "loss": 0.4314, "step": 19805 }, { "epoch": 64.9377049180328, "grad_norm": 4.878332138061523, "learning_rate": 5.783951917278115e-06, "loss": 0.4972, "step": 19806 }, { "epoch": 64.94098360655738, "grad_norm": 5.839559078216553, "learning_rate": 5.782989043508456e-06, "loss": 0.2297, "step": 19807 }, { "epoch": 64.94426229508197, "grad_norm": 5.209255695343018, "learning_rate": 5.782026217289975e-06, "loss": 0.3888, "step": 19808 }, { "epoch": 64.94754098360656, "grad_norm": 4.229194164276123, "learning_rate": 5.7810634386335384e-06, "loss": 0.5389, "step": 19809 }, { "epoch": 64.95081967213115, "grad_norm": 5.984829902648926, "learning_rate": 5.780100707549998e-06, "loss": 0.3277, "step": 19810 }, { "epoch": 64.95409836065573, "grad_norm": 6.179839134216309, "learning_rate": 5.7791380240502106e-06, "loss": 0.2962, "step": 19811 }, { "epoch": 64.95737704918032, "grad_norm": 6.169498920440674, "learning_rate": 5.778175388145029e-06, "loss": 0.4379, "step": 19812 }, { "epoch": 64.96065573770491, "grad_norm": 5.4082818031311035, "learning_rate": 5.7772127998453066e-06, "loss": 0.1852, "step": 19813 }, { "epoch": 64.96393442622951, "grad_norm": 7.642482280731201, "learning_rate": 5.776250259161904e-06, "loss": 0.6502, "step": 19814 }, { "epoch": 64.9672131147541, "grad_norm": 5.558459281921387, "learning_rate": 5.77528776610567e-06, "loss": 0.3585, "step": 19815 }, { "epoch": 64.97049180327869, "grad_norm": 33.52861022949219, "learning_rate": 5.7743253206874616e-06, "loss": 0.7026, "step": 19816 }, { "epoch": 64.97377049180328, "grad_norm": 6.034562587738037, "learning_rate": 5.773362922918124e-06, "loss": 0.5732, "step": 19817 }, { "epoch": 64.97704918032787, "grad_norm": 5.647297382354736, "learning_rate": 5.7724005728085175e-06, "loss": 0.4949, "step": 19818 }, { "epoch": 64.98032786885246, "grad_norm": 5.520177364349365, "learning_rate": 5.771438270369491e-06, "loss": 0.6105, "step": 19819 }, { "epoch": 64.98360655737704, "grad_norm": 4.695064067840576, "learning_rate": 5.770476015611893e-06, "loss": 0.3226, "step": 19820 }, { "epoch": 64.98688524590163, "grad_norm": 11.51259994506836, "learning_rate": 5.769513808546573e-06, "loss": 0.504, "step": 19821 }, { "epoch": 64.99016393442623, "grad_norm": 5.768029689788818, "learning_rate": 5.768551649184386e-06, "loss": 0.3778, "step": 19822 }, { "epoch": 64.99344262295082, "grad_norm": 5.411552429199219, "learning_rate": 5.7675895375361804e-06, "loss": 0.4572, "step": 19823 }, { "epoch": 64.99672131147541, "grad_norm": 7.22039794921875, "learning_rate": 5.766627473612802e-06, "loss": 0.378, "step": 19824 }, { "epoch": 65.0, "grad_norm": 7.024973392486572, "learning_rate": 5.765665457425102e-06, "loss": 0.5356, "step": 19825 }, { "epoch": 65.00327868852459, "grad_norm": 5.236657619476318, "learning_rate": 5.764703488983923e-06, "loss": 0.5425, "step": 19826 }, { "epoch": 65.00655737704918, "grad_norm": 6.286177158355713, "learning_rate": 5.763741568300118e-06, "loss": 0.4176, "step": 19827 }, { "epoch": 65.00983606557377, "grad_norm": 4.635154724121094, "learning_rate": 5.762779695384531e-06, "loss": 0.253, "step": 19828 }, { "epoch": 65.01311475409837, "grad_norm": 5.675628185272217, "learning_rate": 5.76181787024801e-06, "loss": 0.3802, "step": 19829 }, { "epoch": 65.01639344262296, "grad_norm": 5.673379898071289, "learning_rate": 5.760856092901394e-06, "loss": 0.5464, "step": 19830 }, { "epoch": 65.01967213114754, "grad_norm": 6.2860212326049805, "learning_rate": 5.759894363355538e-06, "loss": 0.5866, "step": 19831 }, { "epoch": 65.02295081967213, "grad_norm": 6.5123677253723145, "learning_rate": 5.758932681621281e-06, "loss": 0.3641, "step": 19832 }, { "epoch": 65.02622950819672, "grad_norm": 7.883888244628906, "learning_rate": 5.7579710477094675e-06, "loss": 0.4281, "step": 19833 }, { "epoch": 65.02950819672131, "grad_norm": 8.545907020568848, "learning_rate": 5.757009461630938e-06, "loss": 0.3371, "step": 19834 }, { "epoch": 65.0327868852459, "grad_norm": 7.2133378982543945, "learning_rate": 5.756047923396542e-06, "loss": 0.504, "step": 19835 }, { "epoch": 65.03606557377049, "grad_norm": 20.756425857543945, "learning_rate": 5.755086433017119e-06, "loss": 0.5194, "step": 19836 }, { "epoch": 65.03934426229509, "grad_norm": 4.920678615570068, "learning_rate": 5.754124990503504e-06, "loss": 0.3458, "step": 19837 }, { "epoch": 65.04262295081968, "grad_norm": 5.97735071182251, "learning_rate": 5.753163595866551e-06, "loss": 0.4873, "step": 19838 }, { "epoch": 65.04590163934427, "grad_norm": 6.595041275024414, "learning_rate": 5.752202249117091e-06, "loss": 0.5211, "step": 19839 }, { "epoch": 65.04918032786885, "grad_norm": 4.921850204467773, "learning_rate": 5.7512409502659664e-06, "loss": 0.5006, "step": 19840 }, { "epoch": 65.05245901639344, "grad_norm": 5.6529221534729, "learning_rate": 5.75027969932402e-06, "loss": 0.4157, "step": 19841 }, { "epoch": 65.05573770491803, "grad_norm": 4.405836582183838, "learning_rate": 5.749318496302088e-06, "loss": 0.4585, "step": 19842 }, { "epoch": 65.05901639344262, "grad_norm": 6.621982574462891, "learning_rate": 5.748357341211006e-06, "loss": 0.6379, "step": 19843 }, { "epoch": 65.0622950819672, "grad_norm": 5.264084339141846, "learning_rate": 5.747396234061621e-06, "loss": 0.5205, "step": 19844 }, { "epoch": 65.06557377049181, "grad_norm": 5.734551429748535, "learning_rate": 5.746435174864764e-06, "loss": 0.3919, "step": 19845 }, { "epoch": 65.0688524590164, "grad_norm": 4.905429840087891, "learning_rate": 5.745474163631272e-06, "loss": 0.3551, "step": 19846 }, { "epoch": 65.07213114754099, "grad_norm": 8.060690879821777, "learning_rate": 5.74451320037198e-06, "loss": 0.4372, "step": 19847 }, { "epoch": 65.07540983606557, "grad_norm": 8.685709953308105, "learning_rate": 5.74355228509773e-06, "loss": 0.593, "step": 19848 }, { "epoch": 65.07868852459016, "grad_norm": 4.876192092895508, "learning_rate": 5.742591417819353e-06, "loss": 0.3612, "step": 19849 }, { "epoch": 65.08196721311475, "grad_norm": 7.530868053436279, "learning_rate": 5.741630598547685e-06, "loss": 0.532, "step": 19850 }, { "epoch": 65.08524590163934, "grad_norm": 6.863060474395752, "learning_rate": 5.7406698272935595e-06, "loss": 0.4007, "step": 19851 }, { "epoch": 65.08852459016393, "grad_norm": 7.2181901931762695, "learning_rate": 5.739709104067805e-06, "loss": 0.3727, "step": 19852 }, { "epoch": 65.09180327868853, "grad_norm": 5.8336381912231445, "learning_rate": 5.738748428881265e-06, "loss": 0.4307, "step": 19853 }, { "epoch": 65.09508196721312, "grad_norm": 7.605769157409668, "learning_rate": 5.737787801744768e-06, "loss": 0.4486, "step": 19854 }, { "epoch": 65.09836065573771, "grad_norm": 9.133462905883789, "learning_rate": 5.736827222669144e-06, "loss": 0.4759, "step": 19855 }, { "epoch": 65.1016393442623, "grad_norm": 6.805262088775635, "learning_rate": 5.7358666916652215e-06, "loss": 0.5256, "step": 19856 }, { "epoch": 65.10491803278688, "grad_norm": 5.552088737487793, "learning_rate": 5.73490620874384e-06, "loss": 0.3439, "step": 19857 }, { "epoch": 65.10819672131147, "grad_norm": 4.894779205322266, "learning_rate": 5.733945773915826e-06, "loss": 0.7115, "step": 19858 }, { "epoch": 65.11147540983606, "grad_norm": 6.594311714172363, "learning_rate": 5.732985387192007e-06, "loss": 0.4146, "step": 19859 }, { "epoch": 65.11475409836065, "grad_norm": 4.606600761413574, "learning_rate": 5.7320250485832116e-06, "loss": 0.393, "step": 19860 }, { "epoch": 65.11803278688525, "grad_norm": 5.2228875160217285, "learning_rate": 5.7310647581002755e-06, "loss": 0.6413, "step": 19861 }, { "epoch": 65.12131147540984, "grad_norm": 7.657063961029053, "learning_rate": 5.730104515754024e-06, "loss": 0.5623, "step": 19862 }, { "epoch": 65.12459016393443, "grad_norm": 6.464356422424316, "learning_rate": 5.729144321555282e-06, "loss": 0.4431, "step": 19863 }, { "epoch": 65.12786885245902, "grad_norm": 6.017506122589111, "learning_rate": 5.7281841755148795e-06, "loss": 0.3544, "step": 19864 }, { "epoch": 65.1311475409836, "grad_norm": 7.823716163635254, "learning_rate": 5.727224077643636e-06, "loss": 0.2777, "step": 19865 }, { "epoch": 65.1344262295082, "grad_norm": 4.5861382484436035, "learning_rate": 5.726264027952391e-06, "loss": 0.5011, "step": 19866 }, { "epoch": 65.13770491803278, "grad_norm": 5.082658290863037, "learning_rate": 5.725304026451959e-06, "loss": 0.4431, "step": 19867 }, { "epoch": 65.14098360655737, "grad_norm": 12.52746868133545, "learning_rate": 5.724344073153171e-06, "loss": 0.6555, "step": 19868 }, { "epoch": 65.14426229508197, "grad_norm": 5.311376094818115, "learning_rate": 5.723384168066845e-06, "loss": 0.3149, "step": 19869 }, { "epoch": 65.14754098360656, "grad_norm": 8.524367332458496, "learning_rate": 5.722424311203812e-06, "loss": 0.5099, "step": 19870 }, { "epoch": 65.15081967213115, "grad_norm": 5.493475914001465, "learning_rate": 5.721464502574893e-06, "loss": 0.3942, "step": 19871 }, { "epoch": 65.15409836065574, "grad_norm": 4.906352996826172, "learning_rate": 5.720504742190911e-06, "loss": 0.4554, "step": 19872 }, { "epoch": 65.15737704918033, "grad_norm": 5.5600266456604, "learning_rate": 5.719545030062682e-06, "loss": 0.374, "step": 19873 }, { "epoch": 65.16065573770491, "grad_norm": 6.723227024078369, "learning_rate": 5.7185853662010384e-06, "loss": 0.3836, "step": 19874 }, { "epoch": 65.1639344262295, "grad_norm": 7.012892246246338, "learning_rate": 5.7176257506167956e-06, "loss": 0.3235, "step": 19875 }, { "epoch": 65.1672131147541, "grad_norm": 5.4382123947143555, "learning_rate": 5.716666183320776e-06, "loss": 0.3677, "step": 19876 }, { "epoch": 65.1704918032787, "grad_norm": 6.04575777053833, "learning_rate": 5.715706664323799e-06, "loss": 0.4244, "step": 19877 }, { "epoch": 65.17377049180328, "grad_norm": 6.250089645385742, "learning_rate": 5.714747193636678e-06, "loss": 0.353, "step": 19878 }, { "epoch": 65.17704918032787, "grad_norm": 5.486930847167969, "learning_rate": 5.713787771270244e-06, "loss": 0.3634, "step": 19879 }, { "epoch": 65.18032786885246, "grad_norm": 5.16532039642334, "learning_rate": 5.7128283972353085e-06, "loss": 0.3741, "step": 19880 }, { "epoch": 65.18360655737705, "grad_norm": 5.707698822021484, "learning_rate": 5.71186907154269e-06, "loss": 0.4055, "step": 19881 }, { "epoch": 65.18688524590164, "grad_norm": 6.472205638885498, "learning_rate": 5.710909794203204e-06, "loss": 0.4084, "step": 19882 }, { "epoch": 65.19016393442622, "grad_norm": 4.6458234786987305, "learning_rate": 5.709950565227671e-06, "loss": 0.3063, "step": 19883 }, { "epoch": 65.19344262295083, "grad_norm": 5.678030014038086, "learning_rate": 5.708991384626908e-06, "loss": 0.422, "step": 19884 }, { "epoch": 65.19672131147541, "grad_norm": 5.688613414764404, "learning_rate": 5.708032252411728e-06, "loss": 0.3177, "step": 19885 }, { "epoch": 65.2, "grad_norm": 5.818294525146484, "learning_rate": 5.707073168592943e-06, "loss": 0.4997, "step": 19886 }, { "epoch": 65.20327868852459, "grad_norm": 5.751858234405518, "learning_rate": 5.706114133181375e-06, "loss": 0.2267, "step": 19887 }, { "epoch": 65.20655737704918, "grad_norm": 7.902892589569092, "learning_rate": 5.705155146187835e-06, "loss": 0.3033, "step": 19888 }, { "epoch": 65.20983606557377, "grad_norm": 7.04282808303833, "learning_rate": 5.704196207623136e-06, "loss": 0.5092, "step": 19889 }, { "epoch": 65.21311475409836, "grad_norm": 4.845231533050537, "learning_rate": 5.703237317498087e-06, "loss": 0.3824, "step": 19890 }, { "epoch": 65.21639344262294, "grad_norm": 7.179180145263672, "learning_rate": 5.7022784758235095e-06, "loss": 0.4308, "step": 19891 }, { "epoch": 65.21967213114755, "grad_norm": 5.18721342086792, "learning_rate": 5.701319682610211e-06, "loss": 0.4388, "step": 19892 }, { "epoch": 65.22295081967214, "grad_norm": 6.269205093383789, "learning_rate": 5.700360937868998e-06, "loss": 0.5057, "step": 19893 }, { "epoch": 65.22622950819672, "grad_norm": 4.901308059692383, "learning_rate": 5.6994022416106896e-06, "loss": 0.4677, "step": 19894 }, { "epoch": 65.22950819672131, "grad_norm": 8.027399063110352, "learning_rate": 5.698443593846092e-06, "loss": 0.6036, "step": 19895 }, { "epoch": 65.2327868852459, "grad_norm": 4.656598091125488, "learning_rate": 5.6974849945860135e-06, "loss": 0.4864, "step": 19896 }, { "epoch": 65.23606557377049, "grad_norm": 5.457470893859863, "learning_rate": 5.696526443841268e-06, "loss": 0.391, "step": 19897 }, { "epoch": 65.23934426229508, "grad_norm": 6.594692230224609, "learning_rate": 5.6955679416226605e-06, "loss": 0.4003, "step": 19898 }, { "epoch": 65.24262295081967, "grad_norm": 8.015178680419922, "learning_rate": 5.694609487940997e-06, "loss": 0.6504, "step": 19899 }, { "epoch": 65.24590163934427, "grad_norm": 15.567276954650879, "learning_rate": 5.693651082807092e-06, "loss": 0.5743, "step": 19900 }, { "epoch": 65.24918032786886, "grad_norm": 4.649704456329346, "learning_rate": 5.692692726231748e-06, "loss": 0.6719, "step": 19901 }, { "epoch": 65.25245901639344, "grad_norm": 7.11080265045166, "learning_rate": 5.691734418225772e-06, "loss": 0.3906, "step": 19902 }, { "epoch": 65.25573770491803, "grad_norm": 7.619994640350342, "learning_rate": 5.69077615879997e-06, "loss": 0.6956, "step": 19903 }, { "epoch": 65.25901639344262, "grad_norm": 19.053922653198242, "learning_rate": 5.689817947965144e-06, "loss": 0.4001, "step": 19904 }, { "epoch": 65.26229508196721, "grad_norm": 5.196062088012695, "learning_rate": 5.688859785732105e-06, "loss": 0.6135, "step": 19905 }, { "epoch": 65.2655737704918, "grad_norm": 6.832465171813965, "learning_rate": 5.687901672111655e-06, "loss": 0.4956, "step": 19906 }, { "epoch": 65.26885245901639, "grad_norm": 11.039093017578125, "learning_rate": 5.686943607114597e-06, "loss": 0.4626, "step": 19907 }, { "epoch": 65.27213114754099, "grad_norm": 5.2388596534729, "learning_rate": 5.685985590751731e-06, "loss": 0.5353, "step": 19908 }, { "epoch": 65.27540983606558, "grad_norm": 5.116909027099609, "learning_rate": 5.685027623033865e-06, "loss": 0.3124, "step": 19909 }, { "epoch": 65.27868852459017, "grad_norm": 4.998529434204102, "learning_rate": 5.6840697039717994e-06, "loss": 0.4573, "step": 19910 }, { "epoch": 65.28196721311475, "grad_norm": 5.187666893005371, "learning_rate": 5.683111833576337e-06, "loss": 0.5097, "step": 19911 }, { "epoch": 65.28524590163934, "grad_norm": 6.484959602355957, "learning_rate": 5.682154011858272e-06, "loss": 0.2693, "step": 19912 }, { "epoch": 65.28852459016393, "grad_norm": 5.732092380523682, "learning_rate": 5.681196238828414e-06, "loss": 0.6554, "step": 19913 }, { "epoch": 65.29180327868852, "grad_norm": 9.019679069519043, "learning_rate": 5.680238514497559e-06, "loss": 0.3439, "step": 19914 }, { "epoch": 65.29508196721312, "grad_norm": 7.565916538238525, "learning_rate": 5.6792808388765065e-06, "loss": 0.6648, "step": 19915 }, { "epoch": 65.29836065573771, "grad_norm": 5.504287242889404, "learning_rate": 5.678323211976055e-06, "loss": 0.3477, "step": 19916 }, { "epoch": 65.3016393442623, "grad_norm": 5.277853012084961, "learning_rate": 5.677365633806997e-06, "loss": 0.3143, "step": 19917 }, { "epoch": 65.30491803278689, "grad_norm": 5.803097724914551, "learning_rate": 5.676408104380143e-06, "loss": 0.3418, "step": 19918 }, { "epoch": 65.30819672131148, "grad_norm": 5.43834114074707, "learning_rate": 5.67545062370628e-06, "loss": 0.5908, "step": 19919 }, { "epoch": 65.31147540983606, "grad_norm": 6.042140007019043, "learning_rate": 5.6744931917962084e-06, "loss": 0.5127, "step": 19920 }, { "epoch": 65.31475409836065, "grad_norm": 5.796270370483398, "learning_rate": 5.673535808660721e-06, "loss": 0.5025, "step": 19921 }, { "epoch": 65.31803278688524, "grad_norm": 6.818975925445557, "learning_rate": 5.672578474310618e-06, "loss": 0.4357, "step": 19922 }, { "epoch": 65.32131147540984, "grad_norm": 5.0602264404296875, "learning_rate": 5.671621188756693e-06, "loss": 0.2981, "step": 19923 }, { "epoch": 65.32459016393443, "grad_norm": 7.649343967437744, "learning_rate": 5.670663952009739e-06, "loss": 0.4677, "step": 19924 }, { "epoch": 65.32786885245902, "grad_norm": 4.841614246368408, "learning_rate": 5.6697067640805466e-06, "loss": 0.3181, "step": 19925 }, { "epoch": 65.33114754098361, "grad_norm": 16.400449752807617, "learning_rate": 5.668749624979916e-06, "loss": 0.518, "step": 19926 }, { "epoch": 65.3344262295082, "grad_norm": 6.353786468505859, "learning_rate": 5.667792534718639e-06, "loss": 0.528, "step": 19927 }, { "epoch": 65.33770491803278, "grad_norm": 5.405552864074707, "learning_rate": 5.666835493307503e-06, "loss": 0.4161, "step": 19928 }, { "epoch": 65.34098360655737, "grad_norm": 4.923956394195557, "learning_rate": 5.665878500757304e-06, "loss": 0.3889, "step": 19929 }, { "epoch": 65.34426229508196, "grad_norm": 5.9820170402526855, "learning_rate": 5.6649215570788265e-06, "loss": 0.3004, "step": 19930 }, { "epoch": 65.34754098360656, "grad_norm": 5.272865295410156, "learning_rate": 5.6639646622828694e-06, "loss": 0.4267, "step": 19931 }, { "epoch": 65.35081967213115, "grad_norm": 5.734740734100342, "learning_rate": 5.66300781638022e-06, "loss": 0.593, "step": 19932 }, { "epoch": 65.35409836065574, "grad_norm": 4.738135814666748, "learning_rate": 5.662051019381666e-06, "loss": 0.2744, "step": 19933 }, { "epoch": 65.35737704918033, "grad_norm": 6.970702648162842, "learning_rate": 5.661094271297993e-06, "loss": 0.5995, "step": 19934 }, { "epoch": 65.36065573770492, "grad_norm": 5.879394054412842, "learning_rate": 5.660137572139999e-06, "loss": 0.2902, "step": 19935 }, { "epoch": 65.3639344262295, "grad_norm": 5.102778434753418, "learning_rate": 5.659180921918464e-06, "loss": 0.4557, "step": 19936 }, { "epoch": 65.3672131147541, "grad_norm": 5.248876094818115, "learning_rate": 5.658224320644179e-06, "loss": 0.2596, "step": 19937 }, { "epoch": 65.37049180327868, "grad_norm": 6.325010299682617, "learning_rate": 5.6572677683279246e-06, "loss": 0.4915, "step": 19938 }, { "epoch": 65.37377049180328, "grad_norm": 14.858987808227539, "learning_rate": 5.656311264980494e-06, "loss": 0.3873, "step": 19939 }, { "epoch": 65.37704918032787, "grad_norm": 6.090249538421631, "learning_rate": 5.655354810612672e-06, "loss": 0.7062, "step": 19940 }, { "epoch": 65.38032786885246, "grad_norm": 6.572393894195557, "learning_rate": 5.654398405235242e-06, "loss": 0.6591, "step": 19941 }, { "epoch": 65.38360655737705, "grad_norm": 6.500646591186523, "learning_rate": 5.653442048858984e-06, "loss": 0.559, "step": 19942 }, { "epoch": 65.38688524590164, "grad_norm": 8.189582824707031, "learning_rate": 5.652485741494689e-06, "loss": 0.4669, "step": 19943 }, { "epoch": 65.39016393442623, "grad_norm": 6.402052879333496, "learning_rate": 5.651529483153139e-06, "loss": 0.3959, "step": 19944 }, { "epoch": 65.39344262295081, "grad_norm": 4.569725036621094, "learning_rate": 5.65057327384511e-06, "loss": 0.3956, "step": 19945 }, { "epoch": 65.3967213114754, "grad_norm": 11.72895336151123, "learning_rate": 5.649617113581394e-06, "loss": 0.2043, "step": 19946 }, { "epoch": 65.4, "grad_norm": 5.163595676422119, "learning_rate": 5.648661002372769e-06, "loss": 0.6959, "step": 19947 }, { "epoch": 65.4032786885246, "grad_norm": 5.350545406341553, "learning_rate": 5.647704940230011e-06, "loss": 0.5481, "step": 19948 }, { "epoch": 65.40655737704918, "grad_norm": 6.1567864418029785, "learning_rate": 5.64674892716391e-06, "loss": 0.4261, "step": 19949 }, { "epoch": 65.40983606557377, "grad_norm": 5.242455959320068, "learning_rate": 5.6457929631852395e-06, "loss": 0.5839, "step": 19950 }, { "epoch": 65.41311475409836, "grad_norm": 5.747793197631836, "learning_rate": 5.644837048304781e-06, "loss": 0.5141, "step": 19951 }, { "epoch": 65.41639344262295, "grad_norm": 4.865512371063232, "learning_rate": 5.64388118253331e-06, "loss": 0.582, "step": 19952 }, { "epoch": 65.41967213114754, "grad_norm": 7.502607345581055, "learning_rate": 5.642925365881611e-06, "loss": 0.3044, "step": 19953 }, { "epoch": 65.42295081967212, "grad_norm": 6.4240593910217285, "learning_rate": 5.6419695983604595e-06, "loss": 0.4168, "step": 19954 }, { "epoch": 65.42622950819673, "grad_norm": 8.05815315246582, "learning_rate": 5.64101387998063e-06, "loss": 0.4471, "step": 19955 }, { "epoch": 65.42950819672132, "grad_norm": 5.944666862487793, "learning_rate": 5.640058210752899e-06, "loss": 0.3184, "step": 19956 }, { "epoch": 65.4327868852459, "grad_norm": 6.522531986236572, "learning_rate": 5.6391025906880485e-06, "loss": 0.2264, "step": 19957 }, { "epoch": 65.43606557377049, "grad_norm": 5.7414960861206055, "learning_rate": 5.638147019796851e-06, "loss": 0.3373, "step": 19958 }, { "epoch": 65.43934426229508, "grad_norm": 6.69423770904541, "learning_rate": 5.63719149809008e-06, "loss": 0.6175, "step": 19959 }, { "epoch": 65.44262295081967, "grad_norm": 6.123056888580322, "learning_rate": 5.6362360255785075e-06, "loss": 0.3873, "step": 19960 }, { "epoch": 65.44590163934426, "grad_norm": 4.295945167541504, "learning_rate": 5.6352806022729155e-06, "loss": 0.4265, "step": 19961 }, { "epoch": 65.44918032786886, "grad_norm": 6.123212814331055, "learning_rate": 5.634325228184072e-06, "loss": 0.337, "step": 19962 }, { "epoch": 65.45245901639345, "grad_norm": 6.177946090698242, "learning_rate": 5.633369903322752e-06, "loss": 0.471, "step": 19963 }, { "epoch": 65.45573770491804, "grad_norm": 4.57216739654541, "learning_rate": 5.6324146276997215e-06, "loss": 0.5523, "step": 19964 }, { "epoch": 65.45901639344262, "grad_norm": 5.853734493255615, "learning_rate": 5.631459401325761e-06, "loss": 0.3924, "step": 19965 }, { "epoch": 65.46229508196721, "grad_norm": 5.91549825668335, "learning_rate": 5.6305042242116394e-06, "loss": 0.463, "step": 19966 }, { "epoch": 65.4655737704918, "grad_norm": 5.622467994689941, "learning_rate": 5.629549096368123e-06, "loss": 0.5427, "step": 19967 }, { "epoch": 65.46885245901639, "grad_norm": 5.919672012329102, "learning_rate": 5.628594017805987e-06, "loss": 0.5737, "step": 19968 }, { "epoch": 65.47213114754098, "grad_norm": 5.129489421844482, "learning_rate": 5.627638988535994e-06, "loss": 0.438, "step": 19969 }, { "epoch": 65.47540983606558, "grad_norm": 4.831214904785156, "learning_rate": 5.626684008568921e-06, "loss": 0.4209, "step": 19970 }, { "epoch": 65.47868852459017, "grad_norm": 35.80139923095703, "learning_rate": 5.625729077915534e-06, "loss": 0.7047, "step": 19971 }, { "epoch": 65.48196721311476, "grad_norm": 6.037034511566162, "learning_rate": 5.6247741965866e-06, "loss": 0.3294, "step": 19972 }, { "epoch": 65.48524590163935, "grad_norm": 5.259551048278809, "learning_rate": 5.62381936459288e-06, "loss": 0.2124, "step": 19973 }, { "epoch": 65.48852459016393, "grad_norm": 15.783102989196777, "learning_rate": 5.6228645819451525e-06, "loss": 0.3351, "step": 19974 }, { "epoch": 65.49180327868852, "grad_norm": 6.380606174468994, "learning_rate": 5.621909848654177e-06, "loss": 0.6011, "step": 19975 }, { "epoch": 65.49508196721311, "grad_norm": 6.319663047790527, "learning_rate": 5.6209551647307205e-06, "loss": 0.4173, "step": 19976 }, { "epoch": 65.4983606557377, "grad_norm": 6.367246150970459, "learning_rate": 5.620000530185543e-06, "loss": 0.4185, "step": 19977 }, { "epoch": 65.5016393442623, "grad_norm": 8.147710800170898, "learning_rate": 5.6190459450294175e-06, "loss": 0.5956, "step": 19978 }, { "epoch": 65.50491803278689, "grad_norm": 6.41354513168335, "learning_rate": 5.6180914092731056e-06, "loss": 0.3905, "step": 19979 }, { "epoch": 65.50819672131148, "grad_norm": 6.251058578491211, "learning_rate": 5.617136922927368e-06, "loss": 0.6186, "step": 19980 }, { "epoch": 65.51147540983607, "grad_norm": 7.101934432983398, "learning_rate": 5.616182486002968e-06, "loss": 0.434, "step": 19981 }, { "epoch": 65.51475409836065, "grad_norm": 5.757791519165039, "learning_rate": 5.615228098510665e-06, "loss": 0.461, "step": 19982 }, { "epoch": 65.51803278688524, "grad_norm": 5.13823127746582, "learning_rate": 5.614273760461228e-06, "loss": 0.2908, "step": 19983 }, { "epoch": 65.52131147540983, "grad_norm": 5.918799877166748, "learning_rate": 5.6133194718654145e-06, "loss": 0.462, "step": 19984 }, { "epoch": 65.52459016393442, "grad_norm": 7.972184658050537, "learning_rate": 5.612365232733986e-06, "loss": 0.6106, "step": 19985 }, { "epoch": 65.52786885245902, "grad_norm": 5.519725322723389, "learning_rate": 5.611411043077697e-06, "loss": 0.3857, "step": 19986 }, { "epoch": 65.53114754098361, "grad_norm": 21.147506713867188, "learning_rate": 5.610456902907315e-06, "loss": 0.2803, "step": 19987 }, { "epoch": 65.5344262295082, "grad_norm": 5.531349182128906, "learning_rate": 5.609502812233596e-06, "loss": 0.3625, "step": 19988 }, { "epoch": 65.53770491803279, "grad_norm": 7.445827484130859, "learning_rate": 5.608548771067297e-06, "loss": 0.5313, "step": 19989 }, { "epoch": 65.54098360655738, "grad_norm": 5.734051704406738, "learning_rate": 5.607594779419172e-06, "loss": 0.3704, "step": 19990 }, { "epoch": 65.54426229508196, "grad_norm": 5.515871047973633, "learning_rate": 5.606640837299988e-06, "loss": 0.5073, "step": 19991 }, { "epoch": 65.54754098360655, "grad_norm": 5.581363677978516, "learning_rate": 5.605686944720497e-06, "loss": 0.6388, "step": 19992 }, { "epoch": 65.55081967213114, "grad_norm": 6.515286445617676, "learning_rate": 5.604733101691453e-06, "loss": 0.5056, "step": 19993 }, { "epoch": 65.55409836065574, "grad_norm": 6.97005558013916, "learning_rate": 5.6037793082236145e-06, "loss": 0.7043, "step": 19994 }, { "epoch": 65.55737704918033, "grad_norm": 6.085702896118164, "learning_rate": 5.6028255643277305e-06, "loss": 0.3124, "step": 19995 }, { "epoch": 65.56065573770492, "grad_norm": 5.664175510406494, "learning_rate": 5.601871870014565e-06, "loss": 0.5817, "step": 19996 }, { "epoch": 65.56393442622951, "grad_norm": 7.350643157958984, "learning_rate": 5.600918225294867e-06, "loss": 0.3598, "step": 19997 }, { "epoch": 65.5672131147541, "grad_norm": 18.017290115356445, "learning_rate": 5.599964630179384e-06, "loss": 0.4693, "step": 19998 }, { "epoch": 65.57049180327868, "grad_norm": 5.540295124053955, "learning_rate": 5.59901108467888e-06, "loss": 0.4649, "step": 19999 }, { "epoch": 65.57377049180327, "grad_norm": 6.134943962097168, "learning_rate": 5.598057588804103e-06, "loss": 0.4217, "step": 20000 }, { "epoch": 65.57704918032788, "grad_norm": 5.7324419021606445, "learning_rate": 5.597104142565799e-06, "loss": 0.2894, "step": 20001 }, { "epoch": 65.58032786885246, "grad_norm": 8.655593872070312, "learning_rate": 5.596150745974727e-06, "loss": 0.6995, "step": 20002 }, { "epoch": 65.58360655737705, "grad_norm": 5.16244649887085, "learning_rate": 5.595197399041634e-06, "loss": 0.5623, "step": 20003 }, { "epoch": 65.58688524590164, "grad_norm": 5.217873573303223, "learning_rate": 5.594244101777267e-06, "loss": 0.4768, "step": 20004 }, { "epoch": 65.59016393442623, "grad_norm": 7.448635578155518, "learning_rate": 5.593290854192383e-06, "loss": 0.3824, "step": 20005 }, { "epoch": 65.59344262295082, "grad_norm": 4.183510780334473, "learning_rate": 5.592337656297725e-06, "loss": 0.5023, "step": 20006 }, { "epoch": 65.5967213114754, "grad_norm": 4.398624420166016, "learning_rate": 5.591384508104043e-06, "loss": 0.2922, "step": 20007 }, { "epoch": 65.6, "grad_norm": 5.34273099899292, "learning_rate": 5.590431409622081e-06, "loss": 0.7862, "step": 20008 }, { "epoch": 65.6032786885246, "grad_norm": 5.584120273590088, "learning_rate": 5.589478360862594e-06, "loss": 0.4578, "step": 20009 }, { "epoch": 65.60655737704919, "grad_norm": 12.820418357849121, "learning_rate": 5.588525361836323e-06, "loss": 0.4942, "step": 20010 }, { "epoch": 65.60983606557377, "grad_norm": 5.7390522956848145, "learning_rate": 5.587572412554016e-06, "loss": 0.4271, "step": 20011 }, { "epoch": 65.61311475409836, "grad_norm": 6.330120086669922, "learning_rate": 5.586619513026415e-06, "loss": 0.4357, "step": 20012 }, { "epoch": 65.61639344262295, "grad_norm": 6.146056652069092, "learning_rate": 5.5856666632642705e-06, "loss": 0.4879, "step": 20013 }, { "epoch": 65.61967213114754, "grad_norm": 5.370100975036621, "learning_rate": 5.584713863278324e-06, "loss": 0.3773, "step": 20014 }, { "epoch": 65.62295081967213, "grad_norm": 4.929731845855713, "learning_rate": 5.58376111307932e-06, "loss": 0.3245, "step": 20015 }, { "epoch": 65.62622950819672, "grad_norm": 5.204983711242676, "learning_rate": 5.5828084126780005e-06, "loss": 0.6312, "step": 20016 }, { "epoch": 65.62950819672132, "grad_norm": 6.012598514556885, "learning_rate": 5.581855762085107e-06, "loss": 0.4638, "step": 20017 }, { "epoch": 65.6327868852459, "grad_norm": 5.221148490905762, "learning_rate": 5.580903161311384e-06, "loss": 0.2915, "step": 20018 }, { "epoch": 65.6360655737705, "grad_norm": 5.1321187019348145, "learning_rate": 5.579950610367575e-06, "loss": 0.4408, "step": 20019 }, { "epoch": 65.63934426229508, "grad_norm": 5.598296165466309, "learning_rate": 5.5789981092644175e-06, "loss": 0.209, "step": 20020 }, { "epoch": 65.64262295081967, "grad_norm": 8.980347633361816, "learning_rate": 5.57804565801265e-06, "loss": 0.3518, "step": 20021 }, { "epoch": 65.64590163934426, "grad_norm": 5.605223655700684, "learning_rate": 5.577093256623019e-06, "loss": 0.4861, "step": 20022 }, { "epoch": 65.64918032786885, "grad_norm": 5.346739292144775, "learning_rate": 5.57614090510626e-06, "loss": 0.3911, "step": 20023 }, { "epoch": 65.65245901639344, "grad_norm": 6.207627773284912, "learning_rate": 5.575188603473112e-06, "loss": 0.6108, "step": 20024 }, { "epoch": 65.65573770491804, "grad_norm": 5.015228748321533, "learning_rate": 5.574236351734309e-06, "loss": 0.4769, "step": 20025 }, { "epoch": 65.65901639344263, "grad_norm": 5.5111565589904785, "learning_rate": 5.573284149900597e-06, "loss": 0.4146, "step": 20026 }, { "epoch": 65.66229508196722, "grad_norm": 5.882325172424316, "learning_rate": 5.57233199798271e-06, "loss": 0.3604, "step": 20027 }, { "epoch": 65.6655737704918, "grad_norm": 6.287007808685303, "learning_rate": 5.571379895991381e-06, "loss": 0.8818, "step": 20028 }, { "epoch": 65.66885245901639, "grad_norm": 7.751833915710449, "learning_rate": 5.570427843937349e-06, "loss": 0.4231, "step": 20029 }, { "epoch": 65.67213114754098, "grad_norm": 5.031032085418701, "learning_rate": 5.569475841831346e-06, "loss": 0.735, "step": 20030 }, { "epoch": 65.67540983606557, "grad_norm": 4.8850321769714355, "learning_rate": 5.568523889684112e-06, "loss": 0.471, "step": 20031 }, { "epoch": 65.67868852459016, "grad_norm": 5.266216278076172, "learning_rate": 5.567571987506379e-06, "loss": 0.3793, "step": 20032 }, { "epoch": 65.68196721311476, "grad_norm": 6.0771803855896, "learning_rate": 5.566620135308881e-06, "loss": 0.3254, "step": 20033 }, { "epoch": 65.68524590163935, "grad_norm": 5.273473739624023, "learning_rate": 5.565668333102346e-06, "loss": 0.3915, "step": 20034 }, { "epoch": 65.68852459016394, "grad_norm": 6.960974216461182, "learning_rate": 5.564716580897516e-06, "loss": 0.567, "step": 20035 }, { "epoch": 65.69180327868852, "grad_norm": 5.712417125701904, "learning_rate": 5.563764878705117e-06, "loss": 0.3725, "step": 20036 }, { "epoch": 65.69508196721311, "grad_norm": 5.2012786865234375, "learning_rate": 5.562813226535881e-06, "loss": 0.4754, "step": 20037 }, { "epoch": 65.6983606557377, "grad_norm": 5.020068645477295, "learning_rate": 5.561861624400537e-06, "loss": 0.5898, "step": 20038 }, { "epoch": 65.70163934426229, "grad_norm": 6.754072666168213, "learning_rate": 5.560910072309822e-06, "loss": 0.4778, "step": 20039 }, { "epoch": 65.70491803278688, "grad_norm": 5.229774475097656, "learning_rate": 5.559958570274459e-06, "loss": 0.5747, "step": 20040 }, { "epoch": 65.70819672131148, "grad_norm": 5.31185245513916, "learning_rate": 5.559007118305182e-06, "loss": 0.2695, "step": 20041 }, { "epoch": 65.71147540983607, "grad_norm": 5.298817157745361, "learning_rate": 5.558055716412716e-06, "loss": 0.3509, "step": 20042 }, { "epoch": 65.71475409836066, "grad_norm": 6.891366004943848, "learning_rate": 5.557104364607786e-06, "loss": 0.4152, "step": 20043 }, { "epoch": 65.71803278688525, "grad_norm": 7.493725299835205, "learning_rate": 5.556153062901128e-06, "loss": 0.3319, "step": 20044 }, { "epoch": 65.72131147540983, "grad_norm": 7.452609062194824, "learning_rate": 5.555201811303465e-06, "loss": 0.6143, "step": 20045 }, { "epoch": 65.72459016393442, "grad_norm": 10.420431137084961, "learning_rate": 5.5542506098255245e-06, "loss": 0.3949, "step": 20046 }, { "epoch": 65.72786885245901, "grad_norm": 7.440215587615967, "learning_rate": 5.553299458478024e-06, "loss": 0.4419, "step": 20047 }, { "epoch": 65.73114754098361, "grad_norm": 5.043059349060059, "learning_rate": 5.552348357271702e-06, "loss": 0.4819, "step": 20048 }, { "epoch": 65.7344262295082, "grad_norm": 5.769527435302734, "learning_rate": 5.551397306217274e-06, "loss": 0.4937, "step": 20049 }, { "epoch": 65.73770491803279, "grad_norm": 5.02237606048584, "learning_rate": 5.550446305325466e-06, "loss": 0.4556, "step": 20050 }, { "epoch": 65.74098360655738, "grad_norm": 6.615108489990234, "learning_rate": 5.549495354607003e-06, "loss": 0.6463, "step": 20051 }, { "epoch": 65.74426229508197, "grad_norm": 6.237687587738037, "learning_rate": 5.5485444540726085e-06, "loss": 0.6606, "step": 20052 }, { "epoch": 65.74754098360656, "grad_norm": 8.650439262390137, "learning_rate": 5.5475936037330035e-06, "loss": 0.3682, "step": 20053 }, { "epoch": 65.75081967213114, "grad_norm": 7.186148166656494, "learning_rate": 5.546642803598907e-06, "loss": 0.5674, "step": 20054 }, { "epoch": 65.75409836065573, "grad_norm": 7.238755226135254, "learning_rate": 5.545692053681047e-06, "loss": 0.7372, "step": 20055 }, { "epoch": 65.75737704918033, "grad_norm": 5.67113733291626, "learning_rate": 5.544741353990139e-06, "loss": 0.3039, "step": 20056 }, { "epoch": 65.76065573770492, "grad_norm": 4.9828104972839355, "learning_rate": 5.543790704536902e-06, "loss": 0.3281, "step": 20057 }, { "epoch": 65.76393442622951, "grad_norm": 4.8440656661987305, "learning_rate": 5.542840105332061e-06, "loss": 0.334, "step": 20058 }, { "epoch": 65.7672131147541, "grad_norm": 6.601023197174072, "learning_rate": 5.541889556386333e-06, "loss": 0.4664, "step": 20059 }, { "epoch": 65.77049180327869, "grad_norm": 8.427932739257812, "learning_rate": 5.5409390577104305e-06, "loss": 0.6355, "step": 20060 }, { "epoch": 65.77377049180328, "grad_norm": 10.972195625305176, "learning_rate": 5.539988609315081e-06, "loss": 0.4538, "step": 20061 }, { "epoch": 65.77704918032786, "grad_norm": 4.8723955154418945, "learning_rate": 5.539038211210999e-06, "loss": 0.4117, "step": 20062 }, { "epoch": 65.78032786885245, "grad_norm": 5.839085578918457, "learning_rate": 5.538087863408897e-06, "loss": 0.3414, "step": 20063 }, { "epoch": 65.78360655737706, "grad_norm": 4.905490875244141, "learning_rate": 5.53713756591949e-06, "loss": 0.5731, "step": 20064 }, { "epoch": 65.78688524590164, "grad_norm": 7.42349910736084, "learning_rate": 5.536187318753501e-06, "loss": 0.4466, "step": 20065 }, { "epoch": 65.79016393442623, "grad_norm": 18.39897918701172, "learning_rate": 5.5352371219216416e-06, "loss": 0.2578, "step": 20066 }, { "epoch": 65.79344262295082, "grad_norm": 5.815889835357666, "learning_rate": 5.5342869754346266e-06, "loss": 0.3468, "step": 20067 }, { "epoch": 65.79672131147541, "grad_norm": 5.865796089172363, "learning_rate": 5.533336879303168e-06, "loss": 0.6179, "step": 20068 }, { "epoch": 65.8, "grad_norm": 8.437931060791016, "learning_rate": 5.5323868335379775e-06, "loss": 0.4683, "step": 20069 }, { "epoch": 65.80327868852459, "grad_norm": 5.142826080322266, "learning_rate": 5.531436838149773e-06, "loss": 0.5934, "step": 20070 }, { "epoch": 65.80655737704917, "grad_norm": 8.21597671508789, "learning_rate": 5.530486893149265e-06, "loss": 0.406, "step": 20071 }, { "epoch": 65.80983606557378, "grad_norm": 7.80699348449707, "learning_rate": 5.529536998547164e-06, "loss": 0.5073, "step": 20072 }, { "epoch": 65.81311475409836, "grad_norm": 4.894659996032715, "learning_rate": 5.528587154354177e-06, "loss": 0.493, "step": 20073 }, { "epoch": 65.81639344262295, "grad_norm": 5.087356090545654, "learning_rate": 5.527637360581024e-06, "loss": 0.2884, "step": 20074 }, { "epoch": 65.81967213114754, "grad_norm": 6.978231430053711, "learning_rate": 5.526687617238411e-06, "loss": 0.7207, "step": 20075 }, { "epoch": 65.82295081967213, "grad_norm": 7.134201526641846, "learning_rate": 5.525737924337045e-06, "loss": 0.346, "step": 20076 }, { "epoch": 65.82622950819672, "grad_norm": 5.896889686584473, "learning_rate": 5.5247882818876306e-06, "loss": 0.7122, "step": 20077 }, { "epoch": 65.8295081967213, "grad_norm": 5.914063930511475, "learning_rate": 5.523838689900887e-06, "loss": 0.5377, "step": 20078 }, { "epoch": 65.8327868852459, "grad_norm": 5.97839879989624, "learning_rate": 5.522889148387516e-06, "loss": 0.3521, "step": 20079 }, { "epoch": 65.8360655737705, "grad_norm": 5.483644485473633, "learning_rate": 5.521939657358224e-06, "loss": 0.3451, "step": 20080 }, { "epoch": 65.83934426229509, "grad_norm": 6.490823268890381, "learning_rate": 5.520990216823719e-06, "loss": 0.4571, "step": 20081 }, { "epoch": 65.84262295081967, "grad_norm": 6.685027599334717, "learning_rate": 5.5200408267947026e-06, "loss": 0.3721, "step": 20082 }, { "epoch": 65.84590163934426, "grad_norm": 7.752922534942627, "learning_rate": 5.519091487281887e-06, "loss": 0.4719, "step": 20083 }, { "epoch": 65.84918032786885, "grad_norm": 8.551345825195312, "learning_rate": 5.518142198295975e-06, "loss": 0.6269, "step": 20084 }, { "epoch": 65.85245901639344, "grad_norm": 5.161499977111816, "learning_rate": 5.517192959847669e-06, "loss": 0.2985, "step": 20085 }, { "epoch": 65.85573770491803, "grad_norm": 5.373453617095947, "learning_rate": 5.516243771947669e-06, "loss": 0.4919, "step": 20086 }, { "epoch": 65.85901639344263, "grad_norm": 7.461160659790039, "learning_rate": 5.5152946346066875e-06, "loss": 0.1958, "step": 20087 }, { "epoch": 65.86229508196722, "grad_norm": 6.750239849090576, "learning_rate": 5.514345547835421e-06, "loss": 0.5356, "step": 20088 }, { "epoch": 65.8655737704918, "grad_norm": 6.710442066192627, "learning_rate": 5.513396511644573e-06, "loss": 0.4244, "step": 20089 }, { "epoch": 65.8688524590164, "grad_norm": 5.65306282043457, "learning_rate": 5.51244752604484e-06, "loss": 0.3148, "step": 20090 }, { "epoch": 65.87213114754098, "grad_norm": 6.198172092437744, "learning_rate": 5.511498591046931e-06, "loss": 0.3451, "step": 20091 }, { "epoch": 65.87540983606557, "grad_norm": 5.02612829208374, "learning_rate": 5.510549706661542e-06, "loss": 0.4183, "step": 20092 }, { "epoch": 65.87868852459016, "grad_norm": 5.090194225311279, "learning_rate": 5.509600872899373e-06, "loss": 0.4679, "step": 20093 }, { "epoch": 65.88196721311475, "grad_norm": 5.575054168701172, "learning_rate": 5.508652089771122e-06, "loss": 0.5057, "step": 20094 }, { "epoch": 65.88524590163935, "grad_norm": 5.773507595062256, "learning_rate": 5.507703357287486e-06, "loss": 0.3115, "step": 20095 }, { "epoch": 65.88852459016394, "grad_norm": 5.948789596557617, "learning_rate": 5.506754675459169e-06, "loss": 0.3267, "step": 20096 }, { "epoch": 65.89180327868853, "grad_norm": 5.638279914855957, "learning_rate": 5.505806044296867e-06, "loss": 0.4505, "step": 20097 }, { "epoch": 65.89508196721312, "grad_norm": 5.831323623657227, "learning_rate": 5.50485746381127e-06, "loss": 0.4748, "step": 20098 }, { "epoch": 65.8983606557377, "grad_norm": 15.16192626953125, "learning_rate": 5.503908934013079e-06, "loss": 0.5132, "step": 20099 }, { "epoch": 65.90163934426229, "grad_norm": 7.079426288604736, "learning_rate": 5.502960454912991e-06, "loss": 0.3827, "step": 20100 }, { "epoch": 65.90491803278688, "grad_norm": 5.432361602783203, "learning_rate": 5.502012026521701e-06, "loss": 0.5117, "step": 20101 }, { "epoch": 65.90819672131147, "grad_norm": 6.072851181030273, "learning_rate": 5.501063648849903e-06, "loss": 0.3576, "step": 20102 }, { "epoch": 65.91147540983607, "grad_norm": 5.234369277954102, "learning_rate": 5.500115321908284e-06, "loss": 0.4816, "step": 20103 }, { "epoch": 65.91475409836066, "grad_norm": 4.7640461921691895, "learning_rate": 5.499167045707547e-06, "loss": 0.3372, "step": 20104 }, { "epoch": 65.91803278688525, "grad_norm": 5.906347751617432, "learning_rate": 5.498218820258383e-06, "loss": 0.4316, "step": 20105 }, { "epoch": 65.92131147540984, "grad_norm": 5.938300609588623, "learning_rate": 5.497270645571479e-06, "loss": 0.4021, "step": 20106 }, { "epoch": 65.92459016393443, "grad_norm": 5.932765483856201, "learning_rate": 5.496322521657533e-06, "loss": 0.5757, "step": 20107 }, { "epoch": 65.92786885245901, "grad_norm": 5.753783702850342, "learning_rate": 5.4953744485272335e-06, "loss": 0.4756, "step": 20108 }, { "epoch": 65.9311475409836, "grad_norm": 6.053252220153809, "learning_rate": 5.494426426191266e-06, "loss": 0.3945, "step": 20109 }, { "epoch": 65.93442622950819, "grad_norm": 5.781248569488525, "learning_rate": 5.493478454660331e-06, "loss": 0.5499, "step": 20110 }, { "epoch": 65.9377049180328, "grad_norm": 5.712040901184082, "learning_rate": 5.492530533945109e-06, "loss": 0.3315, "step": 20111 }, { "epoch": 65.94098360655738, "grad_norm": 5.780488014221191, "learning_rate": 5.4915826640562894e-06, "loss": 0.4101, "step": 20112 }, { "epoch": 65.94426229508197, "grad_norm": 5.420853137969971, "learning_rate": 5.490634845004568e-06, "loss": 0.6134, "step": 20113 }, { "epoch": 65.94754098360656, "grad_norm": 7.2313151359558105, "learning_rate": 5.489687076800626e-06, "loss": 0.499, "step": 20114 }, { "epoch": 65.95081967213115, "grad_norm": 6.212826251983643, "learning_rate": 5.488739359455153e-06, "loss": 0.2633, "step": 20115 }, { "epoch": 65.95409836065573, "grad_norm": 5.6682024002075195, "learning_rate": 5.487791692978829e-06, "loss": 0.5854, "step": 20116 }, { "epoch": 65.95737704918032, "grad_norm": 7.674367427825928, "learning_rate": 5.486844077382349e-06, "loss": 0.3728, "step": 20117 }, { "epoch": 65.96065573770491, "grad_norm": 6.78046178817749, "learning_rate": 5.485896512676395e-06, "loss": 0.3948, "step": 20118 }, { "epoch": 65.96393442622951, "grad_norm": 4.72466516494751, "learning_rate": 5.484948998871651e-06, "loss": 0.5526, "step": 20119 }, { "epoch": 65.9672131147541, "grad_norm": 8.197016716003418, "learning_rate": 5.484001535978802e-06, "loss": 0.5922, "step": 20120 }, { "epoch": 65.97049180327869, "grad_norm": 5.579186916351318, "learning_rate": 5.483054124008528e-06, "loss": 0.416, "step": 20121 }, { "epoch": 65.97377049180328, "grad_norm": 9.179832458496094, "learning_rate": 5.482106762971517e-06, "loss": 0.5533, "step": 20122 }, { "epoch": 65.97704918032787, "grad_norm": 5.050380229949951, "learning_rate": 5.481159452878452e-06, "loss": 0.3413, "step": 20123 }, { "epoch": 65.98032786885246, "grad_norm": 5.240086078643799, "learning_rate": 5.480212193740011e-06, "loss": 0.3927, "step": 20124 }, { "epoch": 65.98360655737704, "grad_norm": 6.212337493896484, "learning_rate": 5.479264985566873e-06, "loss": 0.5738, "step": 20125 }, { "epoch": 65.98688524590163, "grad_norm": 5.389113903045654, "learning_rate": 5.478317828369728e-06, "loss": 0.6008, "step": 20126 }, { "epoch": 65.99016393442623, "grad_norm": 11.366192817687988, "learning_rate": 5.4773707221592496e-06, "loss": 0.3992, "step": 20127 }, { "epoch": 65.99344262295082, "grad_norm": 7.583046913146973, "learning_rate": 5.476423666946119e-06, "loss": 0.4643, "step": 20128 }, { "epoch": 65.99672131147541, "grad_norm": 6.60922908782959, "learning_rate": 5.4754766627410104e-06, "loss": 0.4103, "step": 20129 }, { "epoch": 66.0, "grad_norm": 8.761590003967285, "learning_rate": 5.4745297095546125e-06, "loss": 0.5283, "step": 20130 }, { "epoch": 66.00327868852459, "grad_norm": 5.211461067199707, "learning_rate": 5.473582807397595e-06, "loss": 0.4986, "step": 20131 }, { "epoch": 66.00655737704918, "grad_norm": 11.46800708770752, "learning_rate": 5.4726359562806384e-06, "loss": 0.6147, "step": 20132 }, { "epoch": 66.00983606557377, "grad_norm": 7.364145278930664, "learning_rate": 5.471689156214419e-06, "loss": 0.4209, "step": 20133 }, { "epoch": 66.01311475409837, "grad_norm": 8.464592933654785, "learning_rate": 5.4707424072096095e-06, "loss": 0.5563, "step": 20134 }, { "epoch": 66.01639344262296, "grad_norm": 12.641907691955566, "learning_rate": 5.469795709276892e-06, "loss": 0.5907, "step": 20135 }, { "epoch": 66.01967213114754, "grad_norm": 10.861845016479492, "learning_rate": 5.468849062426937e-06, "loss": 0.4734, "step": 20136 }, { "epoch": 66.02295081967213, "grad_norm": 8.880813598632812, "learning_rate": 5.4679024666704215e-06, "loss": 0.4577, "step": 20137 }, { "epoch": 66.02622950819672, "grad_norm": 4.719919681549072, "learning_rate": 5.466955922018013e-06, "loss": 0.4279, "step": 20138 }, { "epoch": 66.02950819672131, "grad_norm": 6.021059989929199, "learning_rate": 5.4660094284803945e-06, "loss": 0.5202, "step": 20139 }, { "epoch": 66.0327868852459, "grad_norm": 12.31033706665039, "learning_rate": 5.465062986068233e-06, "loss": 0.3077, "step": 20140 }, { "epoch": 66.03606557377049, "grad_norm": 5.272991180419922, "learning_rate": 5.464116594792202e-06, "loss": 0.6603, "step": 20141 }, { "epoch": 66.03934426229509, "grad_norm": 4.916914463043213, "learning_rate": 5.463170254662968e-06, "loss": 0.4309, "step": 20142 }, { "epoch": 66.04262295081968, "grad_norm": 5.739283084869385, "learning_rate": 5.4622239656912115e-06, "loss": 0.2503, "step": 20143 }, { "epoch": 66.04590163934427, "grad_norm": 5.20522928237915, "learning_rate": 5.461277727887597e-06, "loss": 0.4704, "step": 20144 }, { "epoch": 66.04918032786885, "grad_norm": 4.7448883056640625, "learning_rate": 5.460331541262795e-06, "loss": 0.45, "step": 20145 }, { "epoch": 66.05245901639344, "grad_norm": 4.954268932342529, "learning_rate": 5.459385405827477e-06, "loss": 0.5936, "step": 20146 }, { "epoch": 66.05573770491803, "grad_norm": 7.507992267608643, "learning_rate": 5.458439321592304e-06, "loss": 0.5442, "step": 20147 }, { "epoch": 66.05901639344262, "grad_norm": 5.5689921379089355, "learning_rate": 5.4574932885679534e-06, "loss": 0.5442, "step": 20148 }, { "epoch": 66.0622950819672, "grad_norm": 4.439755916595459, "learning_rate": 5.456547306765089e-06, "loss": 0.3903, "step": 20149 }, { "epoch": 66.06557377049181, "grad_norm": 11.093213081359863, "learning_rate": 5.455601376194377e-06, "loss": 0.3688, "step": 20150 }, { "epoch": 66.0688524590164, "grad_norm": 5.767224311828613, "learning_rate": 5.4546554968664825e-06, "loss": 0.3969, "step": 20151 }, { "epoch": 66.07213114754099, "grad_norm": 4.945798873901367, "learning_rate": 5.453709668792076e-06, "loss": 0.5055, "step": 20152 }, { "epoch": 66.07540983606557, "grad_norm": 4.834238052368164, "learning_rate": 5.4527638919818206e-06, "loss": 0.3398, "step": 20153 }, { "epoch": 66.07868852459016, "grad_norm": 4.850523948669434, "learning_rate": 5.45181816644638e-06, "loss": 0.4561, "step": 20154 }, { "epoch": 66.08196721311475, "grad_norm": 5.715885162353516, "learning_rate": 5.450872492196414e-06, "loss": 0.6788, "step": 20155 }, { "epoch": 66.08524590163934, "grad_norm": 38.92193603515625, "learning_rate": 5.4499268692425945e-06, "loss": 0.6335, "step": 20156 }, { "epoch": 66.08852459016393, "grad_norm": 6.173995494842529, "learning_rate": 5.448981297595581e-06, "loss": 0.4632, "step": 20157 }, { "epoch": 66.09180327868853, "grad_norm": 4.376633167266846, "learning_rate": 5.448035777266035e-06, "loss": 0.3528, "step": 20158 }, { "epoch": 66.09508196721312, "grad_norm": 8.856572151184082, "learning_rate": 5.4470903082646155e-06, "loss": 0.3814, "step": 20159 }, { "epoch": 66.09836065573771, "grad_norm": 23.993717193603516, "learning_rate": 5.44614489060199e-06, "loss": 0.4266, "step": 20160 }, { "epoch": 66.1016393442623, "grad_norm": 6.889433860778809, "learning_rate": 5.445199524288815e-06, "loss": 0.301, "step": 20161 }, { "epoch": 66.10491803278688, "grad_norm": 5.640021324157715, "learning_rate": 5.444254209335747e-06, "loss": 0.5478, "step": 20162 }, { "epoch": 66.10819672131147, "grad_norm": 5.5483174324035645, "learning_rate": 5.443308945753454e-06, "loss": 0.3961, "step": 20163 }, { "epoch": 66.11147540983606, "grad_norm": 5.311107158660889, "learning_rate": 5.442363733552591e-06, "loss": 0.526, "step": 20164 }, { "epoch": 66.11475409836065, "grad_norm": 7.389790058135986, "learning_rate": 5.44141857274381e-06, "loss": 0.2749, "step": 20165 }, { "epoch": 66.11803278688525, "grad_norm": 6.823149681091309, "learning_rate": 5.440473463337781e-06, "loss": 0.7063, "step": 20166 }, { "epoch": 66.12131147540984, "grad_norm": 6.007128715515137, "learning_rate": 5.439528405345152e-06, "loss": 0.2473, "step": 20167 }, { "epoch": 66.12459016393443, "grad_norm": 35.182273864746094, "learning_rate": 5.438583398776579e-06, "loss": 0.402, "step": 20168 }, { "epoch": 66.12786885245902, "grad_norm": 6.479172229766846, "learning_rate": 5.437638443642725e-06, "loss": 0.5765, "step": 20169 }, { "epoch": 66.1311475409836, "grad_norm": 7.385558128356934, "learning_rate": 5.4366935399542406e-06, "loss": 0.4774, "step": 20170 }, { "epoch": 66.1344262295082, "grad_norm": 5.416371822357178, "learning_rate": 5.435748687721781e-06, "loss": 0.3865, "step": 20171 }, { "epoch": 66.13770491803278, "grad_norm": 4.777454376220703, "learning_rate": 5.434803886956002e-06, "loss": 0.4498, "step": 20172 }, { "epoch": 66.14098360655737, "grad_norm": 5.105201721191406, "learning_rate": 5.43385913766755e-06, "loss": 0.437, "step": 20173 }, { "epoch": 66.14426229508197, "grad_norm": 17.693708419799805, "learning_rate": 5.4329144398670895e-06, "loss": 0.5502, "step": 20174 }, { "epoch": 66.14754098360656, "grad_norm": 4.677999973297119, "learning_rate": 5.431969793565266e-06, "loss": 0.3816, "step": 20175 }, { "epoch": 66.15081967213115, "grad_norm": 5.597416400909424, "learning_rate": 5.431025198772732e-06, "loss": 0.5364, "step": 20176 }, { "epoch": 66.15409836065574, "grad_norm": 4.663826942443848, "learning_rate": 5.430080655500139e-06, "loss": 0.4498, "step": 20177 }, { "epoch": 66.15737704918033, "grad_norm": 5.364822864532471, "learning_rate": 5.429136163758139e-06, "loss": 0.4999, "step": 20178 }, { "epoch": 66.16065573770491, "grad_norm": 6.595705509185791, "learning_rate": 5.428191723557381e-06, "loss": 0.446, "step": 20179 }, { "epoch": 66.1639344262295, "grad_norm": 5.6915459632873535, "learning_rate": 5.427247334908517e-06, "loss": 0.3967, "step": 20180 }, { "epoch": 66.1672131147541, "grad_norm": 4.695367813110352, "learning_rate": 5.426302997822188e-06, "loss": 0.3725, "step": 20181 }, { "epoch": 66.1704918032787, "grad_norm": 5.505544185638428, "learning_rate": 5.425358712309053e-06, "loss": 0.3002, "step": 20182 }, { "epoch": 66.17377049180328, "grad_norm": 4.319457054138184, "learning_rate": 5.424414478379754e-06, "loss": 0.5416, "step": 20183 }, { "epoch": 66.17704918032787, "grad_norm": 5.075937271118164, "learning_rate": 5.423470296044939e-06, "loss": 0.4866, "step": 20184 }, { "epoch": 66.18032786885246, "grad_norm": 24.815143585205078, "learning_rate": 5.4225261653152565e-06, "loss": 0.7264, "step": 20185 }, { "epoch": 66.18360655737705, "grad_norm": 4.652331352233887, "learning_rate": 5.421582086201346e-06, "loss": 0.2318, "step": 20186 }, { "epoch": 66.18688524590164, "grad_norm": 9.810017585754395, "learning_rate": 5.420638058713861e-06, "loss": 0.6247, "step": 20187 }, { "epoch": 66.19016393442622, "grad_norm": 4.837152004241943, "learning_rate": 5.4196940828634445e-06, "loss": 0.4496, "step": 20188 }, { "epoch": 66.19344262295083, "grad_norm": 4.509722709655762, "learning_rate": 5.418750158660739e-06, "loss": 0.4692, "step": 20189 }, { "epoch": 66.19672131147541, "grad_norm": 4.661871433258057, "learning_rate": 5.417806286116385e-06, "loss": 0.1981, "step": 20190 }, { "epoch": 66.2, "grad_norm": 4.413617134094238, "learning_rate": 5.416862465241033e-06, "loss": 0.418, "step": 20191 }, { "epoch": 66.20327868852459, "grad_norm": 5.924393177032471, "learning_rate": 5.415918696045322e-06, "loss": 0.5748, "step": 20192 }, { "epoch": 66.20655737704918, "grad_norm": 5.984902858734131, "learning_rate": 5.414974978539895e-06, "loss": 0.5804, "step": 20193 }, { "epoch": 66.20983606557377, "grad_norm": 6.148869514465332, "learning_rate": 5.4140313127353875e-06, "loss": 0.37, "step": 20194 }, { "epoch": 66.21311475409836, "grad_norm": 6.576436519622803, "learning_rate": 5.413087698642448e-06, "loss": 0.5811, "step": 20195 }, { "epoch": 66.21639344262294, "grad_norm": 6.212141990661621, "learning_rate": 5.412144136271716e-06, "loss": 0.5034, "step": 20196 }, { "epoch": 66.21967213114755, "grad_norm": 5.220954418182373, "learning_rate": 5.411200625633828e-06, "loss": 0.4053, "step": 20197 }, { "epoch": 66.22295081967214, "grad_norm": 4.824382781982422, "learning_rate": 5.410257166739423e-06, "loss": 0.3688, "step": 20198 }, { "epoch": 66.22622950819672, "grad_norm": 6.89523983001709, "learning_rate": 5.40931375959914e-06, "loss": 0.4662, "step": 20199 }, { "epoch": 66.22950819672131, "grad_norm": 11.337998390197754, "learning_rate": 5.408370404223617e-06, "loss": 0.6755, "step": 20200 }, { "epoch": 66.2327868852459, "grad_norm": 6.711691856384277, "learning_rate": 5.407427100623495e-06, "loss": 0.5192, "step": 20201 }, { "epoch": 66.23606557377049, "grad_norm": 6.407087326049805, "learning_rate": 5.406483848809407e-06, "loss": 0.3169, "step": 20202 }, { "epoch": 66.23934426229508, "grad_norm": 5.842578887939453, "learning_rate": 5.4055406487919845e-06, "loss": 0.266, "step": 20203 }, { "epoch": 66.24262295081967, "grad_norm": 5.256184101104736, "learning_rate": 5.404597500581874e-06, "loss": 0.5219, "step": 20204 }, { "epoch": 66.24590163934427, "grad_norm": 5.988674163818359, "learning_rate": 5.403654404189703e-06, "loss": 0.745, "step": 20205 }, { "epoch": 66.24918032786886, "grad_norm": 4.572228908538818, "learning_rate": 5.402711359626108e-06, "loss": 0.3516, "step": 20206 }, { "epoch": 66.25245901639344, "grad_norm": 5.399784564971924, "learning_rate": 5.401768366901719e-06, "loss": 0.3851, "step": 20207 }, { "epoch": 66.25573770491803, "grad_norm": 6.146735668182373, "learning_rate": 5.400825426027177e-06, "loss": 0.6829, "step": 20208 }, { "epoch": 66.25901639344262, "grad_norm": 4.862457752227783, "learning_rate": 5.399882537013108e-06, "loss": 0.512, "step": 20209 }, { "epoch": 66.26229508196721, "grad_norm": 4.644131660461426, "learning_rate": 5.3989396998701495e-06, "loss": 0.4574, "step": 20210 }, { "epoch": 66.2655737704918, "grad_norm": 5.557723522186279, "learning_rate": 5.397996914608923e-06, "loss": 0.3153, "step": 20211 }, { "epoch": 66.26885245901639, "grad_norm": 7.939840316772461, "learning_rate": 5.397054181240071e-06, "loss": 0.6437, "step": 20212 }, { "epoch": 66.27213114754099, "grad_norm": 4.430291652679443, "learning_rate": 5.396111499774219e-06, "loss": 0.4047, "step": 20213 }, { "epoch": 66.27540983606558, "grad_norm": 5.616168975830078, "learning_rate": 5.395168870221994e-06, "loss": 0.4884, "step": 20214 }, { "epoch": 66.27868852459017, "grad_norm": 4.277191638946533, "learning_rate": 5.394226292594031e-06, "loss": 0.4009, "step": 20215 }, { "epoch": 66.28196721311475, "grad_norm": 6.005358695983887, "learning_rate": 5.3932837669009545e-06, "loss": 0.496, "step": 20216 }, { "epoch": 66.28524590163934, "grad_norm": 5.5602006912231445, "learning_rate": 5.392341293153393e-06, "loss": 0.4683, "step": 20217 }, { "epoch": 66.28852459016393, "grad_norm": 5.456116676330566, "learning_rate": 5.391398871361972e-06, "loss": 0.3215, "step": 20218 }, { "epoch": 66.29180327868852, "grad_norm": 5.29546594619751, "learning_rate": 5.390456501537325e-06, "loss": 0.392, "step": 20219 }, { "epoch": 66.29508196721312, "grad_norm": 9.684099197387695, "learning_rate": 5.389514183690071e-06, "loss": 0.4588, "step": 20220 }, { "epoch": 66.29836065573771, "grad_norm": 4.994876384735107, "learning_rate": 5.388571917830836e-06, "loss": 0.5295, "step": 20221 }, { "epoch": 66.3016393442623, "grad_norm": 4.960348606109619, "learning_rate": 5.3876297039702506e-06, "loss": 0.1472, "step": 20222 }, { "epoch": 66.30491803278689, "grad_norm": 5.106367111206055, "learning_rate": 5.386687542118936e-06, "loss": 0.4588, "step": 20223 }, { "epoch": 66.30819672131148, "grad_norm": 5.013195514678955, "learning_rate": 5.385745432287517e-06, "loss": 0.5448, "step": 20224 }, { "epoch": 66.31147540983606, "grad_norm": 5.083199977874756, "learning_rate": 5.384803374486611e-06, "loss": 0.319, "step": 20225 }, { "epoch": 66.31475409836065, "grad_norm": 5.167365550994873, "learning_rate": 5.383861368726849e-06, "loss": 0.3181, "step": 20226 }, { "epoch": 66.31803278688524, "grad_norm": 12.919896125793457, "learning_rate": 5.38291941501885e-06, "loss": 0.5453, "step": 20227 }, { "epoch": 66.32131147540984, "grad_norm": 4.7177653312683105, "learning_rate": 5.381977513373235e-06, "loss": 0.4848, "step": 20228 }, { "epoch": 66.32459016393443, "grad_norm": 5.889021873474121, "learning_rate": 5.3810356638006224e-06, "loss": 0.3111, "step": 20229 }, { "epoch": 66.32786885245902, "grad_norm": 5.307566165924072, "learning_rate": 5.380093866311639e-06, "loss": 0.4169, "step": 20230 }, { "epoch": 66.33114754098361, "grad_norm": 6.926520824432373, "learning_rate": 5.3791521209169e-06, "loss": 0.3715, "step": 20231 }, { "epoch": 66.3344262295082, "grad_norm": 5.314487934112549, "learning_rate": 5.378210427627025e-06, "loss": 0.5993, "step": 20232 }, { "epoch": 66.33770491803278, "grad_norm": 5.00180721282959, "learning_rate": 5.377268786452629e-06, "loss": 0.4392, "step": 20233 }, { "epoch": 66.34098360655737, "grad_norm": 5.7550835609436035, "learning_rate": 5.376327197404339e-06, "loss": 0.3606, "step": 20234 }, { "epoch": 66.34426229508196, "grad_norm": 16.09561538696289, "learning_rate": 5.375385660492767e-06, "loss": 0.6171, "step": 20235 }, { "epoch": 66.34754098360656, "grad_norm": 5.26283073425293, "learning_rate": 5.374444175728528e-06, "loss": 0.5492, "step": 20236 }, { "epoch": 66.35081967213115, "grad_norm": 7.524124622344971, "learning_rate": 5.373502743122243e-06, "loss": 0.5595, "step": 20237 }, { "epoch": 66.35409836065574, "grad_norm": 6.2070488929748535, "learning_rate": 5.372561362684519e-06, "loss": 0.439, "step": 20238 }, { "epoch": 66.35737704918033, "grad_norm": 5.599255084991455, "learning_rate": 5.371620034425981e-06, "loss": 0.3443, "step": 20239 }, { "epoch": 66.36065573770492, "grad_norm": 12.840378761291504, "learning_rate": 5.370678758357241e-06, "loss": 0.3353, "step": 20240 }, { "epoch": 66.3639344262295, "grad_norm": 5.790128707885742, "learning_rate": 5.36973753448891e-06, "loss": 0.4647, "step": 20241 }, { "epoch": 66.3672131147541, "grad_norm": 6.665410041809082, "learning_rate": 5.368796362831599e-06, "loss": 0.5347, "step": 20242 }, { "epoch": 66.37049180327868, "grad_norm": 7.025850772857666, "learning_rate": 5.367855243395929e-06, "loss": 0.4157, "step": 20243 }, { "epoch": 66.37377049180328, "grad_norm": 4.66768217086792, "learning_rate": 5.366914176192506e-06, "loss": 0.3734, "step": 20244 }, { "epoch": 66.37704918032787, "grad_norm": 12.285285949707031, "learning_rate": 5.365973161231943e-06, "loss": 0.5016, "step": 20245 }, { "epoch": 66.38032786885246, "grad_norm": 5.804084300994873, "learning_rate": 5.3650321985248474e-06, "loss": 0.3356, "step": 20246 }, { "epoch": 66.38360655737705, "grad_norm": 5.610345840454102, "learning_rate": 5.364091288081837e-06, "loss": 0.4819, "step": 20247 }, { "epoch": 66.38688524590164, "grad_norm": 7.052988529205322, "learning_rate": 5.363150429913518e-06, "loss": 0.3514, "step": 20248 }, { "epoch": 66.39016393442623, "grad_norm": 5.579371929168701, "learning_rate": 5.362209624030497e-06, "loss": 0.417, "step": 20249 }, { "epoch": 66.39344262295081, "grad_norm": 6.121953964233398, "learning_rate": 5.361268870443386e-06, "loss": 0.2337, "step": 20250 }, { "epoch": 66.3967213114754, "grad_norm": 5.481579780578613, "learning_rate": 5.360328169162787e-06, "loss": 0.2968, "step": 20251 }, { "epoch": 66.4, "grad_norm": 8.07962703704834, "learning_rate": 5.359387520199317e-06, "loss": 0.3528, "step": 20252 }, { "epoch": 66.4032786885246, "grad_norm": 5.085220813751221, "learning_rate": 5.358446923563576e-06, "loss": 0.4586, "step": 20253 }, { "epoch": 66.40655737704918, "grad_norm": 6.630411148071289, "learning_rate": 5.357506379266173e-06, "loss": 0.4591, "step": 20254 }, { "epoch": 66.40983606557377, "grad_norm": 5.208728313446045, "learning_rate": 5.356565887317709e-06, "loss": 0.7122, "step": 20255 }, { "epoch": 66.41311475409836, "grad_norm": 7.6350555419921875, "learning_rate": 5.355625447728796e-06, "loss": 0.2962, "step": 20256 }, { "epoch": 66.41639344262295, "grad_norm": 5.533721446990967, "learning_rate": 5.354685060510035e-06, "loss": 0.4789, "step": 20257 }, { "epoch": 66.41967213114754, "grad_norm": 5.769259929656982, "learning_rate": 5.35374472567203e-06, "loss": 0.5261, "step": 20258 }, { "epoch": 66.42295081967212, "grad_norm": 6.53670597076416, "learning_rate": 5.352804443225381e-06, "loss": 0.5128, "step": 20259 }, { "epoch": 66.42622950819673, "grad_norm": 11.572139739990234, "learning_rate": 5.351864213180697e-06, "loss": 0.5174, "step": 20260 }, { "epoch": 66.42950819672132, "grad_norm": 5.801444053649902, "learning_rate": 5.350924035548576e-06, "loss": 0.4273, "step": 20261 }, { "epoch": 66.4327868852459, "grad_norm": 5.069643974304199, "learning_rate": 5.349983910339621e-06, "loss": 0.3398, "step": 20262 }, { "epoch": 66.43606557377049, "grad_norm": 7.458707332611084, "learning_rate": 5.349043837564432e-06, "loss": 0.6581, "step": 20263 }, { "epoch": 66.43934426229508, "grad_norm": 6.081977844238281, "learning_rate": 5.348103817233605e-06, "loss": 0.3753, "step": 20264 }, { "epoch": 66.44262295081967, "grad_norm": 12.562106132507324, "learning_rate": 5.34716384935775e-06, "loss": 0.4589, "step": 20265 }, { "epoch": 66.44590163934426, "grad_norm": 6.479166030883789, "learning_rate": 5.346223933947459e-06, "loss": 0.5573, "step": 20266 }, { "epoch": 66.44918032786886, "grad_norm": 4.441590785980225, "learning_rate": 5.345284071013328e-06, "loss": 0.3221, "step": 20267 }, { "epoch": 66.45245901639345, "grad_norm": 5.970773220062256, "learning_rate": 5.3443442605659635e-06, "loss": 0.3918, "step": 20268 }, { "epoch": 66.45573770491804, "grad_norm": 4.986765384674072, "learning_rate": 5.343404502615957e-06, "loss": 0.438, "step": 20269 }, { "epoch": 66.45901639344262, "grad_norm": 6.183078765869141, "learning_rate": 5.342464797173903e-06, "loss": 0.3993, "step": 20270 }, { "epoch": 66.46229508196721, "grad_norm": 10.962678909301758, "learning_rate": 5.341525144250406e-06, "loss": 0.4333, "step": 20271 }, { "epoch": 66.4655737704918, "grad_norm": 8.093363761901855, "learning_rate": 5.340585543856055e-06, "loss": 0.3217, "step": 20272 }, { "epoch": 66.46885245901639, "grad_norm": 4.974008560180664, "learning_rate": 5.339645996001443e-06, "loss": 0.4366, "step": 20273 }, { "epoch": 66.47213114754098, "grad_norm": 6.10323429107666, "learning_rate": 5.338706500697173e-06, "loss": 0.2762, "step": 20274 }, { "epoch": 66.47540983606558, "grad_norm": 11.365945816040039, "learning_rate": 5.337767057953833e-06, "loss": 0.4727, "step": 20275 }, { "epoch": 66.47868852459017, "grad_norm": 8.666186332702637, "learning_rate": 5.336827667782016e-06, "loss": 0.3667, "step": 20276 }, { "epoch": 66.48196721311476, "grad_norm": 5.623122692108154, "learning_rate": 5.335888330192313e-06, "loss": 0.5264, "step": 20277 }, { "epoch": 66.48524590163935, "grad_norm": 5.561613082885742, "learning_rate": 5.334949045195321e-06, "loss": 0.4078, "step": 20278 }, { "epoch": 66.48852459016393, "grad_norm": 6.022046089172363, "learning_rate": 5.334009812801631e-06, "loss": 0.6205, "step": 20279 }, { "epoch": 66.49180327868852, "grad_norm": 8.488055229187012, "learning_rate": 5.333070633021829e-06, "loss": 0.478, "step": 20280 }, { "epoch": 66.49508196721311, "grad_norm": 5.176743984222412, "learning_rate": 5.332131505866507e-06, "loss": 0.3239, "step": 20281 }, { "epoch": 66.4983606557377, "grad_norm": 6.704710960388184, "learning_rate": 5.331192431346257e-06, "loss": 0.2957, "step": 20282 }, { "epoch": 66.5016393442623, "grad_norm": 23.33205795288086, "learning_rate": 5.330253409471668e-06, "loss": 0.592, "step": 20283 }, { "epoch": 66.50491803278689, "grad_norm": 4.077178001403809, "learning_rate": 5.329314440253328e-06, "loss": 0.2903, "step": 20284 }, { "epoch": 66.50819672131148, "grad_norm": 5.952802658081055, "learning_rate": 5.328375523701818e-06, "loss": 0.3875, "step": 20285 }, { "epoch": 66.51147540983607, "grad_norm": 5.891162872314453, "learning_rate": 5.327436659827737e-06, "loss": 0.4858, "step": 20286 }, { "epoch": 66.51475409836065, "grad_norm": 7.756780624389648, "learning_rate": 5.326497848641665e-06, "loss": 0.3721, "step": 20287 }, { "epoch": 66.51803278688524, "grad_norm": 6.8089680671691895, "learning_rate": 5.325559090154189e-06, "loss": 0.5395, "step": 20288 }, { "epoch": 66.52131147540983, "grad_norm": 4.599621772766113, "learning_rate": 5.324620384375895e-06, "loss": 0.5144, "step": 20289 }, { "epoch": 66.52459016393442, "grad_norm": 6.031890869140625, "learning_rate": 5.323681731317362e-06, "loss": 0.4456, "step": 20290 }, { "epoch": 66.52786885245902, "grad_norm": 4.937480926513672, "learning_rate": 5.322743130989186e-06, "loss": 0.4331, "step": 20291 }, { "epoch": 66.53114754098361, "grad_norm": 6.5826897621154785, "learning_rate": 5.3218045834019415e-06, "loss": 0.2835, "step": 20292 }, { "epoch": 66.5344262295082, "grad_norm": 6.465790271759033, "learning_rate": 5.320866088566217e-06, "loss": 0.4317, "step": 20293 }, { "epoch": 66.53770491803279, "grad_norm": 6.213351249694824, "learning_rate": 5.319927646492586e-06, "loss": 0.4653, "step": 20294 }, { "epoch": 66.54098360655738, "grad_norm": 5.476329326629639, "learning_rate": 5.318989257191643e-06, "loss": 0.5722, "step": 20295 }, { "epoch": 66.54426229508196, "grad_norm": 6.072423458099365, "learning_rate": 5.318050920673963e-06, "loss": 0.4555, "step": 20296 }, { "epoch": 66.54754098360655, "grad_norm": 3.7652955055236816, "learning_rate": 5.317112636950126e-06, "loss": 0.4091, "step": 20297 }, { "epoch": 66.55081967213114, "grad_norm": 5.853370189666748, "learning_rate": 5.316174406030709e-06, "loss": 0.3985, "step": 20298 }, { "epoch": 66.55409836065574, "grad_norm": 6.714191913604736, "learning_rate": 5.315236227926299e-06, "loss": 0.4491, "step": 20299 }, { "epoch": 66.55737704918033, "grad_norm": 4.782564640045166, "learning_rate": 5.314298102647473e-06, "loss": 0.294, "step": 20300 }, { "epoch": 66.56065573770492, "grad_norm": 6.014631271362305, "learning_rate": 5.313360030204806e-06, "loss": 0.5861, "step": 20301 }, { "epoch": 66.56393442622951, "grad_norm": 16.97893524169922, "learning_rate": 5.312422010608879e-06, "loss": 0.5071, "step": 20302 }, { "epoch": 66.5672131147541, "grad_norm": 7.04214334487915, "learning_rate": 5.311484043870263e-06, "loss": 0.2982, "step": 20303 }, { "epoch": 66.57049180327868, "grad_norm": 5.910065174102783, "learning_rate": 5.310546129999543e-06, "loss": 0.5787, "step": 20304 }, { "epoch": 66.57377049180327, "grad_norm": 4.23470401763916, "learning_rate": 5.3096082690072915e-06, "loss": 0.276, "step": 20305 }, { "epoch": 66.57704918032788, "grad_norm": 4.449950695037842, "learning_rate": 5.308670460904084e-06, "loss": 0.3145, "step": 20306 }, { "epoch": 66.58032786885246, "grad_norm": 5.216652870178223, "learning_rate": 5.30773270570049e-06, "loss": 0.5603, "step": 20307 }, { "epoch": 66.58360655737705, "grad_norm": 5.7848992347717285, "learning_rate": 5.306795003407093e-06, "loss": 0.445, "step": 20308 }, { "epoch": 66.58688524590164, "grad_norm": 5.299025058746338, "learning_rate": 5.305857354034463e-06, "loss": 0.335, "step": 20309 }, { "epoch": 66.59016393442623, "grad_norm": 6.367244243621826, "learning_rate": 5.30491975759317e-06, "loss": 0.5377, "step": 20310 }, { "epoch": 66.59344262295082, "grad_norm": 5.7748260498046875, "learning_rate": 5.303982214093786e-06, "loss": 0.4053, "step": 20311 }, { "epoch": 66.5967213114754, "grad_norm": 5.722256660461426, "learning_rate": 5.303044723546888e-06, "loss": 0.3423, "step": 20312 }, { "epoch": 66.6, "grad_norm": 5.322119235992432, "learning_rate": 5.302107285963045e-06, "loss": 0.2972, "step": 20313 }, { "epoch": 66.6032786885246, "grad_norm": 4.159222602844238, "learning_rate": 5.3011699013528275e-06, "loss": 0.4299, "step": 20314 }, { "epoch": 66.60655737704919, "grad_norm": 5.721738815307617, "learning_rate": 5.300232569726805e-06, "loss": 0.2369, "step": 20315 }, { "epoch": 66.60983606557377, "grad_norm": 6.195683002471924, "learning_rate": 5.299295291095541e-06, "loss": 0.3122, "step": 20316 }, { "epoch": 66.61311475409836, "grad_norm": 4.746310710906982, "learning_rate": 5.298358065469615e-06, "loss": 0.2892, "step": 20317 }, { "epoch": 66.61639344262295, "grad_norm": 7.530007362365723, "learning_rate": 5.2974208928595905e-06, "loss": 0.4338, "step": 20318 }, { "epoch": 66.61967213114754, "grad_norm": 5.147876262664795, "learning_rate": 5.296483773276036e-06, "loss": 0.3435, "step": 20319 }, { "epoch": 66.62295081967213, "grad_norm": 5.196065902709961, "learning_rate": 5.295546706729511e-06, "loss": 0.4756, "step": 20320 }, { "epoch": 66.62622950819672, "grad_norm": 6.391033172607422, "learning_rate": 5.294609693230593e-06, "loss": 0.4148, "step": 20321 }, { "epoch": 66.62950819672132, "grad_norm": 4.117146015167236, "learning_rate": 5.293672732789844e-06, "loss": 0.6036, "step": 20322 }, { "epoch": 66.6327868852459, "grad_norm": 5.663236141204834, "learning_rate": 5.292735825417824e-06, "loss": 0.3974, "step": 20323 }, { "epoch": 66.6360655737705, "grad_norm": 8.99016284942627, "learning_rate": 5.291798971125106e-06, "loss": 0.4321, "step": 20324 }, { "epoch": 66.63934426229508, "grad_norm": 5.18373441696167, "learning_rate": 5.290862169922249e-06, "loss": 0.2862, "step": 20325 }, { "epoch": 66.64262295081967, "grad_norm": 7.673994064331055, "learning_rate": 5.289925421819815e-06, "loss": 0.5875, "step": 20326 }, { "epoch": 66.64590163934426, "grad_norm": 5.165722370147705, "learning_rate": 5.2889887268283725e-06, "loss": 0.3916, "step": 20327 }, { "epoch": 66.64918032786885, "grad_norm": 4.879420757293701, "learning_rate": 5.288052084958479e-06, "loss": 0.3988, "step": 20328 }, { "epoch": 66.65245901639344, "grad_norm": 4.943553447723389, "learning_rate": 5.287115496220695e-06, "loss": 0.2068, "step": 20329 }, { "epoch": 66.65573770491804, "grad_norm": 4.633086681365967, "learning_rate": 5.286178960625587e-06, "loss": 0.402, "step": 20330 }, { "epoch": 66.65901639344263, "grad_norm": 10.1229248046875, "learning_rate": 5.285242478183713e-06, "loss": 0.4891, "step": 20331 }, { "epoch": 66.66229508196722, "grad_norm": 5.45716667175293, "learning_rate": 5.284306048905632e-06, "loss": 0.4662, "step": 20332 }, { "epoch": 66.6655737704918, "grad_norm": 5.981395721435547, "learning_rate": 5.283369672801899e-06, "loss": 0.4924, "step": 20333 }, { "epoch": 66.66885245901639, "grad_norm": 6.126612663269043, "learning_rate": 5.282433349883081e-06, "loss": 0.5775, "step": 20334 }, { "epoch": 66.67213114754098, "grad_norm": 5.508817195892334, "learning_rate": 5.281497080159732e-06, "loss": 0.3777, "step": 20335 }, { "epoch": 66.67540983606557, "grad_norm": 5.4988837242126465, "learning_rate": 5.28056086364241e-06, "loss": 0.4025, "step": 20336 }, { "epoch": 66.67868852459016, "grad_norm": 4.747523307800293, "learning_rate": 5.279624700341668e-06, "loss": 0.6331, "step": 20337 }, { "epoch": 66.68196721311476, "grad_norm": 7.257876396179199, "learning_rate": 5.2786885902680684e-06, "loss": 0.5132, "step": 20338 }, { "epoch": 66.68524590163935, "grad_norm": 5.403059959411621, "learning_rate": 5.277752533432163e-06, "loss": 0.2253, "step": 20339 }, { "epoch": 66.68852459016394, "grad_norm": 8.497302055358887, "learning_rate": 5.276816529844508e-06, "loss": 0.5434, "step": 20340 }, { "epoch": 66.69180327868852, "grad_norm": 5.9776291847229, "learning_rate": 5.27588057951566e-06, "loss": 0.439, "step": 20341 }, { "epoch": 66.69508196721311, "grad_norm": 4.579197883605957, "learning_rate": 5.274944682456165e-06, "loss": 0.2888, "step": 20342 }, { "epoch": 66.6983606557377, "grad_norm": 6.7939958572387695, "learning_rate": 5.274008838676583e-06, "loss": 0.2625, "step": 20343 }, { "epoch": 66.70163934426229, "grad_norm": 11.216802597045898, "learning_rate": 5.273073048187468e-06, "loss": 0.4892, "step": 20344 }, { "epoch": 66.70491803278688, "grad_norm": 6.167721748352051, "learning_rate": 5.2721373109993675e-06, "loss": 0.3469, "step": 20345 }, { "epoch": 66.70819672131148, "grad_norm": 4.8998847007751465, "learning_rate": 5.271201627122833e-06, "loss": 0.2832, "step": 20346 }, { "epoch": 66.71147540983607, "grad_norm": 5.328322410583496, "learning_rate": 5.270265996568418e-06, "loss": 0.3561, "step": 20347 }, { "epoch": 66.71475409836066, "grad_norm": 6.21767520904541, "learning_rate": 5.269330419346673e-06, "loss": 0.7259, "step": 20348 }, { "epoch": 66.71803278688525, "grad_norm": 5.745477676391602, "learning_rate": 5.268394895468147e-06, "loss": 0.4097, "step": 20349 }, { "epoch": 66.72131147540983, "grad_norm": 5.927984237670898, "learning_rate": 5.267459424943382e-06, "loss": 0.4402, "step": 20350 }, { "epoch": 66.72459016393442, "grad_norm": 5.524205684661865, "learning_rate": 5.266524007782939e-06, "loss": 0.376, "step": 20351 }, { "epoch": 66.72786885245901, "grad_norm": 9.048563957214355, "learning_rate": 5.265588643997357e-06, "loss": 0.4426, "step": 20352 }, { "epoch": 66.73114754098361, "grad_norm": 11.229815483093262, "learning_rate": 5.264653333597187e-06, "loss": 0.3745, "step": 20353 }, { "epoch": 66.7344262295082, "grad_norm": 4.2970170974731445, "learning_rate": 5.263718076592974e-06, "loss": 0.6451, "step": 20354 }, { "epoch": 66.73770491803279, "grad_norm": 5.1027984619140625, "learning_rate": 5.26278287299526e-06, "loss": 0.3539, "step": 20355 }, { "epoch": 66.74098360655738, "grad_norm": 7.720487594604492, "learning_rate": 5.2618477228145995e-06, "loss": 0.8122, "step": 20356 }, { "epoch": 66.74426229508197, "grad_norm": 4.498617649078369, "learning_rate": 5.260912626061531e-06, "loss": 0.4684, "step": 20357 }, { "epoch": 66.74754098360656, "grad_norm": 6.401974678039551, "learning_rate": 5.2599775827466e-06, "loss": 0.69, "step": 20358 }, { "epoch": 66.75081967213114, "grad_norm": 5.7129693031311035, "learning_rate": 5.259042592880348e-06, "loss": 0.3453, "step": 20359 }, { "epoch": 66.75409836065573, "grad_norm": 4.708343029022217, "learning_rate": 5.2581076564733215e-06, "loss": 0.3342, "step": 20360 }, { "epoch": 66.75737704918033, "grad_norm": 6.071119785308838, "learning_rate": 5.257172773536063e-06, "loss": 0.4814, "step": 20361 }, { "epoch": 66.76065573770492, "grad_norm": 5.382676124572754, "learning_rate": 5.256237944079113e-06, "loss": 0.4258, "step": 20362 }, { "epoch": 66.76393442622951, "grad_norm": 23.931886672973633, "learning_rate": 5.2553031681130085e-06, "loss": 0.544, "step": 20363 }, { "epoch": 66.7672131147541, "grad_norm": 5.18042516708374, "learning_rate": 5.254368445648297e-06, "loss": 0.3574, "step": 20364 }, { "epoch": 66.77049180327869, "grad_norm": 6.298992156982422, "learning_rate": 5.253433776695517e-06, "loss": 0.5251, "step": 20365 }, { "epoch": 66.77377049180328, "grad_norm": 11.767715454101562, "learning_rate": 5.252499161265205e-06, "loss": 0.5387, "step": 20366 }, { "epoch": 66.77704918032786, "grad_norm": 7.896843433380127, "learning_rate": 5.251564599367902e-06, "loss": 0.3011, "step": 20367 }, { "epoch": 66.78032786885245, "grad_norm": 6.412745475769043, "learning_rate": 5.2506300910141405e-06, "loss": 0.4371, "step": 20368 }, { "epoch": 66.78360655737706, "grad_norm": 4.630080223083496, "learning_rate": 5.249695636214467e-06, "loss": 0.3256, "step": 20369 }, { "epoch": 66.78688524590164, "grad_norm": 15.281079292297363, "learning_rate": 5.248761234979415e-06, "loss": 0.3791, "step": 20370 }, { "epoch": 66.79016393442623, "grad_norm": 6.209704875946045, "learning_rate": 5.24782688731952e-06, "loss": 0.601, "step": 20371 }, { "epoch": 66.79344262295082, "grad_norm": 16.289888381958008, "learning_rate": 5.246892593245313e-06, "loss": 0.3381, "step": 20372 }, { "epoch": 66.79672131147541, "grad_norm": 7.540300369262695, "learning_rate": 5.245958352767339e-06, "loss": 0.4845, "step": 20373 }, { "epoch": 66.8, "grad_norm": 6.510104656219482, "learning_rate": 5.245024165896126e-06, "loss": 0.4235, "step": 20374 }, { "epoch": 66.80327868852459, "grad_norm": 5.161381721496582, "learning_rate": 5.244090032642207e-06, "loss": 0.6363, "step": 20375 }, { "epoch": 66.80655737704917, "grad_norm": 5.774048328399658, "learning_rate": 5.243155953016121e-06, "loss": 0.5022, "step": 20376 }, { "epoch": 66.80983606557378, "grad_norm": 4.819135665893555, "learning_rate": 5.242221927028399e-06, "loss": 0.2771, "step": 20377 }, { "epoch": 66.81311475409836, "grad_norm": 5.249420642852783, "learning_rate": 5.241287954689566e-06, "loss": 0.826, "step": 20378 }, { "epoch": 66.81639344262295, "grad_norm": 5.195110321044922, "learning_rate": 5.240354036010163e-06, "loss": 0.3509, "step": 20379 }, { "epoch": 66.81967213114754, "grad_norm": 4.303234100341797, "learning_rate": 5.239420171000717e-06, "loss": 0.4705, "step": 20380 }, { "epoch": 66.82295081967213, "grad_norm": 6.692070007324219, "learning_rate": 5.238486359671759e-06, "loss": 0.402, "step": 20381 }, { "epoch": 66.82622950819672, "grad_norm": 5.48853063583374, "learning_rate": 5.2375526020338155e-06, "loss": 0.3595, "step": 20382 }, { "epoch": 66.8295081967213, "grad_norm": 6.626587390899658, "learning_rate": 5.236618898097421e-06, "loss": 0.5123, "step": 20383 }, { "epoch": 66.8327868852459, "grad_norm": 5.401974678039551, "learning_rate": 5.235685247873099e-06, "loss": 0.2461, "step": 20384 }, { "epoch": 66.8360655737705, "grad_norm": 5.480771541595459, "learning_rate": 5.234751651371378e-06, "loss": 0.3121, "step": 20385 }, { "epoch": 66.83934426229509, "grad_norm": 5.010394096374512, "learning_rate": 5.233818108602791e-06, "loss": 0.4778, "step": 20386 }, { "epoch": 66.84262295081967, "grad_norm": 5.824285984039307, "learning_rate": 5.232884619577858e-06, "loss": 0.4704, "step": 20387 }, { "epoch": 66.84590163934426, "grad_norm": 5.755430698394775, "learning_rate": 5.231951184307109e-06, "loss": 0.5417, "step": 20388 }, { "epoch": 66.84918032786885, "grad_norm": 6.263921737670898, "learning_rate": 5.231017802801064e-06, "loss": 0.4199, "step": 20389 }, { "epoch": 66.85245901639344, "grad_norm": 7.496675968170166, "learning_rate": 5.230084475070255e-06, "loss": 0.3933, "step": 20390 }, { "epoch": 66.85573770491803, "grad_norm": 6.316381931304932, "learning_rate": 5.229151201125204e-06, "loss": 0.5559, "step": 20391 }, { "epoch": 66.85901639344263, "grad_norm": 4.527217388153076, "learning_rate": 5.228217980976433e-06, "loss": 0.4968, "step": 20392 }, { "epoch": 66.86229508196722, "grad_norm": 5.535275936126709, "learning_rate": 5.227284814634466e-06, "loss": 0.6178, "step": 20393 }, { "epoch": 66.8655737704918, "grad_norm": 5.512258052825928, "learning_rate": 5.226351702109821e-06, "loss": 0.727, "step": 20394 }, { "epoch": 66.8688524590164, "grad_norm": 7.845378875732422, "learning_rate": 5.225418643413027e-06, "loss": 0.4217, "step": 20395 }, { "epoch": 66.87213114754098, "grad_norm": 7.058484077453613, "learning_rate": 5.224485638554604e-06, "loss": 0.5051, "step": 20396 }, { "epoch": 66.87540983606557, "grad_norm": 6.142958164215088, "learning_rate": 5.223552687545069e-06, "loss": 0.3281, "step": 20397 }, { "epoch": 66.87868852459016, "grad_norm": 5.080392360687256, "learning_rate": 5.222619790394941e-06, "loss": 0.5139, "step": 20398 }, { "epoch": 66.88196721311475, "grad_norm": 5.122526168823242, "learning_rate": 5.221686947114745e-06, "loss": 0.4081, "step": 20399 }, { "epoch": 66.88524590163935, "grad_norm": 5.649041175842285, "learning_rate": 5.220754157714998e-06, "loss": 0.467, "step": 20400 }, { "epoch": 66.88852459016394, "grad_norm": 6.21834135055542, "learning_rate": 5.219821422206215e-06, "loss": 0.627, "step": 20401 }, { "epoch": 66.89180327868853, "grad_norm": 5.522474765777588, "learning_rate": 5.218888740598913e-06, "loss": 0.3997, "step": 20402 }, { "epoch": 66.89508196721312, "grad_norm": 17.742380142211914, "learning_rate": 5.217956112903616e-06, "loss": 0.4828, "step": 20403 }, { "epoch": 66.8983606557377, "grad_norm": 5.302117824554443, "learning_rate": 5.217023539130835e-06, "loss": 0.3486, "step": 20404 }, { "epoch": 66.90163934426229, "grad_norm": 5.786933422088623, "learning_rate": 5.216091019291088e-06, "loss": 0.4111, "step": 20405 }, { "epoch": 66.90491803278688, "grad_norm": 5.640315055847168, "learning_rate": 5.215158553394888e-06, "loss": 0.3378, "step": 20406 }, { "epoch": 66.90819672131147, "grad_norm": 7.150050163269043, "learning_rate": 5.214226141452745e-06, "loss": 0.2732, "step": 20407 }, { "epoch": 66.91147540983607, "grad_norm": 12.198474884033203, "learning_rate": 5.213293783475184e-06, "loss": 0.4398, "step": 20408 }, { "epoch": 66.91475409836066, "grad_norm": 6.802986145019531, "learning_rate": 5.212361479472712e-06, "loss": 0.3202, "step": 20409 }, { "epoch": 66.91803278688525, "grad_norm": 6.207213878631592, "learning_rate": 5.2114292294558425e-06, "loss": 0.3925, "step": 20410 }, { "epoch": 66.92131147540984, "grad_norm": 7.56230354309082, "learning_rate": 5.210497033435083e-06, "loss": 0.4356, "step": 20411 }, { "epoch": 66.92459016393443, "grad_norm": 4.524188041687012, "learning_rate": 5.209564891420953e-06, "loss": 0.271, "step": 20412 }, { "epoch": 66.92786885245901, "grad_norm": 6.972085475921631, "learning_rate": 5.208632803423959e-06, "loss": 0.5697, "step": 20413 }, { "epoch": 66.9311475409836, "grad_norm": 5.20444393157959, "learning_rate": 5.2077007694546134e-06, "loss": 0.5258, "step": 20414 }, { "epoch": 66.93442622950819, "grad_norm": 9.054658889770508, "learning_rate": 5.20676878952342e-06, "loss": 0.5497, "step": 20415 }, { "epoch": 66.9377049180328, "grad_norm": 4.3823323249816895, "learning_rate": 5.205836863640896e-06, "loss": 0.4876, "step": 20416 }, { "epoch": 66.94098360655738, "grad_norm": 5.8654351234436035, "learning_rate": 5.204904991817545e-06, "loss": 0.4472, "step": 20417 }, { "epoch": 66.94426229508197, "grad_norm": 4.962766647338867, "learning_rate": 5.203973174063876e-06, "loss": 0.2874, "step": 20418 }, { "epoch": 66.94754098360656, "grad_norm": 5.995235919952393, "learning_rate": 5.203041410390398e-06, "loss": 0.4355, "step": 20419 }, { "epoch": 66.95081967213115, "grad_norm": 6.020544528961182, "learning_rate": 5.202109700807612e-06, "loss": 0.3412, "step": 20420 }, { "epoch": 66.95409836065573, "grad_norm": 10.96370792388916, "learning_rate": 5.20117804532603e-06, "loss": 0.5454, "step": 20421 }, { "epoch": 66.95737704918032, "grad_norm": 4.915179252624512, "learning_rate": 5.200246443956154e-06, "loss": 0.5873, "step": 20422 }, { "epoch": 66.96065573770491, "grad_norm": 5.594960689544678, "learning_rate": 5.1993148967084916e-06, "loss": 0.2027, "step": 20423 }, { "epoch": 66.96393442622951, "grad_norm": 8.113364219665527, "learning_rate": 5.198383403593541e-06, "loss": 0.3703, "step": 20424 }, { "epoch": 66.9672131147541, "grad_norm": 6.673568248748779, "learning_rate": 5.197451964621813e-06, "loss": 0.3056, "step": 20425 }, { "epoch": 66.97049180327869, "grad_norm": 5.381161689758301, "learning_rate": 5.196520579803808e-06, "loss": 0.2752, "step": 20426 }, { "epoch": 66.97377049180328, "grad_norm": 4.593443393707275, "learning_rate": 5.195589249150028e-06, "loss": 0.4408, "step": 20427 }, { "epoch": 66.97704918032787, "grad_norm": 5.857973575592041, "learning_rate": 5.194657972670971e-06, "loss": 0.7992, "step": 20428 }, { "epoch": 66.98032786885246, "grad_norm": 5.1426239013671875, "learning_rate": 5.193726750377146e-06, "loss": 0.2274, "step": 20429 }, { "epoch": 66.98360655737704, "grad_norm": 5.610762596130371, "learning_rate": 5.192795582279049e-06, "loss": 0.3997, "step": 20430 }, { "epoch": 66.98688524590163, "grad_norm": 4.633204936981201, "learning_rate": 5.191864468387176e-06, "loss": 0.6063, "step": 20431 }, { "epoch": 66.99016393442623, "grad_norm": 5.782498836517334, "learning_rate": 5.190933408712033e-06, "loss": 0.569, "step": 20432 }, { "epoch": 66.99344262295082, "grad_norm": 23.128883361816406, "learning_rate": 5.190002403264116e-06, "loss": 0.415, "step": 20433 }, { "epoch": 66.99672131147541, "grad_norm": 6.270152568817139, "learning_rate": 5.18907145205392e-06, "loss": 0.4064, "step": 20434 }, { "epoch": 67.0, "grad_norm": 5.755824089050293, "learning_rate": 5.18814055509195e-06, "loss": 0.5167, "step": 20435 }, { "epoch": 67.00327868852459, "grad_norm": 6.9665141105651855, "learning_rate": 5.187209712388696e-06, "loss": 0.48, "step": 20436 }, { "epoch": 67.00655737704918, "grad_norm": 5.083208084106445, "learning_rate": 5.186278923954655e-06, "loss": 0.521, "step": 20437 }, { "epoch": 67.00983606557377, "grad_norm": 5.3059611320495605, "learning_rate": 5.185348189800328e-06, "loss": 0.6046, "step": 20438 }, { "epoch": 67.01311475409837, "grad_norm": 5.791696548461914, "learning_rate": 5.184417509936206e-06, "loss": 0.4699, "step": 20439 }, { "epoch": 67.01639344262296, "grad_norm": 7.1715006828308105, "learning_rate": 5.183486884372782e-06, "loss": 0.2897, "step": 20440 }, { "epoch": 67.01967213114754, "grad_norm": 4.757732391357422, "learning_rate": 5.182556313120553e-06, "loss": 0.3713, "step": 20441 }, { "epoch": 67.02295081967213, "grad_norm": 5.396963596343994, "learning_rate": 5.181625796190005e-06, "loss": 0.4071, "step": 20442 }, { "epoch": 67.02622950819672, "grad_norm": 4.437233924865723, "learning_rate": 5.180695333591641e-06, "loss": 0.3366, "step": 20443 }, { "epoch": 67.02950819672131, "grad_norm": 4.817759990692139, "learning_rate": 5.179764925335948e-06, "loss": 0.4802, "step": 20444 }, { "epoch": 67.0327868852459, "grad_norm": 5.543038368225098, "learning_rate": 5.178834571433416e-06, "loss": 0.5268, "step": 20445 }, { "epoch": 67.03606557377049, "grad_norm": 4.852062225341797, "learning_rate": 5.177904271894535e-06, "loss": 0.3948, "step": 20446 }, { "epoch": 67.03934426229509, "grad_norm": 6.65016508102417, "learning_rate": 5.1769740267297995e-06, "loss": 0.3649, "step": 20447 }, { "epoch": 67.04262295081968, "grad_norm": 6.821762561798096, "learning_rate": 5.1760438359496975e-06, "loss": 0.3223, "step": 20448 }, { "epoch": 67.04590163934427, "grad_norm": 5.535268306732178, "learning_rate": 5.175113699564716e-06, "loss": 0.3145, "step": 20449 }, { "epoch": 67.04918032786885, "grad_norm": 19.782968521118164, "learning_rate": 5.17418361758534e-06, "loss": 0.2604, "step": 20450 }, { "epoch": 67.05245901639344, "grad_norm": 5.65020227432251, "learning_rate": 5.173253590022067e-06, "loss": 0.576, "step": 20451 }, { "epoch": 67.05573770491803, "grad_norm": 5.7786736488342285, "learning_rate": 5.172323616885378e-06, "loss": 0.5078, "step": 20452 }, { "epoch": 67.05901639344262, "grad_norm": 5.727726459503174, "learning_rate": 5.1713936981857585e-06, "loss": 0.4586, "step": 20453 }, { "epoch": 67.0622950819672, "grad_norm": 8.920320510864258, "learning_rate": 5.170463833933696e-06, "loss": 0.516, "step": 20454 }, { "epoch": 67.06557377049181, "grad_norm": 5.538409233093262, "learning_rate": 5.169534024139671e-06, "loss": 0.5081, "step": 20455 }, { "epoch": 67.0688524590164, "grad_norm": 5.574304103851318, "learning_rate": 5.1686042688141755e-06, "loss": 0.3941, "step": 20456 }, { "epoch": 67.07213114754099, "grad_norm": 4.0129828453063965, "learning_rate": 5.167674567967692e-06, "loss": 0.2999, "step": 20457 }, { "epoch": 67.07540983606557, "grad_norm": 3.9456350803375244, "learning_rate": 5.166744921610701e-06, "loss": 0.4148, "step": 20458 }, { "epoch": 67.07868852459016, "grad_norm": 5.921222686767578, "learning_rate": 5.165815329753683e-06, "loss": 0.3586, "step": 20459 }, { "epoch": 67.08196721311475, "grad_norm": 6.206542491912842, "learning_rate": 5.164885792407127e-06, "loss": 0.4635, "step": 20460 }, { "epoch": 67.08524590163934, "grad_norm": 8.257442474365234, "learning_rate": 5.163956309581512e-06, "loss": 0.4068, "step": 20461 }, { "epoch": 67.08852459016393, "grad_norm": 5.456117153167725, "learning_rate": 5.163026881287316e-06, "loss": 0.3665, "step": 20462 }, { "epoch": 67.09180327868853, "grad_norm": 5.308799743652344, "learning_rate": 5.162097507535018e-06, "loss": 0.6033, "step": 20463 }, { "epoch": 67.09508196721312, "grad_norm": 7.029352188110352, "learning_rate": 5.161168188335105e-06, "loss": 0.3412, "step": 20464 }, { "epoch": 67.09836065573771, "grad_norm": 4.963196754455566, "learning_rate": 5.160238923698051e-06, "loss": 0.2241, "step": 20465 }, { "epoch": 67.1016393442623, "grad_norm": 5.512874126434326, "learning_rate": 5.159309713634337e-06, "loss": 0.3039, "step": 20466 }, { "epoch": 67.10491803278688, "grad_norm": 6.313636779785156, "learning_rate": 5.158380558154437e-06, "loss": 0.4248, "step": 20467 }, { "epoch": 67.10819672131147, "grad_norm": 10.517529487609863, "learning_rate": 5.157451457268827e-06, "loss": 0.3522, "step": 20468 }, { "epoch": 67.11147540983606, "grad_norm": 5.815296649932861, "learning_rate": 5.156522410987992e-06, "loss": 0.3767, "step": 20469 }, { "epoch": 67.11475409836065, "grad_norm": 7.085298538208008, "learning_rate": 5.155593419322401e-06, "loss": 0.3976, "step": 20470 }, { "epoch": 67.11803278688525, "grad_norm": 6.930247783660889, "learning_rate": 5.154664482282532e-06, "loss": 0.2741, "step": 20471 }, { "epoch": 67.12131147540984, "grad_norm": 6.101863384246826, "learning_rate": 5.1537355998788555e-06, "loss": 0.7142, "step": 20472 }, { "epoch": 67.12459016393443, "grad_norm": 5.7312164306640625, "learning_rate": 5.152806772121852e-06, "loss": 0.425, "step": 20473 }, { "epoch": 67.12786885245902, "grad_norm": 7.0647807121276855, "learning_rate": 5.151877999021992e-06, "loss": 0.407, "step": 20474 }, { "epoch": 67.1311475409836, "grad_norm": 4.720301151275635, "learning_rate": 5.150949280589748e-06, "loss": 0.398, "step": 20475 }, { "epoch": 67.1344262295082, "grad_norm": 8.524706840515137, "learning_rate": 5.15002061683559e-06, "loss": 0.3837, "step": 20476 }, { "epoch": 67.13770491803278, "grad_norm": 5.399238586425781, "learning_rate": 5.149092007769994e-06, "loss": 0.4245, "step": 20477 }, { "epoch": 67.14098360655737, "grad_norm": 6.260247230529785, "learning_rate": 5.148163453403431e-06, "loss": 0.3878, "step": 20478 }, { "epoch": 67.14426229508197, "grad_norm": 4.975168704986572, "learning_rate": 5.1472349537463695e-06, "loss": 0.4352, "step": 20479 }, { "epoch": 67.14754098360656, "grad_norm": 4.451534748077393, "learning_rate": 5.146306508809275e-06, "loss": 0.4289, "step": 20480 }, { "epoch": 67.15081967213115, "grad_norm": 5.852044105529785, "learning_rate": 5.145378118602626e-06, "loss": 0.5306, "step": 20481 }, { "epoch": 67.15409836065574, "grad_norm": 6.038863658905029, "learning_rate": 5.144449783136886e-06, "loss": 0.5115, "step": 20482 }, { "epoch": 67.15737704918033, "grad_norm": 7.413521766662598, "learning_rate": 5.1435215024225215e-06, "loss": 0.4773, "step": 20483 }, { "epoch": 67.16065573770491, "grad_norm": 5.418913841247559, "learning_rate": 5.14259327647e-06, "loss": 0.4282, "step": 20484 }, { "epoch": 67.1639344262295, "grad_norm": 5.142923355102539, "learning_rate": 5.141665105289792e-06, "loss": 0.5421, "step": 20485 }, { "epoch": 67.1672131147541, "grad_norm": 5.226281642913818, "learning_rate": 5.140736988892363e-06, "loss": 0.5267, "step": 20486 }, { "epoch": 67.1704918032787, "grad_norm": 7.549836158752441, "learning_rate": 5.139808927288173e-06, "loss": 0.4702, "step": 20487 }, { "epoch": 67.17377049180328, "grad_norm": 5.3270697593688965, "learning_rate": 5.138880920487692e-06, "loss": 0.3033, "step": 20488 }, { "epoch": 67.17704918032787, "grad_norm": 5.0262651443481445, "learning_rate": 5.137952968501385e-06, "loss": 0.4815, "step": 20489 }, { "epoch": 67.18032786885246, "grad_norm": 8.952159881591797, "learning_rate": 5.137025071339709e-06, "loss": 0.4033, "step": 20490 }, { "epoch": 67.18360655737705, "grad_norm": 6.4553937911987305, "learning_rate": 5.136097229013135e-06, "loss": 0.5203, "step": 20491 }, { "epoch": 67.18688524590164, "grad_norm": 4.50532865524292, "learning_rate": 5.135169441532123e-06, "loss": 0.41, "step": 20492 }, { "epoch": 67.19016393442622, "grad_norm": 4.566690921783447, "learning_rate": 5.1342417089071325e-06, "loss": 0.4533, "step": 20493 }, { "epoch": 67.19344262295083, "grad_norm": 5.716766357421875, "learning_rate": 5.133314031148623e-06, "loss": 0.3575, "step": 20494 }, { "epoch": 67.19672131147541, "grad_norm": 5.497541427612305, "learning_rate": 5.132386408267062e-06, "loss": 0.4243, "step": 20495 }, { "epoch": 67.2, "grad_norm": 8.56274127960205, "learning_rate": 5.131458840272905e-06, "loss": 0.3529, "step": 20496 }, { "epoch": 67.20327868852459, "grad_norm": 6.0383830070495605, "learning_rate": 5.130531327176611e-06, "loss": 0.4896, "step": 20497 }, { "epoch": 67.20655737704918, "grad_norm": 4.69154691696167, "learning_rate": 5.129603868988635e-06, "loss": 0.207, "step": 20498 }, { "epoch": 67.20983606557377, "grad_norm": 7.613338947296143, "learning_rate": 5.1286764657194446e-06, "loss": 0.2982, "step": 20499 }, { "epoch": 67.21311475409836, "grad_norm": 5.05598783493042, "learning_rate": 5.1277491173794905e-06, "loss": 0.4377, "step": 20500 }, { "epoch": 67.21639344262294, "grad_norm": 5.498117923736572, "learning_rate": 5.126821823979233e-06, "loss": 0.4589, "step": 20501 }, { "epoch": 67.21967213114755, "grad_norm": 7.914686679840088, "learning_rate": 5.125894585529121e-06, "loss": 0.4378, "step": 20502 }, { "epoch": 67.22295081967214, "grad_norm": 7.651241302490234, "learning_rate": 5.124967402039618e-06, "loss": 0.6702, "step": 20503 }, { "epoch": 67.22622950819672, "grad_norm": 5.333798408508301, "learning_rate": 5.124040273521178e-06, "loss": 0.3366, "step": 20504 }, { "epoch": 67.22950819672131, "grad_norm": 5.93007230758667, "learning_rate": 5.123113199984253e-06, "loss": 0.4563, "step": 20505 }, { "epoch": 67.2327868852459, "grad_norm": 5.5250091552734375, "learning_rate": 5.122186181439298e-06, "loss": 0.466, "step": 20506 }, { "epoch": 67.23606557377049, "grad_norm": 10.901252746582031, "learning_rate": 5.121259217896762e-06, "loss": 0.3007, "step": 20507 }, { "epoch": 67.23934426229508, "grad_norm": 7.294511318206787, "learning_rate": 5.120332309367103e-06, "loss": 0.6453, "step": 20508 }, { "epoch": 67.24262295081967, "grad_norm": 6.208440780639648, "learning_rate": 5.119405455860772e-06, "loss": 0.4068, "step": 20509 }, { "epoch": 67.24590163934427, "grad_norm": 5.151294708251953, "learning_rate": 5.118478657388219e-06, "loss": 0.4688, "step": 20510 }, { "epoch": 67.24918032786886, "grad_norm": 7.716441631317139, "learning_rate": 5.11755191395989e-06, "loss": 0.3319, "step": 20511 }, { "epoch": 67.25245901639344, "grad_norm": 5.163569927215576, "learning_rate": 5.116625225586245e-06, "loss": 0.349, "step": 20512 }, { "epoch": 67.25573770491803, "grad_norm": 5.5686726570129395, "learning_rate": 5.115698592277727e-06, "loss": 0.5827, "step": 20513 }, { "epoch": 67.25901639344262, "grad_norm": 4.730583667755127, "learning_rate": 5.114772014044787e-06, "loss": 0.3555, "step": 20514 }, { "epoch": 67.26229508196721, "grad_norm": 5.75999116897583, "learning_rate": 5.1138454908978665e-06, "loss": 0.2637, "step": 20515 }, { "epoch": 67.2655737704918, "grad_norm": 7.900751113891602, "learning_rate": 5.112919022847422e-06, "loss": 0.5977, "step": 20516 }, { "epoch": 67.26885245901639, "grad_norm": 6.102478504180908, "learning_rate": 5.111992609903898e-06, "loss": 0.3104, "step": 20517 }, { "epoch": 67.27213114754099, "grad_norm": 7.629745006561279, "learning_rate": 5.111066252077739e-06, "loss": 0.5665, "step": 20518 }, { "epoch": 67.27540983606558, "grad_norm": 6.62880802154541, "learning_rate": 5.1101399493793915e-06, "loss": 0.5028, "step": 20519 }, { "epoch": 67.27868852459017, "grad_norm": 5.367081642150879, "learning_rate": 5.109213701819296e-06, "loss": 0.5439, "step": 20520 }, { "epoch": 67.28196721311475, "grad_norm": 6.465683937072754, "learning_rate": 5.108287509407905e-06, "loss": 0.4347, "step": 20521 }, { "epoch": 67.28524590163934, "grad_norm": 5.305942535400391, "learning_rate": 5.107361372155659e-06, "loss": 0.6625, "step": 20522 }, { "epoch": 67.28852459016393, "grad_norm": 6.466213226318359, "learning_rate": 5.106435290073e-06, "loss": 0.4499, "step": 20523 }, { "epoch": 67.29180327868852, "grad_norm": 6.300034523010254, "learning_rate": 5.105509263170367e-06, "loss": 0.4026, "step": 20524 }, { "epoch": 67.29508196721312, "grad_norm": 5.664036273956299, "learning_rate": 5.104583291458212e-06, "loss": 0.5224, "step": 20525 }, { "epoch": 67.29836065573771, "grad_norm": 5.53132438659668, "learning_rate": 5.103657374946969e-06, "loss": 0.526, "step": 20526 }, { "epoch": 67.3016393442623, "grad_norm": 3.6352574825286865, "learning_rate": 5.102731513647079e-06, "loss": 0.1826, "step": 20527 }, { "epoch": 67.30491803278689, "grad_norm": 5.4715070724487305, "learning_rate": 5.1018057075689805e-06, "loss": 0.3995, "step": 20528 }, { "epoch": 67.30819672131148, "grad_norm": 7.412134647369385, "learning_rate": 5.100879956723119e-06, "loss": 0.4556, "step": 20529 }, { "epoch": 67.31147540983606, "grad_norm": 6.642816066741943, "learning_rate": 5.099954261119929e-06, "loss": 0.4225, "step": 20530 }, { "epoch": 67.31475409836065, "grad_norm": 5.875857830047607, "learning_rate": 5.09902862076985e-06, "loss": 0.4102, "step": 20531 }, { "epoch": 67.31803278688524, "grad_norm": 9.43879508972168, "learning_rate": 5.09810303568332e-06, "loss": 0.5095, "step": 20532 }, { "epoch": 67.32131147540984, "grad_norm": 4.2732343673706055, "learning_rate": 5.0971775058707695e-06, "loss": 0.3357, "step": 20533 }, { "epoch": 67.32459016393443, "grad_norm": 6.015626907348633, "learning_rate": 5.096252031342646e-06, "loss": 0.463, "step": 20534 }, { "epoch": 67.32786885245902, "grad_norm": 10.304022789001465, "learning_rate": 5.095326612109378e-06, "loss": 0.4601, "step": 20535 }, { "epoch": 67.33114754098361, "grad_norm": 4.554564952850342, "learning_rate": 5.094401248181399e-06, "loss": 0.3387, "step": 20536 }, { "epoch": 67.3344262295082, "grad_norm": 5.644933223724365, "learning_rate": 5.09347593956915e-06, "loss": 0.4625, "step": 20537 }, { "epoch": 67.33770491803278, "grad_norm": 8.286637306213379, "learning_rate": 5.092550686283061e-06, "loss": 0.2843, "step": 20538 }, { "epoch": 67.34098360655737, "grad_norm": 6.446742057800293, "learning_rate": 5.091625488333561e-06, "loss": 0.3776, "step": 20539 }, { "epoch": 67.34426229508196, "grad_norm": 5.908022880554199, "learning_rate": 5.090700345731092e-06, "loss": 0.4442, "step": 20540 }, { "epoch": 67.34754098360656, "grad_norm": 5.028944969177246, "learning_rate": 5.089775258486081e-06, "loss": 0.4012, "step": 20541 }, { "epoch": 67.35081967213115, "grad_norm": 5.281213760375977, "learning_rate": 5.088850226608954e-06, "loss": 0.3527, "step": 20542 }, { "epoch": 67.35409836065574, "grad_norm": 5.287090301513672, "learning_rate": 5.087925250110153e-06, "loss": 0.4664, "step": 20543 }, { "epoch": 67.35737704918033, "grad_norm": 4.204860210418701, "learning_rate": 5.087000329000101e-06, "loss": 0.4131, "step": 20544 }, { "epoch": 67.36065573770492, "grad_norm": 4.495246410369873, "learning_rate": 5.086075463289229e-06, "loss": 0.2828, "step": 20545 }, { "epoch": 67.3639344262295, "grad_norm": 5.086669445037842, "learning_rate": 5.085150652987962e-06, "loss": 0.4638, "step": 20546 }, { "epoch": 67.3672131147541, "grad_norm": 4.694277763366699, "learning_rate": 5.084225898106734e-06, "loss": 0.3641, "step": 20547 }, { "epoch": 67.37049180327868, "grad_norm": 5.5998992919921875, "learning_rate": 5.08330119865597e-06, "loss": 0.377, "step": 20548 }, { "epoch": 67.37377049180328, "grad_norm": 3.435887575149536, "learning_rate": 5.082376554646098e-06, "loss": 0.2262, "step": 20549 }, { "epoch": 67.37704918032787, "grad_norm": 5.534112930297852, "learning_rate": 5.081451966087539e-06, "loss": 0.4661, "step": 20550 }, { "epoch": 67.38032786885246, "grad_norm": 5.2601847648620605, "learning_rate": 5.080527432990727e-06, "loss": 0.6098, "step": 20551 }, { "epoch": 67.38360655737705, "grad_norm": 5.011318683624268, "learning_rate": 5.0796029553660845e-06, "loss": 0.265, "step": 20552 }, { "epoch": 67.38688524590164, "grad_norm": 6.31744384765625, "learning_rate": 5.078678533224033e-06, "loss": 0.4618, "step": 20553 }, { "epoch": 67.39016393442623, "grad_norm": 6.28855037689209, "learning_rate": 5.0777541665749955e-06, "loss": 0.676, "step": 20554 }, { "epoch": 67.39344262295081, "grad_norm": 5.6472086906433105, "learning_rate": 5.076829855429399e-06, "loss": 0.2734, "step": 20555 }, { "epoch": 67.3967213114754, "grad_norm": 6.286689758300781, "learning_rate": 5.075905599797668e-06, "loss": 0.5822, "step": 20556 }, { "epoch": 67.4, "grad_norm": 6.079956531524658, "learning_rate": 5.074981399690219e-06, "loss": 0.3713, "step": 20557 }, { "epoch": 67.4032786885246, "grad_norm": 4.2866716384887695, "learning_rate": 5.074057255117475e-06, "loss": 0.1163, "step": 20558 }, { "epoch": 67.40655737704918, "grad_norm": 6.36495304107666, "learning_rate": 5.073133166089854e-06, "loss": 0.7427, "step": 20559 }, { "epoch": 67.40983606557377, "grad_norm": 9.129558563232422, "learning_rate": 5.072209132617784e-06, "loss": 0.5272, "step": 20560 }, { "epoch": 67.41311475409836, "grad_norm": 5.698281764984131, "learning_rate": 5.071285154711678e-06, "loss": 0.3204, "step": 20561 }, { "epoch": 67.41639344262295, "grad_norm": 5.607081890106201, "learning_rate": 5.070361232381958e-06, "loss": 0.5744, "step": 20562 }, { "epoch": 67.41967213114754, "grad_norm": 4.982420444488525, "learning_rate": 5.069437365639036e-06, "loss": 0.5572, "step": 20563 }, { "epoch": 67.42295081967212, "grad_norm": 4.2049713134765625, "learning_rate": 5.068513554493339e-06, "loss": 0.4275, "step": 20564 }, { "epoch": 67.42622950819673, "grad_norm": 11.563176155090332, "learning_rate": 5.067589798955278e-06, "loss": 0.3024, "step": 20565 }, { "epoch": 67.42950819672132, "grad_norm": 5.277818202972412, "learning_rate": 5.066666099035271e-06, "loss": 0.5018, "step": 20566 }, { "epoch": 67.4327868852459, "grad_norm": 6.32296895980835, "learning_rate": 5.0657424547437285e-06, "loss": 0.2864, "step": 20567 }, { "epoch": 67.43606557377049, "grad_norm": 5.6727423667907715, "learning_rate": 5.0648188660910745e-06, "loss": 0.3944, "step": 20568 }, { "epoch": 67.43934426229508, "grad_norm": 5.269607067108154, "learning_rate": 5.063895333087719e-06, "loss": 0.7007, "step": 20569 }, { "epoch": 67.44262295081967, "grad_norm": 14.797537803649902, "learning_rate": 5.062971855744077e-06, "loss": 0.3599, "step": 20570 }, { "epoch": 67.44590163934426, "grad_norm": 5.151233196258545, "learning_rate": 5.062048434070559e-06, "loss": 0.5351, "step": 20571 }, { "epoch": 67.44918032786886, "grad_norm": 6.823970317840576, "learning_rate": 5.061125068077575e-06, "loss": 0.6411, "step": 20572 }, { "epoch": 67.45245901639345, "grad_norm": 6.571676254272461, "learning_rate": 5.0602017577755465e-06, "loss": 0.4709, "step": 20573 }, { "epoch": 67.45573770491804, "grad_norm": 5.57496976852417, "learning_rate": 5.059278503174878e-06, "loss": 0.4477, "step": 20574 }, { "epoch": 67.45901639344262, "grad_norm": 5.506742000579834, "learning_rate": 5.058355304285982e-06, "loss": 0.4306, "step": 20575 }, { "epoch": 67.46229508196721, "grad_norm": 3.970787525177002, "learning_rate": 5.057432161119264e-06, "loss": 0.236, "step": 20576 }, { "epoch": 67.4655737704918, "grad_norm": 4.46680212020874, "learning_rate": 5.05650907368514e-06, "loss": 0.201, "step": 20577 }, { "epoch": 67.46885245901639, "grad_norm": 13.494009017944336, "learning_rate": 5.055586041994019e-06, "loss": 0.5187, "step": 20578 }, { "epoch": 67.47213114754098, "grad_norm": 10.144292831420898, "learning_rate": 5.0546630660563045e-06, "loss": 0.5491, "step": 20579 }, { "epoch": 67.47540983606558, "grad_norm": 5.239211082458496, "learning_rate": 5.053740145882402e-06, "loss": 0.318, "step": 20580 }, { "epoch": 67.47868852459017, "grad_norm": 5.736299991607666, "learning_rate": 5.052817281482726e-06, "loss": 0.3989, "step": 20581 }, { "epoch": 67.48196721311476, "grad_norm": 5.215611934661865, "learning_rate": 5.051894472867679e-06, "loss": 0.3061, "step": 20582 }, { "epoch": 67.48524590163935, "grad_norm": 6.410972595214844, "learning_rate": 5.050971720047666e-06, "loss": 0.3788, "step": 20583 }, { "epoch": 67.48852459016393, "grad_norm": 4.755325794219971, "learning_rate": 5.050049023033093e-06, "loss": 0.3519, "step": 20584 }, { "epoch": 67.49180327868852, "grad_norm": 4.12502384185791, "learning_rate": 5.049126381834361e-06, "loss": 0.5806, "step": 20585 }, { "epoch": 67.49508196721311, "grad_norm": 4.785105228424072, "learning_rate": 5.0482037964618795e-06, "loss": 0.4622, "step": 20586 }, { "epoch": 67.4983606557377, "grad_norm": 5.241650104522705, "learning_rate": 5.047281266926049e-06, "loss": 0.23, "step": 20587 }, { "epoch": 67.5016393442623, "grad_norm": 4.893016338348389, "learning_rate": 5.046358793237271e-06, "loss": 0.3382, "step": 20588 }, { "epoch": 67.50491803278689, "grad_norm": 5.671041011810303, "learning_rate": 5.045436375405945e-06, "loss": 0.4636, "step": 20589 }, { "epoch": 67.50819672131148, "grad_norm": 12.175126075744629, "learning_rate": 5.0445140134424795e-06, "loss": 0.3231, "step": 20590 }, { "epoch": 67.51147540983607, "grad_norm": 4.67448091506958, "learning_rate": 5.043591707357271e-06, "loss": 0.397, "step": 20591 }, { "epoch": 67.51475409836065, "grad_norm": 6.520418167114258, "learning_rate": 5.042669457160715e-06, "loss": 0.4748, "step": 20592 }, { "epoch": 67.51803278688524, "grad_norm": 4.777408599853516, "learning_rate": 5.041747262863219e-06, "loss": 0.3852, "step": 20593 }, { "epoch": 67.52131147540983, "grad_norm": 6.975821495056152, "learning_rate": 5.040825124475177e-06, "loss": 0.6687, "step": 20594 }, { "epoch": 67.52459016393442, "grad_norm": 5.6463236808776855, "learning_rate": 5.039903042006986e-06, "loss": 0.5423, "step": 20595 }, { "epoch": 67.52786885245902, "grad_norm": 5.286209583282471, "learning_rate": 5.0389810154690485e-06, "loss": 0.3381, "step": 20596 }, { "epoch": 67.53114754098361, "grad_norm": 5.051987171173096, "learning_rate": 5.038059044871758e-06, "loss": 0.4223, "step": 20597 }, { "epoch": 67.5344262295082, "grad_norm": 5.685335636138916, "learning_rate": 5.037137130225507e-06, "loss": 0.4576, "step": 20598 }, { "epoch": 67.53770491803279, "grad_norm": 5.443858623504639, "learning_rate": 5.036215271540699e-06, "loss": 0.4023, "step": 20599 }, { "epoch": 67.54098360655738, "grad_norm": 4.66466760635376, "learning_rate": 5.035293468827724e-06, "loss": 0.4618, "step": 20600 }, { "epoch": 67.54426229508196, "grad_norm": 4.996200084686279, "learning_rate": 5.034371722096979e-06, "loss": 0.5905, "step": 20601 }, { "epoch": 67.54754098360655, "grad_norm": 5.438222885131836, "learning_rate": 5.0334500313588504e-06, "loss": 0.5162, "step": 20602 }, { "epoch": 67.55081967213114, "grad_norm": 5.106089115142822, "learning_rate": 5.032528396623741e-06, "loss": 0.2735, "step": 20603 }, { "epoch": 67.55409836065574, "grad_norm": 4.672598838806152, "learning_rate": 5.031606817902039e-06, "loss": 0.4517, "step": 20604 }, { "epoch": 67.55737704918033, "grad_norm": 4.580109596252441, "learning_rate": 5.030685295204136e-06, "loss": 0.3328, "step": 20605 }, { "epoch": 67.56065573770492, "grad_norm": 4.703487396240234, "learning_rate": 5.029763828540419e-06, "loss": 0.4839, "step": 20606 }, { "epoch": 67.56393442622951, "grad_norm": 5.043468952178955, "learning_rate": 5.028842417921287e-06, "loss": 0.4226, "step": 20607 }, { "epoch": 67.5672131147541, "grad_norm": 7.1107988357543945, "learning_rate": 5.027921063357124e-06, "loss": 0.655, "step": 20608 }, { "epoch": 67.57049180327868, "grad_norm": 5.05792236328125, "learning_rate": 5.026999764858322e-06, "loss": 0.5093, "step": 20609 }, { "epoch": 67.57377049180327, "grad_norm": 4.554471492767334, "learning_rate": 5.026078522435267e-06, "loss": 0.4506, "step": 20610 }, { "epoch": 67.57704918032788, "grad_norm": 4.272867202758789, "learning_rate": 5.025157336098346e-06, "loss": 0.4784, "step": 20611 }, { "epoch": 67.58032786885246, "grad_norm": 4.950891017913818, "learning_rate": 5.02423620585795e-06, "loss": 0.7615, "step": 20612 }, { "epoch": 67.58360655737705, "grad_norm": 6.541472911834717, "learning_rate": 5.023315131724466e-06, "loss": 0.4602, "step": 20613 }, { "epoch": 67.58688524590164, "grad_norm": 5.922569274902344, "learning_rate": 5.02239411370828e-06, "loss": 0.2817, "step": 20614 }, { "epoch": 67.59016393442623, "grad_norm": 7.820285320281982, "learning_rate": 5.021473151819769e-06, "loss": 0.3002, "step": 20615 }, { "epoch": 67.59344262295082, "grad_norm": 5.127241134643555, "learning_rate": 5.02055224606933e-06, "loss": 0.6957, "step": 20616 }, { "epoch": 67.5967213114754, "grad_norm": 5.597306728363037, "learning_rate": 5.019631396467341e-06, "loss": 0.4864, "step": 20617 }, { "epoch": 67.6, "grad_norm": 5.3690595626831055, "learning_rate": 5.018710603024187e-06, "loss": 0.388, "step": 20618 }, { "epoch": 67.6032786885246, "grad_norm": 7.538817405700684, "learning_rate": 5.017789865750246e-06, "loss": 0.3288, "step": 20619 }, { "epoch": 67.60655737704919, "grad_norm": 18.670997619628906, "learning_rate": 5.016869184655908e-06, "loss": 0.362, "step": 20620 }, { "epoch": 67.60983606557377, "grad_norm": 8.706451416015625, "learning_rate": 5.015948559751551e-06, "loss": 0.2984, "step": 20621 }, { "epoch": 67.61311475409836, "grad_norm": 11.972062110900879, "learning_rate": 5.015027991047557e-06, "loss": 0.4872, "step": 20622 }, { "epoch": 67.61639344262295, "grad_norm": 5.093382358551025, "learning_rate": 5.014107478554305e-06, "loss": 0.4063, "step": 20623 }, { "epoch": 67.61967213114754, "grad_norm": 9.042863845825195, "learning_rate": 5.013187022282171e-06, "loss": 0.3777, "step": 20624 }, { "epoch": 67.62295081967213, "grad_norm": 6.111413955688477, "learning_rate": 5.012266622241544e-06, "loss": 0.621, "step": 20625 }, { "epoch": 67.62622950819672, "grad_norm": 5.037566661834717, "learning_rate": 5.011346278442794e-06, "loss": 0.5076, "step": 20626 }, { "epoch": 67.62950819672132, "grad_norm": 5.04642915725708, "learning_rate": 5.010425990896304e-06, "loss": 0.4139, "step": 20627 }, { "epoch": 67.6327868852459, "grad_norm": 8.141300201416016, "learning_rate": 5.009505759612443e-06, "loss": 0.4712, "step": 20628 }, { "epoch": 67.6360655737705, "grad_norm": 7.5271477699279785, "learning_rate": 5.008585584601598e-06, "loss": 0.2067, "step": 20629 }, { "epoch": 67.63934426229508, "grad_norm": 5.0730414390563965, "learning_rate": 5.0076654658741405e-06, "loss": 0.3914, "step": 20630 }, { "epoch": 67.64262295081967, "grad_norm": 8.079277038574219, "learning_rate": 5.006745403440445e-06, "loss": 0.6402, "step": 20631 }, { "epoch": 67.64590163934426, "grad_norm": 4.36098575592041, "learning_rate": 5.005825397310884e-06, "loss": 0.4899, "step": 20632 }, { "epoch": 67.64918032786885, "grad_norm": 5.602046489715576, "learning_rate": 5.004905447495838e-06, "loss": 0.2809, "step": 20633 }, { "epoch": 67.65245901639344, "grad_norm": 6.39695930480957, "learning_rate": 5.003985554005676e-06, "loss": 0.8056, "step": 20634 }, { "epoch": 67.65573770491804, "grad_norm": 5.676152229309082, "learning_rate": 5.003065716850771e-06, "loss": 0.5768, "step": 20635 }, { "epoch": 67.65901639344263, "grad_norm": 7.81513786315918, "learning_rate": 5.002145936041496e-06, "loss": 0.4931, "step": 20636 }, { "epoch": 67.66229508196722, "grad_norm": 5.219969749450684, "learning_rate": 5.0012262115882195e-06, "loss": 0.6268, "step": 20637 }, { "epoch": 67.6655737704918, "grad_norm": 8.857148170471191, "learning_rate": 5.000306543501316e-06, "loss": 0.5013, "step": 20638 }, { "epoch": 67.66885245901639, "grad_norm": 5.856065273284912, "learning_rate": 4.999386931791157e-06, "loss": 0.4365, "step": 20639 }, { "epoch": 67.67213114754098, "grad_norm": 5.136466979980469, "learning_rate": 4.998467376468109e-06, "loss": 0.4081, "step": 20640 }, { "epoch": 67.67540983606557, "grad_norm": 5.2776641845703125, "learning_rate": 4.997547877542538e-06, "loss": 0.2928, "step": 20641 }, { "epoch": 67.67868852459016, "grad_norm": 6.3501973152160645, "learning_rate": 4.996628435024819e-06, "loss": 0.5252, "step": 20642 }, { "epoch": 67.68196721311476, "grad_norm": 13.490697860717773, "learning_rate": 4.995709048925317e-06, "loss": 0.4151, "step": 20643 }, { "epoch": 67.68524590163935, "grad_norm": 8.479287147521973, "learning_rate": 4.994789719254395e-06, "loss": 0.4234, "step": 20644 }, { "epoch": 67.68852459016394, "grad_norm": 4.3906474113464355, "learning_rate": 4.993870446022426e-06, "loss": 0.5098, "step": 20645 }, { "epoch": 67.69180327868852, "grad_norm": 5.4399333000183105, "learning_rate": 4.992951229239774e-06, "loss": 0.2177, "step": 20646 }, { "epoch": 67.69508196721311, "grad_norm": 5.240034580230713, "learning_rate": 4.992032068916802e-06, "loss": 0.2738, "step": 20647 }, { "epoch": 67.6983606557377, "grad_norm": 5.021435737609863, "learning_rate": 4.991112965063872e-06, "loss": 0.5944, "step": 20648 }, { "epoch": 67.70163934426229, "grad_norm": 4.581910610198975, "learning_rate": 4.990193917691355e-06, "loss": 0.4762, "step": 20649 }, { "epoch": 67.70491803278688, "grad_norm": 5.511763572692871, "learning_rate": 4.989274926809611e-06, "loss": 0.6428, "step": 20650 }, { "epoch": 67.70819672131148, "grad_norm": 5.083390712738037, "learning_rate": 4.988355992428997e-06, "loss": 0.4442, "step": 20651 }, { "epoch": 67.71147540983607, "grad_norm": 4.394289970397949, "learning_rate": 4.987437114559885e-06, "loss": 0.542, "step": 20652 }, { "epoch": 67.71475409836066, "grad_norm": 4.194692134857178, "learning_rate": 4.986518293212631e-06, "loss": 0.3336, "step": 20653 }, { "epoch": 67.71803278688525, "grad_norm": 4.320433139801025, "learning_rate": 4.985599528397592e-06, "loss": 0.5682, "step": 20654 }, { "epoch": 67.72131147540983, "grad_norm": 8.14864730834961, "learning_rate": 4.984680820125135e-06, "loss": 0.6893, "step": 20655 }, { "epoch": 67.72459016393442, "grad_norm": 4.671836853027344, "learning_rate": 4.983762168405618e-06, "loss": 0.3784, "step": 20656 }, { "epoch": 67.72786885245901, "grad_norm": 4.741912841796875, "learning_rate": 4.982843573249397e-06, "loss": 0.7351, "step": 20657 }, { "epoch": 67.73114754098361, "grad_norm": 6.367825984954834, "learning_rate": 4.981925034666828e-06, "loss": 0.5821, "step": 20658 }, { "epoch": 67.7344262295082, "grad_norm": 5.535452365875244, "learning_rate": 4.9810065526682746e-06, "loss": 0.4576, "step": 20659 }, { "epoch": 67.73770491803279, "grad_norm": 5.328721523284912, "learning_rate": 4.980088127264092e-06, "loss": 0.3499, "step": 20660 }, { "epoch": 67.74098360655738, "grad_norm": 4.8325605392456055, "learning_rate": 4.979169758464635e-06, "loss": 0.2926, "step": 20661 }, { "epoch": 67.74426229508197, "grad_norm": 5.881931781768799, "learning_rate": 4.9782514462802575e-06, "loss": 0.5545, "step": 20662 }, { "epoch": 67.74754098360656, "grad_norm": 5.904040813446045, "learning_rate": 4.9773331907213156e-06, "loss": 0.4642, "step": 20663 }, { "epoch": 67.75081967213114, "grad_norm": 5.461737632751465, "learning_rate": 4.976414991798165e-06, "loss": 0.5454, "step": 20664 }, { "epoch": 67.75409836065573, "grad_norm": 5.104408264160156, "learning_rate": 4.97549684952116e-06, "loss": 0.4737, "step": 20665 }, { "epoch": 67.75737704918033, "grad_norm": 5.531894683837891, "learning_rate": 4.974578763900653e-06, "loss": 0.5204, "step": 20666 }, { "epoch": 67.76065573770492, "grad_norm": 4.752509117126465, "learning_rate": 4.97366073494699e-06, "loss": 0.4996, "step": 20667 }, { "epoch": 67.76393442622951, "grad_norm": 4.8634772300720215, "learning_rate": 4.972742762670533e-06, "loss": 0.3439, "step": 20668 }, { "epoch": 67.7672131147541, "grad_norm": 4.740372180938721, "learning_rate": 4.971824847081629e-06, "loss": 0.6181, "step": 20669 }, { "epoch": 67.77049180327869, "grad_norm": 4.678070068359375, "learning_rate": 4.970906988190627e-06, "loss": 0.6268, "step": 20670 }, { "epoch": 67.77377049180328, "grad_norm": 5.760928630828857, "learning_rate": 4.969989186007874e-06, "loss": 0.4204, "step": 20671 }, { "epoch": 67.77704918032786, "grad_norm": 4.571420669555664, "learning_rate": 4.969071440543727e-06, "loss": 0.5043, "step": 20672 }, { "epoch": 67.78032786885245, "grad_norm": 4.7784953117370605, "learning_rate": 4.96815375180853e-06, "loss": 0.3914, "step": 20673 }, { "epoch": 67.78360655737706, "grad_norm": 5.5657734870910645, "learning_rate": 4.967236119812631e-06, "loss": 0.5644, "step": 20674 }, { "epoch": 67.78688524590164, "grad_norm": 6.743614196777344, "learning_rate": 4.966318544566378e-06, "loss": 0.5227, "step": 20675 }, { "epoch": 67.79016393442623, "grad_norm": 4.520321369171143, "learning_rate": 4.9654010260801124e-06, "loss": 0.1614, "step": 20676 }, { "epoch": 67.79344262295082, "grad_norm": 4.909032344818115, "learning_rate": 4.96448356436419e-06, "loss": 0.2839, "step": 20677 }, { "epoch": 67.79672131147541, "grad_norm": 4.612848281860352, "learning_rate": 4.963566159428949e-06, "loss": 0.5356, "step": 20678 }, { "epoch": 67.8, "grad_norm": 4.910671234130859, "learning_rate": 4.9626488112847384e-06, "loss": 0.4224, "step": 20679 }, { "epoch": 67.80327868852459, "grad_norm": 10.453364372253418, "learning_rate": 4.9617315199418955e-06, "loss": 0.4074, "step": 20680 }, { "epoch": 67.80655737704917, "grad_norm": 5.863117694854736, "learning_rate": 4.960814285410772e-06, "loss": 0.4509, "step": 20681 }, { "epoch": 67.80983606557378, "grad_norm": 9.604151725769043, "learning_rate": 4.959897107701707e-06, "loss": 0.343, "step": 20682 }, { "epoch": 67.81311475409836, "grad_norm": 4.6546630859375, "learning_rate": 4.958979986825042e-06, "loss": 0.4857, "step": 20683 }, { "epoch": 67.81639344262295, "grad_norm": 6.6178693771362305, "learning_rate": 4.958062922791115e-06, "loss": 0.5326, "step": 20684 }, { "epoch": 67.81967213114754, "grad_norm": 3.9604742527008057, "learning_rate": 4.9571459156102755e-06, "loss": 0.2858, "step": 20685 }, { "epoch": 67.82295081967213, "grad_norm": 5.319899082183838, "learning_rate": 4.956228965292858e-06, "loss": 0.5355, "step": 20686 }, { "epoch": 67.82622950819672, "grad_norm": 6.204491138458252, "learning_rate": 4.955312071849204e-06, "loss": 0.6152, "step": 20687 }, { "epoch": 67.8295081967213, "grad_norm": 5.07647705078125, "learning_rate": 4.9543952352896515e-06, "loss": 0.2391, "step": 20688 }, { "epoch": 67.8327868852459, "grad_norm": 5.343405723571777, "learning_rate": 4.9534784556245356e-06, "loss": 0.5369, "step": 20689 }, { "epoch": 67.8360655737705, "grad_norm": 5.084252834320068, "learning_rate": 4.952561732864199e-06, "loss": 0.4661, "step": 20690 }, { "epoch": 67.83934426229509, "grad_norm": 6.24711275100708, "learning_rate": 4.951645067018979e-06, "loss": 0.3801, "step": 20691 }, { "epoch": 67.84262295081967, "grad_norm": 5.767917156219482, "learning_rate": 4.950728458099209e-06, "loss": 0.364, "step": 20692 }, { "epoch": 67.84590163934426, "grad_norm": 6.823861122131348, "learning_rate": 4.949811906115221e-06, "loss": 0.5477, "step": 20693 }, { "epoch": 67.84918032786885, "grad_norm": 4.886119365692139, "learning_rate": 4.948895411077359e-06, "loss": 0.5164, "step": 20694 }, { "epoch": 67.85245901639344, "grad_norm": 4.96608829498291, "learning_rate": 4.9479789729959535e-06, "loss": 0.4895, "step": 20695 }, { "epoch": 67.85573770491803, "grad_norm": 4.339216232299805, "learning_rate": 4.947062591881338e-06, "loss": 0.4004, "step": 20696 }, { "epoch": 67.85901639344263, "grad_norm": 6.97693395614624, "learning_rate": 4.946146267743841e-06, "loss": 0.6372, "step": 20697 }, { "epoch": 67.86229508196722, "grad_norm": 5.195245265960693, "learning_rate": 4.945230000593804e-06, "loss": 0.6586, "step": 20698 }, { "epoch": 67.8655737704918, "grad_norm": 6.5948166847229, "learning_rate": 4.944313790441554e-06, "loss": 0.4281, "step": 20699 }, { "epoch": 67.8688524590164, "grad_norm": 4.541021347045898, "learning_rate": 4.943397637297418e-06, "loss": 0.3596, "step": 20700 }, { "epoch": 67.87213114754098, "grad_norm": 6.224208354949951, "learning_rate": 4.942481541171736e-06, "loss": 0.7036, "step": 20701 }, { "epoch": 67.87540983606557, "grad_norm": 5.695353984832764, "learning_rate": 4.9415655020748335e-06, "loss": 0.2873, "step": 20702 }, { "epoch": 67.87868852459016, "grad_norm": 5.909300327301025, "learning_rate": 4.9406495200170345e-06, "loss": 0.5256, "step": 20703 }, { "epoch": 67.88196721311475, "grad_norm": 4.871660232543945, "learning_rate": 4.9397335950086765e-06, "loss": 0.3304, "step": 20704 }, { "epoch": 67.88524590163935, "grad_norm": 9.098837852478027, "learning_rate": 4.938817727060085e-06, "loss": 0.3131, "step": 20705 }, { "epoch": 67.88852459016394, "grad_norm": 5.734152317047119, "learning_rate": 4.937901916181581e-06, "loss": 0.5792, "step": 20706 }, { "epoch": 67.89180327868853, "grad_norm": 4.295072555541992, "learning_rate": 4.936986162383499e-06, "loss": 0.4982, "step": 20707 }, { "epoch": 67.89508196721312, "grad_norm": 5.133510112762451, "learning_rate": 4.9360704656761635e-06, "loss": 0.6745, "step": 20708 }, { "epoch": 67.8983606557377, "grad_norm": 5.220356464385986, "learning_rate": 4.935154826069899e-06, "loss": 0.4601, "step": 20709 }, { "epoch": 67.90163934426229, "grad_norm": 9.501477241516113, "learning_rate": 4.9342392435750255e-06, "loss": 0.5189, "step": 20710 }, { "epoch": 67.90491803278688, "grad_norm": 4.59959077835083, "learning_rate": 4.933323718201876e-06, "loss": 0.3362, "step": 20711 }, { "epoch": 67.90819672131147, "grad_norm": 5.213994979858398, "learning_rate": 4.9324082499607685e-06, "loss": 0.5767, "step": 20712 }, { "epoch": 67.91147540983607, "grad_norm": 4.567126274108887, "learning_rate": 4.931492838862028e-06, "loss": 0.4227, "step": 20713 }, { "epoch": 67.91475409836066, "grad_norm": 5.3603620529174805, "learning_rate": 4.9305774849159746e-06, "loss": 0.5327, "step": 20714 }, { "epoch": 67.91803278688525, "grad_norm": 5.1566267013549805, "learning_rate": 4.929662188132928e-06, "loss": 0.4307, "step": 20715 }, { "epoch": 67.92131147540984, "grad_norm": 4.437228679656982, "learning_rate": 4.928746948523215e-06, "loss": 0.6126, "step": 20716 }, { "epoch": 67.92459016393443, "grad_norm": 9.393332481384277, "learning_rate": 4.9278317660971546e-06, "loss": 0.4301, "step": 20717 }, { "epoch": 67.92786885245901, "grad_norm": 4.736496448516846, "learning_rate": 4.926916640865063e-06, "loss": 0.2613, "step": 20718 }, { "epoch": 67.9311475409836, "grad_norm": 4.1053385734558105, "learning_rate": 4.926001572837259e-06, "loss": 0.4474, "step": 20719 }, { "epoch": 67.93442622950819, "grad_norm": 3.660059690475464, "learning_rate": 4.925086562024065e-06, "loss": 0.3283, "step": 20720 }, { "epoch": 67.9377049180328, "grad_norm": 5.804110050201416, "learning_rate": 4.924171608435797e-06, "loss": 0.4289, "step": 20721 }, { "epoch": 67.94098360655738, "grad_norm": 5.280465126037598, "learning_rate": 4.9232567120827725e-06, "loss": 0.3621, "step": 20722 }, { "epoch": 67.94426229508197, "grad_norm": 9.06502914428711, "learning_rate": 4.922341872975302e-06, "loss": 0.2916, "step": 20723 }, { "epoch": 67.94754098360656, "grad_norm": 9.695280075073242, "learning_rate": 4.92142709112371e-06, "loss": 0.3978, "step": 20724 }, { "epoch": 67.95081967213115, "grad_norm": 6.371368885040283, "learning_rate": 4.9205123665383105e-06, "loss": 0.6054, "step": 20725 }, { "epoch": 67.95409836065573, "grad_norm": 3.871494770050049, "learning_rate": 4.919597699229413e-06, "loss": 0.6409, "step": 20726 }, { "epoch": 67.95737704918032, "grad_norm": 8.504623413085938, "learning_rate": 4.918683089207334e-06, "loss": 0.436, "step": 20727 }, { "epoch": 67.96065573770491, "grad_norm": 4.92939567565918, "learning_rate": 4.9177685364823835e-06, "loss": 0.3056, "step": 20728 }, { "epoch": 67.96393442622951, "grad_norm": 6.146036624908447, "learning_rate": 4.91685404106488e-06, "loss": 0.3912, "step": 20729 }, { "epoch": 67.9672131147541, "grad_norm": 5.876700401306152, "learning_rate": 4.9159396029651315e-06, "loss": 0.2854, "step": 20730 }, { "epoch": 67.97049180327869, "grad_norm": 6.088425159454346, "learning_rate": 4.915025222193453e-06, "loss": 0.5439, "step": 20731 }, { "epoch": 67.97377049180328, "grad_norm": 6.079394340515137, "learning_rate": 4.914110898760145e-06, "loss": 0.4861, "step": 20732 }, { "epoch": 67.97704918032787, "grad_norm": 5.961599349975586, "learning_rate": 4.913196632675529e-06, "loss": 0.3903, "step": 20733 }, { "epoch": 67.98032786885246, "grad_norm": 5.706319808959961, "learning_rate": 4.9122824239499126e-06, "loss": 0.28, "step": 20734 }, { "epoch": 67.98360655737704, "grad_norm": 4.425597667694092, "learning_rate": 4.911368272593599e-06, "loss": 0.3809, "step": 20735 }, { "epoch": 67.98688524590163, "grad_norm": 7.8375959396362305, "learning_rate": 4.910454178616897e-06, "loss": 0.2629, "step": 20736 }, { "epoch": 67.99016393442623, "grad_norm": 4.505175590515137, "learning_rate": 4.909540142030118e-06, "loss": 0.3789, "step": 20737 }, { "epoch": 67.99344262295082, "grad_norm": 5.392647743225098, "learning_rate": 4.908626162843568e-06, "loss": 0.628, "step": 20738 }, { "epoch": 67.99672131147541, "grad_norm": 6.449816703796387, "learning_rate": 4.907712241067551e-06, "loss": 0.2345, "step": 20739 }, { "epoch": 68.0, "grad_norm": 12.904863357543945, "learning_rate": 4.9067983767123736e-06, "loss": 0.3598, "step": 20740 }, { "epoch": 68.00327868852459, "grad_norm": 5.199636936187744, "learning_rate": 4.905884569788336e-06, "loss": 0.4481, "step": 20741 }, { "epoch": 68.00655737704918, "grad_norm": 5.442419052124023, "learning_rate": 4.90497082030575e-06, "loss": 0.5105, "step": 20742 }, { "epoch": 68.00983606557377, "grad_norm": 6.16788911819458, "learning_rate": 4.904057128274916e-06, "loss": 0.5072, "step": 20743 }, { "epoch": 68.01311475409837, "grad_norm": 4.1970391273498535, "learning_rate": 4.9031434937061364e-06, "loss": 0.2882, "step": 20744 }, { "epoch": 68.01639344262296, "grad_norm": 4.127373695373535, "learning_rate": 4.9022299166097095e-06, "loss": 0.4242, "step": 20745 }, { "epoch": 68.01967213114754, "grad_norm": 8.306144714355469, "learning_rate": 4.9013163969959445e-06, "loss": 0.565, "step": 20746 }, { "epoch": 68.02295081967213, "grad_norm": 4.919017791748047, "learning_rate": 4.900402934875138e-06, "loss": 0.338, "step": 20747 }, { "epoch": 68.02622950819672, "grad_norm": 5.1115851402282715, "learning_rate": 4.8994895302575905e-06, "loss": 0.613, "step": 20748 }, { "epoch": 68.02950819672131, "grad_norm": 4.944883346557617, "learning_rate": 4.898576183153598e-06, "loss": 0.8213, "step": 20749 }, { "epoch": 68.0327868852459, "grad_norm": 4.6430888175964355, "learning_rate": 4.897662893573467e-06, "loss": 0.2147, "step": 20750 }, { "epoch": 68.03606557377049, "grad_norm": 5.6137471199035645, "learning_rate": 4.896749661527492e-06, "loss": 0.5776, "step": 20751 }, { "epoch": 68.03934426229509, "grad_norm": 5.931912899017334, "learning_rate": 4.895836487025971e-06, "loss": 0.6586, "step": 20752 }, { "epoch": 68.04262295081968, "grad_norm": 7.151721000671387, "learning_rate": 4.894923370079198e-06, "loss": 0.6151, "step": 20753 }, { "epoch": 68.04590163934427, "grad_norm": 6.487734317779541, "learning_rate": 4.894010310697474e-06, "loss": 0.3752, "step": 20754 }, { "epoch": 68.04918032786885, "grad_norm": 6.639936923980713, "learning_rate": 4.893097308891093e-06, "loss": 0.5818, "step": 20755 }, { "epoch": 68.05245901639344, "grad_norm": 7.076017379760742, "learning_rate": 4.892184364670345e-06, "loss": 0.7016, "step": 20756 }, { "epoch": 68.05573770491803, "grad_norm": 6.434563159942627, "learning_rate": 4.891271478045534e-06, "loss": 0.3544, "step": 20757 }, { "epoch": 68.05901639344262, "grad_norm": 4.760347843170166, "learning_rate": 4.890358649026948e-06, "loss": 0.4557, "step": 20758 }, { "epoch": 68.0622950819672, "grad_norm": 5.374197006225586, "learning_rate": 4.889445877624879e-06, "loss": 0.2813, "step": 20759 }, { "epoch": 68.06557377049181, "grad_norm": 4.513175964355469, "learning_rate": 4.888533163849624e-06, "loss": 0.3274, "step": 20760 }, { "epoch": 68.0688524590164, "grad_norm": 7.099061965942383, "learning_rate": 4.887620507711472e-06, "loss": 0.4702, "step": 20761 }, { "epoch": 68.07213114754099, "grad_norm": 5.010105609893799, "learning_rate": 4.8867079092207105e-06, "loss": 0.3852, "step": 20762 }, { "epoch": 68.07540983606557, "grad_norm": 5.5583109855651855, "learning_rate": 4.885795368387637e-06, "loss": 0.4214, "step": 20763 }, { "epoch": 68.07868852459016, "grad_norm": 10.692368507385254, "learning_rate": 4.884882885222539e-06, "loss": 0.2565, "step": 20764 }, { "epoch": 68.08196721311475, "grad_norm": 6.596866130828857, "learning_rate": 4.8839704597357055e-06, "loss": 0.4352, "step": 20765 }, { "epoch": 68.08524590163934, "grad_norm": 8.52769660949707, "learning_rate": 4.883058091937424e-06, "loss": 0.4522, "step": 20766 }, { "epoch": 68.08852459016393, "grad_norm": 5.538295269012451, "learning_rate": 4.882145781837978e-06, "loss": 0.3379, "step": 20767 }, { "epoch": 68.09180327868853, "grad_norm": 4.641575336456299, "learning_rate": 4.8812335294476645e-06, "loss": 0.2324, "step": 20768 }, { "epoch": 68.09508196721312, "grad_norm": 6.169515132904053, "learning_rate": 4.880321334776764e-06, "loss": 0.3221, "step": 20769 }, { "epoch": 68.09836065573771, "grad_norm": 6.322666645050049, "learning_rate": 4.879409197835564e-06, "loss": 0.4619, "step": 20770 }, { "epoch": 68.1016393442623, "grad_norm": 7.247328281402588, "learning_rate": 4.878497118634345e-06, "loss": 0.3354, "step": 20771 }, { "epoch": 68.10491803278688, "grad_norm": 7.390398979187012, "learning_rate": 4.877585097183401e-06, "loss": 0.526, "step": 20772 }, { "epoch": 68.10819672131147, "grad_norm": 5.126985549926758, "learning_rate": 4.876673133493011e-06, "loss": 0.3455, "step": 20773 }, { "epoch": 68.11147540983606, "grad_norm": 4.646908283233643, "learning_rate": 4.8757612275734574e-06, "loss": 0.2821, "step": 20774 }, { "epoch": 68.11475409836065, "grad_norm": 4.229025840759277, "learning_rate": 4.874849379435022e-06, "loss": 0.29, "step": 20775 }, { "epoch": 68.11803278688525, "grad_norm": 7.080283164978027, "learning_rate": 4.8739375890879905e-06, "loss": 0.2774, "step": 20776 }, { "epoch": 68.12131147540984, "grad_norm": 4.998244285583496, "learning_rate": 4.873025856542643e-06, "loss": 0.5422, "step": 20777 }, { "epoch": 68.12459016393443, "grad_norm": 4.0869622230529785, "learning_rate": 4.8721141818092595e-06, "loss": 0.4946, "step": 20778 }, { "epoch": 68.12786885245902, "grad_norm": 6.231708526611328, "learning_rate": 4.871202564898121e-06, "loss": 0.4688, "step": 20779 }, { "epoch": 68.1311475409836, "grad_norm": 6.557485103607178, "learning_rate": 4.870291005819502e-06, "loss": 0.4856, "step": 20780 }, { "epoch": 68.1344262295082, "grad_norm": 5.313704967498779, "learning_rate": 4.869379504583689e-06, "loss": 0.324, "step": 20781 }, { "epoch": 68.13770491803278, "grad_norm": 5.034142017364502, "learning_rate": 4.868468061200956e-06, "loss": 0.5554, "step": 20782 }, { "epoch": 68.14098360655737, "grad_norm": 5.223183631896973, "learning_rate": 4.8675566756815814e-06, "loss": 0.5161, "step": 20783 }, { "epoch": 68.14426229508197, "grad_norm": 8.039926528930664, "learning_rate": 4.866645348035839e-06, "loss": 0.3351, "step": 20784 }, { "epoch": 68.14754098360656, "grad_norm": 9.714860916137695, "learning_rate": 4.86573407827401e-06, "loss": 0.405, "step": 20785 }, { "epoch": 68.15081967213115, "grad_norm": 5.704329013824463, "learning_rate": 4.864822866406369e-06, "loss": 0.454, "step": 20786 }, { "epoch": 68.15409836065574, "grad_norm": 5.057865619659424, "learning_rate": 4.863911712443189e-06, "loss": 0.3601, "step": 20787 }, { "epoch": 68.15737704918033, "grad_norm": 5.013315200805664, "learning_rate": 4.86300061639474e-06, "loss": 0.369, "step": 20788 }, { "epoch": 68.16065573770491, "grad_norm": 6.601724147796631, "learning_rate": 4.862089578271305e-06, "loss": 0.6877, "step": 20789 }, { "epoch": 68.1639344262295, "grad_norm": 7.3617024421691895, "learning_rate": 4.861178598083151e-06, "loss": 0.2906, "step": 20790 }, { "epoch": 68.1672131147541, "grad_norm": 6.781238555908203, "learning_rate": 4.860267675840552e-06, "loss": 0.594, "step": 20791 }, { "epoch": 68.1704918032787, "grad_norm": 5.024374485015869, "learning_rate": 4.859356811553779e-06, "loss": 0.2999, "step": 20792 }, { "epoch": 68.17377049180328, "grad_norm": 5.1100873947143555, "learning_rate": 4.8584460052331e-06, "loss": 0.3534, "step": 20793 }, { "epoch": 68.17704918032787, "grad_norm": 8.382803916931152, "learning_rate": 4.8575352568887905e-06, "loss": 0.3528, "step": 20794 }, { "epoch": 68.18032786885246, "grad_norm": 4.855480194091797, "learning_rate": 4.856624566531117e-06, "loss": 0.431, "step": 20795 }, { "epoch": 68.18360655737705, "grad_norm": 4.261274814605713, "learning_rate": 4.855713934170351e-06, "loss": 0.368, "step": 20796 }, { "epoch": 68.18688524590164, "grad_norm": 5.197873115539551, "learning_rate": 4.8548033598167554e-06, "loss": 0.3689, "step": 20797 }, { "epoch": 68.19016393442622, "grad_norm": 8.587528228759766, "learning_rate": 4.853892843480605e-06, "loss": 0.5303, "step": 20798 }, { "epoch": 68.19344262295083, "grad_norm": 7.349748134613037, "learning_rate": 4.852982385172163e-06, "loss": 0.5875, "step": 20799 }, { "epoch": 68.19672131147541, "grad_norm": 6.085870742797852, "learning_rate": 4.852071984901696e-06, "loss": 0.4886, "step": 20800 }, { "epoch": 68.2, "grad_norm": 5.177571773529053, "learning_rate": 4.851161642679466e-06, "loss": 0.2909, "step": 20801 }, { "epoch": 68.20327868852459, "grad_norm": 4.789011478424072, "learning_rate": 4.850251358515746e-06, "loss": 0.6909, "step": 20802 }, { "epoch": 68.20655737704918, "grad_norm": 7.346892833709717, "learning_rate": 4.8493411324207975e-06, "loss": 0.4449, "step": 20803 }, { "epoch": 68.20983606557377, "grad_norm": 5.543479919433594, "learning_rate": 4.848430964404882e-06, "loss": 0.3612, "step": 20804 }, { "epoch": 68.21311475409836, "grad_norm": 5.178431034088135, "learning_rate": 4.84752085447826e-06, "loss": 0.3951, "step": 20805 }, { "epoch": 68.21639344262294, "grad_norm": 5.903398513793945, "learning_rate": 4.846610802651202e-06, "loss": 0.3279, "step": 20806 }, { "epoch": 68.21967213114755, "grad_norm": 7.4337239265441895, "learning_rate": 4.845700808933964e-06, "loss": 0.353, "step": 20807 }, { "epoch": 68.22295081967214, "grad_norm": 7.369836807250977, "learning_rate": 4.844790873336806e-06, "loss": 0.5695, "step": 20808 }, { "epoch": 68.22622950819672, "grad_norm": 8.382673263549805, "learning_rate": 4.8438809958699936e-06, "loss": 0.2558, "step": 20809 }, { "epoch": 68.22950819672131, "grad_norm": 6.426436901092529, "learning_rate": 4.842971176543785e-06, "loss": 0.5149, "step": 20810 }, { "epoch": 68.2327868852459, "grad_norm": 5.345343112945557, "learning_rate": 4.842061415368437e-06, "loss": 0.4223, "step": 20811 }, { "epoch": 68.23606557377049, "grad_norm": 8.408523559570312, "learning_rate": 4.841151712354208e-06, "loss": 0.2442, "step": 20812 }, { "epoch": 68.23934426229508, "grad_norm": 6.527351379394531, "learning_rate": 4.840242067511359e-06, "loss": 0.2641, "step": 20813 }, { "epoch": 68.24262295081967, "grad_norm": 4.855434417724609, "learning_rate": 4.839332480850146e-06, "loss": 0.5682, "step": 20814 }, { "epoch": 68.24590163934427, "grad_norm": 5.459534168243408, "learning_rate": 4.838422952380821e-06, "loss": 0.3667, "step": 20815 }, { "epoch": 68.24918032786886, "grad_norm": 5.717409610748291, "learning_rate": 4.837513482113648e-06, "loss": 0.4402, "step": 20816 }, { "epoch": 68.25245901639344, "grad_norm": 4.818223476409912, "learning_rate": 4.836604070058879e-06, "loss": 0.3445, "step": 20817 }, { "epoch": 68.25573770491803, "grad_norm": 7.417887210845947, "learning_rate": 4.835694716226767e-06, "loss": 0.6526, "step": 20818 }, { "epoch": 68.25901639344262, "grad_norm": 30.413002014160156, "learning_rate": 4.834785420627562e-06, "loss": 0.5608, "step": 20819 }, { "epoch": 68.26229508196721, "grad_norm": 4.909234523773193, "learning_rate": 4.8338761832715275e-06, "loss": 0.4506, "step": 20820 }, { "epoch": 68.2655737704918, "grad_norm": 5.869424343109131, "learning_rate": 4.832967004168909e-06, "loss": 0.4501, "step": 20821 }, { "epoch": 68.26885245901639, "grad_norm": 5.7240118980407715, "learning_rate": 4.8320578833299605e-06, "loss": 0.5154, "step": 20822 }, { "epoch": 68.27213114754099, "grad_norm": 6.697866439819336, "learning_rate": 4.831148820764928e-06, "loss": 0.3578, "step": 20823 }, { "epoch": 68.27540983606558, "grad_norm": 12.810235023498535, "learning_rate": 4.830239816484071e-06, "loss": 0.5772, "step": 20824 }, { "epoch": 68.27868852459017, "grad_norm": 5.439732074737549, "learning_rate": 4.829330870497636e-06, "loss": 0.4289, "step": 20825 }, { "epoch": 68.28196721311475, "grad_norm": 5.76460599899292, "learning_rate": 4.82842198281587e-06, "loss": 0.6206, "step": 20826 }, { "epoch": 68.28524590163934, "grad_norm": 5.9795002937316895, "learning_rate": 4.827513153449022e-06, "loss": 0.4009, "step": 20827 }, { "epoch": 68.28852459016393, "grad_norm": 4.283414840698242, "learning_rate": 4.826604382407344e-06, "loss": 0.739, "step": 20828 }, { "epoch": 68.29180327868852, "grad_norm": 4.141474723815918, "learning_rate": 4.8256956697010795e-06, "loss": 0.372, "step": 20829 }, { "epoch": 68.29508196721312, "grad_norm": 3.844104051589966, "learning_rate": 4.8247870153404776e-06, "loss": 0.4362, "step": 20830 }, { "epoch": 68.29836065573771, "grad_norm": 5.912759780883789, "learning_rate": 4.823878419335781e-06, "loss": 0.5322, "step": 20831 }, { "epoch": 68.3016393442623, "grad_norm": 4.661853790283203, "learning_rate": 4.822969881697235e-06, "loss": 0.3141, "step": 20832 }, { "epoch": 68.30491803278689, "grad_norm": 6.03510046005249, "learning_rate": 4.82206140243509e-06, "loss": 0.3443, "step": 20833 }, { "epoch": 68.30819672131148, "grad_norm": 4.739254951477051, "learning_rate": 4.821152981559586e-06, "loss": 0.3714, "step": 20834 }, { "epoch": 68.31147540983606, "grad_norm": 5.49190092086792, "learning_rate": 4.820244619080967e-06, "loss": 0.5839, "step": 20835 }, { "epoch": 68.31475409836065, "grad_norm": 5.625887393951416, "learning_rate": 4.819336315009471e-06, "loss": 0.5999, "step": 20836 }, { "epoch": 68.31803278688524, "grad_norm": 4.208573341369629, "learning_rate": 4.81842806935535e-06, "loss": 0.2083, "step": 20837 }, { "epoch": 68.32131147540984, "grad_norm": 5.057236194610596, "learning_rate": 4.817519882128838e-06, "loss": 0.4912, "step": 20838 }, { "epoch": 68.32459016393443, "grad_norm": 9.22102165222168, "learning_rate": 4.816611753340179e-06, "loss": 0.6467, "step": 20839 }, { "epoch": 68.32786885245902, "grad_norm": 5.48244571685791, "learning_rate": 4.815703682999607e-06, "loss": 0.3108, "step": 20840 }, { "epoch": 68.33114754098361, "grad_norm": 10.599372863769531, "learning_rate": 4.814795671117372e-06, "loss": 0.2905, "step": 20841 }, { "epoch": 68.3344262295082, "grad_norm": 6.137540340423584, "learning_rate": 4.813887717703706e-06, "loss": 0.3837, "step": 20842 }, { "epoch": 68.33770491803278, "grad_norm": 4.6953229904174805, "learning_rate": 4.812979822768847e-06, "loss": 0.3224, "step": 20843 }, { "epoch": 68.34098360655737, "grad_norm": 11.458205223083496, "learning_rate": 4.8120719863230345e-06, "loss": 0.5551, "step": 20844 }, { "epoch": 68.34426229508196, "grad_norm": 4.259912014007568, "learning_rate": 4.811164208376502e-06, "loss": 0.5351, "step": 20845 }, { "epoch": 68.34754098360656, "grad_norm": 5.0237579345703125, "learning_rate": 4.810256488939491e-06, "loss": 0.449, "step": 20846 }, { "epoch": 68.35081967213115, "grad_norm": 6.677714824676514, "learning_rate": 4.809348828022233e-06, "loss": 0.5792, "step": 20847 }, { "epoch": 68.35409836065574, "grad_norm": 5.414870262145996, "learning_rate": 4.808441225634966e-06, "loss": 0.3035, "step": 20848 }, { "epoch": 68.35737704918033, "grad_norm": 4.649033069610596, "learning_rate": 4.807533681787916e-06, "loss": 0.3663, "step": 20849 }, { "epoch": 68.36065573770492, "grad_norm": 5.789243698120117, "learning_rate": 4.806626196491328e-06, "loss": 0.4883, "step": 20850 }, { "epoch": 68.3639344262295, "grad_norm": 8.709382057189941, "learning_rate": 4.805718769755428e-06, "loss": 0.4952, "step": 20851 }, { "epoch": 68.3672131147541, "grad_norm": 3.9948530197143555, "learning_rate": 4.80481140159045e-06, "loss": 0.375, "step": 20852 }, { "epoch": 68.37049180327868, "grad_norm": 4.251592636108398, "learning_rate": 4.803904092006626e-06, "loss": 0.2911, "step": 20853 }, { "epoch": 68.37377049180328, "grad_norm": 6.551116943359375, "learning_rate": 4.802996841014181e-06, "loss": 0.3718, "step": 20854 }, { "epoch": 68.37704918032787, "grad_norm": 8.60612964630127, "learning_rate": 4.802089648623355e-06, "loss": 0.3047, "step": 20855 }, { "epoch": 68.38032786885246, "grad_norm": 4.853729724884033, "learning_rate": 4.801182514844372e-06, "loss": 0.2714, "step": 20856 }, { "epoch": 68.38360655737705, "grad_norm": 7.4902448654174805, "learning_rate": 4.800275439687462e-06, "loss": 0.3974, "step": 20857 }, { "epoch": 68.38688524590164, "grad_norm": 5.523648738861084, "learning_rate": 4.799368423162849e-06, "loss": 0.5382, "step": 20858 }, { "epoch": 68.39016393442623, "grad_norm": 5.340891361236572, "learning_rate": 4.798461465280767e-06, "loss": 0.4412, "step": 20859 }, { "epoch": 68.39344262295081, "grad_norm": 6.71498441696167, "learning_rate": 4.797554566051441e-06, "loss": 0.2797, "step": 20860 }, { "epoch": 68.3967213114754, "grad_norm": 7.220729827880859, "learning_rate": 4.7966477254850926e-06, "loss": 0.3542, "step": 20861 }, { "epoch": 68.4, "grad_norm": 5.016384124755859, "learning_rate": 4.795740943591955e-06, "loss": 0.572, "step": 20862 }, { "epoch": 68.4032786885246, "grad_norm": 4.819465160369873, "learning_rate": 4.794834220382249e-06, "loss": 0.3922, "step": 20863 }, { "epoch": 68.40655737704918, "grad_norm": 5.733312606811523, "learning_rate": 4.793927555866197e-06, "loss": 0.4515, "step": 20864 }, { "epoch": 68.40983606557377, "grad_norm": 7.441385269165039, "learning_rate": 4.793020950054027e-06, "loss": 0.6879, "step": 20865 }, { "epoch": 68.41311475409836, "grad_norm": 7.292726516723633, "learning_rate": 4.792114402955961e-06, "loss": 0.372, "step": 20866 }, { "epoch": 68.41639344262295, "grad_norm": 4.529869556427002, "learning_rate": 4.7912079145822145e-06, "loss": 0.6812, "step": 20867 }, { "epoch": 68.41967213114754, "grad_norm": 6.338425159454346, "learning_rate": 4.79030148494302e-06, "loss": 0.4084, "step": 20868 }, { "epoch": 68.42295081967212, "grad_norm": 6.491021633148193, "learning_rate": 4.789395114048592e-06, "loss": 0.4476, "step": 20869 }, { "epoch": 68.42622950819673, "grad_norm": 5.161049842834473, "learning_rate": 4.788488801909151e-06, "loss": 0.3958, "step": 20870 }, { "epoch": 68.42950819672132, "grad_norm": 5.601110458374023, "learning_rate": 4.787582548534914e-06, "loss": 0.2473, "step": 20871 }, { "epoch": 68.4327868852459, "grad_norm": 5.39456844329834, "learning_rate": 4.786676353936108e-06, "loss": 0.4955, "step": 20872 }, { "epoch": 68.43606557377049, "grad_norm": 5.528378009796143, "learning_rate": 4.785770218122946e-06, "loss": 0.4663, "step": 20873 }, { "epoch": 68.43934426229508, "grad_norm": 5.061783313751221, "learning_rate": 4.784864141105646e-06, "loss": 0.4837, "step": 20874 }, { "epoch": 68.44262295081967, "grad_norm": 4.942539691925049, "learning_rate": 4.783958122894422e-06, "loss": 0.4807, "step": 20875 }, { "epoch": 68.44590163934426, "grad_norm": 4.413231372833252, "learning_rate": 4.783052163499497e-06, "loss": 0.3651, "step": 20876 }, { "epoch": 68.44918032786886, "grad_norm": 4.497961044311523, "learning_rate": 4.782146262931083e-06, "loss": 0.4846, "step": 20877 }, { "epoch": 68.45245901639345, "grad_norm": 5.0125908851623535, "learning_rate": 4.781240421199396e-06, "loss": 0.5106, "step": 20878 }, { "epoch": 68.45573770491804, "grad_norm": 4.7371087074279785, "learning_rate": 4.7803346383146485e-06, "loss": 0.3563, "step": 20879 }, { "epoch": 68.45901639344262, "grad_norm": 6.344053268432617, "learning_rate": 4.779428914287052e-06, "loss": 0.4967, "step": 20880 }, { "epoch": 68.46229508196721, "grad_norm": 5.4615678787231445, "learning_rate": 4.778523249126825e-06, "loss": 0.6403, "step": 20881 }, { "epoch": 68.4655737704918, "grad_norm": 4.8285040855407715, "learning_rate": 4.777617642844179e-06, "loss": 0.5095, "step": 20882 }, { "epoch": 68.46885245901639, "grad_norm": 4.5172295570373535, "learning_rate": 4.776712095449323e-06, "loss": 0.447, "step": 20883 }, { "epoch": 68.47213114754098, "grad_norm": 5.2659711837768555, "learning_rate": 4.7758066069524645e-06, "loss": 0.3498, "step": 20884 }, { "epoch": 68.47540983606558, "grad_norm": 4.434089660644531, "learning_rate": 4.774901177363823e-06, "loss": 0.6995, "step": 20885 }, { "epoch": 68.47868852459017, "grad_norm": 5.299226760864258, "learning_rate": 4.773995806693603e-06, "loss": 0.3721, "step": 20886 }, { "epoch": 68.48196721311476, "grad_norm": 6.140538215637207, "learning_rate": 4.773090494952015e-06, "loss": 0.4019, "step": 20887 }, { "epoch": 68.48524590163935, "grad_norm": 6.530685901641846, "learning_rate": 4.772185242149262e-06, "loss": 0.5613, "step": 20888 }, { "epoch": 68.48852459016393, "grad_norm": 7.0280022621154785, "learning_rate": 4.771280048295559e-06, "loss": 0.3284, "step": 20889 }, { "epoch": 68.49180327868852, "grad_norm": 5.9040021896362305, "learning_rate": 4.77037491340111e-06, "loss": 0.5053, "step": 20890 }, { "epoch": 68.49508196721311, "grad_norm": 6.912369728088379, "learning_rate": 4.769469837476123e-06, "loss": 0.636, "step": 20891 }, { "epoch": 68.4983606557377, "grad_norm": 4.6557416915893555, "learning_rate": 4.7685648205308e-06, "loss": 0.463, "step": 20892 }, { "epoch": 68.5016393442623, "grad_norm": 5.834744930267334, "learning_rate": 4.767659862575346e-06, "loss": 0.4522, "step": 20893 }, { "epoch": 68.50491803278689, "grad_norm": 7.632503509521484, "learning_rate": 4.76675496361997e-06, "loss": 0.4575, "step": 20894 }, { "epoch": 68.50819672131148, "grad_norm": 4.943118572235107, "learning_rate": 4.765850123674872e-06, "loss": 0.3699, "step": 20895 }, { "epoch": 68.51147540983607, "grad_norm": 6.2230730056762695, "learning_rate": 4.764945342750257e-06, "loss": 0.3101, "step": 20896 }, { "epoch": 68.51475409836065, "grad_norm": 5.521496772766113, "learning_rate": 4.764040620856323e-06, "loss": 0.5482, "step": 20897 }, { "epoch": 68.51803278688524, "grad_norm": 5.531918048858643, "learning_rate": 4.763135958003278e-06, "loss": 0.5531, "step": 20898 }, { "epoch": 68.52131147540983, "grad_norm": 5.162243366241455, "learning_rate": 4.762231354201321e-06, "loss": 0.5481, "step": 20899 }, { "epoch": 68.52459016393442, "grad_norm": 5.356734275817871, "learning_rate": 4.761326809460651e-06, "loss": 0.4035, "step": 20900 }, { "epoch": 68.52786885245902, "grad_norm": 13.348578453063965, "learning_rate": 4.760422323791464e-06, "loss": 0.6859, "step": 20901 }, { "epoch": 68.53114754098361, "grad_norm": 6.601775646209717, "learning_rate": 4.759517897203967e-06, "loss": 0.6524, "step": 20902 }, { "epoch": 68.5344262295082, "grad_norm": 5.078019618988037, "learning_rate": 4.758613529708355e-06, "loss": 0.2494, "step": 20903 }, { "epoch": 68.53770491803279, "grad_norm": 5.216696262359619, "learning_rate": 4.757709221314825e-06, "loss": 0.4334, "step": 20904 }, { "epoch": 68.54098360655738, "grad_norm": 3.7145626544952393, "learning_rate": 4.756804972033573e-06, "loss": 0.2931, "step": 20905 }, { "epoch": 68.54426229508196, "grad_norm": 5.346398830413818, "learning_rate": 4.7559007818747934e-06, "loss": 0.2598, "step": 20906 }, { "epoch": 68.54754098360655, "grad_norm": 5.12897253036499, "learning_rate": 4.754996650848689e-06, "loss": 0.582, "step": 20907 }, { "epoch": 68.55081967213114, "grad_norm": 4.7523956298828125, "learning_rate": 4.754092578965451e-06, "loss": 0.4314, "step": 20908 }, { "epoch": 68.55409836065574, "grad_norm": 23.328813552856445, "learning_rate": 4.753188566235273e-06, "loss": 0.4735, "step": 20909 }, { "epoch": 68.55737704918033, "grad_norm": 4.846703052520752, "learning_rate": 4.752284612668345e-06, "loss": 0.4891, "step": 20910 }, { "epoch": 68.56065573770492, "grad_norm": 6.576904296875, "learning_rate": 4.7513807182748695e-06, "loss": 0.5096, "step": 20911 }, { "epoch": 68.56393442622951, "grad_norm": 9.372550964355469, "learning_rate": 4.750476883065032e-06, "loss": 0.3444, "step": 20912 }, { "epoch": 68.5672131147541, "grad_norm": 5.147408962249756, "learning_rate": 4.749573107049027e-06, "loss": 0.3278, "step": 20913 }, { "epoch": 68.57049180327868, "grad_norm": 6.871682643890381, "learning_rate": 4.74866939023704e-06, "loss": 0.437, "step": 20914 }, { "epoch": 68.57377049180327, "grad_norm": 9.06541633605957, "learning_rate": 4.7477657326392705e-06, "loss": 0.529, "step": 20915 }, { "epoch": 68.57704918032788, "grad_norm": 5.882883071899414, "learning_rate": 4.746862134265902e-06, "loss": 0.3029, "step": 20916 }, { "epoch": 68.58032786885246, "grad_norm": 11.664773941040039, "learning_rate": 4.74595859512712e-06, "loss": 0.505, "step": 20917 }, { "epoch": 68.58360655737705, "grad_norm": 7.183467864990234, "learning_rate": 4.745055115233123e-06, "loss": 0.4931, "step": 20918 }, { "epoch": 68.58688524590164, "grad_norm": 4.859659194946289, "learning_rate": 4.744151694594093e-06, "loss": 0.4328, "step": 20919 }, { "epoch": 68.59016393442623, "grad_norm": 6.010378837585449, "learning_rate": 4.743248333220214e-06, "loss": 0.4611, "step": 20920 }, { "epoch": 68.59344262295082, "grad_norm": 4.887147903442383, "learning_rate": 4.74234503112168e-06, "loss": 0.3366, "step": 20921 }, { "epoch": 68.5967213114754, "grad_norm": 5.2536163330078125, "learning_rate": 4.741441788308672e-06, "loss": 0.3079, "step": 20922 }, { "epoch": 68.6, "grad_norm": 4.555187702178955, "learning_rate": 4.740538604791371e-06, "loss": 0.5103, "step": 20923 }, { "epoch": 68.6032786885246, "grad_norm": 5.49914026260376, "learning_rate": 4.73963548057997e-06, "loss": 0.5469, "step": 20924 }, { "epoch": 68.60655737704919, "grad_norm": 4.448085784912109, "learning_rate": 4.738732415684647e-06, "loss": 0.3927, "step": 20925 }, { "epoch": 68.60983606557377, "grad_norm": 19.57853889465332, "learning_rate": 4.737829410115587e-06, "loss": 0.6283, "step": 20926 }, { "epoch": 68.61311475409836, "grad_norm": 4.801319599151611, "learning_rate": 4.7369264638829695e-06, "loss": 0.306, "step": 20927 }, { "epoch": 68.61639344262295, "grad_norm": 4.98225212097168, "learning_rate": 4.73602357699698e-06, "loss": 0.5545, "step": 20928 }, { "epoch": 68.61967213114754, "grad_norm": 4.767955303192139, "learning_rate": 4.735120749467799e-06, "loss": 0.519, "step": 20929 }, { "epoch": 68.62295081967213, "grad_norm": 4.990570068359375, "learning_rate": 4.7342179813056055e-06, "loss": 0.3843, "step": 20930 }, { "epoch": 68.62622950819672, "grad_norm": 5.224205017089844, "learning_rate": 4.733315272520579e-06, "loss": 0.3531, "step": 20931 }, { "epoch": 68.62950819672132, "grad_norm": 4.5123982429504395, "learning_rate": 4.732412623122895e-06, "loss": 0.3776, "step": 20932 }, { "epoch": 68.6327868852459, "grad_norm": 6.089385986328125, "learning_rate": 4.731510033122739e-06, "loss": 0.5154, "step": 20933 }, { "epoch": 68.6360655737705, "grad_norm": 4.807471752166748, "learning_rate": 4.730607502530287e-06, "loss": 0.5407, "step": 20934 }, { "epoch": 68.63934426229508, "grad_norm": 5.004751205444336, "learning_rate": 4.729705031355712e-06, "loss": 0.6234, "step": 20935 }, { "epoch": 68.64262295081967, "grad_norm": 4.849009990692139, "learning_rate": 4.728802619609191e-06, "loss": 0.5027, "step": 20936 }, { "epoch": 68.64590163934426, "grad_norm": 5.533104419708252, "learning_rate": 4.727900267300904e-06, "loss": 0.3927, "step": 20937 }, { "epoch": 68.64918032786885, "grad_norm": 7.736013889312744, "learning_rate": 4.726997974441022e-06, "loss": 0.2989, "step": 20938 }, { "epoch": 68.65245901639344, "grad_norm": 4.535680294036865, "learning_rate": 4.726095741039721e-06, "loss": 0.4934, "step": 20939 }, { "epoch": 68.65573770491804, "grad_norm": 6.012088775634766, "learning_rate": 4.72519356710717e-06, "loss": 0.2864, "step": 20940 }, { "epoch": 68.65901639344263, "grad_norm": 4.863410472869873, "learning_rate": 4.72429145265355e-06, "loss": 0.3571, "step": 20941 }, { "epoch": 68.66229508196722, "grad_norm": 14.5275239944458, "learning_rate": 4.72338939768903e-06, "loss": 0.498, "step": 20942 }, { "epoch": 68.6655737704918, "grad_norm": 7.185543537139893, "learning_rate": 4.72248740222378e-06, "loss": 0.2788, "step": 20943 }, { "epoch": 68.66885245901639, "grad_norm": 7.345696449279785, "learning_rate": 4.721585466267972e-06, "loss": 0.4716, "step": 20944 }, { "epoch": 68.67213114754098, "grad_norm": 5.299618721008301, "learning_rate": 4.720683589831771e-06, "loss": 0.3422, "step": 20945 }, { "epoch": 68.67540983606557, "grad_norm": 4.621123313903809, "learning_rate": 4.719781772925357e-06, "loss": 0.2655, "step": 20946 }, { "epoch": 68.67868852459016, "grad_norm": 6.091716766357422, "learning_rate": 4.718880015558893e-06, "loss": 0.7659, "step": 20947 }, { "epoch": 68.68196721311476, "grad_norm": 4.553969860076904, "learning_rate": 4.7179783177425485e-06, "loss": 0.4444, "step": 20948 }, { "epoch": 68.68524590163935, "grad_norm": 5.464234352111816, "learning_rate": 4.7170766794864865e-06, "loss": 0.329, "step": 20949 }, { "epoch": 68.68852459016394, "grad_norm": 6.474649906158447, "learning_rate": 4.71617510080088e-06, "loss": 0.4691, "step": 20950 }, { "epoch": 68.69180327868852, "grad_norm": 4.931312561035156, "learning_rate": 4.715273581695895e-06, "loss": 0.3591, "step": 20951 }, { "epoch": 68.69508196721311, "grad_norm": 6.354124069213867, "learning_rate": 4.714372122181694e-06, "loss": 0.5838, "step": 20952 }, { "epoch": 68.6983606557377, "grad_norm": 5.323521614074707, "learning_rate": 4.713470722268439e-06, "loss": 0.3677, "step": 20953 }, { "epoch": 68.70163934426229, "grad_norm": 5.708169937133789, "learning_rate": 4.712569381966303e-06, "loss": 0.3358, "step": 20954 }, { "epoch": 68.70491803278688, "grad_norm": 4.7990946769714355, "learning_rate": 4.7116681012854445e-06, "loss": 0.3388, "step": 20955 }, { "epoch": 68.70819672131148, "grad_norm": 5.036291599273682, "learning_rate": 4.7107668802360266e-06, "loss": 0.6033, "step": 20956 }, { "epoch": 68.71147540983607, "grad_norm": 12.39985466003418, "learning_rate": 4.709865718828212e-06, "loss": 0.2408, "step": 20957 }, { "epoch": 68.71475409836066, "grad_norm": 6.912322998046875, "learning_rate": 4.708964617072157e-06, "loss": 0.5076, "step": 20958 }, { "epoch": 68.71803278688525, "grad_norm": 4.181977272033691, "learning_rate": 4.708063574978031e-06, "loss": 0.2612, "step": 20959 }, { "epoch": 68.72131147540983, "grad_norm": 7.0008392333984375, "learning_rate": 4.707162592555992e-06, "loss": 0.4481, "step": 20960 }, { "epoch": 68.72459016393442, "grad_norm": 4.87215518951416, "learning_rate": 4.706261669816196e-06, "loss": 0.5517, "step": 20961 }, { "epoch": 68.72786885245901, "grad_norm": 5.575453758239746, "learning_rate": 4.7053608067688e-06, "loss": 0.4474, "step": 20962 }, { "epoch": 68.73114754098361, "grad_norm": 6.853443622589111, "learning_rate": 4.704460003423971e-06, "loss": 0.4302, "step": 20963 }, { "epoch": 68.7344262295082, "grad_norm": 6.986231803894043, "learning_rate": 4.703559259791861e-06, "loss": 0.6462, "step": 20964 }, { "epoch": 68.73770491803279, "grad_norm": 5.631296157836914, "learning_rate": 4.702658575882627e-06, "loss": 0.46, "step": 20965 }, { "epoch": 68.74098360655738, "grad_norm": 6.790196895599365, "learning_rate": 4.701757951706423e-06, "loss": 0.364, "step": 20966 }, { "epoch": 68.74426229508197, "grad_norm": 6.566244602203369, "learning_rate": 4.7008573872734085e-06, "loss": 0.5194, "step": 20967 }, { "epoch": 68.74754098360656, "grad_norm": 4.635988712310791, "learning_rate": 4.699956882593738e-06, "loss": 0.3991, "step": 20968 }, { "epoch": 68.75081967213114, "grad_norm": 6.632394313812256, "learning_rate": 4.69905643767756e-06, "loss": 0.5055, "step": 20969 }, { "epoch": 68.75409836065573, "grad_norm": 5.120808124542236, "learning_rate": 4.698156052535036e-06, "loss": 0.4783, "step": 20970 }, { "epoch": 68.75737704918033, "grad_norm": 5.271546363830566, "learning_rate": 4.697255727176315e-06, "loss": 0.3841, "step": 20971 }, { "epoch": 68.76065573770492, "grad_norm": 5.032966136932373, "learning_rate": 4.696355461611547e-06, "loss": 0.4114, "step": 20972 }, { "epoch": 68.76393442622951, "grad_norm": 5.023796081542969, "learning_rate": 4.695455255850887e-06, "loss": 0.649, "step": 20973 }, { "epoch": 68.7672131147541, "grad_norm": 5.20141077041626, "learning_rate": 4.694555109904486e-06, "loss": 0.3313, "step": 20974 }, { "epoch": 68.77049180327869, "grad_norm": 4.987359046936035, "learning_rate": 4.693655023782492e-06, "loss": 0.436, "step": 20975 }, { "epoch": 68.77377049180328, "grad_norm": 4.854759693145752, "learning_rate": 4.69275499749505e-06, "loss": 0.5382, "step": 20976 }, { "epoch": 68.77704918032786, "grad_norm": 5.028017520904541, "learning_rate": 4.6918550310523195e-06, "loss": 0.3388, "step": 20977 }, { "epoch": 68.78032786885245, "grad_norm": 6.461669445037842, "learning_rate": 4.69095512446444e-06, "loss": 0.3114, "step": 20978 }, { "epoch": 68.78360655737706, "grad_norm": 5.024684906005859, "learning_rate": 4.69005527774156e-06, "loss": 0.3513, "step": 20979 }, { "epoch": 68.78688524590164, "grad_norm": 5.963414192199707, "learning_rate": 4.68915549089383e-06, "loss": 0.5188, "step": 20980 }, { "epoch": 68.79016393442623, "grad_norm": 5.290888786315918, "learning_rate": 4.688255763931394e-06, "loss": 0.432, "step": 20981 }, { "epoch": 68.79344262295082, "grad_norm": 4.817714214324951, "learning_rate": 4.687356096864397e-06, "loss": 0.3672, "step": 20982 }, { "epoch": 68.79672131147541, "grad_norm": 5.2230916023254395, "learning_rate": 4.686456489702984e-06, "loss": 0.4171, "step": 20983 }, { "epoch": 68.8, "grad_norm": 5.627264022827148, "learning_rate": 4.685556942457296e-06, "loss": 0.6153, "step": 20984 }, { "epoch": 68.80327868852459, "grad_norm": 4.378051280975342, "learning_rate": 4.684657455137482e-06, "loss": 0.4428, "step": 20985 }, { "epoch": 68.80655737704917, "grad_norm": 5.56502628326416, "learning_rate": 4.683758027753681e-06, "loss": 0.5566, "step": 20986 }, { "epoch": 68.80983606557378, "grad_norm": 4.126858234405518, "learning_rate": 4.6828586603160365e-06, "loss": 0.4098, "step": 20987 }, { "epoch": 68.81311475409836, "grad_norm": 4.740032196044922, "learning_rate": 4.681959352834685e-06, "loss": 0.493, "step": 20988 }, { "epoch": 68.81639344262295, "grad_norm": 6.822483062744141, "learning_rate": 4.681060105319776e-06, "loss": 0.4339, "step": 20989 }, { "epoch": 68.81967213114754, "grad_norm": 5.689661026000977, "learning_rate": 4.680160917781443e-06, "loss": 0.3119, "step": 20990 }, { "epoch": 68.82295081967213, "grad_norm": 5.5530195236206055, "learning_rate": 4.679261790229829e-06, "loss": 0.5799, "step": 20991 }, { "epoch": 68.82622950819672, "grad_norm": 4.46961784362793, "learning_rate": 4.678362722675065e-06, "loss": 0.2968, "step": 20992 }, { "epoch": 68.8295081967213, "grad_norm": 5.148994445800781, "learning_rate": 4.677463715127298e-06, "loss": 0.3402, "step": 20993 }, { "epoch": 68.8327868852459, "grad_norm": 5.090617656707764, "learning_rate": 4.676564767596663e-06, "loss": 0.4987, "step": 20994 }, { "epoch": 68.8360655737705, "grad_norm": 4.886112689971924, "learning_rate": 4.675665880093294e-06, "loss": 0.5677, "step": 20995 }, { "epoch": 68.83934426229509, "grad_norm": 6.901499271392822, "learning_rate": 4.6747670526273296e-06, "loss": 0.3956, "step": 20996 }, { "epoch": 68.84262295081967, "grad_norm": 5.513448715209961, "learning_rate": 4.673868285208898e-06, "loss": 0.3392, "step": 20997 }, { "epoch": 68.84590163934426, "grad_norm": 5.5147504806518555, "learning_rate": 4.672969577848144e-06, "loss": 0.4849, "step": 20998 }, { "epoch": 68.84918032786885, "grad_norm": 11.05679988861084, "learning_rate": 4.672070930555196e-06, "loss": 0.5762, "step": 20999 }, { "epoch": 68.85245901639344, "grad_norm": 4.862240314483643, "learning_rate": 4.671172343340189e-06, "loss": 0.4873, "step": 21000 }, { "epoch": 68.85573770491803, "grad_norm": 11.236363410949707, "learning_rate": 4.67027381621325e-06, "loss": 0.3698, "step": 21001 }, { "epoch": 68.85901639344263, "grad_norm": 4.730092525482178, "learning_rate": 4.669375349184519e-06, "loss": 0.3909, "step": 21002 }, { "epoch": 68.86229508196722, "grad_norm": 6.019511699676514, "learning_rate": 4.668476942264124e-06, "loss": 0.4914, "step": 21003 }, { "epoch": 68.8655737704918, "grad_norm": 3.9178731441497803, "learning_rate": 4.667578595462194e-06, "loss": 0.2109, "step": 21004 }, { "epoch": 68.8688524590164, "grad_norm": 7.520512104034424, "learning_rate": 4.666680308788857e-06, "loss": 0.5626, "step": 21005 }, { "epoch": 68.87213114754098, "grad_norm": 5.070362567901611, "learning_rate": 4.665782082254247e-06, "loss": 0.4021, "step": 21006 }, { "epoch": 68.87540983606557, "grad_norm": 5.996250152587891, "learning_rate": 4.664883915868491e-06, "loss": 0.3362, "step": 21007 }, { "epoch": 68.87868852459016, "grad_norm": 5.3022685050964355, "learning_rate": 4.663985809641717e-06, "loss": 0.3079, "step": 21008 }, { "epoch": 68.88196721311475, "grad_norm": 4.803516864776611, "learning_rate": 4.66308776358405e-06, "loss": 0.1932, "step": 21009 }, { "epoch": 68.88524590163935, "grad_norm": 5.867275714874268, "learning_rate": 4.662189777705614e-06, "loss": 0.6098, "step": 21010 }, { "epoch": 68.88852459016394, "grad_norm": 5.648631572723389, "learning_rate": 4.661291852016542e-06, "loss": 0.2193, "step": 21011 }, { "epoch": 68.89180327868853, "grad_norm": 6.371619701385498, "learning_rate": 4.660393986526954e-06, "loss": 0.4445, "step": 21012 }, { "epoch": 68.89508196721312, "grad_norm": 5.230565071105957, "learning_rate": 4.6594961812469775e-06, "loss": 0.3038, "step": 21013 }, { "epoch": 68.8983606557377, "grad_norm": 4.604912281036377, "learning_rate": 4.658598436186729e-06, "loss": 0.4091, "step": 21014 }, { "epoch": 68.90163934426229, "grad_norm": 7.870186805725098, "learning_rate": 4.657700751356341e-06, "loss": 0.3161, "step": 21015 }, { "epoch": 68.90491803278688, "grad_norm": 8.065634727478027, "learning_rate": 4.656803126765932e-06, "loss": 0.5169, "step": 21016 }, { "epoch": 68.90819672131147, "grad_norm": 5.561306476593018, "learning_rate": 4.655905562425623e-06, "loss": 0.4958, "step": 21017 }, { "epoch": 68.91147540983607, "grad_norm": 5.507621765136719, "learning_rate": 4.6550080583455315e-06, "loss": 0.4214, "step": 21018 }, { "epoch": 68.91475409836066, "grad_norm": 6.698411464691162, "learning_rate": 4.654110614535786e-06, "loss": 0.4723, "step": 21019 }, { "epoch": 68.91803278688525, "grad_norm": 7.380552291870117, "learning_rate": 4.6532132310065e-06, "loss": 0.3967, "step": 21020 }, { "epoch": 68.92131147540984, "grad_norm": 5.19464111328125, "learning_rate": 4.652315907767796e-06, "loss": 0.3411, "step": 21021 }, { "epoch": 68.92459016393443, "grad_norm": 4.981045246124268, "learning_rate": 4.651418644829786e-06, "loss": 0.2996, "step": 21022 }, { "epoch": 68.92786885245901, "grad_norm": 5.871662139892578, "learning_rate": 4.6505214422025945e-06, "loss": 0.5727, "step": 21023 }, { "epoch": 68.9311475409836, "grad_norm": 9.093026161193848, "learning_rate": 4.649624299896337e-06, "loss": 0.5337, "step": 21024 }, { "epoch": 68.93442622950819, "grad_norm": 5.407269477844238, "learning_rate": 4.6487272179211255e-06, "loss": 0.4143, "step": 21025 }, { "epoch": 68.9377049180328, "grad_norm": 8.580230712890625, "learning_rate": 4.647830196287081e-06, "loss": 0.6293, "step": 21026 }, { "epoch": 68.94098360655738, "grad_norm": 6.891581058502197, "learning_rate": 4.646933235004315e-06, "loss": 0.4384, "step": 21027 }, { "epoch": 68.94426229508197, "grad_norm": 5.446918487548828, "learning_rate": 4.6460363340829406e-06, "loss": 0.4039, "step": 21028 }, { "epoch": 68.94754098360656, "grad_norm": 5.023777961730957, "learning_rate": 4.645139493533077e-06, "loss": 0.1809, "step": 21029 }, { "epoch": 68.95081967213115, "grad_norm": 4.428964138031006, "learning_rate": 4.644242713364831e-06, "loss": 0.3543, "step": 21030 }, { "epoch": 68.95409836065573, "grad_norm": 5.367990970611572, "learning_rate": 4.6433459935883156e-06, "loss": 0.4384, "step": 21031 }, { "epoch": 68.95737704918032, "grad_norm": 7.171029567718506, "learning_rate": 4.6424493342136465e-06, "loss": 0.6188, "step": 21032 }, { "epoch": 68.96065573770491, "grad_norm": 5.343758583068848, "learning_rate": 4.641552735250933e-06, "loss": 0.4719, "step": 21033 }, { "epoch": 68.96393442622951, "grad_norm": 5.536419868469238, "learning_rate": 4.640656196710281e-06, "loss": 0.3667, "step": 21034 }, { "epoch": 68.9672131147541, "grad_norm": 5.243997573852539, "learning_rate": 4.639759718601805e-06, "loss": 0.382, "step": 21035 }, { "epoch": 68.97049180327869, "grad_norm": 4.908714294433594, "learning_rate": 4.638863300935606e-06, "loss": 0.5961, "step": 21036 }, { "epoch": 68.97377049180328, "grad_norm": 4.757080078125, "learning_rate": 4.637966943721804e-06, "loss": 0.469, "step": 21037 }, { "epoch": 68.97704918032787, "grad_norm": 8.069604873657227, "learning_rate": 4.637070646970497e-06, "loss": 0.2825, "step": 21038 }, { "epoch": 68.98032786885246, "grad_norm": 5.4043803215026855, "learning_rate": 4.636174410691796e-06, "loss": 0.3171, "step": 21039 }, { "epoch": 68.98360655737704, "grad_norm": 7.892029762268066, "learning_rate": 4.635278234895802e-06, "loss": 0.3656, "step": 21040 }, { "epoch": 68.98688524590163, "grad_norm": 10.939618110656738, "learning_rate": 4.634382119592625e-06, "loss": 0.3996, "step": 21041 }, { "epoch": 68.99016393442623, "grad_norm": 4.372046947479248, "learning_rate": 4.6334860647923706e-06, "loss": 0.6203, "step": 21042 }, { "epoch": 68.99344262295082, "grad_norm": 5.078404426574707, "learning_rate": 4.632590070505141e-06, "loss": 0.3893, "step": 21043 }, { "epoch": 68.99672131147541, "grad_norm": 5.555828094482422, "learning_rate": 4.631694136741034e-06, "loss": 0.3053, "step": 21044 }, { "epoch": 69.0, "grad_norm": 8.569414138793945, "learning_rate": 4.630798263510162e-06, "loss": 0.4196, "step": 21045 }, { "epoch": 69.00327868852459, "grad_norm": 4.5806050300598145, "learning_rate": 4.629902450822622e-06, "loss": 0.2518, "step": 21046 }, { "epoch": 69.00655737704918, "grad_norm": 4.9874396324157715, "learning_rate": 4.629006698688515e-06, "loss": 0.4815, "step": 21047 }, { "epoch": 69.00983606557377, "grad_norm": 4.708341121673584, "learning_rate": 4.628111007117941e-06, "loss": 0.2479, "step": 21048 }, { "epoch": 69.01311475409837, "grad_norm": 4.521627902984619, "learning_rate": 4.627215376120998e-06, "loss": 0.2899, "step": 21049 }, { "epoch": 69.01639344262296, "grad_norm": 7.017852783203125, "learning_rate": 4.6263198057077916e-06, "loss": 0.2475, "step": 21050 }, { "epoch": 69.01967213114754, "grad_norm": 6.360005855560303, "learning_rate": 4.625424295888418e-06, "loss": 0.5409, "step": 21051 }, { "epoch": 69.02295081967213, "grad_norm": 4.614086151123047, "learning_rate": 4.624528846672972e-06, "loss": 0.6882, "step": 21052 }, { "epoch": 69.02622950819672, "grad_norm": 4.606401443481445, "learning_rate": 4.623633458071549e-06, "loss": 0.496, "step": 21053 }, { "epoch": 69.02950819672131, "grad_norm": 6.441519737243652, "learning_rate": 4.622738130094252e-06, "loss": 0.5292, "step": 21054 }, { "epoch": 69.0327868852459, "grad_norm": 5.633777618408203, "learning_rate": 4.6218428627511744e-06, "loss": 0.5399, "step": 21055 }, { "epoch": 69.03606557377049, "grad_norm": 5.161105632781982, "learning_rate": 4.62094765605241e-06, "loss": 0.4344, "step": 21056 }, { "epoch": 69.03934426229509, "grad_norm": 4.987829685211182, "learning_rate": 4.620052510008049e-06, "loss": 0.4706, "step": 21057 }, { "epoch": 69.04262295081968, "grad_norm": 9.101212501525879, "learning_rate": 4.619157424628195e-06, "loss": 0.4884, "step": 21058 }, { "epoch": 69.04590163934427, "grad_norm": 5.462585926055908, "learning_rate": 4.618262399922935e-06, "loss": 0.5736, "step": 21059 }, { "epoch": 69.04918032786885, "grad_norm": 4.754011631011963, "learning_rate": 4.617367435902363e-06, "loss": 0.5332, "step": 21060 }, { "epoch": 69.05245901639344, "grad_norm": 6.572820663452148, "learning_rate": 4.616472532576568e-06, "loss": 0.6885, "step": 21061 }, { "epoch": 69.05573770491803, "grad_norm": 5.35509729385376, "learning_rate": 4.615577689955639e-06, "loss": 0.3756, "step": 21062 }, { "epoch": 69.05901639344262, "grad_norm": 63.41215515136719, "learning_rate": 4.6146829080496746e-06, "loss": 0.4373, "step": 21063 }, { "epoch": 69.0622950819672, "grad_norm": 6.014679908752441, "learning_rate": 4.613788186868759e-06, "loss": 0.4765, "step": 21064 }, { "epoch": 69.06557377049181, "grad_norm": 4.076762676239014, "learning_rate": 4.612893526422983e-06, "loss": 0.099, "step": 21065 }, { "epoch": 69.0688524590164, "grad_norm": 10.573928833007812, "learning_rate": 4.611998926722428e-06, "loss": 0.4079, "step": 21066 }, { "epoch": 69.07213114754099, "grad_norm": 4.818136692047119, "learning_rate": 4.611104387777193e-06, "loss": 0.5231, "step": 21067 }, { "epoch": 69.07540983606557, "grad_norm": 4.2541961669921875, "learning_rate": 4.610209909597359e-06, "loss": 0.5875, "step": 21068 }, { "epoch": 69.07868852459016, "grad_norm": 4.418519020080566, "learning_rate": 4.609315492193011e-06, "loss": 0.3311, "step": 21069 }, { "epoch": 69.08196721311475, "grad_norm": 5.743397235870361, "learning_rate": 4.608421135574232e-06, "loss": 0.389, "step": 21070 }, { "epoch": 69.08524590163934, "grad_norm": 4.416395664215088, "learning_rate": 4.607526839751115e-06, "loss": 0.4571, "step": 21071 }, { "epoch": 69.08852459016393, "grad_norm": 4.324887752532959, "learning_rate": 4.60663260473374e-06, "loss": 0.4068, "step": 21072 }, { "epoch": 69.09180327868853, "grad_norm": 4.749372959136963, "learning_rate": 4.60573843053219e-06, "loss": 0.479, "step": 21073 }, { "epoch": 69.09508196721312, "grad_norm": 6.139114856719971, "learning_rate": 4.604844317156543e-06, "loss": 0.4236, "step": 21074 }, { "epoch": 69.09836065573771, "grad_norm": 4.867362022399902, "learning_rate": 4.60395026461689e-06, "loss": 0.3945, "step": 21075 }, { "epoch": 69.1016393442623, "grad_norm": 11.150317192077637, "learning_rate": 4.603056272923309e-06, "loss": 0.3385, "step": 21076 }, { "epoch": 69.10491803278688, "grad_norm": 4.624267578125, "learning_rate": 4.602162342085879e-06, "loss": 0.6664, "step": 21077 }, { "epoch": 69.10819672131147, "grad_norm": 4.3692545890808105, "learning_rate": 4.6012684721146775e-06, "loss": 0.3848, "step": 21078 }, { "epoch": 69.11147540983606, "grad_norm": 6.288823127746582, "learning_rate": 4.600374663019791e-06, "loss": 0.4292, "step": 21079 }, { "epoch": 69.11475409836065, "grad_norm": 8.297119140625, "learning_rate": 4.599480914811294e-06, "loss": 0.4517, "step": 21080 }, { "epoch": 69.11803278688525, "grad_norm": 5.110057353973389, "learning_rate": 4.598587227499261e-06, "loss": 0.3481, "step": 21081 }, { "epoch": 69.12131147540984, "grad_norm": 7.92757511138916, "learning_rate": 4.597693601093779e-06, "loss": 0.4601, "step": 21082 }, { "epoch": 69.12459016393443, "grad_norm": 5.423368453979492, "learning_rate": 4.596800035604917e-06, "loss": 0.5631, "step": 21083 }, { "epoch": 69.12786885245902, "grad_norm": 6.459432125091553, "learning_rate": 4.5959065310427485e-06, "loss": 0.3326, "step": 21084 }, { "epoch": 69.1311475409836, "grad_norm": 4.236459255218506, "learning_rate": 4.595013087417356e-06, "loss": 0.1855, "step": 21085 }, { "epoch": 69.1344262295082, "grad_norm": 5.3160834312438965, "learning_rate": 4.594119704738812e-06, "loss": 0.5689, "step": 21086 }, { "epoch": 69.13770491803278, "grad_norm": 4.824451446533203, "learning_rate": 4.593226383017189e-06, "loss": 0.5194, "step": 21087 }, { "epoch": 69.14098360655737, "grad_norm": 4.399815559387207, "learning_rate": 4.592333122262555e-06, "loss": 0.4007, "step": 21088 }, { "epoch": 69.14426229508197, "grad_norm": 5.794680118560791, "learning_rate": 4.591439922484993e-06, "loss": 0.34, "step": 21089 }, { "epoch": 69.14754098360656, "grad_norm": 9.986686706542969, "learning_rate": 4.590546783694568e-06, "loss": 0.535, "step": 21090 }, { "epoch": 69.15081967213115, "grad_norm": 5.024967193603516, "learning_rate": 4.5896537059013536e-06, "loss": 0.5907, "step": 21091 }, { "epoch": 69.15409836065574, "grad_norm": 6.068625450134277, "learning_rate": 4.588760689115414e-06, "loss": 0.4801, "step": 21092 }, { "epoch": 69.15737704918033, "grad_norm": 4.345926284790039, "learning_rate": 4.587867733346829e-06, "loss": 0.4985, "step": 21093 }, { "epoch": 69.16065573770491, "grad_norm": 9.943999290466309, "learning_rate": 4.5869748386056615e-06, "loss": 0.4823, "step": 21094 }, { "epoch": 69.1639344262295, "grad_norm": 5.113020896911621, "learning_rate": 4.586082004901982e-06, "loss": 0.2922, "step": 21095 }, { "epoch": 69.1672131147541, "grad_norm": 6.060427188873291, "learning_rate": 4.585189232245851e-06, "loss": 0.4196, "step": 21096 }, { "epoch": 69.1704918032787, "grad_norm": 4.5267653465271, "learning_rate": 4.584296520647348e-06, "loss": 0.3699, "step": 21097 }, { "epoch": 69.17377049180328, "grad_norm": 6.214968681335449, "learning_rate": 4.5834038701165304e-06, "loss": 0.4294, "step": 21098 }, { "epoch": 69.17704918032787, "grad_norm": 6.23384952545166, "learning_rate": 4.5825112806634665e-06, "loss": 0.2746, "step": 21099 }, { "epoch": 69.18032786885246, "grad_norm": 10.471280097961426, "learning_rate": 4.581618752298221e-06, "loss": 0.5056, "step": 21100 }, { "epoch": 69.18360655737705, "grad_norm": 6.663015842437744, "learning_rate": 4.5807262850308544e-06, "loss": 0.3427, "step": 21101 }, { "epoch": 69.18688524590164, "grad_norm": 4.8505706787109375, "learning_rate": 4.579833878871437e-06, "loss": 0.3668, "step": 21102 }, { "epoch": 69.19016393442622, "grad_norm": 6.666677951812744, "learning_rate": 4.578941533830028e-06, "loss": 0.2914, "step": 21103 }, { "epoch": 69.19344262295083, "grad_norm": 6.6105194091796875, "learning_rate": 4.57804924991669e-06, "loss": 0.3742, "step": 21104 }, { "epoch": 69.19672131147541, "grad_norm": 19.18216323852539, "learning_rate": 4.57715702714148e-06, "loss": 0.5105, "step": 21105 }, { "epoch": 69.2, "grad_norm": 6.188175201416016, "learning_rate": 4.576264865514467e-06, "loss": 0.6055, "step": 21106 }, { "epoch": 69.20327868852459, "grad_norm": 5.7790632247924805, "learning_rate": 4.575372765045707e-06, "loss": 0.3829, "step": 21107 }, { "epoch": 69.20655737704918, "grad_norm": 4.4120965003967285, "learning_rate": 4.574480725745258e-06, "loss": 0.3, "step": 21108 }, { "epoch": 69.20983606557377, "grad_norm": 4.932261943817139, "learning_rate": 4.573588747623178e-06, "loss": 0.4449, "step": 21109 }, { "epoch": 69.21311475409836, "grad_norm": 5.059193134307861, "learning_rate": 4.5726968306895306e-06, "loss": 0.2127, "step": 21110 }, { "epoch": 69.21639344262294, "grad_norm": 7.534898281097412, "learning_rate": 4.571804974954368e-06, "loss": 0.3818, "step": 21111 }, { "epoch": 69.21967213114755, "grad_norm": 6.006926536560059, "learning_rate": 4.570913180427749e-06, "loss": 0.4923, "step": 21112 }, { "epoch": 69.22295081967214, "grad_norm": 5.433441638946533, "learning_rate": 4.570021447119729e-06, "loss": 0.3233, "step": 21113 }, { "epoch": 69.22622950819672, "grad_norm": 5.020256996154785, "learning_rate": 4.569129775040359e-06, "loss": 0.4546, "step": 21114 }, { "epoch": 69.22950819672131, "grad_norm": 5.567745685577393, "learning_rate": 4.568238164199701e-06, "loss": 0.4058, "step": 21115 }, { "epoch": 69.2327868852459, "grad_norm": 5.266453742980957, "learning_rate": 4.567346614607805e-06, "loss": 0.2143, "step": 21116 }, { "epoch": 69.23606557377049, "grad_norm": 11.18169116973877, "learning_rate": 4.5664551262747244e-06, "loss": 0.4175, "step": 21117 }, { "epoch": 69.23934426229508, "grad_norm": 11.72359561920166, "learning_rate": 4.565563699210509e-06, "loss": 0.5691, "step": 21118 }, { "epoch": 69.24262295081967, "grad_norm": 4.147406101226807, "learning_rate": 4.5646723334252165e-06, "loss": 0.4474, "step": 21119 }, { "epoch": 69.24590163934427, "grad_norm": 4.7689056396484375, "learning_rate": 4.563781028928894e-06, "loss": 0.4277, "step": 21120 }, { "epoch": 69.24918032786886, "grad_norm": 17.776077270507812, "learning_rate": 4.562889785731594e-06, "loss": 0.3961, "step": 21121 }, { "epoch": 69.25245901639344, "grad_norm": 5.265648365020752, "learning_rate": 4.561998603843359e-06, "loss": 0.5734, "step": 21122 }, { "epoch": 69.25573770491803, "grad_norm": 6.836697578430176, "learning_rate": 4.561107483274249e-06, "loss": 0.3932, "step": 21123 }, { "epoch": 69.25901639344262, "grad_norm": 5.845282554626465, "learning_rate": 4.5602164240343085e-06, "loss": 0.5484, "step": 21124 }, { "epoch": 69.26229508196721, "grad_norm": 4.978288650512695, "learning_rate": 4.5593254261335816e-06, "loss": 0.4152, "step": 21125 }, { "epoch": 69.2655737704918, "grad_norm": 6.757359504699707, "learning_rate": 4.558434489582119e-06, "loss": 0.7277, "step": 21126 }, { "epoch": 69.26885245901639, "grad_norm": 4.43781852722168, "learning_rate": 4.557543614389961e-06, "loss": 0.5603, "step": 21127 }, { "epoch": 69.27213114754099, "grad_norm": 5.858060359954834, "learning_rate": 4.55665280056716e-06, "loss": 0.6272, "step": 21128 }, { "epoch": 69.27540983606558, "grad_norm": 4.413166522979736, "learning_rate": 4.555762048123761e-06, "loss": 0.5281, "step": 21129 }, { "epoch": 69.27868852459017, "grad_norm": 7.467249870300293, "learning_rate": 4.5548713570698e-06, "loss": 0.3791, "step": 21130 }, { "epoch": 69.28196721311475, "grad_norm": 8.585856437683105, "learning_rate": 4.553980727415329e-06, "loss": 0.4505, "step": 21131 }, { "epoch": 69.28524590163934, "grad_norm": 7.041963577270508, "learning_rate": 4.553090159170389e-06, "loss": 0.4843, "step": 21132 }, { "epoch": 69.28852459016393, "grad_norm": 5.415542125701904, "learning_rate": 4.552199652345016e-06, "loss": 0.3663, "step": 21133 }, { "epoch": 69.29180327868852, "grad_norm": 8.124509811401367, "learning_rate": 4.551309206949262e-06, "loss": 0.4389, "step": 21134 }, { "epoch": 69.29508196721312, "grad_norm": 6.634955883026123, "learning_rate": 4.55041882299316e-06, "loss": 0.3621, "step": 21135 }, { "epoch": 69.29836065573771, "grad_norm": 6.267683982849121, "learning_rate": 4.549528500486753e-06, "loss": 0.3913, "step": 21136 }, { "epoch": 69.3016393442623, "grad_norm": 5.349595069885254, "learning_rate": 4.548638239440074e-06, "loss": 0.3246, "step": 21137 }, { "epoch": 69.30491803278689, "grad_norm": 7.335188865661621, "learning_rate": 4.547748039863172e-06, "loss": 0.3483, "step": 21138 }, { "epoch": 69.30819672131148, "grad_norm": 6.14605188369751, "learning_rate": 4.5468579017660796e-06, "loss": 0.596, "step": 21139 }, { "epoch": 69.31147540983606, "grad_norm": 5.393110752105713, "learning_rate": 4.54596782515883e-06, "loss": 0.6696, "step": 21140 }, { "epoch": 69.31475409836065, "grad_norm": 5.310802459716797, "learning_rate": 4.545077810051468e-06, "loss": 0.3372, "step": 21141 }, { "epoch": 69.31803278688524, "grad_norm": 5.040396213531494, "learning_rate": 4.544187856454025e-06, "loss": 0.3098, "step": 21142 }, { "epoch": 69.32131147540984, "grad_norm": 5.025834083557129, "learning_rate": 4.5432979643765375e-06, "loss": 0.5525, "step": 21143 }, { "epoch": 69.32459016393443, "grad_norm": 5.175705909729004, "learning_rate": 4.542408133829034e-06, "loss": 0.3653, "step": 21144 }, { "epoch": 69.32786885245902, "grad_norm": 4.045888900756836, "learning_rate": 4.541518364821557e-06, "loss": 0.2326, "step": 21145 }, { "epoch": 69.33114754098361, "grad_norm": 7.655885219573975, "learning_rate": 4.540628657364136e-06, "loss": 0.2629, "step": 21146 }, { "epoch": 69.3344262295082, "grad_norm": 4.764509677886963, "learning_rate": 4.539739011466805e-06, "loss": 0.4041, "step": 21147 }, { "epoch": 69.33770491803278, "grad_norm": 4.61870813369751, "learning_rate": 4.538849427139588e-06, "loss": 0.2729, "step": 21148 }, { "epoch": 69.34098360655737, "grad_norm": 4.7018256187438965, "learning_rate": 4.537959904392527e-06, "loss": 0.4806, "step": 21149 }, { "epoch": 69.34426229508196, "grad_norm": 6.694897174835205, "learning_rate": 4.537070443235646e-06, "loss": 0.4713, "step": 21150 }, { "epoch": 69.34754098360656, "grad_norm": 7.937499046325684, "learning_rate": 4.536181043678976e-06, "loss": 0.5216, "step": 21151 }, { "epoch": 69.35081967213115, "grad_norm": 4.820291042327881, "learning_rate": 4.535291705732547e-06, "loss": 0.5059, "step": 21152 }, { "epoch": 69.35409836065574, "grad_norm": 6.3642778396606445, "learning_rate": 4.5344024294063805e-06, "loss": 0.446, "step": 21153 }, { "epoch": 69.35737704918033, "grad_norm": 6.0899763107299805, "learning_rate": 4.533513214710515e-06, "loss": 0.4189, "step": 21154 }, { "epoch": 69.36065573770492, "grad_norm": 4.372117042541504, "learning_rate": 4.53262406165497e-06, "loss": 0.2784, "step": 21155 }, { "epoch": 69.3639344262295, "grad_norm": 7.568437576293945, "learning_rate": 4.531734970249774e-06, "loss": 0.4941, "step": 21156 }, { "epoch": 69.3672131147541, "grad_norm": 5.222970485687256, "learning_rate": 4.530845940504948e-06, "loss": 0.5778, "step": 21157 }, { "epoch": 69.37049180327868, "grad_norm": 6.582097053527832, "learning_rate": 4.529956972430524e-06, "loss": 0.5148, "step": 21158 }, { "epoch": 69.37377049180328, "grad_norm": 6.748401165008545, "learning_rate": 4.529068066036523e-06, "loss": 0.3187, "step": 21159 }, { "epoch": 69.37704918032787, "grad_norm": 5.172211170196533, "learning_rate": 4.528179221332967e-06, "loss": 0.4901, "step": 21160 }, { "epoch": 69.38032786885246, "grad_norm": 5.628026962280273, "learning_rate": 4.527290438329876e-06, "loss": 0.325, "step": 21161 }, { "epoch": 69.38360655737705, "grad_norm": 4.341569900512695, "learning_rate": 4.52640171703728e-06, "loss": 0.2646, "step": 21162 }, { "epoch": 69.38688524590164, "grad_norm": 4.613437175750732, "learning_rate": 4.525513057465195e-06, "loss": 0.5379, "step": 21163 }, { "epoch": 69.39016393442623, "grad_norm": 5.259754180908203, "learning_rate": 4.524624459623643e-06, "loss": 0.5472, "step": 21164 }, { "epoch": 69.39344262295081, "grad_norm": 5.694925785064697, "learning_rate": 4.5237359235226434e-06, "loss": 0.3304, "step": 21165 }, { "epoch": 69.3967213114754, "grad_norm": 5.405331611633301, "learning_rate": 4.522847449172211e-06, "loss": 0.3989, "step": 21166 }, { "epoch": 69.4, "grad_norm": 5.2633185386657715, "learning_rate": 4.521959036582372e-06, "loss": 0.7195, "step": 21167 }, { "epoch": 69.4032786885246, "grad_norm": 4.911101341247559, "learning_rate": 4.52107068576314e-06, "loss": 0.3138, "step": 21168 }, { "epoch": 69.40655737704918, "grad_norm": 4.780327320098877, "learning_rate": 4.520182396724534e-06, "loss": 0.2055, "step": 21169 }, { "epoch": 69.40983606557377, "grad_norm": 5.008604049682617, "learning_rate": 4.519294169476565e-06, "loss": 0.3117, "step": 21170 }, { "epoch": 69.41311475409836, "grad_norm": 13.591509819030762, "learning_rate": 4.518406004029256e-06, "loss": 0.3505, "step": 21171 }, { "epoch": 69.41639344262295, "grad_norm": 4.570397853851318, "learning_rate": 4.517517900392618e-06, "loss": 0.3174, "step": 21172 }, { "epoch": 69.41967213114754, "grad_norm": 4.573800086975098, "learning_rate": 4.516629858576667e-06, "loss": 0.3816, "step": 21173 }, { "epoch": 69.42295081967212, "grad_norm": 5.027214527130127, "learning_rate": 4.515741878591413e-06, "loss": 0.5779, "step": 21174 }, { "epoch": 69.42622950819673, "grad_norm": 5.647025108337402, "learning_rate": 4.514853960446873e-06, "loss": 0.4016, "step": 21175 }, { "epoch": 69.42950819672132, "grad_norm": 6.397541046142578, "learning_rate": 4.513966104153059e-06, "loss": 0.439, "step": 21176 }, { "epoch": 69.4327868852459, "grad_norm": 4.855385780334473, "learning_rate": 4.513078309719981e-06, "loss": 0.2978, "step": 21177 }, { "epoch": 69.43606557377049, "grad_norm": 5.13671875, "learning_rate": 4.51219057715765e-06, "loss": 0.5374, "step": 21178 }, { "epoch": 69.43934426229508, "grad_norm": 4.278522968292236, "learning_rate": 4.511302906476073e-06, "loss": 0.5487, "step": 21179 }, { "epoch": 69.44262295081967, "grad_norm": 4.44713830947876, "learning_rate": 4.510415297685266e-06, "loss": 0.2899, "step": 21180 }, { "epoch": 69.44590163934426, "grad_norm": 5.16531229019165, "learning_rate": 4.509527750795234e-06, "loss": 0.422, "step": 21181 }, { "epoch": 69.44918032786886, "grad_norm": 5.682018280029297, "learning_rate": 4.508640265815984e-06, "loss": 0.51, "step": 21182 }, { "epoch": 69.45245901639345, "grad_norm": 11.32913875579834, "learning_rate": 4.507752842757524e-06, "loss": 0.4849, "step": 21183 }, { "epoch": 69.45573770491804, "grad_norm": 4.933193206787109, "learning_rate": 4.506865481629862e-06, "loss": 0.543, "step": 21184 }, { "epoch": 69.45901639344262, "grad_norm": 6.290352821350098, "learning_rate": 4.505978182443004e-06, "loss": 0.4743, "step": 21185 }, { "epoch": 69.46229508196721, "grad_norm": 4.673447132110596, "learning_rate": 4.505090945206951e-06, "loss": 0.4954, "step": 21186 }, { "epoch": 69.4655737704918, "grad_norm": 11.082098960876465, "learning_rate": 4.504203769931715e-06, "loss": 0.1926, "step": 21187 }, { "epoch": 69.46885245901639, "grad_norm": 4.084883213043213, "learning_rate": 4.503316656627295e-06, "loss": 0.2884, "step": 21188 }, { "epoch": 69.47213114754098, "grad_norm": 5.461822986602783, "learning_rate": 4.502429605303691e-06, "loss": 0.4162, "step": 21189 }, { "epoch": 69.47540983606558, "grad_norm": 5.396120548248291, "learning_rate": 4.501542615970913e-06, "loss": 0.5244, "step": 21190 }, { "epoch": 69.47868852459017, "grad_norm": 4.951560020446777, "learning_rate": 4.500655688638959e-06, "loss": 0.4279, "step": 21191 }, { "epoch": 69.48196721311476, "grad_norm": 5.152243614196777, "learning_rate": 4.499768823317826e-06, "loss": 0.6215, "step": 21192 }, { "epoch": 69.48524590163935, "grad_norm": 4.815619468688965, "learning_rate": 4.498882020017522e-06, "loss": 0.326, "step": 21193 }, { "epoch": 69.48852459016393, "grad_norm": 4.775608062744141, "learning_rate": 4.4979952787480444e-06, "loss": 0.2865, "step": 21194 }, { "epoch": 69.49180327868852, "grad_norm": 7.565737724304199, "learning_rate": 4.497108599519389e-06, "loss": 0.6155, "step": 21195 }, { "epoch": 69.49508196721311, "grad_norm": 7.3431010246276855, "learning_rate": 4.496221982341553e-06, "loss": 0.4298, "step": 21196 }, { "epoch": 69.4983606557377, "grad_norm": 5.737128257751465, "learning_rate": 4.49533542722454e-06, "loss": 0.3871, "step": 21197 }, { "epoch": 69.5016393442623, "grad_norm": 5.406680107116699, "learning_rate": 4.494448934178344e-06, "loss": 0.4338, "step": 21198 }, { "epoch": 69.50491803278689, "grad_norm": 6.270165920257568, "learning_rate": 4.493562503212959e-06, "loss": 0.6226, "step": 21199 }, { "epoch": 69.50819672131148, "grad_norm": 5.146512508392334, "learning_rate": 4.49267613433838e-06, "loss": 0.4428, "step": 21200 }, { "epoch": 69.51147540983607, "grad_norm": 5.146845817565918, "learning_rate": 4.491789827564606e-06, "loss": 0.3039, "step": 21201 }, { "epoch": 69.51475409836065, "grad_norm": 5.525745391845703, "learning_rate": 4.49090358290163e-06, "loss": 0.458, "step": 21202 }, { "epoch": 69.51803278688524, "grad_norm": 5.691770076751709, "learning_rate": 4.490017400359444e-06, "loss": 0.3562, "step": 21203 }, { "epoch": 69.52131147540983, "grad_norm": 5.072693824768066, "learning_rate": 4.48913127994804e-06, "loss": 0.4547, "step": 21204 }, { "epoch": 69.52459016393442, "grad_norm": 17.15655517578125, "learning_rate": 4.488245221677409e-06, "loss": 0.6287, "step": 21205 }, { "epoch": 69.52786885245902, "grad_norm": 6.288959503173828, "learning_rate": 4.487359225557545e-06, "loss": 0.3328, "step": 21206 }, { "epoch": 69.53114754098361, "grad_norm": 4.797799110412598, "learning_rate": 4.486473291598439e-06, "loss": 0.2771, "step": 21207 }, { "epoch": 69.5344262295082, "grad_norm": 4.20881986618042, "learning_rate": 4.485587419810079e-06, "loss": 0.3747, "step": 21208 }, { "epoch": 69.53770491803279, "grad_norm": 5.184203147888184, "learning_rate": 4.4847016102024495e-06, "loss": 0.6541, "step": 21209 }, { "epoch": 69.54098360655738, "grad_norm": 7.111997127532959, "learning_rate": 4.483815862785549e-06, "loss": 0.5067, "step": 21210 }, { "epoch": 69.54426229508196, "grad_norm": 6.434099197387695, "learning_rate": 4.48293017756936e-06, "loss": 0.5127, "step": 21211 }, { "epoch": 69.54754098360655, "grad_norm": 6.03999662399292, "learning_rate": 4.482044554563869e-06, "loss": 0.4621, "step": 21212 }, { "epoch": 69.55081967213114, "grad_norm": 4.876189231872559, "learning_rate": 4.481158993779059e-06, "loss": 0.4996, "step": 21213 }, { "epoch": 69.55409836065574, "grad_norm": 6.01818323135376, "learning_rate": 4.480273495224924e-06, "loss": 0.4372, "step": 21214 }, { "epoch": 69.55737704918033, "grad_norm": 5.109306812286377, "learning_rate": 4.479388058911443e-06, "loss": 0.3383, "step": 21215 }, { "epoch": 69.56065573770492, "grad_norm": 11.51046371459961, "learning_rate": 4.478502684848604e-06, "loss": 0.5249, "step": 21216 }, { "epoch": 69.56393442622951, "grad_norm": 4.2132039070129395, "learning_rate": 4.4776173730463864e-06, "loss": 0.4149, "step": 21217 }, { "epoch": 69.5672131147541, "grad_norm": 4.608546257019043, "learning_rate": 4.476732123514771e-06, "loss": 0.2665, "step": 21218 }, { "epoch": 69.57049180327868, "grad_norm": 6.479859828948975, "learning_rate": 4.475846936263748e-06, "loss": 0.2544, "step": 21219 }, { "epoch": 69.57377049180327, "grad_norm": 6.534286975860596, "learning_rate": 4.4749618113032945e-06, "loss": 0.271, "step": 21220 }, { "epoch": 69.57704918032788, "grad_norm": 6.926113605499268, "learning_rate": 4.474076748643391e-06, "loss": 0.2688, "step": 21221 }, { "epoch": 69.58032786885246, "grad_norm": 4.632765769958496, "learning_rate": 4.4731917482940135e-06, "loss": 0.2641, "step": 21222 }, { "epoch": 69.58360655737705, "grad_norm": 4.368277072906494, "learning_rate": 4.47230681026515e-06, "loss": 0.436, "step": 21223 }, { "epoch": 69.58688524590164, "grad_norm": 5.488447666168213, "learning_rate": 4.4714219345667745e-06, "loss": 0.4718, "step": 21224 }, { "epoch": 69.59016393442623, "grad_norm": 6.240122318267822, "learning_rate": 4.470537121208864e-06, "loss": 0.4336, "step": 21225 }, { "epoch": 69.59344262295082, "grad_norm": 5.483293056488037, "learning_rate": 4.4696523702013935e-06, "loss": 0.3356, "step": 21226 }, { "epoch": 69.5967213114754, "grad_norm": 4.303501605987549, "learning_rate": 4.468767681554347e-06, "loss": 0.372, "step": 21227 }, { "epoch": 69.6, "grad_norm": 6.31109094619751, "learning_rate": 4.467883055277696e-06, "loss": 0.4161, "step": 21228 }, { "epoch": 69.6032786885246, "grad_norm": 7.333486080169678, "learning_rate": 4.466998491381413e-06, "loss": 0.2979, "step": 21229 }, { "epoch": 69.60655737704919, "grad_norm": 4.701954364776611, "learning_rate": 4.466113989875478e-06, "loss": 0.5113, "step": 21230 }, { "epoch": 69.60983606557377, "grad_norm": 3.9182865619659424, "learning_rate": 4.465229550769856e-06, "loss": 0.2888, "step": 21231 }, { "epoch": 69.61311475409836, "grad_norm": 6.017703056335449, "learning_rate": 4.46434517407453e-06, "loss": 0.3058, "step": 21232 }, { "epoch": 69.61639344262295, "grad_norm": 4.776162147521973, "learning_rate": 4.463460859799468e-06, "loss": 0.2726, "step": 21233 }, { "epoch": 69.61967213114754, "grad_norm": 4.865444183349609, "learning_rate": 4.462576607954641e-06, "loss": 0.4992, "step": 21234 }, { "epoch": 69.62295081967213, "grad_norm": 5.82581090927124, "learning_rate": 4.461692418550017e-06, "loss": 0.4824, "step": 21235 }, { "epoch": 69.62622950819672, "grad_norm": 5.958526611328125, "learning_rate": 4.460808291595572e-06, "loss": 0.5824, "step": 21236 }, { "epoch": 69.62950819672132, "grad_norm": 5.230915069580078, "learning_rate": 4.459924227101273e-06, "loss": 0.382, "step": 21237 }, { "epoch": 69.6327868852459, "grad_norm": 5.1781134605407715, "learning_rate": 4.459040225077086e-06, "loss": 0.2747, "step": 21238 }, { "epoch": 69.6360655737705, "grad_norm": 4.826895236968994, "learning_rate": 4.458156285532984e-06, "loss": 0.513, "step": 21239 }, { "epoch": 69.63934426229508, "grad_norm": 5.543557167053223, "learning_rate": 4.457272408478933e-06, "loss": 0.5054, "step": 21240 }, { "epoch": 69.64262295081967, "grad_norm": 4.748877048492432, "learning_rate": 4.456388593924897e-06, "loss": 0.4847, "step": 21241 }, { "epoch": 69.64590163934426, "grad_norm": 4.2666215896606445, "learning_rate": 4.455504841880842e-06, "loss": 0.3828, "step": 21242 }, { "epoch": 69.64918032786885, "grad_norm": 5.813002586364746, "learning_rate": 4.454621152356737e-06, "loss": 0.2919, "step": 21243 }, { "epoch": 69.65245901639344, "grad_norm": 4.762764930725098, "learning_rate": 4.453737525362544e-06, "loss": 0.2437, "step": 21244 }, { "epoch": 69.65573770491804, "grad_norm": 5.712033748626709, "learning_rate": 4.452853960908224e-06, "loss": 0.4426, "step": 21245 }, { "epoch": 69.65901639344263, "grad_norm": 4.434603691101074, "learning_rate": 4.4519704590037485e-06, "loss": 0.3971, "step": 21246 }, { "epoch": 69.66229508196722, "grad_norm": 12.527069091796875, "learning_rate": 4.451087019659073e-06, "loss": 0.4429, "step": 21247 }, { "epoch": 69.6655737704918, "grad_norm": 4.903153896331787, "learning_rate": 4.450203642884156e-06, "loss": 0.2192, "step": 21248 }, { "epoch": 69.66885245901639, "grad_norm": 13.3887300491333, "learning_rate": 4.449320328688969e-06, "loss": 0.655, "step": 21249 }, { "epoch": 69.67213114754098, "grad_norm": 4.627981662750244, "learning_rate": 4.448437077083465e-06, "loss": 0.2946, "step": 21250 }, { "epoch": 69.67540983606557, "grad_norm": 6.184403419494629, "learning_rate": 4.447553888077606e-06, "loss": 0.3133, "step": 21251 }, { "epoch": 69.67868852459016, "grad_norm": 6.973102569580078, "learning_rate": 4.446670761681345e-06, "loss": 0.445, "step": 21252 }, { "epoch": 69.68196721311476, "grad_norm": 4.37593936920166, "learning_rate": 4.445787697904651e-06, "loss": 0.4823, "step": 21253 }, { "epoch": 69.68524590163935, "grad_norm": 5.376957893371582, "learning_rate": 4.444904696757474e-06, "loss": 0.3442, "step": 21254 }, { "epoch": 69.68852459016394, "grad_norm": 6.251377582550049, "learning_rate": 4.444021758249774e-06, "loss": 0.4936, "step": 21255 }, { "epoch": 69.69180327868852, "grad_norm": 6.119541645050049, "learning_rate": 4.443138882391503e-06, "loss": 0.6882, "step": 21256 }, { "epoch": 69.69508196721311, "grad_norm": 5.302008152008057, "learning_rate": 4.442256069192617e-06, "loss": 0.5785, "step": 21257 }, { "epoch": 69.6983606557377, "grad_norm": 4.021934509277344, "learning_rate": 4.4413733186630755e-06, "loss": 0.5805, "step": 21258 }, { "epoch": 69.70163934426229, "grad_norm": 5.986817836761475, "learning_rate": 4.440490630812829e-06, "loss": 0.2342, "step": 21259 }, { "epoch": 69.70491803278688, "grad_norm": 5.288819313049316, "learning_rate": 4.439608005651832e-06, "loss": 0.4482, "step": 21260 }, { "epoch": 69.70819672131148, "grad_norm": 5.534578323364258, "learning_rate": 4.4387254431900314e-06, "loss": 0.5908, "step": 21261 }, { "epoch": 69.71147540983607, "grad_norm": 5.634636402130127, "learning_rate": 4.437842943437389e-06, "loss": 0.457, "step": 21262 }, { "epoch": 69.71475409836066, "grad_norm": 4.904442310333252, "learning_rate": 4.436960506403849e-06, "loss": 0.5031, "step": 21263 }, { "epoch": 69.71803278688525, "grad_norm": 6.8283371925354, "learning_rate": 4.436078132099364e-06, "loss": 0.525, "step": 21264 }, { "epoch": 69.72131147540983, "grad_norm": 5.091111660003662, "learning_rate": 4.43519582053388e-06, "loss": 0.3532, "step": 21265 }, { "epoch": 69.72459016393442, "grad_norm": 7.64028263092041, "learning_rate": 4.434313571717352e-06, "loss": 0.2598, "step": 21266 }, { "epoch": 69.72786885245901, "grad_norm": 5.8445963859558105, "learning_rate": 4.433431385659726e-06, "loss": 0.3048, "step": 21267 }, { "epoch": 69.73114754098361, "grad_norm": 5.35722017288208, "learning_rate": 4.432549262370948e-06, "loss": 0.4721, "step": 21268 }, { "epoch": 69.7344262295082, "grad_norm": 4.936645030975342, "learning_rate": 4.431667201860965e-06, "loss": 0.4227, "step": 21269 }, { "epoch": 69.73770491803279, "grad_norm": 5.279725551605225, "learning_rate": 4.430785204139722e-06, "loss": 0.537, "step": 21270 }, { "epoch": 69.74098360655738, "grad_norm": 4.97439432144165, "learning_rate": 4.4299032692171695e-06, "loss": 0.5076, "step": 21271 }, { "epoch": 69.74426229508197, "grad_norm": 5.259214878082275, "learning_rate": 4.429021397103249e-06, "loss": 0.4161, "step": 21272 }, { "epoch": 69.74754098360656, "grad_norm": 5.2230730056762695, "learning_rate": 4.428139587807905e-06, "loss": 0.3854, "step": 21273 }, { "epoch": 69.75081967213114, "grad_norm": 5.870216369628906, "learning_rate": 4.427257841341076e-06, "loss": 0.3732, "step": 21274 }, { "epoch": 69.75409836065573, "grad_norm": 4.535610675811768, "learning_rate": 4.426376157712713e-06, "loss": 0.424, "step": 21275 }, { "epoch": 69.75737704918033, "grad_norm": 4.595137119293213, "learning_rate": 4.425494536932754e-06, "loss": 0.3781, "step": 21276 }, { "epoch": 69.76065573770492, "grad_norm": 5.416951656341553, "learning_rate": 4.42461297901114e-06, "loss": 0.3521, "step": 21277 }, { "epoch": 69.76393442622951, "grad_norm": 4.064438343048096, "learning_rate": 4.423731483957808e-06, "loss": 0.3771, "step": 21278 }, { "epoch": 69.7672131147541, "grad_norm": 5.2983808517456055, "learning_rate": 4.422850051782706e-06, "loss": 0.3827, "step": 21279 }, { "epoch": 69.77049180327869, "grad_norm": 6.3126983642578125, "learning_rate": 4.4219686824957666e-06, "loss": 0.4451, "step": 21280 }, { "epoch": 69.77377049180328, "grad_norm": 5.46948766708374, "learning_rate": 4.421087376106931e-06, "loss": 0.4163, "step": 21281 }, { "epoch": 69.77704918032786, "grad_norm": 6.758310794830322, "learning_rate": 4.4202061326261355e-06, "loss": 0.7541, "step": 21282 }, { "epoch": 69.78032786885245, "grad_norm": 5.040554523468018, "learning_rate": 4.419324952063314e-06, "loss": 0.6329, "step": 21283 }, { "epoch": 69.78360655737706, "grad_norm": 5.00167989730835, "learning_rate": 4.41844383442841e-06, "loss": 0.2964, "step": 21284 }, { "epoch": 69.78688524590164, "grad_norm": 38.90119171142578, "learning_rate": 4.417562779731355e-06, "loss": 0.5049, "step": 21285 }, { "epoch": 69.79016393442623, "grad_norm": 8.162191390991211, "learning_rate": 4.416681787982084e-06, "loss": 0.417, "step": 21286 }, { "epoch": 69.79344262295082, "grad_norm": 7.114770412445068, "learning_rate": 4.415800859190527e-06, "loss": 0.2902, "step": 21287 }, { "epoch": 69.79672131147541, "grad_norm": 5.572673320770264, "learning_rate": 4.414919993366625e-06, "loss": 0.6086, "step": 21288 }, { "epoch": 69.8, "grad_norm": 5.5461883544921875, "learning_rate": 4.414039190520308e-06, "loss": 0.2886, "step": 21289 }, { "epoch": 69.80327868852459, "grad_norm": 7.251645565032959, "learning_rate": 4.413158450661507e-06, "loss": 0.2949, "step": 21290 }, { "epoch": 69.80655737704917, "grad_norm": 5.005259990692139, "learning_rate": 4.412277773800149e-06, "loss": 0.3258, "step": 21291 }, { "epoch": 69.80983606557378, "grad_norm": 5.3972859382629395, "learning_rate": 4.411397159946172e-06, "loss": 0.3586, "step": 21292 }, { "epoch": 69.81311475409836, "grad_norm": 27.506053924560547, "learning_rate": 4.410516609109504e-06, "loss": 0.4525, "step": 21293 }, { "epoch": 69.81639344262295, "grad_norm": 6.300683975219727, "learning_rate": 4.409636121300068e-06, "loss": 0.3757, "step": 21294 }, { "epoch": 69.81967213114754, "grad_norm": 5.347556114196777, "learning_rate": 4.408755696527803e-06, "loss": 0.4253, "step": 21295 }, { "epoch": 69.82295081967213, "grad_norm": 4.368635177612305, "learning_rate": 4.407875334802628e-06, "loss": 0.3837, "step": 21296 }, { "epoch": 69.82622950819672, "grad_norm": 6.731258869171143, "learning_rate": 4.406995036134471e-06, "loss": 0.2898, "step": 21297 }, { "epoch": 69.8295081967213, "grad_norm": 5.01181697845459, "learning_rate": 4.4061148005332635e-06, "loss": 0.5631, "step": 21298 }, { "epoch": 69.8327868852459, "grad_norm": 7.2702956199646, "learning_rate": 4.405234628008929e-06, "loss": 0.2501, "step": 21299 }, { "epoch": 69.8360655737705, "grad_norm": 6.689379692077637, "learning_rate": 4.40435451857139e-06, "loss": 0.4406, "step": 21300 }, { "epoch": 69.83934426229509, "grad_norm": 5.501379013061523, "learning_rate": 4.4034744722305674e-06, "loss": 0.5971, "step": 21301 }, { "epoch": 69.84262295081967, "grad_norm": 8.424420356750488, "learning_rate": 4.402594488996393e-06, "loss": 0.4611, "step": 21302 }, { "epoch": 69.84590163934426, "grad_norm": 4.9261932373046875, "learning_rate": 4.4017145688787866e-06, "loss": 0.3562, "step": 21303 }, { "epoch": 69.84918032786885, "grad_norm": 5.39015007019043, "learning_rate": 4.400834711887669e-06, "loss": 0.4521, "step": 21304 }, { "epoch": 69.85245901639344, "grad_norm": 5.143743515014648, "learning_rate": 4.3999549180329584e-06, "loss": 0.3595, "step": 21305 }, { "epoch": 69.85573770491803, "grad_norm": 7.45655632019043, "learning_rate": 4.399075187324582e-06, "loss": 0.2267, "step": 21306 }, { "epoch": 69.85901639344263, "grad_norm": 7.4210052490234375, "learning_rate": 4.398195519772456e-06, "loss": 0.5057, "step": 21307 }, { "epoch": 69.86229508196722, "grad_norm": 4.533053874969482, "learning_rate": 4.3973159153865e-06, "loss": 0.2602, "step": 21308 }, { "epoch": 69.8655737704918, "grad_norm": 5.112777233123779, "learning_rate": 4.396436374176628e-06, "loss": 0.2986, "step": 21309 }, { "epoch": 69.8688524590164, "grad_norm": 4.945201396942139, "learning_rate": 4.395556896152767e-06, "loss": 0.4414, "step": 21310 }, { "epoch": 69.87213114754098, "grad_norm": 6.824235439300537, "learning_rate": 4.394677481324828e-06, "loss": 0.6872, "step": 21311 }, { "epoch": 69.87540983606557, "grad_norm": 5.52367639541626, "learning_rate": 4.3937981297027295e-06, "loss": 0.5867, "step": 21312 }, { "epoch": 69.87868852459016, "grad_norm": 5.692975044250488, "learning_rate": 4.392918841296382e-06, "loss": 0.4446, "step": 21313 }, { "epoch": 69.88196721311475, "grad_norm": 6.147720813751221, "learning_rate": 4.392039616115708e-06, "loss": 0.4459, "step": 21314 }, { "epoch": 69.88524590163935, "grad_norm": 5.6288886070251465, "learning_rate": 4.391160454170619e-06, "loss": 0.4837, "step": 21315 }, { "epoch": 69.88852459016394, "grad_norm": 10.760040283203125, "learning_rate": 4.390281355471027e-06, "loss": 0.5098, "step": 21316 }, { "epoch": 69.89180327868853, "grad_norm": 5.9485087394714355, "learning_rate": 4.389402320026846e-06, "loss": 0.277, "step": 21317 }, { "epoch": 69.89508196721312, "grad_norm": 5.41927433013916, "learning_rate": 4.388523347847983e-06, "loss": 0.363, "step": 21318 }, { "epoch": 69.8983606557377, "grad_norm": 6.204504013061523, "learning_rate": 4.387644438944358e-06, "loss": 0.4267, "step": 21319 }, { "epoch": 69.90163934426229, "grad_norm": 4.381925582885742, "learning_rate": 4.386765593325876e-06, "loss": 0.2517, "step": 21320 }, { "epoch": 69.90491803278688, "grad_norm": 4.890461444854736, "learning_rate": 4.385886811002449e-06, "loss": 0.7871, "step": 21321 }, { "epoch": 69.90819672131147, "grad_norm": 6.998946666717529, "learning_rate": 4.385008091983981e-06, "loss": 0.4828, "step": 21322 }, { "epoch": 69.91147540983607, "grad_norm": 5.12752628326416, "learning_rate": 4.38412943628039e-06, "loss": 0.3986, "step": 21323 }, { "epoch": 69.91475409836066, "grad_norm": 5.033581733703613, "learning_rate": 4.383250843901577e-06, "loss": 0.4107, "step": 21324 }, { "epoch": 69.91803278688525, "grad_norm": 8.881035804748535, "learning_rate": 4.382372314857452e-06, "loss": 0.5342, "step": 21325 }, { "epoch": 69.92131147540984, "grad_norm": 9.549781799316406, "learning_rate": 4.381493849157916e-06, "loss": 0.3849, "step": 21326 }, { "epoch": 69.92459016393443, "grad_norm": 8.728013038635254, "learning_rate": 4.380615446812883e-06, "loss": 0.4098, "step": 21327 }, { "epoch": 69.92786885245901, "grad_norm": 5.278473854064941, "learning_rate": 4.379737107832253e-06, "loss": 0.4149, "step": 21328 }, { "epoch": 69.9311475409836, "grad_norm": 4.095138072967529, "learning_rate": 4.378858832225931e-06, "loss": 0.4453, "step": 21329 }, { "epoch": 69.93442622950819, "grad_norm": 6.403100967407227, "learning_rate": 4.377980620003819e-06, "loss": 0.4706, "step": 21330 }, { "epoch": 69.9377049180328, "grad_norm": 4.590975284576416, "learning_rate": 4.377102471175818e-06, "loss": 0.6645, "step": 21331 }, { "epoch": 69.94098360655738, "grad_norm": 5.102485179901123, "learning_rate": 4.376224385751837e-06, "loss": 0.2506, "step": 21332 }, { "epoch": 69.94426229508197, "grad_norm": 5.473907470703125, "learning_rate": 4.375346363741772e-06, "loss": 0.5724, "step": 21333 }, { "epoch": 69.94754098360656, "grad_norm": 5.2781596183776855, "learning_rate": 4.374468405155528e-06, "loss": 0.5521, "step": 21334 }, { "epoch": 69.95081967213115, "grad_norm": 4.91156005859375, "learning_rate": 4.3735905100029956e-06, "loss": 0.4802, "step": 21335 }, { "epoch": 69.95409836065573, "grad_norm": 14.901738166809082, "learning_rate": 4.372712678294084e-06, "loss": 0.8295, "step": 21336 }, { "epoch": 69.95737704918032, "grad_norm": 13.332070350646973, "learning_rate": 4.371834910038688e-06, "loss": 0.347, "step": 21337 }, { "epoch": 69.96065573770491, "grad_norm": 5.859004497528076, "learning_rate": 4.370957205246705e-06, "loss": 0.4129, "step": 21338 }, { "epoch": 69.96393442622951, "grad_norm": 4.7196269035339355, "learning_rate": 4.370079563928029e-06, "loss": 0.4828, "step": 21339 }, { "epoch": 69.9672131147541, "grad_norm": 7.762723445892334, "learning_rate": 4.369201986092564e-06, "loss": 0.2708, "step": 21340 }, { "epoch": 69.97049180327869, "grad_norm": 6.3835954666137695, "learning_rate": 4.368324471750201e-06, "loss": 0.527, "step": 21341 }, { "epoch": 69.97377049180328, "grad_norm": 6.289803981781006, "learning_rate": 4.367447020910833e-06, "loss": 0.3167, "step": 21342 }, { "epoch": 69.97704918032787, "grad_norm": 6.952024936676025, "learning_rate": 4.366569633584359e-06, "loss": 0.4678, "step": 21343 }, { "epoch": 69.98032786885246, "grad_norm": 5.190078258514404, "learning_rate": 4.365692309780664e-06, "loss": 0.3538, "step": 21344 }, { "epoch": 69.98360655737704, "grad_norm": 4.92171573638916, "learning_rate": 4.364815049509651e-06, "loss": 0.3474, "step": 21345 }, { "epoch": 69.98688524590163, "grad_norm": 5.6539998054504395, "learning_rate": 4.363937852781207e-06, "loss": 0.3078, "step": 21346 }, { "epoch": 69.99016393442623, "grad_norm": 5.975747585296631, "learning_rate": 4.363060719605221e-06, "loss": 0.6105, "step": 21347 }, { "epoch": 69.99344262295082, "grad_norm": 5.024999141693115, "learning_rate": 4.362183649991589e-06, "loss": 0.2559, "step": 21348 }, { "epoch": 69.99672131147541, "grad_norm": 5.061413288116455, "learning_rate": 4.361306643950199e-06, "loss": 0.3872, "step": 21349 }, { "epoch": 70.0, "grad_norm": 4.469847679138184, "learning_rate": 4.360429701490935e-06, "loss": 0.161, "step": 21350 }, { "epoch": 70.00327868852459, "grad_norm": 6.420072555541992, "learning_rate": 4.359552822623693e-06, "loss": 0.3633, "step": 21351 }, { "epoch": 70.00655737704918, "grad_norm": 6.349737644195557, "learning_rate": 4.358676007358359e-06, "loss": 0.5165, "step": 21352 }, { "epoch": 70.00983606557377, "grad_norm": 5.054539680480957, "learning_rate": 4.357799255704813e-06, "loss": 0.6667, "step": 21353 }, { "epoch": 70.01311475409837, "grad_norm": 4.820802211761475, "learning_rate": 4.356922567672952e-06, "loss": 0.3626, "step": 21354 }, { "epoch": 70.01639344262296, "grad_norm": 5.309854984283447, "learning_rate": 4.356045943272656e-06, "loss": 0.262, "step": 21355 }, { "epoch": 70.01967213114754, "grad_norm": 11.244443893432617, "learning_rate": 4.35516938251381e-06, "loss": 0.3445, "step": 21356 }, { "epoch": 70.02295081967213, "grad_norm": 4.481125354766846, "learning_rate": 4.354292885406295e-06, "loss": 0.3616, "step": 21357 }, { "epoch": 70.02622950819672, "grad_norm": 5.223537445068359, "learning_rate": 4.353416451960001e-06, "loss": 0.2925, "step": 21358 }, { "epoch": 70.02950819672131, "grad_norm": 4.44478178024292, "learning_rate": 4.352540082184809e-06, "loss": 0.445, "step": 21359 }, { "epoch": 70.0327868852459, "grad_norm": 4.4014410972595215, "learning_rate": 4.351663776090598e-06, "loss": 0.4087, "step": 21360 }, { "epoch": 70.03606557377049, "grad_norm": 4.218769550323486, "learning_rate": 4.3507875336872464e-06, "loss": 0.6363, "step": 21361 }, { "epoch": 70.03934426229509, "grad_norm": 5.442562580108643, "learning_rate": 4.3499113549846436e-06, "loss": 0.4884, "step": 21362 }, { "epoch": 70.04262295081968, "grad_norm": 11.772254943847656, "learning_rate": 4.349035239992665e-06, "loss": 0.3444, "step": 21363 }, { "epoch": 70.04590163934427, "grad_norm": 4.712497711181641, "learning_rate": 4.348159188721189e-06, "loss": 0.3598, "step": 21364 }, { "epoch": 70.04918032786885, "grad_norm": 5.064521312713623, "learning_rate": 4.347283201180092e-06, "loss": 0.2391, "step": 21365 }, { "epoch": 70.05245901639344, "grad_norm": 5.3177995681762695, "learning_rate": 4.346407277379258e-06, "loss": 0.3708, "step": 21366 }, { "epoch": 70.05573770491803, "grad_norm": 5.556114196777344, "learning_rate": 4.345531417328559e-06, "loss": 0.2735, "step": 21367 }, { "epoch": 70.05901639344262, "grad_norm": 4.125637531280518, "learning_rate": 4.344655621037873e-06, "loss": 0.3441, "step": 21368 }, { "epoch": 70.0622950819672, "grad_norm": 5.993211269378662, "learning_rate": 4.343779888517074e-06, "loss": 0.4664, "step": 21369 }, { "epoch": 70.06557377049181, "grad_norm": 7.744433403015137, "learning_rate": 4.342904219776036e-06, "loss": 0.5245, "step": 21370 }, { "epoch": 70.0688524590164, "grad_norm": 6.089824676513672, "learning_rate": 4.342028614824637e-06, "loss": 0.2066, "step": 21371 }, { "epoch": 70.07213114754099, "grad_norm": 4.502997398376465, "learning_rate": 4.3411530736727495e-06, "loss": 0.4, "step": 21372 }, { "epoch": 70.07540983606557, "grad_norm": 4.128888130187988, "learning_rate": 4.340277596330243e-06, "loss": 0.5377, "step": 21373 }, { "epoch": 70.07868852459016, "grad_norm": 5.276333332061768, "learning_rate": 4.33940218280699e-06, "loss": 0.3867, "step": 21374 }, { "epoch": 70.08196721311475, "grad_norm": 5.460389614105225, "learning_rate": 4.338526833112865e-06, "loss": 0.2902, "step": 21375 }, { "epoch": 70.08524590163934, "grad_norm": 6.765847206115723, "learning_rate": 4.3376515472577365e-06, "loss": 0.6093, "step": 21376 }, { "epoch": 70.08852459016393, "grad_norm": 5.036906719207764, "learning_rate": 4.336776325251474e-06, "loss": 0.263, "step": 21377 }, { "epoch": 70.09180327868853, "grad_norm": 4.585904598236084, "learning_rate": 4.335901167103943e-06, "loss": 0.4181, "step": 21378 }, { "epoch": 70.09508196721312, "grad_norm": 5.191755771636963, "learning_rate": 4.335026072825021e-06, "loss": 0.3952, "step": 21379 }, { "epoch": 70.09836065573771, "grad_norm": 6.117591381072998, "learning_rate": 4.3341510424245685e-06, "loss": 0.3635, "step": 21380 }, { "epoch": 70.1016393442623, "grad_norm": 7.732686519622803, "learning_rate": 4.333276075912454e-06, "loss": 0.4989, "step": 21381 }, { "epoch": 70.10491803278688, "grad_norm": 5.173923492431641, "learning_rate": 4.332401173298544e-06, "loss": 0.6462, "step": 21382 }, { "epoch": 70.10819672131147, "grad_norm": 4.312822341918945, "learning_rate": 4.331526334592699e-06, "loss": 0.2744, "step": 21383 }, { "epoch": 70.11147540983606, "grad_norm": 5.829410076141357, "learning_rate": 4.330651559804792e-06, "loss": 0.6664, "step": 21384 }, { "epoch": 70.11475409836065, "grad_norm": 4.954040050506592, "learning_rate": 4.329776848944684e-06, "loss": 0.3801, "step": 21385 }, { "epoch": 70.11803278688525, "grad_norm": 4.658535003662109, "learning_rate": 4.328902202022238e-06, "loss": 0.3622, "step": 21386 }, { "epoch": 70.12131147540984, "grad_norm": 5.251931667327881, "learning_rate": 4.328027619047312e-06, "loss": 0.6004, "step": 21387 }, { "epoch": 70.12459016393443, "grad_norm": 9.001245498657227, "learning_rate": 4.327153100029776e-06, "loss": 0.3854, "step": 21388 }, { "epoch": 70.12786885245902, "grad_norm": 4.503814220428467, "learning_rate": 4.326278644979487e-06, "loss": 0.4633, "step": 21389 }, { "epoch": 70.1311475409836, "grad_norm": 4.360562801361084, "learning_rate": 4.325404253906306e-06, "loss": 0.2488, "step": 21390 }, { "epoch": 70.1344262295082, "grad_norm": 6.472097873687744, "learning_rate": 4.3245299268200865e-06, "loss": 0.2789, "step": 21391 }, { "epoch": 70.13770491803278, "grad_norm": 7.096673488616943, "learning_rate": 4.323655663730699e-06, "loss": 0.5775, "step": 21392 }, { "epoch": 70.14098360655737, "grad_norm": 6.276737213134766, "learning_rate": 4.322781464647995e-06, "loss": 0.3965, "step": 21393 }, { "epoch": 70.14426229508197, "grad_norm": 6.858421802520752, "learning_rate": 4.321907329581833e-06, "loss": 0.2701, "step": 21394 }, { "epoch": 70.14754098360656, "grad_norm": 6.754164695739746, "learning_rate": 4.321033258542068e-06, "loss": 0.2939, "step": 21395 }, { "epoch": 70.15081967213115, "grad_norm": 4.549800395965576, "learning_rate": 4.320159251538556e-06, "loss": 0.4295, "step": 21396 }, { "epoch": 70.15409836065574, "grad_norm": 5.721160411834717, "learning_rate": 4.319285308581156e-06, "loss": 0.3046, "step": 21397 }, { "epoch": 70.15737704918033, "grad_norm": 5.336126804351807, "learning_rate": 4.318411429679722e-06, "loss": 0.6188, "step": 21398 }, { "epoch": 70.16065573770491, "grad_norm": 4.483874797821045, "learning_rate": 4.317537614844101e-06, "loss": 0.4829, "step": 21399 }, { "epoch": 70.1639344262295, "grad_norm": 4.713897228240967, "learning_rate": 4.316663864084158e-06, "loss": 0.5073, "step": 21400 }, { "epoch": 70.1672131147541, "grad_norm": 6.2069411277771, "learning_rate": 4.3157901774097375e-06, "loss": 0.5508, "step": 21401 }, { "epoch": 70.1704918032787, "grad_norm": 7.472577095031738, "learning_rate": 4.314916554830693e-06, "loss": 0.4807, "step": 21402 }, { "epoch": 70.17377049180328, "grad_norm": 4.694759368896484, "learning_rate": 4.314042996356872e-06, "loss": 0.5273, "step": 21403 }, { "epoch": 70.17704918032787, "grad_norm": 5.414965629577637, "learning_rate": 4.313169501998132e-06, "loss": 0.3316, "step": 21404 }, { "epoch": 70.18032786885246, "grad_norm": 4.722665786743164, "learning_rate": 4.312296071764318e-06, "loss": 0.2103, "step": 21405 }, { "epoch": 70.18360655737705, "grad_norm": 5.5061259269714355, "learning_rate": 4.311422705665275e-06, "loss": 0.4647, "step": 21406 }, { "epoch": 70.18688524590164, "grad_norm": 5.419296741485596, "learning_rate": 4.310549403710862e-06, "loss": 0.386, "step": 21407 }, { "epoch": 70.19016393442622, "grad_norm": 7.719634532928467, "learning_rate": 4.309676165910917e-06, "loss": 0.3042, "step": 21408 }, { "epoch": 70.19344262295083, "grad_norm": 6.8290276527404785, "learning_rate": 4.308802992275288e-06, "loss": 0.5577, "step": 21409 }, { "epoch": 70.19672131147541, "grad_norm": 10.119503021240234, "learning_rate": 4.307929882813826e-06, "loss": 0.5459, "step": 21410 }, { "epoch": 70.2, "grad_norm": 8.444686889648438, "learning_rate": 4.307056837536373e-06, "loss": 0.651, "step": 21411 }, { "epoch": 70.20327868852459, "grad_norm": 4.671285152435303, "learning_rate": 4.306183856452772e-06, "loss": 0.4531, "step": 21412 }, { "epoch": 70.20655737704918, "grad_norm": 6.126816272735596, "learning_rate": 4.305310939572866e-06, "loss": 0.5723, "step": 21413 }, { "epoch": 70.20983606557377, "grad_norm": 6.032673358917236, "learning_rate": 4.304438086906502e-06, "loss": 0.6659, "step": 21414 }, { "epoch": 70.21311475409836, "grad_norm": 5.7820305824279785, "learning_rate": 4.303565298463523e-06, "loss": 0.5584, "step": 21415 }, { "epoch": 70.21639344262294, "grad_norm": 5.327169895172119, "learning_rate": 4.302692574253766e-06, "loss": 0.3145, "step": 21416 }, { "epoch": 70.21967213114755, "grad_norm": 7.927241802215576, "learning_rate": 4.301819914287072e-06, "loss": 0.4313, "step": 21417 }, { "epoch": 70.22295081967214, "grad_norm": 4.143240928649902, "learning_rate": 4.300947318573285e-06, "loss": 0.4556, "step": 21418 }, { "epoch": 70.22622950819672, "grad_norm": 7.539348125457764, "learning_rate": 4.300074787122244e-06, "loss": 0.4148, "step": 21419 }, { "epoch": 70.22950819672131, "grad_norm": 6.687716960906982, "learning_rate": 4.299202319943785e-06, "loss": 0.3542, "step": 21420 }, { "epoch": 70.2327868852459, "grad_norm": 6.490957736968994, "learning_rate": 4.298329917047748e-06, "loss": 0.6141, "step": 21421 }, { "epoch": 70.23606557377049, "grad_norm": 6.353403091430664, "learning_rate": 4.2974575784439655e-06, "loss": 0.6544, "step": 21422 }, { "epoch": 70.23934426229508, "grad_norm": 4.9611358642578125, "learning_rate": 4.296585304142281e-06, "loss": 0.3101, "step": 21423 }, { "epoch": 70.24262295081967, "grad_norm": 5.9781813621521, "learning_rate": 4.295713094152528e-06, "loss": 0.4287, "step": 21424 }, { "epoch": 70.24590163934427, "grad_norm": 4.48599100112915, "learning_rate": 4.294840948484541e-06, "loss": 0.2516, "step": 21425 }, { "epoch": 70.24918032786886, "grad_norm": 3.9169251918792725, "learning_rate": 4.293968867148149e-06, "loss": 0.4717, "step": 21426 }, { "epoch": 70.25245901639344, "grad_norm": 11.557562828063965, "learning_rate": 4.293096850153197e-06, "loss": 0.4307, "step": 21427 }, { "epoch": 70.25573770491803, "grad_norm": 5.746267795562744, "learning_rate": 4.292224897509511e-06, "loss": 0.4943, "step": 21428 }, { "epoch": 70.25901639344262, "grad_norm": 5.030760288238525, "learning_rate": 4.291353009226922e-06, "loss": 0.5042, "step": 21429 }, { "epoch": 70.26229508196721, "grad_norm": 4.85143518447876, "learning_rate": 4.29048118531526e-06, "loss": 0.5005, "step": 21430 }, { "epoch": 70.2655737704918, "grad_norm": 4.83386754989624, "learning_rate": 4.2896094257843645e-06, "loss": 0.3994, "step": 21431 }, { "epoch": 70.26885245901639, "grad_norm": 4.697059154510498, "learning_rate": 4.288737730644059e-06, "loss": 0.2537, "step": 21432 }, { "epoch": 70.27213114754099, "grad_norm": 4.721714496612549, "learning_rate": 4.287866099904175e-06, "loss": 0.4267, "step": 21433 }, { "epoch": 70.27540983606558, "grad_norm": 3.960036516189575, "learning_rate": 4.2869945335745375e-06, "loss": 0.3558, "step": 21434 }, { "epoch": 70.27868852459017, "grad_norm": 6.096299648284912, "learning_rate": 4.286123031664975e-06, "loss": 0.2353, "step": 21435 }, { "epoch": 70.28196721311475, "grad_norm": 7.515739917755127, "learning_rate": 4.285251594185319e-06, "loss": 0.5453, "step": 21436 }, { "epoch": 70.28524590163934, "grad_norm": 4.618780136108398, "learning_rate": 4.284380221145393e-06, "loss": 0.2972, "step": 21437 }, { "epoch": 70.28852459016393, "grad_norm": 4.903985977172852, "learning_rate": 4.283508912555023e-06, "loss": 0.4386, "step": 21438 }, { "epoch": 70.29180327868852, "grad_norm": 4.280879020690918, "learning_rate": 4.282637668424031e-06, "loss": 0.1745, "step": 21439 }, { "epoch": 70.29508196721312, "grad_norm": 6.509668827056885, "learning_rate": 4.2817664887622464e-06, "loss": 0.6782, "step": 21440 }, { "epoch": 70.29836065573771, "grad_norm": 5.506170749664307, "learning_rate": 4.28089537357949e-06, "loss": 0.3701, "step": 21441 }, { "epoch": 70.3016393442623, "grad_norm": 5.270876884460449, "learning_rate": 4.280024322885585e-06, "loss": 0.469, "step": 21442 }, { "epoch": 70.30491803278689, "grad_norm": 5.445340156555176, "learning_rate": 4.279153336690349e-06, "loss": 0.3282, "step": 21443 }, { "epoch": 70.30819672131148, "grad_norm": 5.797527313232422, "learning_rate": 4.278282415003611e-06, "loss": 0.5564, "step": 21444 }, { "epoch": 70.31147540983606, "grad_norm": 7.770228385925293, "learning_rate": 4.277411557835189e-06, "loss": 0.3046, "step": 21445 }, { "epoch": 70.31475409836065, "grad_norm": 5.408121585845947, "learning_rate": 4.276540765194901e-06, "loss": 0.3582, "step": 21446 }, { "epoch": 70.31803278688524, "grad_norm": 3.951549530029297, "learning_rate": 4.275670037092566e-06, "loss": 0.2846, "step": 21447 }, { "epoch": 70.32131147540984, "grad_norm": 4.28698205947876, "learning_rate": 4.274799373538001e-06, "loss": 0.3782, "step": 21448 }, { "epoch": 70.32459016393443, "grad_norm": 5.522632598876953, "learning_rate": 4.273928774541028e-06, "loss": 0.1359, "step": 21449 }, { "epoch": 70.32786885245902, "grad_norm": 5.1890974044799805, "learning_rate": 4.273058240111463e-06, "loss": 0.2626, "step": 21450 }, { "epoch": 70.33114754098361, "grad_norm": 5.020951747894287, "learning_rate": 4.27218777025912e-06, "loss": 0.4408, "step": 21451 }, { "epoch": 70.3344262295082, "grad_norm": 6.2222113609313965, "learning_rate": 4.271317364993812e-06, "loss": 0.2243, "step": 21452 }, { "epoch": 70.33770491803278, "grad_norm": 4.903385162353516, "learning_rate": 4.270447024325361e-06, "loss": 0.7304, "step": 21453 }, { "epoch": 70.34098360655737, "grad_norm": 4.5111846923828125, "learning_rate": 4.269576748263576e-06, "loss": 0.2151, "step": 21454 }, { "epoch": 70.34426229508196, "grad_norm": 5.85819673538208, "learning_rate": 4.268706536818268e-06, "loss": 0.2118, "step": 21455 }, { "epoch": 70.34754098360656, "grad_norm": 4.98185396194458, "learning_rate": 4.267836389999257e-06, "loss": 0.3175, "step": 21456 }, { "epoch": 70.35081967213115, "grad_norm": 5.64238977432251, "learning_rate": 4.266966307816351e-06, "loss": 0.3502, "step": 21457 }, { "epoch": 70.35409836065574, "grad_norm": 4.053959369659424, "learning_rate": 4.266096290279355e-06, "loss": 0.3587, "step": 21458 }, { "epoch": 70.35737704918033, "grad_norm": 4.4460625648498535, "learning_rate": 4.265226337398091e-06, "loss": 0.5006, "step": 21459 }, { "epoch": 70.36065573770492, "grad_norm": 6.592282772064209, "learning_rate": 4.264356449182362e-06, "loss": 0.3585, "step": 21460 }, { "epoch": 70.3639344262295, "grad_norm": 6.189439296722412, "learning_rate": 4.263486625641974e-06, "loss": 0.4011, "step": 21461 }, { "epoch": 70.3672131147541, "grad_norm": 22.37807273864746, "learning_rate": 4.262616866786742e-06, "loss": 0.6697, "step": 21462 }, { "epoch": 70.37049180327868, "grad_norm": 3.994872570037842, "learning_rate": 4.2617471726264705e-06, "loss": 0.3917, "step": 21463 }, { "epoch": 70.37377049180328, "grad_norm": 5.333111763000488, "learning_rate": 4.260877543170966e-06, "loss": 0.3211, "step": 21464 }, { "epoch": 70.37704918032787, "grad_norm": 4.004075527191162, "learning_rate": 4.260007978430031e-06, "loss": 0.2245, "step": 21465 }, { "epoch": 70.38032786885246, "grad_norm": 5.057386875152588, "learning_rate": 4.259138478413477e-06, "loss": 0.4611, "step": 21466 }, { "epoch": 70.38360655737705, "grad_norm": 5.340061187744141, "learning_rate": 4.258269043131107e-06, "loss": 0.3856, "step": 21467 }, { "epoch": 70.38688524590164, "grad_norm": 6.79872465133667, "learning_rate": 4.257399672592722e-06, "loss": 0.258, "step": 21468 }, { "epoch": 70.39016393442623, "grad_norm": 12.794836044311523, "learning_rate": 4.256530366808124e-06, "loss": 0.3595, "step": 21469 }, { "epoch": 70.39344262295081, "grad_norm": 4.6695685386657715, "learning_rate": 4.255661125787121e-06, "loss": 0.3795, "step": 21470 }, { "epoch": 70.3967213114754, "grad_norm": 4.614675998687744, "learning_rate": 4.254791949539511e-06, "loss": 0.3094, "step": 21471 }, { "epoch": 70.4, "grad_norm": 4.95555305480957, "learning_rate": 4.2539228380750955e-06, "loss": 0.3661, "step": 21472 }, { "epoch": 70.4032786885246, "grad_norm": 6.081467628479004, "learning_rate": 4.253053791403674e-06, "loss": 0.3055, "step": 21473 }, { "epoch": 70.40655737704918, "grad_norm": 5.2601141929626465, "learning_rate": 4.252184809535044e-06, "loss": 0.2375, "step": 21474 }, { "epoch": 70.40983606557377, "grad_norm": 5.49569845199585, "learning_rate": 4.251315892479009e-06, "loss": 0.3411, "step": 21475 }, { "epoch": 70.41311475409836, "grad_norm": 4.712841987609863, "learning_rate": 4.250447040245365e-06, "loss": 0.4363, "step": 21476 }, { "epoch": 70.41639344262295, "grad_norm": 5.511759281158447, "learning_rate": 4.249578252843908e-06, "loss": 0.3061, "step": 21477 }, { "epoch": 70.41967213114754, "grad_norm": 5.423724174499512, "learning_rate": 4.248709530284433e-06, "loss": 0.5015, "step": 21478 }, { "epoch": 70.42295081967212, "grad_norm": 6.324258327484131, "learning_rate": 4.24784087257674e-06, "loss": 0.4625, "step": 21479 }, { "epoch": 70.42622950819673, "grad_norm": 6.327305316925049, "learning_rate": 4.246972279730622e-06, "loss": 0.5557, "step": 21480 }, { "epoch": 70.42950819672132, "grad_norm": 11.615174293518066, "learning_rate": 4.246103751755873e-06, "loss": 0.42, "step": 21481 }, { "epoch": 70.4327868852459, "grad_norm": 6.490568161010742, "learning_rate": 4.245235288662285e-06, "loss": 0.4394, "step": 21482 }, { "epoch": 70.43606557377049, "grad_norm": 5.767471790313721, "learning_rate": 4.244366890459655e-06, "loss": 0.388, "step": 21483 }, { "epoch": 70.43934426229508, "grad_norm": 5.06589937210083, "learning_rate": 4.2434985571577724e-06, "loss": 0.3843, "step": 21484 }, { "epoch": 70.44262295081967, "grad_norm": 4.664409160614014, "learning_rate": 4.242630288766428e-06, "loss": 0.2214, "step": 21485 }, { "epoch": 70.44590163934426, "grad_norm": 5.328444957733154, "learning_rate": 4.241762085295415e-06, "loss": 0.5525, "step": 21486 }, { "epoch": 70.44918032786886, "grad_norm": 6.056755065917969, "learning_rate": 4.240893946754518e-06, "loss": 0.4697, "step": 21487 }, { "epoch": 70.45245901639345, "grad_norm": 4.643509387969971, "learning_rate": 4.240025873153532e-06, "loss": 0.4826, "step": 21488 }, { "epoch": 70.45573770491804, "grad_norm": 7.345558166503906, "learning_rate": 4.2391578645022436e-06, "loss": 0.4144, "step": 21489 }, { "epoch": 70.45901639344262, "grad_norm": 6.04123592376709, "learning_rate": 4.23828992081044e-06, "loss": 0.2641, "step": 21490 }, { "epoch": 70.46229508196721, "grad_norm": 6.150730609893799, "learning_rate": 4.237422042087904e-06, "loss": 0.504, "step": 21491 }, { "epoch": 70.4655737704918, "grad_norm": 5.387292385101318, "learning_rate": 4.2365542283444305e-06, "loss": 0.3089, "step": 21492 }, { "epoch": 70.46885245901639, "grad_norm": 5.669246196746826, "learning_rate": 4.2356864795898015e-06, "loss": 0.3274, "step": 21493 }, { "epoch": 70.47213114754098, "grad_norm": 4.462072372436523, "learning_rate": 4.234818795833799e-06, "loss": 0.4964, "step": 21494 }, { "epoch": 70.47540983606558, "grad_norm": 4.911374092102051, "learning_rate": 4.233951177086207e-06, "loss": 0.5648, "step": 21495 }, { "epoch": 70.47868852459017, "grad_norm": 5.216710567474365, "learning_rate": 4.233083623356813e-06, "loss": 0.4436, "step": 21496 }, { "epoch": 70.48196721311476, "grad_norm": 9.827214241027832, "learning_rate": 4.232216134655397e-06, "loss": 0.5166, "step": 21497 }, { "epoch": 70.48524590163935, "grad_norm": 5.128528594970703, "learning_rate": 4.231348710991742e-06, "loss": 0.6469, "step": 21498 }, { "epoch": 70.48852459016393, "grad_norm": 13.306053161621094, "learning_rate": 4.2304813523756274e-06, "loss": 0.3262, "step": 21499 }, { "epoch": 70.49180327868852, "grad_norm": 9.334268569946289, "learning_rate": 4.229614058816831e-06, "loss": 0.3901, "step": 21500 }, { "epoch": 70.49508196721311, "grad_norm": 3.9698691368103027, "learning_rate": 4.22874683032514e-06, "loss": 0.3741, "step": 21501 }, { "epoch": 70.4983606557377, "grad_norm": 4.740121841430664, "learning_rate": 4.227879666910329e-06, "loss": 0.3626, "step": 21502 }, { "epoch": 70.5016393442623, "grad_norm": 5.387384414672852, "learning_rate": 4.227012568582175e-06, "loss": 0.4737, "step": 21503 }, { "epoch": 70.50491803278689, "grad_norm": 5.889023303985596, "learning_rate": 4.2261455353504546e-06, "loss": 0.4735, "step": 21504 }, { "epoch": 70.50819672131148, "grad_norm": 6.8665618896484375, "learning_rate": 4.22527856722495e-06, "loss": 0.4743, "step": 21505 }, { "epoch": 70.51147540983607, "grad_norm": 16.91988182067871, "learning_rate": 4.224411664215433e-06, "loss": 0.4709, "step": 21506 }, { "epoch": 70.51475409836065, "grad_norm": 6.681239604949951, "learning_rate": 4.2235448263316795e-06, "loss": 0.2668, "step": 21507 }, { "epoch": 70.51803278688524, "grad_norm": 3.6475296020507812, "learning_rate": 4.222678053583461e-06, "loss": 0.5099, "step": 21508 }, { "epoch": 70.52131147540983, "grad_norm": 6.224219799041748, "learning_rate": 4.2218113459805575e-06, "loss": 0.5198, "step": 21509 }, { "epoch": 70.52459016393442, "grad_norm": 5.4840087890625, "learning_rate": 4.22094470353274e-06, "loss": 0.3101, "step": 21510 }, { "epoch": 70.52786885245902, "grad_norm": 7.648414611816406, "learning_rate": 4.220078126249775e-06, "loss": 0.2913, "step": 21511 }, { "epoch": 70.53114754098361, "grad_norm": 6.965719699859619, "learning_rate": 4.219211614141443e-06, "loss": 0.4434, "step": 21512 }, { "epoch": 70.5344262295082, "grad_norm": 4.860395431518555, "learning_rate": 4.21834516721751e-06, "loss": 0.3357, "step": 21513 }, { "epoch": 70.53770491803279, "grad_norm": 4.840073108673096, "learning_rate": 4.217478785487743e-06, "loss": 0.3956, "step": 21514 }, { "epoch": 70.54098360655738, "grad_norm": 4.745328426361084, "learning_rate": 4.216612468961919e-06, "loss": 0.3588, "step": 21515 }, { "epoch": 70.54426229508196, "grad_norm": 7.449304103851318, "learning_rate": 4.215746217649803e-06, "loss": 0.4216, "step": 21516 }, { "epoch": 70.54754098360655, "grad_norm": 4.799558639526367, "learning_rate": 4.214880031561158e-06, "loss": 0.2478, "step": 21517 }, { "epoch": 70.55081967213114, "grad_norm": 6.193493366241455, "learning_rate": 4.21401391070576e-06, "loss": 0.5471, "step": 21518 }, { "epoch": 70.55409836065574, "grad_norm": 5.320930480957031, "learning_rate": 4.21314785509337e-06, "loss": 0.4719, "step": 21519 }, { "epoch": 70.55737704918033, "grad_norm": 4.5766215324401855, "learning_rate": 4.212281864733756e-06, "loss": 0.5043, "step": 21520 }, { "epoch": 70.56065573770492, "grad_norm": 4.817168712615967, "learning_rate": 4.211415939636677e-06, "loss": 0.3898, "step": 21521 }, { "epoch": 70.56393442622951, "grad_norm": 5.468932151794434, "learning_rate": 4.210550079811907e-06, "loss": 0.6142, "step": 21522 }, { "epoch": 70.5672131147541, "grad_norm": 4.508028507232666, "learning_rate": 4.209684285269203e-06, "loss": 0.4845, "step": 21523 }, { "epoch": 70.57049180327868, "grad_norm": 4.343873500823975, "learning_rate": 4.20881855601833e-06, "loss": 0.2353, "step": 21524 }, { "epoch": 70.57377049180327, "grad_norm": 6.379252910614014, "learning_rate": 4.2079528920690484e-06, "loss": 0.434, "step": 21525 }, { "epoch": 70.57704918032788, "grad_norm": 7.571066379547119, "learning_rate": 4.207087293431117e-06, "loss": 0.3786, "step": 21526 }, { "epoch": 70.58032786885246, "grad_norm": 5.027153968811035, "learning_rate": 4.206221760114303e-06, "loss": 0.7098, "step": 21527 }, { "epoch": 70.58360655737705, "grad_norm": 9.747029304504395, "learning_rate": 4.2053562921283635e-06, "loss": 0.4155, "step": 21528 }, { "epoch": 70.58688524590164, "grad_norm": 6.959203720092773, "learning_rate": 4.204490889483055e-06, "loss": 0.3516, "step": 21529 }, { "epoch": 70.59016393442623, "grad_norm": 6.1238861083984375, "learning_rate": 4.203625552188134e-06, "loss": 0.4466, "step": 21530 }, { "epoch": 70.59344262295082, "grad_norm": 4.980617523193359, "learning_rate": 4.202760280253366e-06, "loss": 0.3538, "step": 21531 }, { "epoch": 70.5967213114754, "grad_norm": 9.000509262084961, "learning_rate": 4.2018950736885046e-06, "loss": 0.3987, "step": 21532 }, { "epoch": 70.6, "grad_norm": 5.818118572235107, "learning_rate": 4.201029932503303e-06, "loss": 0.4748, "step": 21533 }, { "epoch": 70.6032786885246, "grad_norm": 5.058353424072266, "learning_rate": 4.200164856707516e-06, "loss": 0.4881, "step": 21534 }, { "epoch": 70.60655737704919, "grad_norm": 4.586209774017334, "learning_rate": 4.199299846310903e-06, "loss": 0.349, "step": 21535 }, { "epoch": 70.60983606557377, "grad_norm": 4.772475242614746, "learning_rate": 4.1984349013232165e-06, "loss": 0.3294, "step": 21536 }, { "epoch": 70.61311475409836, "grad_norm": 5.149524211883545, "learning_rate": 4.197570021754207e-06, "loss": 0.3048, "step": 21537 }, { "epoch": 70.61639344262295, "grad_norm": 6.376547813415527, "learning_rate": 4.19670520761363e-06, "loss": 0.6545, "step": 21538 }, { "epoch": 70.61967213114754, "grad_norm": 9.074631690979004, "learning_rate": 4.195840458911231e-06, "loss": 0.3851, "step": 21539 }, { "epoch": 70.62295081967213, "grad_norm": 4.325913906097412, "learning_rate": 4.194975775656771e-06, "loss": 0.3103, "step": 21540 }, { "epoch": 70.62622950819672, "grad_norm": 4.3883490562438965, "learning_rate": 4.194111157859993e-06, "loss": 0.5136, "step": 21541 }, { "epoch": 70.62950819672132, "grad_norm": 4.583677291870117, "learning_rate": 4.1932466055306495e-06, "loss": 0.451, "step": 21542 }, { "epoch": 70.6327868852459, "grad_norm": 4.50565767288208, "learning_rate": 4.192382118678484e-06, "loss": 0.5337, "step": 21543 }, { "epoch": 70.6360655737705, "grad_norm": 5.598474502563477, "learning_rate": 4.191517697313252e-06, "loss": 0.517, "step": 21544 }, { "epoch": 70.63934426229508, "grad_norm": 5.168460845947266, "learning_rate": 4.190653341444697e-06, "loss": 0.3173, "step": 21545 }, { "epoch": 70.64262295081967, "grad_norm": 5.80313777923584, "learning_rate": 4.1897890510825665e-06, "loss": 0.4916, "step": 21546 }, { "epoch": 70.64590163934426, "grad_norm": 5.421197891235352, "learning_rate": 4.188924826236601e-06, "loss": 0.3873, "step": 21547 }, { "epoch": 70.64918032786885, "grad_norm": 6.303099632263184, "learning_rate": 4.188060666916555e-06, "loss": 0.3888, "step": 21548 }, { "epoch": 70.65245901639344, "grad_norm": 4.58734655380249, "learning_rate": 4.187196573132167e-06, "loss": 0.6325, "step": 21549 }, { "epoch": 70.65573770491804, "grad_norm": 5.148384094238281, "learning_rate": 4.186332544893181e-06, "loss": 0.56, "step": 21550 }, { "epoch": 70.65901639344263, "grad_norm": 5.490029811859131, "learning_rate": 4.185468582209341e-06, "loss": 0.5246, "step": 21551 }, { "epoch": 70.66229508196722, "grad_norm": 4.310945510864258, "learning_rate": 4.184604685090386e-06, "loss": 0.2079, "step": 21552 }, { "epoch": 70.6655737704918, "grad_norm": 4.226485252380371, "learning_rate": 4.183740853546061e-06, "loss": 0.1862, "step": 21553 }, { "epoch": 70.66885245901639, "grad_norm": 5.264317512512207, "learning_rate": 4.182877087586106e-06, "loss": 0.3529, "step": 21554 }, { "epoch": 70.67213114754098, "grad_norm": 4.122071743011475, "learning_rate": 4.18201338722026e-06, "loss": 0.2157, "step": 21555 }, { "epoch": 70.67540983606557, "grad_norm": 6.1632890701293945, "learning_rate": 4.181149752458259e-06, "loss": 0.603, "step": 21556 }, { "epoch": 70.67868852459016, "grad_norm": 4.999209403991699, "learning_rate": 4.180286183309849e-06, "loss": 0.3503, "step": 21557 }, { "epoch": 70.68196721311476, "grad_norm": 4.391408920288086, "learning_rate": 4.179422679784762e-06, "loss": 0.5659, "step": 21558 }, { "epoch": 70.68524590163935, "grad_norm": 5.840440273284912, "learning_rate": 4.178559241892737e-06, "loss": 0.283, "step": 21559 }, { "epoch": 70.68852459016394, "grad_norm": 4.884553909301758, "learning_rate": 4.1776958696435045e-06, "loss": 0.3886, "step": 21560 }, { "epoch": 70.69180327868852, "grad_norm": 7.867879390716553, "learning_rate": 4.17683256304681e-06, "loss": 0.4269, "step": 21561 }, { "epoch": 70.69508196721311, "grad_norm": 6.120119094848633, "learning_rate": 4.1759693221123824e-06, "loss": 0.3295, "step": 21562 }, { "epoch": 70.6983606557377, "grad_norm": 5.075772762298584, "learning_rate": 4.175106146849951e-06, "loss": 0.4268, "step": 21563 }, { "epoch": 70.70163934426229, "grad_norm": 6.319981098175049, "learning_rate": 4.17424303726926e-06, "loss": 0.4149, "step": 21564 }, { "epoch": 70.70491803278688, "grad_norm": 5.01069974899292, "learning_rate": 4.173379993380034e-06, "loss": 0.3389, "step": 21565 }, { "epoch": 70.70819672131148, "grad_norm": 5.633805751800537, "learning_rate": 4.172517015192008e-06, "loss": 0.4378, "step": 21566 }, { "epoch": 70.71147540983607, "grad_norm": 4.6746439933776855, "learning_rate": 4.1716541027149085e-06, "loss": 0.2666, "step": 21567 }, { "epoch": 70.71475409836066, "grad_norm": 42.71180725097656, "learning_rate": 4.170791255958472e-06, "loss": 0.4431, "step": 21568 }, { "epoch": 70.71803278688525, "grad_norm": 5.709102153778076, "learning_rate": 4.169928474932424e-06, "loss": 0.3927, "step": 21569 }, { "epoch": 70.72131147540983, "grad_norm": 4.731954574584961, "learning_rate": 4.169065759646492e-06, "loss": 0.3917, "step": 21570 }, { "epoch": 70.72459016393442, "grad_norm": 5.581993103027344, "learning_rate": 4.168203110110409e-06, "loss": 0.4362, "step": 21571 }, { "epoch": 70.72786885245901, "grad_norm": 3.9312942028045654, "learning_rate": 4.167340526333901e-06, "loss": 0.2335, "step": 21572 }, { "epoch": 70.73114754098361, "grad_norm": 6.151539325714111, "learning_rate": 4.166478008326688e-06, "loss": 0.5054, "step": 21573 }, { "epoch": 70.7344262295082, "grad_norm": 4.845081329345703, "learning_rate": 4.165615556098504e-06, "loss": 0.2333, "step": 21574 }, { "epoch": 70.73770491803279, "grad_norm": 4.665171146392822, "learning_rate": 4.1647531696590714e-06, "loss": 0.7272, "step": 21575 }, { "epoch": 70.74098360655738, "grad_norm": 5.173975944519043, "learning_rate": 4.163890849018114e-06, "loss": 0.3455, "step": 21576 }, { "epoch": 70.74426229508197, "grad_norm": 5.907779216766357, "learning_rate": 4.163028594185355e-06, "loss": 0.485, "step": 21577 }, { "epoch": 70.74754098360656, "grad_norm": 5.237670421600342, "learning_rate": 4.162166405170514e-06, "loss": 0.5071, "step": 21578 }, { "epoch": 70.75081967213114, "grad_norm": 4.46929931640625, "learning_rate": 4.161304281983321e-06, "loss": 0.7064, "step": 21579 }, { "epoch": 70.75409836065573, "grad_norm": 5.3630051612854, "learning_rate": 4.1604422246334916e-06, "loss": 0.4011, "step": 21580 }, { "epoch": 70.75737704918033, "grad_norm": 3.920593738555908, "learning_rate": 4.1595802331307486e-06, "loss": 0.5174, "step": 21581 }, { "epoch": 70.76065573770492, "grad_norm": 7.970550060272217, "learning_rate": 4.158718307484808e-06, "loss": 0.3951, "step": 21582 }, { "epoch": 70.76393442622951, "grad_norm": 4.689802646636963, "learning_rate": 4.157856447705395e-06, "loss": 0.4324, "step": 21583 }, { "epoch": 70.7672131147541, "grad_norm": 10.089305877685547, "learning_rate": 4.156994653802224e-06, "loss": 0.3295, "step": 21584 }, { "epoch": 70.77049180327869, "grad_norm": 4.612941741943359, "learning_rate": 4.156132925785015e-06, "loss": 0.5122, "step": 21585 }, { "epoch": 70.77377049180328, "grad_norm": 4.996892929077148, "learning_rate": 4.155271263663479e-06, "loss": 0.5261, "step": 21586 }, { "epoch": 70.77704918032786, "grad_norm": 4.505249977111816, "learning_rate": 4.154409667447339e-06, "loss": 0.3833, "step": 21587 }, { "epoch": 70.78032786885245, "grad_norm": 4.77387809753418, "learning_rate": 4.153548137146309e-06, "loss": 0.5696, "step": 21588 }, { "epoch": 70.78360655737706, "grad_norm": 5.130129814147949, "learning_rate": 4.152686672770103e-06, "loss": 0.4254, "step": 21589 }, { "epoch": 70.78688524590164, "grad_norm": 4.790388107299805, "learning_rate": 4.151825274328433e-06, "loss": 0.409, "step": 21590 }, { "epoch": 70.79016393442623, "grad_norm": 5.417407035827637, "learning_rate": 4.1509639418310114e-06, "loss": 0.4668, "step": 21591 }, { "epoch": 70.79344262295082, "grad_norm": 6.550619125366211, "learning_rate": 4.150102675287556e-06, "loss": 0.5606, "step": 21592 }, { "epoch": 70.79672131147541, "grad_norm": 6.410682678222656, "learning_rate": 4.149241474707775e-06, "loss": 0.2733, "step": 21593 }, { "epoch": 70.8, "grad_norm": 5.0459794998168945, "learning_rate": 4.14838034010138e-06, "loss": 0.4414, "step": 21594 }, { "epoch": 70.80327868852459, "grad_norm": 5.750784873962402, "learning_rate": 4.147519271478076e-06, "loss": 0.6152, "step": 21595 }, { "epoch": 70.80655737704917, "grad_norm": 9.56707763671875, "learning_rate": 4.146658268847583e-06, "loss": 0.5549, "step": 21596 }, { "epoch": 70.80983606557378, "grad_norm": 6.578253269195557, "learning_rate": 4.145797332219603e-06, "loss": 0.3607, "step": 21597 }, { "epoch": 70.81311475409836, "grad_norm": 4.702493190765381, "learning_rate": 4.144936461603845e-06, "loss": 0.4657, "step": 21598 }, { "epoch": 70.81639344262295, "grad_norm": 5.6904473304748535, "learning_rate": 4.144075657010012e-06, "loss": 0.491, "step": 21599 }, { "epoch": 70.81967213114754, "grad_norm": 5.1031718254089355, "learning_rate": 4.143214918447818e-06, "loss": 0.6344, "step": 21600 }, { "epoch": 70.82295081967213, "grad_norm": 5.1375298500061035, "learning_rate": 4.142354245926966e-06, "loss": 0.314, "step": 21601 }, { "epoch": 70.82622950819672, "grad_norm": 5.221610069274902, "learning_rate": 4.14149363945716e-06, "loss": 0.1858, "step": 21602 }, { "epoch": 70.8295081967213, "grad_norm": 6.706140041351318, "learning_rate": 4.140633099048106e-06, "loss": 0.534, "step": 21603 }, { "epoch": 70.8327868852459, "grad_norm": 8.151484489440918, "learning_rate": 4.139772624709501e-06, "loss": 0.3574, "step": 21604 }, { "epoch": 70.8360655737705, "grad_norm": 8.084425926208496, "learning_rate": 4.138912216451057e-06, "loss": 0.5382, "step": 21605 }, { "epoch": 70.83934426229509, "grad_norm": 5.572951316833496, "learning_rate": 4.13805187428247e-06, "loss": 0.4475, "step": 21606 }, { "epoch": 70.84262295081967, "grad_norm": 5.610589981079102, "learning_rate": 4.137191598213445e-06, "loss": 0.3862, "step": 21607 }, { "epoch": 70.84590163934426, "grad_norm": 5.123067378997803, "learning_rate": 4.136331388253676e-06, "loss": 0.3906, "step": 21608 }, { "epoch": 70.84918032786885, "grad_norm": 7.7065348625183105, "learning_rate": 4.135471244412871e-06, "loss": 0.4005, "step": 21609 }, { "epoch": 70.85245901639344, "grad_norm": 5.2901997566223145, "learning_rate": 4.134611166700725e-06, "loss": 0.5094, "step": 21610 }, { "epoch": 70.85573770491803, "grad_norm": 8.649206161499023, "learning_rate": 4.133751155126937e-06, "loss": 0.5775, "step": 21611 }, { "epoch": 70.85901639344263, "grad_norm": 6.1770453453063965, "learning_rate": 4.132891209701201e-06, "loss": 0.3168, "step": 21612 }, { "epoch": 70.86229508196722, "grad_norm": 3.6374266147613525, "learning_rate": 4.13203133043322e-06, "loss": 0.2974, "step": 21613 }, { "epoch": 70.8655737704918, "grad_norm": 6.885819911956787, "learning_rate": 4.131171517332687e-06, "loss": 0.4126, "step": 21614 }, { "epoch": 70.8688524590164, "grad_norm": 4.809868335723877, "learning_rate": 4.130311770409297e-06, "loss": 0.6625, "step": 21615 }, { "epoch": 70.87213114754098, "grad_norm": 6.1382246017456055, "learning_rate": 4.129452089672741e-06, "loss": 0.3709, "step": 21616 }, { "epoch": 70.87540983606557, "grad_norm": 5.35050106048584, "learning_rate": 4.12859247513272e-06, "loss": 0.4803, "step": 21617 }, { "epoch": 70.87868852459016, "grad_norm": 4.655606269836426, "learning_rate": 4.1277329267989245e-06, "loss": 0.5369, "step": 21618 }, { "epoch": 70.88196721311475, "grad_norm": 6.0803608894348145, "learning_rate": 4.126873444681041e-06, "loss": 0.2907, "step": 21619 }, { "epoch": 70.88524590163935, "grad_norm": 5.126220703125, "learning_rate": 4.12601402878877e-06, "loss": 0.2927, "step": 21620 }, { "epoch": 70.88852459016394, "grad_norm": 3.374199628829956, "learning_rate": 4.125154679131799e-06, "loss": 0.4615, "step": 21621 }, { "epoch": 70.89180327868853, "grad_norm": 4.787285804748535, "learning_rate": 4.124295395719813e-06, "loss": 0.2477, "step": 21622 }, { "epoch": 70.89508196721312, "grad_norm": 4.906506538391113, "learning_rate": 4.123436178562509e-06, "loss": 0.3437, "step": 21623 }, { "epoch": 70.8983606557377, "grad_norm": 5.286612033843994, "learning_rate": 4.1225770276695735e-06, "loss": 0.5178, "step": 21624 }, { "epoch": 70.90163934426229, "grad_norm": 4.647950649261475, "learning_rate": 4.121717943050688e-06, "loss": 0.1449, "step": 21625 }, { "epoch": 70.90491803278688, "grad_norm": 4.33027982711792, "learning_rate": 4.120858924715548e-06, "loss": 0.4393, "step": 21626 }, { "epoch": 70.90819672131147, "grad_norm": 4.98294734954834, "learning_rate": 4.119999972673837e-06, "loss": 0.3421, "step": 21627 }, { "epoch": 70.91147540983607, "grad_norm": 5.242311000823975, "learning_rate": 4.11914108693524e-06, "loss": 0.2481, "step": 21628 }, { "epoch": 70.91475409836066, "grad_norm": 4.973171234130859, "learning_rate": 4.118282267509442e-06, "loss": 0.3745, "step": 21629 }, { "epoch": 70.91803278688525, "grad_norm": 4.47818660736084, "learning_rate": 4.117423514406124e-06, "loss": 0.4795, "step": 21630 }, { "epoch": 70.92131147540984, "grad_norm": 4.729251384735107, "learning_rate": 4.116564827634975e-06, "loss": 0.3993, "step": 21631 }, { "epoch": 70.92459016393443, "grad_norm": 4.595917224884033, "learning_rate": 4.1157062072056744e-06, "loss": 0.6683, "step": 21632 }, { "epoch": 70.92786885245901, "grad_norm": 4.802947998046875, "learning_rate": 4.114847653127904e-06, "loss": 0.4034, "step": 21633 }, { "epoch": 70.9311475409836, "grad_norm": 6.072000980377197, "learning_rate": 4.113989165411343e-06, "loss": 0.6453, "step": 21634 }, { "epoch": 70.93442622950819, "grad_norm": 4.808569431304932, "learning_rate": 4.113130744065677e-06, "loss": 0.4997, "step": 21635 }, { "epoch": 70.9377049180328, "grad_norm": 5.080073833465576, "learning_rate": 4.112272389100582e-06, "loss": 0.215, "step": 21636 }, { "epoch": 70.94098360655738, "grad_norm": 5.647106170654297, "learning_rate": 4.111414100525739e-06, "loss": 0.4923, "step": 21637 }, { "epoch": 70.94426229508197, "grad_norm": 4.55714225769043, "learning_rate": 4.11055587835082e-06, "loss": 0.5353, "step": 21638 }, { "epoch": 70.94754098360656, "grad_norm": 11.859776496887207, "learning_rate": 4.1096977225855105e-06, "loss": 0.5093, "step": 21639 }, { "epoch": 70.95081967213115, "grad_norm": 4.831518650054932, "learning_rate": 4.108839633239485e-06, "loss": 0.2833, "step": 21640 }, { "epoch": 70.95409836065573, "grad_norm": 14.706119537353516, "learning_rate": 4.107981610322417e-06, "loss": 0.4621, "step": 21641 }, { "epoch": 70.95737704918032, "grad_norm": 5.269888877868652, "learning_rate": 4.107123653843982e-06, "loss": 0.6477, "step": 21642 }, { "epoch": 70.96065573770491, "grad_norm": 5.2414703369140625, "learning_rate": 4.106265763813852e-06, "loss": 0.5855, "step": 21643 }, { "epoch": 70.96393442622951, "grad_norm": 5.258011341094971, "learning_rate": 4.105407940241706e-06, "loss": 0.4811, "step": 21644 }, { "epoch": 70.9672131147541, "grad_norm": 6.864663124084473, "learning_rate": 4.104550183137215e-06, "loss": 0.5297, "step": 21645 }, { "epoch": 70.97049180327869, "grad_norm": 4.353890895843506, "learning_rate": 4.103692492510051e-06, "loss": 0.4781, "step": 21646 }, { "epoch": 70.97377049180328, "grad_norm": 5.207235813140869, "learning_rate": 4.1028348683698804e-06, "loss": 0.3937, "step": 21647 }, { "epoch": 70.97704918032787, "grad_norm": 4.638917446136475, "learning_rate": 4.1019773107263815e-06, "loss": 0.3962, "step": 21648 }, { "epoch": 70.98032786885246, "grad_norm": 5.884092807769775, "learning_rate": 4.101119819589221e-06, "loss": 0.3316, "step": 21649 }, { "epoch": 70.98360655737704, "grad_norm": 5.864499568939209, "learning_rate": 4.100262394968069e-06, "loss": 0.4971, "step": 21650 }, { "epoch": 70.98688524590163, "grad_norm": 3.9048831462860107, "learning_rate": 4.099405036872588e-06, "loss": 0.4347, "step": 21651 }, { "epoch": 70.99016393442623, "grad_norm": 3.3374979496002197, "learning_rate": 4.098547745312453e-06, "loss": 0.3117, "step": 21652 }, { "epoch": 70.99344262295082, "grad_norm": 5.624715805053711, "learning_rate": 4.097690520297331e-06, "loss": 0.4282, "step": 21653 }, { "epoch": 70.99672131147541, "grad_norm": 10.152374267578125, "learning_rate": 4.096833361836883e-06, "loss": 0.6945, "step": 21654 }, { "epoch": 71.0, "grad_norm": 6.296936988830566, "learning_rate": 4.095976269940777e-06, "loss": 0.2978, "step": 21655 }, { "epoch": 71.00327868852459, "grad_norm": 5.3077239990234375, "learning_rate": 4.095119244618674e-06, "loss": 0.2231, "step": 21656 }, { "epoch": 71.00655737704918, "grad_norm": 6.070929050445557, "learning_rate": 4.094262285880244e-06, "loss": 0.4298, "step": 21657 }, { "epoch": 71.00983606557377, "grad_norm": 4.822981834411621, "learning_rate": 4.093405393735147e-06, "loss": 0.5881, "step": 21658 }, { "epoch": 71.01311475409837, "grad_norm": 4.954549789428711, "learning_rate": 4.092548568193047e-06, "loss": 0.3364, "step": 21659 }, { "epoch": 71.01639344262296, "grad_norm": 4.41231632232666, "learning_rate": 4.091691809263598e-06, "loss": 0.2197, "step": 21660 }, { "epoch": 71.01967213114754, "grad_norm": 5.752340793609619, "learning_rate": 4.090835116956473e-06, "loss": 0.3228, "step": 21661 }, { "epoch": 71.02295081967213, "grad_norm": 11.930645942687988, "learning_rate": 4.089978491281325e-06, "loss": 0.5069, "step": 21662 }, { "epoch": 71.02622950819672, "grad_norm": 7.048184394836426, "learning_rate": 4.089121932247815e-06, "loss": 0.4549, "step": 21663 }, { "epoch": 71.02950819672131, "grad_norm": 5.562273025512695, "learning_rate": 4.0882654398655965e-06, "loss": 0.4453, "step": 21664 }, { "epoch": 71.0327868852459, "grad_norm": 5.60603141784668, "learning_rate": 4.087409014144336e-06, "loss": 0.6418, "step": 21665 }, { "epoch": 71.03606557377049, "grad_norm": 4.6980791091918945, "learning_rate": 4.086552655093686e-06, "loss": 0.2997, "step": 21666 }, { "epoch": 71.03934426229509, "grad_norm": 4.52344274520874, "learning_rate": 4.085696362723304e-06, "loss": 0.5202, "step": 21667 }, { "epoch": 71.04262295081968, "grad_norm": 5.710110664367676, "learning_rate": 4.084840137042843e-06, "loss": 0.4221, "step": 21668 }, { "epoch": 71.04590163934427, "grad_norm": 5.494714260101318, "learning_rate": 4.083983978061958e-06, "loss": 0.569, "step": 21669 }, { "epoch": 71.04918032786885, "grad_norm": 4.344875335693359, "learning_rate": 4.083127885790308e-06, "loss": 0.3046, "step": 21670 }, { "epoch": 71.05245901639344, "grad_norm": 5.327675819396973, "learning_rate": 4.082271860237542e-06, "loss": 0.5596, "step": 21671 }, { "epoch": 71.05573770491803, "grad_norm": 4.44237756729126, "learning_rate": 4.081415901413312e-06, "loss": 0.3186, "step": 21672 }, { "epoch": 71.05901639344262, "grad_norm": 5.327648162841797, "learning_rate": 4.080560009327274e-06, "loss": 0.4328, "step": 21673 }, { "epoch": 71.0622950819672, "grad_norm": 4.612157344818115, "learning_rate": 4.079704183989076e-06, "loss": 0.3467, "step": 21674 }, { "epoch": 71.06557377049181, "grad_norm": 5.249100685119629, "learning_rate": 4.078848425408366e-06, "loss": 0.5373, "step": 21675 }, { "epoch": 71.0688524590164, "grad_norm": 4.860501289367676, "learning_rate": 4.0779927335948e-06, "loss": 0.4244, "step": 21676 }, { "epoch": 71.07213114754099, "grad_norm": 4.774408340454102, "learning_rate": 4.0771371085580234e-06, "loss": 0.2678, "step": 21677 }, { "epoch": 71.07540983606557, "grad_norm": 5.725965976715088, "learning_rate": 4.07628155030768e-06, "loss": 0.4022, "step": 21678 }, { "epoch": 71.07868852459016, "grad_norm": 6.3499321937561035, "learning_rate": 4.075426058853426e-06, "loss": 0.5187, "step": 21679 }, { "epoch": 71.08196721311475, "grad_norm": 4.863243103027344, "learning_rate": 4.074570634204902e-06, "loss": 0.6164, "step": 21680 }, { "epoch": 71.08524590163934, "grad_norm": 5.14970588684082, "learning_rate": 4.0737152763717556e-06, "loss": 0.1914, "step": 21681 }, { "epoch": 71.08852459016393, "grad_norm": 5.107907772064209, "learning_rate": 4.0728599853636274e-06, "loss": 0.3123, "step": 21682 }, { "epoch": 71.09180327868853, "grad_norm": 4.904938697814941, "learning_rate": 4.072004761190169e-06, "loss": 0.5507, "step": 21683 }, { "epoch": 71.09508196721312, "grad_norm": 5.611758232116699, "learning_rate": 4.071149603861021e-06, "loss": 0.4074, "step": 21684 }, { "epoch": 71.09836065573771, "grad_norm": 3.8650283813476562, "learning_rate": 4.070294513385825e-06, "loss": 0.4033, "step": 21685 }, { "epoch": 71.1016393442623, "grad_norm": 7.235983371734619, "learning_rate": 4.06943948977422e-06, "loss": 0.4615, "step": 21686 }, { "epoch": 71.10491803278688, "grad_norm": 5.658923625946045, "learning_rate": 4.068584533035855e-06, "loss": 0.4629, "step": 21687 }, { "epoch": 71.10819672131147, "grad_norm": 4.554291725158691, "learning_rate": 4.067729643180367e-06, "loss": 0.5412, "step": 21688 }, { "epoch": 71.11147540983606, "grad_norm": 5.773850440979004, "learning_rate": 4.066874820217395e-06, "loss": 0.3679, "step": 21689 }, { "epoch": 71.11475409836065, "grad_norm": 3.9744319915771484, "learning_rate": 4.066020064156574e-06, "loss": 0.3653, "step": 21690 }, { "epoch": 71.11803278688525, "grad_norm": 5.462623596191406, "learning_rate": 4.065165375007551e-06, "loss": 0.209, "step": 21691 }, { "epoch": 71.12131147540984, "grad_norm": 4.259376049041748, "learning_rate": 4.064310752779958e-06, "loss": 0.3491, "step": 21692 }, { "epoch": 71.12459016393443, "grad_norm": 4.87862491607666, "learning_rate": 4.063456197483434e-06, "loss": 0.3043, "step": 21693 }, { "epoch": 71.12786885245902, "grad_norm": 5.061209201812744, "learning_rate": 4.062601709127614e-06, "loss": 0.5916, "step": 21694 }, { "epoch": 71.1311475409836, "grad_norm": 4.434592247009277, "learning_rate": 4.061747287722128e-06, "loss": 0.239, "step": 21695 }, { "epoch": 71.1344262295082, "grad_norm": 5.675134181976318, "learning_rate": 4.0608929332766215e-06, "loss": 0.5312, "step": 21696 }, { "epoch": 71.13770491803278, "grad_norm": 4.110776901245117, "learning_rate": 4.06003864580072e-06, "loss": 0.3222, "step": 21697 }, { "epoch": 71.14098360655737, "grad_norm": 4.115659713745117, "learning_rate": 4.05918442530406e-06, "loss": 0.2849, "step": 21698 }, { "epoch": 71.14426229508197, "grad_norm": 6.2779765129089355, "learning_rate": 4.058330271796269e-06, "loss": 0.4689, "step": 21699 }, { "epoch": 71.14754098360656, "grad_norm": 4.228052139282227, "learning_rate": 4.057476185286985e-06, "loss": 0.3931, "step": 21700 }, { "epoch": 71.15081967213115, "grad_norm": 4.637057781219482, "learning_rate": 4.056622165785837e-06, "loss": 0.3261, "step": 21701 }, { "epoch": 71.15409836065574, "grad_norm": 4.403563022613525, "learning_rate": 4.055768213302451e-06, "loss": 0.3539, "step": 21702 }, { "epoch": 71.15737704918033, "grad_norm": 6.951238632202148, "learning_rate": 4.054914327846458e-06, "loss": 0.5606, "step": 21703 }, { "epoch": 71.16065573770491, "grad_norm": 4.719918727874756, "learning_rate": 4.054060509427489e-06, "loss": 0.4059, "step": 21704 }, { "epoch": 71.1639344262295, "grad_norm": 5.411387920379639, "learning_rate": 4.05320675805517e-06, "loss": 0.3473, "step": 21705 }, { "epoch": 71.1672131147541, "grad_norm": 5.0358991622924805, "learning_rate": 4.052353073739129e-06, "loss": 0.28, "step": 21706 }, { "epoch": 71.1704918032787, "grad_norm": 5.617726802825928, "learning_rate": 4.051499456488991e-06, "loss": 0.3937, "step": 21707 }, { "epoch": 71.17377049180328, "grad_norm": 5.641623497009277, "learning_rate": 4.050645906314378e-06, "loss": 0.3531, "step": 21708 }, { "epoch": 71.17704918032787, "grad_norm": 6.00899600982666, "learning_rate": 4.049792423224921e-06, "loss": 0.2975, "step": 21709 }, { "epoch": 71.18032786885246, "grad_norm": 5.173893928527832, "learning_rate": 4.048939007230241e-06, "loss": 0.5503, "step": 21710 }, { "epoch": 71.18360655737705, "grad_norm": 5.61283016204834, "learning_rate": 4.048085658339962e-06, "loss": 0.2987, "step": 21711 }, { "epoch": 71.18688524590164, "grad_norm": 5.050161838531494, "learning_rate": 4.047232376563701e-06, "loss": 0.6033, "step": 21712 }, { "epoch": 71.19016393442622, "grad_norm": 8.318732261657715, "learning_rate": 4.046379161911089e-06, "loss": 0.296, "step": 21713 }, { "epoch": 71.19344262295083, "grad_norm": 4.293887138366699, "learning_rate": 4.045526014391742e-06, "loss": 0.2497, "step": 21714 }, { "epoch": 71.19672131147541, "grad_norm": 4.471284866333008, "learning_rate": 4.04467293401528e-06, "loss": 0.4225, "step": 21715 }, { "epoch": 71.2, "grad_norm": 5.676132678985596, "learning_rate": 4.043819920791322e-06, "loss": 0.3366, "step": 21716 }, { "epoch": 71.20327868852459, "grad_norm": 6.967745304107666, "learning_rate": 4.042966974729485e-06, "loss": 0.2835, "step": 21717 }, { "epoch": 71.20655737704918, "grad_norm": 5.806057453155518, "learning_rate": 4.0421140958393925e-06, "loss": 0.5377, "step": 21718 }, { "epoch": 71.20983606557377, "grad_norm": 4.943073272705078, "learning_rate": 4.041261284130658e-06, "loss": 0.3508, "step": 21719 }, { "epoch": 71.21311475409836, "grad_norm": 6.457333564758301, "learning_rate": 4.040408539612897e-06, "loss": 0.3771, "step": 21720 }, { "epoch": 71.21639344262294, "grad_norm": 6.127530574798584, "learning_rate": 4.039555862295723e-06, "loss": 0.3569, "step": 21721 }, { "epoch": 71.21967213114755, "grad_norm": 50.35973358154297, "learning_rate": 4.038703252188758e-06, "loss": 0.288, "step": 21722 }, { "epoch": 71.22295081967214, "grad_norm": 5.750066757202148, "learning_rate": 4.037850709301613e-06, "loss": 0.4264, "step": 21723 }, { "epoch": 71.22622950819672, "grad_norm": 5.461610317230225, "learning_rate": 4.036998233643895e-06, "loss": 0.3359, "step": 21724 }, { "epoch": 71.22950819672131, "grad_norm": 5.140904426574707, "learning_rate": 4.036145825225226e-06, "loss": 0.4135, "step": 21725 }, { "epoch": 71.2327868852459, "grad_norm": 5.432646751403809, "learning_rate": 4.035293484055214e-06, "loss": 0.3153, "step": 21726 }, { "epoch": 71.23606557377049, "grad_norm": 9.266594886779785, "learning_rate": 4.034441210143466e-06, "loss": 0.3669, "step": 21727 }, { "epoch": 71.23934426229508, "grad_norm": 5.250791072845459, "learning_rate": 4.033589003499599e-06, "loss": 0.4947, "step": 21728 }, { "epoch": 71.24262295081967, "grad_norm": 5.9468793869018555, "learning_rate": 4.032736864133221e-06, "loss": 0.3713, "step": 21729 }, { "epoch": 71.24590163934427, "grad_norm": 4.265634536743164, "learning_rate": 4.031884792053938e-06, "loss": 0.3902, "step": 21730 }, { "epoch": 71.24918032786886, "grad_norm": 4.646817207336426, "learning_rate": 4.031032787271356e-06, "loss": 0.3175, "step": 21731 }, { "epoch": 71.25245901639344, "grad_norm": 3.5922391414642334, "learning_rate": 4.030180849795089e-06, "loss": 0.5491, "step": 21732 }, { "epoch": 71.25573770491803, "grad_norm": 12.226844787597656, "learning_rate": 4.02932897963474e-06, "loss": 0.4002, "step": 21733 }, { "epoch": 71.25901639344262, "grad_norm": 6.735252857208252, "learning_rate": 4.028477176799912e-06, "loss": 0.25, "step": 21734 }, { "epoch": 71.26229508196721, "grad_norm": 4.542227268218994, "learning_rate": 4.027625441300214e-06, "loss": 0.2634, "step": 21735 }, { "epoch": 71.2655737704918, "grad_norm": 5.01769495010376, "learning_rate": 4.0267737731452515e-06, "loss": 0.4171, "step": 21736 }, { "epoch": 71.26885245901639, "grad_norm": 5.289946556091309, "learning_rate": 4.025922172344624e-06, "loss": 0.5404, "step": 21737 }, { "epoch": 71.27213114754099, "grad_norm": 23.491844177246094, "learning_rate": 4.025070638907932e-06, "loss": 0.5265, "step": 21738 }, { "epoch": 71.27540983606558, "grad_norm": 4.568775653839111, "learning_rate": 4.024219172844784e-06, "loss": 0.2831, "step": 21739 }, { "epoch": 71.27868852459017, "grad_norm": 5.8054046630859375, "learning_rate": 4.023367774164779e-06, "loss": 0.5078, "step": 21740 }, { "epoch": 71.28196721311475, "grad_norm": 4.598245143890381, "learning_rate": 4.022516442877515e-06, "loss": 0.4176, "step": 21741 }, { "epoch": 71.28524590163934, "grad_norm": 4.630246639251709, "learning_rate": 4.021665178992595e-06, "loss": 0.4688, "step": 21742 }, { "epoch": 71.28852459016393, "grad_norm": 5.623000144958496, "learning_rate": 4.020813982519611e-06, "loss": 0.5041, "step": 21743 }, { "epoch": 71.29180327868852, "grad_norm": 5.170628547668457, "learning_rate": 4.0199628534681715e-06, "loss": 0.418, "step": 21744 }, { "epoch": 71.29508196721312, "grad_norm": 7.709779262542725, "learning_rate": 4.0191117918478676e-06, "loss": 0.3116, "step": 21745 }, { "epoch": 71.29836065573771, "grad_norm": 4.799350261688232, "learning_rate": 4.0182607976682956e-06, "loss": 0.3287, "step": 21746 }, { "epoch": 71.3016393442623, "grad_norm": 5.3211140632629395, "learning_rate": 4.01740987093905e-06, "loss": 0.3679, "step": 21747 }, { "epoch": 71.30491803278689, "grad_norm": 5.3405632972717285, "learning_rate": 4.0165590116697315e-06, "loss": 0.5483, "step": 21748 }, { "epoch": 71.30819672131148, "grad_norm": 4.913936138153076, "learning_rate": 4.015708219869932e-06, "loss": 0.4008, "step": 21749 }, { "epoch": 71.31147540983606, "grad_norm": 4.189485549926758, "learning_rate": 4.014857495549245e-06, "loss": 0.3622, "step": 21750 }, { "epoch": 71.31475409836065, "grad_norm": 5.754927635192871, "learning_rate": 4.014006838717258e-06, "loss": 0.5653, "step": 21751 }, { "epoch": 71.31803278688524, "grad_norm": 4.686328887939453, "learning_rate": 4.013156249383572e-06, "loss": 0.3954, "step": 21752 }, { "epoch": 71.32131147540984, "grad_norm": 5.8225202560424805, "learning_rate": 4.0123057275577735e-06, "loss": 0.3869, "step": 21753 }, { "epoch": 71.32459016393443, "grad_norm": 5.9668121337890625, "learning_rate": 4.011455273249454e-06, "loss": 0.3835, "step": 21754 }, { "epoch": 71.32786885245902, "grad_norm": 6.628140449523926, "learning_rate": 4.010604886468202e-06, "loss": 0.4757, "step": 21755 }, { "epoch": 71.33114754098361, "grad_norm": 5.293871879577637, "learning_rate": 4.009754567223605e-06, "loss": 0.4258, "step": 21756 }, { "epoch": 71.3344262295082, "grad_norm": 5.479689598083496, "learning_rate": 4.008904315525256e-06, "loss": 0.45, "step": 21757 }, { "epoch": 71.33770491803278, "grad_norm": 10.290382385253906, "learning_rate": 4.008054131382741e-06, "loss": 0.6441, "step": 21758 }, { "epoch": 71.34098360655737, "grad_norm": 4.657501697540283, "learning_rate": 4.007204014805644e-06, "loss": 0.4342, "step": 21759 }, { "epoch": 71.34426229508196, "grad_norm": 5.487935543060303, "learning_rate": 4.0063539658035514e-06, "loss": 0.4524, "step": 21760 }, { "epoch": 71.34754098360656, "grad_norm": 5.311681747436523, "learning_rate": 4.005503984386052e-06, "loss": 0.5869, "step": 21761 }, { "epoch": 71.35081967213115, "grad_norm": 4.834671497344971, "learning_rate": 4.004654070562728e-06, "loss": 0.2831, "step": 21762 }, { "epoch": 71.35409836065574, "grad_norm": 4.796403884887695, "learning_rate": 4.003804224343163e-06, "loss": 0.5983, "step": 21763 }, { "epoch": 71.35737704918033, "grad_norm": 6.890293598175049, "learning_rate": 4.002954445736936e-06, "loss": 0.3565, "step": 21764 }, { "epoch": 71.36065573770492, "grad_norm": 4.8459601402282715, "learning_rate": 4.002104734753638e-06, "loss": 0.2312, "step": 21765 }, { "epoch": 71.3639344262295, "grad_norm": 4.4508137702941895, "learning_rate": 4.001255091402844e-06, "loss": 0.5017, "step": 21766 }, { "epoch": 71.3672131147541, "grad_norm": 6.793845176696777, "learning_rate": 4.0004055156941355e-06, "loss": 0.6127, "step": 21767 }, { "epoch": 71.37049180327868, "grad_norm": 5.638720989227295, "learning_rate": 3.999556007637094e-06, "loss": 0.504, "step": 21768 }, { "epoch": 71.37377049180328, "grad_norm": 4.396449089050293, "learning_rate": 3.9987065672412936e-06, "loss": 0.3911, "step": 21769 }, { "epoch": 71.37704918032787, "grad_norm": 9.519712448120117, "learning_rate": 3.997857194516321e-06, "loss": 0.2937, "step": 21770 }, { "epoch": 71.38032786885246, "grad_norm": 4.674233436584473, "learning_rate": 3.997007889471747e-06, "loss": 0.5037, "step": 21771 }, { "epoch": 71.38360655737705, "grad_norm": 5.10499382019043, "learning_rate": 3.996158652117152e-06, "loss": 0.4703, "step": 21772 }, { "epoch": 71.38688524590164, "grad_norm": 4.980307102203369, "learning_rate": 3.9953094824621064e-06, "loss": 0.327, "step": 21773 }, { "epoch": 71.39016393442623, "grad_norm": 5.242820739746094, "learning_rate": 3.994460380516193e-06, "loss": 0.3431, "step": 21774 }, { "epoch": 71.39344262295081, "grad_norm": 4.769464015960693, "learning_rate": 3.9936113462889836e-06, "loss": 0.3371, "step": 21775 }, { "epoch": 71.3967213114754, "grad_norm": 4.3872785568237305, "learning_rate": 3.9927623797900515e-06, "loss": 0.2511, "step": 21776 }, { "epoch": 71.4, "grad_norm": 5.168768882751465, "learning_rate": 3.991913481028965e-06, "loss": 0.3753, "step": 21777 }, { "epoch": 71.4032786885246, "grad_norm": 5.414803504943848, "learning_rate": 3.991064650015306e-06, "loss": 0.4637, "step": 21778 }, { "epoch": 71.40655737704918, "grad_norm": 7.175111293792725, "learning_rate": 3.99021588675864e-06, "loss": 0.4545, "step": 21779 }, { "epoch": 71.40983606557377, "grad_norm": 10.326210021972656, "learning_rate": 3.9893671912685336e-06, "loss": 0.3246, "step": 21780 }, { "epoch": 71.41311475409836, "grad_norm": 5.0272216796875, "learning_rate": 3.988518563554567e-06, "loss": 0.2905, "step": 21781 }, { "epoch": 71.41639344262295, "grad_norm": 5.652525424957275, "learning_rate": 3.9876700036263035e-06, "loss": 0.7114, "step": 21782 }, { "epoch": 71.41967213114754, "grad_norm": 4.999783515930176, "learning_rate": 3.986821511493308e-06, "loss": 0.3794, "step": 21783 }, { "epoch": 71.42295081967212, "grad_norm": 7.009800910949707, "learning_rate": 3.985973087165156e-06, "loss": 0.6241, "step": 21784 }, { "epoch": 71.42622950819673, "grad_norm": 5.198126316070557, "learning_rate": 3.985124730651411e-06, "loss": 0.1839, "step": 21785 }, { "epoch": 71.42950819672132, "grad_norm": 4.777515888214111, "learning_rate": 3.9842764419616345e-06, "loss": 0.5179, "step": 21786 }, { "epoch": 71.4327868852459, "grad_norm": 6.344264507293701, "learning_rate": 3.9834282211053985e-06, "loss": 0.5634, "step": 21787 }, { "epoch": 71.43606557377049, "grad_norm": 5.028690814971924, "learning_rate": 3.982580068092266e-06, "loss": 0.5035, "step": 21788 }, { "epoch": 71.43934426229508, "grad_norm": 5.934419631958008, "learning_rate": 3.9817319829318e-06, "loss": 0.4025, "step": 21789 }, { "epoch": 71.44262295081967, "grad_norm": 4.561377048492432, "learning_rate": 3.98088396563356e-06, "loss": 0.291, "step": 21790 }, { "epoch": 71.44590163934426, "grad_norm": 6.526782512664795, "learning_rate": 3.980036016207114e-06, "loss": 0.4057, "step": 21791 }, { "epoch": 71.44918032786886, "grad_norm": 7.117127895355225, "learning_rate": 3.979188134662022e-06, "loss": 0.5432, "step": 21792 }, { "epoch": 71.45245901639345, "grad_norm": 4.724240303039551, "learning_rate": 3.978340321007843e-06, "loss": 0.4893, "step": 21793 }, { "epoch": 71.45573770491804, "grad_norm": 5.8745856285095215, "learning_rate": 3.977492575254138e-06, "loss": 0.3884, "step": 21794 }, { "epoch": 71.45901639344262, "grad_norm": 5.648104667663574, "learning_rate": 3.976644897410464e-06, "loss": 0.4921, "step": 21795 }, { "epoch": 71.46229508196721, "grad_norm": 4.594768524169922, "learning_rate": 3.975797287486383e-06, "loss": 0.4237, "step": 21796 }, { "epoch": 71.4655737704918, "grad_norm": 5.495728015899658, "learning_rate": 3.974949745491452e-06, "loss": 0.5772, "step": 21797 }, { "epoch": 71.46885245901639, "grad_norm": 12.650614738464355, "learning_rate": 3.974102271435228e-06, "loss": 0.4614, "step": 21798 }, { "epoch": 71.47213114754098, "grad_norm": 4.269931793212891, "learning_rate": 3.973254865327262e-06, "loss": 0.4436, "step": 21799 }, { "epoch": 71.47540983606558, "grad_norm": 5.242095947265625, "learning_rate": 3.9724075271771165e-06, "loss": 0.4316, "step": 21800 }, { "epoch": 71.47868852459017, "grad_norm": 5.768035888671875, "learning_rate": 3.971560256994343e-06, "loss": 0.5597, "step": 21801 }, { "epoch": 71.48196721311476, "grad_norm": 6.944339275360107, "learning_rate": 3.970713054788498e-06, "loss": 0.4805, "step": 21802 }, { "epoch": 71.48524590163935, "grad_norm": 7.130357265472412, "learning_rate": 3.969865920569127e-06, "loss": 0.3889, "step": 21803 }, { "epoch": 71.48852459016393, "grad_norm": 4.275629043579102, "learning_rate": 3.969018854345791e-06, "loss": 0.3688, "step": 21804 }, { "epoch": 71.49180327868852, "grad_norm": 5.877649784088135, "learning_rate": 3.968171856128038e-06, "loss": 0.3958, "step": 21805 }, { "epoch": 71.49508196721311, "grad_norm": 5.436657905578613, "learning_rate": 3.967324925925419e-06, "loss": 0.3003, "step": 21806 }, { "epoch": 71.4983606557377, "grad_norm": 5.228183746337891, "learning_rate": 3.966478063747484e-06, "loss": 0.4597, "step": 21807 }, { "epoch": 71.5016393442623, "grad_norm": 5.344873905181885, "learning_rate": 3.965631269603778e-06, "loss": 0.3327, "step": 21808 }, { "epoch": 71.50491803278689, "grad_norm": 4.62693977355957, "learning_rate": 3.964784543503858e-06, "loss": 0.3221, "step": 21809 }, { "epoch": 71.50819672131148, "grad_norm": 4.919651031494141, "learning_rate": 3.963937885457268e-06, "loss": 0.4369, "step": 21810 }, { "epoch": 71.51147540983607, "grad_norm": 4.5263566970825195, "learning_rate": 3.963091295473552e-06, "loss": 0.6641, "step": 21811 }, { "epoch": 71.51475409836065, "grad_norm": 4.403683185577393, "learning_rate": 3.962244773562256e-06, "loss": 0.4128, "step": 21812 }, { "epoch": 71.51803278688524, "grad_norm": 4.703249454498291, "learning_rate": 3.961398319732932e-06, "loss": 0.2525, "step": 21813 }, { "epoch": 71.52131147540983, "grad_norm": 11.20138931274414, "learning_rate": 3.96055193399512e-06, "loss": 0.5701, "step": 21814 }, { "epoch": 71.52459016393442, "grad_norm": 4.308524131774902, "learning_rate": 3.959705616358365e-06, "loss": 0.3603, "step": 21815 }, { "epoch": 71.52786885245902, "grad_norm": 5.4658379554748535, "learning_rate": 3.958859366832205e-06, "loss": 0.3226, "step": 21816 }, { "epoch": 71.53114754098361, "grad_norm": 5.310748100280762, "learning_rate": 3.9580131854261905e-06, "loss": 0.3604, "step": 21817 }, { "epoch": 71.5344262295082, "grad_norm": 5.7083821296691895, "learning_rate": 3.9571670721498604e-06, "loss": 0.396, "step": 21818 }, { "epoch": 71.53770491803279, "grad_norm": 4.164593696594238, "learning_rate": 3.956321027012754e-06, "loss": 0.4156, "step": 21819 }, { "epoch": 71.54098360655738, "grad_norm": 5.698862075805664, "learning_rate": 3.955475050024412e-06, "loss": 0.4034, "step": 21820 }, { "epoch": 71.54426229508196, "grad_norm": 4.757547855377197, "learning_rate": 3.9546291411943694e-06, "loss": 0.6003, "step": 21821 }, { "epoch": 71.54754098360655, "grad_norm": 5.993583679199219, "learning_rate": 3.953783300532172e-06, "loss": 0.3249, "step": 21822 }, { "epoch": 71.55081967213114, "grad_norm": 6.164750099182129, "learning_rate": 3.9529375280473556e-06, "loss": 0.477, "step": 21823 }, { "epoch": 71.55409836065574, "grad_norm": 4.405345439910889, "learning_rate": 3.952091823749455e-06, "loss": 0.4621, "step": 21824 }, { "epoch": 71.55737704918033, "grad_norm": 4.665740489959717, "learning_rate": 3.951246187648004e-06, "loss": 0.3014, "step": 21825 }, { "epoch": 71.56065573770492, "grad_norm": 4.581432819366455, "learning_rate": 3.950400619752546e-06, "loss": 0.3573, "step": 21826 }, { "epoch": 71.56393442622951, "grad_norm": 3.8420333862304688, "learning_rate": 3.94955512007261e-06, "loss": 0.1763, "step": 21827 }, { "epoch": 71.5672131147541, "grad_norm": 5.208044052124023, "learning_rate": 3.948709688617731e-06, "loss": 0.585, "step": 21828 }, { "epoch": 71.57049180327868, "grad_norm": 6.83834981918335, "learning_rate": 3.947864325397439e-06, "loss": 0.4198, "step": 21829 }, { "epoch": 71.57377049180327, "grad_norm": 5.959685802459717, "learning_rate": 3.947019030421273e-06, "loss": 0.4937, "step": 21830 }, { "epoch": 71.57704918032788, "grad_norm": 5.593225479125977, "learning_rate": 3.946173803698759e-06, "loss": 0.251, "step": 21831 }, { "epoch": 71.58032786885246, "grad_norm": 4.894075393676758, "learning_rate": 3.945328645239432e-06, "loss": 0.3896, "step": 21832 }, { "epoch": 71.58360655737705, "grad_norm": 3.935798406600952, "learning_rate": 3.944483555052816e-06, "loss": 0.5648, "step": 21833 }, { "epoch": 71.58688524590164, "grad_norm": 8.13927936553955, "learning_rate": 3.943638533148447e-06, "loss": 0.3779, "step": 21834 }, { "epoch": 71.59016393442623, "grad_norm": 13.501458168029785, "learning_rate": 3.942793579535851e-06, "loss": 0.227, "step": 21835 }, { "epoch": 71.59344262295082, "grad_norm": 5.151966094970703, "learning_rate": 3.941948694224551e-06, "loss": 0.3397, "step": 21836 }, { "epoch": 71.5967213114754, "grad_norm": 4.873384475708008, "learning_rate": 3.941103877224083e-06, "loss": 0.3133, "step": 21837 }, { "epoch": 71.6, "grad_norm": 6.586667537689209, "learning_rate": 3.940259128543967e-06, "loss": 0.2346, "step": 21838 }, { "epoch": 71.6032786885246, "grad_norm": 5.518886089324951, "learning_rate": 3.939414448193727e-06, "loss": 0.3442, "step": 21839 }, { "epoch": 71.60655737704919, "grad_norm": 9.48572826385498, "learning_rate": 3.938569836182894e-06, "loss": 0.3836, "step": 21840 }, { "epoch": 71.60983606557377, "grad_norm": 17.505325317382812, "learning_rate": 3.937725292520988e-06, "loss": 0.4074, "step": 21841 }, { "epoch": 71.61311475409836, "grad_norm": 4.333292484283447, "learning_rate": 3.93688081721753e-06, "loss": 0.5081, "step": 21842 }, { "epoch": 71.61639344262295, "grad_norm": 4.967032432556152, "learning_rate": 3.936036410282048e-06, "loss": 0.7805, "step": 21843 }, { "epoch": 71.61967213114754, "grad_norm": 4.804538249969482, "learning_rate": 3.93519207172406e-06, "loss": 0.5239, "step": 21844 }, { "epoch": 71.62295081967213, "grad_norm": 5.018357276916504, "learning_rate": 3.934347801553088e-06, "loss": 0.3833, "step": 21845 }, { "epoch": 71.62622950819672, "grad_norm": 5.39005708694458, "learning_rate": 3.933503599778651e-06, "loss": 0.3759, "step": 21846 }, { "epoch": 71.62950819672132, "grad_norm": 4.448554992675781, "learning_rate": 3.932659466410264e-06, "loss": 0.4396, "step": 21847 }, { "epoch": 71.6327868852459, "grad_norm": 6.805516242980957, "learning_rate": 3.931815401457455e-06, "loss": 0.2336, "step": 21848 }, { "epoch": 71.6360655737705, "grad_norm": 4.083134174346924, "learning_rate": 3.930971404929736e-06, "loss": 0.4582, "step": 21849 }, { "epoch": 71.63934426229508, "grad_norm": 5.356358528137207, "learning_rate": 3.930127476836624e-06, "loss": 0.3028, "step": 21850 }, { "epoch": 71.64262295081967, "grad_norm": 6.040408134460449, "learning_rate": 3.929283617187632e-06, "loss": 0.5, "step": 21851 }, { "epoch": 71.64590163934426, "grad_norm": 5.6141533851623535, "learning_rate": 3.928439825992284e-06, "loss": 0.498, "step": 21852 }, { "epoch": 71.64918032786885, "grad_norm": 4.024646759033203, "learning_rate": 3.927596103260089e-06, "loss": 0.3128, "step": 21853 }, { "epoch": 71.65245901639344, "grad_norm": 4.30621862411499, "learning_rate": 3.9267524490005625e-06, "loss": 0.3874, "step": 21854 }, { "epoch": 71.65573770491804, "grad_norm": 4.73880672454834, "learning_rate": 3.925908863223212e-06, "loss": 0.5115, "step": 21855 }, { "epoch": 71.65901639344263, "grad_norm": 5.920200347900391, "learning_rate": 3.925065345937559e-06, "loss": 0.3428, "step": 21856 }, { "epoch": 71.66229508196722, "grad_norm": 4.402838230133057, "learning_rate": 3.92422189715311e-06, "loss": 0.4147, "step": 21857 }, { "epoch": 71.6655737704918, "grad_norm": 6.61658239364624, "learning_rate": 3.923378516879377e-06, "loss": 0.4597, "step": 21858 }, { "epoch": 71.66885245901639, "grad_norm": 5.147707939147949, "learning_rate": 3.922535205125869e-06, "loss": 0.4355, "step": 21859 }, { "epoch": 71.67213114754098, "grad_norm": 4.446861267089844, "learning_rate": 3.921691961902092e-06, "loss": 0.4403, "step": 21860 }, { "epoch": 71.67540983606557, "grad_norm": 8.058938026428223, "learning_rate": 3.920848787217562e-06, "loss": 0.159, "step": 21861 }, { "epoch": 71.67868852459016, "grad_norm": 5.355014801025391, "learning_rate": 3.920005681081781e-06, "loss": 0.3329, "step": 21862 }, { "epoch": 71.68196721311476, "grad_norm": 4.416805744171143, "learning_rate": 3.919162643504259e-06, "loss": 0.4534, "step": 21863 }, { "epoch": 71.68524590163935, "grad_norm": 4.065347194671631, "learning_rate": 3.918319674494496e-06, "loss": 0.3666, "step": 21864 }, { "epoch": 71.68852459016394, "grad_norm": 12.993353843688965, "learning_rate": 3.917476774062007e-06, "loss": 0.5327, "step": 21865 }, { "epoch": 71.69180327868852, "grad_norm": 6.561979293823242, "learning_rate": 3.916633942216291e-06, "loss": 0.4759, "step": 21866 }, { "epoch": 71.69508196721311, "grad_norm": 5.123233795166016, "learning_rate": 3.915791178966852e-06, "loss": 0.4777, "step": 21867 }, { "epoch": 71.6983606557377, "grad_norm": 4.801011085510254, "learning_rate": 3.914948484323191e-06, "loss": 0.5101, "step": 21868 }, { "epoch": 71.70163934426229, "grad_norm": 8.478280067443848, "learning_rate": 3.914105858294815e-06, "loss": 0.5334, "step": 21869 }, { "epoch": 71.70491803278688, "grad_norm": 6.028598308563232, "learning_rate": 3.913263300891223e-06, "loss": 0.3879, "step": 21870 }, { "epoch": 71.70819672131148, "grad_norm": 4.804906368255615, "learning_rate": 3.912420812121917e-06, "loss": 0.312, "step": 21871 }, { "epoch": 71.71147540983607, "grad_norm": 5.302731513977051, "learning_rate": 3.911578391996395e-06, "loss": 0.4365, "step": 21872 }, { "epoch": 71.71475409836066, "grad_norm": 5.407123565673828, "learning_rate": 3.910736040524155e-06, "loss": 0.2519, "step": 21873 }, { "epoch": 71.71803278688525, "grad_norm": 4.139601707458496, "learning_rate": 3.9098937577147e-06, "loss": 0.4144, "step": 21874 }, { "epoch": 71.72131147540983, "grad_norm": 7.564930438995361, "learning_rate": 3.9090515435775245e-06, "loss": 0.3414, "step": 21875 }, { "epoch": 71.72459016393442, "grad_norm": 7.360818386077881, "learning_rate": 3.908209398122127e-06, "loss": 0.613, "step": 21876 }, { "epoch": 71.72786885245901, "grad_norm": 4.307245254516602, "learning_rate": 3.907367321357998e-06, "loss": 0.5281, "step": 21877 }, { "epoch": 71.73114754098361, "grad_norm": 4.395073890686035, "learning_rate": 3.90652531329464e-06, "loss": 0.6248, "step": 21878 }, { "epoch": 71.7344262295082, "grad_norm": 6.414195537567139, "learning_rate": 3.905683373941546e-06, "loss": 0.4454, "step": 21879 }, { "epoch": 71.73770491803279, "grad_norm": 4.377481460571289, "learning_rate": 3.904841503308208e-06, "loss": 0.3951, "step": 21880 }, { "epoch": 71.74098360655738, "grad_norm": 4.903448581695557, "learning_rate": 3.903999701404115e-06, "loss": 0.3494, "step": 21881 }, { "epoch": 71.74426229508197, "grad_norm": 4.617918491363525, "learning_rate": 3.903157968238769e-06, "loss": 0.2353, "step": 21882 }, { "epoch": 71.74754098360656, "grad_norm": 6.53289794921875, "learning_rate": 3.902316303821655e-06, "loss": 0.3417, "step": 21883 }, { "epoch": 71.75081967213114, "grad_norm": 4.193511486053467, "learning_rate": 3.901474708162265e-06, "loss": 0.3404, "step": 21884 }, { "epoch": 71.75409836065573, "grad_norm": 4.377703666687012, "learning_rate": 3.9006331812700845e-06, "loss": 0.4811, "step": 21885 }, { "epoch": 71.75737704918033, "grad_norm": 6.490262508392334, "learning_rate": 3.89979172315461e-06, "loss": 0.4596, "step": 21886 }, { "epoch": 71.76065573770492, "grad_norm": 4.202402114868164, "learning_rate": 3.898950333825327e-06, "loss": 0.4969, "step": 21887 }, { "epoch": 71.76393442622951, "grad_norm": 4.746401309967041, "learning_rate": 3.8981090132917185e-06, "loss": 0.3212, "step": 21888 }, { "epoch": 71.7672131147541, "grad_norm": 4.069312572479248, "learning_rate": 3.89726776156328e-06, "loss": 0.5446, "step": 21889 }, { "epoch": 71.77049180327869, "grad_norm": 9.144876480102539, "learning_rate": 3.8964265786494915e-06, "loss": 0.5186, "step": 21890 }, { "epoch": 71.77377049180328, "grad_norm": 6.256877422332764, "learning_rate": 3.8955854645598365e-06, "loss": 0.5335, "step": 21891 }, { "epoch": 71.77704918032786, "grad_norm": 5.358333587646484, "learning_rate": 3.894744419303805e-06, "loss": 0.4263, "step": 21892 }, { "epoch": 71.78032786885245, "grad_norm": 6.802393913269043, "learning_rate": 3.893903442890879e-06, "loss": 0.4618, "step": 21893 }, { "epoch": 71.78360655737706, "grad_norm": 5.01523494720459, "learning_rate": 3.89306253533054e-06, "loss": 0.3038, "step": 21894 }, { "epoch": 71.78688524590164, "grad_norm": 4.584443092346191, "learning_rate": 3.892221696632268e-06, "loss": 0.6403, "step": 21895 }, { "epoch": 71.79016393442623, "grad_norm": 5.678660869598389, "learning_rate": 3.891380926805549e-06, "loss": 0.3566, "step": 21896 }, { "epoch": 71.79344262295082, "grad_norm": 6.1703715324401855, "learning_rate": 3.890540225859862e-06, "loss": 0.423, "step": 21897 }, { "epoch": 71.79672131147541, "grad_norm": 6.5308427810668945, "learning_rate": 3.889699593804686e-06, "loss": 0.4646, "step": 21898 }, { "epoch": 71.8, "grad_norm": 11.381186485290527, "learning_rate": 3.888859030649498e-06, "loss": 0.7914, "step": 21899 }, { "epoch": 71.80327868852459, "grad_norm": 4.59450101852417, "learning_rate": 3.88801853640378e-06, "loss": 0.3041, "step": 21900 }, { "epoch": 71.80655737704917, "grad_norm": 5.069129943847656, "learning_rate": 3.887178111077009e-06, "loss": 0.4475, "step": 21901 }, { "epoch": 71.80983606557378, "grad_norm": 26.117876052856445, "learning_rate": 3.88633775467866e-06, "loss": 0.316, "step": 21902 }, { "epoch": 71.81311475409836, "grad_norm": 6.4855475425720215, "learning_rate": 3.885497467218206e-06, "loss": 0.5006, "step": 21903 }, { "epoch": 71.81639344262295, "grad_norm": 7.614099979400635, "learning_rate": 3.884657248705129e-06, "loss": 0.6097, "step": 21904 }, { "epoch": 71.81967213114754, "grad_norm": 4.925731658935547, "learning_rate": 3.8838170991489e-06, "loss": 0.3759, "step": 21905 }, { "epoch": 71.82295081967213, "grad_norm": 6.502557277679443, "learning_rate": 3.882977018558993e-06, "loss": 0.5053, "step": 21906 }, { "epoch": 71.82622950819672, "grad_norm": 4.496484756469727, "learning_rate": 3.882137006944876e-06, "loss": 0.2974, "step": 21907 }, { "epoch": 71.8295081967213, "grad_norm": 4.299463748931885, "learning_rate": 3.88129706431603e-06, "loss": 0.2505, "step": 21908 }, { "epoch": 71.8327868852459, "grad_norm": 4.329429626464844, "learning_rate": 3.88045719068192e-06, "loss": 0.4796, "step": 21909 }, { "epoch": 71.8360655737705, "grad_norm": 4.8930583000183105, "learning_rate": 3.879617386052018e-06, "loss": 0.4555, "step": 21910 }, { "epoch": 71.83934426229509, "grad_norm": 3.83992075920105, "learning_rate": 3.878777650435794e-06, "loss": 0.1884, "step": 21911 }, { "epoch": 71.84262295081967, "grad_norm": 4.482945442199707, "learning_rate": 3.877937983842712e-06, "loss": 0.4968, "step": 21912 }, { "epoch": 71.84590163934426, "grad_norm": 4.515425205230713, "learning_rate": 3.8770983862822496e-06, "loss": 0.3454, "step": 21913 }, { "epoch": 71.84918032786885, "grad_norm": 4.449558734893799, "learning_rate": 3.8762588577638685e-06, "loss": 0.3776, "step": 21914 }, { "epoch": 71.85245901639344, "grad_norm": 4.395892143249512, "learning_rate": 3.8754193982970354e-06, "loss": 0.3696, "step": 21915 }, { "epoch": 71.85573770491803, "grad_norm": 5.359565258026123, "learning_rate": 3.874580007891214e-06, "loss": 0.2756, "step": 21916 }, { "epoch": 71.85901639344263, "grad_norm": 5.826501369476318, "learning_rate": 3.873740686555875e-06, "loss": 0.612, "step": 21917 }, { "epoch": 71.86229508196722, "grad_norm": 4.627754211425781, "learning_rate": 3.872901434300479e-06, "loss": 0.4411, "step": 21918 }, { "epoch": 71.8655737704918, "grad_norm": 4.8071770668029785, "learning_rate": 3.87206225113449e-06, "loss": 0.3694, "step": 21919 }, { "epoch": 71.8688524590164, "grad_norm": 7.282412052154541, "learning_rate": 3.871223137067368e-06, "loss": 0.4352, "step": 21920 }, { "epoch": 71.87213114754098, "grad_norm": 5.001422882080078, "learning_rate": 3.87038409210858e-06, "loss": 0.5185, "step": 21921 }, { "epoch": 71.87540983606557, "grad_norm": 3.751185894012451, "learning_rate": 3.869545116267584e-06, "loss": 0.4447, "step": 21922 }, { "epoch": 71.87868852459016, "grad_norm": 4.535803318023682, "learning_rate": 3.868706209553843e-06, "loss": 0.2028, "step": 21923 }, { "epoch": 71.88196721311475, "grad_norm": 4.487363338470459, "learning_rate": 3.867867371976812e-06, "loss": 0.3624, "step": 21924 }, { "epoch": 71.88524590163935, "grad_norm": 7.545395374298096, "learning_rate": 3.86702860354595e-06, "loss": 0.5137, "step": 21925 }, { "epoch": 71.88852459016394, "grad_norm": 5.579014301300049, "learning_rate": 3.86618990427072e-06, "loss": 0.4507, "step": 21926 }, { "epoch": 71.89180327868853, "grad_norm": 5.607293605804443, "learning_rate": 3.865351274160578e-06, "loss": 0.4311, "step": 21927 }, { "epoch": 71.89508196721312, "grad_norm": 5.41832971572876, "learning_rate": 3.864512713224979e-06, "loss": 0.5959, "step": 21928 }, { "epoch": 71.8983606557377, "grad_norm": 4.916661739349365, "learning_rate": 3.863674221473372e-06, "loss": 0.3392, "step": 21929 }, { "epoch": 71.90163934426229, "grad_norm": 4.454437255859375, "learning_rate": 3.862835798915224e-06, "loss": 0.6215, "step": 21930 }, { "epoch": 71.90491803278688, "grad_norm": 5.2284722328186035, "learning_rate": 3.861997445559983e-06, "loss": 0.3651, "step": 21931 }, { "epoch": 71.90819672131147, "grad_norm": 5.295957565307617, "learning_rate": 3.861159161417103e-06, "loss": 0.427, "step": 21932 }, { "epoch": 71.91147540983607, "grad_norm": 7.25903844833374, "learning_rate": 3.860320946496032e-06, "loss": 0.5411, "step": 21933 }, { "epoch": 71.91475409836066, "grad_norm": 4.919764518737793, "learning_rate": 3.85948280080623e-06, "loss": 0.4369, "step": 21934 }, { "epoch": 71.91803278688525, "grad_norm": 5.34415340423584, "learning_rate": 3.8586447243571445e-06, "loss": 0.3487, "step": 21935 }, { "epoch": 71.92131147540984, "grad_norm": 5.876136779785156, "learning_rate": 3.857806717158224e-06, "loss": 0.3149, "step": 21936 }, { "epoch": 71.92459016393443, "grad_norm": 4.965918064117432, "learning_rate": 3.856968779218919e-06, "loss": 0.5175, "step": 21937 }, { "epoch": 71.92786885245901, "grad_norm": 6.077529430389404, "learning_rate": 3.856130910548676e-06, "loss": 0.4176, "step": 21938 }, { "epoch": 71.9311475409836, "grad_norm": 5.64809513092041, "learning_rate": 3.855293111156948e-06, "loss": 0.3887, "step": 21939 }, { "epoch": 71.93442622950819, "grad_norm": 5.102517604827881, "learning_rate": 3.854455381053178e-06, "loss": 0.4726, "step": 21940 }, { "epoch": 71.9377049180328, "grad_norm": 6.98372220993042, "learning_rate": 3.853617720246812e-06, "loss": 0.5053, "step": 21941 }, { "epoch": 71.94098360655738, "grad_norm": 8.492931365966797, "learning_rate": 3.852780128747298e-06, "loss": 0.2835, "step": 21942 }, { "epoch": 71.94426229508197, "grad_norm": 5.057751178741455, "learning_rate": 3.851942606564081e-06, "loss": 0.6757, "step": 21943 }, { "epoch": 71.94754098360656, "grad_norm": 4.6473774909973145, "learning_rate": 3.851105153706599e-06, "loss": 0.3115, "step": 21944 }, { "epoch": 71.95081967213115, "grad_norm": 4.939947605133057, "learning_rate": 3.850267770184304e-06, "loss": 0.3946, "step": 21945 }, { "epoch": 71.95409836065573, "grad_norm": 4.730437278747559, "learning_rate": 3.849430456006633e-06, "loss": 0.5168, "step": 21946 }, { "epoch": 71.95737704918032, "grad_norm": 7.555145740509033, "learning_rate": 3.848593211183026e-06, "loss": 0.3698, "step": 21947 }, { "epoch": 71.96065573770491, "grad_norm": 4.471724510192871, "learning_rate": 3.8477560357229304e-06, "loss": 0.4621, "step": 21948 }, { "epoch": 71.96393442622951, "grad_norm": 6.840083599090576, "learning_rate": 3.846918929635781e-06, "loss": 0.3903, "step": 21949 }, { "epoch": 71.9672131147541, "grad_norm": 4.252371311187744, "learning_rate": 3.84608189293102e-06, "loss": 0.4324, "step": 21950 }, { "epoch": 71.97049180327869, "grad_norm": 4.714483261108398, "learning_rate": 3.845244925618078e-06, "loss": 0.6469, "step": 21951 }, { "epoch": 71.97377049180328, "grad_norm": 4.588481426239014, "learning_rate": 3.844408027706405e-06, "loss": 0.274, "step": 21952 }, { "epoch": 71.97704918032787, "grad_norm": 4.473246097564697, "learning_rate": 3.843571199205429e-06, "loss": 0.2894, "step": 21953 }, { "epoch": 71.98032786885246, "grad_norm": 5.383155345916748, "learning_rate": 3.842734440124591e-06, "loss": 0.3449, "step": 21954 }, { "epoch": 71.98360655737704, "grad_norm": 4.697794437408447, "learning_rate": 3.8418977504733204e-06, "loss": 0.515, "step": 21955 }, { "epoch": 71.98688524590163, "grad_norm": 4.491223335266113, "learning_rate": 3.841061130261058e-06, "loss": 0.4008, "step": 21956 }, { "epoch": 71.99016393442623, "grad_norm": 4.487462043762207, "learning_rate": 3.840224579497235e-06, "loss": 0.4565, "step": 21957 }, { "epoch": 71.99344262295082, "grad_norm": 4.65437650680542, "learning_rate": 3.839388098191285e-06, "loss": 0.4743, "step": 21958 }, { "epoch": 71.99672131147541, "grad_norm": 5.1953511238098145, "learning_rate": 3.838551686352636e-06, "loss": 0.5147, "step": 21959 }, { "epoch": 72.0, "grad_norm": 4.288629055023193, "learning_rate": 3.837715343990727e-06, "loss": 0.2528, "step": 21960 }, { "epoch": 72.00327868852459, "grad_norm": 4.8076982498168945, "learning_rate": 3.8368790711149835e-06, "loss": 0.4925, "step": 21961 }, { "epoch": 72.00655737704918, "grad_norm": 8.294493675231934, "learning_rate": 3.836042867734838e-06, "loss": 0.4413, "step": 21962 }, { "epoch": 72.00983606557377, "grad_norm": 4.265605926513672, "learning_rate": 3.835206733859718e-06, "loss": 0.3895, "step": 21963 }, { "epoch": 72.01311475409837, "grad_norm": 3.951030731201172, "learning_rate": 3.834370669499047e-06, "loss": 0.5703, "step": 21964 }, { "epoch": 72.01639344262296, "grad_norm": 5.150183200836182, "learning_rate": 3.833534674662261e-06, "loss": 0.5838, "step": 21965 }, { "epoch": 72.01967213114754, "grad_norm": 3.9414989948272705, "learning_rate": 3.832698749358784e-06, "loss": 0.2059, "step": 21966 }, { "epoch": 72.02295081967213, "grad_norm": 5.909923076629639, "learning_rate": 3.8318628935980405e-06, "loss": 0.3752, "step": 21967 }, { "epoch": 72.02622950819672, "grad_norm": 4.492208957672119, "learning_rate": 3.8310271073894535e-06, "loss": 0.3199, "step": 21968 }, { "epoch": 72.02950819672131, "grad_norm": 4.844061851501465, "learning_rate": 3.830191390742453e-06, "loss": 0.3407, "step": 21969 }, { "epoch": 72.0327868852459, "grad_norm": 5.356202602386475, "learning_rate": 3.8293557436664584e-06, "loss": 0.4454, "step": 21970 }, { "epoch": 72.03606557377049, "grad_norm": 5.061557292938232, "learning_rate": 3.828520166170895e-06, "loss": 0.5919, "step": 21971 }, { "epoch": 72.03934426229509, "grad_norm": 5.314749717712402, "learning_rate": 3.82768465826518e-06, "loss": 0.4278, "step": 21972 }, { "epoch": 72.04262295081968, "grad_norm": 4.685070037841797, "learning_rate": 3.826849219958741e-06, "loss": 0.316, "step": 21973 }, { "epoch": 72.04590163934427, "grad_norm": 4.8531084060668945, "learning_rate": 3.826013851260994e-06, "loss": 0.5282, "step": 21974 }, { "epoch": 72.04918032786885, "grad_norm": 4.1078596115112305, "learning_rate": 3.825178552181362e-06, "loss": 0.3309, "step": 21975 }, { "epoch": 72.05245901639344, "grad_norm": 4.809637069702148, "learning_rate": 3.8243433227292625e-06, "loss": 0.6111, "step": 21976 }, { "epoch": 72.05573770491803, "grad_norm": 7.047455310821533, "learning_rate": 3.823508162914108e-06, "loss": 0.7585, "step": 21977 }, { "epoch": 72.05901639344262, "grad_norm": 4.424952030181885, "learning_rate": 3.822673072745325e-06, "loss": 0.3403, "step": 21978 }, { "epoch": 72.0622950819672, "grad_norm": 5.539525508880615, "learning_rate": 3.8218380522323275e-06, "loss": 0.3386, "step": 21979 }, { "epoch": 72.06557377049181, "grad_norm": 4.010441780090332, "learning_rate": 3.821003101384527e-06, "loss": 0.2194, "step": 21980 }, { "epoch": 72.0688524590164, "grad_norm": 4.7562408447265625, "learning_rate": 3.82016822021134e-06, "loss": 0.3201, "step": 21981 }, { "epoch": 72.07213114754099, "grad_norm": 4.343213081359863, "learning_rate": 3.819333408722184e-06, "loss": 0.4365, "step": 21982 }, { "epoch": 72.07540983606557, "grad_norm": 4.056382656097412, "learning_rate": 3.81849866692647e-06, "loss": 0.4444, "step": 21983 }, { "epoch": 72.07868852459016, "grad_norm": 7.1475138664245605, "learning_rate": 3.817663994833611e-06, "loss": 0.4005, "step": 21984 }, { "epoch": 72.08196721311475, "grad_norm": 4.181141376495361, "learning_rate": 3.816829392453016e-06, "loss": 0.262, "step": 21985 }, { "epoch": 72.08524590163934, "grad_norm": 3.9778358936309814, "learning_rate": 3.8159948597941e-06, "loss": 0.4822, "step": 21986 }, { "epoch": 72.08852459016393, "grad_norm": 7.276113986968994, "learning_rate": 3.815160396866272e-06, "loss": 0.4015, "step": 21987 }, { "epoch": 72.09180327868853, "grad_norm": 5.311120986938477, "learning_rate": 3.814326003678942e-06, "loss": 0.3851, "step": 21988 }, { "epoch": 72.09508196721312, "grad_norm": 4.895600318908691, "learning_rate": 3.8134916802415178e-06, "loss": 0.5577, "step": 21989 }, { "epoch": 72.09836065573771, "grad_norm": 4.640310287475586, "learning_rate": 3.812657426563403e-06, "loss": 0.2971, "step": 21990 }, { "epoch": 72.1016393442623, "grad_norm": 4.887363910675049, "learning_rate": 3.8118232426540135e-06, "loss": 0.3279, "step": 21991 }, { "epoch": 72.10491803278688, "grad_norm": 4.942826747894287, "learning_rate": 3.8109891285227497e-06, "loss": 0.2686, "step": 21992 }, { "epoch": 72.10819672131147, "grad_norm": 6.088970184326172, "learning_rate": 3.810155084179016e-06, "loss": 0.3027, "step": 21993 }, { "epoch": 72.11147540983606, "grad_norm": 4.646745681762695, "learning_rate": 3.8093211096322223e-06, "loss": 0.2, "step": 21994 }, { "epoch": 72.11475409836065, "grad_norm": 3.825620412826538, "learning_rate": 3.808487204891771e-06, "loss": 0.4758, "step": 21995 }, { "epoch": 72.11803278688525, "grad_norm": 5.55680513381958, "learning_rate": 3.8076533699670627e-06, "loss": 0.5727, "step": 21996 }, { "epoch": 72.12131147540984, "grad_norm": 5.5421929359436035, "learning_rate": 3.8068196048674986e-06, "loss": 0.3317, "step": 21997 }, { "epoch": 72.12459016393443, "grad_norm": 6.31765604019165, "learning_rate": 3.8059859096024853e-06, "loss": 0.3702, "step": 21998 }, { "epoch": 72.12786885245902, "grad_norm": 5.296130657196045, "learning_rate": 3.8051522841814215e-06, "loss": 0.4767, "step": 21999 }, { "epoch": 72.1311475409836, "grad_norm": 5.474991321563721, "learning_rate": 3.804318728613704e-06, "loss": 0.4945, "step": 22000 }, { "epoch": 72.1344262295082, "grad_norm": 7.271330833435059, "learning_rate": 3.8034852429087365e-06, "loss": 0.4415, "step": 22001 }, { "epoch": 72.13770491803278, "grad_norm": 4.61482572555542, "learning_rate": 3.8026518270759173e-06, "loss": 0.2577, "step": 22002 }, { "epoch": 72.14098360655737, "grad_norm": 5.628130912780762, "learning_rate": 3.8018184811246386e-06, "loss": 0.4745, "step": 22003 }, { "epoch": 72.14426229508197, "grad_norm": 5.629569053649902, "learning_rate": 3.8009852050643035e-06, "loss": 0.2955, "step": 22004 }, { "epoch": 72.14754098360656, "grad_norm": 5.552751541137695, "learning_rate": 3.8001519989043057e-06, "loss": 0.5428, "step": 22005 }, { "epoch": 72.15081967213115, "grad_norm": 4.539881229400635, "learning_rate": 3.799318862654041e-06, "loss": 0.3284, "step": 22006 }, { "epoch": 72.15409836065574, "grad_norm": 6.116327285766602, "learning_rate": 3.7984857963228994e-06, "loss": 0.4312, "step": 22007 }, { "epoch": 72.15737704918033, "grad_norm": 5.278595924377441, "learning_rate": 3.7976527999202827e-06, "loss": 0.749, "step": 22008 }, { "epoch": 72.16065573770491, "grad_norm": 4.399913787841797, "learning_rate": 3.796819873455578e-06, "loss": 0.2032, "step": 22009 }, { "epoch": 72.1639344262295, "grad_norm": 4.589896202087402, "learning_rate": 3.7959870169381805e-06, "loss": 0.3227, "step": 22010 }, { "epoch": 72.1672131147541, "grad_norm": 7.024770736694336, "learning_rate": 3.795154230377476e-06, "loss": 0.3823, "step": 22011 }, { "epoch": 72.1704918032787, "grad_norm": 4.2233734130859375, "learning_rate": 3.7943215137828616e-06, "loss": 0.3539, "step": 22012 }, { "epoch": 72.17377049180328, "grad_norm": 5.256278991699219, "learning_rate": 3.793488867163725e-06, "loss": 0.3307, "step": 22013 }, { "epoch": 72.17704918032787, "grad_norm": 6.312436103820801, "learning_rate": 3.792656290529455e-06, "loss": 0.3713, "step": 22014 }, { "epoch": 72.18032786885246, "grad_norm": 3.950847864151001, "learning_rate": 3.791823783889439e-06, "loss": 0.5431, "step": 22015 }, { "epoch": 72.18360655737705, "grad_norm": 25.17214012145996, "learning_rate": 3.7909913472530603e-06, "loss": 0.3388, "step": 22016 }, { "epoch": 72.18688524590164, "grad_norm": 5.139309883117676, "learning_rate": 3.7901589806297144e-06, "loss": 0.5676, "step": 22017 }, { "epoch": 72.19016393442622, "grad_norm": 4.42738676071167, "learning_rate": 3.7893266840287823e-06, "loss": 0.6192, "step": 22018 }, { "epoch": 72.19344262295083, "grad_norm": 4.688635349273682, "learning_rate": 3.7884944574596496e-06, "loss": 0.4437, "step": 22019 }, { "epoch": 72.19672131147541, "grad_norm": 4.996987819671631, "learning_rate": 3.787662300931697e-06, "loss": 0.3448, "step": 22020 }, { "epoch": 72.2, "grad_norm": 4.370173931121826, "learning_rate": 3.7868302144543146e-06, "loss": 0.5356, "step": 22021 }, { "epoch": 72.20327868852459, "grad_norm": 4.000861167907715, "learning_rate": 3.785998198036881e-06, "loss": 0.309, "step": 22022 }, { "epoch": 72.20655737704918, "grad_norm": 6.101192474365234, "learning_rate": 3.7851662516887787e-06, "loss": 0.2852, "step": 22023 }, { "epoch": 72.20983606557377, "grad_norm": 6.512879371643066, "learning_rate": 3.7843343754193853e-06, "loss": 0.3814, "step": 22024 }, { "epoch": 72.21311475409836, "grad_norm": 4.362194538116455, "learning_rate": 3.7835025692380876e-06, "loss": 0.6072, "step": 22025 }, { "epoch": 72.21639344262294, "grad_norm": 4.5511603355407715, "learning_rate": 3.7826708331542627e-06, "loss": 0.4858, "step": 22026 }, { "epoch": 72.21967213114755, "grad_norm": 4.402836799621582, "learning_rate": 3.7818391671772893e-06, "loss": 0.3564, "step": 22027 }, { "epoch": 72.22295081967214, "grad_norm": 4.818532466888428, "learning_rate": 3.781007571316543e-06, "loss": 0.386, "step": 22028 }, { "epoch": 72.22622950819672, "grad_norm": 4.879972457885742, "learning_rate": 3.7801760455813997e-06, "loss": 0.3363, "step": 22029 }, { "epoch": 72.22950819672131, "grad_norm": 5.338627338409424, "learning_rate": 3.779344589981242e-06, "loss": 0.5457, "step": 22030 }, { "epoch": 72.2327868852459, "grad_norm": 7.196086883544922, "learning_rate": 3.778513204525441e-06, "loss": 0.3794, "step": 22031 }, { "epoch": 72.23606557377049, "grad_norm": 5.005056858062744, "learning_rate": 3.7776818892233737e-06, "loss": 0.5651, "step": 22032 }, { "epoch": 72.23934426229508, "grad_norm": 4.56427526473999, "learning_rate": 3.776850644084409e-06, "loss": 0.4971, "step": 22033 }, { "epoch": 72.24262295081967, "grad_norm": 4.086126327514648, "learning_rate": 3.776019469117926e-06, "loss": 0.2937, "step": 22034 }, { "epoch": 72.24590163934427, "grad_norm": 8.259854316711426, "learning_rate": 3.7751883643332965e-06, "loss": 0.5986, "step": 22035 }, { "epoch": 72.24918032786886, "grad_norm": 6.928075313568115, "learning_rate": 3.7743573297398896e-06, "loss": 0.341, "step": 22036 }, { "epoch": 72.25245901639344, "grad_norm": 7.839446544647217, "learning_rate": 3.7735263653470732e-06, "loss": 0.4207, "step": 22037 }, { "epoch": 72.25573770491803, "grad_norm": 5.391711235046387, "learning_rate": 3.7726954711642237e-06, "loss": 0.4284, "step": 22038 }, { "epoch": 72.25901639344262, "grad_norm": 4.431018829345703, "learning_rate": 3.771864647200709e-06, "loss": 0.6634, "step": 22039 }, { "epoch": 72.26229508196721, "grad_norm": 5.444554805755615, "learning_rate": 3.7710338934658952e-06, "loss": 0.3026, "step": 22040 }, { "epoch": 72.2655737704918, "grad_norm": 6.451452255249023, "learning_rate": 3.770203209969151e-06, "loss": 0.3086, "step": 22041 }, { "epoch": 72.26885245901639, "grad_norm": 7.337066650390625, "learning_rate": 3.769372596719839e-06, "loss": 0.4034, "step": 22042 }, { "epoch": 72.27213114754099, "grad_norm": 3.9440934658050537, "learning_rate": 3.768542053727333e-06, "loss": 0.2672, "step": 22043 }, { "epoch": 72.27540983606558, "grad_norm": 4.425142288208008, "learning_rate": 3.7677115810009956e-06, "loss": 0.4606, "step": 22044 }, { "epoch": 72.27868852459017, "grad_norm": 5.502600193023682, "learning_rate": 3.766881178550189e-06, "loss": 0.2157, "step": 22045 }, { "epoch": 72.28196721311475, "grad_norm": 4.416711807250977, "learning_rate": 3.766050846384274e-06, "loss": 0.248, "step": 22046 }, { "epoch": 72.28524590163934, "grad_norm": 5.5267486572265625, "learning_rate": 3.765220584512621e-06, "loss": 0.2373, "step": 22047 }, { "epoch": 72.28852459016393, "grad_norm": 4.4737725257873535, "learning_rate": 3.764390392944589e-06, "loss": 0.5456, "step": 22048 }, { "epoch": 72.29180327868852, "grad_norm": 4.524407386779785, "learning_rate": 3.763560271689536e-06, "loss": 0.4185, "step": 22049 }, { "epoch": 72.29508196721312, "grad_norm": 5.381035804748535, "learning_rate": 3.7627302207568272e-06, "loss": 0.2983, "step": 22050 }, { "epoch": 72.29836065573771, "grad_norm": 4.479395866394043, "learning_rate": 3.76190024015582e-06, "loss": 0.355, "step": 22051 }, { "epoch": 72.3016393442623, "grad_norm": 3.838733196258545, "learning_rate": 3.7610703298958717e-06, "loss": 0.378, "step": 22052 }, { "epoch": 72.30491803278689, "grad_norm": 5.98354434967041, "learning_rate": 3.7602404899863455e-06, "loss": 0.4236, "step": 22053 }, { "epoch": 72.30819672131148, "grad_norm": 7.6683878898620605, "learning_rate": 3.759410720436595e-06, "loss": 0.3674, "step": 22054 }, { "epoch": 72.31147540983606, "grad_norm": 4.3134260177612305, "learning_rate": 3.7585810212559738e-06, "loss": 0.2701, "step": 22055 }, { "epoch": 72.31475409836065, "grad_norm": 5.0577287673950195, "learning_rate": 3.7577513924538446e-06, "loss": 0.3043, "step": 22056 }, { "epoch": 72.31803278688524, "grad_norm": 5.51577091217041, "learning_rate": 3.7569218340395575e-06, "loss": 0.3292, "step": 22057 }, { "epoch": 72.32131147540984, "grad_norm": 5.889678478240967, "learning_rate": 3.7560923460224696e-06, "loss": 0.5719, "step": 22058 }, { "epoch": 72.32459016393443, "grad_norm": 4.172152042388916, "learning_rate": 3.755262928411928e-06, "loss": 0.2385, "step": 22059 }, { "epoch": 72.32786885245902, "grad_norm": 4.667776107788086, "learning_rate": 3.7544335812172938e-06, "loss": 0.4772, "step": 22060 }, { "epoch": 72.33114754098361, "grad_norm": 4.54231595993042, "learning_rate": 3.753604304447915e-06, "loss": 0.4601, "step": 22061 }, { "epoch": 72.3344262295082, "grad_norm": 4.798099517822266, "learning_rate": 3.7527750981131415e-06, "loss": 0.3322, "step": 22062 }, { "epoch": 72.33770491803278, "grad_norm": 5.497802257537842, "learning_rate": 3.751945962222322e-06, "loss": 0.5129, "step": 22063 }, { "epoch": 72.34098360655737, "grad_norm": 4.282806873321533, "learning_rate": 3.751116896784811e-06, "loss": 0.4586, "step": 22064 }, { "epoch": 72.34426229508196, "grad_norm": 4.400657653808594, "learning_rate": 3.7502879018099536e-06, "loss": 0.3425, "step": 22065 }, { "epoch": 72.34754098360656, "grad_norm": 5.381537437438965, "learning_rate": 3.749458977307099e-06, "loss": 0.3481, "step": 22066 }, { "epoch": 72.35081967213115, "grad_norm": 6.119892597198486, "learning_rate": 3.7486301232855925e-06, "loss": 0.6219, "step": 22067 }, { "epoch": 72.35409836065574, "grad_norm": 6.497359752655029, "learning_rate": 3.7478013397547786e-06, "loss": 0.6649, "step": 22068 }, { "epoch": 72.35737704918033, "grad_norm": 6.029017448425293, "learning_rate": 3.746972626724008e-06, "loss": 0.2896, "step": 22069 }, { "epoch": 72.36065573770492, "grad_norm": 4.669336318969727, "learning_rate": 3.7461439842026225e-06, "loss": 0.4279, "step": 22070 }, { "epoch": 72.3639344262295, "grad_norm": 5.5112624168396, "learning_rate": 3.745315412199967e-06, "loss": 0.2251, "step": 22071 }, { "epoch": 72.3672131147541, "grad_norm": 5.634500980377197, "learning_rate": 3.7444869107253787e-06, "loss": 0.4282, "step": 22072 }, { "epoch": 72.37049180327868, "grad_norm": 5.194241046905518, "learning_rate": 3.743658479788209e-06, "loss": 0.4011, "step": 22073 }, { "epoch": 72.37377049180328, "grad_norm": 5.486674785614014, "learning_rate": 3.7428301193977947e-06, "loss": 0.4102, "step": 22074 }, { "epoch": 72.37704918032787, "grad_norm": 4.2691473960876465, "learning_rate": 3.7420018295634765e-06, "loss": 0.5729, "step": 22075 }, { "epoch": 72.38032786885246, "grad_norm": 4.921425819396973, "learning_rate": 3.7411736102945905e-06, "loss": 0.3484, "step": 22076 }, { "epoch": 72.38360655737705, "grad_norm": 6.3481831550598145, "learning_rate": 3.740345461600483e-06, "loss": 0.4783, "step": 22077 }, { "epoch": 72.38688524590164, "grad_norm": 6.118326187133789, "learning_rate": 3.7395173834904897e-06, "loss": 0.2722, "step": 22078 }, { "epoch": 72.39016393442623, "grad_norm": 4.851748943328857, "learning_rate": 3.7386893759739464e-06, "loss": 0.4257, "step": 22079 }, { "epoch": 72.39344262295081, "grad_norm": 6.127970218658447, "learning_rate": 3.737861439060191e-06, "loss": 0.2451, "step": 22080 }, { "epoch": 72.3967213114754, "grad_norm": 5.325728893280029, "learning_rate": 3.737033572758555e-06, "loss": 0.6124, "step": 22081 }, { "epoch": 72.4, "grad_norm": 6.773910999298096, "learning_rate": 3.736205777078381e-06, "loss": 0.5065, "step": 22082 }, { "epoch": 72.4032786885246, "grad_norm": 3.969177484512329, "learning_rate": 3.7353780520290006e-06, "loss": 0.3261, "step": 22083 }, { "epoch": 72.40655737704918, "grad_norm": 7.25813102722168, "learning_rate": 3.734550397619745e-06, "loss": 0.2879, "step": 22084 }, { "epoch": 72.40983606557377, "grad_norm": 4.964039325714111, "learning_rate": 3.7337228138599447e-06, "loss": 0.4887, "step": 22085 }, { "epoch": 72.41311475409836, "grad_norm": 5.672425746917725, "learning_rate": 3.7328953007589387e-06, "loss": 0.3881, "step": 22086 }, { "epoch": 72.41639344262295, "grad_norm": 4.773061275482178, "learning_rate": 3.732067858326054e-06, "loss": 0.4348, "step": 22087 }, { "epoch": 72.41967213114754, "grad_norm": 4.689714431762695, "learning_rate": 3.731240486570622e-06, "loss": 0.5437, "step": 22088 }, { "epoch": 72.42295081967212, "grad_norm": 23.54595375061035, "learning_rate": 3.7304131855019663e-06, "loss": 0.3989, "step": 22089 }, { "epoch": 72.42622950819673, "grad_norm": 5.445146083831787, "learning_rate": 3.7295859551294256e-06, "loss": 0.3742, "step": 22090 }, { "epoch": 72.42950819672132, "grad_norm": 4.255313873291016, "learning_rate": 3.7287587954623228e-06, "loss": 0.4062, "step": 22091 }, { "epoch": 72.4327868852459, "grad_norm": 13.916882514953613, "learning_rate": 3.7279317065099854e-06, "loss": 0.5471, "step": 22092 }, { "epoch": 72.43606557377049, "grad_norm": 4.274353504180908, "learning_rate": 3.7271046882817375e-06, "loss": 0.4263, "step": 22093 }, { "epoch": 72.43934426229508, "grad_norm": 5.897439002990723, "learning_rate": 3.7262777407869046e-06, "loss": 0.473, "step": 22094 }, { "epoch": 72.44262295081967, "grad_norm": 4.52950382232666, "learning_rate": 3.7254508640348162e-06, "loss": 0.4408, "step": 22095 }, { "epoch": 72.44590163934426, "grad_norm": 11.641874313354492, "learning_rate": 3.7246240580347924e-06, "loss": 0.1769, "step": 22096 }, { "epoch": 72.44918032786886, "grad_norm": 4.268592834472656, "learning_rate": 3.723797322796159e-06, "loss": 0.2801, "step": 22097 }, { "epoch": 72.45245901639345, "grad_norm": 6.5396928787231445, "learning_rate": 3.722970658328231e-06, "loss": 0.4247, "step": 22098 }, { "epoch": 72.45573770491804, "grad_norm": 11.203999519348145, "learning_rate": 3.7221440646403396e-06, "loss": 0.2836, "step": 22099 }, { "epoch": 72.45901639344262, "grad_norm": 7.330302715301514, "learning_rate": 3.7213175417418012e-06, "loss": 0.3717, "step": 22100 }, { "epoch": 72.46229508196721, "grad_norm": 5.033029556274414, "learning_rate": 3.7204910896419353e-06, "loss": 0.279, "step": 22101 }, { "epoch": 72.4655737704918, "grad_norm": 4.740677356719971, "learning_rate": 3.7196647083500593e-06, "loss": 0.2966, "step": 22102 }, { "epoch": 72.46885245901639, "grad_norm": 5.960200786590576, "learning_rate": 3.718838397875496e-06, "loss": 0.2914, "step": 22103 }, { "epoch": 72.47213114754098, "grad_norm": 6.3977179527282715, "learning_rate": 3.718012158227561e-06, "loss": 0.4528, "step": 22104 }, { "epoch": 72.47540983606558, "grad_norm": 6.2842278480529785, "learning_rate": 3.717185989415566e-06, "loss": 0.6102, "step": 22105 }, { "epoch": 72.47868852459017, "grad_norm": 4.3862433433532715, "learning_rate": 3.7163598914488364e-06, "loss": 0.3084, "step": 22106 }, { "epoch": 72.48196721311476, "grad_norm": 4.993256568908691, "learning_rate": 3.715533864336681e-06, "loss": 0.2384, "step": 22107 }, { "epoch": 72.48524590163935, "grad_norm": 4.939313888549805, "learning_rate": 3.714707908088413e-06, "loss": 0.2382, "step": 22108 }, { "epoch": 72.48852459016393, "grad_norm": 4.9136857986450195, "learning_rate": 3.713882022713351e-06, "loss": 0.1535, "step": 22109 }, { "epoch": 72.49180327868852, "grad_norm": 6.070174217224121, "learning_rate": 3.7130562082208054e-06, "loss": 0.3651, "step": 22110 }, { "epoch": 72.49508196721311, "grad_norm": 4.508566856384277, "learning_rate": 3.7122304646200846e-06, "loss": 0.4029, "step": 22111 }, { "epoch": 72.4983606557377, "grad_norm": 5.552614688873291, "learning_rate": 3.7114047919205066e-06, "loss": 0.3741, "step": 22112 }, { "epoch": 72.5016393442623, "grad_norm": 16.8448486328125, "learning_rate": 3.710579190131378e-06, "loss": 0.5327, "step": 22113 }, { "epoch": 72.50491803278689, "grad_norm": 5.04653787612915, "learning_rate": 3.7097536592620086e-06, "loss": 0.5308, "step": 22114 }, { "epoch": 72.50819672131148, "grad_norm": 4.899077415466309, "learning_rate": 3.708928199321703e-06, "loss": 0.4086, "step": 22115 }, { "epoch": 72.51147540983607, "grad_norm": 5.125030994415283, "learning_rate": 3.708102810319777e-06, "loss": 0.315, "step": 22116 }, { "epoch": 72.51475409836065, "grad_norm": 4.965371608734131, "learning_rate": 3.707277492265533e-06, "loss": 0.2997, "step": 22117 }, { "epoch": 72.51803278688524, "grad_norm": 5.296302795410156, "learning_rate": 3.7064522451682782e-06, "loss": 0.4243, "step": 22118 }, { "epoch": 72.52131147540983, "grad_norm": 5.1962714195251465, "learning_rate": 3.7056270690373186e-06, "loss": 0.6031, "step": 22119 }, { "epoch": 72.52459016393442, "grad_norm": 5.737934112548828, "learning_rate": 3.7048019638819545e-06, "loss": 0.411, "step": 22120 }, { "epoch": 72.52786885245902, "grad_norm": 5.088998794555664, "learning_rate": 3.7039769297114968e-06, "loss": 0.3869, "step": 22121 }, { "epoch": 72.53114754098361, "grad_norm": 5.669374942779541, "learning_rate": 3.7031519665352456e-06, "loss": 0.5581, "step": 22122 }, { "epoch": 72.5344262295082, "grad_norm": 5.366802215576172, "learning_rate": 3.702327074362504e-06, "loss": 0.5063, "step": 22123 }, { "epoch": 72.53770491803279, "grad_norm": 4.72759485244751, "learning_rate": 3.701502253202568e-06, "loss": 0.3576, "step": 22124 }, { "epoch": 72.54098360655738, "grad_norm": 5.867410182952881, "learning_rate": 3.700677503064747e-06, "loss": 0.485, "step": 22125 }, { "epoch": 72.54426229508196, "grad_norm": 6.074514389038086, "learning_rate": 3.699852823958335e-06, "loss": 0.4402, "step": 22126 }, { "epoch": 72.54754098360655, "grad_norm": 4.502996921539307, "learning_rate": 3.699028215892635e-06, "loss": 0.3751, "step": 22127 }, { "epoch": 72.55081967213114, "grad_norm": 4.603289604187012, "learning_rate": 3.698203678876939e-06, "loss": 0.4036, "step": 22128 }, { "epoch": 72.55409836065574, "grad_norm": 5.532293796539307, "learning_rate": 3.697379212920552e-06, "loss": 0.327, "step": 22129 }, { "epoch": 72.55737704918033, "grad_norm": 5.510203838348389, "learning_rate": 3.696554818032768e-06, "loss": 0.4325, "step": 22130 }, { "epoch": 72.56065573770492, "grad_norm": 6.032840728759766, "learning_rate": 3.6957304942228822e-06, "loss": 0.4782, "step": 22131 }, { "epoch": 72.56393442622951, "grad_norm": 5.099521160125732, "learning_rate": 3.69490624150019e-06, "loss": 0.211, "step": 22132 }, { "epoch": 72.5672131147541, "grad_norm": 4.257649898529053, "learning_rate": 3.6940820598739823e-06, "loss": 0.4362, "step": 22133 }, { "epoch": 72.57049180327868, "grad_norm": 4.669092655181885, "learning_rate": 3.693257949353558e-06, "loss": 0.3249, "step": 22134 }, { "epoch": 72.57377049180327, "grad_norm": 4.945739269256592, "learning_rate": 3.69243390994821e-06, "loss": 0.4473, "step": 22135 }, { "epoch": 72.57704918032788, "grad_norm": 5.450915813446045, "learning_rate": 3.6916099416672255e-06, "loss": 0.5617, "step": 22136 }, { "epoch": 72.58032786885246, "grad_norm": 6.659397125244141, "learning_rate": 3.690786044519896e-06, "loss": 0.4682, "step": 22137 }, { "epoch": 72.58360655737705, "grad_norm": 5.365542411804199, "learning_rate": 3.689962218515517e-06, "loss": 0.319, "step": 22138 }, { "epoch": 72.58688524590164, "grad_norm": 10.209288597106934, "learning_rate": 3.6891384636633744e-06, "loss": 0.5426, "step": 22139 }, { "epoch": 72.59016393442623, "grad_norm": 5.88007926940918, "learning_rate": 3.688314779972757e-06, "loss": 0.4491, "step": 22140 }, { "epoch": 72.59344262295082, "grad_norm": 4.083964824676514, "learning_rate": 3.6874911674529535e-06, "loss": 0.3322, "step": 22141 }, { "epoch": 72.5967213114754, "grad_norm": 6.7309651374816895, "learning_rate": 3.6866676261132473e-06, "loss": 0.4119, "step": 22142 }, { "epoch": 72.6, "grad_norm": 4.945968151092529, "learning_rate": 3.685844155962931e-06, "loss": 0.4651, "step": 22143 }, { "epoch": 72.6032786885246, "grad_norm": 5.3618855476379395, "learning_rate": 3.6850207570112872e-06, "loss": 0.4609, "step": 22144 }, { "epoch": 72.60655737704919, "grad_norm": 4.796669006347656, "learning_rate": 3.6841974292676e-06, "loss": 0.3289, "step": 22145 }, { "epoch": 72.60983606557377, "grad_norm": 4.985184192657471, "learning_rate": 3.6833741727411497e-06, "loss": 0.5878, "step": 22146 }, { "epoch": 72.61311475409836, "grad_norm": 6.702362537384033, "learning_rate": 3.682550987441227e-06, "loss": 0.3904, "step": 22147 }, { "epoch": 72.61639344262295, "grad_norm": 9.086429595947266, "learning_rate": 3.68172787337711e-06, "loss": 0.5871, "step": 22148 }, { "epoch": 72.61967213114754, "grad_norm": 6.210653305053711, "learning_rate": 3.6809048305580818e-06, "loss": 0.7354, "step": 22149 }, { "epoch": 72.62295081967213, "grad_norm": 7.341944217681885, "learning_rate": 3.6800818589934174e-06, "loss": 0.4535, "step": 22150 }, { "epoch": 72.62622950819672, "grad_norm": 5.113896369934082, "learning_rate": 3.679258958692404e-06, "loss": 0.3468, "step": 22151 }, { "epoch": 72.62950819672132, "grad_norm": 4.205342769622803, "learning_rate": 3.678436129664319e-06, "loss": 0.3182, "step": 22152 }, { "epoch": 72.6327868852459, "grad_norm": 4.3581342697143555, "learning_rate": 3.677613371918439e-06, "loss": 0.2064, "step": 22153 }, { "epoch": 72.6360655737705, "grad_norm": 5.7719316482543945, "learning_rate": 3.676790685464039e-06, "loss": 0.4581, "step": 22154 }, { "epoch": 72.63934426229508, "grad_norm": 5.427889347076416, "learning_rate": 3.6759680703104016e-06, "loss": 0.2909, "step": 22155 }, { "epoch": 72.64262295081967, "grad_norm": 5.086433410644531, "learning_rate": 3.675145526466799e-06, "loss": 0.3898, "step": 22156 }, { "epoch": 72.64590163934426, "grad_norm": 5.225958824157715, "learning_rate": 3.6743230539425035e-06, "loss": 0.2119, "step": 22157 }, { "epoch": 72.64918032786885, "grad_norm": 6.03057336807251, "learning_rate": 3.6735006527467967e-06, "loss": 0.378, "step": 22158 }, { "epoch": 72.65245901639344, "grad_norm": 5.250396251678467, "learning_rate": 3.6726783228889475e-06, "loss": 0.3361, "step": 22159 }, { "epoch": 72.65573770491804, "grad_norm": 6.624331474304199, "learning_rate": 3.671856064378229e-06, "loss": 0.3928, "step": 22160 }, { "epoch": 72.65901639344263, "grad_norm": 5.58986234664917, "learning_rate": 3.6710338772239094e-06, "loss": 0.5755, "step": 22161 }, { "epoch": 72.66229508196722, "grad_norm": 8.125964164733887, "learning_rate": 3.6702117614352663e-06, "loss": 0.437, "step": 22162 }, { "epoch": 72.6655737704918, "grad_norm": 4.404723644256592, "learning_rate": 3.6693897170215674e-06, "loss": 0.2966, "step": 22163 }, { "epoch": 72.66885245901639, "grad_norm": 5.370922088623047, "learning_rate": 3.6685677439920788e-06, "loss": 0.2361, "step": 22164 }, { "epoch": 72.67213114754098, "grad_norm": 4.959332466125488, "learning_rate": 3.6677458423560754e-06, "loss": 0.393, "step": 22165 }, { "epoch": 72.67540983606557, "grad_norm": 4.839428901672363, "learning_rate": 3.66692401212282e-06, "loss": 0.348, "step": 22166 }, { "epoch": 72.67868852459016, "grad_norm": 5.28900671005249, "learning_rate": 3.6661022533015822e-06, "loss": 0.3663, "step": 22167 }, { "epoch": 72.68196721311476, "grad_norm": 5.795475006103516, "learning_rate": 3.6652805659016234e-06, "loss": 0.4761, "step": 22168 }, { "epoch": 72.68524590163935, "grad_norm": 5.724215030670166, "learning_rate": 3.664458949932217e-06, "loss": 0.3877, "step": 22169 }, { "epoch": 72.68852459016394, "grad_norm": 6.568105697631836, "learning_rate": 3.6636374054026223e-06, "loss": 0.5811, "step": 22170 }, { "epoch": 72.69180327868852, "grad_norm": 39.30297088623047, "learning_rate": 3.6628159323221034e-06, "loss": 0.3538, "step": 22171 }, { "epoch": 72.69508196721311, "grad_norm": 5.73211669921875, "learning_rate": 3.6619945306999216e-06, "loss": 0.5583, "step": 22172 }, { "epoch": 72.6983606557377, "grad_norm": 4.320630073547363, "learning_rate": 3.6611732005453448e-06, "loss": 0.3249, "step": 22173 }, { "epoch": 72.70163934426229, "grad_norm": 5.035377502441406, "learning_rate": 3.6603519418676304e-06, "loss": 0.4634, "step": 22174 }, { "epoch": 72.70491803278688, "grad_norm": 6.88951301574707, "learning_rate": 3.6595307546760393e-06, "loss": 0.3969, "step": 22175 }, { "epoch": 72.70819672131148, "grad_norm": 7.371717929840088, "learning_rate": 3.658709638979828e-06, "loss": 0.2911, "step": 22176 }, { "epoch": 72.71147540983607, "grad_norm": 5.159697532653809, "learning_rate": 3.6578885947882625e-06, "loss": 0.5858, "step": 22177 }, { "epoch": 72.71475409836066, "grad_norm": 4.547677040100098, "learning_rate": 3.6570676221105973e-06, "loss": 0.5042, "step": 22178 }, { "epoch": 72.71803278688525, "grad_norm": 5.323615550994873, "learning_rate": 3.6562467209560905e-06, "loss": 0.5247, "step": 22179 }, { "epoch": 72.72131147540983, "grad_norm": 4.930446624755859, "learning_rate": 3.655425891333996e-06, "loss": 0.5657, "step": 22180 }, { "epoch": 72.72459016393442, "grad_norm": 5.1553120613098145, "learning_rate": 3.654605133253569e-06, "loss": 0.3724, "step": 22181 }, { "epoch": 72.72786885245901, "grad_norm": 5.0221757888793945, "learning_rate": 3.653784446724069e-06, "loss": 0.3575, "step": 22182 }, { "epoch": 72.73114754098361, "grad_norm": 4.230922698974609, "learning_rate": 3.652963831754749e-06, "loss": 0.3775, "step": 22183 }, { "epoch": 72.7344262295082, "grad_norm": 15.194910049438477, "learning_rate": 3.6521432883548603e-06, "loss": 0.4414, "step": 22184 }, { "epoch": 72.73770491803279, "grad_norm": 5.608386039733887, "learning_rate": 3.6513228165336535e-06, "loss": 0.3567, "step": 22185 }, { "epoch": 72.74098360655738, "grad_norm": 6.439774513244629, "learning_rate": 3.6505024163003853e-06, "loss": 0.5105, "step": 22186 }, { "epoch": 72.74426229508197, "grad_norm": 4.835059642791748, "learning_rate": 3.649682087664306e-06, "loss": 0.3193, "step": 22187 }, { "epoch": 72.74754098360656, "grad_norm": 5.280489921569824, "learning_rate": 3.648861830634661e-06, "loss": 0.4517, "step": 22188 }, { "epoch": 72.75081967213114, "grad_norm": 4.4462409019470215, "learning_rate": 3.6480416452207015e-06, "loss": 0.3206, "step": 22189 }, { "epoch": 72.75409836065573, "grad_norm": 4.493156433105469, "learning_rate": 3.6472215314316796e-06, "loss": 0.2497, "step": 22190 }, { "epoch": 72.75737704918033, "grad_norm": 4.244032859802246, "learning_rate": 3.6464014892768397e-06, "loss": 0.4933, "step": 22191 }, { "epoch": 72.76065573770492, "grad_norm": 6.0059075355529785, "learning_rate": 3.6455815187654285e-06, "loss": 0.4538, "step": 22192 }, { "epoch": 72.76393442622951, "grad_norm": 4.497807025909424, "learning_rate": 3.6447616199066937e-06, "loss": 0.6898, "step": 22193 }, { "epoch": 72.7672131147541, "grad_norm": 5.125003337860107, "learning_rate": 3.6439417927098754e-06, "loss": 0.4037, "step": 22194 }, { "epoch": 72.77049180327869, "grad_norm": 4.737198829650879, "learning_rate": 3.6431220371842255e-06, "loss": 0.3428, "step": 22195 }, { "epoch": 72.77377049180328, "grad_norm": 5.301974296569824, "learning_rate": 3.6423023533389845e-06, "loss": 0.4548, "step": 22196 }, { "epoch": 72.77704918032786, "grad_norm": 12.66329288482666, "learning_rate": 3.641482741183395e-06, "loss": 0.3403, "step": 22197 }, { "epoch": 72.78032786885245, "grad_norm": 5.417557716369629, "learning_rate": 3.640663200726695e-06, "loss": 0.5911, "step": 22198 }, { "epoch": 72.78360655737706, "grad_norm": 4.072735786437988, "learning_rate": 3.639843731978133e-06, "loss": 0.2841, "step": 22199 }, { "epoch": 72.78688524590164, "grad_norm": 5.414793491363525, "learning_rate": 3.6390243349469458e-06, "loss": 0.5896, "step": 22200 }, { "epoch": 72.79016393442623, "grad_norm": 4.056135654449463, "learning_rate": 3.638205009642373e-06, "loss": 0.3474, "step": 22201 }, { "epoch": 72.79344262295082, "grad_norm": 5.133024215698242, "learning_rate": 3.637385756073649e-06, "loss": 0.4362, "step": 22202 }, { "epoch": 72.79672131147541, "grad_norm": 5.854948997497559, "learning_rate": 3.636566574250021e-06, "loss": 0.3424, "step": 22203 }, { "epoch": 72.8, "grad_norm": 4.746649265289307, "learning_rate": 3.63574746418072e-06, "loss": 0.3695, "step": 22204 }, { "epoch": 72.80327868852459, "grad_norm": 5.816445350646973, "learning_rate": 3.6349284258749853e-06, "loss": 0.3966, "step": 22205 }, { "epoch": 72.80655737704917, "grad_norm": 3.8208305835723877, "learning_rate": 3.634109459342049e-06, "loss": 0.249, "step": 22206 }, { "epoch": 72.80983606557378, "grad_norm": 4.636892795562744, "learning_rate": 3.6332905645911444e-06, "loss": 0.3614, "step": 22207 }, { "epoch": 72.81311475409836, "grad_norm": 5.467345714569092, "learning_rate": 3.6324717416315116e-06, "loss": 0.3981, "step": 22208 }, { "epoch": 72.81639344262295, "grad_norm": 5.077226161956787, "learning_rate": 3.6316529904723795e-06, "loss": 0.852, "step": 22209 }, { "epoch": 72.81967213114754, "grad_norm": 5.147583484649658, "learning_rate": 3.6308343111229795e-06, "loss": 0.495, "step": 22210 }, { "epoch": 72.82295081967213, "grad_norm": 4.276115894317627, "learning_rate": 3.6300157035925477e-06, "loss": 0.2523, "step": 22211 }, { "epoch": 72.82622950819672, "grad_norm": 7.613235950469971, "learning_rate": 3.6291971678903124e-06, "loss": 0.4734, "step": 22212 }, { "epoch": 72.8295081967213, "grad_norm": 5.128628730773926, "learning_rate": 3.628378704025499e-06, "loss": 0.3271, "step": 22213 }, { "epoch": 72.8327868852459, "grad_norm": 6.3437418937683105, "learning_rate": 3.6275603120073444e-06, "loss": 0.6398, "step": 22214 }, { "epoch": 72.8360655737705, "grad_norm": 5.521486282348633, "learning_rate": 3.6267419918450732e-06, "loss": 0.3508, "step": 22215 }, { "epoch": 72.83934426229509, "grad_norm": 5.273662090301514, "learning_rate": 3.625923743547909e-06, "loss": 0.5154, "step": 22216 }, { "epoch": 72.84262295081967, "grad_norm": 5.533462047576904, "learning_rate": 3.6251055671250845e-06, "loss": 0.5699, "step": 22217 }, { "epoch": 72.84590163934426, "grad_norm": 5.10042142868042, "learning_rate": 3.624287462585824e-06, "loss": 0.3256, "step": 22218 }, { "epoch": 72.84918032786885, "grad_norm": 4.769567012786865, "learning_rate": 3.623469429939351e-06, "loss": 0.7408, "step": 22219 }, { "epoch": 72.85245901639344, "grad_norm": 6.136929035186768, "learning_rate": 3.6226514691948867e-06, "loss": 0.4106, "step": 22220 }, { "epoch": 72.85573770491803, "grad_norm": 5.537346839904785, "learning_rate": 3.621833580361661e-06, "loss": 0.3289, "step": 22221 }, { "epoch": 72.85901639344263, "grad_norm": 6.538013458251953, "learning_rate": 3.6210157634488943e-06, "loss": 0.4569, "step": 22222 }, { "epoch": 72.86229508196722, "grad_norm": 4.486995697021484, "learning_rate": 3.620198018465807e-06, "loss": 0.3047, "step": 22223 }, { "epoch": 72.8655737704918, "grad_norm": 4.325713634490967, "learning_rate": 3.619380345421616e-06, "loss": 0.3702, "step": 22224 }, { "epoch": 72.8688524590164, "grad_norm": 6.697301387786865, "learning_rate": 3.61856274432555e-06, "loss": 0.2933, "step": 22225 }, { "epoch": 72.87213114754098, "grad_norm": 6.9015116691589355, "learning_rate": 3.617745215186824e-06, "loss": 0.543, "step": 22226 }, { "epoch": 72.87540983606557, "grad_norm": 7.4583659172058105, "learning_rate": 3.616927758014657e-06, "loss": 0.5192, "step": 22227 }, { "epoch": 72.87868852459016, "grad_norm": 5.754310131072998, "learning_rate": 3.616110372818262e-06, "loss": 0.3669, "step": 22228 }, { "epoch": 72.88196721311475, "grad_norm": 4.873456001281738, "learning_rate": 3.615293059606864e-06, "loss": 0.4585, "step": 22229 }, { "epoch": 72.88524590163935, "grad_norm": 5.312750816345215, "learning_rate": 3.6144758183896754e-06, "loss": 0.4563, "step": 22230 }, { "epoch": 72.88852459016394, "grad_norm": 5.043371200561523, "learning_rate": 3.6136586491759106e-06, "loss": 0.5188, "step": 22231 }, { "epoch": 72.89180327868853, "grad_norm": 4.915875434875488, "learning_rate": 3.612841551974785e-06, "loss": 0.2498, "step": 22232 }, { "epoch": 72.89508196721312, "grad_norm": 6.460194110870361, "learning_rate": 3.612024526795509e-06, "loss": 0.5501, "step": 22233 }, { "epoch": 72.8983606557377, "grad_norm": 4.951190948486328, "learning_rate": 3.6112075736473006e-06, "loss": 0.3702, "step": 22234 }, { "epoch": 72.90163934426229, "grad_norm": 4.861517906188965, "learning_rate": 3.6103906925393706e-06, "loss": 0.3372, "step": 22235 }, { "epoch": 72.90491803278688, "grad_norm": 4.821234703063965, "learning_rate": 3.609573883480928e-06, "loss": 0.1384, "step": 22236 }, { "epoch": 72.90819672131147, "grad_norm": 7.298126697540283, "learning_rate": 3.608757146481181e-06, "loss": 0.4445, "step": 22237 }, { "epoch": 72.91147540983607, "grad_norm": 4.990599155426025, "learning_rate": 3.607940481549347e-06, "loss": 0.4679, "step": 22238 }, { "epoch": 72.91475409836066, "grad_norm": 9.233766555786133, "learning_rate": 3.6071238886946293e-06, "loss": 0.3616, "step": 22239 }, { "epoch": 72.91803278688525, "grad_norm": 4.180367946624756, "learning_rate": 3.6063073679262363e-06, "loss": 0.3843, "step": 22240 }, { "epoch": 72.92131147540984, "grad_norm": 4.914295673370361, "learning_rate": 3.6054909192533728e-06, "loss": 0.3501, "step": 22241 }, { "epoch": 72.92459016393443, "grad_norm": 4.983980655670166, "learning_rate": 3.6046745426852502e-06, "loss": 0.4727, "step": 22242 }, { "epoch": 72.92786885245901, "grad_norm": 10.671295166015625, "learning_rate": 3.6038582382310725e-06, "loss": 0.4463, "step": 22243 }, { "epoch": 72.9311475409836, "grad_norm": 4.645769119262695, "learning_rate": 3.6030420059000435e-06, "loss": 0.4673, "step": 22244 }, { "epoch": 72.93442622950819, "grad_norm": 5.137565612792969, "learning_rate": 3.602225845701367e-06, "loss": 0.3828, "step": 22245 }, { "epoch": 72.9377049180328, "grad_norm": 6.531506538391113, "learning_rate": 3.601409757644242e-06, "loss": 0.3665, "step": 22246 }, { "epoch": 72.94098360655738, "grad_norm": 4.233063697814941, "learning_rate": 3.6005937417378787e-06, "loss": 0.2463, "step": 22247 }, { "epoch": 72.94426229508197, "grad_norm": 5.424017429351807, "learning_rate": 3.599777797991475e-06, "loss": 0.607, "step": 22248 }, { "epoch": 72.94754098360656, "grad_norm": 3.880143880844116, "learning_rate": 3.5989619264142316e-06, "loss": 0.4607, "step": 22249 }, { "epoch": 72.95081967213115, "grad_norm": 8.18531322479248, "learning_rate": 3.598146127015344e-06, "loss": 0.3814, "step": 22250 }, { "epoch": 72.95409836065573, "grad_norm": 6.176914215087891, "learning_rate": 3.5973303998040178e-06, "loss": 0.2045, "step": 22251 }, { "epoch": 72.95737704918032, "grad_norm": 4.1704277992248535, "learning_rate": 3.596514744789449e-06, "loss": 0.3068, "step": 22252 }, { "epoch": 72.96065573770491, "grad_norm": 5.787651062011719, "learning_rate": 3.5956991619808345e-06, "loss": 0.5376, "step": 22253 }, { "epoch": 72.96393442622951, "grad_norm": 4.755788803100586, "learning_rate": 3.5948836513873674e-06, "loss": 0.4589, "step": 22254 }, { "epoch": 72.9672131147541, "grad_norm": 3.879000425338745, "learning_rate": 3.594068213018249e-06, "loss": 0.4456, "step": 22255 }, { "epoch": 72.97049180327869, "grad_norm": 5.048112869262695, "learning_rate": 3.5932528468826734e-06, "loss": 0.3561, "step": 22256 }, { "epoch": 72.97377049180328, "grad_norm": 4.748424053192139, "learning_rate": 3.5924375529898338e-06, "loss": 0.577, "step": 22257 }, { "epoch": 72.97704918032787, "grad_norm": 4.47221565246582, "learning_rate": 3.591622331348922e-06, "loss": 0.5072, "step": 22258 }, { "epoch": 72.98032786885246, "grad_norm": 5.232933044433594, "learning_rate": 3.590807181969128e-06, "loss": 0.2305, "step": 22259 }, { "epoch": 72.98360655737704, "grad_norm": 4.657144546508789, "learning_rate": 3.5899921048596496e-06, "loss": 0.5699, "step": 22260 }, { "epoch": 72.98688524590163, "grad_norm": 4.782695770263672, "learning_rate": 3.589177100029676e-06, "loss": 0.4993, "step": 22261 }, { "epoch": 72.99016393442623, "grad_norm": 12.166872024536133, "learning_rate": 3.588362167488396e-06, "loss": 0.3891, "step": 22262 }, { "epoch": 72.99344262295082, "grad_norm": 5.124240398406982, "learning_rate": 3.5875473072449964e-06, "loss": 0.3946, "step": 22263 }, { "epoch": 72.99672131147541, "grad_norm": 4.52684211730957, "learning_rate": 3.58673251930867e-06, "loss": 0.3961, "step": 22264 }, { "epoch": 73.0, "grad_norm": 5.52370548248291, "learning_rate": 3.585917803688603e-06, "loss": 0.4433, "step": 22265 }, { "epoch": 73.00327868852459, "grad_norm": 5.395651817321777, "learning_rate": 3.585103160393979e-06, "loss": 0.6117, "step": 22266 }, { "epoch": 73.00655737704918, "grad_norm": 7.668279647827148, "learning_rate": 3.5842885894339898e-06, "loss": 0.1741, "step": 22267 }, { "epoch": 73.00983606557377, "grad_norm": 6.496634483337402, "learning_rate": 3.583474090817818e-06, "loss": 0.6227, "step": 22268 }, { "epoch": 73.01311475409837, "grad_norm": 4.624778747558594, "learning_rate": 3.582659664554643e-06, "loss": 0.4144, "step": 22269 }, { "epoch": 73.01639344262296, "grad_norm": 6.159994602203369, "learning_rate": 3.5818453106536566e-06, "loss": 0.4248, "step": 22270 }, { "epoch": 73.01967213114754, "grad_norm": 5.109714508056641, "learning_rate": 3.581031029124037e-06, "loss": 0.3823, "step": 22271 }, { "epoch": 73.02295081967213, "grad_norm": 5.275430679321289, "learning_rate": 3.580216819974963e-06, "loss": 0.446, "step": 22272 }, { "epoch": 73.02622950819672, "grad_norm": 5.976968288421631, "learning_rate": 3.5794026832156238e-06, "loss": 0.5493, "step": 22273 }, { "epoch": 73.02950819672131, "grad_norm": 6.2679948806762695, "learning_rate": 3.5785886188551945e-06, "loss": 0.4043, "step": 22274 }, { "epoch": 73.0327868852459, "grad_norm": 5.802515506744385, "learning_rate": 3.5777746269028545e-06, "loss": 0.379, "step": 22275 }, { "epoch": 73.03606557377049, "grad_norm": 5.750691890716553, "learning_rate": 3.5769607073677805e-06, "loss": 0.5674, "step": 22276 }, { "epoch": 73.03934426229509, "grad_norm": 5.237104415893555, "learning_rate": 3.5761468602591566e-06, "loss": 0.2688, "step": 22277 }, { "epoch": 73.04262295081968, "grad_norm": 16.125045776367188, "learning_rate": 3.5753330855861544e-06, "loss": 0.5658, "step": 22278 }, { "epoch": 73.04590163934427, "grad_norm": 49.18834686279297, "learning_rate": 3.5745193833579527e-06, "loss": 0.5321, "step": 22279 }, { "epoch": 73.04918032786885, "grad_norm": 5.988929271697998, "learning_rate": 3.573705753583723e-06, "loss": 0.1757, "step": 22280 }, { "epoch": 73.05245901639344, "grad_norm": 15.20003604888916, "learning_rate": 3.572892196272645e-06, "loss": 0.5753, "step": 22281 }, { "epoch": 73.05573770491803, "grad_norm": 5.488536357879639, "learning_rate": 3.5720787114338897e-06, "loss": 0.2743, "step": 22282 }, { "epoch": 73.05901639344262, "grad_norm": 8.632949829101562, "learning_rate": 3.5712652990766307e-06, "loss": 0.2811, "step": 22283 }, { "epoch": 73.0622950819672, "grad_norm": 6.210809230804443, "learning_rate": 3.5704519592100407e-06, "loss": 0.4849, "step": 22284 }, { "epoch": 73.06557377049181, "grad_norm": 5.084220886230469, "learning_rate": 3.5696386918432848e-06, "loss": 0.5123, "step": 22285 }, { "epoch": 73.0688524590164, "grad_norm": 5.195136070251465, "learning_rate": 3.568825496985543e-06, "loss": 0.332, "step": 22286 }, { "epoch": 73.07213114754099, "grad_norm": 4.521875858306885, "learning_rate": 3.5680123746459805e-06, "loss": 0.2675, "step": 22287 }, { "epoch": 73.07540983606557, "grad_norm": 5.543386936187744, "learning_rate": 3.5671993248337654e-06, "loss": 0.4693, "step": 22288 }, { "epoch": 73.07868852459016, "grad_norm": 4.43933629989624, "learning_rate": 3.566386347558063e-06, "loss": 0.4073, "step": 22289 }, { "epoch": 73.08196721311475, "grad_norm": 5.427707195281982, "learning_rate": 3.5655734428280474e-06, "loss": 0.6521, "step": 22290 }, { "epoch": 73.08524590163934, "grad_norm": 5.99733829498291, "learning_rate": 3.564760610652882e-06, "loss": 0.3673, "step": 22291 }, { "epoch": 73.08852459016393, "grad_norm": 5.228793621063232, "learning_rate": 3.5639478510417315e-06, "loss": 0.6726, "step": 22292 }, { "epoch": 73.09180327868853, "grad_norm": 4.826168537139893, "learning_rate": 3.563135164003757e-06, "loss": 0.4646, "step": 22293 }, { "epoch": 73.09508196721312, "grad_norm": 6.115041255950928, "learning_rate": 3.5623225495481296e-06, "loss": 0.2985, "step": 22294 }, { "epoch": 73.09836065573771, "grad_norm": 6.20712423324585, "learning_rate": 3.5615100076840093e-06, "loss": 0.6252, "step": 22295 }, { "epoch": 73.1016393442623, "grad_norm": 4.657607555389404, "learning_rate": 3.5606975384205568e-06, "loss": 0.4804, "step": 22296 }, { "epoch": 73.10491803278688, "grad_norm": 4.6041154861450195, "learning_rate": 3.5598851417669356e-06, "loss": 0.306, "step": 22297 }, { "epoch": 73.10819672131147, "grad_norm": 5.9046549797058105, "learning_rate": 3.559072817732303e-06, "loss": 0.3759, "step": 22298 }, { "epoch": 73.11147540983606, "grad_norm": 10.139131546020508, "learning_rate": 3.558260566325823e-06, "loss": 0.7026, "step": 22299 }, { "epoch": 73.11475409836065, "grad_norm": 5.7802042961120605, "learning_rate": 3.5574483875566547e-06, "loss": 0.4745, "step": 22300 }, { "epoch": 73.11803278688525, "grad_norm": 5.529344081878662, "learning_rate": 3.556636281433953e-06, "loss": 0.4683, "step": 22301 }, { "epoch": 73.12131147540984, "grad_norm": 6.193140506744385, "learning_rate": 3.5558242479668736e-06, "loss": 0.4148, "step": 22302 }, { "epoch": 73.12459016393443, "grad_norm": 5.830481052398682, "learning_rate": 3.5550122871645786e-06, "loss": 0.2939, "step": 22303 }, { "epoch": 73.12786885245902, "grad_norm": 6.212697982788086, "learning_rate": 3.554200399036223e-06, "loss": 0.5359, "step": 22304 }, { "epoch": 73.1311475409836, "grad_norm": 6.436125755310059, "learning_rate": 3.5533885835909587e-06, "loss": 0.2744, "step": 22305 }, { "epoch": 73.1344262295082, "grad_norm": 5.220319747924805, "learning_rate": 3.552576840837938e-06, "loss": 0.299, "step": 22306 }, { "epoch": 73.13770491803278, "grad_norm": 4.920162200927734, "learning_rate": 3.55176517078632e-06, "loss": 0.4981, "step": 22307 }, { "epoch": 73.14098360655737, "grad_norm": 5.602513313293457, "learning_rate": 3.550953573445254e-06, "loss": 0.5721, "step": 22308 }, { "epoch": 73.14426229508197, "grad_norm": 6.154265403747559, "learning_rate": 3.5501420488238926e-06, "loss": 0.5378, "step": 22309 }, { "epoch": 73.14754098360656, "grad_norm": 4.084702014923096, "learning_rate": 3.5493305969313852e-06, "loss": 0.446, "step": 22310 }, { "epoch": 73.15081967213115, "grad_norm": 6.236200332641602, "learning_rate": 3.548519217776879e-06, "loss": 0.4765, "step": 22311 }, { "epoch": 73.15409836065574, "grad_norm": 6.134805202484131, "learning_rate": 3.54770791136953e-06, "loss": 0.3753, "step": 22312 }, { "epoch": 73.15737704918033, "grad_norm": 5.493119716644287, "learning_rate": 3.546896677718482e-06, "loss": 0.3081, "step": 22313 }, { "epoch": 73.16065573770491, "grad_norm": 5.578518390655518, "learning_rate": 3.5460855168328843e-06, "loss": 0.4262, "step": 22314 }, { "epoch": 73.1639344262295, "grad_norm": 4.389149188995361, "learning_rate": 3.5452744287218798e-06, "loss": 0.1778, "step": 22315 }, { "epoch": 73.1672131147541, "grad_norm": 6.250360488891602, "learning_rate": 3.544463413394621e-06, "loss": 0.3078, "step": 22316 }, { "epoch": 73.1704918032787, "grad_norm": 6.765114784240723, "learning_rate": 3.543652470860248e-06, "loss": 0.2593, "step": 22317 }, { "epoch": 73.17377049180328, "grad_norm": 4.416299819946289, "learning_rate": 3.5428416011279043e-06, "loss": 0.3591, "step": 22318 }, { "epoch": 73.17704918032787, "grad_norm": 6.990957736968994, "learning_rate": 3.5420308042067375e-06, "loss": 0.3648, "step": 22319 }, { "epoch": 73.18032786885246, "grad_norm": 13.42894458770752, "learning_rate": 3.5412200801058894e-06, "loss": 0.4236, "step": 22320 }, { "epoch": 73.18360655737705, "grad_norm": 4.183753967285156, "learning_rate": 3.540409428834496e-06, "loss": 0.4008, "step": 22321 }, { "epoch": 73.18688524590164, "grad_norm": 5.338871955871582, "learning_rate": 3.539598850401705e-06, "loss": 0.3481, "step": 22322 }, { "epoch": 73.19016393442622, "grad_norm": 6.09295129776001, "learning_rate": 3.538788344816656e-06, "loss": 0.5068, "step": 22323 }, { "epoch": 73.19344262295083, "grad_norm": 5.004644870758057, "learning_rate": 3.537977912088486e-06, "loss": 0.3677, "step": 22324 }, { "epoch": 73.19672131147541, "grad_norm": 4.113742828369141, "learning_rate": 3.5371675522263296e-06, "loss": 0.3394, "step": 22325 }, { "epoch": 73.2, "grad_norm": 5.758944988250732, "learning_rate": 3.536357265239333e-06, "loss": 0.6983, "step": 22326 }, { "epoch": 73.20327868852459, "grad_norm": 26.78931999206543, "learning_rate": 3.5355470511366272e-06, "loss": 0.4799, "step": 22327 }, { "epoch": 73.20655737704918, "grad_norm": 8.545838356018066, "learning_rate": 3.5347369099273475e-06, "loss": 0.3044, "step": 22328 }, { "epoch": 73.20983606557377, "grad_norm": 5.644626617431641, "learning_rate": 3.533926841620635e-06, "loss": 0.3064, "step": 22329 }, { "epoch": 73.21311475409836, "grad_norm": 9.472677230834961, "learning_rate": 3.5331168462256204e-06, "loss": 0.476, "step": 22330 }, { "epoch": 73.21639344262294, "grad_norm": 8.988320350646973, "learning_rate": 3.5323069237514362e-06, "loss": 0.297, "step": 22331 }, { "epoch": 73.21967213114755, "grad_norm": 7.140109539031982, "learning_rate": 3.531497074207214e-06, "loss": 0.3122, "step": 22332 }, { "epoch": 73.22295081967214, "grad_norm": 10.288620948791504, "learning_rate": 3.53068729760209e-06, "loss": 0.4322, "step": 22333 }, { "epoch": 73.22622950819672, "grad_norm": 5.147743225097656, "learning_rate": 3.5298775939451945e-06, "loss": 0.3629, "step": 22334 }, { "epoch": 73.22950819672131, "grad_norm": 4.539096832275391, "learning_rate": 3.529067963245656e-06, "loss": 0.214, "step": 22335 }, { "epoch": 73.2327868852459, "grad_norm": 5.0239973068237305, "learning_rate": 3.5282584055126035e-06, "loss": 0.2274, "step": 22336 }, { "epoch": 73.23606557377049, "grad_norm": 7.212414741516113, "learning_rate": 3.5274489207551632e-06, "loss": 0.5154, "step": 22337 }, { "epoch": 73.23934426229508, "grad_norm": 6.990869522094727, "learning_rate": 3.52663950898247e-06, "loss": 0.4619, "step": 22338 }, { "epoch": 73.24262295081967, "grad_norm": 4.528573036193848, "learning_rate": 3.5258301702036468e-06, "loss": 0.2002, "step": 22339 }, { "epoch": 73.24590163934427, "grad_norm": 5.574525356292725, "learning_rate": 3.5250209044278196e-06, "loss": 0.3136, "step": 22340 }, { "epoch": 73.24918032786886, "grad_norm": 4.695608615875244, "learning_rate": 3.5242117116641106e-06, "loss": 0.3215, "step": 22341 }, { "epoch": 73.25245901639344, "grad_norm": 6.2972092628479, "learning_rate": 3.523402591921651e-06, "loss": 0.4034, "step": 22342 }, { "epoch": 73.25573770491803, "grad_norm": 4.158177852630615, "learning_rate": 3.5225935452095616e-06, "loss": 0.3391, "step": 22343 }, { "epoch": 73.25901639344262, "grad_norm": 6.312991142272949, "learning_rate": 3.5217845715369646e-06, "loss": 0.4747, "step": 22344 }, { "epoch": 73.26229508196721, "grad_norm": 5.661832332611084, "learning_rate": 3.520975670912978e-06, "loss": 0.4253, "step": 22345 }, { "epoch": 73.2655737704918, "grad_norm": 4.336394309997559, "learning_rate": 3.5201668433467315e-06, "loss": 0.2364, "step": 22346 }, { "epoch": 73.26885245901639, "grad_norm": 5.546006679534912, "learning_rate": 3.519358088847341e-06, "loss": 0.4665, "step": 22347 }, { "epoch": 73.27213114754099, "grad_norm": 4.4003987312316895, "learning_rate": 3.518549407423927e-06, "loss": 0.2331, "step": 22348 }, { "epoch": 73.27540983606558, "grad_norm": 5.323419570922852, "learning_rate": 3.517740799085607e-06, "loss": 0.4279, "step": 22349 }, { "epoch": 73.27868852459017, "grad_norm": 4.188053607940674, "learning_rate": 3.516932263841495e-06, "loss": 0.454, "step": 22350 }, { "epoch": 73.28196721311475, "grad_norm": 5.439333438873291, "learning_rate": 3.5161238017007173e-06, "loss": 0.3689, "step": 22351 }, { "epoch": 73.28524590163934, "grad_norm": 4.585213661193848, "learning_rate": 3.515315412672384e-06, "loss": 0.4413, "step": 22352 }, { "epoch": 73.28852459016393, "grad_norm": 6.757694721221924, "learning_rate": 3.5145070967656126e-06, "loss": 0.5044, "step": 22353 }, { "epoch": 73.29180327868852, "grad_norm": 3.7873947620391846, "learning_rate": 3.5136988539895135e-06, "loss": 0.5432, "step": 22354 }, { "epoch": 73.29508196721312, "grad_norm": 9.403360366821289, "learning_rate": 3.5128906843532063e-06, "loss": 0.4332, "step": 22355 }, { "epoch": 73.29836065573771, "grad_norm": 7.94218635559082, "learning_rate": 3.5120825878658026e-06, "loss": 0.4721, "step": 22356 }, { "epoch": 73.3016393442623, "grad_norm": 5.870996475219727, "learning_rate": 3.511274564536413e-06, "loss": 0.3281, "step": 22357 }, { "epoch": 73.30491803278689, "grad_norm": 6.132272720336914, "learning_rate": 3.5104666143741462e-06, "loss": 0.3531, "step": 22358 }, { "epoch": 73.30819672131148, "grad_norm": 5.015341281890869, "learning_rate": 3.5096587373881187e-06, "loss": 0.4318, "step": 22359 }, { "epoch": 73.31147540983606, "grad_norm": 4.180283546447754, "learning_rate": 3.508850933587438e-06, "loss": 0.2143, "step": 22360 }, { "epoch": 73.31475409836065, "grad_norm": 5.114556789398193, "learning_rate": 3.5080432029812105e-06, "loss": 0.4859, "step": 22361 }, { "epoch": 73.31803278688524, "grad_norm": 5.248011589050293, "learning_rate": 3.5072355455785465e-06, "loss": 0.4879, "step": 22362 }, { "epoch": 73.32131147540984, "grad_norm": 12.194134712219238, "learning_rate": 3.50642796138855e-06, "loss": 0.4837, "step": 22363 }, { "epoch": 73.32459016393443, "grad_norm": 5.458794116973877, "learning_rate": 3.5056204504203327e-06, "loss": 0.3598, "step": 22364 }, { "epoch": 73.32786885245902, "grad_norm": 5.231746673583984, "learning_rate": 3.504813012682997e-06, "loss": 0.4278, "step": 22365 }, { "epoch": 73.33114754098361, "grad_norm": 5.3704118728637695, "learning_rate": 3.504005648185649e-06, "loss": 0.4093, "step": 22366 }, { "epoch": 73.3344262295082, "grad_norm": 4.267362117767334, "learning_rate": 3.5031983569373874e-06, "loss": 0.3014, "step": 22367 }, { "epoch": 73.33770491803278, "grad_norm": 4.391578674316406, "learning_rate": 3.502391138947322e-06, "loss": 0.4904, "step": 22368 }, { "epoch": 73.34098360655737, "grad_norm": 4.180299758911133, "learning_rate": 3.5015839942245533e-06, "loss": 0.3401, "step": 22369 }, { "epoch": 73.34426229508196, "grad_norm": 5.545614719390869, "learning_rate": 3.5007769227781805e-06, "loss": 0.3623, "step": 22370 }, { "epoch": 73.34754098360656, "grad_norm": 4.947173595428467, "learning_rate": 3.4999699246173038e-06, "loss": 0.1966, "step": 22371 }, { "epoch": 73.35081967213115, "grad_norm": 6.290356636047363, "learning_rate": 3.4991629997510256e-06, "loss": 0.4225, "step": 22372 }, { "epoch": 73.35409836065574, "grad_norm": 6.826450824737549, "learning_rate": 3.4983561481884453e-06, "loss": 0.4556, "step": 22373 }, { "epoch": 73.35737704918033, "grad_norm": 5.262472152709961, "learning_rate": 3.4975493699386563e-06, "loss": 0.447, "step": 22374 }, { "epoch": 73.36065573770492, "grad_norm": 5.289665222167969, "learning_rate": 3.4967426650107615e-06, "loss": 0.2179, "step": 22375 }, { "epoch": 73.3639344262295, "grad_norm": 11.033914566040039, "learning_rate": 3.495936033413856e-06, "loss": 0.3895, "step": 22376 }, { "epoch": 73.3672131147541, "grad_norm": 4.845686435699463, "learning_rate": 3.49512947515703e-06, "loss": 0.4678, "step": 22377 }, { "epoch": 73.37049180327868, "grad_norm": 4.510763645172119, "learning_rate": 3.494322990249386e-06, "loss": 0.3376, "step": 22378 }, { "epoch": 73.37377049180328, "grad_norm": 5.790955066680908, "learning_rate": 3.4935165787000146e-06, "loss": 0.5654, "step": 22379 }, { "epoch": 73.37704918032787, "grad_norm": 4.574428081512451, "learning_rate": 3.4927102405180046e-06, "loss": 0.4772, "step": 22380 }, { "epoch": 73.38032786885246, "grad_norm": 6.758782863616943, "learning_rate": 3.4919039757124573e-06, "loss": 0.51, "step": 22381 }, { "epoch": 73.38360655737705, "grad_norm": 5.534375190734863, "learning_rate": 3.491097784292459e-06, "loss": 0.4209, "step": 22382 }, { "epoch": 73.38688524590164, "grad_norm": 5.539991855621338, "learning_rate": 3.4902916662671003e-06, "loss": 0.3221, "step": 22383 }, { "epoch": 73.39016393442623, "grad_norm": 7.210609436035156, "learning_rate": 3.489485621645469e-06, "loss": 0.3848, "step": 22384 }, { "epoch": 73.39344262295081, "grad_norm": 6.4369893074035645, "learning_rate": 3.4886796504366584e-06, "loss": 0.3889, "step": 22385 }, { "epoch": 73.3967213114754, "grad_norm": 4.839920520782471, "learning_rate": 3.487873752649756e-06, "loss": 0.5421, "step": 22386 }, { "epoch": 73.4, "grad_norm": 5.445734977722168, "learning_rate": 3.487067928293848e-06, "loss": 0.3855, "step": 22387 }, { "epoch": 73.4032786885246, "grad_norm": 6.474841117858887, "learning_rate": 3.48626217737802e-06, "loss": 0.3149, "step": 22388 }, { "epoch": 73.40655737704918, "grad_norm": 8.030665397644043, "learning_rate": 3.485456499911356e-06, "loss": 0.2697, "step": 22389 }, { "epoch": 73.40983606557377, "grad_norm": 5.243459701538086, "learning_rate": 3.4846508959029457e-06, "loss": 0.4265, "step": 22390 }, { "epoch": 73.41311475409836, "grad_norm": 4.956374645233154, "learning_rate": 3.483845365361872e-06, "loss": 0.6109, "step": 22391 }, { "epoch": 73.41639344262295, "grad_norm": 7.173008441925049, "learning_rate": 3.4830399082972165e-06, "loss": 0.5047, "step": 22392 }, { "epoch": 73.41967213114754, "grad_norm": 4.867795944213867, "learning_rate": 3.482234524718059e-06, "loss": 0.5239, "step": 22393 }, { "epoch": 73.42295081967212, "grad_norm": 3.815126419067383, "learning_rate": 3.481429214633486e-06, "loss": 0.4962, "step": 22394 }, { "epoch": 73.42622950819673, "grad_norm": 5.273698806762695, "learning_rate": 3.4806239780525776e-06, "loss": 0.4071, "step": 22395 }, { "epoch": 73.42950819672132, "grad_norm": 4.752806186676025, "learning_rate": 3.4798188149844115e-06, "loss": 0.3805, "step": 22396 }, { "epoch": 73.4327868852459, "grad_norm": 8.751225471496582, "learning_rate": 3.4790137254380653e-06, "loss": 0.446, "step": 22397 }, { "epoch": 73.43606557377049, "grad_norm": 5.034787654876709, "learning_rate": 3.4782087094226213e-06, "loss": 0.5526, "step": 22398 }, { "epoch": 73.43934426229508, "grad_norm": 4.899263858795166, "learning_rate": 3.477403766947156e-06, "loss": 0.4173, "step": 22399 }, { "epoch": 73.44262295081967, "grad_norm": 5.166329860687256, "learning_rate": 3.476598898020744e-06, "loss": 0.2841, "step": 22400 }, { "epoch": 73.44590163934426, "grad_norm": 4.734201908111572, "learning_rate": 3.4757941026524634e-06, "loss": 0.5717, "step": 22401 }, { "epoch": 73.44918032786886, "grad_norm": 4.6686296463012695, "learning_rate": 3.4749893808513848e-06, "loss": 0.6538, "step": 22402 }, { "epoch": 73.45245901639345, "grad_norm": 9.234883308410645, "learning_rate": 3.4741847326265878e-06, "loss": 0.2576, "step": 22403 }, { "epoch": 73.45573770491804, "grad_norm": 5.513560771942139, "learning_rate": 3.4733801579871428e-06, "loss": 0.4802, "step": 22404 }, { "epoch": 73.45901639344262, "grad_norm": 5.157856464385986, "learning_rate": 3.472575656942122e-06, "loss": 0.3764, "step": 22405 }, { "epoch": 73.46229508196721, "grad_norm": 4.48374605178833, "learning_rate": 3.4717712295005957e-06, "loss": 0.4392, "step": 22406 }, { "epoch": 73.4655737704918, "grad_norm": 4.649751663208008, "learning_rate": 3.470966875671639e-06, "loss": 0.4006, "step": 22407 }, { "epoch": 73.46885245901639, "grad_norm": 4.968215465545654, "learning_rate": 3.4701625954643182e-06, "loss": 0.5728, "step": 22408 }, { "epoch": 73.47213114754098, "grad_norm": 4.0905585289001465, "learning_rate": 3.4693583888877045e-06, "loss": 0.2889, "step": 22409 }, { "epoch": 73.47540983606558, "grad_norm": 4.588062763214111, "learning_rate": 3.468554255950862e-06, "loss": 0.4628, "step": 22410 }, { "epoch": 73.47868852459017, "grad_norm": 8.343597412109375, "learning_rate": 3.4677501966628645e-06, "loss": 0.2137, "step": 22411 }, { "epoch": 73.48196721311476, "grad_norm": 4.292954921722412, "learning_rate": 3.4669462110327758e-06, "loss": 0.4784, "step": 22412 }, { "epoch": 73.48524590163935, "grad_norm": 4.809937000274658, "learning_rate": 3.4661422990696604e-06, "loss": 0.4849, "step": 22413 }, { "epoch": 73.48852459016393, "grad_norm": 5.222132205963135, "learning_rate": 3.4653384607825833e-06, "loss": 0.448, "step": 22414 }, { "epoch": 73.49180327868852, "grad_norm": 5.498626708984375, "learning_rate": 3.4645346961806083e-06, "loss": 0.2431, "step": 22415 }, { "epoch": 73.49508196721311, "grad_norm": 4.5971503257751465, "learning_rate": 3.4637310052728015e-06, "loss": 0.5103, "step": 22416 }, { "epoch": 73.4983606557377, "grad_norm": 7.933569431304932, "learning_rate": 3.4629273880682244e-06, "loss": 0.3776, "step": 22417 }, { "epoch": 73.5016393442623, "grad_norm": 5.455934047698975, "learning_rate": 3.4621238445759375e-06, "loss": 0.3804, "step": 22418 }, { "epoch": 73.50491803278689, "grad_norm": 5.682078838348389, "learning_rate": 3.4613203748049983e-06, "loss": 0.2823, "step": 22419 }, { "epoch": 73.50819672131148, "grad_norm": 10.031275749206543, "learning_rate": 3.4605169787644745e-06, "loss": 0.3193, "step": 22420 }, { "epoch": 73.51147540983607, "grad_norm": 5.373631477355957, "learning_rate": 3.4597136564634203e-06, "loss": 0.3667, "step": 22421 }, { "epoch": 73.51475409836065, "grad_norm": 4.392612934112549, "learning_rate": 3.4589104079108948e-06, "loss": 0.314, "step": 22422 }, { "epoch": 73.51803278688524, "grad_norm": 4.689985752105713, "learning_rate": 3.4581072331159527e-06, "loss": 0.2987, "step": 22423 }, { "epoch": 73.52131147540983, "grad_norm": 5.157425880432129, "learning_rate": 3.4573041320876566e-06, "loss": 0.3135, "step": 22424 }, { "epoch": 73.52459016393442, "grad_norm": 5.469537258148193, "learning_rate": 3.4565011048350593e-06, "loss": 0.5009, "step": 22425 }, { "epoch": 73.52786885245902, "grad_norm": 4.957327842712402, "learning_rate": 3.455698151367215e-06, "loss": 0.3761, "step": 22426 }, { "epoch": 73.53114754098361, "grad_norm": 5.658586025238037, "learning_rate": 3.4548952716931762e-06, "loss": 0.4575, "step": 22427 }, { "epoch": 73.5344262295082, "grad_norm": 4.283336639404297, "learning_rate": 3.4540924658220008e-06, "loss": 0.2215, "step": 22428 }, { "epoch": 73.53770491803279, "grad_norm": 6.547520160675049, "learning_rate": 3.453289733762739e-06, "loss": 0.3821, "step": 22429 }, { "epoch": 73.54098360655738, "grad_norm": 5.596902370452881, "learning_rate": 3.452487075524439e-06, "loss": 0.4435, "step": 22430 }, { "epoch": 73.54426229508196, "grad_norm": 5.84894323348999, "learning_rate": 3.4516844911161584e-06, "loss": 0.4956, "step": 22431 }, { "epoch": 73.54754098360655, "grad_norm": 15.358674049377441, "learning_rate": 3.450881980546944e-06, "loss": 0.3087, "step": 22432 }, { "epoch": 73.55081967213114, "grad_norm": 6.624294757843018, "learning_rate": 3.4500795438258404e-06, "loss": 0.2831, "step": 22433 }, { "epoch": 73.55409836065574, "grad_norm": 9.44308090209961, "learning_rate": 3.449277180961904e-06, "loss": 0.4243, "step": 22434 }, { "epoch": 73.55737704918033, "grad_norm": 5.628016948699951, "learning_rate": 3.4484748919641786e-06, "loss": 0.4977, "step": 22435 }, { "epoch": 73.56065573770492, "grad_norm": 4.560170650482178, "learning_rate": 3.447672676841707e-06, "loss": 0.5628, "step": 22436 }, { "epoch": 73.56393442622951, "grad_norm": 6.30534553527832, "learning_rate": 3.4468705356035427e-06, "loss": 0.1615, "step": 22437 }, { "epoch": 73.5672131147541, "grad_norm": 5.460744857788086, "learning_rate": 3.4460684682587253e-06, "loss": 0.5023, "step": 22438 }, { "epoch": 73.57049180327868, "grad_norm": 5.597886085510254, "learning_rate": 3.4452664748163013e-06, "loss": 0.3783, "step": 22439 }, { "epoch": 73.57377049180327, "grad_norm": 3.5914058685302734, "learning_rate": 3.444464555285313e-06, "loss": 0.2238, "step": 22440 }, { "epoch": 73.57704918032788, "grad_norm": 6.406837463378906, "learning_rate": 3.4436627096747997e-06, "loss": 0.5765, "step": 22441 }, { "epoch": 73.58032786885246, "grad_norm": 7.018566131591797, "learning_rate": 3.4428609379938095e-06, "loss": 0.3078, "step": 22442 }, { "epoch": 73.58360655737705, "grad_norm": 6.950665473937988, "learning_rate": 3.44205924025138e-06, "loss": 0.3055, "step": 22443 }, { "epoch": 73.58688524590164, "grad_norm": 5.359357833862305, "learning_rate": 3.4412576164565503e-06, "loss": 0.4794, "step": 22444 }, { "epoch": 73.59016393442623, "grad_norm": 5.686385631561279, "learning_rate": 3.4404560666183574e-06, "loss": 0.3145, "step": 22445 }, { "epoch": 73.59344262295082, "grad_norm": 5.424103260040283, "learning_rate": 3.439654590745847e-06, "loss": 0.5652, "step": 22446 }, { "epoch": 73.5967213114754, "grad_norm": 10.914361953735352, "learning_rate": 3.4388531888480505e-06, "loss": 0.4472, "step": 22447 }, { "epoch": 73.6, "grad_norm": 4.419134140014648, "learning_rate": 3.4380518609340076e-06, "loss": 0.3285, "step": 22448 }, { "epoch": 73.6032786885246, "grad_norm": 3.999289035797119, "learning_rate": 3.4372506070127476e-06, "loss": 0.3405, "step": 22449 }, { "epoch": 73.60655737704919, "grad_norm": 4.478139877319336, "learning_rate": 3.4364494270933156e-06, "loss": 0.3981, "step": 22450 }, { "epoch": 73.60983606557377, "grad_norm": 8.400667190551758, "learning_rate": 3.43564832118474e-06, "loss": 0.368, "step": 22451 }, { "epoch": 73.61311475409836, "grad_norm": 3.5598409175872803, "learning_rate": 3.434847289296055e-06, "loss": 0.4273, "step": 22452 }, { "epoch": 73.61639344262295, "grad_norm": 7.080078125, "learning_rate": 3.434046331436293e-06, "loss": 0.4062, "step": 22453 }, { "epoch": 73.61967213114754, "grad_norm": 5.087602615356445, "learning_rate": 3.4332454476144815e-06, "loss": 0.1881, "step": 22454 }, { "epoch": 73.62295081967213, "grad_norm": 5.2658915519714355, "learning_rate": 3.4324446378396582e-06, "loss": 0.4173, "step": 22455 }, { "epoch": 73.62622950819672, "grad_norm": 8.126334190368652, "learning_rate": 3.4316439021208514e-06, "loss": 0.1855, "step": 22456 }, { "epoch": 73.62950819672132, "grad_norm": 4.89491605758667, "learning_rate": 3.4308432404670887e-06, "loss": 0.3992, "step": 22457 }, { "epoch": 73.6327868852459, "grad_norm": 6.314676284790039, "learning_rate": 3.4300426528873952e-06, "loss": 0.345, "step": 22458 }, { "epoch": 73.6360655737705, "grad_norm": 4.768571376800537, "learning_rate": 3.4292421393908047e-06, "loss": 0.331, "step": 22459 }, { "epoch": 73.63934426229508, "grad_norm": 5.025161266326904, "learning_rate": 3.4284416999863413e-06, "loss": 0.4349, "step": 22460 }, { "epoch": 73.64262295081967, "grad_norm": 5.614340305328369, "learning_rate": 3.4276413346830307e-06, "loss": 0.2989, "step": 22461 }, { "epoch": 73.64590163934426, "grad_norm": 16.939815521240234, "learning_rate": 3.4268410434898937e-06, "loss": 0.4143, "step": 22462 }, { "epoch": 73.64918032786885, "grad_norm": 5.338545322418213, "learning_rate": 3.4260408264159618e-06, "loss": 0.3881, "step": 22463 }, { "epoch": 73.65245901639344, "grad_norm": 5.48234224319458, "learning_rate": 3.4252406834702555e-06, "loss": 0.3242, "step": 22464 }, { "epoch": 73.65573770491804, "grad_norm": 5.764184474945068, "learning_rate": 3.424440614661796e-06, "loss": 0.4041, "step": 22465 }, { "epoch": 73.65901639344263, "grad_norm": 4.843843936920166, "learning_rate": 3.4236406199996054e-06, "loss": 0.5147, "step": 22466 }, { "epoch": 73.66229508196722, "grad_norm": 27.681142807006836, "learning_rate": 3.422840699492701e-06, "loss": 0.4775, "step": 22467 }, { "epoch": 73.6655737704918, "grad_norm": 4.669328212738037, "learning_rate": 3.42204085315011e-06, "loss": 0.4297, "step": 22468 }, { "epoch": 73.66885245901639, "grad_norm": 9.350650787353516, "learning_rate": 3.421241080980847e-06, "loss": 0.4463, "step": 22469 }, { "epoch": 73.67213114754098, "grad_norm": 5.322708606719971, "learning_rate": 3.420441382993932e-06, "loss": 0.3289, "step": 22470 }, { "epoch": 73.67540983606557, "grad_norm": 5.037952423095703, "learning_rate": 3.419641759198378e-06, "loss": 0.5483, "step": 22471 }, { "epoch": 73.67868852459016, "grad_norm": 5.405625820159912, "learning_rate": 3.418842209603208e-06, "loss": 0.5443, "step": 22472 }, { "epoch": 73.68196721311476, "grad_norm": 5.921741485595703, "learning_rate": 3.418042734217435e-06, "loss": 0.4012, "step": 22473 }, { "epoch": 73.68524590163935, "grad_norm": 5.030135154724121, "learning_rate": 3.4172433330500732e-06, "loss": 0.3684, "step": 22474 }, { "epoch": 73.68852459016394, "grad_norm": 6.134808540344238, "learning_rate": 3.4164440061101335e-06, "loss": 0.332, "step": 22475 }, { "epoch": 73.69180327868852, "grad_norm": 7.172484874725342, "learning_rate": 3.4156447534066372e-06, "loss": 0.5185, "step": 22476 }, { "epoch": 73.69508196721311, "grad_norm": 7.403911590576172, "learning_rate": 3.414845574948592e-06, "loss": 0.1921, "step": 22477 }, { "epoch": 73.6983606557377, "grad_norm": 10.821000099182129, "learning_rate": 3.4140464707450096e-06, "loss": 0.5956, "step": 22478 }, { "epoch": 73.70163934426229, "grad_norm": 4.787607192993164, "learning_rate": 3.4132474408048976e-06, "loss": 0.4602, "step": 22479 }, { "epoch": 73.70491803278688, "grad_norm": 4.704704761505127, "learning_rate": 3.412448485137273e-06, "loss": 0.196, "step": 22480 }, { "epoch": 73.70819672131148, "grad_norm": 6.6157941818237305, "learning_rate": 3.4116496037511405e-06, "loss": 0.6853, "step": 22481 }, { "epoch": 73.71147540983607, "grad_norm": 4.92172384262085, "learning_rate": 3.410850796655505e-06, "loss": 0.3896, "step": 22482 }, { "epoch": 73.71475409836066, "grad_norm": 5.584526062011719, "learning_rate": 3.4100520638593827e-06, "loss": 0.2241, "step": 22483 }, { "epoch": 73.71803278688525, "grad_norm": 5.456070423126221, "learning_rate": 3.409253405371774e-06, "loss": 0.3111, "step": 22484 }, { "epoch": 73.72131147540983, "grad_norm": 5.816794395446777, "learning_rate": 3.4084548212016823e-06, "loss": 0.386, "step": 22485 }, { "epoch": 73.72459016393442, "grad_norm": 5.97661828994751, "learning_rate": 3.4076563113581196e-06, "loss": 0.5119, "step": 22486 }, { "epoch": 73.72786885245901, "grad_norm": 4.936496257781982, "learning_rate": 3.406857875850087e-06, "loss": 0.359, "step": 22487 }, { "epoch": 73.73114754098361, "grad_norm": 5.024725914001465, "learning_rate": 3.406059514686586e-06, "loss": 0.5621, "step": 22488 }, { "epoch": 73.7344262295082, "grad_norm": 5.607078552246094, "learning_rate": 3.405261227876617e-06, "loss": 0.4812, "step": 22489 }, { "epoch": 73.73770491803279, "grad_norm": 6.425148010253906, "learning_rate": 3.404463015429188e-06, "loss": 0.3627, "step": 22490 }, { "epoch": 73.74098360655738, "grad_norm": 5.744532585144043, "learning_rate": 3.4036648773532967e-06, "loss": 0.5156, "step": 22491 }, { "epoch": 73.74426229508197, "grad_norm": 5.1511149406433105, "learning_rate": 3.402866813657941e-06, "loss": 0.3068, "step": 22492 }, { "epoch": 73.74754098360656, "grad_norm": 5.4006028175354, "learning_rate": 3.402068824352119e-06, "loss": 0.445, "step": 22493 }, { "epoch": 73.75081967213114, "grad_norm": 13.43580150604248, "learning_rate": 3.401270909444835e-06, "loss": 0.2858, "step": 22494 }, { "epoch": 73.75409836065573, "grad_norm": 4.832684516906738, "learning_rate": 3.4004730689450817e-06, "loss": 0.2319, "step": 22495 }, { "epoch": 73.75737704918033, "grad_norm": 5.401019096374512, "learning_rate": 3.399675302861856e-06, "loss": 0.518, "step": 22496 }, { "epoch": 73.76065573770492, "grad_norm": 4.883216857910156, "learning_rate": 3.3988776112041497e-06, "loss": 0.4541, "step": 22497 }, { "epoch": 73.76393442622951, "grad_norm": 11.014505386352539, "learning_rate": 3.3980799939809674e-06, "loss": 0.2325, "step": 22498 }, { "epoch": 73.7672131147541, "grad_norm": 4.627716064453125, "learning_rate": 3.3972824512012957e-06, "loss": 0.1807, "step": 22499 }, { "epoch": 73.77049180327869, "grad_norm": 6.927231788635254, "learning_rate": 3.39648498287413e-06, "loss": 0.4201, "step": 22500 }, { "epoch": 73.77377049180328, "grad_norm": 6.139435291290283, "learning_rate": 3.3956875890084586e-06, "loss": 0.5451, "step": 22501 }, { "epoch": 73.77704918032786, "grad_norm": 5.149401664733887, "learning_rate": 3.3948902696132803e-06, "loss": 0.231, "step": 22502 }, { "epoch": 73.78032786885245, "grad_norm": 5.80362606048584, "learning_rate": 3.394093024697581e-06, "loss": 0.5683, "step": 22503 }, { "epoch": 73.78360655737706, "grad_norm": 4.618653774261475, "learning_rate": 3.393295854270352e-06, "loss": 0.5816, "step": 22504 }, { "epoch": 73.78688524590164, "grad_norm": 6.075286388397217, "learning_rate": 3.392498758340581e-06, "loss": 0.4496, "step": 22505 }, { "epoch": 73.79016393442623, "grad_norm": 5.327391147613525, "learning_rate": 3.3917017369172533e-06, "loss": 0.2891, "step": 22506 }, { "epoch": 73.79344262295082, "grad_norm": 5.211227893829346, "learning_rate": 3.390904790009363e-06, "loss": 0.4731, "step": 22507 }, { "epoch": 73.79672131147541, "grad_norm": 6.506764888763428, "learning_rate": 3.3901079176258923e-06, "loss": 0.4665, "step": 22508 }, { "epoch": 73.8, "grad_norm": 4.4464240074157715, "learning_rate": 3.3893111197758276e-06, "loss": 0.582, "step": 22509 }, { "epoch": 73.80327868852459, "grad_norm": 5.868465900421143, "learning_rate": 3.3885143964681513e-06, "loss": 0.4881, "step": 22510 }, { "epoch": 73.80655737704917, "grad_norm": 17.176252365112305, "learning_rate": 3.387717747711852e-06, "loss": 0.4393, "step": 22511 }, { "epoch": 73.80983606557378, "grad_norm": 4.676990032196045, "learning_rate": 3.3869211735159093e-06, "loss": 0.5113, "step": 22512 }, { "epoch": 73.81311475409836, "grad_norm": 4.375192642211914, "learning_rate": 3.386124673889307e-06, "loss": 0.3353, "step": 22513 }, { "epoch": 73.81639344262295, "grad_norm": 6.517855644226074, "learning_rate": 3.385328248841022e-06, "loss": 0.4491, "step": 22514 }, { "epoch": 73.81967213114754, "grad_norm": 6.1782121658325195, "learning_rate": 3.3845318983800423e-06, "loss": 0.408, "step": 22515 }, { "epoch": 73.82295081967213, "grad_norm": 4.944215297698975, "learning_rate": 3.3837356225153426e-06, "loss": 0.4176, "step": 22516 }, { "epoch": 73.82622950819672, "grad_norm": 16.082429885864258, "learning_rate": 3.3829394212559043e-06, "loss": 0.4187, "step": 22517 }, { "epoch": 73.8295081967213, "grad_norm": 5.306273460388184, "learning_rate": 3.3821432946107035e-06, "loss": 0.3846, "step": 22518 }, { "epoch": 73.8327868852459, "grad_norm": 4.811059474945068, "learning_rate": 3.381347242588715e-06, "loss": 0.2357, "step": 22519 }, { "epoch": 73.8360655737705, "grad_norm": 31.856422424316406, "learning_rate": 3.3805512651989215e-06, "loss": 0.5357, "step": 22520 }, { "epoch": 73.83934426229509, "grad_norm": 5.945713043212891, "learning_rate": 3.3797553624502945e-06, "loss": 0.4579, "step": 22521 }, { "epoch": 73.84262295081967, "grad_norm": 5.6515703201293945, "learning_rate": 3.378959534351809e-06, "loss": 0.3821, "step": 22522 }, { "epoch": 73.84590163934426, "grad_norm": 4.537724018096924, "learning_rate": 3.3781637809124358e-06, "loss": 0.4712, "step": 22523 }, { "epoch": 73.84918032786885, "grad_norm": 5.050394058227539, "learning_rate": 3.377368102141154e-06, "loss": 0.415, "step": 22524 }, { "epoch": 73.85245901639344, "grad_norm": 4.4357991218566895, "learning_rate": 3.376572498046934e-06, "loss": 0.4979, "step": 22525 }, { "epoch": 73.85573770491803, "grad_norm": 4.672683238983154, "learning_rate": 3.3757769686387444e-06, "loss": 0.4111, "step": 22526 }, { "epoch": 73.85901639344263, "grad_norm": 5.354730606079102, "learning_rate": 3.374981513925554e-06, "loss": 0.2259, "step": 22527 }, { "epoch": 73.86229508196722, "grad_norm": 5.356814861297607, "learning_rate": 3.3741861339163383e-06, "loss": 0.5124, "step": 22528 }, { "epoch": 73.8655737704918, "grad_norm": 5.630366802215576, "learning_rate": 3.373390828620063e-06, "loss": 0.2899, "step": 22529 }, { "epoch": 73.8688524590164, "grad_norm": 6.5106096267700195, "learning_rate": 3.3725955980456958e-06, "loss": 0.3796, "step": 22530 }, { "epoch": 73.87213114754098, "grad_norm": 8.244372367858887, "learning_rate": 3.3718004422022056e-06, "loss": 0.4134, "step": 22531 }, { "epoch": 73.87540983606557, "grad_norm": 4.90388822555542, "learning_rate": 3.3710053610985517e-06, "loss": 0.4457, "step": 22532 }, { "epoch": 73.87868852459016, "grad_norm": 5.396993637084961, "learning_rate": 3.3702103547437093e-06, "loss": 0.3404, "step": 22533 }, { "epoch": 73.88196721311475, "grad_norm": 5.690266132354736, "learning_rate": 3.369415423146638e-06, "loss": 0.3233, "step": 22534 }, { "epoch": 73.88524590163935, "grad_norm": 17.656246185302734, "learning_rate": 3.3686205663162987e-06, "loss": 0.2991, "step": 22535 }, { "epoch": 73.88852459016394, "grad_norm": 4.755007743835449, "learning_rate": 3.3678257842616613e-06, "loss": 0.4442, "step": 22536 }, { "epoch": 73.89180327868853, "grad_norm": 4.7492876052856445, "learning_rate": 3.3670310769916827e-06, "loss": 0.4242, "step": 22537 }, { "epoch": 73.89508196721312, "grad_norm": 9.556333541870117, "learning_rate": 3.3662364445153227e-06, "loss": 0.4286, "step": 22538 }, { "epoch": 73.8983606557377, "grad_norm": 5.094532012939453, "learning_rate": 3.3654418868415472e-06, "loss": 0.3866, "step": 22539 }, { "epoch": 73.90163934426229, "grad_norm": 6.289125442504883, "learning_rate": 3.3646474039793133e-06, "loss": 0.5407, "step": 22540 }, { "epoch": 73.90491803278688, "grad_norm": 4.575852870941162, "learning_rate": 3.363852995937575e-06, "loss": 0.4632, "step": 22541 }, { "epoch": 73.90819672131147, "grad_norm": 6.816719055175781, "learning_rate": 3.363058662725297e-06, "loss": 0.3174, "step": 22542 }, { "epoch": 73.91147540983607, "grad_norm": 4.221904754638672, "learning_rate": 3.362264404351434e-06, "loss": 0.3132, "step": 22543 }, { "epoch": 73.91475409836066, "grad_norm": 7.756138324737549, "learning_rate": 3.361470220824942e-06, "loss": 0.5101, "step": 22544 }, { "epoch": 73.91803278688525, "grad_norm": 4.8402485847473145, "learning_rate": 3.3606761121547703e-06, "loss": 0.378, "step": 22545 }, { "epoch": 73.92131147540984, "grad_norm": 5.872274875640869, "learning_rate": 3.3598820783498833e-06, "loss": 0.3155, "step": 22546 }, { "epoch": 73.92459016393443, "grad_norm": 11.704514503479004, "learning_rate": 3.359088119419229e-06, "loss": 0.5213, "step": 22547 }, { "epoch": 73.92786885245901, "grad_norm": 6.055962085723877, "learning_rate": 3.358294235371763e-06, "loss": 0.3799, "step": 22548 }, { "epoch": 73.9311475409836, "grad_norm": 10.242773056030273, "learning_rate": 3.35750042621643e-06, "loss": 0.4087, "step": 22549 }, { "epoch": 73.93442622950819, "grad_norm": 7.393029689788818, "learning_rate": 3.3567066919621894e-06, "loss": 0.595, "step": 22550 }, { "epoch": 73.9377049180328, "grad_norm": 4.295042991638184, "learning_rate": 3.355913032617989e-06, "loss": 0.424, "step": 22551 }, { "epoch": 73.94098360655738, "grad_norm": 6.825183391571045, "learning_rate": 3.3551194481927763e-06, "loss": 0.5212, "step": 22552 }, { "epoch": 73.94426229508197, "grad_norm": 5.709242343902588, "learning_rate": 3.3543259386954984e-06, "loss": 0.2524, "step": 22553 }, { "epoch": 73.94754098360656, "grad_norm": 6.743626117706299, "learning_rate": 3.3535325041351084e-06, "loss": 0.6562, "step": 22554 }, { "epoch": 73.95081967213115, "grad_norm": 4.577982425689697, "learning_rate": 3.3527391445205494e-06, "loss": 0.3246, "step": 22555 }, { "epoch": 73.95409836065573, "grad_norm": 4.650711536407471, "learning_rate": 3.351945859860769e-06, "loss": 0.3689, "step": 22556 }, { "epoch": 73.95737704918032, "grad_norm": 5.870993137359619, "learning_rate": 3.351152650164711e-06, "loss": 0.5155, "step": 22557 }, { "epoch": 73.96065573770491, "grad_norm": 5.556304454803467, "learning_rate": 3.350359515441316e-06, "loss": 0.1981, "step": 22558 }, { "epoch": 73.96393442622951, "grad_norm": 3.8920886516571045, "learning_rate": 3.3495664556995345e-06, "loss": 0.2998, "step": 22559 }, { "epoch": 73.9672131147541, "grad_norm": 5.82656192779541, "learning_rate": 3.3487734709483065e-06, "loss": 0.4283, "step": 22560 }, { "epoch": 73.97049180327869, "grad_norm": 6.0910797119140625, "learning_rate": 3.347980561196573e-06, "loss": 0.4617, "step": 22561 }, { "epoch": 73.97377049180328, "grad_norm": 5.293889045715332, "learning_rate": 3.347187726453273e-06, "loss": 0.4581, "step": 22562 }, { "epoch": 73.97704918032787, "grad_norm": 9.990116119384766, "learning_rate": 3.3463949667273497e-06, "loss": 0.3146, "step": 22563 }, { "epoch": 73.98032786885246, "grad_norm": 5.522974491119385, "learning_rate": 3.3456022820277422e-06, "loss": 0.3816, "step": 22564 }, { "epoch": 73.98360655737704, "grad_norm": 6.682844161987305, "learning_rate": 3.3448096723633882e-06, "loss": 0.4912, "step": 22565 }, { "epoch": 73.98688524590163, "grad_norm": 4.550377368927002, "learning_rate": 3.3440171377432205e-06, "loss": 0.2756, "step": 22566 }, { "epoch": 73.99016393442623, "grad_norm": 5.072203636169434, "learning_rate": 3.3432246781761845e-06, "loss": 0.7281, "step": 22567 }, { "epoch": 73.99344262295082, "grad_norm": 5.795156002044678, "learning_rate": 3.3424322936712106e-06, "loss": 0.4397, "step": 22568 }, { "epoch": 73.99672131147541, "grad_norm": 5.0961713790893555, "learning_rate": 3.341639984237235e-06, "loss": 0.3245, "step": 22569 }, { "epoch": 74.0, "grad_norm": 5.144992828369141, "learning_rate": 3.3408477498831917e-06, "loss": 0.4322, "step": 22570 }, { "epoch": 74.00327868852459, "grad_norm": 5.187911510467529, "learning_rate": 3.340055590618011e-06, "loss": 0.3474, "step": 22571 }, { "epoch": 74.00655737704918, "grad_norm": 6.07952880859375, "learning_rate": 3.3392635064506308e-06, "loss": 0.4673, "step": 22572 }, { "epoch": 74.00983606557377, "grad_norm": 7.13128662109375, "learning_rate": 3.33847149738998e-06, "loss": 0.4411, "step": 22573 }, { "epoch": 74.01311475409837, "grad_norm": 4.746032238006592, "learning_rate": 3.3376795634449887e-06, "loss": 0.4893, "step": 22574 }, { "epoch": 74.01639344262296, "grad_norm": 5.494058132171631, "learning_rate": 3.336887704624585e-06, "loss": 0.2422, "step": 22575 }, { "epoch": 74.01967213114754, "grad_norm": 4.149784088134766, "learning_rate": 3.336095920937703e-06, "loss": 0.2789, "step": 22576 }, { "epoch": 74.02295081967213, "grad_norm": 5.159708023071289, "learning_rate": 3.335304212393269e-06, "loss": 0.2957, "step": 22577 }, { "epoch": 74.02622950819672, "grad_norm": 5.731748104095459, "learning_rate": 3.3345125790002096e-06, "loss": 0.4332, "step": 22578 }, { "epoch": 74.02950819672131, "grad_norm": 6.084404945373535, "learning_rate": 3.3337210207674508e-06, "loss": 0.3444, "step": 22579 }, { "epoch": 74.0327868852459, "grad_norm": 4.118621826171875, "learning_rate": 3.332929537703915e-06, "loss": 0.1402, "step": 22580 }, { "epoch": 74.03606557377049, "grad_norm": 5.363358020782471, "learning_rate": 3.3321381298185353e-06, "loss": 0.4925, "step": 22581 }, { "epoch": 74.03934426229509, "grad_norm": 6.365419387817383, "learning_rate": 3.3313467971202296e-06, "loss": 0.4873, "step": 22582 }, { "epoch": 74.04262295081968, "grad_norm": 8.810050010681152, "learning_rate": 3.3305555396179225e-06, "loss": 0.3144, "step": 22583 }, { "epoch": 74.04590163934427, "grad_norm": 5.802508354187012, "learning_rate": 3.329764357320534e-06, "loss": 0.3933, "step": 22584 }, { "epoch": 74.04918032786885, "grad_norm": 4.932918548583984, "learning_rate": 3.3289732502369896e-06, "loss": 0.4808, "step": 22585 }, { "epoch": 74.05245901639344, "grad_norm": 8.361249923706055, "learning_rate": 3.328182218376209e-06, "loss": 0.4638, "step": 22586 }, { "epoch": 74.05573770491803, "grad_norm": 8.181159019470215, "learning_rate": 3.3273912617471073e-06, "loss": 0.448, "step": 22587 }, { "epoch": 74.05901639344262, "grad_norm": 5.27909517288208, "learning_rate": 3.32660038035861e-06, "loss": 0.5052, "step": 22588 }, { "epoch": 74.0622950819672, "grad_norm": 5.115375518798828, "learning_rate": 3.3258095742196316e-06, "loss": 0.3725, "step": 22589 }, { "epoch": 74.06557377049181, "grad_norm": 5.085508823394775, "learning_rate": 3.3250188433390908e-06, "loss": 0.3611, "step": 22590 }, { "epoch": 74.0688524590164, "grad_norm": 5.712343215942383, "learning_rate": 3.3242281877258985e-06, "loss": 0.4864, "step": 22591 }, { "epoch": 74.07213114754099, "grad_norm": 4.663423538208008, "learning_rate": 3.3234376073889787e-06, "loss": 0.5571, "step": 22592 }, { "epoch": 74.07540983606557, "grad_norm": 7.030742168426514, "learning_rate": 3.322647102337241e-06, "loss": 0.3269, "step": 22593 }, { "epoch": 74.07868852459016, "grad_norm": 4.892574787139893, "learning_rate": 3.3218566725795966e-06, "loss": 0.3081, "step": 22594 }, { "epoch": 74.08196721311475, "grad_norm": 5.9552812576293945, "learning_rate": 3.3210663181249647e-06, "loss": 0.3857, "step": 22595 }, { "epoch": 74.08524590163934, "grad_norm": 6.2089738845825195, "learning_rate": 3.3202760389822553e-06, "loss": 0.3449, "step": 22596 }, { "epoch": 74.08852459016393, "grad_norm": 9.0370512008667, "learning_rate": 3.3194858351603744e-06, "loss": 0.2693, "step": 22597 }, { "epoch": 74.09180327868853, "grad_norm": 5.075149059295654, "learning_rate": 3.318695706668241e-06, "loss": 0.3338, "step": 22598 }, { "epoch": 74.09508196721312, "grad_norm": 5.253509044647217, "learning_rate": 3.3179056535147602e-06, "loss": 0.5754, "step": 22599 }, { "epoch": 74.09836065573771, "grad_norm": 5.950451374053955, "learning_rate": 3.3171156757088406e-06, "loss": 0.4309, "step": 22600 }, { "epoch": 74.1016393442623, "grad_norm": 6.739306926727295, "learning_rate": 3.3163257732593866e-06, "loss": 0.3712, "step": 22601 }, { "epoch": 74.10491803278688, "grad_norm": 9.515935897827148, "learning_rate": 3.315535946175312e-06, "loss": 0.4823, "step": 22602 }, { "epoch": 74.10819672131147, "grad_norm": 6.232358932495117, "learning_rate": 3.3147461944655203e-06, "loss": 0.366, "step": 22603 }, { "epoch": 74.11147540983606, "grad_norm": 4.572998046875, "learning_rate": 3.3139565181389155e-06, "loss": 0.2049, "step": 22604 }, { "epoch": 74.11475409836065, "grad_norm": 6.991917133331299, "learning_rate": 3.3131669172044035e-06, "loss": 0.4345, "step": 22605 }, { "epoch": 74.11803278688525, "grad_norm": 8.649628639221191, "learning_rate": 3.3123773916708836e-06, "loss": 0.3674, "step": 22606 }, { "epoch": 74.12131147540984, "grad_norm": 5.863036155700684, "learning_rate": 3.311587941547265e-06, "loss": 0.3732, "step": 22607 }, { "epoch": 74.12459016393443, "grad_norm": 5.491868495941162, "learning_rate": 3.310798566842447e-06, "loss": 0.4591, "step": 22608 }, { "epoch": 74.12786885245902, "grad_norm": 4.803811073303223, "learning_rate": 3.3100092675653296e-06, "loss": 0.3309, "step": 22609 }, { "epoch": 74.1311475409836, "grad_norm": 7.1266350746154785, "learning_rate": 3.30922004372481e-06, "loss": 0.2616, "step": 22610 }, { "epoch": 74.1344262295082, "grad_norm": 5.5953216552734375, "learning_rate": 3.308430895329795e-06, "loss": 0.1971, "step": 22611 }, { "epoch": 74.13770491803278, "grad_norm": 5.278740406036377, "learning_rate": 3.3076418223891792e-06, "loss": 0.4038, "step": 22612 }, { "epoch": 74.14098360655737, "grad_norm": 5.9078049659729, "learning_rate": 3.3068528249118602e-06, "loss": 0.3137, "step": 22613 }, { "epoch": 74.14426229508197, "grad_norm": 5.243343353271484, "learning_rate": 3.306063902906731e-06, "loss": 0.313, "step": 22614 }, { "epoch": 74.14754098360656, "grad_norm": 4.713679313659668, "learning_rate": 3.305275056382695e-06, "loss": 0.4069, "step": 22615 }, { "epoch": 74.15081967213115, "grad_norm": 8.50851821899414, "learning_rate": 3.304486285348644e-06, "loss": 0.5046, "step": 22616 }, { "epoch": 74.15409836065574, "grad_norm": 6.818833827972412, "learning_rate": 3.3036975898134704e-06, "loss": 0.3372, "step": 22617 }, { "epoch": 74.15737704918033, "grad_norm": 7.563595294952393, "learning_rate": 3.3029089697860694e-06, "loss": 0.4233, "step": 22618 }, { "epoch": 74.16065573770491, "grad_norm": 5.511893272399902, "learning_rate": 3.3021204252753293e-06, "loss": 0.2771, "step": 22619 }, { "epoch": 74.1639344262295, "grad_norm": 7.100221157073975, "learning_rate": 3.301331956290149e-06, "loss": 0.3572, "step": 22620 }, { "epoch": 74.1672131147541, "grad_norm": 4.69182014465332, "learning_rate": 3.3005435628394167e-06, "loss": 0.2171, "step": 22621 }, { "epoch": 74.1704918032787, "grad_norm": 4.8321051597595215, "learning_rate": 3.2997552449320204e-06, "loss": 0.3061, "step": 22622 }, { "epoch": 74.17377049180328, "grad_norm": 4.3227128982543945, "learning_rate": 3.2989670025768473e-06, "loss": 0.3521, "step": 22623 }, { "epoch": 74.17704918032787, "grad_norm": 4.485503673553467, "learning_rate": 3.2981788357827914e-06, "loss": 0.3501, "step": 22624 }, { "epoch": 74.18032786885246, "grad_norm": 9.681598663330078, "learning_rate": 3.2973907445587384e-06, "loss": 0.3792, "step": 22625 }, { "epoch": 74.18360655737705, "grad_norm": 4.879428863525391, "learning_rate": 3.2966027289135725e-06, "loss": 0.3523, "step": 22626 }, { "epoch": 74.18688524590164, "grad_norm": 5.0449137687683105, "learning_rate": 3.2958147888561776e-06, "loss": 0.3893, "step": 22627 }, { "epoch": 74.19016393442622, "grad_norm": 4.995189189910889, "learning_rate": 3.295026924395446e-06, "loss": 0.4116, "step": 22628 }, { "epoch": 74.19344262295083, "grad_norm": 4.783572673797607, "learning_rate": 3.2942391355402557e-06, "loss": 0.2319, "step": 22629 }, { "epoch": 74.19672131147541, "grad_norm": 5.501833915710449, "learning_rate": 3.293451422299492e-06, "loss": 0.3035, "step": 22630 }, { "epoch": 74.2, "grad_norm": 4.696304798126221, "learning_rate": 3.2926637846820366e-06, "loss": 0.5244, "step": 22631 }, { "epoch": 74.20327868852459, "grad_norm": 5.126776218414307, "learning_rate": 3.291876222696767e-06, "loss": 0.4957, "step": 22632 }, { "epoch": 74.20655737704918, "grad_norm": 5.1795806884765625, "learning_rate": 3.2910887363525723e-06, "loss": 0.5997, "step": 22633 }, { "epoch": 74.20983606557377, "grad_norm": 9.791601181030273, "learning_rate": 3.2903013256583273e-06, "loss": 0.3886, "step": 22634 }, { "epoch": 74.21311475409836, "grad_norm": 5.792724609375, "learning_rate": 3.2895139906229103e-06, "loss": 0.226, "step": 22635 }, { "epoch": 74.21639344262294, "grad_norm": 4.5292181968688965, "learning_rate": 3.2887267312551975e-06, "loss": 0.4067, "step": 22636 }, { "epoch": 74.21967213114755, "grad_norm": 5.287165641784668, "learning_rate": 3.2879395475640717e-06, "loss": 0.3684, "step": 22637 }, { "epoch": 74.22295081967214, "grad_norm": 5.835565567016602, "learning_rate": 3.2871524395584065e-06, "loss": 0.406, "step": 22638 }, { "epoch": 74.22622950819672, "grad_norm": 5.465130805969238, "learning_rate": 3.2863654072470774e-06, "loss": 0.4151, "step": 22639 }, { "epoch": 74.22950819672131, "grad_norm": 4.19139289855957, "learning_rate": 3.2855784506389554e-06, "loss": 0.3469, "step": 22640 }, { "epoch": 74.2327868852459, "grad_norm": 5.166706085205078, "learning_rate": 3.2847915697429213e-06, "loss": 0.3626, "step": 22641 }, { "epoch": 74.23606557377049, "grad_norm": 10.928959846496582, "learning_rate": 3.284004764567843e-06, "loss": 0.3056, "step": 22642 }, { "epoch": 74.23934426229508, "grad_norm": 5.940928936004639, "learning_rate": 3.2832180351225916e-06, "loss": 0.5321, "step": 22643 }, { "epoch": 74.24262295081967, "grad_norm": 5.53367805480957, "learning_rate": 3.2824313814160434e-06, "loss": 0.6941, "step": 22644 }, { "epoch": 74.24590163934427, "grad_norm": 4.278500080108643, "learning_rate": 3.281644803457067e-06, "loss": 0.2054, "step": 22645 }, { "epoch": 74.24918032786886, "grad_norm": 7.344345569610596, "learning_rate": 3.280858301254526e-06, "loss": 0.5591, "step": 22646 }, { "epoch": 74.25245901639344, "grad_norm": 9.150655746459961, "learning_rate": 3.280071874817298e-06, "loss": 0.5989, "step": 22647 }, { "epoch": 74.25573770491803, "grad_norm": 6.2076873779296875, "learning_rate": 3.2792855241542465e-06, "loss": 0.3408, "step": 22648 }, { "epoch": 74.25901639344262, "grad_norm": 5.836885452270508, "learning_rate": 3.2784992492742385e-06, "loss": 0.5155, "step": 22649 }, { "epoch": 74.26229508196721, "grad_norm": 7.598717212677002, "learning_rate": 3.2777130501861364e-06, "loss": 0.2467, "step": 22650 }, { "epoch": 74.2655737704918, "grad_norm": 5.439419746398926, "learning_rate": 3.2769269268988125e-06, "loss": 0.1692, "step": 22651 }, { "epoch": 74.26885245901639, "grad_norm": 10.709020614624023, "learning_rate": 3.276140879421128e-06, "loss": 0.458, "step": 22652 }, { "epoch": 74.27213114754099, "grad_norm": 5.171174049377441, "learning_rate": 3.2753549077619417e-06, "loss": 0.4759, "step": 22653 }, { "epoch": 74.27540983606558, "grad_norm": 4.925532817840576, "learning_rate": 3.2745690119301255e-06, "loss": 0.5249, "step": 22654 }, { "epoch": 74.27868852459017, "grad_norm": 4.417569160461426, "learning_rate": 3.2737831919345353e-06, "loss": 0.31, "step": 22655 }, { "epoch": 74.28196721311475, "grad_norm": 7.311987400054932, "learning_rate": 3.2729974477840344e-06, "loss": 0.4292, "step": 22656 }, { "epoch": 74.28524590163934, "grad_norm": 5.081676006317139, "learning_rate": 3.272211779487481e-06, "loss": 0.3978, "step": 22657 }, { "epoch": 74.28852459016393, "grad_norm": 4.058840274810791, "learning_rate": 3.271426187053731e-06, "loss": 0.3452, "step": 22658 }, { "epoch": 74.29180327868852, "grad_norm": 5.34331750869751, "learning_rate": 3.2706406704916505e-06, "loss": 0.4202, "step": 22659 }, { "epoch": 74.29508196721312, "grad_norm": 4.563255786895752, "learning_rate": 3.2698552298100938e-06, "loss": 0.4238, "step": 22660 }, { "epoch": 74.29836065573771, "grad_norm": 5.010681629180908, "learning_rate": 3.2690698650179165e-06, "loss": 0.2861, "step": 22661 }, { "epoch": 74.3016393442623, "grad_norm": 4.75046443939209, "learning_rate": 3.2682845761239714e-06, "loss": 0.4093, "step": 22662 }, { "epoch": 74.30491803278689, "grad_norm": 6.836828231811523, "learning_rate": 3.2674993631371198e-06, "loss": 0.494, "step": 22663 }, { "epoch": 74.30819672131148, "grad_norm": 4.494837760925293, "learning_rate": 3.266714226066213e-06, "loss": 0.3276, "step": 22664 }, { "epoch": 74.31147540983606, "grad_norm": 6.1136860847473145, "learning_rate": 3.265929164920105e-06, "loss": 0.391, "step": 22665 }, { "epoch": 74.31475409836065, "grad_norm": 16.828712463378906, "learning_rate": 3.2651441797076432e-06, "loss": 0.8753, "step": 22666 }, { "epoch": 74.31803278688524, "grad_norm": 4.094006538391113, "learning_rate": 3.264359270437688e-06, "loss": 0.2077, "step": 22667 }, { "epoch": 74.32131147540984, "grad_norm": 4.509213447570801, "learning_rate": 3.2635744371190834e-06, "loss": 0.453, "step": 22668 }, { "epoch": 74.32459016393443, "grad_norm": 4.418176651000977, "learning_rate": 3.262789679760683e-06, "loss": 0.4034, "step": 22669 }, { "epoch": 74.32786885245902, "grad_norm": 7.142394065856934, "learning_rate": 3.2620049983713333e-06, "loss": 0.5395, "step": 22670 }, { "epoch": 74.33114754098361, "grad_norm": 5.289625644683838, "learning_rate": 3.2612203929598786e-06, "loss": 0.3829, "step": 22671 }, { "epoch": 74.3344262295082, "grad_norm": 6.821901798248291, "learning_rate": 3.2604358635351752e-06, "loss": 0.5181, "step": 22672 }, { "epoch": 74.33770491803278, "grad_norm": 7.0301666259765625, "learning_rate": 3.259651410106064e-06, "loss": 0.4996, "step": 22673 }, { "epoch": 74.34098360655737, "grad_norm": 4.336817741394043, "learning_rate": 3.258867032681392e-06, "loss": 0.1601, "step": 22674 }, { "epoch": 74.34426229508196, "grad_norm": 5.49525785446167, "learning_rate": 3.2580827312699993e-06, "loss": 0.3542, "step": 22675 }, { "epoch": 74.34754098360656, "grad_norm": 3.7496094703674316, "learning_rate": 3.2572985058807373e-06, "loss": 0.5425, "step": 22676 }, { "epoch": 74.35081967213115, "grad_norm": 5.0937089920043945, "learning_rate": 3.2565143565224453e-06, "loss": 0.3346, "step": 22677 }, { "epoch": 74.35409836065574, "grad_norm": 4.825790882110596, "learning_rate": 3.255730283203965e-06, "loss": 0.3045, "step": 22678 }, { "epoch": 74.35737704918033, "grad_norm": 7.691089153289795, "learning_rate": 3.254946285934135e-06, "loss": 0.3968, "step": 22679 }, { "epoch": 74.36065573770492, "grad_norm": 5.878824710845947, "learning_rate": 3.2541623647218025e-06, "loss": 0.4558, "step": 22680 }, { "epoch": 74.3639344262295, "grad_norm": 5.024258136749268, "learning_rate": 3.2533785195758026e-06, "loss": 0.4654, "step": 22681 }, { "epoch": 74.3672131147541, "grad_norm": 5.495588302612305, "learning_rate": 3.252594750504975e-06, "loss": 0.3993, "step": 22682 }, { "epoch": 74.37049180327868, "grad_norm": 6.26848030090332, "learning_rate": 3.2518110575181573e-06, "loss": 0.293, "step": 22683 }, { "epoch": 74.37377049180328, "grad_norm": 5.577826499938965, "learning_rate": 3.2510274406241814e-06, "loss": 0.6402, "step": 22684 }, { "epoch": 74.37704918032787, "grad_norm": 6.259435176849365, "learning_rate": 3.250243899831893e-06, "loss": 0.6271, "step": 22685 }, { "epoch": 74.38032786885246, "grad_norm": 4.4072465896606445, "learning_rate": 3.2494604351501223e-06, "loss": 0.3969, "step": 22686 }, { "epoch": 74.38360655737705, "grad_norm": 7.05255126953125, "learning_rate": 3.2486770465877048e-06, "loss": 0.4071, "step": 22687 }, { "epoch": 74.38688524590164, "grad_norm": 5.6197638511657715, "learning_rate": 3.247893734153469e-06, "loss": 0.2948, "step": 22688 }, { "epoch": 74.39016393442623, "grad_norm": 9.21756649017334, "learning_rate": 3.247110497856255e-06, "loss": 0.613, "step": 22689 }, { "epoch": 74.39344262295081, "grad_norm": 4.940595626831055, "learning_rate": 3.246327337704892e-06, "loss": 0.459, "step": 22690 }, { "epoch": 74.3967213114754, "grad_norm": 5.847038269042969, "learning_rate": 3.2455442537082106e-06, "loss": 0.3688, "step": 22691 }, { "epoch": 74.4, "grad_norm": 5.606091022491455, "learning_rate": 3.2447612458750365e-06, "loss": 0.3086, "step": 22692 }, { "epoch": 74.4032786885246, "grad_norm": 4.066317081451416, "learning_rate": 3.2439783142142067e-06, "loss": 0.2791, "step": 22693 }, { "epoch": 74.40655737704918, "grad_norm": 5.003995895385742, "learning_rate": 3.243195458734546e-06, "loss": 0.506, "step": 22694 }, { "epoch": 74.40983606557377, "grad_norm": 5.176268100738525, "learning_rate": 3.2424126794448816e-06, "loss": 0.3057, "step": 22695 }, { "epoch": 74.41311475409836, "grad_norm": 5.032450199127197, "learning_rate": 3.2416299763540372e-06, "loss": 0.2774, "step": 22696 }, { "epoch": 74.41639344262295, "grad_norm": 6.045802116394043, "learning_rate": 3.240847349470845e-06, "loss": 0.3788, "step": 22697 }, { "epoch": 74.41967213114754, "grad_norm": 7.737011432647705, "learning_rate": 3.2400647988041266e-06, "loss": 0.4096, "step": 22698 }, { "epoch": 74.42295081967212, "grad_norm": 4.988595485687256, "learning_rate": 3.2392823243627024e-06, "loss": 0.2779, "step": 22699 }, { "epoch": 74.42622950819673, "grad_norm": 9.196916580200195, "learning_rate": 3.238499926155403e-06, "loss": 0.4962, "step": 22700 }, { "epoch": 74.42950819672132, "grad_norm": 6.52881383895874, "learning_rate": 3.2377176041910473e-06, "loss": 0.5042, "step": 22701 }, { "epoch": 74.4327868852459, "grad_norm": 6.550279140472412, "learning_rate": 3.2369353584784534e-06, "loss": 0.4245, "step": 22702 }, { "epoch": 74.43606557377049, "grad_norm": 5.6174726486206055, "learning_rate": 3.2361531890264486e-06, "loss": 0.3083, "step": 22703 }, { "epoch": 74.43934426229508, "grad_norm": 5.329611301422119, "learning_rate": 3.2353710958438488e-06, "loss": 0.4323, "step": 22704 }, { "epoch": 74.44262295081967, "grad_norm": 4.7066426277160645, "learning_rate": 3.234589078939471e-06, "loss": 0.2816, "step": 22705 }, { "epoch": 74.44590163934426, "grad_norm": 5.653016090393066, "learning_rate": 3.233807138322138e-06, "loss": 0.4478, "step": 22706 }, { "epoch": 74.44918032786886, "grad_norm": 5.285253047943115, "learning_rate": 3.233025274000664e-06, "loss": 0.4972, "step": 22707 }, { "epoch": 74.45245901639345, "grad_norm": 5.513732433319092, "learning_rate": 3.232243485983867e-06, "loss": 0.4622, "step": 22708 }, { "epoch": 74.45573770491804, "grad_norm": 3.8160645961761475, "learning_rate": 3.231461774280561e-06, "loss": 0.333, "step": 22709 }, { "epoch": 74.45901639344262, "grad_norm": 5.197700500488281, "learning_rate": 3.230680138899558e-06, "loss": 0.2552, "step": 22710 }, { "epoch": 74.46229508196721, "grad_norm": 6.100600719451904, "learning_rate": 3.2298985798496785e-06, "loss": 0.3622, "step": 22711 }, { "epoch": 74.4655737704918, "grad_norm": 6.024402141571045, "learning_rate": 3.22911709713973e-06, "loss": 0.4748, "step": 22712 }, { "epoch": 74.46885245901639, "grad_norm": 6.446234703063965, "learning_rate": 3.228335690778528e-06, "loss": 0.6698, "step": 22713 }, { "epoch": 74.47213114754098, "grad_norm": 4.605578899383545, "learning_rate": 3.227554360774877e-06, "loss": 0.3682, "step": 22714 }, { "epoch": 74.47540983606558, "grad_norm": 4.259500503540039, "learning_rate": 3.2267731071375964e-06, "loss": 0.3509, "step": 22715 }, { "epoch": 74.47868852459017, "grad_norm": 5.708957672119141, "learning_rate": 3.2259919298754915e-06, "loss": 0.3402, "step": 22716 }, { "epoch": 74.48196721311476, "grad_norm": 4.560135841369629, "learning_rate": 3.2252108289973707e-06, "loss": 0.4787, "step": 22717 }, { "epoch": 74.48524590163935, "grad_norm": 4.509393215179443, "learning_rate": 3.2244298045120383e-06, "loss": 0.2424, "step": 22718 }, { "epoch": 74.48852459016393, "grad_norm": 4.8852057456970215, "learning_rate": 3.2236488564283074e-06, "loss": 0.2744, "step": 22719 }, { "epoch": 74.49180327868852, "grad_norm": 14.851275444030762, "learning_rate": 3.2228679847549825e-06, "loss": 0.5819, "step": 22720 }, { "epoch": 74.49508196721311, "grad_norm": 5.238436222076416, "learning_rate": 3.2220871895008667e-06, "loss": 0.4332, "step": 22721 }, { "epoch": 74.4983606557377, "grad_norm": 4.854773044586182, "learning_rate": 3.2213064706747654e-06, "loss": 0.4769, "step": 22722 }, { "epoch": 74.5016393442623, "grad_norm": 5.097657203674316, "learning_rate": 3.220525828285478e-06, "loss": 0.2972, "step": 22723 }, { "epoch": 74.50491803278689, "grad_norm": 4.286602973937988, "learning_rate": 3.2197452623418146e-06, "loss": 0.3878, "step": 22724 }, { "epoch": 74.50819672131148, "grad_norm": 4.556804656982422, "learning_rate": 3.218964772852573e-06, "loss": 0.3372, "step": 22725 }, { "epoch": 74.51147540983607, "grad_norm": 5.226657390594482, "learning_rate": 3.218184359826554e-06, "loss": 0.2511, "step": 22726 }, { "epoch": 74.51475409836065, "grad_norm": 4.750992298126221, "learning_rate": 3.2174040232725546e-06, "loss": 0.4423, "step": 22727 }, { "epoch": 74.51803278688524, "grad_norm": 4.897051811218262, "learning_rate": 3.216623763199379e-06, "loss": 0.3638, "step": 22728 }, { "epoch": 74.52131147540983, "grad_norm": 5.959197998046875, "learning_rate": 3.215843579615825e-06, "loss": 0.3978, "step": 22729 }, { "epoch": 74.52459016393442, "grad_norm": 5.386866092681885, "learning_rate": 3.2150634725306873e-06, "loss": 0.5167, "step": 22730 }, { "epoch": 74.52786885245902, "grad_norm": 4.273258209228516, "learning_rate": 3.214283441952761e-06, "loss": 0.4118, "step": 22731 }, { "epoch": 74.53114754098361, "grad_norm": 19.295162200927734, "learning_rate": 3.2135034878908477e-06, "loss": 0.4922, "step": 22732 }, { "epoch": 74.5344262295082, "grad_norm": 5.228683948516846, "learning_rate": 3.2127236103537384e-06, "loss": 0.599, "step": 22733 }, { "epoch": 74.53770491803279, "grad_norm": 3.994795799255371, "learning_rate": 3.2119438093502266e-06, "loss": 0.3535, "step": 22734 }, { "epoch": 74.54098360655738, "grad_norm": 7.214138031005859, "learning_rate": 3.2111640848891068e-06, "loss": 0.3303, "step": 22735 }, { "epoch": 74.54426229508196, "grad_norm": 6.882429599761963, "learning_rate": 3.210384436979168e-06, "loss": 0.5685, "step": 22736 }, { "epoch": 74.54754098360655, "grad_norm": 4.277449607849121, "learning_rate": 3.209604865629207e-06, "loss": 0.4244, "step": 22737 }, { "epoch": 74.55081967213114, "grad_norm": 6.309386253356934, "learning_rate": 3.208825370848011e-06, "loss": 0.3249, "step": 22738 }, { "epoch": 74.55409836065574, "grad_norm": 5.523549556732178, "learning_rate": 3.2080459526443696e-06, "loss": 0.3839, "step": 22739 }, { "epoch": 74.55737704918033, "grad_norm": 4.235146999359131, "learning_rate": 3.207266611027069e-06, "loss": 0.4772, "step": 22740 }, { "epoch": 74.56065573770492, "grad_norm": 4.752033710479736, "learning_rate": 3.206487346004904e-06, "loss": 0.342, "step": 22741 }, { "epoch": 74.56393442622951, "grad_norm": 4.416573524475098, "learning_rate": 3.2057081575866566e-06, "loss": 0.4696, "step": 22742 }, { "epoch": 74.5672131147541, "grad_norm": 6.276725769042969, "learning_rate": 3.2049290457811154e-06, "loss": 0.4454, "step": 22743 }, { "epoch": 74.57049180327868, "grad_norm": 6.422821998596191, "learning_rate": 3.2041500105970603e-06, "loss": 0.4474, "step": 22744 }, { "epoch": 74.57377049180327, "grad_norm": 5.437894821166992, "learning_rate": 3.2033710520432827e-06, "loss": 0.1423, "step": 22745 }, { "epoch": 74.57704918032788, "grad_norm": 5.566749095916748, "learning_rate": 3.2025921701285633e-06, "loss": 0.355, "step": 22746 }, { "epoch": 74.58032786885246, "grad_norm": 5.22459077835083, "learning_rate": 3.2018133648616847e-06, "loss": 0.4604, "step": 22747 }, { "epoch": 74.58360655737705, "grad_norm": 6.824321269989014, "learning_rate": 3.2010346362514254e-06, "loss": 0.3481, "step": 22748 }, { "epoch": 74.58688524590164, "grad_norm": 5.479552745819092, "learning_rate": 3.200255984306574e-06, "loss": 0.3165, "step": 22749 }, { "epoch": 74.59016393442623, "grad_norm": 5.381320476531982, "learning_rate": 3.199477409035905e-06, "loss": 0.3534, "step": 22750 }, { "epoch": 74.59344262295082, "grad_norm": 15.01155948638916, "learning_rate": 3.198698910448197e-06, "loss": 0.3699, "step": 22751 }, { "epoch": 74.5967213114754, "grad_norm": 5.387229919433594, "learning_rate": 3.1979204885522317e-06, "loss": 0.4049, "step": 22752 }, { "epoch": 74.6, "grad_norm": 5.150534152984619, "learning_rate": 3.197142143356787e-06, "loss": 0.1829, "step": 22753 }, { "epoch": 74.6032786885246, "grad_norm": 4.4598469734191895, "learning_rate": 3.1963638748706373e-06, "loss": 0.159, "step": 22754 }, { "epoch": 74.60655737704919, "grad_norm": 5.431356430053711, "learning_rate": 3.1955856831025556e-06, "loss": 0.3444, "step": 22755 }, { "epoch": 74.60983606557377, "grad_norm": 5.402525424957275, "learning_rate": 3.1948075680613233e-06, "loss": 0.2062, "step": 22756 }, { "epoch": 74.61311475409836, "grad_norm": 4.781651020050049, "learning_rate": 3.1940295297557123e-06, "loss": 0.5515, "step": 22757 }, { "epoch": 74.61639344262295, "grad_norm": 4.873383522033691, "learning_rate": 3.19325156819449e-06, "loss": 0.3688, "step": 22758 }, { "epoch": 74.61967213114754, "grad_norm": 5.081677436828613, "learning_rate": 3.192473683386438e-06, "loss": 0.3998, "step": 22759 }, { "epoch": 74.62295081967213, "grad_norm": 5.031820774078369, "learning_rate": 3.191695875340323e-06, "loss": 0.2968, "step": 22760 }, { "epoch": 74.62622950819672, "grad_norm": 4.690179824829102, "learning_rate": 3.190918144064915e-06, "loss": 0.3202, "step": 22761 }, { "epoch": 74.62950819672132, "grad_norm": 5.82227087020874, "learning_rate": 3.1901404895689825e-06, "loss": 0.4244, "step": 22762 }, { "epoch": 74.6327868852459, "grad_norm": 12.511866569519043, "learning_rate": 3.189362911861299e-06, "loss": 0.2353, "step": 22763 }, { "epoch": 74.6360655737705, "grad_norm": 4.119708061218262, "learning_rate": 3.18858541095063e-06, "loss": 0.3573, "step": 22764 }, { "epoch": 74.63934426229508, "grad_norm": 4.184924602508545, "learning_rate": 3.1878079868457422e-06, "loss": 0.3664, "step": 22765 }, { "epoch": 74.64262295081967, "grad_norm": 5.835692882537842, "learning_rate": 3.1870306395553995e-06, "loss": 0.3395, "step": 22766 }, { "epoch": 74.64590163934426, "grad_norm": 4.966587543487549, "learning_rate": 3.1862533690883735e-06, "loss": 0.2585, "step": 22767 }, { "epoch": 74.64918032786885, "grad_norm": 3.9313833713531494, "learning_rate": 3.185476175453426e-06, "loss": 0.6645, "step": 22768 }, { "epoch": 74.65245901639344, "grad_norm": 5.761285305023193, "learning_rate": 3.1846990586593185e-06, "loss": 0.3514, "step": 22769 }, { "epoch": 74.65573770491804, "grad_norm": 5.47258996963501, "learning_rate": 3.183922018714812e-06, "loss": 0.4139, "step": 22770 }, { "epoch": 74.65901639344263, "grad_norm": 7.864512920379639, "learning_rate": 3.1831450556286756e-06, "loss": 0.3089, "step": 22771 }, { "epoch": 74.66229508196722, "grad_norm": 4.428742408752441, "learning_rate": 3.1823681694096665e-06, "loss": 0.4096, "step": 22772 }, { "epoch": 74.6655737704918, "grad_norm": 4.6311750411987305, "learning_rate": 3.1815913600665448e-06, "loss": 0.3928, "step": 22773 }, { "epoch": 74.66885245901639, "grad_norm": 8.838308334350586, "learning_rate": 3.1808146276080696e-06, "loss": 0.51, "step": 22774 }, { "epoch": 74.67213114754098, "grad_norm": 11.332418441772461, "learning_rate": 3.1800379720429964e-06, "loss": 0.2551, "step": 22775 }, { "epoch": 74.67540983606557, "grad_norm": 5.5046916007995605, "learning_rate": 3.1792613933800898e-06, "loss": 0.5556, "step": 22776 }, { "epoch": 74.67868852459016, "grad_norm": 4.438865661621094, "learning_rate": 3.178484891628103e-06, "loss": 0.4984, "step": 22777 }, { "epoch": 74.68196721311476, "grad_norm": 4.641926288604736, "learning_rate": 3.17770846679579e-06, "loss": 0.5809, "step": 22778 }, { "epoch": 74.68524590163935, "grad_norm": 4.128535747528076, "learning_rate": 3.1769321188919056e-06, "loss": 0.2262, "step": 22779 }, { "epoch": 74.68852459016394, "grad_norm": 6.716090679168701, "learning_rate": 3.176155847925209e-06, "loss": 0.4661, "step": 22780 }, { "epoch": 74.69180327868852, "grad_norm": 6.48306131362915, "learning_rate": 3.1753796539044502e-06, "loss": 0.2993, "step": 22781 }, { "epoch": 74.69508196721311, "grad_norm": 5.1512346267700195, "learning_rate": 3.174603536838381e-06, "loss": 0.3421, "step": 22782 }, { "epoch": 74.6983606557377, "grad_norm": 4.535363674163818, "learning_rate": 3.173827496735751e-06, "loss": 0.3811, "step": 22783 }, { "epoch": 74.70163934426229, "grad_norm": 5.140600681304932, "learning_rate": 3.173051533605316e-06, "loss": 0.5967, "step": 22784 }, { "epoch": 74.70491803278688, "grad_norm": 4.4288763999938965, "learning_rate": 3.1722756474558235e-06, "loss": 0.2545, "step": 22785 }, { "epoch": 74.70819672131148, "grad_norm": 7.3304762840271, "learning_rate": 3.171499838296023e-06, "loss": 0.5365, "step": 22786 }, { "epoch": 74.71147540983607, "grad_norm": 5.139747619628906, "learning_rate": 3.1707241061346604e-06, "loss": 0.2912, "step": 22787 }, { "epoch": 74.71475409836066, "grad_norm": 4.899129390716553, "learning_rate": 3.169948450980481e-06, "loss": 0.3319, "step": 22788 }, { "epoch": 74.71803278688525, "grad_norm": 4.734036445617676, "learning_rate": 3.169172872842238e-06, "loss": 0.3454, "step": 22789 }, { "epoch": 74.72131147540983, "grad_norm": 4.84280252456665, "learning_rate": 3.168397371728673e-06, "loss": 0.3941, "step": 22790 }, { "epoch": 74.72459016393442, "grad_norm": 4.821285247802734, "learning_rate": 3.1676219476485317e-06, "loss": 0.2192, "step": 22791 }, { "epoch": 74.72786885245901, "grad_norm": 6.762513637542725, "learning_rate": 3.1668466006105523e-06, "loss": 0.387, "step": 22792 }, { "epoch": 74.73114754098361, "grad_norm": 16.330060958862305, "learning_rate": 3.1660713306234857e-06, "loss": 0.2863, "step": 22793 }, { "epoch": 74.7344262295082, "grad_norm": 10.579424858093262, "learning_rate": 3.16529613769607e-06, "loss": 0.6817, "step": 22794 }, { "epoch": 74.73770491803279, "grad_norm": 4.502475738525391, "learning_rate": 3.164521021837047e-06, "loss": 0.3786, "step": 22795 }, { "epoch": 74.74098360655738, "grad_norm": 4.426224231719971, "learning_rate": 3.163745983055154e-06, "loss": 0.2853, "step": 22796 }, { "epoch": 74.74426229508197, "grad_norm": 5.4275593757629395, "learning_rate": 3.162971021359136e-06, "loss": 0.3289, "step": 22797 }, { "epoch": 74.74754098360656, "grad_norm": 7.025508403778076, "learning_rate": 3.162196136757727e-06, "loss": 0.5408, "step": 22798 }, { "epoch": 74.75081967213114, "grad_norm": 5.408135890960693, "learning_rate": 3.1614213292596674e-06, "loss": 0.4385, "step": 22799 }, { "epoch": 74.75409836065573, "grad_norm": 4.577000141143799, "learning_rate": 3.160646598873692e-06, "loss": 0.539, "step": 22800 }, { "epoch": 74.75737704918033, "grad_norm": 5.425309658050537, "learning_rate": 3.1598719456085345e-06, "loss": 0.4447, "step": 22801 }, { "epoch": 74.76065573770492, "grad_norm": 5.3675737380981445, "learning_rate": 3.1590973694729367e-06, "loss": 0.371, "step": 22802 }, { "epoch": 74.76393442622951, "grad_norm": 6.283022880554199, "learning_rate": 3.1583228704756285e-06, "loss": 0.4675, "step": 22803 }, { "epoch": 74.7672131147541, "grad_norm": 5.276932239532471, "learning_rate": 3.1575484486253393e-06, "loss": 0.2902, "step": 22804 }, { "epoch": 74.77049180327869, "grad_norm": 4.821101665496826, "learning_rate": 3.1567741039308098e-06, "loss": 0.3482, "step": 22805 }, { "epoch": 74.77377049180328, "grad_norm": 5.223635196685791, "learning_rate": 3.155999836400767e-06, "loss": 0.5002, "step": 22806 }, { "epoch": 74.77704918032786, "grad_norm": 6.770998954772949, "learning_rate": 3.155225646043939e-06, "loss": 0.4888, "step": 22807 }, { "epoch": 74.78032786885245, "grad_norm": 3.8792645931243896, "learning_rate": 3.1544515328690617e-06, "loss": 0.3189, "step": 22808 }, { "epoch": 74.78360655737706, "grad_norm": 4.48306131362915, "learning_rate": 3.1536774968848615e-06, "loss": 0.3453, "step": 22809 }, { "epoch": 74.78688524590164, "grad_norm": 4.915085315704346, "learning_rate": 3.152903538100063e-06, "loss": 0.4351, "step": 22810 }, { "epoch": 74.79016393442623, "grad_norm": 4.193343639373779, "learning_rate": 3.1521296565233985e-06, "loss": 0.439, "step": 22811 }, { "epoch": 74.79344262295082, "grad_norm": 6.577909469604492, "learning_rate": 3.151355852163592e-06, "loss": 0.5768, "step": 22812 }, { "epoch": 74.79672131147541, "grad_norm": 7.275900840759277, "learning_rate": 3.15058212502937e-06, "loss": 0.2691, "step": 22813 }, { "epoch": 74.8, "grad_norm": 5.434037208557129, "learning_rate": 3.1498084751294523e-06, "loss": 0.3965, "step": 22814 }, { "epoch": 74.80327868852459, "grad_norm": 8.486884117126465, "learning_rate": 3.14903490247257e-06, "loss": 0.3829, "step": 22815 }, { "epoch": 74.80655737704917, "grad_norm": 5.675105571746826, "learning_rate": 3.1482614070674423e-06, "loss": 0.5116, "step": 22816 }, { "epoch": 74.80983606557378, "grad_norm": 4.979880332946777, "learning_rate": 3.1474879889227907e-06, "loss": 0.5273, "step": 22817 }, { "epoch": 74.81311475409836, "grad_norm": 4.907268524169922, "learning_rate": 3.1467146480473344e-06, "loss": 0.3809, "step": 22818 }, { "epoch": 74.81639344262295, "grad_norm": 5.446148872375488, "learning_rate": 3.1459413844497986e-06, "loss": 0.6132, "step": 22819 }, { "epoch": 74.81967213114754, "grad_norm": 4.7848663330078125, "learning_rate": 3.1451681981389003e-06, "loss": 0.3747, "step": 22820 }, { "epoch": 74.82295081967213, "grad_norm": 5.068652629852295, "learning_rate": 3.1443950891233587e-06, "loss": 0.4625, "step": 22821 }, { "epoch": 74.82622950819672, "grad_norm": 5.344986915588379, "learning_rate": 3.143622057411887e-06, "loss": 0.2839, "step": 22822 }, { "epoch": 74.8295081967213, "grad_norm": 5.684878826141357, "learning_rate": 3.142849103013208e-06, "loss": 0.2803, "step": 22823 }, { "epoch": 74.8327868852459, "grad_norm": 5.433999538421631, "learning_rate": 3.142076225936035e-06, "loss": 0.5132, "step": 22824 }, { "epoch": 74.8360655737705, "grad_norm": 5.486874580383301, "learning_rate": 3.141303426189083e-06, "loss": 0.5015, "step": 22825 }, { "epoch": 74.83934426229509, "grad_norm": 5.702755451202393, "learning_rate": 3.1405307037810672e-06, "loss": 0.2127, "step": 22826 }, { "epoch": 74.84262295081967, "grad_norm": 6.201550006866455, "learning_rate": 3.1397580587206955e-06, "loss": 0.3973, "step": 22827 }, { "epoch": 74.84590163934426, "grad_norm": 6.624098300933838, "learning_rate": 3.138985491016688e-06, "loss": 0.4377, "step": 22828 }, { "epoch": 74.84918032786885, "grad_norm": 4.533843517303467, "learning_rate": 3.138213000677752e-06, "loss": 0.5915, "step": 22829 }, { "epoch": 74.85245901639344, "grad_norm": 4.938811779022217, "learning_rate": 3.1374405877126e-06, "loss": 0.3969, "step": 22830 }, { "epoch": 74.85573770491803, "grad_norm": 8.288877487182617, "learning_rate": 3.1366682521299374e-06, "loss": 0.4618, "step": 22831 }, { "epoch": 74.85901639344263, "grad_norm": 6.624911308288574, "learning_rate": 3.135895993938479e-06, "loss": 0.517, "step": 22832 }, { "epoch": 74.86229508196722, "grad_norm": 9.132746696472168, "learning_rate": 3.1351238131469307e-06, "loss": 0.4578, "step": 22833 }, { "epoch": 74.8655737704918, "grad_norm": 5.543782711029053, "learning_rate": 3.134351709763999e-06, "loss": 0.5355, "step": 22834 }, { "epoch": 74.8688524590164, "grad_norm": 5.514771938323975, "learning_rate": 3.1335796837983866e-06, "loss": 0.2869, "step": 22835 }, { "epoch": 74.87213114754098, "grad_norm": 5.5777997970581055, "learning_rate": 3.132807735258806e-06, "loss": 0.476, "step": 22836 }, { "epoch": 74.87540983606557, "grad_norm": 4.814303874969482, "learning_rate": 3.1320358641539583e-06, "loss": 0.3834, "step": 22837 }, { "epoch": 74.87868852459016, "grad_norm": 4.34100341796875, "learning_rate": 3.131264070492548e-06, "loss": 0.4771, "step": 22838 }, { "epoch": 74.88196721311475, "grad_norm": 4.86126708984375, "learning_rate": 3.1304923542832753e-06, "loss": 0.5347, "step": 22839 }, { "epoch": 74.88524590163935, "grad_norm": 7.357167720794678, "learning_rate": 3.1297207155348417e-06, "loss": 0.2944, "step": 22840 }, { "epoch": 74.88852459016394, "grad_norm": 6.323158264160156, "learning_rate": 3.1289491542559535e-06, "loss": 0.634, "step": 22841 }, { "epoch": 74.89180327868853, "grad_norm": 4.536099910736084, "learning_rate": 3.128177670455307e-06, "loss": 0.3061, "step": 22842 }, { "epoch": 74.89508196721312, "grad_norm": 5.799932479858398, "learning_rate": 3.1274062641416025e-06, "loss": 0.3562, "step": 22843 }, { "epoch": 74.8983606557377, "grad_norm": 5.5404953956604, "learning_rate": 3.126634935323535e-06, "loss": 0.3856, "step": 22844 }, { "epoch": 74.90163934426229, "grad_norm": 4.846859931945801, "learning_rate": 3.125863684009809e-06, "loss": 0.4556, "step": 22845 }, { "epoch": 74.90491803278688, "grad_norm": 4.842133045196533, "learning_rate": 3.1250925102091156e-06, "loss": 0.4958, "step": 22846 }, { "epoch": 74.90819672131147, "grad_norm": 5.076017379760742, "learning_rate": 3.1243214139301535e-06, "loss": 0.354, "step": 22847 }, { "epoch": 74.91147540983607, "grad_norm": 4.550531387329102, "learning_rate": 3.123550395181614e-06, "loss": 0.382, "step": 22848 }, { "epoch": 74.91475409836066, "grad_norm": 6.741007328033447, "learning_rate": 3.1227794539721947e-06, "loss": 0.3477, "step": 22849 }, { "epoch": 74.91803278688525, "grad_norm": 4.679757595062256, "learning_rate": 3.122008590310589e-06, "loss": 0.336, "step": 22850 }, { "epoch": 74.92131147540984, "grad_norm": 7.201663494110107, "learning_rate": 3.121237804205487e-06, "loss": 0.4904, "step": 22851 }, { "epoch": 74.92459016393443, "grad_norm": 3.805655002593994, "learning_rate": 3.1204670956655813e-06, "loss": 0.2619, "step": 22852 }, { "epoch": 74.92786885245901, "grad_norm": 8.875064849853516, "learning_rate": 3.119696464699559e-06, "loss": 0.4183, "step": 22853 }, { "epoch": 74.9311475409836, "grad_norm": 4.523107528686523, "learning_rate": 3.1189259113161152e-06, "loss": 0.4788, "step": 22854 }, { "epoch": 74.93442622950819, "grad_norm": 7.08319091796875, "learning_rate": 3.1181554355239363e-06, "loss": 0.2206, "step": 22855 }, { "epoch": 74.9377049180328, "grad_norm": 4.39411735534668, "learning_rate": 3.1173850373317106e-06, "loss": 0.4123, "step": 22856 }, { "epoch": 74.94098360655738, "grad_norm": 4.936712265014648, "learning_rate": 3.116614716748122e-06, "loss": 0.3601, "step": 22857 }, { "epoch": 74.94426229508197, "grad_norm": 5.751582145690918, "learning_rate": 3.1158444737818616e-06, "loss": 0.4266, "step": 22858 }, { "epoch": 74.94754098360656, "grad_norm": 5.585231781005859, "learning_rate": 3.1150743084416133e-06, "loss": 0.3331, "step": 22859 }, { "epoch": 74.95081967213115, "grad_norm": 5.967334270477295, "learning_rate": 3.1143042207360573e-06, "loss": 0.4526, "step": 22860 }, { "epoch": 74.95409836065573, "grad_norm": 15.944718360900879, "learning_rate": 3.113534210673883e-06, "loss": 0.3113, "step": 22861 }, { "epoch": 74.95737704918032, "grad_norm": 6.429622173309326, "learning_rate": 3.1127642782637714e-06, "loss": 0.422, "step": 22862 }, { "epoch": 74.96065573770491, "grad_norm": 5.008016109466553, "learning_rate": 3.111994423514401e-06, "loss": 0.5394, "step": 22863 }, { "epoch": 74.96393442622951, "grad_norm": 12.997296333312988, "learning_rate": 3.1112246464344565e-06, "loss": 0.4174, "step": 22864 }, { "epoch": 74.9672131147541, "grad_norm": 5.057443141937256, "learning_rate": 3.1104549470326182e-06, "loss": 0.4782, "step": 22865 }, { "epoch": 74.97049180327869, "grad_norm": 5.7904767990112305, "learning_rate": 3.1096853253175595e-06, "loss": 0.307, "step": 22866 }, { "epoch": 74.97377049180328, "grad_norm": 10.53144359588623, "learning_rate": 3.1089157812979662e-06, "loss": 0.5511, "step": 22867 }, { "epoch": 74.97704918032787, "grad_norm": 6.120570182800293, "learning_rate": 3.108146314982513e-06, "loss": 0.6634, "step": 22868 }, { "epoch": 74.98032786885246, "grad_norm": 10.375584602355957, "learning_rate": 3.1073769263798757e-06, "loss": 0.4388, "step": 22869 }, { "epoch": 74.98360655737704, "grad_norm": 5.960925579071045, "learning_rate": 3.106607615498727e-06, "loss": 0.688, "step": 22870 }, { "epoch": 74.98688524590163, "grad_norm": 4.845480918884277, "learning_rate": 3.1058383823477478e-06, "loss": 0.33, "step": 22871 }, { "epoch": 74.99016393442623, "grad_norm": 5.116172790527344, "learning_rate": 3.1050692269356086e-06, "loss": 0.3817, "step": 22872 }, { "epoch": 74.99344262295082, "grad_norm": 5.631613731384277, "learning_rate": 3.1043001492709833e-06, "loss": 0.3754, "step": 22873 }, { "epoch": 74.99672131147541, "grad_norm": 6.352194786071777, "learning_rate": 3.103531149362541e-06, "loss": 0.5036, "step": 22874 }, { "epoch": 75.0, "grad_norm": 5.310283184051514, "learning_rate": 3.1027622272189572e-06, "loss": 0.3503, "step": 22875 }, { "epoch": 75.00327868852459, "grad_norm": 4.3604960441589355, "learning_rate": 3.101993382848901e-06, "loss": 0.7025, "step": 22876 }, { "epoch": 75.00655737704918, "grad_norm": 4.665547847747803, "learning_rate": 3.101224616261043e-06, "loss": 0.4635, "step": 22877 }, { "epoch": 75.00983606557377, "grad_norm": 4.837706565856934, "learning_rate": 3.100455927464049e-06, "loss": 0.3472, "step": 22878 }, { "epoch": 75.01311475409837, "grad_norm": 5.228492736816406, "learning_rate": 3.099687316466584e-06, "loss": 0.382, "step": 22879 }, { "epoch": 75.01639344262296, "grad_norm": 4.885501384735107, "learning_rate": 3.098918783277324e-06, "loss": 0.5013, "step": 22880 }, { "epoch": 75.01967213114754, "grad_norm": 5.412637710571289, "learning_rate": 3.0981503279049295e-06, "loss": 0.2178, "step": 22881 }, { "epoch": 75.02295081967213, "grad_norm": 4.0927886962890625, "learning_rate": 3.097381950358066e-06, "loss": 0.2743, "step": 22882 }, { "epoch": 75.02622950819672, "grad_norm": 5.151551246643066, "learning_rate": 3.096613650645395e-06, "loss": 0.4761, "step": 22883 }, { "epoch": 75.02950819672131, "grad_norm": 4.959298610687256, "learning_rate": 3.095845428775586e-06, "loss": 0.4188, "step": 22884 }, { "epoch": 75.0327868852459, "grad_norm": 4.322702407836914, "learning_rate": 3.0950772847572987e-06, "loss": 0.4363, "step": 22885 }, { "epoch": 75.03606557377049, "grad_norm": 5.43056058883667, "learning_rate": 3.094309218599193e-06, "loss": 0.3505, "step": 22886 }, { "epoch": 75.03934426229509, "grad_norm": 5.051807403564453, "learning_rate": 3.093541230309929e-06, "loss": 0.341, "step": 22887 }, { "epoch": 75.04262295081968, "grad_norm": 5.981350421905518, "learning_rate": 3.0927733198981714e-06, "loss": 0.5919, "step": 22888 }, { "epoch": 75.04590163934427, "grad_norm": 4.737320899963379, "learning_rate": 3.092005487372576e-06, "loss": 0.3452, "step": 22889 }, { "epoch": 75.04918032786885, "grad_norm": 4.85333251953125, "learning_rate": 3.091237732741802e-06, "loss": 0.2434, "step": 22890 }, { "epoch": 75.05245901639344, "grad_norm": 4.834157943725586, "learning_rate": 3.090470056014505e-06, "loss": 0.2329, "step": 22891 }, { "epoch": 75.05573770491803, "grad_norm": 28.029884338378906, "learning_rate": 3.0897024571993396e-06, "loss": 0.3022, "step": 22892 }, { "epoch": 75.05901639344262, "grad_norm": 5.119149208068848, "learning_rate": 3.088934936304967e-06, "loss": 0.3712, "step": 22893 }, { "epoch": 75.0622950819672, "grad_norm": 4.493396282196045, "learning_rate": 3.0881674933400385e-06, "loss": 0.2288, "step": 22894 }, { "epoch": 75.06557377049181, "grad_norm": 4.582334041595459, "learning_rate": 3.087400128313208e-06, "loss": 0.2522, "step": 22895 }, { "epoch": 75.0688524590164, "grad_norm": 8.16551399230957, "learning_rate": 3.086632841233125e-06, "loss": 0.5435, "step": 22896 }, { "epoch": 75.07213114754099, "grad_norm": 6.152432918548584, "learning_rate": 3.085865632108448e-06, "loss": 0.5255, "step": 22897 }, { "epoch": 75.07540983606557, "grad_norm": 6.127963542938232, "learning_rate": 3.085098500947825e-06, "loss": 0.4477, "step": 22898 }, { "epoch": 75.07868852459016, "grad_norm": 7.641422271728516, "learning_rate": 3.0843314477599072e-06, "loss": 0.5428, "step": 22899 }, { "epoch": 75.08196721311475, "grad_norm": 9.022927284240723, "learning_rate": 3.0835644725533385e-06, "loss": 0.5076, "step": 22900 }, { "epoch": 75.08524590163934, "grad_norm": 4.684350490570068, "learning_rate": 3.0827975753367745e-06, "loss": 0.5335, "step": 22901 }, { "epoch": 75.08852459016393, "grad_norm": 4.897100925445557, "learning_rate": 3.0820307561188612e-06, "loss": 0.496, "step": 22902 }, { "epoch": 75.09180327868853, "grad_norm": 7.291773796081543, "learning_rate": 3.081264014908243e-06, "loss": 0.3202, "step": 22903 }, { "epoch": 75.09508196721312, "grad_norm": 5.312602519989014, "learning_rate": 3.080497351713567e-06, "loss": 0.4898, "step": 22904 }, { "epoch": 75.09836065573771, "grad_norm": 5.065968990325928, "learning_rate": 3.079730766543475e-06, "loss": 0.3816, "step": 22905 }, { "epoch": 75.1016393442623, "grad_norm": 18.895076751708984, "learning_rate": 3.078964259406616e-06, "loss": 0.2738, "step": 22906 }, { "epoch": 75.10491803278688, "grad_norm": 6.446370601654053, "learning_rate": 3.0781978303116323e-06, "loss": 0.3399, "step": 22907 }, { "epoch": 75.10819672131147, "grad_norm": 4.5493035316467285, "learning_rate": 3.0774314792671643e-06, "loss": 0.3173, "step": 22908 }, { "epoch": 75.11147540983606, "grad_norm": 4.946304798126221, "learning_rate": 3.0766652062818514e-06, "loss": 0.3732, "step": 22909 }, { "epoch": 75.11475409836065, "grad_norm": 7.507511615753174, "learning_rate": 3.0758990113643393e-06, "loss": 0.4533, "step": 22910 }, { "epoch": 75.11803278688525, "grad_norm": 6.326097011566162, "learning_rate": 3.075132894523265e-06, "loss": 0.4743, "step": 22911 }, { "epoch": 75.12131147540984, "grad_norm": 4.8885064125061035, "learning_rate": 3.0743668557672648e-06, "loss": 0.2658, "step": 22912 }, { "epoch": 75.12459016393443, "grad_norm": 5.574856281280518, "learning_rate": 3.073600895104982e-06, "loss": 0.3381, "step": 22913 }, { "epoch": 75.12786885245902, "grad_norm": 5.146498203277588, "learning_rate": 3.0728350125450513e-06, "loss": 0.4347, "step": 22914 }, { "epoch": 75.1311475409836, "grad_norm": 4.245983123779297, "learning_rate": 3.072069208096108e-06, "loss": 0.3268, "step": 22915 }, { "epoch": 75.1344262295082, "grad_norm": 5.722724437713623, "learning_rate": 3.071303481766783e-06, "loss": 0.4958, "step": 22916 }, { "epoch": 75.13770491803278, "grad_norm": 5.085165977478027, "learning_rate": 3.0705378335657197e-06, "loss": 0.2893, "step": 22917 }, { "epoch": 75.14098360655737, "grad_norm": 4.823217868804932, "learning_rate": 3.0697722635015482e-06, "loss": 0.4801, "step": 22918 }, { "epoch": 75.14426229508197, "grad_norm": 6.425578594207764, "learning_rate": 3.0690067715828953e-06, "loss": 0.486, "step": 22919 }, { "epoch": 75.14754098360656, "grad_norm": 5.362540245056152, "learning_rate": 3.0682413578184012e-06, "loss": 0.3281, "step": 22920 }, { "epoch": 75.15081967213115, "grad_norm": 4.896006107330322, "learning_rate": 3.0674760222166934e-06, "loss": 0.2772, "step": 22921 }, { "epoch": 75.15409836065574, "grad_norm": 6.354699611663818, "learning_rate": 3.066710764786398e-06, "loss": 0.3003, "step": 22922 }, { "epoch": 75.15737704918033, "grad_norm": 5.6547088623046875, "learning_rate": 3.0659455855361496e-06, "loss": 0.3517, "step": 22923 }, { "epoch": 75.16065573770491, "grad_norm": 4.121432781219482, "learning_rate": 3.065180484474576e-06, "loss": 0.4277, "step": 22924 }, { "epoch": 75.1639344262295, "grad_norm": 3.5874478816986084, "learning_rate": 3.0644154616103015e-06, "loss": 0.3777, "step": 22925 }, { "epoch": 75.1672131147541, "grad_norm": 4.338391304016113, "learning_rate": 3.0636505169519516e-06, "loss": 0.308, "step": 22926 }, { "epoch": 75.1704918032787, "grad_norm": 3.655548334121704, "learning_rate": 3.0628856505081573e-06, "loss": 0.4867, "step": 22927 }, { "epoch": 75.17377049180328, "grad_norm": 6.722539901733398, "learning_rate": 3.0621208622875397e-06, "loss": 0.5164, "step": 22928 }, { "epoch": 75.17704918032787, "grad_norm": 5.139005661010742, "learning_rate": 3.0613561522987233e-06, "loss": 0.471, "step": 22929 }, { "epoch": 75.18032786885246, "grad_norm": 4.306280612945557, "learning_rate": 3.06059152055033e-06, "loss": 0.5938, "step": 22930 }, { "epoch": 75.18360655737705, "grad_norm": 4.730737686157227, "learning_rate": 3.059826967050981e-06, "loss": 0.4419, "step": 22931 }, { "epoch": 75.18688524590164, "grad_norm": 5.171574592590332, "learning_rate": 3.0590624918093002e-06, "loss": 0.2248, "step": 22932 }, { "epoch": 75.19016393442622, "grad_norm": 8.06507682800293, "learning_rate": 3.058298094833907e-06, "loss": 0.4756, "step": 22933 }, { "epoch": 75.19344262295083, "grad_norm": 4.601780414581299, "learning_rate": 3.0575337761334213e-06, "loss": 0.3246, "step": 22934 }, { "epoch": 75.19672131147541, "grad_norm": 11.716562271118164, "learning_rate": 3.0567695357164563e-06, "loss": 0.5556, "step": 22935 }, { "epoch": 75.2, "grad_norm": 4.615118980407715, "learning_rate": 3.0560053735916372e-06, "loss": 0.5031, "step": 22936 }, { "epoch": 75.20327868852459, "grad_norm": 6.310563087463379, "learning_rate": 3.0552412897675776e-06, "loss": 0.5561, "step": 22937 }, { "epoch": 75.20655737704918, "grad_norm": 5.200314998626709, "learning_rate": 3.0544772842528935e-06, "loss": 0.3781, "step": 22938 }, { "epoch": 75.20983606557377, "grad_norm": 6.634881973266602, "learning_rate": 3.0537133570561974e-06, "loss": 0.3519, "step": 22939 }, { "epoch": 75.21311475409836, "grad_norm": 5.655248165130615, "learning_rate": 3.052949508186107e-06, "loss": 0.3038, "step": 22940 }, { "epoch": 75.21639344262294, "grad_norm": 4.6079230308532715, "learning_rate": 3.052185737651234e-06, "loss": 0.3296, "step": 22941 }, { "epoch": 75.21967213114755, "grad_norm": 5.34698486328125, "learning_rate": 3.0514220454601917e-06, "loss": 0.6106, "step": 22942 }, { "epoch": 75.22295081967214, "grad_norm": 5.470900058746338, "learning_rate": 3.0506584316215904e-06, "loss": 0.4802, "step": 22943 }, { "epoch": 75.22622950819672, "grad_norm": 4.947431564331055, "learning_rate": 3.049894896144038e-06, "loss": 0.2693, "step": 22944 }, { "epoch": 75.22950819672131, "grad_norm": 5.78665828704834, "learning_rate": 3.0491314390361492e-06, "loss": 0.569, "step": 22945 }, { "epoch": 75.2327868852459, "grad_norm": 4.200882434844971, "learning_rate": 3.0483680603065303e-06, "loss": 0.3188, "step": 22946 }, { "epoch": 75.23606557377049, "grad_norm": 4.378012657165527, "learning_rate": 3.0476047599637904e-06, "loss": 0.5829, "step": 22947 }, { "epoch": 75.23934426229508, "grad_norm": 5.4468512535095215, "learning_rate": 3.046841538016532e-06, "loss": 0.3526, "step": 22948 }, { "epoch": 75.24262295081967, "grad_norm": 4.8375396728515625, "learning_rate": 3.0460783944733675e-06, "loss": 0.3175, "step": 22949 }, { "epoch": 75.24590163934427, "grad_norm": 5.548332214355469, "learning_rate": 3.0453153293428996e-06, "loss": 0.2802, "step": 22950 }, { "epoch": 75.24918032786886, "grad_norm": 4.398159980773926, "learning_rate": 3.044552342633732e-06, "loss": 0.2708, "step": 22951 }, { "epoch": 75.25245901639344, "grad_norm": 6.6675639152526855, "learning_rate": 3.043789434354466e-06, "loss": 0.3021, "step": 22952 }, { "epoch": 75.25573770491803, "grad_norm": 6.190961837768555, "learning_rate": 3.04302660451371e-06, "loss": 0.417, "step": 22953 }, { "epoch": 75.25901639344262, "grad_norm": 5.121613025665283, "learning_rate": 3.042263853120062e-06, "loss": 0.3434, "step": 22954 }, { "epoch": 75.26229508196721, "grad_norm": 4.834550857543945, "learning_rate": 3.0415011801821236e-06, "loss": 0.1581, "step": 22955 }, { "epoch": 75.2655737704918, "grad_norm": 5.8781514167785645, "learning_rate": 3.040738585708495e-06, "loss": 0.4541, "step": 22956 }, { "epoch": 75.26885245901639, "grad_norm": 5.946822166442871, "learning_rate": 3.0399760697077706e-06, "loss": 0.3524, "step": 22957 }, { "epoch": 75.27213114754099, "grad_norm": 8.089742660522461, "learning_rate": 3.039213632188556e-06, "loss": 0.3723, "step": 22958 }, { "epoch": 75.27540983606558, "grad_norm": 10.439767837524414, "learning_rate": 3.038451273159445e-06, "loss": 0.3119, "step": 22959 }, { "epoch": 75.27868852459017, "grad_norm": 5.126623630523682, "learning_rate": 3.0376889926290342e-06, "loss": 0.4063, "step": 22960 }, { "epoch": 75.28196721311475, "grad_norm": 6.910204887390137, "learning_rate": 3.036926790605916e-06, "loss": 0.443, "step": 22961 }, { "epoch": 75.28524590163934, "grad_norm": 5.700817584991455, "learning_rate": 3.036164667098691e-06, "loss": 0.406, "step": 22962 }, { "epoch": 75.28852459016393, "grad_norm": 4.648406982421875, "learning_rate": 3.0354026221159505e-06, "loss": 0.2967, "step": 22963 }, { "epoch": 75.29180327868852, "grad_norm": 5.806151866912842, "learning_rate": 3.0346406556662853e-06, "loss": 0.5724, "step": 22964 }, { "epoch": 75.29508196721312, "grad_norm": 6.02971887588501, "learning_rate": 3.0338787677582872e-06, "loss": 0.4291, "step": 22965 }, { "epoch": 75.29836065573771, "grad_norm": 6.296507835388184, "learning_rate": 3.0331169584005514e-06, "loss": 0.3261, "step": 22966 }, { "epoch": 75.3016393442623, "grad_norm": 5.251997947692871, "learning_rate": 3.0323552276016664e-06, "loss": 0.3806, "step": 22967 }, { "epoch": 75.30491803278689, "grad_norm": 5.746524810791016, "learning_rate": 3.0315935753702174e-06, "loss": 0.2842, "step": 22968 }, { "epoch": 75.30819672131148, "grad_norm": 5.5918169021606445, "learning_rate": 3.0308320017147986e-06, "loss": 0.5106, "step": 22969 }, { "epoch": 75.31147540983606, "grad_norm": 4.5694780349731445, "learning_rate": 3.030070506643995e-06, "loss": 0.2814, "step": 22970 }, { "epoch": 75.31475409836065, "grad_norm": 4.822620868682861, "learning_rate": 3.0293090901663913e-06, "loss": 0.2829, "step": 22971 }, { "epoch": 75.31803278688524, "grad_norm": 5.399603366851807, "learning_rate": 3.0285477522905784e-06, "loss": 0.3267, "step": 22972 }, { "epoch": 75.32131147540984, "grad_norm": 5.169116020202637, "learning_rate": 3.0277864930251366e-06, "loss": 0.3652, "step": 22973 }, { "epoch": 75.32459016393443, "grad_norm": 5.987509250640869, "learning_rate": 3.02702531237865e-06, "loss": 0.3101, "step": 22974 }, { "epoch": 75.32786885245902, "grad_norm": 4.479618549346924, "learning_rate": 3.026264210359705e-06, "loss": 0.4073, "step": 22975 }, { "epoch": 75.33114754098361, "grad_norm": 19.71192741394043, "learning_rate": 3.0255031869768827e-06, "loss": 0.5197, "step": 22976 }, { "epoch": 75.3344262295082, "grad_norm": 4.05648946762085, "learning_rate": 3.024742242238763e-06, "loss": 0.3902, "step": 22977 }, { "epoch": 75.33770491803278, "grad_norm": 4.339513778686523, "learning_rate": 3.0239813761539236e-06, "loss": 0.3348, "step": 22978 }, { "epoch": 75.34098360655737, "grad_norm": 4.296126842498779, "learning_rate": 3.0232205887309507e-06, "loss": 0.3996, "step": 22979 }, { "epoch": 75.34426229508196, "grad_norm": 4.6247172355651855, "learning_rate": 3.0224598799784197e-06, "loss": 0.5502, "step": 22980 }, { "epoch": 75.34754098360656, "grad_norm": 5.854789733886719, "learning_rate": 3.021699249904909e-06, "loss": 0.4261, "step": 22981 }, { "epoch": 75.35081967213115, "grad_norm": 5.876019477844238, "learning_rate": 3.0209386985189946e-06, "loss": 0.5685, "step": 22982 }, { "epoch": 75.35409836065574, "grad_norm": 7.016574382781982, "learning_rate": 3.02017822582925e-06, "loss": 0.2616, "step": 22983 }, { "epoch": 75.35737704918033, "grad_norm": 6.459473609924316, "learning_rate": 3.0194178318442558e-06, "loss": 0.466, "step": 22984 }, { "epoch": 75.36065573770492, "grad_norm": 6.780613899230957, "learning_rate": 3.018657516572583e-06, "loss": 0.464, "step": 22985 }, { "epoch": 75.3639344262295, "grad_norm": 3.8888275623321533, "learning_rate": 3.0178972800228066e-06, "loss": 0.3085, "step": 22986 }, { "epoch": 75.3672131147541, "grad_norm": 7.212997913360596, "learning_rate": 3.017137122203494e-06, "loss": 0.5565, "step": 22987 }, { "epoch": 75.37049180327868, "grad_norm": 4.1081342697143555, "learning_rate": 3.016377043123224e-06, "loss": 0.4372, "step": 22988 }, { "epoch": 75.37377049180328, "grad_norm": 4.45997428894043, "learning_rate": 3.015617042790564e-06, "loss": 0.47, "step": 22989 }, { "epoch": 75.37704918032787, "grad_norm": 3.9282164573669434, "learning_rate": 3.014857121214084e-06, "loss": 0.6906, "step": 22990 }, { "epoch": 75.38032786885246, "grad_norm": 5.671557426452637, "learning_rate": 3.0140972784023493e-06, "loss": 0.4416, "step": 22991 }, { "epoch": 75.38360655737705, "grad_norm": 4.813704490661621, "learning_rate": 3.0133375143639344e-06, "loss": 0.4212, "step": 22992 }, { "epoch": 75.38688524590164, "grad_norm": 6.6028571128845215, "learning_rate": 3.012577829107404e-06, "loss": 0.2493, "step": 22993 }, { "epoch": 75.39016393442623, "grad_norm": 8.072183609008789, "learning_rate": 3.011818222641323e-06, "loss": 0.3748, "step": 22994 }, { "epoch": 75.39344262295081, "grad_norm": 4.626172065734863, "learning_rate": 3.011058694974257e-06, "loss": 0.6316, "step": 22995 }, { "epoch": 75.3967213114754, "grad_norm": 4.302283763885498, "learning_rate": 3.0102992461147685e-06, "loss": 0.333, "step": 22996 }, { "epoch": 75.4, "grad_norm": 4.974273204803467, "learning_rate": 3.009539876071427e-06, "loss": 0.4427, "step": 22997 }, { "epoch": 75.4032786885246, "grad_norm": 7.716856002807617, "learning_rate": 3.008780584852791e-06, "loss": 0.4635, "step": 22998 }, { "epoch": 75.40655737704918, "grad_norm": 6.013092517852783, "learning_rate": 3.0080213724674223e-06, "loss": 0.4091, "step": 22999 }, { "epoch": 75.40983606557377, "grad_norm": 5.64447021484375, "learning_rate": 3.0072622389238805e-06, "loss": 0.3064, "step": 23000 }, { "epoch": 75.41311475409836, "grad_norm": 5.192099571228027, "learning_rate": 3.00650318423073e-06, "loss": 0.3977, "step": 23001 }, { "epoch": 75.41639344262295, "grad_norm": 5.957759857177734, "learning_rate": 3.005744208396527e-06, "loss": 0.37, "step": 23002 }, { "epoch": 75.41967213114754, "grad_norm": 4.9970879554748535, "learning_rate": 3.004985311429832e-06, "loss": 0.3665, "step": 23003 }, { "epoch": 75.42295081967212, "grad_norm": 4.808655261993408, "learning_rate": 3.0042264933391997e-06, "loss": 0.335, "step": 23004 }, { "epoch": 75.42622950819673, "grad_norm": 4.6627068519592285, "learning_rate": 3.0034677541331835e-06, "loss": 0.4347, "step": 23005 }, { "epoch": 75.42950819672132, "grad_norm": 5.35830020904541, "learning_rate": 3.0027090938203475e-06, "loss": 0.1992, "step": 23006 }, { "epoch": 75.4327868852459, "grad_norm": 5.6929216384887695, "learning_rate": 3.001950512409241e-06, "loss": 0.3653, "step": 23007 }, { "epoch": 75.43606557377049, "grad_norm": 4.252301216125488, "learning_rate": 3.001192009908419e-06, "loss": 0.3838, "step": 23008 }, { "epoch": 75.43934426229508, "grad_norm": 4.117774963378906, "learning_rate": 3.00043358632643e-06, "loss": 0.4664, "step": 23009 }, { "epoch": 75.44262295081967, "grad_norm": 5.912746906280518, "learning_rate": 2.9996752416718345e-06, "loss": 0.1797, "step": 23010 }, { "epoch": 75.44590163934426, "grad_norm": 7.31254243850708, "learning_rate": 2.9989169759531777e-06, "loss": 0.3472, "step": 23011 }, { "epoch": 75.44918032786886, "grad_norm": 6.775147438049316, "learning_rate": 2.998158789179012e-06, "loss": 0.3385, "step": 23012 }, { "epoch": 75.45245901639345, "grad_norm": 4.143168926239014, "learning_rate": 2.997400681357884e-06, "loss": 0.5774, "step": 23013 }, { "epoch": 75.45573770491804, "grad_norm": 4.574376106262207, "learning_rate": 2.996642652498346e-06, "loss": 0.4586, "step": 23014 }, { "epoch": 75.45901639344262, "grad_norm": 6.4442830085754395, "learning_rate": 2.9958847026089444e-06, "loss": 0.3006, "step": 23015 }, { "epoch": 75.46229508196721, "grad_norm": 4.27717399597168, "learning_rate": 2.995126831698224e-06, "loss": 0.395, "step": 23016 }, { "epoch": 75.4655737704918, "grad_norm": 5.039739608764648, "learning_rate": 2.9943690397747337e-06, "loss": 0.3122, "step": 23017 }, { "epoch": 75.46885245901639, "grad_norm": 6.129659652709961, "learning_rate": 2.9936113268470124e-06, "loss": 0.3076, "step": 23018 }, { "epoch": 75.47213114754098, "grad_norm": 4.570399284362793, "learning_rate": 2.9928536929236106e-06, "loss": 0.3728, "step": 23019 }, { "epoch": 75.47540983606558, "grad_norm": 4.348351955413818, "learning_rate": 2.9920961380130696e-06, "loss": 0.2773, "step": 23020 }, { "epoch": 75.47868852459017, "grad_norm": 4.789090633392334, "learning_rate": 2.991338662123928e-06, "loss": 0.6481, "step": 23021 }, { "epoch": 75.48196721311476, "grad_norm": 7.872557640075684, "learning_rate": 2.9905812652647337e-06, "loss": 0.5793, "step": 23022 }, { "epoch": 75.48524590163935, "grad_norm": 6.494858264923096, "learning_rate": 2.989823947444024e-06, "loss": 0.5663, "step": 23023 }, { "epoch": 75.48852459016393, "grad_norm": 3.9570963382720947, "learning_rate": 2.989066708670334e-06, "loss": 0.4853, "step": 23024 }, { "epoch": 75.49180327868852, "grad_norm": 4.792579174041748, "learning_rate": 2.9883095489522096e-06, "loss": 0.2333, "step": 23025 }, { "epoch": 75.49508196721311, "grad_norm": 4.837821960449219, "learning_rate": 2.9875524682981848e-06, "loss": 0.2815, "step": 23026 }, { "epoch": 75.4983606557377, "grad_norm": 5.393825531005859, "learning_rate": 2.9867954667167955e-06, "loss": 0.4703, "step": 23027 }, { "epoch": 75.5016393442623, "grad_norm": 5.130126476287842, "learning_rate": 2.9860385442165807e-06, "loss": 0.5571, "step": 23028 }, { "epoch": 75.50491803278689, "grad_norm": 4.994156837463379, "learning_rate": 2.9852817008060752e-06, "loss": 0.3465, "step": 23029 }, { "epoch": 75.50819672131148, "grad_norm": 4.3236775398254395, "learning_rate": 2.984524936493811e-06, "loss": 0.4578, "step": 23030 }, { "epoch": 75.51147540983607, "grad_norm": 5.803750514984131, "learning_rate": 2.9837682512883205e-06, "loss": 0.7298, "step": 23031 }, { "epoch": 75.51475409836065, "grad_norm": 5.553401947021484, "learning_rate": 2.9830116451981408e-06, "loss": 0.1777, "step": 23032 }, { "epoch": 75.51803278688524, "grad_norm": 4.879171848297119, "learning_rate": 2.9822551182317993e-06, "loss": 0.5589, "step": 23033 }, { "epoch": 75.52131147540983, "grad_norm": 8.0422945022583, "learning_rate": 2.98149867039783e-06, "loss": 0.4611, "step": 23034 }, { "epoch": 75.52459016393442, "grad_norm": 4.139823913574219, "learning_rate": 2.9807423017047553e-06, "loss": 0.3995, "step": 23035 }, { "epoch": 75.52786885245902, "grad_norm": 5.694465637207031, "learning_rate": 2.9799860121611145e-06, "loss": 0.5187, "step": 23036 }, { "epoch": 75.53114754098361, "grad_norm": 6.244358539581299, "learning_rate": 2.9792298017754296e-06, "loss": 0.3579, "step": 23037 }, { "epoch": 75.5344262295082, "grad_norm": 5.0582756996154785, "learning_rate": 2.978473670556228e-06, "loss": 0.2193, "step": 23038 }, { "epoch": 75.53770491803279, "grad_norm": 5.473719120025635, "learning_rate": 2.9777176185120336e-06, "loss": 0.4078, "step": 23039 }, { "epoch": 75.54098360655738, "grad_norm": 4.720819473266602, "learning_rate": 2.9769616456513774e-06, "loss": 0.5157, "step": 23040 }, { "epoch": 75.54426229508196, "grad_norm": 4.474855899810791, "learning_rate": 2.976205751982781e-06, "loss": 0.3693, "step": 23041 }, { "epoch": 75.54754098360655, "grad_norm": 8.223806381225586, "learning_rate": 2.975449937514767e-06, "loss": 0.3561, "step": 23042 }, { "epoch": 75.55081967213114, "grad_norm": 4.503050327301025, "learning_rate": 2.97469420225586e-06, "loss": 0.4806, "step": 23043 }, { "epoch": 75.55409836065574, "grad_norm": 5.006637096405029, "learning_rate": 2.9739385462145766e-06, "loss": 0.4974, "step": 23044 }, { "epoch": 75.55737704918033, "grad_norm": 4.983249664306641, "learning_rate": 2.973182969399444e-06, "loss": 0.4748, "step": 23045 }, { "epoch": 75.56065573770492, "grad_norm": 5.808197498321533, "learning_rate": 2.9724274718189796e-06, "loss": 0.3261, "step": 23046 }, { "epoch": 75.56393442622951, "grad_norm": 5.298472881317139, "learning_rate": 2.971672053481702e-06, "loss": 0.2499, "step": 23047 }, { "epoch": 75.5672131147541, "grad_norm": 5.159931182861328, "learning_rate": 2.970916714396128e-06, "loss": 0.4434, "step": 23048 }, { "epoch": 75.57049180327868, "grad_norm": 3.956190347671509, "learning_rate": 2.970161454570778e-06, "loss": 0.1883, "step": 23049 }, { "epoch": 75.57377049180327, "grad_norm": 5.482154846191406, "learning_rate": 2.969406274014167e-06, "loss": 0.4525, "step": 23050 }, { "epoch": 75.57704918032788, "grad_norm": 5.51818323135376, "learning_rate": 2.968651172734811e-06, "loss": 0.4315, "step": 23051 }, { "epoch": 75.58032786885246, "grad_norm": 5.127980709075928, "learning_rate": 2.9678961507412205e-06, "loss": 0.1877, "step": 23052 }, { "epoch": 75.58360655737705, "grad_norm": 5.2155609130859375, "learning_rate": 2.967141208041915e-06, "loss": 0.4133, "step": 23053 }, { "epoch": 75.58688524590164, "grad_norm": 4.469143867492676, "learning_rate": 2.9663863446454053e-06, "loss": 0.2943, "step": 23054 }, { "epoch": 75.59016393442623, "grad_norm": 4.651197910308838, "learning_rate": 2.9656315605602028e-06, "loss": 0.6038, "step": 23055 }, { "epoch": 75.59344262295082, "grad_norm": 5.807220458984375, "learning_rate": 2.9648768557948182e-06, "loss": 0.2638, "step": 23056 }, { "epoch": 75.5967213114754, "grad_norm": 12.120867729187012, "learning_rate": 2.9641222303577576e-06, "loss": 0.6733, "step": 23057 }, { "epoch": 75.6, "grad_norm": 4.322906970977783, "learning_rate": 2.9633676842575386e-06, "loss": 0.2718, "step": 23058 }, { "epoch": 75.6032786885246, "grad_norm": 5.20950174331665, "learning_rate": 2.9626132175026656e-06, "loss": 0.3836, "step": 23059 }, { "epoch": 75.60655737704919, "grad_norm": 4.864321231842041, "learning_rate": 2.9618588301016447e-06, "loss": 0.262, "step": 23060 }, { "epoch": 75.60983606557377, "grad_norm": 4.513707160949707, "learning_rate": 2.9611045220629807e-06, "loss": 0.2626, "step": 23061 }, { "epoch": 75.61311475409836, "grad_norm": 4.609891891479492, "learning_rate": 2.9603502933951846e-06, "loss": 0.2472, "step": 23062 }, { "epoch": 75.61639344262295, "grad_norm": 5.3115644454956055, "learning_rate": 2.959596144106758e-06, "loss": 0.2109, "step": 23063 }, { "epoch": 75.61967213114754, "grad_norm": 4.697447299957275, "learning_rate": 2.9588420742062063e-06, "loss": 0.6344, "step": 23064 }, { "epoch": 75.62295081967213, "grad_norm": 4.319027900695801, "learning_rate": 2.9580880837020266e-06, "loss": 0.4316, "step": 23065 }, { "epoch": 75.62622950819672, "grad_norm": 3.43243145942688, "learning_rate": 2.9573341726027295e-06, "loss": 0.1992, "step": 23066 }, { "epoch": 75.62950819672132, "grad_norm": 4.3103814125061035, "learning_rate": 2.9565803409168116e-06, "loss": 0.3462, "step": 23067 }, { "epoch": 75.6327868852459, "grad_norm": 4.719847679138184, "learning_rate": 2.955826588652775e-06, "loss": 0.4361, "step": 23068 }, { "epoch": 75.6360655737705, "grad_norm": 6.742955684661865, "learning_rate": 2.955072915819116e-06, "loss": 0.1424, "step": 23069 }, { "epoch": 75.63934426229508, "grad_norm": 5.100361347198486, "learning_rate": 2.954319322424333e-06, "loss": 0.3464, "step": 23070 }, { "epoch": 75.64262295081967, "grad_norm": 5.448922157287598, "learning_rate": 2.953565808476927e-06, "loss": 0.3836, "step": 23071 }, { "epoch": 75.64590163934426, "grad_norm": 5.099997520446777, "learning_rate": 2.9528123739853943e-06, "loss": 0.2845, "step": 23072 }, { "epoch": 75.64918032786885, "grad_norm": 5.764629364013672, "learning_rate": 2.9520590189582254e-06, "loss": 0.3364, "step": 23073 }, { "epoch": 75.65245901639344, "grad_norm": 3.6273791790008545, "learning_rate": 2.9513057434039227e-06, "loss": 0.3048, "step": 23074 }, { "epoch": 75.65573770491804, "grad_norm": 6.682363986968994, "learning_rate": 2.950552547330976e-06, "loss": 0.5381, "step": 23075 }, { "epoch": 75.65901639344263, "grad_norm": 4.655972957611084, "learning_rate": 2.9497994307478763e-06, "loss": 0.2792, "step": 23076 }, { "epoch": 75.66229508196722, "grad_norm": 3.597031831741333, "learning_rate": 2.949046393663121e-06, "loss": 0.2751, "step": 23077 }, { "epoch": 75.6655737704918, "grad_norm": 4.599916458129883, "learning_rate": 2.948293436085199e-06, "loss": 0.3906, "step": 23078 }, { "epoch": 75.66885245901639, "grad_norm": 3.8028502464294434, "learning_rate": 2.9475405580226015e-06, "loss": 0.2041, "step": 23079 }, { "epoch": 75.67213114754098, "grad_norm": 5.482666492462158, "learning_rate": 2.946787759483812e-06, "loss": 0.3396, "step": 23080 }, { "epoch": 75.67540983606557, "grad_norm": 5.521459102630615, "learning_rate": 2.9460350404773288e-06, "loss": 0.3386, "step": 23081 }, { "epoch": 75.67868852459016, "grad_norm": 4.247933864593506, "learning_rate": 2.945282401011633e-06, "loss": 0.4415, "step": 23082 }, { "epoch": 75.68196721311476, "grad_norm": 5.7424116134643555, "learning_rate": 2.9445298410952117e-06, "loss": 0.3342, "step": 23083 }, { "epoch": 75.68524590163935, "grad_norm": 5.122172832489014, "learning_rate": 2.943777360736555e-06, "loss": 0.7345, "step": 23084 }, { "epoch": 75.68852459016394, "grad_norm": 5.537275791168213, "learning_rate": 2.943024959944144e-06, "loss": 0.3604, "step": 23085 }, { "epoch": 75.69180327868852, "grad_norm": 6.130739212036133, "learning_rate": 2.9422726387264657e-06, "loss": 0.4624, "step": 23086 }, { "epoch": 75.69508196721311, "grad_norm": 5.331300735473633, "learning_rate": 2.9415203970919983e-06, "loss": 0.549, "step": 23087 }, { "epoch": 75.6983606557377, "grad_norm": 4.6346211433410645, "learning_rate": 2.9407682350492295e-06, "loss": 0.3283, "step": 23088 }, { "epoch": 75.70163934426229, "grad_norm": 4.29133939743042, "learning_rate": 2.9400161526066386e-06, "loss": 0.3137, "step": 23089 }, { "epoch": 75.70491803278688, "grad_norm": 5.0459699630737305, "learning_rate": 2.939264149772707e-06, "loss": 0.4151, "step": 23090 }, { "epoch": 75.70819672131148, "grad_norm": 5.3112945556640625, "learning_rate": 2.9385122265559094e-06, "loss": 0.5582, "step": 23091 }, { "epoch": 75.71147540983607, "grad_norm": 4.58673620223999, "learning_rate": 2.937760382964733e-06, "loss": 0.5544, "step": 23092 }, { "epoch": 75.71475409836066, "grad_norm": 5.250119686126709, "learning_rate": 2.9370086190076496e-06, "loss": 0.1661, "step": 23093 }, { "epoch": 75.71803278688525, "grad_norm": 5.598922252655029, "learning_rate": 2.936256934693139e-06, "loss": 0.3557, "step": 23094 }, { "epoch": 75.72131147540983, "grad_norm": 7.6824750900268555, "learning_rate": 2.9355053300296755e-06, "loss": 0.3014, "step": 23095 }, { "epoch": 75.72459016393442, "grad_norm": 5.100857734680176, "learning_rate": 2.9347538050257306e-06, "loss": 0.4696, "step": 23096 }, { "epoch": 75.72786885245901, "grad_norm": 5.136327743530273, "learning_rate": 2.934002359689787e-06, "loss": 0.3562, "step": 23097 }, { "epoch": 75.73114754098361, "grad_norm": 5.7045674324035645, "learning_rate": 2.9332509940303134e-06, "loss": 0.5295, "step": 23098 }, { "epoch": 75.7344262295082, "grad_norm": 6.986618518829346, "learning_rate": 2.9324997080557814e-06, "loss": 0.6637, "step": 23099 }, { "epoch": 75.73770491803279, "grad_norm": 5.021396160125732, "learning_rate": 2.9317485017746615e-06, "loss": 0.4523, "step": 23100 }, { "epoch": 75.74098360655738, "grad_norm": 4.773778915405273, "learning_rate": 2.930997375195429e-06, "loss": 0.3249, "step": 23101 }, { "epoch": 75.74426229508197, "grad_norm": 4.401392936706543, "learning_rate": 2.9302463283265505e-06, "loss": 0.5314, "step": 23102 }, { "epoch": 75.74754098360656, "grad_norm": 5.939936637878418, "learning_rate": 2.9294953611764963e-06, "loss": 0.4183, "step": 23103 }, { "epoch": 75.75081967213114, "grad_norm": 5.354511260986328, "learning_rate": 2.9287444737537296e-06, "loss": 0.5253, "step": 23104 }, { "epoch": 75.75409836065573, "grad_norm": 5.723474502563477, "learning_rate": 2.9279936660667253e-06, "loss": 0.4002, "step": 23105 }, { "epoch": 75.75737704918033, "grad_norm": 6.559431076049805, "learning_rate": 2.927242938123944e-06, "loss": 0.2312, "step": 23106 }, { "epoch": 75.76065573770492, "grad_norm": 7.144190311431885, "learning_rate": 2.926492289933853e-06, "loss": 0.2212, "step": 23107 }, { "epoch": 75.76393442622951, "grad_norm": 4.674375534057617, "learning_rate": 2.9257417215049166e-06, "loss": 0.5141, "step": 23108 }, { "epoch": 75.7672131147541, "grad_norm": 5.710044860839844, "learning_rate": 2.924991232845594e-06, "loss": 0.4881, "step": 23109 }, { "epoch": 75.77049180327869, "grad_norm": 5.938368320465088, "learning_rate": 2.924240823964355e-06, "loss": 0.4475, "step": 23110 }, { "epoch": 75.77377049180328, "grad_norm": 5.306471347808838, "learning_rate": 2.9234904948696573e-06, "loss": 0.3058, "step": 23111 }, { "epoch": 75.77704918032786, "grad_norm": 6.180764675140381, "learning_rate": 2.9227402455699627e-06, "loss": 0.3345, "step": 23112 }, { "epoch": 75.78032786885245, "grad_norm": 5.76165771484375, "learning_rate": 2.9219900760737263e-06, "loss": 0.3678, "step": 23113 }, { "epoch": 75.78360655737706, "grad_norm": 4.803405284881592, "learning_rate": 2.9212399863894146e-06, "loss": 0.3199, "step": 23114 }, { "epoch": 75.78688524590164, "grad_norm": 7.698544979095459, "learning_rate": 2.920489976525482e-06, "loss": 0.4164, "step": 23115 }, { "epoch": 75.79016393442623, "grad_norm": 4.967814922332764, "learning_rate": 2.919740046490386e-06, "loss": 0.6792, "step": 23116 }, { "epoch": 75.79344262295082, "grad_norm": 4.356478691101074, "learning_rate": 2.918990196292579e-06, "loss": 0.3823, "step": 23117 }, { "epoch": 75.79672131147541, "grad_norm": 4.346271991729736, "learning_rate": 2.918240425940523e-06, "loss": 0.4746, "step": 23118 }, { "epoch": 75.8, "grad_norm": 5.8907389640808105, "learning_rate": 2.9174907354426696e-06, "loss": 0.3521, "step": 23119 }, { "epoch": 75.80327868852459, "grad_norm": 5.99294900894165, "learning_rate": 2.9167411248074728e-06, "loss": 0.3722, "step": 23120 }, { "epoch": 75.80655737704917, "grad_norm": 6.680318355560303, "learning_rate": 2.9159915940433837e-06, "loss": 0.2723, "step": 23121 }, { "epoch": 75.80983606557378, "grad_norm": 7.993127346038818, "learning_rate": 2.915242143158852e-06, "loss": 0.373, "step": 23122 }, { "epoch": 75.81311475409836, "grad_norm": 4.383578777313232, "learning_rate": 2.914492772162335e-06, "loss": 0.6346, "step": 23123 }, { "epoch": 75.81639344262295, "grad_norm": 5.762718677520752, "learning_rate": 2.9137434810622788e-06, "loss": 0.27, "step": 23124 }, { "epoch": 75.81967213114754, "grad_norm": 6.461441516876221, "learning_rate": 2.912994269867132e-06, "loss": 0.4401, "step": 23125 }, { "epoch": 75.82295081967213, "grad_norm": 5.146559715270996, "learning_rate": 2.912245138585341e-06, "loss": 0.6207, "step": 23126 }, { "epoch": 75.82622950819672, "grad_norm": 8.560220718383789, "learning_rate": 2.9114960872253585e-06, "loss": 0.4667, "step": 23127 }, { "epoch": 75.8295081967213, "grad_norm": 5.648412704467773, "learning_rate": 2.910747115795628e-06, "loss": 0.4744, "step": 23128 }, { "epoch": 75.8327868852459, "grad_norm": 4.833948135375977, "learning_rate": 2.909998224304592e-06, "loss": 0.4564, "step": 23129 }, { "epoch": 75.8360655737705, "grad_norm": 4.906998634338379, "learning_rate": 2.9092494127606997e-06, "loss": 0.3108, "step": 23130 }, { "epoch": 75.83934426229509, "grad_norm": 5.459824085235596, "learning_rate": 2.9085006811723937e-06, "loss": 0.2967, "step": 23131 }, { "epoch": 75.84262295081967, "grad_norm": 7.347733020782471, "learning_rate": 2.907752029548112e-06, "loss": 0.4057, "step": 23132 }, { "epoch": 75.84590163934426, "grad_norm": 4.987374305725098, "learning_rate": 2.907003457896305e-06, "loss": 0.5405, "step": 23133 }, { "epoch": 75.84918032786885, "grad_norm": 4.802867412567139, "learning_rate": 2.906254966225407e-06, "loss": 0.2007, "step": 23134 }, { "epoch": 75.85245901639344, "grad_norm": 6.292243003845215, "learning_rate": 2.9055065545438576e-06, "loss": 0.3835, "step": 23135 }, { "epoch": 75.85573770491803, "grad_norm": 5.648980617523193, "learning_rate": 2.9047582228601014e-06, "loss": 0.4906, "step": 23136 }, { "epoch": 75.85901639344263, "grad_norm": 4.41572904586792, "learning_rate": 2.904009971182573e-06, "loss": 0.4679, "step": 23137 }, { "epoch": 75.86229508196722, "grad_norm": 6.37912654876709, "learning_rate": 2.903261799519711e-06, "loss": 0.2506, "step": 23138 }, { "epoch": 75.8655737704918, "grad_norm": 4.777775764465332, "learning_rate": 2.902513707879947e-06, "loss": 0.6413, "step": 23139 }, { "epoch": 75.8688524590164, "grad_norm": 4.929516315460205, "learning_rate": 2.9017656962717235e-06, "loss": 0.3295, "step": 23140 }, { "epoch": 75.87213114754098, "grad_norm": 5.658806800842285, "learning_rate": 2.901017764703473e-06, "loss": 0.5004, "step": 23141 }, { "epoch": 75.87540983606557, "grad_norm": 5.224701881408691, "learning_rate": 2.9002699131836274e-06, "loss": 0.2193, "step": 23142 }, { "epoch": 75.87868852459016, "grad_norm": 6.42155647277832, "learning_rate": 2.8995221417206176e-06, "loss": 0.3853, "step": 23143 }, { "epoch": 75.88196721311475, "grad_norm": 4.651165008544922, "learning_rate": 2.898774450322882e-06, "loss": 0.3403, "step": 23144 }, { "epoch": 75.88524590163935, "grad_norm": 5.882931709289551, "learning_rate": 2.8980268389988477e-06, "loss": 0.6198, "step": 23145 }, { "epoch": 75.88852459016394, "grad_norm": 5.157729625701904, "learning_rate": 2.897279307756944e-06, "loss": 0.4789, "step": 23146 }, { "epoch": 75.89180327868853, "grad_norm": 3.757033348083496, "learning_rate": 2.8965318566056024e-06, "loss": 0.1351, "step": 23147 }, { "epoch": 75.89508196721312, "grad_norm": 5.336336612701416, "learning_rate": 2.8957844855532457e-06, "loss": 0.4061, "step": 23148 }, { "epoch": 75.8983606557377, "grad_norm": 4.1469855308532715, "learning_rate": 2.895037194608309e-06, "loss": 0.3223, "step": 23149 }, { "epoch": 75.90163934426229, "grad_norm": 4.338245868682861, "learning_rate": 2.894289983779215e-06, "loss": 0.2671, "step": 23150 }, { "epoch": 75.90491803278688, "grad_norm": 5.5168986320495605, "learning_rate": 2.8935428530743892e-06, "loss": 0.351, "step": 23151 }, { "epoch": 75.90819672131147, "grad_norm": 5.977524757385254, "learning_rate": 2.892795802502254e-06, "loss": 0.375, "step": 23152 }, { "epoch": 75.91147540983607, "grad_norm": 3.638247013092041, "learning_rate": 2.8920488320712394e-06, "loss": 0.2971, "step": 23153 }, { "epoch": 75.91475409836066, "grad_norm": 5.450740337371826, "learning_rate": 2.8913019417897637e-06, "loss": 0.3495, "step": 23154 }, { "epoch": 75.91803278688525, "grad_norm": 4.371835708618164, "learning_rate": 2.8905551316662506e-06, "loss": 0.5442, "step": 23155 }, { "epoch": 75.92131147540984, "grad_norm": 4.691833972930908, "learning_rate": 2.8898084017091166e-06, "loss": 0.3007, "step": 23156 }, { "epoch": 75.92459016393443, "grad_norm": 6.871234893798828, "learning_rate": 2.8890617519267894e-06, "loss": 0.3879, "step": 23157 }, { "epoch": 75.92786885245901, "grad_norm": 4.774827003479004, "learning_rate": 2.8883151823276833e-06, "loss": 0.4691, "step": 23158 }, { "epoch": 75.9311475409836, "grad_norm": 10.232656478881836, "learning_rate": 2.8875686929202196e-06, "loss": 0.6111, "step": 23159 }, { "epoch": 75.93442622950819, "grad_norm": 3.9409990310668945, "learning_rate": 2.886822283712812e-06, "loss": 0.4574, "step": 23160 }, { "epoch": 75.9377049180328, "grad_norm": 4.418874740600586, "learning_rate": 2.8860759547138773e-06, "loss": 0.6111, "step": 23161 }, { "epoch": 75.94098360655738, "grad_norm": 4.748099327087402, "learning_rate": 2.885329705931835e-06, "loss": 0.3855, "step": 23162 }, { "epoch": 75.94426229508197, "grad_norm": 5.0251922607421875, "learning_rate": 2.8845835373750986e-06, "loss": 0.5104, "step": 23163 }, { "epoch": 75.94754098360656, "grad_norm": 4.799638748168945, "learning_rate": 2.8838374490520803e-06, "loss": 0.4622, "step": 23164 }, { "epoch": 75.95081967213115, "grad_norm": 5.285025596618652, "learning_rate": 2.883091440971191e-06, "loss": 0.416, "step": 23165 }, { "epoch": 75.95409836065573, "grad_norm": 5.041922569274902, "learning_rate": 2.8823455131408486e-06, "loss": 0.3659, "step": 23166 }, { "epoch": 75.95737704918032, "grad_norm": 4.714045524597168, "learning_rate": 2.881599665569461e-06, "loss": 0.352, "step": 23167 }, { "epoch": 75.96065573770491, "grad_norm": 5.3889617919921875, "learning_rate": 2.8808538982654378e-06, "loss": 0.4724, "step": 23168 }, { "epoch": 75.96393442622951, "grad_norm": 4.640111923217773, "learning_rate": 2.880108211237187e-06, "loss": 0.2448, "step": 23169 }, { "epoch": 75.9672131147541, "grad_norm": 5.356896877288818, "learning_rate": 2.879362604493121e-06, "loss": 0.3853, "step": 23170 }, { "epoch": 75.97049180327869, "grad_norm": 8.098549842834473, "learning_rate": 2.8786170780416454e-06, "loss": 0.4413, "step": 23171 }, { "epoch": 75.97377049180328, "grad_norm": 5.106232643127441, "learning_rate": 2.877871631891167e-06, "loss": 0.4208, "step": 23172 }, { "epoch": 75.97704918032787, "grad_norm": 4.405656814575195, "learning_rate": 2.877126266050091e-06, "loss": 0.1791, "step": 23173 }, { "epoch": 75.98032786885246, "grad_norm": 6.273342609405518, "learning_rate": 2.8763809805268195e-06, "loss": 0.2253, "step": 23174 }, { "epoch": 75.98360655737704, "grad_norm": 4.7026824951171875, "learning_rate": 2.8756357753297613e-06, "loss": 0.3652, "step": 23175 }, { "epoch": 75.98688524590163, "grad_norm": 3.9393208026885986, "learning_rate": 2.8748906504673178e-06, "loss": 0.1599, "step": 23176 }, { "epoch": 75.99016393442623, "grad_norm": 4.369217395782471, "learning_rate": 2.874145605947891e-06, "loss": 0.4159, "step": 23177 }, { "epoch": 75.99344262295082, "grad_norm": 5.665663242340088, "learning_rate": 2.8734006417798776e-06, "loss": 0.3511, "step": 23178 }, { "epoch": 75.99672131147541, "grad_norm": 5.230742454528809, "learning_rate": 2.8726557579716852e-06, "loss": 0.4472, "step": 23179 }, { "epoch": 76.0, "grad_norm": 5.064527988433838, "learning_rate": 2.8719109545317102e-06, "loss": 0.6861, "step": 23180 }, { "epoch": 76.00327868852459, "grad_norm": 5.175848484039307, "learning_rate": 2.8711662314683496e-06, "loss": 0.5028, "step": 23181 }, { "epoch": 76.00655737704918, "grad_norm": 6.388839244842529, "learning_rate": 2.870421588789999e-06, "loss": 0.5498, "step": 23182 }, { "epoch": 76.00983606557377, "grad_norm": 5.822409629821777, "learning_rate": 2.869677026505061e-06, "loss": 0.3656, "step": 23183 }, { "epoch": 76.01311475409837, "grad_norm": 6.163691997528076, "learning_rate": 2.8689325446219285e-06, "loss": 0.4631, "step": 23184 }, { "epoch": 76.01639344262296, "grad_norm": 5.035632610321045, "learning_rate": 2.8681881431489933e-06, "loss": 0.4738, "step": 23185 }, { "epoch": 76.01967213114754, "grad_norm": 17.13836097717285, "learning_rate": 2.8674438220946544e-06, "loss": 0.313, "step": 23186 }, { "epoch": 76.02295081967213, "grad_norm": 4.491488456726074, "learning_rate": 2.8666995814673027e-06, "loss": 0.3081, "step": 23187 }, { "epoch": 76.02622950819672, "grad_norm": 4.910647392272949, "learning_rate": 2.865955421275327e-06, "loss": 0.4335, "step": 23188 }, { "epoch": 76.02950819672131, "grad_norm": 5.895582675933838, "learning_rate": 2.8652113415271243e-06, "loss": 0.2486, "step": 23189 }, { "epoch": 76.0327868852459, "grad_norm": 5.490669250488281, "learning_rate": 2.864467342231082e-06, "loss": 0.3908, "step": 23190 }, { "epoch": 76.03606557377049, "grad_norm": 7.641646862030029, "learning_rate": 2.863723423395587e-06, "loss": 0.5928, "step": 23191 }, { "epoch": 76.03934426229509, "grad_norm": 4.413405418395996, "learning_rate": 2.862979585029032e-06, "loss": 0.2574, "step": 23192 }, { "epoch": 76.04262295081968, "grad_norm": 3.8590822219848633, "learning_rate": 2.8622358271398044e-06, "loss": 0.5179, "step": 23193 }, { "epoch": 76.04590163934427, "grad_norm": 4.282835483551025, "learning_rate": 2.861492149736288e-06, "loss": 0.3614, "step": 23194 }, { "epoch": 76.04918032786885, "grad_norm": 6.939260005950928, "learning_rate": 2.8607485528268676e-06, "loss": 0.2171, "step": 23195 }, { "epoch": 76.05245901639344, "grad_norm": 4.135822772979736, "learning_rate": 2.860005036419933e-06, "loss": 0.4713, "step": 23196 }, { "epoch": 76.05573770491803, "grad_norm": 4.806850910186768, "learning_rate": 2.859261600523865e-06, "loss": 0.3668, "step": 23197 }, { "epoch": 76.05901639344262, "grad_norm": 5.3420305252075195, "learning_rate": 2.8585182451470473e-06, "loss": 0.7096, "step": 23198 }, { "epoch": 76.0622950819672, "grad_norm": 4.735559463500977, "learning_rate": 2.8577749702978617e-06, "loss": 0.3739, "step": 23199 }, { "epoch": 76.06557377049181, "grad_norm": 4.583427429199219, "learning_rate": 2.8570317759846865e-06, "loss": 0.3562, "step": 23200 }, { "epoch": 76.0688524590164, "grad_norm": 21.82192611694336, "learning_rate": 2.856288662215908e-06, "loss": 0.4, "step": 23201 }, { "epoch": 76.07213114754099, "grad_norm": 4.483188152313232, "learning_rate": 2.8555456289999016e-06, "loss": 0.4933, "step": 23202 }, { "epoch": 76.07540983606557, "grad_norm": 5.825494766235352, "learning_rate": 2.8548026763450475e-06, "loss": 0.1759, "step": 23203 }, { "epoch": 76.07868852459016, "grad_norm": 5.496946811676025, "learning_rate": 2.854059804259719e-06, "loss": 0.3362, "step": 23204 }, { "epoch": 76.08196721311475, "grad_norm": 4.85124397277832, "learning_rate": 2.8533170127522992e-06, "loss": 0.2681, "step": 23205 }, { "epoch": 76.08524590163934, "grad_norm": 8.907183647155762, "learning_rate": 2.8525743018311603e-06, "loss": 0.4871, "step": 23206 }, { "epoch": 76.08852459016393, "grad_norm": 5.563483715057373, "learning_rate": 2.851831671504678e-06, "loss": 0.3856, "step": 23207 }, { "epoch": 76.09180327868853, "grad_norm": 4.601761817932129, "learning_rate": 2.851089121781223e-06, "loss": 0.2897, "step": 23208 }, { "epoch": 76.09508196721312, "grad_norm": 3.427208662033081, "learning_rate": 2.8503466526691737e-06, "loss": 0.3808, "step": 23209 }, { "epoch": 76.09836065573771, "grad_norm": 4.916423797607422, "learning_rate": 2.8496042641769007e-06, "loss": 0.312, "step": 23210 }, { "epoch": 76.1016393442623, "grad_norm": 3.6274242401123047, "learning_rate": 2.848861956312773e-06, "loss": 0.5369, "step": 23211 }, { "epoch": 76.10491803278688, "grad_norm": 7.876516342163086, "learning_rate": 2.8481197290851626e-06, "loss": 0.4022, "step": 23212 }, { "epoch": 76.10819672131147, "grad_norm": 4.883684158325195, "learning_rate": 2.847377582502435e-06, "loss": 0.3181, "step": 23213 }, { "epoch": 76.11147540983606, "grad_norm": 8.340779304504395, "learning_rate": 2.8466355165729653e-06, "loss": 0.3671, "step": 23214 }, { "epoch": 76.11475409836065, "grad_norm": 5.294316291809082, "learning_rate": 2.8458935313051174e-06, "loss": 0.45, "step": 23215 }, { "epoch": 76.11803278688525, "grad_norm": 4.373496055603027, "learning_rate": 2.845151626707259e-06, "loss": 0.2164, "step": 23216 }, { "epoch": 76.12131147540984, "grad_norm": 5.150568008422852, "learning_rate": 2.844409802787752e-06, "loss": 0.4782, "step": 23217 }, { "epoch": 76.12459016393443, "grad_norm": 4.174757480621338, "learning_rate": 2.843668059554967e-06, "loss": 0.1872, "step": 23218 }, { "epoch": 76.12786885245902, "grad_norm": 5.053538799285889, "learning_rate": 2.842926397017266e-06, "loss": 0.225, "step": 23219 }, { "epoch": 76.1311475409836, "grad_norm": 4.676314353942871, "learning_rate": 2.842184815183011e-06, "loss": 0.3475, "step": 23220 }, { "epoch": 76.1344262295082, "grad_norm": 8.441320419311523, "learning_rate": 2.8414433140605614e-06, "loss": 0.3757, "step": 23221 }, { "epoch": 76.13770491803278, "grad_norm": 10.69495677947998, "learning_rate": 2.840701893658284e-06, "loss": 0.3495, "step": 23222 }, { "epoch": 76.14098360655737, "grad_norm": 4.397298336029053, "learning_rate": 2.8399605539845376e-06, "loss": 0.3353, "step": 23223 }, { "epoch": 76.14426229508197, "grad_norm": 4.702524662017822, "learning_rate": 2.8392192950476796e-06, "loss": 0.2401, "step": 23224 }, { "epoch": 76.14754098360656, "grad_norm": 4.578283309936523, "learning_rate": 2.8384781168560693e-06, "loss": 0.2232, "step": 23225 }, { "epoch": 76.15081967213115, "grad_norm": 4.803945064544678, "learning_rate": 2.837737019418062e-06, "loss": 0.5687, "step": 23226 }, { "epoch": 76.15409836065574, "grad_norm": 5.322179794311523, "learning_rate": 2.8369960027420197e-06, "loss": 0.4336, "step": 23227 }, { "epoch": 76.15737704918033, "grad_norm": 4.658792495727539, "learning_rate": 2.8362550668362952e-06, "loss": 0.2069, "step": 23228 }, { "epoch": 76.16065573770491, "grad_norm": 4.335658550262451, "learning_rate": 2.8355142117092425e-06, "loss": 0.6741, "step": 23229 }, { "epoch": 76.1639344262295, "grad_norm": 4.394469261169434, "learning_rate": 2.8347734373692137e-06, "loss": 0.2974, "step": 23230 }, { "epoch": 76.1672131147541, "grad_norm": 4.251521110534668, "learning_rate": 2.8340327438245673e-06, "loss": 0.3142, "step": 23231 }, { "epoch": 76.1704918032787, "grad_norm": 5.425021171569824, "learning_rate": 2.833292131083654e-06, "loss": 0.543, "step": 23232 }, { "epoch": 76.17377049180328, "grad_norm": 8.36685848236084, "learning_rate": 2.832551599154821e-06, "loss": 0.3376, "step": 23233 }, { "epoch": 76.17704918032787, "grad_norm": 4.660871982574463, "learning_rate": 2.8318111480464194e-06, "loss": 0.1811, "step": 23234 }, { "epoch": 76.18032786885246, "grad_norm": 5.769438743591309, "learning_rate": 2.8310707777668025e-06, "loss": 0.3563, "step": 23235 }, { "epoch": 76.18360655737705, "grad_norm": 5.079658031463623, "learning_rate": 2.8303304883243165e-06, "loss": 0.3905, "step": 23236 }, { "epoch": 76.18688524590164, "grad_norm": 4.982070446014404, "learning_rate": 2.8295902797273057e-06, "loss": 0.3794, "step": 23237 }, { "epoch": 76.19016393442622, "grad_norm": 5.593833923339844, "learning_rate": 2.828850151984124e-06, "loss": 0.5057, "step": 23238 }, { "epoch": 76.19344262295083, "grad_norm": 6.788817882537842, "learning_rate": 2.8281101051031112e-06, "loss": 0.3845, "step": 23239 }, { "epoch": 76.19672131147541, "grad_norm": 5.22628116607666, "learning_rate": 2.827370139092612e-06, "loss": 0.6132, "step": 23240 }, { "epoch": 76.2, "grad_norm": 5.526885509490967, "learning_rate": 2.8266302539609747e-06, "loss": 0.3876, "step": 23241 }, { "epoch": 76.20327868852459, "grad_norm": 5.266722679138184, "learning_rate": 2.8258904497165406e-06, "loss": 0.3737, "step": 23242 }, { "epoch": 76.20655737704918, "grad_norm": 5.233102321624756, "learning_rate": 2.8251507263676503e-06, "loss": 0.3467, "step": 23243 }, { "epoch": 76.20983606557377, "grad_norm": 5.35236930847168, "learning_rate": 2.8244110839226426e-06, "loss": 0.461, "step": 23244 }, { "epoch": 76.21311475409836, "grad_norm": 4.085456371307373, "learning_rate": 2.8236715223898626e-06, "loss": 0.3796, "step": 23245 }, { "epoch": 76.21639344262294, "grad_norm": 4.978282928466797, "learning_rate": 2.8229320417776497e-06, "loss": 0.2778, "step": 23246 }, { "epoch": 76.21967213114755, "grad_norm": 4.695985317230225, "learning_rate": 2.822192642094336e-06, "loss": 0.3952, "step": 23247 }, { "epoch": 76.22295081967214, "grad_norm": 4.139317035675049, "learning_rate": 2.8214533233482654e-06, "loss": 0.2481, "step": 23248 }, { "epoch": 76.22622950819672, "grad_norm": 4.258374214172363, "learning_rate": 2.820714085547774e-06, "loss": 0.3592, "step": 23249 }, { "epoch": 76.22950819672131, "grad_norm": 4.837101459503174, "learning_rate": 2.8199749287011957e-06, "loss": 0.4833, "step": 23250 }, { "epoch": 76.2327868852459, "grad_norm": 5.1774516105651855, "learning_rate": 2.819235852816865e-06, "loss": 0.4866, "step": 23251 }, { "epoch": 76.23606557377049, "grad_norm": 4.1895670890808105, "learning_rate": 2.8184968579031134e-06, "loss": 0.293, "step": 23252 }, { "epoch": 76.23934426229508, "grad_norm": 5.650673866271973, "learning_rate": 2.817757943968279e-06, "loss": 0.4752, "step": 23253 }, { "epoch": 76.24262295081967, "grad_norm": 4.8569135665893555, "learning_rate": 2.8170191110206924e-06, "loss": 0.4104, "step": 23254 }, { "epoch": 76.24590163934427, "grad_norm": 5.209970951080322, "learning_rate": 2.816280359068684e-06, "loss": 0.3157, "step": 23255 }, { "epoch": 76.24918032786886, "grad_norm": 5.523514747619629, "learning_rate": 2.81554168812058e-06, "loss": 0.5384, "step": 23256 }, { "epoch": 76.25245901639344, "grad_norm": 4.913181304931641, "learning_rate": 2.8148030981847164e-06, "loss": 0.4219, "step": 23257 }, { "epoch": 76.25573770491803, "grad_norm": 8.85950756072998, "learning_rate": 2.8140645892694184e-06, "loss": 0.4136, "step": 23258 }, { "epoch": 76.25901639344262, "grad_norm": 4.705654144287109, "learning_rate": 2.8133261613830145e-06, "loss": 0.1485, "step": 23259 }, { "epoch": 76.26229508196721, "grad_norm": 5.582583427429199, "learning_rate": 2.812587814533826e-06, "loss": 0.5532, "step": 23260 }, { "epoch": 76.2655737704918, "grad_norm": 4.575899124145508, "learning_rate": 2.8118495487301865e-06, "loss": 0.4439, "step": 23261 }, { "epoch": 76.26885245901639, "grad_norm": 6.85947322845459, "learning_rate": 2.8111113639804177e-06, "loss": 0.476, "step": 23262 }, { "epoch": 76.27213114754099, "grad_norm": 4.617032051086426, "learning_rate": 2.8103732602928424e-06, "loss": 0.3578, "step": 23263 }, { "epoch": 76.27540983606558, "grad_norm": 4.688540458679199, "learning_rate": 2.809635237675784e-06, "loss": 0.5519, "step": 23264 }, { "epoch": 76.27868852459017, "grad_norm": 5.353930950164795, "learning_rate": 2.8088972961375614e-06, "loss": 0.5617, "step": 23265 }, { "epoch": 76.28196721311475, "grad_norm": 12.195597648620605, "learning_rate": 2.808159435686503e-06, "loss": 0.3384, "step": 23266 }, { "epoch": 76.28524590163934, "grad_norm": 5.855596542358398, "learning_rate": 2.8074216563309233e-06, "loss": 0.6166, "step": 23267 }, { "epoch": 76.28852459016393, "grad_norm": 7.293468952178955, "learning_rate": 2.8066839580791427e-06, "loss": 0.5758, "step": 23268 }, { "epoch": 76.29180327868852, "grad_norm": 5.356149196624756, "learning_rate": 2.805946340939476e-06, "loss": 0.2749, "step": 23269 }, { "epoch": 76.29508196721312, "grad_norm": 4.441783905029297, "learning_rate": 2.805208804920249e-06, "loss": 0.5355, "step": 23270 }, { "epoch": 76.29836065573771, "grad_norm": 5.523360252380371, "learning_rate": 2.8044713500297716e-06, "loss": 0.3107, "step": 23271 }, { "epoch": 76.3016393442623, "grad_norm": 5.710964202880859, "learning_rate": 2.8037339762763627e-06, "loss": 0.3183, "step": 23272 }, { "epoch": 76.30491803278689, "grad_norm": 5.095492362976074, "learning_rate": 2.802996683668332e-06, "loss": 0.3098, "step": 23273 }, { "epoch": 76.30819672131148, "grad_norm": 5.106629371643066, "learning_rate": 2.8022594722139997e-06, "loss": 0.5832, "step": 23274 }, { "epoch": 76.31147540983606, "grad_norm": 5.340355396270752, "learning_rate": 2.8015223419216754e-06, "loss": 0.4989, "step": 23275 }, { "epoch": 76.31475409836065, "grad_norm": 6.274806499481201, "learning_rate": 2.80078529279967e-06, "loss": 0.3187, "step": 23276 }, { "epoch": 76.31803278688524, "grad_norm": 4.757986545562744, "learning_rate": 2.800048324856298e-06, "loss": 0.2759, "step": 23277 }, { "epoch": 76.32131147540984, "grad_norm": 5.176990509033203, "learning_rate": 2.7993114380998633e-06, "loss": 0.5777, "step": 23278 }, { "epoch": 76.32459016393443, "grad_norm": 4.009060382843018, "learning_rate": 2.798574632538682e-06, "loss": 0.3958, "step": 23279 }, { "epoch": 76.32786885245902, "grad_norm": 5.483425617218018, "learning_rate": 2.7978379081810592e-06, "loss": 0.4626, "step": 23280 }, { "epoch": 76.33114754098361, "grad_norm": 4.269043445587158, "learning_rate": 2.7971012650353023e-06, "loss": 0.2516, "step": 23281 }, { "epoch": 76.3344262295082, "grad_norm": 5.027109622955322, "learning_rate": 2.7963647031097153e-06, "loss": 0.5542, "step": 23282 }, { "epoch": 76.33770491803278, "grad_norm": 7.168043613433838, "learning_rate": 2.7956282224126084e-06, "loss": 0.302, "step": 23283 }, { "epoch": 76.34098360655737, "grad_norm": 23.111454010009766, "learning_rate": 2.7948918229522847e-06, "loss": 0.5089, "step": 23284 }, { "epoch": 76.34426229508196, "grad_norm": 6.458731651306152, "learning_rate": 2.794155504737046e-06, "loss": 0.5839, "step": 23285 }, { "epoch": 76.34754098360656, "grad_norm": 4.2887420654296875, "learning_rate": 2.793419267775194e-06, "loss": 0.3192, "step": 23286 }, { "epoch": 76.35081967213115, "grad_norm": 4.694264888763428, "learning_rate": 2.7926831120750362e-06, "loss": 0.4945, "step": 23287 }, { "epoch": 76.35409836065574, "grad_norm": 4.879776954650879, "learning_rate": 2.791947037644869e-06, "loss": 0.2952, "step": 23288 }, { "epoch": 76.35737704918033, "grad_norm": 5.560814380645752, "learning_rate": 2.7912110444929942e-06, "loss": 0.3834, "step": 23289 }, { "epoch": 76.36065573770492, "grad_norm": 5.384796142578125, "learning_rate": 2.7904751326277067e-06, "loss": 0.4646, "step": 23290 }, { "epoch": 76.3639344262295, "grad_norm": 5.956304550170898, "learning_rate": 2.7897393020573117e-06, "loss": 0.4227, "step": 23291 }, { "epoch": 76.3672131147541, "grad_norm": 6.360708713531494, "learning_rate": 2.7890035527901027e-06, "loss": 0.5603, "step": 23292 }, { "epoch": 76.37049180327868, "grad_norm": 5.453044891357422, "learning_rate": 2.7882678848343724e-06, "loss": 0.4534, "step": 23293 }, { "epoch": 76.37377049180328, "grad_norm": 5.412570476531982, "learning_rate": 2.787532298198423e-06, "loss": 0.3676, "step": 23294 }, { "epoch": 76.37704918032787, "grad_norm": 11.951951026916504, "learning_rate": 2.786796792890547e-06, "loss": 0.4326, "step": 23295 }, { "epoch": 76.38032786885246, "grad_norm": 4.524880409240723, "learning_rate": 2.786061368919034e-06, "loss": 0.478, "step": 23296 }, { "epoch": 76.38360655737705, "grad_norm": 5.115376949310303, "learning_rate": 2.7853260262921823e-06, "loss": 0.5798, "step": 23297 }, { "epoch": 76.38688524590164, "grad_norm": 4.806334018707275, "learning_rate": 2.7845907650182814e-06, "loss": 0.3696, "step": 23298 }, { "epoch": 76.39016393442623, "grad_norm": 4.501686096191406, "learning_rate": 2.783855585105618e-06, "loss": 0.3346, "step": 23299 }, { "epoch": 76.39344262295081, "grad_norm": 4.326981544494629, "learning_rate": 2.7831204865624897e-06, "loss": 0.3577, "step": 23300 }, { "epoch": 76.3967213114754, "grad_norm": 5.907792568206787, "learning_rate": 2.782385469397181e-06, "loss": 0.3049, "step": 23301 }, { "epoch": 76.4, "grad_norm": 5.325024604797363, "learning_rate": 2.78165053361798e-06, "loss": 0.3948, "step": 23302 }, { "epoch": 76.4032786885246, "grad_norm": 4.492021560668945, "learning_rate": 2.7809156792331748e-06, "loss": 0.4851, "step": 23303 }, { "epoch": 76.40655737704918, "grad_norm": 4.000931739807129, "learning_rate": 2.7801809062510488e-06, "loss": 0.4819, "step": 23304 }, { "epoch": 76.40983606557377, "grad_norm": 4.657398223876953, "learning_rate": 2.7794462146798928e-06, "loss": 0.3816, "step": 23305 }, { "epoch": 76.41311475409836, "grad_norm": 5.201255798339844, "learning_rate": 2.778711604527988e-06, "loss": 0.377, "step": 23306 }, { "epoch": 76.41639344262295, "grad_norm": 9.309619903564453, "learning_rate": 2.7779770758036173e-06, "loss": 0.5172, "step": 23307 }, { "epoch": 76.41967213114754, "grad_norm": 5.828235626220703, "learning_rate": 2.777242628515062e-06, "loss": 0.5083, "step": 23308 }, { "epoch": 76.42295081967212, "grad_norm": 5.805547714233398, "learning_rate": 2.7765082626706097e-06, "loss": 0.4629, "step": 23309 }, { "epoch": 76.42622950819673, "grad_norm": 4.485323905944824, "learning_rate": 2.7757739782785354e-06, "loss": 0.4184, "step": 23310 }, { "epoch": 76.42950819672132, "grad_norm": 5.449385643005371, "learning_rate": 2.7750397753471227e-06, "loss": 0.3449, "step": 23311 }, { "epoch": 76.4327868852459, "grad_norm": 4.112715721130371, "learning_rate": 2.7743056538846437e-06, "loss": 0.4534, "step": 23312 }, { "epoch": 76.43606557377049, "grad_norm": 5.225471019744873, "learning_rate": 2.773571613899385e-06, "loss": 0.4706, "step": 23313 }, { "epoch": 76.43934426229508, "grad_norm": 5.2533698081970215, "learning_rate": 2.7728376553996207e-06, "loss": 0.6426, "step": 23314 }, { "epoch": 76.44262295081967, "grad_norm": 6.94619607925415, "learning_rate": 2.7721037783936256e-06, "loss": 0.4799, "step": 23315 }, { "epoch": 76.44590163934426, "grad_norm": 4.9384589195251465, "learning_rate": 2.7713699828896756e-06, "loss": 0.4312, "step": 23316 }, { "epoch": 76.44918032786886, "grad_norm": 4.4540486335754395, "learning_rate": 2.770636268896042e-06, "loss": 0.2959, "step": 23317 }, { "epoch": 76.45245901639345, "grad_norm": 7.6802592277526855, "learning_rate": 2.7699026364210048e-06, "loss": 0.2722, "step": 23318 }, { "epoch": 76.45573770491804, "grad_norm": 4.616288185119629, "learning_rate": 2.7691690854728317e-06, "loss": 0.4448, "step": 23319 }, { "epoch": 76.45901639344262, "grad_norm": 5.032630443572998, "learning_rate": 2.768435616059796e-06, "loss": 0.4993, "step": 23320 }, { "epoch": 76.46229508196721, "grad_norm": 7.367059230804443, "learning_rate": 2.7677022281901634e-06, "loss": 0.2447, "step": 23321 }, { "epoch": 76.4655737704918, "grad_norm": 5.917060852050781, "learning_rate": 2.766968921872213e-06, "loss": 0.328, "step": 23322 }, { "epoch": 76.46885245901639, "grad_norm": 5.562460422515869, "learning_rate": 2.766235697114207e-06, "loss": 0.3826, "step": 23323 }, { "epoch": 76.47213114754098, "grad_norm": 5.091547012329102, "learning_rate": 2.765502553924415e-06, "loss": 0.4074, "step": 23324 }, { "epoch": 76.47540983606558, "grad_norm": 4.75209379196167, "learning_rate": 2.7647694923111e-06, "loss": 0.3444, "step": 23325 }, { "epoch": 76.47868852459017, "grad_norm": 3.854757308959961, "learning_rate": 2.7640365122825363e-06, "loss": 0.1968, "step": 23326 }, { "epoch": 76.48196721311476, "grad_norm": 43.017372131347656, "learning_rate": 2.7633036138469836e-06, "loss": 0.385, "step": 23327 }, { "epoch": 76.48524590163935, "grad_norm": 4.558444976806641, "learning_rate": 2.762570797012707e-06, "loss": 0.4362, "step": 23328 }, { "epoch": 76.48852459016393, "grad_norm": 5.111742973327637, "learning_rate": 2.761838061787969e-06, "loss": 0.3843, "step": 23329 }, { "epoch": 76.49180327868852, "grad_norm": 5.732786655426025, "learning_rate": 2.7611054081810307e-06, "loss": 0.4241, "step": 23330 }, { "epoch": 76.49508196721311, "grad_norm": 4.0481953620910645, "learning_rate": 2.760372836200158e-06, "loss": 0.1382, "step": 23331 }, { "epoch": 76.4983606557377, "grad_norm": 4.229673385620117, "learning_rate": 2.7596403458536092e-06, "loss": 0.4108, "step": 23332 }, { "epoch": 76.5016393442623, "grad_norm": 5.43330717086792, "learning_rate": 2.758907937149643e-06, "loss": 0.3246, "step": 23333 }, { "epoch": 76.50491803278689, "grad_norm": 4.061699390411377, "learning_rate": 2.7581756100965164e-06, "loss": 0.2095, "step": 23334 }, { "epoch": 76.50819672131148, "grad_norm": 5.736423015594482, "learning_rate": 2.757443364702492e-06, "loss": 0.5148, "step": 23335 }, { "epoch": 76.51147540983607, "grad_norm": 5.137757778167725, "learning_rate": 2.7567112009758245e-06, "loss": 0.176, "step": 23336 }, { "epoch": 76.51475409836065, "grad_norm": 5.515628337860107, "learning_rate": 2.7559791189247688e-06, "loss": 0.3658, "step": 23337 }, { "epoch": 76.51803278688524, "grad_norm": 4.675024509429932, "learning_rate": 2.7552471185575767e-06, "loss": 0.2967, "step": 23338 }, { "epoch": 76.52131147540983, "grad_norm": 5.081174850463867, "learning_rate": 2.7545151998825103e-06, "loss": 0.355, "step": 23339 }, { "epoch": 76.52459016393442, "grad_norm": 7.149975299835205, "learning_rate": 2.753783362907818e-06, "loss": 0.3848, "step": 23340 }, { "epoch": 76.52786885245902, "grad_norm": 16.927371978759766, "learning_rate": 2.7530516076417522e-06, "loss": 0.3787, "step": 23341 }, { "epoch": 76.53114754098361, "grad_norm": 5.019653797149658, "learning_rate": 2.752319934092562e-06, "loss": 0.5141, "step": 23342 }, { "epoch": 76.5344262295082, "grad_norm": 4.117630958557129, "learning_rate": 2.751588342268503e-06, "loss": 0.3409, "step": 23343 }, { "epoch": 76.53770491803279, "grad_norm": 4.146432876586914, "learning_rate": 2.7508568321778218e-06, "loss": 0.2941, "step": 23344 }, { "epoch": 76.54098360655738, "grad_norm": 4.605225563049316, "learning_rate": 2.750125403828767e-06, "loss": 0.2066, "step": 23345 }, { "epoch": 76.54426229508196, "grad_norm": 5.002648830413818, "learning_rate": 2.7493940572295843e-06, "loss": 0.2934, "step": 23346 }, { "epoch": 76.54754098360655, "grad_norm": 8.080568313598633, "learning_rate": 2.7486627923885236e-06, "loss": 0.591, "step": 23347 }, { "epoch": 76.55081967213114, "grad_norm": 4.814126968383789, "learning_rate": 2.7479316093138297e-06, "loss": 0.355, "step": 23348 }, { "epoch": 76.55409836065574, "grad_norm": 5.996356964111328, "learning_rate": 2.7472005080137455e-06, "loss": 0.564, "step": 23349 }, { "epoch": 76.55737704918033, "grad_norm": 5.5588178634643555, "learning_rate": 2.746469488496518e-06, "loss": 0.3984, "step": 23350 }, { "epoch": 76.56065573770492, "grad_norm": 6.0118889808654785, "learning_rate": 2.7457385507703905e-06, "loss": 0.2434, "step": 23351 }, { "epoch": 76.56393442622951, "grad_norm": 4.298043727874756, "learning_rate": 2.745007694843599e-06, "loss": 0.4618, "step": 23352 }, { "epoch": 76.5672131147541, "grad_norm": 5.181695461273193, "learning_rate": 2.7442769207243926e-06, "loss": 0.6043, "step": 23353 }, { "epoch": 76.57049180327868, "grad_norm": 5.821741580963135, "learning_rate": 2.743546228421008e-06, "loss": 0.3721, "step": 23354 }, { "epoch": 76.57377049180327, "grad_norm": 5.153505325317383, "learning_rate": 2.7428156179416842e-06, "loss": 0.2569, "step": 23355 }, { "epoch": 76.57704918032788, "grad_norm": 4.540640830993652, "learning_rate": 2.742085089294657e-06, "loss": 0.3911, "step": 23356 }, { "epoch": 76.58032786885246, "grad_norm": 4.821072578430176, "learning_rate": 2.7413546424881698e-06, "loss": 0.3813, "step": 23357 }, { "epoch": 76.58360655737705, "grad_norm": 3.9459128379821777, "learning_rate": 2.740624277530456e-06, "loss": 0.3775, "step": 23358 }, { "epoch": 76.58688524590164, "grad_norm": 3.8842103481292725, "learning_rate": 2.739893994429751e-06, "loss": 0.248, "step": 23359 }, { "epoch": 76.59016393442623, "grad_norm": 4.603348731994629, "learning_rate": 2.739163793194287e-06, "loss": 0.2659, "step": 23360 }, { "epoch": 76.59344262295082, "grad_norm": 4.261305809020996, "learning_rate": 2.7384336738323047e-06, "loss": 0.4914, "step": 23361 }, { "epoch": 76.5967213114754, "grad_norm": 6.1836256980896, "learning_rate": 2.7377036363520317e-06, "loss": 0.4451, "step": 23362 }, { "epoch": 76.6, "grad_norm": 6.271854877471924, "learning_rate": 2.736973680761702e-06, "loss": 0.448, "step": 23363 }, { "epoch": 76.6032786885246, "grad_norm": 4.839287281036377, "learning_rate": 2.7362438070695418e-06, "loss": 0.4366, "step": 23364 }, { "epoch": 76.60655737704919, "grad_norm": 4.046041011810303, "learning_rate": 2.735514015283789e-06, "loss": 0.5787, "step": 23365 }, { "epoch": 76.60983606557377, "grad_norm": 5.172839164733887, "learning_rate": 2.734784305412668e-06, "loss": 0.2358, "step": 23366 }, { "epoch": 76.61311475409836, "grad_norm": 5.449665069580078, "learning_rate": 2.7340546774644083e-06, "loss": 0.642, "step": 23367 }, { "epoch": 76.61639344262295, "grad_norm": 6.698968410491943, "learning_rate": 2.7333251314472363e-06, "loss": 0.3597, "step": 23368 }, { "epoch": 76.61967213114754, "grad_norm": 6.505634784698486, "learning_rate": 2.7325956673693766e-06, "loss": 0.3189, "step": 23369 }, { "epoch": 76.62295081967213, "grad_norm": 4.932638168334961, "learning_rate": 2.7318662852390586e-06, "loss": 0.3668, "step": 23370 }, { "epoch": 76.62622950819672, "grad_norm": 6.026106834411621, "learning_rate": 2.7311369850645064e-06, "loss": 0.3193, "step": 23371 }, { "epoch": 76.62950819672132, "grad_norm": 6.57716178894043, "learning_rate": 2.730407766853943e-06, "loss": 0.2697, "step": 23372 }, { "epoch": 76.6327868852459, "grad_norm": 4.136410236358643, "learning_rate": 2.7296786306155864e-06, "loss": 0.4694, "step": 23373 }, { "epoch": 76.6360655737705, "grad_norm": 4.5938568115234375, "learning_rate": 2.7289495763576657e-06, "loss": 0.4547, "step": 23374 }, { "epoch": 76.63934426229508, "grad_norm": 7.355963706970215, "learning_rate": 2.7282206040883987e-06, "loss": 0.4634, "step": 23375 }, { "epoch": 76.64262295081967, "grad_norm": 5.864896297454834, "learning_rate": 2.727491713816005e-06, "loss": 0.4271, "step": 23376 }, { "epoch": 76.64590163934426, "grad_norm": 14.527016639709473, "learning_rate": 2.726762905548701e-06, "loss": 0.4331, "step": 23377 }, { "epoch": 76.64918032786885, "grad_norm": 5.7495436668396, "learning_rate": 2.7260341792947097e-06, "loss": 0.6026, "step": 23378 }, { "epoch": 76.65245901639344, "grad_norm": 6.650550365447998, "learning_rate": 2.725305535062247e-06, "loss": 0.3566, "step": 23379 }, { "epoch": 76.65573770491804, "grad_norm": 8.895587921142578, "learning_rate": 2.7245769728595284e-06, "loss": 0.5765, "step": 23380 }, { "epoch": 76.65901639344263, "grad_norm": 4.649160861968994, "learning_rate": 2.7238484926947684e-06, "loss": 0.4329, "step": 23381 }, { "epoch": 76.66229508196722, "grad_norm": 5.677810192108154, "learning_rate": 2.723120094576178e-06, "loss": 0.3902, "step": 23382 }, { "epoch": 76.6655737704918, "grad_norm": 4.557286739349365, "learning_rate": 2.722391778511979e-06, "loss": 0.3474, "step": 23383 }, { "epoch": 76.66885245901639, "grad_norm": 8.2320556640625, "learning_rate": 2.721663544510379e-06, "loss": 0.3451, "step": 23384 }, { "epoch": 76.67213114754098, "grad_norm": 5.14879846572876, "learning_rate": 2.72093539257959e-06, "loss": 0.3282, "step": 23385 }, { "epoch": 76.67540983606557, "grad_norm": 6.4074320793151855, "learning_rate": 2.72020732272782e-06, "loss": 0.1813, "step": 23386 }, { "epoch": 76.67868852459016, "grad_norm": 5.924179553985596, "learning_rate": 2.7194793349632854e-06, "loss": 0.4556, "step": 23387 }, { "epoch": 76.68196721311476, "grad_norm": 4.00390100479126, "learning_rate": 2.71875142929419e-06, "loss": 0.4303, "step": 23388 }, { "epoch": 76.68524590163935, "grad_norm": 6.2940993309021, "learning_rate": 2.7180236057287423e-06, "loss": 0.5111, "step": 23389 }, { "epoch": 76.68852459016394, "grad_norm": 5.038845062255859, "learning_rate": 2.717295864275148e-06, "loss": 0.2544, "step": 23390 }, { "epoch": 76.69180327868852, "grad_norm": 5.370311260223389, "learning_rate": 2.7165682049416175e-06, "loss": 0.3518, "step": 23391 }, { "epoch": 76.69508196721311, "grad_norm": 3.3420169353485107, "learning_rate": 2.7158406277363537e-06, "loss": 0.2153, "step": 23392 }, { "epoch": 76.6983606557377, "grad_norm": 4.5942816734313965, "learning_rate": 2.7151131326675596e-06, "loss": 0.354, "step": 23393 }, { "epoch": 76.70163934426229, "grad_norm": 5.710554599761963, "learning_rate": 2.7143857197434397e-06, "loss": 0.3334, "step": 23394 }, { "epoch": 76.70491803278688, "grad_norm": 4.260831832885742, "learning_rate": 2.7136583889721933e-06, "loss": 0.4146, "step": 23395 }, { "epoch": 76.70819672131148, "grad_norm": 5.092208385467529, "learning_rate": 2.712931140362027e-06, "loss": 0.4355, "step": 23396 }, { "epoch": 76.71147540983607, "grad_norm": 5.657078742980957, "learning_rate": 2.7122039739211394e-06, "loss": 0.3739, "step": 23397 }, { "epoch": 76.71475409836066, "grad_norm": 5.802751541137695, "learning_rate": 2.711476889657726e-06, "loss": 0.425, "step": 23398 }, { "epoch": 76.71803278688525, "grad_norm": 5.40162467956543, "learning_rate": 2.7107498875799922e-06, "loss": 0.322, "step": 23399 }, { "epoch": 76.72131147540983, "grad_norm": 5.20466423034668, "learning_rate": 2.710022967696132e-06, "loss": 0.3971, "step": 23400 }, { "epoch": 76.72459016393442, "grad_norm": 5.486204624176025, "learning_rate": 2.7092961300143396e-06, "loss": 0.3049, "step": 23401 }, { "epoch": 76.72786885245901, "grad_norm": 3.959226131439209, "learning_rate": 2.7085693745428167e-06, "loss": 0.5182, "step": 23402 }, { "epoch": 76.73114754098361, "grad_norm": 6.662047386169434, "learning_rate": 2.7078427012897547e-06, "loss": 0.635, "step": 23403 }, { "epoch": 76.7344262295082, "grad_norm": 7.853163719177246, "learning_rate": 2.707116110263346e-06, "loss": 0.275, "step": 23404 }, { "epoch": 76.73770491803279, "grad_norm": 3.6803529262542725, "learning_rate": 2.7063896014717884e-06, "loss": 0.4676, "step": 23405 }, { "epoch": 76.74098360655738, "grad_norm": 4.051201343536377, "learning_rate": 2.705663174923272e-06, "loss": 0.5106, "step": 23406 }, { "epoch": 76.74426229508197, "grad_norm": 8.557133674621582, "learning_rate": 2.704936830625987e-06, "loss": 0.2347, "step": 23407 }, { "epoch": 76.74754098360656, "grad_norm": 4.426645755767822, "learning_rate": 2.7042105685881213e-06, "loss": 0.3091, "step": 23408 }, { "epoch": 76.75081967213114, "grad_norm": 5.148177623748779, "learning_rate": 2.7034843888178698e-06, "loss": 0.569, "step": 23409 }, { "epoch": 76.75409836065573, "grad_norm": 5.3505859375, "learning_rate": 2.7027582913234186e-06, "loss": 0.3145, "step": 23410 }, { "epoch": 76.75737704918033, "grad_norm": 5.401461601257324, "learning_rate": 2.702032276112956e-06, "loss": 0.4425, "step": 23411 }, { "epoch": 76.76065573770492, "grad_norm": 6.292735576629639, "learning_rate": 2.7013063431946627e-06, "loss": 0.3214, "step": 23412 }, { "epoch": 76.76393442622951, "grad_norm": 5.1069016456604, "learning_rate": 2.7005804925767333e-06, "loss": 0.2985, "step": 23413 }, { "epoch": 76.7672131147541, "grad_norm": 4.992934226989746, "learning_rate": 2.699854724267348e-06, "loss": 0.4065, "step": 23414 }, { "epoch": 76.77049180327869, "grad_norm": 4.118209362030029, "learning_rate": 2.6991290382746905e-06, "loss": 0.3266, "step": 23415 }, { "epoch": 76.77377049180328, "grad_norm": 5.345637798309326, "learning_rate": 2.698403434606942e-06, "loss": 0.3563, "step": 23416 }, { "epoch": 76.77704918032786, "grad_norm": 5.13695764541626, "learning_rate": 2.6976779132722887e-06, "loss": 0.5994, "step": 23417 }, { "epoch": 76.78032786885245, "grad_norm": 6.111154079437256, "learning_rate": 2.69695247427891e-06, "loss": 0.3703, "step": 23418 }, { "epoch": 76.78360655737706, "grad_norm": 4.448968410491943, "learning_rate": 2.696227117634985e-06, "loss": 0.5415, "step": 23419 }, { "epoch": 76.78688524590164, "grad_norm": 15.439453125, "learning_rate": 2.6955018433486933e-06, "loss": 0.2451, "step": 23420 }, { "epoch": 76.79016393442623, "grad_norm": 3.9160380363464355, "learning_rate": 2.6947766514282104e-06, "loss": 0.1147, "step": 23421 }, { "epoch": 76.79344262295082, "grad_norm": 4.583573818206787, "learning_rate": 2.6940515418817194e-06, "loss": 0.4576, "step": 23422 }, { "epoch": 76.79672131147541, "grad_norm": 7.027183532714844, "learning_rate": 2.6933265147173924e-06, "loss": 0.3345, "step": 23423 }, { "epoch": 76.8, "grad_norm": 6.480367660522461, "learning_rate": 2.692601569943407e-06, "loss": 0.5238, "step": 23424 }, { "epoch": 76.80327868852459, "grad_norm": 5.373983860015869, "learning_rate": 2.6918767075679342e-06, "loss": 0.5463, "step": 23425 }, { "epoch": 76.80655737704917, "grad_norm": 5.060824394226074, "learning_rate": 2.6911519275991517e-06, "loss": 0.3513, "step": 23426 }, { "epoch": 76.80983606557378, "grad_norm": 6.691656589508057, "learning_rate": 2.6904272300452316e-06, "loss": 0.3075, "step": 23427 }, { "epoch": 76.81311475409836, "grad_norm": 4.4751482009887695, "learning_rate": 2.6897026149143435e-06, "loss": 0.3445, "step": 23428 }, { "epoch": 76.81639344262295, "grad_norm": 4.515601634979248, "learning_rate": 2.6889780822146605e-06, "loss": 0.3593, "step": 23429 }, { "epoch": 76.81967213114754, "grad_norm": 4.650857925415039, "learning_rate": 2.688253631954347e-06, "loss": 0.4064, "step": 23430 }, { "epoch": 76.82295081967213, "grad_norm": 6.3396220207214355, "learning_rate": 2.6875292641415794e-06, "loss": 0.2659, "step": 23431 }, { "epoch": 76.82622950819672, "grad_norm": 4.4120988845825195, "learning_rate": 2.686804978784523e-06, "loss": 0.458, "step": 23432 }, { "epoch": 76.8295081967213, "grad_norm": 4.142595291137695, "learning_rate": 2.6860807758913445e-06, "loss": 0.2941, "step": 23433 }, { "epoch": 76.8327868852459, "grad_norm": 4.928340435028076, "learning_rate": 2.685356655470206e-06, "loss": 0.3138, "step": 23434 }, { "epoch": 76.8360655737705, "grad_norm": 5.332067012786865, "learning_rate": 2.68463261752928e-06, "loss": 0.5096, "step": 23435 }, { "epoch": 76.83934426229509, "grad_norm": 4.367399215698242, "learning_rate": 2.6839086620767273e-06, "loss": 0.3873, "step": 23436 }, { "epoch": 76.84262295081967, "grad_norm": 5.090602397918701, "learning_rate": 2.683184789120711e-06, "loss": 0.2381, "step": 23437 }, { "epoch": 76.84590163934426, "grad_norm": 6.768447399139404, "learning_rate": 2.6824609986693906e-06, "loss": 0.3393, "step": 23438 }, { "epoch": 76.84918032786885, "grad_norm": 4.448944568634033, "learning_rate": 2.6817372907309336e-06, "loss": 0.3463, "step": 23439 }, { "epoch": 76.85245901639344, "grad_norm": 6.673822402954102, "learning_rate": 2.681013665313499e-06, "loss": 0.2577, "step": 23440 }, { "epoch": 76.85573770491803, "grad_norm": 3.960710048675537, "learning_rate": 2.6802901224252444e-06, "loss": 0.1598, "step": 23441 }, { "epoch": 76.85901639344263, "grad_norm": 4.56315279006958, "learning_rate": 2.679566662074329e-06, "loss": 0.3159, "step": 23442 }, { "epoch": 76.86229508196722, "grad_norm": 5.255910873413086, "learning_rate": 2.678843284268907e-06, "loss": 0.2418, "step": 23443 }, { "epoch": 76.8655737704918, "grad_norm": 5.186794757843018, "learning_rate": 2.6781199890171438e-06, "loss": 0.3878, "step": 23444 }, { "epoch": 76.8688524590164, "grad_norm": 5.83283805847168, "learning_rate": 2.6773967763271903e-06, "loss": 0.3553, "step": 23445 }, { "epoch": 76.87213114754098, "grad_norm": 4.751291751861572, "learning_rate": 2.676673646207201e-06, "loss": 0.3507, "step": 23446 }, { "epoch": 76.87540983606557, "grad_norm": 4.581732749938965, "learning_rate": 2.675950598665328e-06, "loss": 0.2518, "step": 23447 }, { "epoch": 76.87868852459016, "grad_norm": 5.363866806030273, "learning_rate": 2.6752276337097293e-06, "loss": 0.3786, "step": 23448 }, { "epoch": 76.88196721311475, "grad_norm": 4.3519415855407715, "learning_rate": 2.6745047513485557e-06, "loss": 0.4178, "step": 23449 }, { "epoch": 76.88524590163935, "grad_norm": 4.801410675048828, "learning_rate": 2.6737819515899576e-06, "loss": 0.3571, "step": 23450 }, { "epoch": 76.88852459016394, "grad_norm": 4.921084403991699, "learning_rate": 2.6730592344420826e-06, "loss": 0.5834, "step": 23451 }, { "epoch": 76.89180327868853, "grad_norm": 4.162084102630615, "learning_rate": 2.6723365999130855e-06, "loss": 0.3648, "step": 23452 }, { "epoch": 76.89508196721312, "grad_norm": 5.47651481628418, "learning_rate": 2.671614048011112e-06, "loss": 0.3835, "step": 23453 }, { "epoch": 76.8983606557377, "grad_norm": 4.076236248016357, "learning_rate": 2.6708915787443068e-06, "loss": 0.2618, "step": 23454 }, { "epoch": 76.90163934426229, "grad_norm": 3.7853496074676514, "learning_rate": 2.670169192120823e-06, "loss": 0.3738, "step": 23455 }, { "epoch": 76.90491803278688, "grad_norm": 4.999166965484619, "learning_rate": 2.669446888148802e-06, "loss": 0.2961, "step": 23456 }, { "epoch": 76.90819672131147, "grad_norm": 5.502477645874023, "learning_rate": 2.6687246668363865e-06, "loss": 0.4481, "step": 23457 }, { "epoch": 76.91147540983607, "grad_norm": 6.35469388961792, "learning_rate": 2.6680025281917255e-06, "loss": 0.4354, "step": 23458 }, { "epoch": 76.91475409836066, "grad_norm": 5.228428363800049, "learning_rate": 2.6672804722229604e-06, "loss": 0.3019, "step": 23459 }, { "epoch": 76.91803278688525, "grad_norm": 5.673776626586914, "learning_rate": 2.6665584989382288e-06, "loss": 0.3732, "step": 23460 }, { "epoch": 76.92131147540984, "grad_norm": 4.729709148406982, "learning_rate": 2.6658366083456765e-06, "loss": 0.2677, "step": 23461 }, { "epoch": 76.92459016393443, "grad_norm": 4.887304782867432, "learning_rate": 2.6651148004534434e-06, "loss": 0.3773, "step": 23462 }, { "epoch": 76.92786885245901, "grad_norm": 4.8489813804626465, "learning_rate": 2.664393075269668e-06, "loss": 0.2486, "step": 23463 }, { "epoch": 76.9311475409836, "grad_norm": 4.775202751159668, "learning_rate": 2.663671432802484e-06, "loss": 0.3119, "step": 23464 }, { "epoch": 76.93442622950819, "grad_norm": 4.63571834564209, "learning_rate": 2.6629498730600346e-06, "loss": 0.4308, "step": 23465 }, { "epoch": 76.9377049180328, "grad_norm": 13.76542854309082, "learning_rate": 2.6622283960504546e-06, "loss": 0.4697, "step": 23466 }, { "epoch": 76.94098360655738, "grad_norm": 12.092679023742676, "learning_rate": 2.661507001781879e-06, "loss": 0.3693, "step": 23467 }, { "epoch": 76.94426229508197, "grad_norm": 4.748181343078613, "learning_rate": 2.6607856902624417e-06, "loss": 0.4106, "step": 23468 }, { "epoch": 76.94754098360656, "grad_norm": 5.283345699310303, "learning_rate": 2.6600644615002745e-06, "loss": 0.4657, "step": 23469 }, { "epoch": 76.95081967213115, "grad_norm": 6.765434265136719, "learning_rate": 2.6593433155035143e-06, "loss": 0.5536, "step": 23470 }, { "epoch": 76.95409836065573, "grad_norm": 4.694849967956543, "learning_rate": 2.6586222522802905e-06, "loss": 0.6441, "step": 23471 }, { "epoch": 76.95737704918032, "grad_norm": 6.044251441955566, "learning_rate": 2.657901271838734e-06, "loss": 0.5453, "step": 23472 }, { "epoch": 76.96065573770491, "grad_norm": 4.609004497528076, "learning_rate": 2.6571803741869727e-06, "loss": 0.2735, "step": 23473 }, { "epoch": 76.96393442622951, "grad_norm": 4.254777908325195, "learning_rate": 2.6564595593331387e-06, "loss": 0.2489, "step": 23474 }, { "epoch": 76.9672131147541, "grad_norm": 6.858779430389404, "learning_rate": 2.65573882728536e-06, "loss": 0.4778, "step": 23475 }, { "epoch": 76.97049180327869, "grad_norm": 5.403506755828857, "learning_rate": 2.6550181780517614e-06, "loss": 0.6039, "step": 23476 }, { "epoch": 76.97377049180328, "grad_norm": 6.86060094833374, "learning_rate": 2.6542976116404662e-06, "loss": 0.4744, "step": 23477 }, { "epoch": 76.97704918032787, "grad_norm": 8.14067554473877, "learning_rate": 2.653577128059607e-06, "loss": 0.6113, "step": 23478 }, { "epoch": 76.98032786885246, "grad_norm": 4.345377445220947, "learning_rate": 2.6528567273173035e-06, "loss": 0.2324, "step": 23479 }, { "epoch": 76.98360655737704, "grad_norm": 4.786888122558594, "learning_rate": 2.6521364094216795e-06, "loss": 0.4856, "step": 23480 }, { "epoch": 76.98688524590163, "grad_norm": 4.074772357940674, "learning_rate": 2.6514161743808575e-06, "loss": 0.6449, "step": 23481 }, { "epoch": 76.99016393442623, "grad_norm": 4.8412041664123535, "learning_rate": 2.6506960222029555e-06, "loss": 0.3921, "step": 23482 }, { "epoch": 76.99344262295082, "grad_norm": 6.310454845428467, "learning_rate": 2.6499759528961e-06, "loss": 0.4734, "step": 23483 }, { "epoch": 76.99672131147541, "grad_norm": 4.540224075317383, "learning_rate": 2.6492559664684083e-06, "loss": 0.3567, "step": 23484 }, { "epoch": 77.0, "grad_norm": 6.002243995666504, "learning_rate": 2.648536062927999e-06, "loss": 0.5291, "step": 23485 }, { "epoch": 77.00327868852459, "grad_norm": 4.841890811920166, "learning_rate": 2.6478162422829845e-06, "loss": 0.3797, "step": 23486 }, { "epoch": 77.00655737704918, "grad_norm": 6.178897857666016, "learning_rate": 2.647096504541491e-06, "loss": 0.6208, "step": 23487 }, { "epoch": 77.00983606557377, "grad_norm": 4.423913478851318, "learning_rate": 2.6463768497116283e-06, "loss": 0.3789, "step": 23488 }, { "epoch": 77.01311475409837, "grad_norm": 6.263099193572998, "learning_rate": 2.6456572778015132e-06, "loss": 0.3614, "step": 23489 }, { "epoch": 77.01639344262296, "grad_norm": 5.091554164886475, "learning_rate": 2.644937788819255e-06, "loss": 0.3795, "step": 23490 }, { "epoch": 77.01967213114754, "grad_norm": 7.5370988845825195, "learning_rate": 2.644218382772974e-06, "loss": 0.4994, "step": 23491 }, { "epoch": 77.02295081967213, "grad_norm": 4.463657379150391, "learning_rate": 2.6434990596707788e-06, "loss": 0.4281, "step": 23492 }, { "epoch": 77.02622950819672, "grad_norm": 5.062969207763672, "learning_rate": 2.642779819520781e-06, "loss": 0.3975, "step": 23493 }, { "epoch": 77.02950819672131, "grad_norm": 5.832856178283691, "learning_rate": 2.6420606623310894e-06, "loss": 0.7648, "step": 23494 }, { "epoch": 77.0327868852459, "grad_norm": 6.078191757202148, "learning_rate": 2.6413415881098124e-06, "loss": 0.5063, "step": 23495 }, { "epoch": 77.03606557377049, "grad_norm": 16.25319480895996, "learning_rate": 2.6406225968650625e-06, "loss": 0.3937, "step": 23496 }, { "epoch": 77.03934426229509, "grad_norm": 4.6836981773376465, "learning_rate": 2.6399036886049443e-06, "loss": 0.2657, "step": 23497 }, { "epoch": 77.04262295081968, "grad_norm": 5.508605480194092, "learning_rate": 2.639184863337565e-06, "loss": 0.3433, "step": 23498 }, { "epoch": 77.04590163934427, "grad_norm": 4.788607120513916, "learning_rate": 2.638466121071027e-06, "loss": 0.3223, "step": 23499 }, { "epoch": 77.04918032786885, "grad_norm": 4.587882995605469, "learning_rate": 2.6377474618134403e-06, "loss": 0.4334, "step": 23500 }, { "epoch": 77.05245901639344, "grad_norm": 10.879656791687012, "learning_rate": 2.6370288855729055e-06, "loss": 0.4166, "step": 23501 }, { "epoch": 77.05573770491803, "grad_norm": 5.162012100219727, "learning_rate": 2.6363103923575263e-06, "loss": 0.4191, "step": 23502 }, { "epoch": 77.05901639344262, "grad_norm": 5.33491325378418, "learning_rate": 2.6355919821753995e-06, "loss": 0.4942, "step": 23503 }, { "epoch": 77.0622950819672, "grad_norm": 9.4776611328125, "learning_rate": 2.634873655034634e-06, "loss": 0.2315, "step": 23504 }, { "epoch": 77.06557377049181, "grad_norm": 6.7843732833862305, "learning_rate": 2.6341554109433253e-06, "loss": 0.2818, "step": 23505 }, { "epoch": 77.0688524590164, "grad_norm": 7.862310409545898, "learning_rate": 2.6334372499095706e-06, "loss": 0.3422, "step": 23506 }, { "epoch": 77.07213114754099, "grad_norm": 5.420956134796143, "learning_rate": 2.6327191719414737e-06, "loss": 0.4843, "step": 23507 }, { "epoch": 77.07540983606557, "grad_norm": 4.25651216506958, "learning_rate": 2.6320011770471267e-06, "loss": 0.312, "step": 23508 }, { "epoch": 77.07868852459016, "grad_norm": 5.640439033508301, "learning_rate": 2.6312832652346276e-06, "loss": 0.6856, "step": 23509 }, { "epoch": 77.08196721311475, "grad_norm": 5.9433979988098145, "learning_rate": 2.6305654365120694e-06, "loss": 0.3494, "step": 23510 }, { "epoch": 77.08524590163934, "grad_norm": 5.91692590713501, "learning_rate": 2.62984769088755e-06, "loss": 0.3864, "step": 23511 }, { "epoch": 77.08852459016393, "grad_norm": 4.22236967086792, "learning_rate": 2.629130028369161e-06, "loss": 0.2662, "step": 23512 }, { "epoch": 77.09180327868853, "grad_norm": 5.209020137786865, "learning_rate": 2.628412448964992e-06, "loss": 0.3187, "step": 23513 }, { "epoch": 77.09508196721312, "grad_norm": 4.385410308837891, "learning_rate": 2.6276949526831407e-06, "loss": 0.2759, "step": 23514 }, { "epoch": 77.09836065573771, "grad_norm": 4.977935791015625, "learning_rate": 2.626977539531693e-06, "loss": 0.3499, "step": 23515 }, { "epoch": 77.1016393442623, "grad_norm": 4.6906843185424805, "learning_rate": 2.626260209518737e-06, "loss": 0.5496, "step": 23516 }, { "epoch": 77.10491803278688, "grad_norm": 5.770051956176758, "learning_rate": 2.6255429626523677e-06, "loss": 0.5268, "step": 23517 }, { "epoch": 77.10819672131147, "grad_norm": 5.0356526374816895, "learning_rate": 2.624825798940668e-06, "loss": 0.4238, "step": 23518 }, { "epoch": 77.11147540983606, "grad_norm": 5.604755401611328, "learning_rate": 2.6241087183917257e-06, "loss": 0.4319, "step": 23519 }, { "epoch": 77.11475409836065, "grad_norm": 4.667105197906494, "learning_rate": 2.623391721013627e-06, "loss": 0.4857, "step": 23520 }, { "epoch": 77.11803278688525, "grad_norm": 7.085720062255859, "learning_rate": 2.6226748068144537e-06, "loss": 0.473, "step": 23521 }, { "epoch": 77.12131147540984, "grad_norm": 4.334956645965576, "learning_rate": 2.621957975802295e-06, "loss": 0.4725, "step": 23522 }, { "epoch": 77.12459016393443, "grad_norm": 8.075539588928223, "learning_rate": 2.6212412279852316e-06, "loss": 0.4419, "step": 23523 }, { "epoch": 77.12786885245902, "grad_norm": 10.950860977172852, "learning_rate": 2.620524563371345e-06, "loss": 0.5121, "step": 23524 }, { "epoch": 77.1311475409836, "grad_norm": 4.665064334869385, "learning_rate": 2.619807981968714e-06, "loss": 0.2528, "step": 23525 }, { "epoch": 77.1344262295082, "grad_norm": 5.864878177642822, "learning_rate": 2.619091483785424e-06, "loss": 0.5339, "step": 23526 }, { "epoch": 77.13770491803278, "grad_norm": 4.550796985626221, "learning_rate": 2.618375068829552e-06, "loss": 0.3756, "step": 23527 }, { "epoch": 77.14098360655737, "grad_norm": 4.998553276062012, "learning_rate": 2.6176587371091767e-06, "loss": 0.2354, "step": 23528 }, { "epoch": 77.14426229508197, "grad_norm": 4.023396015167236, "learning_rate": 2.6169424886323714e-06, "loss": 0.242, "step": 23529 }, { "epoch": 77.14754098360656, "grad_norm": 5.610110759735107, "learning_rate": 2.616226323407218e-06, "loss": 0.5632, "step": 23530 }, { "epoch": 77.15081967213115, "grad_norm": 4.916051864624023, "learning_rate": 2.615510241441791e-06, "loss": 0.3528, "step": 23531 }, { "epoch": 77.15409836065574, "grad_norm": 6.919602870941162, "learning_rate": 2.614794242744164e-06, "loss": 0.4904, "step": 23532 }, { "epoch": 77.15737704918033, "grad_norm": 7.244927406311035, "learning_rate": 2.6140783273224103e-06, "loss": 0.5101, "step": 23533 }, { "epoch": 77.16065573770491, "grad_norm": 5.10178804397583, "learning_rate": 2.6133624951846005e-06, "loss": 0.4232, "step": 23534 }, { "epoch": 77.1639344262295, "grad_norm": 4.866269111633301, "learning_rate": 2.6126467463388104e-06, "loss": 0.3371, "step": 23535 }, { "epoch": 77.1672131147541, "grad_norm": 5.815410137176514, "learning_rate": 2.611931080793111e-06, "loss": 0.3982, "step": 23536 }, { "epoch": 77.1704918032787, "grad_norm": 4.884303092956543, "learning_rate": 2.6112154985555695e-06, "loss": 0.4707, "step": 23537 }, { "epoch": 77.17377049180328, "grad_norm": 4.418791770935059, "learning_rate": 2.610499999634252e-06, "loss": 0.504, "step": 23538 }, { "epoch": 77.17704918032787, "grad_norm": 5.196794033050537, "learning_rate": 2.609784584037234e-06, "loss": 0.3781, "step": 23539 }, { "epoch": 77.18032786885246, "grad_norm": 5.467438697814941, "learning_rate": 2.609069251772578e-06, "loss": 0.3701, "step": 23540 }, { "epoch": 77.18360655737705, "grad_norm": 4.272427082061768, "learning_rate": 2.608354002848351e-06, "loss": 0.5497, "step": 23541 }, { "epoch": 77.18688524590164, "grad_norm": 4.19352912902832, "learning_rate": 2.607638837272616e-06, "loss": 0.4554, "step": 23542 }, { "epoch": 77.19016393442622, "grad_norm": 4.652568340301514, "learning_rate": 2.606923755053441e-06, "loss": 0.2847, "step": 23543 }, { "epoch": 77.19344262295083, "grad_norm": 4.350694179534912, "learning_rate": 2.606208756198888e-06, "loss": 0.2666, "step": 23544 }, { "epoch": 77.19672131147541, "grad_norm": 6.161274433135986, "learning_rate": 2.6054938407170193e-06, "loss": 0.4206, "step": 23545 }, { "epoch": 77.2, "grad_norm": 5.682356834411621, "learning_rate": 2.604779008615895e-06, "loss": 0.4342, "step": 23546 }, { "epoch": 77.20327868852459, "grad_norm": 4.780750274658203, "learning_rate": 2.604064259903574e-06, "loss": 0.3687, "step": 23547 }, { "epoch": 77.20655737704918, "grad_norm": 5.012604236602783, "learning_rate": 2.6033495945881215e-06, "loss": 0.343, "step": 23548 }, { "epoch": 77.20983606557377, "grad_norm": 4.358294486999512, "learning_rate": 2.6026350126775923e-06, "loss": 0.2942, "step": 23549 }, { "epoch": 77.21311475409836, "grad_norm": 7.6669816970825195, "learning_rate": 2.601920514180045e-06, "loss": 0.2899, "step": 23550 }, { "epoch": 77.21639344262294, "grad_norm": 5.900514602661133, "learning_rate": 2.6012060991035337e-06, "loss": 0.3852, "step": 23551 }, { "epoch": 77.21967213114755, "grad_norm": 4.685536861419678, "learning_rate": 2.600491767456118e-06, "loss": 0.3515, "step": 23552 }, { "epoch": 77.22295081967214, "grad_norm": 6.132050514221191, "learning_rate": 2.599777519245853e-06, "loss": 0.5162, "step": 23553 }, { "epoch": 77.22622950819672, "grad_norm": 5.567854404449463, "learning_rate": 2.5990633544807895e-06, "loss": 0.5128, "step": 23554 }, { "epoch": 77.22950819672131, "grad_norm": 14.109210968017578, "learning_rate": 2.5983492731689785e-06, "loss": 0.3401, "step": 23555 }, { "epoch": 77.2327868852459, "grad_norm": 5.442742347717285, "learning_rate": 2.5976352753184785e-06, "loss": 0.528, "step": 23556 }, { "epoch": 77.23606557377049, "grad_norm": 8.810429573059082, "learning_rate": 2.5969213609373377e-06, "loss": 0.5448, "step": 23557 }, { "epoch": 77.23934426229508, "grad_norm": 4.410962104797363, "learning_rate": 2.5962075300336054e-06, "loss": 0.4358, "step": 23558 }, { "epoch": 77.24262295081967, "grad_norm": 7.914183616638184, "learning_rate": 2.5954937826153293e-06, "loss": 0.426, "step": 23559 }, { "epoch": 77.24590163934427, "grad_norm": 4.499035358428955, "learning_rate": 2.5947801186905608e-06, "loss": 0.3564, "step": 23560 }, { "epoch": 77.24918032786886, "grad_norm": 5.596664905548096, "learning_rate": 2.5940665382673467e-06, "loss": 0.3821, "step": 23561 }, { "epoch": 77.25245901639344, "grad_norm": 4.821014404296875, "learning_rate": 2.59335304135373e-06, "loss": 0.2515, "step": 23562 }, { "epoch": 77.25573770491803, "grad_norm": 4.028453826904297, "learning_rate": 2.5926396279577616e-06, "loss": 0.3912, "step": 23563 }, { "epoch": 77.25901639344262, "grad_norm": 9.396744728088379, "learning_rate": 2.5919262980874837e-06, "loss": 0.3859, "step": 23564 }, { "epoch": 77.26229508196721, "grad_norm": 4.975776672363281, "learning_rate": 2.591213051750935e-06, "loss": 0.2939, "step": 23565 }, { "epoch": 77.2655737704918, "grad_norm": 5.487144947052002, "learning_rate": 2.590499888956166e-06, "loss": 0.3958, "step": 23566 }, { "epoch": 77.26885245901639, "grad_norm": 5.393196105957031, "learning_rate": 2.5897868097112143e-06, "loss": 0.5232, "step": 23567 }, { "epoch": 77.27213114754099, "grad_norm": 8.409531593322754, "learning_rate": 2.589073814024119e-06, "loss": 0.7001, "step": 23568 }, { "epoch": 77.27540983606558, "grad_norm": 4.8262410163879395, "learning_rate": 2.5883609019029244e-06, "loss": 0.1883, "step": 23569 }, { "epoch": 77.27868852459017, "grad_norm": 7.097641944885254, "learning_rate": 2.5876480733556664e-06, "loss": 0.387, "step": 23570 }, { "epoch": 77.28196721311475, "grad_norm": 6.49806022644043, "learning_rate": 2.586935328390383e-06, "loss": 0.525, "step": 23571 }, { "epoch": 77.28524590163934, "grad_norm": 5.17055606842041, "learning_rate": 2.5862226670151124e-06, "loss": 0.2815, "step": 23572 }, { "epoch": 77.28852459016393, "grad_norm": 5.557954788208008, "learning_rate": 2.585510089237886e-06, "loss": 0.49, "step": 23573 }, { "epoch": 77.29180327868852, "grad_norm": 11.061816215515137, "learning_rate": 2.584797595066746e-06, "loss": 0.3823, "step": 23574 }, { "epoch": 77.29508196721312, "grad_norm": 6.862873077392578, "learning_rate": 2.5840851845097224e-06, "loss": 0.4544, "step": 23575 }, { "epoch": 77.29836065573771, "grad_norm": 4.865683555603027, "learning_rate": 2.5833728575748497e-06, "loss": 0.4489, "step": 23576 }, { "epoch": 77.3016393442623, "grad_norm": 6.192417144775391, "learning_rate": 2.582660614270156e-06, "loss": 0.2959, "step": 23577 }, { "epoch": 77.30491803278689, "grad_norm": 7.017431259155273, "learning_rate": 2.5819484546036787e-06, "loss": 0.2635, "step": 23578 }, { "epoch": 77.30819672131148, "grad_norm": 11.044873237609863, "learning_rate": 2.5812363785834448e-06, "loss": 0.4345, "step": 23579 }, { "epoch": 77.31147540983606, "grad_norm": 5.190528392791748, "learning_rate": 2.5805243862174857e-06, "loss": 0.3499, "step": 23580 }, { "epoch": 77.31475409836065, "grad_norm": 7.200517177581787, "learning_rate": 2.5798124775138243e-06, "loss": 0.2446, "step": 23581 }, { "epoch": 77.31803278688524, "grad_norm": 5.377992630004883, "learning_rate": 2.579100652480496e-06, "loss": 0.3949, "step": 23582 }, { "epoch": 77.32131147540984, "grad_norm": 4.82747220993042, "learning_rate": 2.5783889111255234e-06, "loss": 0.5567, "step": 23583 }, { "epoch": 77.32459016393443, "grad_norm": 5.1157965660095215, "learning_rate": 2.5776772534569326e-06, "loss": 0.3017, "step": 23584 }, { "epoch": 77.32786885245902, "grad_norm": 4.735549449920654, "learning_rate": 2.576965679482748e-06, "loss": 0.34, "step": 23585 }, { "epoch": 77.33114754098361, "grad_norm": 4.35614013671875, "learning_rate": 2.5762541892109904e-06, "loss": 0.2846, "step": 23586 }, { "epoch": 77.3344262295082, "grad_norm": 6.780202865600586, "learning_rate": 2.575542782649688e-06, "loss": 0.2206, "step": 23587 }, { "epoch": 77.33770491803278, "grad_norm": 7.162524700164795, "learning_rate": 2.5748314598068613e-06, "loss": 0.7527, "step": 23588 }, { "epoch": 77.34098360655737, "grad_norm": 4.8734283447265625, "learning_rate": 2.5741202206905293e-06, "loss": 0.4143, "step": 23589 }, { "epoch": 77.34426229508196, "grad_norm": 4.856301784515381, "learning_rate": 2.5734090653087096e-06, "loss": 0.5137, "step": 23590 }, { "epoch": 77.34754098360656, "grad_norm": 4.745573043823242, "learning_rate": 2.5726979936694285e-06, "loss": 0.6401, "step": 23591 }, { "epoch": 77.35081967213115, "grad_norm": 5.079421520233154, "learning_rate": 2.5719870057806996e-06, "loss": 0.3239, "step": 23592 }, { "epoch": 77.35409836065574, "grad_norm": 4.738895416259766, "learning_rate": 2.5712761016505394e-06, "loss": 0.5273, "step": 23593 }, { "epoch": 77.35737704918033, "grad_norm": 4.173016548156738, "learning_rate": 2.570565281286963e-06, "loss": 0.3742, "step": 23594 }, { "epoch": 77.36065573770492, "grad_norm": 3.843618631362915, "learning_rate": 2.5698545446979907e-06, "loss": 0.5534, "step": 23595 }, { "epoch": 77.3639344262295, "grad_norm": 5.220210075378418, "learning_rate": 2.5691438918916332e-06, "loss": 0.4923, "step": 23596 }, { "epoch": 77.3672131147541, "grad_norm": 6.265910625457764, "learning_rate": 2.568433322875905e-06, "loss": 0.3374, "step": 23597 }, { "epoch": 77.37049180327868, "grad_norm": 5.260378837585449, "learning_rate": 2.567722837658818e-06, "loss": 0.3616, "step": 23598 }, { "epoch": 77.37377049180328, "grad_norm": 4.547726154327393, "learning_rate": 2.5670124362483805e-06, "loss": 0.3178, "step": 23599 }, { "epoch": 77.37704918032787, "grad_norm": 7.905111789703369, "learning_rate": 2.5663021186526094e-06, "loss": 0.6043, "step": 23600 }, { "epoch": 77.38032786885246, "grad_norm": 7.557506561279297, "learning_rate": 2.56559188487951e-06, "loss": 0.4454, "step": 23601 }, { "epoch": 77.38360655737705, "grad_norm": 5.0520195960998535, "learning_rate": 2.5648817349370935e-06, "loss": 0.5388, "step": 23602 }, { "epoch": 77.38688524590164, "grad_norm": 5.55763053894043, "learning_rate": 2.564171668833362e-06, "loss": 0.3474, "step": 23603 }, { "epoch": 77.39016393442623, "grad_norm": 4.354410171508789, "learning_rate": 2.5634616865763295e-06, "loss": 0.1748, "step": 23604 }, { "epoch": 77.39344262295081, "grad_norm": 4.946983814239502, "learning_rate": 2.5627517881739982e-06, "loss": 0.3107, "step": 23605 }, { "epoch": 77.3967213114754, "grad_norm": 4.618106365203857, "learning_rate": 2.5620419736343738e-06, "loss": 0.1749, "step": 23606 }, { "epoch": 77.4, "grad_norm": 4.902984142303467, "learning_rate": 2.5613322429654573e-06, "loss": 0.3482, "step": 23607 }, { "epoch": 77.4032786885246, "grad_norm": 4.586668968200684, "learning_rate": 2.560622596175256e-06, "loss": 0.287, "step": 23608 }, { "epoch": 77.40655737704918, "grad_norm": 46.95941162109375, "learning_rate": 2.5599130332717705e-06, "loss": 0.4547, "step": 23609 }, { "epoch": 77.40983606557377, "grad_norm": 4.273137092590332, "learning_rate": 2.559203554263001e-06, "loss": 0.24, "step": 23610 }, { "epoch": 77.41311475409836, "grad_norm": 4.753222465515137, "learning_rate": 2.558494159156948e-06, "loss": 0.3523, "step": 23611 }, { "epoch": 77.41639344262295, "grad_norm": 4.786372184753418, "learning_rate": 2.5577848479616074e-06, "loss": 0.6883, "step": 23612 }, { "epoch": 77.41967213114754, "grad_norm": 7.84597110748291, "learning_rate": 2.5570756206849834e-06, "loss": 0.6116, "step": 23613 }, { "epoch": 77.42295081967212, "grad_norm": 4.988706588745117, "learning_rate": 2.5563664773350706e-06, "loss": 0.1789, "step": 23614 }, { "epoch": 77.42622950819673, "grad_norm": 4.545933246612549, "learning_rate": 2.5556574179198625e-06, "loss": 0.2677, "step": 23615 }, { "epoch": 77.42950819672132, "grad_norm": 5.7125749588012695, "learning_rate": 2.55494844244736e-06, "loss": 0.3982, "step": 23616 }, { "epoch": 77.4327868852459, "grad_norm": 6.595074653625488, "learning_rate": 2.5542395509255547e-06, "loss": 0.5121, "step": 23617 }, { "epoch": 77.43606557377049, "grad_norm": 5.102627277374268, "learning_rate": 2.553530743362438e-06, "loss": 0.4882, "step": 23618 }, { "epoch": 77.43934426229508, "grad_norm": 4.254251956939697, "learning_rate": 2.5528220197660056e-06, "loss": 0.2189, "step": 23619 }, { "epoch": 77.44262295081967, "grad_norm": 5.260985851287842, "learning_rate": 2.55211338014425e-06, "loss": 0.3325, "step": 23620 }, { "epoch": 77.44590163934426, "grad_norm": 4.814409255981445, "learning_rate": 2.551404824505156e-06, "loss": 0.5129, "step": 23621 }, { "epoch": 77.44918032786886, "grad_norm": 6.409449100494385, "learning_rate": 2.5506963528567208e-06, "loss": 0.2841, "step": 23622 }, { "epoch": 77.45245901639345, "grad_norm": 4.136710166931152, "learning_rate": 2.5499879652069293e-06, "loss": 0.2864, "step": 23623 }, { "epoch": 77.45573770491804, "grad_norm": 5.9065680503845215, "learning_rate": 2.5492796615637683e-06, "loss": 0.3862, "step": 23624 }, { "epoch": 77.45901639344262, "grad_norm": 3.9676151275634766, "learning_rate": 2.548571441935225e-06, "loss": 0.2847, "step": 23625 }, { "epoch": 77.46229508196721, "grad_norm": 4.181139945983887, "learning_rate": 2.5478633063292877e-06, "loss": 0.3708, "step": 23626 }, { "epoch": 77.4655737704918, "grad_norm": 4.852865219116211, "learning_rate": 2.5471552547539403e-06, "loss": 0.2681, "step": 23627 }, { "epoch": 77.46885245901639, "grad_norm": 3.4062612056732178, "learning_rate": 2.5464472872171665e-06, "loss": 0.3838, "step": 23628 }, { "epoch": 77.47213114754098, "grad_norm": 5.650994300842285, "learning_rate": 2.545739403726947e-06, "loss": 0.6522, "step": 23629 }, { "epoch": 77.47540983606558, "grad_norm": 4.493939399719238, "learning_rate": 2.545031604291268e-06, "loss": 0.4558, "step": 23630 }, { "epoch": 77.47868852459017, "grad_norm": 5.759641647338867, "learning_rate": 2.54432388891811e-06, "loss": 0.4345, "step": 23631 }, { "epoch": 77.48196721311476, "grad_norm": 5.985074520111084, "learning_rate": 2.5436162576154513e-06, "loss": 0.4019, "step": 23632 }, { "epoch": 77.48524590163935, "grad_norm": 5.302820205688477, "learning_rate": 2.5429087103912685e-06, "loss": 0.4134, "step": 23633 }, { "epoch": 77.48852459016393, "grad_norm": 6.112170219421387, "learning_rate": 2.5422012472535474e-06, "loss": 0.5309, "step": 23634 }, { "epoch": 77.49180327868852, "grad_norm": 5.3447585105896, "learning_rate": 2.5414938682102606e-06, "loss": 0.3822, "step": 23635 }, { "epoch": 77.49508196721311, "grad_norm": 4.8941330909729, "learning_rate": 2.540786573269386e-06, "loss": 0.3315, "step": 23636 }, { "epoch": 77.4983606557377, "grad_norm": 6.289309978485107, "learning_rate": 2.5400793624388985e-06, "loss": 0.3559, "step": 23637 }, { "epoch": 77.5016393442623, "grad_norm": 4.4696807861328125, "learning_rate": 2.539372235726769e-06, "loss": 0.1771, "step": 23638 }, { "epoch": 77.50491803278689, "grad_norm": 6.373545169830322, "learning_rate": 2.538665193140979e-06, "loss": 0.4828, "step": 23639 }, { "epoch": 77.50819672131148, "grad_norm": 6.645174980163574, "learning_rate": 2.537958234689496e-06, "loss": 0.2626, "step": 23640 }, { "epoch": 77.51147540983607, "grad_norm": 5.029555320739746, "learning_rate": 2.5372513603802915e-06, "loss": 0.3251, "step": 23641 }, { "epoch": 77.51475409836065, "grad_norm": 4.690401077270508, "learning_rate": 2.5365445702213355e-06, "loss": 0.4123, "step": 23642 }, { "epoch": 77.51803278688524, "grad_norm": 5.240050792694092, "learning_rate": 2.5358378642206017e-06, "loss": 0.533, "step": 23643 }, { "epoch": 77.52131147540983, "grad_norm": 4.465474605560303, "learning_rate": 2.5351312423860574e-06, "loss": 0.338, "step": 23644 }, { "epoch": 77.52459016393442, "grad_norm": 5.699955463409424, "learning_rate": 2.5344247047256697e-06, "loss": 0.3202, "step": 23645 }, { "epoch": 77.52786885245902, "grad_norm": 5.8828840255737305, "learning_rate": 2.5337182512474025e-06, "loss": 0.3343, "step": 23646 }, { "epoch": 77.53114754098361, "grad_norm": 4.546983242034912, "learning_rate": 2.5330118819592275e-06, "loss": 0.1772, "step": 23647 }, { "epoch": 77.5344262295082, "grad_norm": 5.229435920715332, "learning_rate": 2.5323055968691077e-06, "loss": 0.222, "step": 23648 }, { "epoch": 77.53770491803279, "grad_norm": 5.152628421783447, "learning_rate": 2.531599395985007e-06, "loss": 0.3757, "step": 23649 }, { "epoch": 77.54098360655738, "grad_norm": 4.250526428222656, "learning_rate": 2.530893279314889e-06, "loss": 0.2982, "step": 23650 }, { "epoch": 77.54426229508196, "grad_norm": 5.463738441467285, "learning_rate": 2.530187246866711e-06, "loss": 0.4993, "step": 23651 }, { "epoch": 77.54754098360655, "grad_norm": 4.704633712768555, "learning_rate": 2.5294812986484416e-06, "loss": 0.2586, "step": 23652 }, { "epoch": 77.55081967213114, "grad_norm": 6.941858291625977, "learning_rate": 2.5287754346680382e-06, "loss": 0.2885, "step": 23653 }, { "epoch": 77.55409836065574, "grad_norm": 4.190433979034424, "learning_rate": 2.5280696549334593e-06, "loss": 0.2279, "step": 23654 }, { "epoch": 77.55737704918033, "grad_norm": 4.409797191619873, "learning_rate": 2.5273639594526624e-06, "loss": 0.3582, "step": 23655 }, { "epoch": 77.56065573770492, "grad_norm": 5.1880927085876465, "learning_rate": 2.5266583482336083e-06, "loss": 0.3366, "step": 23656 }, { "epoch": 77.56393442622951, "grad_norm": 5.221599578857422, "learning_rate": 2.5259528212842523e-06, "loss": 0.2736, "step": 23657 }, { "epoch": 77.5672131147541, "grad_norm": 4.087848663330078, "learning_rate": 2.5252473786125485e-06, "loss": 0.296, "step": 23658 }, { "epoch": 77.57049180327868, "grad_norm": 5.6891608238220215, "learning_rate": 2.5245420202264493e-06, "loss": 0.5199, "step": 23659 }, { "epoch": 77.57377049180327, "grad_norm": 4.898087024688721, "learning_rate": 2.5238367461339155e-06, "loss": 0.4208, "step": 23660 }, { "epoch": 77.57704918032788, "grad_norm": 4.780658721923828, "learning_rate": 2.523131556342894e-06, "loss": 0.5929, "step": 23661 }, { "epoch": 77.58032786885246, "grad_norm": 4.476500034332275, "learning_rate": 2.5224264508613395e-06, "loss": 0.4287, "step": 23662 }, { "epoch": 77.58360655737705, "grad_norm": 5.324767589569092, "learning_rate": 2.5217214296972005e-06, "loss": 0.2217, "step": 23663 }, { "epoch": 77.58688524590164, "grad_norm": 3.7572097778320312, "learning_rate": 2.5210164928584257e-06, "loss": 0.4116, "step": 23664 }, { "epoch": 77.59016393442623, "grad_norm": 4.165826320648193, "learning_rate": 2.5203116403529693e-06, "loss": 0.375, "step": 23665 }, { "epoch": 77.59344262295082, "grad_norm": 6.452171802520752, "learning_rate": 2.5196068721887756e-06, "loss": 0.6885, "step": 23666 }, { "epoch": 77.5967213114754, "grad_norm": 4.835371971130371, "learning_rate": 2.5189021883737885e-06, "loss": 0.4432, "step": 23667 }, { "epoch": 77.6, "grad_norm": 4.663578033447266, "learning_rate": 2.5181975889159615e-06, "loss": 0.1678, "step": 23668 }, { "epoch": 77.6032786885246, "grad_norm": 7.2893266677856445, "learning_rate": 2.517493073823235e-06, "loss": 0.4908, "step": 23669 }, { "epoch": 77.60655737704919, "grad_norm": 4.6479620933532715, "learning_rate": 2.5167886431035517e-06, "loss": 0.2995, "step": 23670 }, { "epoch": 77.60983606557377, "grad_norm": 5.3757853507995605, "learning_rate": 2.5160842967648603e-06, "loss": 0.3377, "step": 23671 }, { "epoch": 77.61311475409836, "grad_norm": 4.7533721923828125, "learning_rate": 2.5153800348150993e-06, "loss": 0.1736, "step": 23672 }, { "epoch": 77.61639344262295, "grad_norm": 8.212742805480957, "learning_rate": 2.51467585726221e-06, "loss": 0.3274, "step": 23673 }, { "epoch": 77.61967213114754, "grad_norm": 11.965421676635742, "learning_rate": 2.5139717641141305e-06, "loss": 0.3635, "step": 23674 }, { "epoch": 77.62295081967213, "grad_norm": 5.23230504989624, "learning_rate": 2.5132677553788064e-06, "loss": 0.3518, "step": 23675 }, { "epoch": 77.62622950819672, "grad_norm": 4.708977699279785, "learning_rate": 2.512563831064172e-06, "loss": 0.3413, "step": 23676 }, { "epoch": 77.62950819672132, "grad_norm": 6.04608154296875, "learning_rate": 2.5118599911781626e-06, "loss": 0.4485, "step": 23677 }, { "epoch": 77.6327868852459, "grad_norm": 9.068734169006348, "learning_rate": 2.511156235728721e-06, "loss": 0.4483, "step": 23678 }, { "epoch": 77.6360655737705, "grad_norm": 4.7602386474609375, "learning_rate": 2.510452564723779e-06, "loss": 0.3471, "step": 23679 }, { "epoch": 77.63934426229508, "grad_norm": 4.850884914398193, "learning_rate": 2.5097489781712704e-06, "loss": 0.4608, "step": 23680 }, { "epoch": 77.64262295081967, "grad_norm": 4.943813323974609, "learning_rate": 2.5090454760791284e-06, "loss": 0.347, "step": 23681 }, { "epoch": 77.64590163934426, "grad_norm": 4.886690616607666, "learning_rate": 2.5083420584552896e-06, "loss": 0.4282, "step": 23682 }, { "epoch": 77.64918032786885, "grad_norm": 4.250556945800781, "learning_rate": 2.507638725307684e-06, "loss": 0.3909, "step": 23683 }, { "epoch": 77.65245901639344, "grad_norm": 6.930880069732666, "learning_rate": 2.506935476644241e-06, "loss": 0.4573, "step": 23684 }, { "epoch": 77.65573770491804, "grad_norm": 4.882794380187988, "learning_rate": 2.5062323124728893e-06, "loss": 0.487, "step": 23685 }, { "epoch": 77.65901639344263, "grad_norm": 5.838294506072998, "learning_rate": 2.5055292328015622e-06, "loss": 0.5662, "step": 23686 }, { "epoch": 77.66229508196722, "grad_norm": 7.98432731628418, "learning_rate": 2.504826237638186e-06, "loss": 0.3321, "step": 23687 }, { "epoch": 77.6655737704918, "grad_norm": 7.759398937225342, "learning_rate": 2.5041233269906863e-06, "loss": 0.4536, "step": 23688 }, { "epoch": 77.66885245901639, "grad_norm": 4.365344047546387, "learning_rate": 2.5034205008669898e-06, "loss": 0.3691, "step": 23689 }, { "epoch": 77.67213114754098, "grad_norm": 6.672658443450928, "learning_rate": 2.5027177592750184e-06, "loss": 0.4742, "step": 23690 }, { "epoch": 77.67540983606557, "grad_norm": 3.513577461242676, "learning_rate": 2.5020151022227035e-06, "loss": 0.5343, "step": 23691 }, { "epoch": 77.67868852459016, "grad_norm": 7.68122673034668, "learning_rate": 2.5013125297179643e-06, "loss": 0.4445, "step": 23692 }, { "epoch": 77.68196721311476, "grad_norm": 4.449234962463379, "learning_rate": 2.500610041768722e-06, "loss": 0.3535, "step": 23693 }, { "epoch": 77.68524590163935, "grad_norm": 14.566904067993164, "learning_rate": 2.4999076383828957e-06, "loss": 0.641, "step": 23694 }, { "epoch": 77.68852459016394, "grad_norm": 4.490871429443359, "learning_rate": 2.499205319568413e-06, "loss": 0.4396, "step": 23695 }, { "epoch": 77.69180327868852, "grad_norm": 5.1785688400268555, "learning_rate": 2.498503085333188e-06, "loss": 0.3785, "step": 23696 }, { "epoch": 77.69508196721311, "grad_norm": 6.671586513519287, "learning_rate": 2.497800935685142e-06, "loss": 0.1749, "step": 23697 }, { "epoch": 77.6983606557377, "grad_norm": 11.736953735351562, "learning_rate": 2.4970988706321866e-06, "loss": 0.4332, "step": 23698 }, { "epoch": 77.70163934426229, "grad_norm": 5.337625980377197, "learning_rate": 2.4963968901822454e-06, "loss": 0.3016, "step": 23699 }, { "epoch": 77.70491803278688, "grad_norm": 3.9307501316070557, "learning_rate": 2.4956949943432317e-06, "loss": 0.2145, "step": 23700 }, { "epoch": 77.70819672131148, "grad_norm": 5.529024600982666, "learning_rate": 2.494993183123059e-06, "loss": 0.1735, "step": 23701 }, { "epoch": 77.71147540983607, "grad_norm": 6.802903652191162, "learning_rate": 2.4942914565296416e-06, "loss": 0.2689, "step": 23702 }, { "epoch": 77.71475409836066, "grad_norm": 19.754371643066406, "learning_rate": 2.493589814570889e-06, "loss": 0.3254, "step": 23703 }, { "epoch": 77.71803278688525, "grad_norm": 5.6591339111328125, "learning_rate": 2.4928882572547184e-06, "loss": 0.2914, "step": 23704 }, { "epoch": 77.72131147540983, "grad_norm": 4.833819389343262, "learning_rate": 2.492186784589039e-06, "loss": 0.4223, "step": 23705 }, { "epoch": 77.72459016393442, "grad_norm": 4.881367206573486, "learning_rate": 2.4914853965817598e-06, "loss": 0.3697, "step": 23706 }, { "epoch": 77.72786885245901, "grad_norm": 4.676830291748047, "learning_rate": 2.4907840932407866e-06, "loss": 0.1748, "step": 23707 }, { "epoch": 77.73114754098361, "grad_norm": 5.374578952789307, "learning_rate": 2.4900828745740326e-06, "loss": 0.303, "step": 23708 }, { "epoch": 77.7344262295082, "grad_norm": 5.921907424926758, "learning_rate": 2.489381740589403e-06, "loss": 0.4404, "step": 23709 }, { "epoch": 77.73770491803279, "grad_norm": 4.589046478271484, "learning_rate": 2.4886806912948034e-06, "loss": 0.4285, "step": 23710 }, { "epoch": 77.74098360655738, "grad_norm": 15.766498565673828, "learning_rate": 2.4879797266981352e-06, "loss": 0.3587, "step": 23711 }, { "epoch": 77.74426229508197, "grad_norm": 3.801048517227173, "learning_rate": 2.4872788468073105e-06, "loss": 0.3561, "step": 23712 }, { "epoch": 77.74754098360656, "grad_norm": 4.647673606872559, "learning_rate": 2.4865780516302274e-06, "loss": 0.3293, "step": 23713 }, { "epoch": 77.75081967213114, "grad_norm": 6.169366359710693, "learning_rate": 2.4858773411747883e-06, "loss": 0.2668, "step": 23714 }, { "epoch": 77.75409836065573, "grad_norm": 5.850332736968994, "learning_rate": 2.4851767154488947e-06, "loss": 0.5611, "step": 23715 }, { "epoch": 77.75737704918033, "grad_norm": 5.216888427734375, "learning_rate": 2.4844761744604453e-06, "loss": 0.4073, "step": 23716 }, { "epoch": 77.76065573770492, "grad_norm": 5.538045883178711, "learning_rate": 2.483775718217344e-06, "loss": 0.4162, "step": 23717 }, { "epoch": 77.76393442622951, "grad_norm": 17.39954376220703, "learning_rate": 2.483075346727486e-06, "loss": 0.4517, "step": 23718 }, { "epoch": 77.7672131147541, "grad_norm": 6.014374732971191, "learning_rate": 2.4823750599987683e-06, "loss": 0.4214, "step": 23719 }, { "epoch": 77.77049180327869, "grad_norm": 4.155464172363281, "learning_rate": 2.481674858039086e-06, "loss": 0.4395, "step": 23720 }, { "epoch": 77.77377049180328, "grad_norm": 5.806430816650391, "learning_rate": 2.48097474085634e-06, "loss": 0.5446, "step": 23721 }, { "epoch": 77.77704918032786, "grad_norm": 7.995282173156738, "learning_rate": 2.480274708458421e-06, "loss": 0.2383, "step": 23722 }, { "epoch": 77.78032786885245, "grad_norm": 4.700824737548828, "learning_rate": 2.4795747608532204e-06, "loss": 0.2305, "step": 23723 }, { "epoch": 77.78360655737706, "grad_norm": 4.636143207550049, "learning_rate": 2.478874898048638e-06, "loss": 0.2741, "step": 23724 }, { "epoch": 77.78688524590164, "grad_norm": 4.197860240936279, "learning_rate": 2.4781751200525595e-06, "loss": 0.4761, "step": 23725 }, { "epoch": 77.79016393442623, "grad_norm": 5.587925434112549, "learning_rate": 2.477475426872874e-06, "loss": 0.3517, "step": 23726 }, { "epoch": 77.79344262295082, "grad_norm": 6.242404937744141, "learning_rate": 2.4767758185174786e-06, "loss": 0.3677, "step": 23727 }, { "epoch": 77.79672131147541, "grad_norm": 5.015244483947754, "learning_rate": 2.4760762949942575e-06, "loss": 0.3719, "step": 23728 }, { "epoch": 77.8, "grad_norm": 7.801630973815918, "learning_rate": 2.475376856311097e-06, "loss": 0.3598, "step": 23729 }, { "epoch": 77.80327868852459, "grad_norm": 5.723588943481445, "learning_rate": 2.4746775024758874e-06, "loss": 0.329, "step": 23730 }, { "epoch": 77.80655737704917, "grad_norm": 4.496324062347412, "learning_rate": 2.4739782334965147e-06, "loss": 0.309, "step": 23731 }, { "epoch": 77.80983606557378, "grad_norm": 13.57848834991455, "learning_rate": 2.4732790493808622e-06, "loss": 0.3637, "step": 23732 }, { "epoch": 77.81311475409836, "grad_norm": 4.773297309875488, "learning_rate": 2.472579950136811e-06, "loss": 0.4458, "step": 23733 }, { "epoch": 77.81639344262295, "grad_norm": 5.0412702560424805, "learning_rate": 2.4718809357722505e-06, "loss": 0.4847, "step": 23734 }, { "epoch": 77.81967213114754, "grad_norm": 5.065713882446289, "learning_rate": 2.4711820062950596e-06, "loss": 0.5937, "step": 23735 }, { "epoch": 77.82295081967213, "grad_norm": 4.6749444007873535, "learning_rate": 2.4704831617131197e-06, "loss": 0.3973, "step": 23736 }, { "epoch": 77.82622950819672, "grad_norm": 6.099523544311523, "learning_rate": 2.4697844020343087e-06, "loss": 0.3891, "step": 23737 }, { "epoch": 77.8295081967213, "grad_norm": 6.1849212646484375, "learning_rate": 2.4690857272665102e-06, "loss": 0.4982, "step": 23738 }, { "epoch": 77.8327868852459, "grad_norm": 5.225600242614746, "learning_rate": 2.4683871374176006e-06, "loss": 0.3494, "step": 23739 }, { "epoch": 77.8360655737705, "grad_norm": 7.337457656860352, "learning_rate": 2.467688632495456e-06, "loss": 0.4089, "step": 23740 }, { "epoch": 77.83934426229509, "grad_norm": 4.114261627197266, "learning_rate": 2.466990212507955e-06, "loss": 0.3205, "step": 23741 }, { "epoch": 77.84262295081967, "grad_norm": 5.006916522979736, "learning_rate": 2.4662918774629675e-06, "loss": 0.3625, "step": 23742 }, { "epoch": 77.84590163934426, "grad_norm": 4.682135581970215, "learning_rate": 2.4655936273683768e-06, "loss": 0.4186, "step": 23743 }, { "epoch": 77.84918032786885, "grad_norm": 3.8572704792022705, "learning_rate": 2.46489546223205e-06, "loss": 0.3657, "step": 23744 }, { "epoch": 77.85245901639344, "grad_norm": 4.521607875823975, "learning_rate": 2.4641973820618627e-06, "loss": 0.2185, "step": 23745 }, { "epoch": 77.85573770491803, "grad_norm": 4.077601432800293, "learning_rate": 2.463499386865681e-06, "loss": 0.302, "step": 23746 }, { "epoch": 77.85901639344263, "grad_norm": 4.579373359680176, "learning_rate": 2.4628014766513842e-06, "loss": 0.4084, "step": 23747 }, { "epoch": 77.86229508196722, "grad_norm": 4.797201156616211, "learning_rate": 2.462103651426836e-06, "loss": 0.4426, "step": 23748 }, { "epoch": 77.8655737704918, "grad_norm": 5.911365985870361, "learning_rate": 2.4614059111999076e-06, "loss": 0.5489, "step": 23749 }, { "epoch": 77.8688524590164, "grad_norm": 11.687989234924316, "learning_rate": 2.4607082559784624e-06, "loss": 0.4837, "step": 23750 }, { "epoch": 77.87213114754098, "grad_norm": 4.607108116149902, "learning_rate": 2.4600106857703734e-06, "loss": 0.2563, "step": 23751 }, { "epoch": 77.87540983606557, "grad_norm": 5.914118766784668, "learning_rate": 2.459313200583504e-06, "loss": 0.2968, "step": 23752 }, { "epoch": 77.87868852459016, "grad_norm": 4.349608898162842, "learning_rate": 2.4586158004257177e-06, "loss": 0.4364, "step": 23753 }, { "epoch": 77.88196721311475, "grad_norm": 4.560971736907959, "learning_rate": 2.45791848530488e-06, "loss": 0.4091, "step": 23754 }, { "epoch": 77.88524590163935, "grad_norm": 6.296112060546875, "learning_rate": 2.4572212552288497e-06, "loss": 0.4284, "step": 23755 }, { "epoch": 77.88852459016394, "grad_norm": 5.546994209289551, "learning_rate": 2.456524110205496e-06, "loss": 0.4494, "step": 23756 }, { "epoch": 77.89180327868853, "grad_norm": 5.141977787017822, "learning_rate": 2.455827050242676e-06, "loss": 0.437, "step": 23757 }, { "epoch": 77.89508196721312, "grad_norm": 5.059399604797363, "learning_rate": 2.45513007534825e-06, "loss": 0.31, "step": 23758 }, { "epoch": 77.8983606557377, "grad_norm": 5.875983715057373, "learning_rate": 2.4544331855300742e-06, "loss": 0.2708, "step": 23759 }, { "epoch": 77.90163934426229, "grad_norm": 5.115137577056885, "learning_rate": 2.4537363807960125e-06, "loss": 0.3635, "step": 23760 }, { "epoch": 77.90491803278688, "grad_norm": 4.661309242248535, "learning_rate": 2.453039661153919e-06, "loss": 0.2068, "step": 23761 }, { "epoch": 77.90819672131147, "grad_norm": 4.695418357849121, "learning_rate": 2.45234302661165e-06, "loss": 0.3817, "step": 23762 }, { "epoch": 77.91147540983607, "grad_norm": 5.8175225257873535, "learning_rate": 2.4516464771770577e-06, "loss": 0.379, "step": 23763 }, { "epoch": 77.91475409836066, "grad_norm": 5.761814594268799, "learning_rate": 2.450950012858003e-06, "loss": 0.3656, "step": 23764 }, { "epoch": 77.91803278688525, "grad_norm": 5.699869155883789, "learning_rate": 2.4502536336623373e-06, "loss": 0.2826, "step": 23765 }, { "epoch": 77.92131147540984, "grad_norm": 4.828433036804199, "learning_rate": 2.44955733959791e-06, "loss": 0.3396, "step": 23766 }, { "epoch": 77.92459016393443, "grad_norm": 4.250597953796387, "learning_rate": 2.4488611306725753e-06, "loss": 0.4023, "step": 23767 }, { "epoch": 77.92786885245901, "grad_norm": 4.862955570220947, "learning_rate": 2.448165006894179e-06, "loss": 0.4489, "step": 23768 }, { "epoch": 77.9311475409836, "grad_norm": 4.273762226104736, "learning_rate": 2.4474689682705766e-06, "loss": 0.2376, "step": 23769 }, { "epoch": 77.93442622950819, "grad_norm": 7.557901859283447, "learning_rate": 2.4467730148096146e-06, "loss": 0.5163, "step": 23770 }, { "epoch": 77.9377049180328, "grad_norm": 5.7648820877075195, "learning_rate": 2.4460771465191415e-06, "loss": 0.5428, "step": 23771 }, { "epoch": 77.94098360655738, "grad_norm": 7.72921085357666, "learning_rate": 2.445381363406998e-06, "loss": 0.5452, "step": 23772 }, { "epoch": 77.94426229508197, "grad_norm": 4.0838446617126465, "learning_rate": 2.444685665481038e-06, "loss": 0.3636, "step": 23773 }, { "epoch": 77.94754098360656, "grad_norm": 4.295899391174316, "learning_rate": 2.4439900527491034e-06, "loss": 0.5014, "step": 23774 }, { "epoch": 77.95081967213115, "grad_norm": 4.531343460083008, "learning_rate": 2.4432945252190367e-06, "loss": 0.2362, "step": 23775 }, { "epoch": 77.95409836065573, "grad_norm": 4.7267656326293945, "learning_rate": 2.442599082898678e-06, "loss": 0.4042, "step": 23776 }, { "epoch": 77.95737704918032, "grad_norm": 7.031148910522461, "learning_rate": 2.4419037257958757e-06, "loss": 0.4428, "step": 23777 }, { "epoch": 77.96065573770491, "grad_norm": 7.024040222167969, "learning_rate": 2.4412084539184667e-06, "loss": 0.4323, "step": 23778 }, { "epoch": 77.96393442622951, "grad_norm": 5.5827226638793945, "learning_rate": 2.4405132672742893e-06, "loss": 0.3725, "step": 23779 }, { "epoch": 77.9672131147541, "grad_norm": 5.28797721862793, "learning_rate": 2.4398181658711874e-06, "loss": 0.509, "step": 23780 }, { "epoch": 77.97049180327869, "grad_norm": 4.432661056518555, "learning_rate": 2.4391231497169965e-06, "loss": 0.3126, "step": 23781 }, { "epoch": 77.97377049180328, "grad_norm": 4.90221643447876, "learning_rate": 2.438428218819551e-06, "loss": 0.2996, "step": 23782 }, { "epoch": 77.97704918032787, "grad_norm": 5.4945268630981445, "learning_rate": 2.4377333731866914e-06, "loss": 0.5937, "step": 23783 }, { "epoch": 77.98032786885246, "grad_norm": 5.310599327087402, "learning_rate": 2.4370386128262514e-06, "loss": 0.435, "step": 23784 }, { "epoch": 77.98360655737704, "grad_norm": 5.342100143432617, "learning_rate": 2.4363439377460606e-06, "loss": 0.4131, "step": 23785 }, { "epoch": 77.98688524590163, "grad_norm": 17.6735782623291, "learning_rate": 2.43564934795396e-06, "loss": 0.2948, "step": 23786 }, { "epoch": 77.99016393442623, "grad_norm": 6.26810359954834, "learning_rate": 2.4349548434577774e-06, "loss": 0.431, "step": 23787 }, { "epoch": 77.99344262295082, "grad_norm": 4.553461074829102, "learning_rate": 2.4342604242653455e-06, "loss": 0.3222, "step": 23788 }, { "epoch": 77.99672131147541, "grad_norm": 4.95601224899292, "learning_rate": 2.4335660903844893e-06, "loss": 0.4894, "step": 23789 }, { "epoch": 78.0, "grad_norm": 4.640713214874268, "learning_rate": 2.432871841823047e-06, "loss": 0.3717, "step": 23790 }, { "epoch": 78.00327868852459, "grad_norm": 6.163998603820801, "learning_rate": 2.432177678588842e-06, "loss": 0.2813, "step": 23791 }, { "epoch": 78.00655737704918, "grad_norm": 4.529033660888672, "learning_rate": 2.4314836006897027e-06, "loss": 0.5622, "step": 23792 }, { "epoch": 78.00983606557377, "grad_norm": 4.275805950164795, "learning_rate": 2.4307896081334535e-06, "loss": 0.3776, "step": 23793 }, { "epoch": 78.01311475409837, "grad_norm": 5.379958152770996, "learning_rate": 2.4300957009279203e-06, "loss": 0.3695, "step": 23794 }, { "epoch": 78.01639344262296, "grad_norm": 6.4536662101745605, "learning_rate": 2.4294018790809303e-06, "loss": 0.6257, "step": 23795 }, { "epoch": 78.01967213114754, "grad_norm": 5.087362766265869, "learning_rate": 2.428708142600307e-06, "loss": 0.4258, "step": 23796 }, { "epoch": 78.02295081967213, "grad_norm": 5.607211589813232, "learning_rate": 2.4280144914938706e-06, "loss": 0.5101, "step": 23797 }, { "epoch": 78.02622950819672, "grad_norm": 4.401529788970947, "learning_rate": 2.427320925769441e-06, "loss": 0.2843, "step": 23798 }, { "epoch": 78.02950819672131, "grad_norm": 5.6155853271484375, "learning_rate": 2.426627445434845e-06, "loss": 0.2801, "step": 23799 }, { "epoch": 78.0327868852459, "grad_norm": 4.443413257598877, "learning_rate": 2.425934050497898e-06, "loss": 0.5357, "step": 23800 }, { "epoch": 78.03606557377049, "grad_norm": 5.31117057800293, "learning_rate": 2.42524074096642e-06, "loss": 0.4488, "step": 23801 }, { "epoch": 78.03934426229509, "grad_norm": 5.515144348144531, "learning_rate": 2.424547516848226e-06, "loss": 0.5341, "step": 23802 }, { "epoch": 78.04262295081968, "grad_norm": 6.249584197998047, "learning_rate": 2.423854378151137e-06, "loss": 0.5567, "step": 23803 }, { "epoch": 78.04590163934427, "grad_norm": 7.665732383728027, "learning_rate": 2.4231613248829677e-06, "loss": 0.3613, "step": 23804 }, { "epoch": 78.04918032786885, "grad_norm": 5.777039527893066, "learning_rate": 2.4224683570515326e-06, "loss": 0.3489, "step": 23805 }, { "epoch": 78.05245901639344, "grad_norm": 4.2791643142700195, "learning_rate": 2.4217754746646447e-06, "loss": 0.4189, "step": 23806 }, { "epoch": 78.05573770491803, "grad_norm": 5.124179840087891, "learning_rate": 2.4210826777301154e-06, "loss": 0.4808, "step": 23807 }, { "epoch": 78.05901639344262, "grad_norm": 5.12864351272583, "learning_rate": 2.420389966255763e-06, "loss": 0.3138, "step": 23808 }, { "epoch": 78.0622950819672, "grad_norm": 7.0487141609191895, "learning_rate": 2.4196973402493927e-06, "loss": 0.4791, "step": 23809 }, { "epoch": 78.06557377049181, "grad_norm": 6.312417984008789, "learning_rate": 2.419004799718817e-06, "loss": 0.3354, "step": 23810 }, { "epoch": 78.0688524590164, "grad_norm": 5.4391703605651855, "learning_rate": 2.4183123446718425e-06, "loss": 0.5431, "step": 23811 }, { "epoch": 78.07213114754099, "grad_norm": 5.1657304763793945, "learning_rate": 2.417619975116281e-06, "loss": 0.4271, "step": 23812 }, { "epoch": 78.07540983606557, "grad_norm": 6.040197372436523, "learning_rate": 2.416927691059938e-06, "loss": 0.3787, "step": 23813 }, { "epoch": 78.07868852459016, "grad_norm": 6.7404985427856445, "learning_rate": 2.4162354925106214e-06, "loss": 0.4583, "step": 23814 }, { "epoch": 78.08196721311475, "grad_norm": 4.718793869018555, "learning_rate": 2.4155433794761294e-06, "loss": 0.3973, "step": 23815 }, { "epoch": 78.08524590163934, "grad_norm": 5.144002437591553, "learning_rate": 2.4148513519642756e-06, "loss": 0.277, "step": 23816 }, { "epoch": 78.08852459016393, "grad_norm": 4.285428047180176, "learning_rate": 2.4141594099828603e-06, "loss": 0.3798, "step": 23817 }, { "epoch": 78.09180327868853, "grad_norm": 5.330895900726318, "learning_rate": 2.4134675535396847e-06, "loss": 0.3565, "step": 23818 }, { "epoch": 78.09508196721312, "grad_norm": 4.537327766418457, "learning_rate": 2.412775782642549e-06, "loss": 0.1491, "step": 23819 }, { "epoch": 78.09836065573771, "grad_norm": 5.3122687339782715, "learning_rate": 2.4120840972992533e-06, "loss": 0.2977, "step": 23820 }, { "epoch": 78.1016393442623, "grad_norm": 10.940459251403809, "learning_rate": 2.4113924975176016e-06, "loss": 0.7104, "step": 23821 }, { "epoch": 78.10491803278688, "grad_norm": 11.613120079040527, "learning_rate": 2.4107009833053896e-06, "loss": 0.3632, "step": 23822 }, { "epoch": 78.10819672131147, "grad_norm": 4.641307830810547, "learning_rate": 2.410009554670415e-06, "loss": 0.5648, "step": 23823 }, { "epoch": 78.11147540983606, "grad_norm": 6.047952175140381, "learning_rate": 2.4093182116204716e-06, "loss": 0.3527, "step": 23824 }, { "epoch": 78.11475409836065, "grad_norm": 5.285834312438965, "learning_rate": 2.40862695416336e-06, "loss": 0.5716, "step": 23825 }, { "epoch": 78.11803278688525, "grad_norm": 5.331604957580566, "learning_rate": 2.4079357823068727e-06, "loss": 0.1952, "step": 23826 }, { "epoch": 78.12131147540984, "grad_norm": 4.481794357299805, "learning_rate": 2.4072446960588035e-06, "loss": 0.2546, "step": 23827 }, { "epoch": 78.12459016393443, "grad_norm": 4.892654895782471, "learning_rate": 2.406553695426942e-06, "loss": 0.4828, "step": 23828 }, { "epoch": 78.12786885245902, "grad_norm": 8.858437538146973, "learning_rate": 2.405862780419085e-06, "loss": 0.274, "step": 23829 }, { "epoch": 78.1311475409836, "grad_norm": 5.092231750488281, "learning_rate": 2.4051719510430215e-06, "loss": 0.2651, "step": 23830 }, { "epoch": 78.1344262295082, "grad_norm": 10.415271759033203, "learning_rate": 2.4044812073065384e-06, "loss": 0.5111, "step": 23831 }, { "epoch": 78.13770491803278, "grad_norm": 8.107075691223145, "learning_rate": 2.4037905492174296e-06, "loss": 0.5938, "step": 23832 }, { "epoch": 78.14098360655737, "grad_norm": 5.3233513832092285, "learning_rate": 2.4030999767834807e-06, "loss": 0.2676, "step": 23833 }, { "epoch": 78.14426229508197, "grad_norm": 5.335149765014648, "learning_rate": 2.402409490012475e-06, "loss": 0.31, "step": 23834 }, { "epoch": 78.14754098360656, "grad_norm": 5.515214443206787, "learning_rate": 2.4017190889122045e-06, "loss": 0.3612, "step": 23835 }, { "epoch": 78.15081967213115, "grad_norm": 6.2909369468688965, "learning_rate": 2.4010287734904525e-06, "loss": 0.2964, "step": 23836 }, { "epoch": 78.15409836065574, "grad_norm": 5.8201093673706055, "learning_rate": 2.400338543755001e-06, "loss": 0.4401, "step": 23837 }, { "epoch": 78.15737704918033, "grad_norm": 15.306061744689941, "learning_rate": 2.3996483997136322e-06, "loss": 0.3156, "step": 23838 }, { "epoch": 78.16065573770491, "grad_norm": 4.123435974121094, "learning_rate": 2.3989583413741325e-06, "loss": 0.4606, "step": 23839 }, { "epoch": 78.1639344262295, "grad_norm": 5.074246406555176, "learning_rate": 2.39826836874428e-06, "loss": 0.4437, "step": 23840 }, { "epoch": 78.1672131147541, "grad_norm": 5.096865177154541, "learning_rate": 2.3975784818318536e-06, "loss": 0.5398, "step": 23841 }, { "epoch": 78.1704918032787, "grad_norm": 3.7042133808135986, "learning_rate": 2.396888680644637e-06, "loss": 0.1848, "step": 23842 }, { "epoch": 78.17377049180328, "grad_norm": 5.059101581573486, "learning_rate": 2.396198965190405e-06, "loss": 0.4449, "step": 23843 }, { "epoch": 78.17704918032787, "grad_norm": 5.351049423217773, "learning_rate": 2.3955093354769366e-06, "loss": 0.216, "step": 23844 }, { "epoch": 78.18032786885246, "grad_norm": 4.45170259475708, "learning_rate": 2.3948197915120063e-06, "loss": 0.3113, "step": 23845 }, { "epoch": 78.18360655737705, "grad_norm": 4.723963260650635, "learning_rate": 2.394130333303387e-06, "loss": 0.3396, "step": 23846 }, { "epoch": 78.18688524590164, "grad_norm": 5.668385982513428, "learning_rate": 2.3934409608588607e-06, "loss": 0.2357, "step": 23847 }, { "epoch": 78.19016393442622, "grad_norm": 3.7719826698303223, "learning_rate": 2.3927516741861956e-06, "loss": 0.1644, "step": 23848 }, { "epoch": 78.19344262295083, "grad_norm": 5.368808269500732, "learning_rate": 2.3920624732931654e-06, "loss": 0.3177, "step": 23849 }, { "epoch": 78.19672131147541, "grad_norm": 5.062428951263428, "learning_rate": 2.3913733581875376e-06, "loss": 0.3245, "step": 23850 }, { "epoch": 78.2, "grad_norm": 4.919303894042969, "learning_rate": 2.390684328877089e-06, "loss": 0.4668, "step": 23851 }, { "epoch": 78.20327868852459, "grad_norm": 11.417786598205566, "learning_rate": 2.389995385369587e-06, "loss": 0.5093, "step": 23852 }, { "epoch": 78.20655737704918, "grad_norm": 5.031462669372559, "learning_rate": 2.3893065276727976e-06, "loss": 0.4452, "step": 23853 }, { "epoch": 78.20983606557377, "grad_norm": 4.635671138763428, "learning_rate": 2.388617755794489e-06, "loss": 0.309, "step": 23854 }, { "epoch": 78.21311475409836, "grad_norm": 4.637706756591797, "learning_rate": 2.387929069742432e-06, "loss": 0.2881, "step": 23855 }, { "epoch": 78.21639344262294, "grad_norm": 7.230980396270752, "learning_rate": 2.3872404695243887e-06, "loss": 0.5378, "step": 23856 }, { "epoch": 78.21967213114755, "grad_norm": 12.251198768615723, "learning_rate": 2.3865519551481254e-06, "loss": 0.4048, "step": 23857 }, { "epoch": 78.22295081967214, "grad_norm": 5.091028213500977, "learning_rate": 2.3858635266214036e-06, "loss": 0.2893, "step": 23858 }, { "epoch": 78.22622950819672, "grad_norm": 5.478611946105957, "learning_rate": 2.3851751839519853e-06, "loss": 0.3546, "step": 23859 }, { "epoch": 78.22950819672131, "grad_norm": 5.537567138671875, "learning_rate": 2.384486927147637e-06, "loss": 0.3461, "step": 23860 }, { "epoch": 78.2327868852459, "grad_norm": 4.432549476623535, "learning_rate": 2.383798756216116e-06, "loss": 0.2695, "step": 23861 }, { "epoch": 78.23606557377049, "grad_norm": 113.57809448242188, "learning_rate": 2.383110671165183e-06, "loss": 0.3226, "step": 23862 }, { "epoch": 78.23934426229508, "grad_norm": 7.4448370933532715, "learning_rate": 2.3824226720025944e-06, "loss": 0.3001, "step": 23863 }, { "epoch": 78.24262295081967, "grad_norm": 4.384666442871094, "learning_rate": 2.381734758736113e-06, "loss": 0.4002, "step": 23864 }, { "epoch": 78.24590163934427, "grad_norm": 4.4176554679870605, "learning_rate": 2.381046931373493e-06, "loss": 0.4941, "step": 23865 }, { "epoch": 78.24918032786886, "grad_norm": 4.237224578857422, "learning_rate": 2.380359189922491e-06, "loss": 0.193, "step": 23866 }, { "epoch": 78.25245901639344, "grad_norm": 4.9775776863098145, "learning_rate": 2.379671534390862e-06, "loss": 0.6238, "step": 23867 }, { "epoch": 78.25573770491803, "grad_norm": 6.081577301025391, "learning_rate": 2.3789839647863556e-06, "loss": 0.3486, "step": 23868 }, { "epoch": 78.25901639344262, "grad_norm": 4.50548791885376, "learning_rate": 2.378296481116733e-06, "loss": 0.4656, "step": 23869 }, { "epoch": 78.26229508196721, "grad_norm": 4.716054439544678, "learning_rate": 2.3776090833897414e-06, "loss": 0.4231, "step": 23870 }, { "epoch": 78.2655737704918, "grad_norm": 5.191009998321533, "learning_rate": 2.3769217716131332e-06, "loss": 0.3188, "step": 23871 }, { "epoch": 78.26885245901639, "grad_norm": 4.000978469848633, "learning_rate": 2.3762345457946546e-06, "loss": 0.3722, "step": 23872 }, { "epoch": 78.27213114754099, "grad_norm": 4.858132362365723, "learning_rate": 2.3755474059420614e-06, "loss": 0.3587, "step": 23873 }, { "epoch": 78.27540983606558, "grad_norm": 4.932206153869629, "learning_rate": 2.3748603520630977e-06, "loss": 0.301, "step": 23874 }, { "epoch": 78.27868852459017, "grad_norm": 4.601912975311279, "learning_rate": 2.3741733841655124e-06, "loss": 0.2681, "step": 23875 }, { "epoch": 78.28196721311475, "grad_norm": 4.5645222663879395, "learning_rate": 2.3734865022570484e-06, "loss": 0.2574, "step": 23876 }, { "epoch": 78.28524590163934, "grad_norm": 5.193482398986816, "learning_rate": 2.372799706345457e-06, "loss": 0.4329, "step": 23877 }, { "epoch": 78.28852459016393, "grad_norm": 5.634138107299805, "learning_rate": 2.3721129964384783e-06, "loss": 0.4124, "step": 23878 }, { "epoch": 78.29180327868852, "grad_norm": 4.7612409591674805, "learning_rate": 2.371426372543857e-06, "loss": 0.1621, "step": 23879 }, { "epoch": 78.29508196721312, "grad_norm": 4.255672931671143, "learning_rate": 2.3707398346693346e-06, "loss": 0.3575, "step": 23880 }, { "epoch": 78.29836065573771, "grad_norm": 6.07030725479126, "learning_rate": 2.370053382822651e-06, "loss": 0.3626, "step": 23881 }, { "epoch": 78.3016393442623, "grad_norm": 5.011716842651367, "learning_rate": 2.3693670170115512e-06, "loss": 0.3317, "step": 23882 }, { "epoch": 78.30491803278689, "grad_norm": 5.897212982177734, "learning_rate": 2.3686807372437716e-06, "loss": 0.4911, "step": 23883 }, { "epoch": 78.30819672131148, "grad_norm": 4.248577117919922, "learning_rate": 2.367994543527049e-06, "loss": 0.4188, "step": 23884 }, { "epoch": 78.31147540983606, "grad_norm": 4.829988479614258, "learning_rate": 2.3673084358691257e-06, "loss": 0.3968, "step": 23885 }, { "epoch": 78.31475409836065, "grad_norm": 6.217302322387695, "learning_rate": 2.3666224142777363e-06, "loss": 0.3151, "step": 23886 }, { "epoch": 78.31803278688524, "grad_norm": 6.77354621887207, "learning_rate": 2.365936478760613e-06, "loss": 0.4093, "step": 23887 }, { "epoch": 78.32131147540984, "grad_norm": 5.040812969207764, "learning_rate": 2.3652506293254953e-06, "loss": 0.5651, "step": 23888 }, { "epoch": 78.32459016393443, "grad_norm": 4.700971603393555, "learning_rate": 2.3645648659801155e-06, "loss": 0.4126, "step": 23889 }, { "epoch": 78.32786885245902, "grad_norm": 14.255569458007812, "learning_rate": 2.363879188732203e-06, "loss": 0.6367, "step": 23890 }, { "epoch": 78.33114754098361, "grad_norm": 5.992029190063477, "learning_rate": 2.3631935975894947e-06, "loss": 0.327, "step": 23891 }, { "epoch": 78.3344262295082, "grad_norm": 5.145334720611572, "learning_rate": 2.3625080925597198e-06, "loss": 0.5592, "step": 23892 }, { "epoch": 78.33770491803278, "grad_norm": 6.002738952636719, "learning_rate": 2.361822673650607e-06, "loss": 0.3728, "step": 23893 }, { "epoch": 78.34098360655737, "grad_norm": 6.309530735015869, "learning_rate": 2.3611373408698813e-06, "loss": 0.383, "step": 23894 }, { "epoch": 78.34426229508196, "grad_norm": 4.815617561340332, "learning_rate": 2.3604520942252783e-06, "loss": 0.529, "step": 23895 }, { "epoch": 78.34754098360656, "grad_norm": 5.237115383148193, "learning_rate": 2.35976693372452e-06, "loss": 0.3263, "step": 23896 }, { "epoch": 78.35081967213115, "grad_norm": 4.571209907531738, "learning_rate": 2.359081859375334e-06, "loss": 0.5824, "step": 23897 }, { "epoch": 78.35409836065574, "grad_norm": 4.122174263000488, "learning_rate": 2.358396871185442e-06, "loss": 0.5311, "step": 23898 }, { "epoch": 78.35737704918033, "grad_norm": 5.197638988494873, "learning_rate": 2.357711969162574e-06, "loss": 0.4323, "step": 23899 }, { "epoch": 78.36065573770492, "grad_norm": 4.887250900268555, "learning_rate": 2.3570271533144485e-06, "loss": 0.198, "step": 23900 }, { "epoch": 78.3639344262295, "grad_norm": 4.700129985809326, "learning_rate": 2.3563424236487887e-06, "loss": 0.3651, "step": 23901 }, { "epoch": 78.3672131147541, "grad_norm": 4.575295925140381, "learning_rate": 2.3556577801733126e-06, "loss": 0.4597, "step": 23902 }, { "epoch": 78.37049180327868, "grad_norm": 6.1189446449279785, "learning_rate": 2.3549732228957466e-06, "loss": 0.396, "step": 23903 }, { "epoch": 78.37377049180328, "grad_norm": 4.723036289215088, "learning_rate": 2.3542887518238056e-06, "loss": 0.2586, "step": 23904 }, { "epoch": 78.37704918032787, "grad_norm": 4.323984622955322, "learning_rate": 2.3536043669652086e-06, "loss": 0.3623, "step": 23905 }, { "epoch": 78.38032786885246, "grad_norm": 6.534515857696533, "learning_rate": 2.3529200683276733e-06, "loss": 0.2587, "step": 23906 }, { "epoch": 78.38360655737705, "grad_norm": 4.187929630279541, "learning_rate": 2.352235855918912e-06, "loss": 0.6493, "step": 23907 }, { "epoch": 78.38688524590164, "grad_norm": 5.317453384399414, "learning_rate": 2.351551729746646e-06, "loss": 0.3619, "step": 23908 }, { "epoch": 78.39016393442623, "grad_norm": 7.486457824707031, "learning_rate": 2.3508676898185856e-06, "loss": 0.5287, "step": 23909 }, { "epoch": 78.39344262295081, "grad_norm": 4.451913356781006, "learning_rate": 2.350183736142446e-06, "loss": 0.2573, "step": 23910 }, { "epoch": 78.3967213114754, "grad_norm": 4.39347505569458, "learning_rate": 2.349499868725936e-06, "loss": 0.3918, "step": 23911 }, { "epoch": 78.4, "grad_norm": 4.93197774887085, "learning_rate": 2.3488160875767717e-06, "loss": 0.3529, "step": 23912 }, { "epoch": 78.4032786885246, "grad_norm": 5.479740142822266, "learning_rate": 2.3481323927026623e-06, "loss": 0.352, "step": 23913 }, { "epoch": 78.40655737704918, "grad_norm": 7.620237350463867, "learning_rate": 2.3474487841113146e-06, "loss": 0.6665, "step": 23914 }, { "epoch": 78.40983606557377, "grad_norm": 5.321951866149902, "learning_rate": 2.3467652618104375e-06, "loss": 0.3577, "step": 23915 }, { "epoch": 78.41311475409836, "grad_norm": 5.202331066131592, "learning_rate": 2.346081825807741e-06, "loss": 0.3279, "step": 23916 }, { "epoch": 78.41639344262295, "grad_norm": 6.101379871368408, "learning_rate": 2.3453984761109304e-06, "loss": 0.2342, "step": 23917 }, { "epoch": 78.41967213114754, "grad_norm": 5.717656135559082, "learning_rate": 2.3447152127277105e-06, "loss": 0.2651, "step": 23918 }, { "epoch": 78.42295081967212, "grad_norm": 5.1589250564575195, "learning_rate": 2.3440320356657863e-06, "loss": 0.5938, "step": 23919 }, { "epoch": 78.42622950819673, "grad_norm": 5.422295093536377, "learning_rate": 2.3433489449328583e-06, "loss": 0.3611, "step": 23920 }, { "epoch": 78.42950819672132, "grad_norm": 3.9480373859405518, "learning_rate": 2.342665940536635e-06, "loss": 0.4884, "step": 23921 }, { "epoch": 78.4327868852459, "grad_norm": 4.140563011169434, "learning_rate": 2.341983022484814e-06, "loss": 0.3446, "step": 23922 }, { "epoch": 78.43606557377049, "grad_norm": 6.5608954429626465, "learning_rate": 2.3413001907850975e-06, "loss": 0.3345, "step": 23923 }, { "epoch": 78.43934426229508, "grad_norm": 6.488271713256836, "learning_rate": 2.340617445445181e-06, "loss": 0.3461, "step": 23924 }, { "epoch": 78.44262295081967, "grad_norm": 19.362815856933594, "learning_rate": 2.3399347864727693e-06, "loss": 0.3306, "step": 23925 }, { "epoch": 78.44590163934426, "grad_norm": 4.419638633728027, "learning_rate": 2.339252213875558e-06, "loss": 0.2593, "step": 23926 }, { "epoch": 78.44918032786886, "grad_norm": 3.8678388595581055, "learning_rate": 2.338569727661243e-06, "loss": 0.3531, "step": 23927 }, { "epoch": 78.45245901639345, "grad_norm": 7.052954196929932, "learning_rate": 2.337887327837517e-06, "loss": 0.5487, "step": 23928 }, { "epoch": 78.45573770491804, "grad_norm": 5.659749507904053, "learning_rate": 2.3372050144120815e-06, "loss": 0.5356, "step": 23929 }, { "epoch": 78.45901639344262, "grad_norm": 5.071471214294434, "learning_rate": 2.3365227873926266e-06, "loss": 0.4487, "step": 23930 }, { "epoch": 78.46229508196721, "grad_norm": 6.488310813903809, "learning_rate": 2.3358406467868445e-06, "loss": 0.6521, "step": 23931 }, { "epoch": 78.4655737704918, "grad_norm": 7.0890583992004395, "learning_rate": 2.335158592602429e-06, "loss": 0.4142, "step": 23932 }, { "epoch": 78.46885245901639, "grad_norm": 4.7542195320129395, "learning_rate": 2.334476624847066e-06, "loss": 0.3597, "step": 23933 }, { "epoch": 78.47213114754098, "grad_norm": 6.472294807434082, "learning_rate": 2.3337947435284525e-06, "loss": 0.3971, "step": 23934 }, { "epoch": 78.47540983606558, "grad_norm": 9.223527908325195, "learning_rate": 2.3331129486542738e-06, "loss": 0.3656, "step": 23935 }, { "epoch": 78.47868852459017, "grad_norm": 4.217504024505615, "learning_rate": 2.332431240232216e-06, "loss": 0.2472, "step": 23936 }, { "epoch": 78.48196721311476, "grad_norm": 4.56280517578125, "learning_rate": 2.3317496182699704e-06, "loss": 0.3233, "step": 23937 }, { "epoch": 78.48524590163935, "grad_norm": 4.397084712982178, "learning_rate": 2.3310680827752207e-06, "loss": 0.2682, "step": 23938 }, { "epoch": 78.48852459016393, "grad_norm": 4.639225006103516, "learning_rate": 2.3303866337556523e-06, "loss": 0.4113, "step": 23939 }, { "epoch": 78.49180327868852, "grad_norm": 5.137971878051758, "learning_rate": 2.3297052712189462e-06, "loss": 0.2622, "step": 23940 }, { "epoch": 78.49508196721311, "grad_norm": 4.8739914894104, "learning_rate": 2.3290239951727913e-06, "loss": 0.3061, "step": 23941 }, { "epoch": 78.4983606557377, "grad_norm": 18.574493408203125, "learning_rate": 2.3283428056248668e-06, "loss": 0.7179, "step": 23942 }, { "epoch": 78.5016393442623, "grad_norm": 4.992490291595459, "learning_rate": 2.3276617025828505e-06, "loss": 0.4257, "step": 23943 }, { "epoch": 78.50491803278689, "grad_norm": 4.667822360992432, "learning_rate": 2.3269806860544286e-06, "loss": 0.3572, "step": 23944 }, { "epoch": 78.50819672131148, "grad_norm": 29.38677978515625, "learning_rate": 2.3262997560472776e-06, "loss": 0.4113, "step": 23945 }, { "epoch": 78.51147540983607, "grad_norm": 6.265066623687744, "learning_rate": 2.325618912569073e-06, "loss": 0.3763, "step": 23946 }, { "epoch": 78.51475409836065, "grad_norm": 5.601830005645752, "learning_rate": 2.3249381556274964e-06, "loss": 0.521, "step": 23947 }, { "epoch": 78.51803278688524, "grad_norm": 4.8639936447143555, "learning_rate": 2.3242574852302225e-06, "loss": 0.2829, "step": 23948 }, { "epoch": 78.52131147540983, "grad_norm": 4.649516582489014, "learning_rate": 2.323576901384926e-06, "loss": 0.5375, "step": 23949 }, { "epoch": 78.52459016393442, "grad_norm": 5.1861090660095215, "learning_rate": 2.3228964040992798e-06, "loss": 0.3583, "step": 23950 }, { "epoch": 78.52786885245902, "grad_norm": 4.442910671234131, "learning_rate": 2.3222159933809606e-06, "loss": 0.5412, "step": 23951 }, { "epoch": 78.53114754098361, "grad_norm": 6.973167896270752, "learning_rate": 2.321535669237639e-06, "loss": 0.309, "step": 23952 }, { "epoch": 78.5344262295082, "grad_norm": 5.045154571533203, "learning_rate": 2.320855431676987e-06, "loss": 0.3891, "step": 23953 }, { "epoch": 78.53770491803279, "grad_norm": 9.741188049316406, "learning_rate": 2.3201752807066715e-06, "loss": 0.4003, "step": 23954 }, { "epoch": 78.54098360655738, "grad_norm": 4.851689338684082, "learning_rate": 2.3194952163343667e-06, "loss": 0.3718, "step": 23955 }, { "epoch": 78.54426229508196, "grad_norm": 4.4755425453186035, "learning_rate": 2.3188152385677397e-06, "loss": 0.313, "step": 23956 }, { "epoch": 78.54754098360655, "grad_norm": 5.3893513679504395, "learning_rate": 2.3181353474144565e-06, "loss": 0.353, "step": 23957 }, { "epoch": 78.55081967213114, "grad_norm": 5.1008172035217285, "learning_rate": 2.3174555428821854e-06, "loss": 0.5444, "step": 23958 }, { "epoch": 78.55409836065574, "grad_norm": 5.447037220001221, "learning_rate": 2.3167758249785877e-06, "loss": 0.2325, "step": 23959 }, { "epoch": 78.55737704918033, "grad_norm": 4.8157758712768555, "learning_rate": 2.3160961937113334e-06, "loss": 0.3349, "step": 23960 }, { "epoch": 78.56065573770492, "grad_norm": 4.741034030914307, "learning_rate": 2.3154166490880847e-06, "loss": 0.288, "step": 23961 }, { "epoch": 78.56393442622951, "grad_norm": 4.964934349060059, "learning_rate": 2.314737191116503e-06, "loss": 0.3213, "step": 23962 }, { "epoch": 78.5672131147541, "grad_norm": 4.693398952484131, "learning_rate": 2.3140578198042463e-06, "loss": 0.1972, "step": 23963 }, { "epoch": 78.57049180327868, "grad_norm": 4.594701290130615, "learning_rate": 2.3133785351589833e-06, "loss": 0.3569, "step": 23964 }, { "epoch": 78.57377049180327, "grad_norm": 6.708677291870117, "learning_rate": 2.3126993371883686e-06, "loss": 0.4333, "step": 23965 }, { "epoch": 78.57704918032788, "grad_norm": 4.161100387573242, "learning_rate": 2.312020225900061e-06, "loss": 0.3156, "step": 23966 }, { "epoch": 78.58032786885246, "grad_norm": 4.639076232910156, "learning_rate": 2.311341201301718e-06, "loss": 0.4189, "step": 23967 }, { "epoch": 78.58360655737705, "grad_norm": 5.089381217956543, "learning_rate": 2.310662263400998e-06, "loss": 0.3592, "step": 23968 }, { "epoch": 78.58688524590164, "grad_norm": 5.273154258728027, "learning_rate": 2.3099834122055574e-06, "loss": 0.546, "step": 23969 }, { "epoch": 78.59016393442623, "grad_norm": 6.42578649520874, "learning_rate": 2.3093046477230486e-06, "loss": 0.4497, "step": 23970 }, { "epoch": 78.59344262295082, "grad_norm": 3.5672481060028076, "learning_rate": 2.308625969961127e-06, "loss": 0.2949, "step": 23971 }, { "epoch": 78.5967213114754, "grad_norm": 5.7574028968811035, "learning_rate": 2.307947378927441e-06, "loss": 0.5099, "step": 23972 }, { "epoch": 78.6, "grad_norm": 4.527923583984375, "learning_rate": 2.307268874629649e-06, "loss": 0.3803, "step": 23973 }, { "epoch": 78.6032786885246, "grad_norm": 4.232149124145508, "learning_rate": 2.3065904570753983e-06, "loss": 0.5278, "step": 23974 }, { "epoch": 78.60655737704919, "grad_norm": 6.510988235473633, "learning_rate": 2.3059121262723405e-06, "loss": 0.7488, "step": 23975 }, { "epoch": 78.60983606557377, "grad_norm": 5.304602146148682, "learning_rate": 2.3052338822281194e-06, "loss": 0.3924, "step": 23976 }, { "epoch": 78.61311475409836, "grad_norm": 5.376559734344482, "learning_rate": 2.3045557249503903e-06, "loss": 0.4202, "step": 23977 }, { "epoch": 78.61639344262295, "grad_norm": 5.457753658294678, "learning_rate": 2.303877654446797e-06, "loss": 0.496, "step": 23978 }, { "epoch": 78.61967213114754, "grad_norm": 5.356690406799316, "learning_rate": 2.3031996707249837e-06, "loss": 0.3774, "step": 23979 }, { "epoch": 78.62295081967213, "grad_norm": 4.330227375030518, "learning_rate": 2.3025217737925955e-06, "loss": 0.1925, "step": 23980 }, { "epoch": 78.62622950819672, "grad_norm": 5.09293270111084, "learning_rate": 2.30184396365728e-06, "loss": 0.2727, "step": 23981 }, { "epoch": 78.62950819672132, "grad_norm": 6.528368949890137, "learning_rate": 2.3011662403266778e-06, "loss": 0.4668, "step": 23982 }, { "epoch": 78.6327868852459, "grad_norm": 3.993354320526123, "learning_rate": 2.3004886038084308e-06, "loss": 0.2687, "step": 23983 }, { "epoch": 78.6360655737705, "grad_norm": 4.116604328155518, "learning_rate": 2.2998110541101804e-06, "loss": 0.4568, "step": 23984 }, { "epoch": 78.63934426229508, "grad_norm": 5.9326348304748535, "learning_rate": 2.299133591239564e-06, "loss": 0.3728, "step": 23985 }, { "epoch": 78.64262295081967, "grad_norm": 4.433969497680664, "learning_rate": 2.2984562152042258e-06, "loss": 0.3545, "step": 23986 }, { "epoch": 78.64590163934426, "grad_norm": 5.233268737792969, "learning_rate": 2.297778926011801e-06, "loss": 0.3234, "step": 23987 }, { "epoch": 78.64918032786885, "grad_norm": 5.579715728759766, "learning_rate": 2.2971017236699277e-06, "loss": 0.3811, "step": 23988 }, { "epoch": 78.65245901639344, "grad_norm": 5.607280731201172, "learning_rate": 2.2964246081862385e-06, "loss": 0.3213, "step": 23989 }, { "epoch": 78.65573770491804, "grad_norm": 6.20263671875, "learning_rate": 2.295747579568375e-06, "loss": 0.3128, "step": 23990 }, { "epoch": 78.65901639344263, "grad_norm": 4.639609336853027, "learning_rate": 2.2950706378239673e-06, "loss": 0.6096, "step": 23991 }, { "epoch": 78.66229508196722, "grad_norm": 4.599428176879883, "learning_rate": 2.2943937829606464e-06, "loss": 0.328, "step": 23992 }, { "epoch": 78.6655737704918, "grad_norm": 5.088124752044678, "learning_rate": 2.29371701498605e-06, "loss": 0.5085, "step": 23993 }, { "epoch": 78.66885245901639, "grad_norm": 19.774456024169922, "learning_rate": 2.293040333907808e-06, "loss": 0.2836, "step": 23994 }, { "epoch": 78.67213114754098, "grad_norm": 4.356388568878174, "learning_rate": 2.292363739733545e-06, "loss": 0.5712, "step": 23995 }, { "epoch": 78.67540983606557, "grad_norm": 4.861408710479736, "learning_rate": 2.2916872324708985e-06, "loss": 0.3802, "step": 23996 }, { "epoch": 78.67868852459016, "grad_norm": 4.55862283706665, "learning_rate": 2.2910108121274933e-06, "loss": 0.3625, "step": 23997 }, { "epoch": 78.68196721311476, "grad_norm": 4.005277156829834, "learning_rate": 2.2903344787109527e-06, "loss": 0.2912, "step": 23998 }, { "epoch": 78.68524590163935, "grad_norm": 19.36930274963379, "learning_rate": 2.2896582322289106e-06, "loss": 0.2376, "step": 23999 }, { "epoch": 78.68852459016394, "grad_norm": 5.69624662399292, "learning_rate": 2.2889820726889887e-06, "loss": 0.3682, "step": 24000 }, { "epoch": 78.69180327868852, "grad_norm": 5.123089790344238, "learning_rate": 2.288306000098811e-06, "loss": 0.3715, "step": 24001 }, { "epoch": 78.69508196721311, "grad_norm": 5.477288722991943, "learning_rate": 2.287630014465999e-06, "loss": 0.4045, "step": 24002 }, { "epoch": 78.6983606557377, "grad_norm": 4.925130844116211, "learning_rate": 2.28695411579818e-06, "loss": 0.4037, "step": 24003 }, { "epoch": 78.70163934426229, "grad_norm": 5.871913433074951, "learning_rate": 2.286278304102972e-06, "loss": 0.2632, "step": 24004 }, { "epoch": 78.70491803278688, "grad_norm": 3.8509116172790527, "learning_rate": 2.2856025793879978e-06, "loss": 0.3824, "step": 24005 }, { "epoch": 78.70819672131148, "grad_norm": 4.601780414581299, "learning_rate": 2.284926941660872e-06, "loss": 0.2331, "step": 24006 }, { "epoch": 78.71147540983607, "grad_norm": 4.957388877868652, "learning_rate": 2.2842513909292197e-06, "loss": 0.6232, "step": 24007 }, { "epoch": 78.71475409836066, "grad_norm": 4.389978408813477, "learning_rate": 2.2835759272006554e-06, "loss": 0.2182, "step": 24008 }, { "epoch": 78.71803278688525, "grad_norm": 4.8984055519104, "learning_rate": 2.282900550482795e-06, "loss": 0.524, "step": 24009 }, { "epoch": 78.72131147540983, "grad_norm": 5.751274108886719, "learning_rate": 2.282225260783254e-06, "loss": 0.353, "step": 24010 }, { "epoch": 78.72459016393442, "grad_norm": 7.5642194747924805, "learning_rate": 2.281550058109646e-06, "loss": 0.5267, "step": 24011 }, { "epoch": 78.72786885245901, "grad_norm": 5.216136932373047, "learning_rate": 2.2808749424695896e-06, "loss": 0.4059, "step": 24012 }, { "epoch": 78.73114754098361, "grad_norm": 4.253593444824219, "learning_rate": 2.2801999138706933e-06, "loss": 0.5263, "step": 24013 }, { "epoch": 78.7344262295082, "grad_norm": 5.674278259277344, "learning_rate": 2.2795249723205693e-06, "loss": 0.4478, "step": 24014 }, { "epoch": 78.73770491803279, "grad_norm": 5.135682582855225, "learning_rate": 2.2788501178268262e-06, "loss": 0.4544, "step": 24015 }, { "epoch": 78.74098360655738, "grad_norm": 6.202841281890869, "learning_rate": 2.2781753503970782e-06, "loss": 0.34, "step": 24016 }, { "epoch": 78.74426229508197, "grad_norm": 4.98719596862793, "learning_rate": 2.2775006700389325e-06, "loss": 0.173, "step": 24017 }, { "epoch": 78.74754098360656, "grad_norm": 4.1161956787109375, "learning_rate": 2.2768260767599958e-06, "loss": 0.2332, "step": 24018 }, { "epoch": 78.75081967213114, "grad_norm": 5.011631011962891, "learning_rate": 2.2761515705678727e-06, "loss": 0.4241, "step": 24019 }, { "epoch": 78.75409836065573, "grad_norm": 5.163403511047363, "learning_rate": 2.2754771514701735e-06, "loss": 0.2896, "step": 24020 }, { "epoch": 78.75737704918033, "grad_norm": 5.384404182434082, "learning_rate": 2.274802819474502e-06, "loss": 0.3939, "step": 24021 }, { "epoch": 78.76065573770492, "grad_norm": 5.626774787902832, "learning_rate": 2.27412857458846e-06, "loss": 0.3654, "step": 24022 }, { "epoch": 78.76393442622951, "grad_norm": 8.49947738647461, "learning_rate": 2.2734544168196515e-06, "loss": 0.3778, "step": 24023 }, { "epoch": 78.7672131147541, "grad_norm": 4.473507404327393, "learning_rate": 2.2727803461756748e-06, "loss": 0.4504, "step": 24024 }, { "epoch": 78.77049180327869, "grad_norm": 4.763424396514893, "learning_rate": 2.272106362664137e-06, "loss": 0.3242, "step": 24025 }, { "epoch": 78.77377049180328, "grad_norm": 3.7966959476470947, "learning_rate": 2.2714324662926357e-06, "loss": 0.2293, "step": 24026 }, { "epoch": 78.77704918032786, "grad_norm": 4.593430519104004, "learning_rate": 2.270758657068769e-06, "loss": 0.3662, "step": 24027 }, { "epoch": 78.78032786885245, "grad_norm": 6.847832679748535, "learning_rate": 2.2700849350001306e-06, "loss": 0.6001, "step": 24028 }, { "epoch": 78.78360655737706, "grad_norm": 6.507163047790527, "learning_rate": 2.269411300094326e-06, "loss": 0.2919, "step": 24029 }, { "epoch": 78.78688524590164, "grad_norm": 5.63242769241333, "learning_rate": 2.268737752358946e-06, "loss": 0.3283, "step": 24030 }, { "epoch": 78.79016393442623, "grad_norm": 4.191186428070068, "learning_rate": 2.268064291801587e-06, "loss": 0.268, "step": 24031 }, { "epoch": 78.79344262295082, "grad_norm": 7.341834545135498, "learning_rate": 2.267390918429839e-06, "loss": 0.3361, "step": 24032 }, { "epoch": 78.79672131147541, "grad_norm": 4.0544023513793945, "learning_rate": 2.2667176322513005e-06, "loss": 0.1688, "step": 24033 }, { "epoch": 78.8, "grad_norm": 4.0180583000183105, "learning_rate": 2.266044433273562e-06, "loss": 0.2795, "step": 24034 }, { "epoch": 78.80327868852459, "grad_norm": 6.797003746032715, "learning_rate": 2.2653713215042137e-06, "loss": 0.3173, "step": 24035 }, { "epoch": 78.80655737704917, "grad_norm": 4.37939453125, "learning_rate": 2.2646982969508456e-06, "loss": 0.4667, "step": 24036 }, { "epoch": 78.80983606557378, "grad_norm": 4.404173374176025, "learning_rate": 2.264025359621045e-06, "loss": 0.1421, "step": 24037 }, { "epoch": 78.81311475409836, "grad_norm": 16.226030349731445, "learning_rate": 2.263352509522404e-06, "loss": 0.2069, "step": 24038 }, { "epoch": 78.81639344262295, "grad_norm": 7.440530776977539, "learning_rate": 2.262679746662507e-06, "loss": 0.5652, "step": 24039 }, { "epoch": 78.81967213114754, "grad_norm": 17.610124588012695, "learning_rate": 2.2620070710489416e-06, "loss": 0.4714, "step": 24040 }, { "epoch": 78.82295081967213, "grad_norm": 5.591254234313965, "learning_rate": 2.261334482689289e-06, "loss": 0.6275, "step": 24041 }, { "epoch": 78.82622950819672, "grad_norm": 5.193422317504883, "learning_rate": 2.260661981591139e-06, "loss": 0.5037, "step": 24042 }, { "epoch": 78.8295081967213, "grad_norm": 6.939553737640381, "learning_rate": 2.259989567762072e-06, "loss": 0.3486, "step": 24043 }, { "epoch": 78.8327868852459, "grad_norm": 5.362951755523682, "learning_rate": 2.2593172412096698e-06, "loss": 0.3451, "step": 24044 }, { "epoch": 78.8360655737705, "grad_norm": 5.912522792816162, "learning_rate": 2.2586450019415118e-06, "loss": 0.513, "step": 24045 }, { "epoch": 78.83934426229509, "grad_norm": 4.46004056930542, "learning_rate": 2.257972849965182e-06, "loss": 0.3257, "step": 24046 }, { "epoch": 78.84262295081967, "grad_norm": 5.380290508270264, "learning_rate": 2.257300785288259e-06, "loss": 0.6082, "step": 24047 }, { "epoch": 78.84590163934426, "grad_norm": 12.777604103088379, "learning_rate": 2.2566288079183172e-06, "loss": 0.3979, "step": 24048 }, { "epoch": 78.84918032786885, "grad_norm": 5.319641590118408, "learning_rate": 2.2559569178629394e-06, "loss": 0.2686, "step": 24049 }, { "epoch": 78.85245901639344, "grad_norm": 4.459691047668457, "learning_rate": 2.2552851151296995e-06, "loss": 0.1391, "step": 24050 }, { "epoch": 78.85573770491803, "grad_norm": 5.995357036590576, "learning_rate": 2.254613399726169e-06, "loss": 0.2346, "step": 24051 }, { "epoch": 78.85901639344263, "grad_norm": 5.50771951675415, "learning_rate": 2.2539417716599286e-06, "loss": 0.42, "step": 24052 }, { "epoch": 78.86229508196722, "grad_norm": 5.0172624588012695, "learning_rate": 2.253270230938549e-06, "loss": 0.3427, "step": 24053 }, { "epoch": 78.8655737704918, "grad_norm": 6.493340969085693, "learning_rate": 2.252598777569599e-06, "loss": 0.4359, "step": 24054 }, { "epoch": 78.8688524590164, "grad_norm": 4.955975532531738, "learning_rate": 2.2519274115606558e-06, "loss": 0.29, "step": 24055 }, { "epoch": 78.87213114754098, "grad_norm": 4.960416316986084, "learning_rate": 2.251256132919287e-06, "loss": 0.2903, "step": 24056 }, { "epoch": 78.87540983606557, "grad_norm": 4.821866512298584, "learning_rate": 2.2505849416530624e-06, "loss": 0.3391, "step": 24057 }, { "epoch": 78.87868852459016, "grad_norm": 4.387631416320801, "learning_rate": 2.2499138377695463e-06, "loss": 0.2885, "step": 24058 }, { "epoch": 78.88196721311475, "grad_norm": 6.077194690704346, "learning_rate": 2.249242821276313e-06, "loss": 0.2867, "step": 24059 }, { "epoch": 78.88524590163935, "grad_norm": 4.34840202331543, "learning_rate": 2.2485718921809263e-06, "loss": 0.3852, "step": 24060 }, { "epoch": 78.88852459016394, "grad_norm": 4.682982444763184, "learning_rate": 2.2479010504909504e-06, "loss": 0.2599, "step": 24061 }, { "epoch": 78.89180327868853, "grad_norm": 6.982481002807617, "learning_rate": 2.2472302962139504e-06, "loss": 0.4971, "step": 24062 }, { "epoch": 78.89508196721312, "grad_norm": 3.7686641216278076, "learning_rate": 2.2465596293574867e-06, "loss": 0.2354, "step": 24063 }, { "epoch": 78.8983606557377, "grad_norm": 4.2992634773254395, "learning_rate": 2.2458890499291273e-06, "loss": 0.2484, "step": 24064 }, { "epoch": 78.90163934426229, "grad_norm": 5.607577800750732, "learning_rate": 2.2452185579364318e-06, "loss": 0.2709, "step": 24065 }, { "epoch": 78.90491803278688, "grad_norm": 5.389059066772461, "learning_rate": 2.2445481533869597e-06, "loss": 0.3031, "step": 24066 }, { "epoch": 78.90819672131147, "grad_norm": 4.541276454925537, "learning_rate": 2.2438778362882672e-06, "loss": 0.4182, "step": 24067 }, { "epoch": 78.91147540983607, "grad_norm": 4.748803615570068, "learning_rate": 2.2432076066479203e-06, "loss": 0.4578, "step": 24068 }, { "epoch": 78.91475409836066, "grad_norm": 5.2864556312561035, "learning_rate": 2.242537464473472e-06, "loss": 0.3894, "step": 24069 }, { "epoch": 78.91803278688525, "grad_norm": 10.067207336425781, "learning_rate": 2.24186740977248e-06, "loss": 0.237, "step": 24070 }, { "epoch": 78.92131147540984, "grad_norm": 5.530791759490967, "learning_rate": 2.2411974425524964e-06, "loss": 0.5131, "step": 24071 }, { "epoch": 78.92459016393443, "grad_norm": 7.581827163696289, "learning_rate": 2.240527562821082e-06, "loss": 0.6331, "step": 24072 }, { "epoch": 78.92786885245901, "grad_norm": 6.332217216491699, "learning_rate": 2.239857770585787e-06, "loss": 0.4475, "step": 24073 }, { "epoch": 78.9311475409836, "grad_norm": 9.575469017028809, "learning_rate": 2.2391880658541644e-06, "loss": 0.2219, "step": 24074 }, { "epoch": 78.93442622950819, "grad_norm": 4.248405456542969, "learning_rate": 2.2385184486337643e-06, "loss": 0.5051, "step": 24075 }, { "epoch": 78.9377049180328, "grad_norm": 5.444329261779785, "learning_rate": 2.237848918932137e-06, "loss": 0.4232, "step": 24076 }, { "epoch": 78.94098360655738, "grad_norm": 6.6543803215026855, "learning_rate": 2.2371794767568367e-06, "loss": 0.3765, "step": 24077 }, { "epoch": 78.94426229508197, "grad_norm": 4.18820858001709, "learning_rate": 2.2365101221154084e-06, "loss": 0.4579, "step": 24078 }, { "epoch": 78.94754098360656, "grad_norm": 6.876628875732422, "learning_rate": 2.2358408550154e-06, "loss": 0.6275, "step": 24079 }, { "epoch": 78.95081967213115, "grad_norm": 4.145083904266357, "learning_rate": 2.235171675464357e-06, "loss": 0.4116, "step": 24080 }, { "epoch": 78.95409836065573, "grad_norm": 6.286003589630127, "learning_rate": 2.2345025834698285e-06, "loss": 0.2672, "step": 24081 }, { "epoch": 78.95737704918032, "grad_norm": 4.802170276641846, "learning_rate": 2.2338335790393583e-06, "loss": 0.4866, "step": 24082 }, { "epoch": 78.96065573770491, "grad_norm": 5.035134315490723, "learning_rate": 2.233164662180489e-06, "loss": 0.4855, "step": 24083 }, { "epoch": 78.96393442622951, "grad_norm": 5.138571262359619, "learning_rate": 2.232495832900762e-06, "loss": 0.393, "step": 24084 }, { "epoch": 78.9672131147541, "grad_norm": 4.235174179077148, "learning_rate": 2.231827091207721e-06, "loss": 0.3885, "step": 24085 }, { "epoch": 78.97049180327869, "grad_norm": 6.071766376495361, "learning_rate": 2.231158437108908e-06, "loss": 0.5545, "step": 24086 }, { "epoch": 78.97377049180328, "grad_norm": 5.58784294128418, "learning_rate": 2.2304898706118614e-06, "loss": 0.3679, "step": 24087 }, { "epoch": 78.97704918032787, "grad_norm": 5.195667743682861, "learning_rate": 2.2298213917241196e-06, "loss": 0.459, "step": 24088 }, { "epoch": 78.98032786885246, "grad_norm": 4.748215198516846, "learning_rate": 2.2291530004532168e-06, "loss": 0.1628, "step": 24089 }, { "epoch": 78.98360655737704, "grad_norm": 4.329610824584961, "learning_rate": 2.2284846968066976e-06, "loss": 0.4689, "step": 24090 }, { "epoch": 78.98688524590163, "grad_norm": 4.86710786819458, "learning_rate": 2.2278164807920933e-06, "loss": 0.478, "step": 24091 }, { "epoch": 78.99016393442623, "grad_norm": 7.165732383728027, "learning_rate": 2.227148352416939e-06, "loss": 0.3579, "step": 24092 }, { "epoch": 78.99344262295082, "grad_norm": 5.432634353637695, "learning_rate": 2.2264803116887666e-06, "loss": 0.4868, "step": 24093 }, { "epoch": 78.99672131147541, "grad_norm": 4.809765338897705, "learning_rate": 2.2258123586151137e-06, "loss": 0.4164, "step": 24094 }, { "epoch": 79.0, "grad_norm": 7.732107639312744, "learning_rate": 2.2251444932035094e-06, "loss": 0.4145, "step": 24095 }, { "epoch": 79.00327868852459, "grad_norm": 6.232525825500488, "learning_rate": 2.2244767154614843e-06, "loss": 0.2884, "step": 24096 }, { "epoch": 79.00655737704918, "grad_norm": 4.917602062225342, "learning_rate": 2.2238090253965662e-06, "loss": 0.3172, "step": 24097 }, { "epoch": 79.00983606557377, "grad_norm": 5.964860439300537, "learning_rate": 2.2231414230162897e-06, "loss": 0.4898, "step": 24098 }, { "epoch": 79.01311475409837, "grad_norm": 6.304998874664307, "learning_rate": 2.222473908328179e-06, "loss": 0.3658, "step": 24099 }, { "epoch": 79.01639344262296, "grad_norm": 4.6418914794921875, "learning_rate": 2.22180648133976e-06, "loss": 0.3448, "step": 24100 }, { "epoch": 79.01967213114754, "grad_norm": 4.039450168609619, "learning_rate": 2.2211391420585614e-06, "loss": 0.2458, "step": 24101 }, { "epoch": 79.02295081967213, "grad_norm": 4.503075122833252, "learning_rate": 2.2204718904921084e-06, "loss": 0.3128, "step": 24102 }, { "epoch": 79.02622950819672, "grad_norm": 4.583745002746582, "learning_rate": 2.219804726647923e-06, "loss": 0.2274, "step": 24103 }, { "epoch": 79.02950819672131, "grad_norm": 5.38224983215332, "learning_rate": 2.219137650533525e-06, "loss": 0.3637, "step": 24104 }, { "epoch": 79.0327868852459, "grad_norm": 5.0923590660095215, "learning_rate": 2.2184706621564433e-06, "loss": 0.3931, "step": 24105 }, { "epoch": 79.03606557377049, "grad_norm": 4.036191940307617, "learning_rate": 2.2178037615241967e-06, "loss": 0.4385, "step": 24106 }, { "epoch": 79.03934426229509, "grad_norm": 4.179004192352295, "learning_rate": 2.217136948644301e-06, "loss": 0.3144, "step": 24107 }, { "epoch": 79.04262295081968, "grad_norm": 8.488574028015137, "learning_rate": 2.2164702235242795e-06, "loss": 0.2741, "step": 24108 }, { "epoch": 79.04590163934427, "grad_norm": 5.471632957458496, "learning_rate": 2.215803586171651e-06, "loss": 0.2379, "step": 24109 }, { "epoch": 79.04918032786885, "grad_norm": 5.269779205322266, "learning_rate": 2.2151370365939275e-06, "loss": 0.3995, "step": 24110 }, { "epoch": 79.05245901639344, "grad_norm": 10.119775772094727, "learning_rate": 2.2144705747986304e-06, "loss": 0.4264, "step": 24111 }, { "epoch": 79.05573770491803, "grad_norm": 5.581435680389404, "learning_rate": 2.2138042007932725e-06, "loss": 0.4958, "step": 24112 }, { "epoch": 79.05901639344262, "grad_norm": 4.193531513214111, "learning_rate": 2.213137914585368e-06, "loss": 0.705, "step": 24113 }, { "epoch": 79.0622950819672, "grad_norm": 4.680142879486084, "learning_rate": 2.2124717161824296e-06, "loss": 0.4478, "step": 24114 }, { "epoch": 79.06557377049181, "grad_norm": 4.819206714630127, "learning_rate": 2.211805605591967e-06, "loss": 0.2515, "step": 24115 }, { "epoch": 79.0688524590164, "grad_norm": 4.712584972381592, "learning_rate": 2.2111395828214967e-06, "loss": 0.3533, "step": 24116 }, { "epoch": 79.07213114754099, "grad_norm": 38.98155212402344, "learning_rate": 2.210473647878526e-06, "loss": 0.4329, "step": 24117 }, { "epoch": 79.07540983606557, "grad_norm": 7.901360988616943, "learning_rate": 2.209807800770565e-06, "loss": 0.3431, "step": 24118 }, { "epoch": 79.07868852459016, "grad_norm": 6.76123571395874, "learning_rate": 2.2091420415051168e-06, "loss": 0.4052, "step": 24119 }, { "epoch": 79.08196721311475, "grad_norm": 5.300256729125977, "learning_rate": 2.208476370089695e-06, "loss": 0.2184, "step": 24120 }, { "epoch": 79.08524590163934, "grad_norm": 3.968163013458252, "learning_rate": 2.2078107865318044e-06, "loss": 0.1888, "step": 24121 }, { "epoch": 79.08852459016393, "grad_norm": 7.737478256225586, "learning_rate": 2.2071452908389478e-06, "loss": 0.5329, "step": 24122 }, { "epoch": 79.09180327868853, "grad_norm": 5.151042938232422, "learning_rate": 2.2064798830186283e-06, "loss": 0.5433, "step": 24123 }, { "epoch": 79.09508196721312, "grad_norm": 5.289669036865234, "learning_rate": 2.2058145630783545e-06, "loss": 0.3381, "step": 24124 }, { "epoch": 79.09836065573771, "grad_norm": 5.401118278503418, "learning_rate": 2.2051493310256255e-06, "loss": 0.3486, "step": 24125 }, { "epoch": 79.1016393442623, "grad_norm": 5.923545837402344, "learning_rate": 2.2044841868679422e-06, "loss": 0.2709, "step": 24126 }, { "epoch": 79.10491803278688, "grad_norm": 5.391834259033203, "learning_rate": 2.2038191306128043e-06, "loss": 0.5432, "step": 24127 }, { "epoch": 79.10819672131147, "grad_norm": 7.384142875671387, "learning_rate": 2.20315416226771e-06, "loss": 0.4265, "step": 24128 }, { "epoch": 79.11147540983606, "grad_norm": 7.690103054046631, "learning_rate": 2.202489281840161e-06, "loss": 0.5455, "step": 24129 }, { "epoch": 79.11475409836065, "grad_norm": 5.177007675170898, "learning_rate": 2.201824489337654e-06, "loss": 0.5263, "step": 24130 }, { "epoch": 79.11803278688525, "grad_norm": 5.281131744384766, "learning_rate": 2.2011597847676825e-06, "loss": 0.4547, "step": 24131 }, { "epoch": 79.12131147540984, "grad_norm": 5.700400352478027, "learning_rate": 2.2004951681377417e-06, "loss": 0.4443, "step": 24132 }, { "epoch": 79.12459016393443, "grad_norm": 5.7624640464782715, "learning_rate": 2.1998306394553293e-06, "loss": 0.3807, "step": 24133 }, { "epoch": 79.12786885245902, "grad_norm": 4.671435356140137, "learning_rate": 2.1991661987279368e-06, "loss": 0.2977, "step": 24134 }, { "epoch": 79.1311475409836, "grad_norm": 4.266319274902344, "learning_rate": 2.1985018459630557e-06, "loss": 0.3956, "step": 24135 }, { "epoch": 79.1344262295082, "grad_norm": 3.951932668685913, "learning_rate": 2.197837581168176e-06, "loss": 0.4126, "step": 24136 }, { "epoch": 79.13770491803278, "grad_norm": 4.517702579498291, "learning_rate": 2.197173404350792e-06, "loss": 0.2022, "step": 24137 }, { "epoch": 79.14098360655737, "grad_norm": 10.062911033630371, "learning_rate": 2.1965093155183914e-06, "loss": 0.3531, "step": 24138 }, { "epoch": 79.14426229508197, "grad_norm": 4.165750503540039, "learning_rate": 2.1958453146784607e-06, "loss": 0.5437, "step": 24139 }, { "epoch": 79.14754098360656, "grad_norm": 5.00223970413208, "learning_rate": 2.1951814018384897e-06, "loss": 0.489, "step": 24140 }, { "epoch": 79.15081967213115, "grad_norm": 4.226847171783447, "learning_rate": 2.19451757700596e-06, "loss": 0.435, "step": 24141 }, { "epoch": 79.15409836065574, "grad_norm": 3.9905686378479004, "learning_rate": 2.1938538401883625e-06, "loss": 0.3452, "step": 24142 }, { "epoch": 79.15737704918033, "grad_norm": 10.68777084350586, "learning_rate": 2.1931901913931797e-06, "loss": 0.5403, "step": 24143 }, { "epoch": 79.16065573770491, "grad_norm": 4.269011974334717, "learning_rate": 2.1925266306278945e-06, "loss": 0.3513, "step": 24144 }, { "epoch": 79.1639344262295, "grad_norm": 4.2209343910217285, "learning_rate": 2.191863157899987e-06, "loss": 0.2638, "step": 24145 }, { "epoch": 79.1672131147541, "grad_norm": 4.345408916473389, "learning_rate": 2.191199773216943e-06, "loss": 0.2853, "step": 24146 }, { "epoch": 79.1704918032787, "grad_norm": 4.693040370941162, "learning_rate": 2.1905364765862415e-06, "loss": 0.3572, "step": 24147 }, { "epoch": 79.17377049180328, "grad_norm": 12.227843284606934, "learning_rate": 2.18987326801536e-06, "loss": 0.3875, "step": 24148 }, { "epoch": 79.17704918032787, "grad_norm": 4.96860933303833, "learning_rate": 2.1892101475117754e-06, "loss": 0.248, "step": 24149 }, { "epoch": 79.18032786885246, "grad_norm": 3.765153169631958, "learning_rate": 2.1885471150829705e-06, "loss": 0.3028, "step": 24150 }, { "epoch": 79.18360655737705, "grad_norm": 4.384559631347656, "learning_rate": 2.1878841707364196e-06, "loss": 0.3658, "step": 24151 }, { "epoch": 79.18688524590164, "grad_norm": 5.218328952789307, "learning_rate": 2.187221314479596e-06, "loss": 0.4364, "step": 24152 }, { "epoch": 79.19016393442622, "grad_norm": 8.665589332580566, "learning_rate": 2.1865585463199736e-06, "loss": 0.4874, "step": 24153 }, { "epoch": 79.19344262295083, "grad_norm": 4.9926652908325195, "learning_rate": 2.1858958662650287e-06, "loss": 0.4551, "step": 24154 }, { "epoch": 79.19672131147541, "grad_norm": 6.987595081329346, "learning_rate": 2.185233274322234e-06, "loss": 0.3913, "step": 24155 }, { "epoch": 79.2, "grad_norm": 5.295958518981934, "learning_rate": 2.184570770499056e-06, "loss": 0.4849, "step": 24156 }, { "epoch": 79.20327868852459, "grad_norm": 5.909680366516113, "learning_rate": 2.1839083548029715e-06, "loss": 0.2848, "step": 24157 }, { "epoch": 79.20655737704918, "grad_norm": 6.332797050476074, "learning_rate": 2.1832460272414466e-06, "loss": 0.375, "step": 24158 }, { "epoch": 79.20983606557377, "grad_norm": 6.042525768280029, "learning_rate": 2.182583787821948e-06, "loss": 0.4397, "step": 24159 }, { "epoch": 79.21311475409836, "grad_norm": 4.651207447052002, "learning_rate": 2.181921636551948e-06, "loss": 0.4451, "step": 24160 }, { "epoch": 79.21639344262294, "grad_norm": 4.092367172241211, "learning_rate": 2.181259573438911e-06, "loss": 0.3892, "step": 24161 }, { "epoch": 79.21967213114755, "grad_norm": 10.009044647216797, "learning_rate": 2.1805975984903007e-06, "loss": 0.3522, "step": 24162 }, { "epoch": 79.22295081967214, "grad_norm": 5.5590500831604, "learning_rate": 2.17993571171358e-06, "loss": 0.3352, "step": 24163 }, { "epoch": 79.22622950819672, "grad_norm": 6.015674114227295, "learning_rate": 2.1792739131162177e-06, "loss": 0.4407, "step": 24164 }, { "epoch": 79.22950819672131, "grad_norm": 4.196606636047363, "learning_rate": 2.1786122027056735e-06, "loss": 0.3309, "step": 24165 }, { "epoch": 79.2327868852459, "grad_norm": 4.700545787811279, "learning_rate": 2.1779505804894085e-06, "loss": 0.1821, "step": 24166 }, { "epoch": 79.23606557377049, "grad_norm": 7.065726280212402, "learning_rate": 2.177289046474882e-06, "loss": 0.4706, "step": 24167 }, { "epoch": 79.23934426229508, "grad_norm": 4.889662742614746, "learning_rate": 2.1766276006695573e-06, "loss": 0.3294, "step": 24168 }, { "epoch": 79.24262295081967, "grad_norm": 5.607089042663574, "learning_rate": 2.1759662430808904e-06, "loss": 0.2957, "step": 24169 }, { "epoch": 79.24590163934427, "grad_norm": 6.178500652313232, "learning_rate": 2.175304973716339e-06, "loss": 0.4434, "step": 24170 }, { "epoch": 79.24918032786886, "grad_norm": 6.5831298828125, "learning_rate": 2.1746437925833575e-06, "loss": 0.5114, "step": 24171 }, { "epoch": 79.25245901639344, "grad_norm": 6.84239387512207, "learning_rate": 2.1739826996894063e-06, "loss": 0.2258, "step": 24172 }, { "epoch": 79.25573770491803, "grad_norm": 5.1321001052856445, "learning_rate": 2.1733216950419366e-06, "loss": 0.6543, "step": 24173 }, { "epoch": 79.25901639344262, "grad_norm": 4.491923809051514, "learning_rate": 2.1726607786484035e-06, "loss": 0.6125, "step": 24174 }, { "epoch": 79.26229508196721, "grad_norm": 9.022624015808105, "learning_rate": 2.171999950516255e-06, "loss": 0.437, "step": 24175 }, { "epoch": 79.2655737704918, "grad_norm": 3.828841209411621, "learning_rate": 2.1713392106529485e-06, "loss": 0.4109, "step": 24176 }, { "epoch": 79.26885245901639, "grad_norm": 5.244295120239258, "learning_rate": 2.170678559065933e-06, "loss": 0.3792, "step": 24177 }, { "epoch": 79.27213114754099, "grad_norm": 4.5858683586120605, "learning_rate": 2.1700179957626567e-06, "loss": 0.3797, "step": 24178 }, { "epoch": 79.27540983606558, "grad_norm": 5.905069351196289, "learning_rate": 2.1693575207505677e-06, "loss": 0.3376, "step": 24179 }, { "epoch": 79.27868852459017, "grad_norm": 5.067302703857422, "learning_rate": 2.1686971340371132e-06, "loss": 0.6039, "step": 24180 }, { "epoch": 79.28196721311475, "grad_norm": 4.8716254234313965, "learning_rate": 2.1680368356297433e-06, "loss": 0.5907, "step": 24181 }, { "epoch": 79.28524590163934, "grad_norm": 6.45953369140625, "learning_rate": 2.167376625535902e-06, "loss": 0.4521, "step": 24182 }, { "epoch": 79.28852459016393, "grad_norm": 5.11647891998291, "learning_rate": 2.166716503763032e-06, "loss": 0.3506, "step": 24183 }, { "epoch": 79.29180327868852, "grad_norm": 5.6984758377075195, "learning_rate": 2.166056470318576e-06, "loss": 0.2539, "step": 24184 }, { "epoch": 79.29508196721312, "grad_norm": 8.709494590759277, "learning_rate": 2.1653965252099808e-06, "loss": 0.3809, "step": 24185 }, { "epoch": 79.29836065573771, "grad_norm": 4.905977725982666, "learning_rate": 2.1647366684446858e-06, "loss": 0.4242, "step": 24186 }, { "epoch": 79.3016393442623, "grad_norm": 4.353912830352783, "learning_rate": 2.164076900030132e-06, "loss": 0.4566, "step": 24187 }, { "epoch": 79.30491803278689, "grad_norm": 5.064448833465576, "learning_rate": 2.163417219973755e-06, "loss": 0.5584, "step": 24188 }, { "epoch": 79.30819672131148, "grad_norm": 5.401305675506592, "learning_rate": 2.162757628283e-06, "loss": 0.3287, "step": 24189 }, { "epoch": 79.31147540983606, "grad_norm": 4.639204025268555, "learning_rate": 2.1620981249653016e-06, "loss": 0.3836, "step": 24190 }, { "epoch": 79.31475409836065, "grad_norm": 8.356159210205078, "learning_rate": 2.1614387100280954e-06, "loss": 0.4162, "step": 24191 }, { "epoch": 79.31803278688524, "grad_norm": 4.201958179473877, "learning_rate": 2.1607793834788184e-06, "loss": 0.3706, "step": 24192 }, { "epoch": 79.32131147540984, "grad_norm": 7.22594690322876, "learning_rate": 2.160120145324902e-06, "loss": 0.2532, "step": 24193 }, { "epoch": 79.32459016393443, "grad_norm": 4.992046356201172, "learning_rate": 2.1594609955737855e-06, "loss": 0.3602, "step": 24194 }, { "epoch": 79.32786885245902, "grad_norm": 5.888679504394531, "learning_rate": 2.158801934232897e-06, "loss": 0.2646, "step": 24195 }, { "epoch": 79.33114754098361, "grad_norm": 5.09929895401001, "learning_rate": 2.1581429613096706e-06, "loss": 0.5374, "step": 24196 }, { "epoch": 79.3344262295082, "grad_norm": 4.845461368560791, "learning_rate": 2.1574840768115333e-06, "loss": 0.3998, "step": 24197 }, { "epoch": 79.33770491803278, "grad_norm": 4.767894268035889, "learning_rate": 2.156825280745919e-06, "loss": 0.4345, "step": 24198 }, { "epoch": 79.34098360655737, "grad_norm": 4.644039630889893, "learning_rate": 2.1561665731202554e-06, "loss": 0.3167, "step": 24199 }, { "epoch": 79.34426229508196, "grad_norm": 4.725759983062744, "learning_rate": 2.1555079539419687e-06, "loss": 0.397, "step": 24200 }, { "epoch": 79.34754098360656, "grad_norm": 5.48956298828125, "learning_rate": 2.1548494232184836e-06, "loss": 0.5508, "step": 24201 }, { "epoch": 79.35081967213115, "grad_norm": 6.594020843505859, "learning_rate": 2.15419098095723e-06, "loss": 0.2742, "step": 24202 }, { "epoch": 79.35409836065574, "grad_norm": 5.123099327087402, "learning_rate": 2.153532627165632e-06, "loss": 0.4036, "step": 24203 }, { "epoch": 79.35737704918033, "grad_norm": 6.518645763397217, "learning_rate": 2.1528743618511116e-06, "loss": 0.5108, "step": 24204 }, { "epoch": 79.36065573770492, "grad_norm": 5.101457595825195, "learning_rate": 2.1522161850210908e-06, "loss": 0.3331, "step": 24205 }, { "epoch": 79.3639344262295, "grad_norm": 7.79312801361084, "learning_rate": 2.151558096682991e-06, "loss": 0.3298, "step": 24206 }, { "epoch": 79.3672131147541, "grad_norm": 7.539348602294922, "learning_rate": 2.150900096844235e-06, "loss": 0.2344, "step": 24207 }, { "epoch": 79.37049180327868, "grad_norm": 5.450221061706543, "learning_rate": 2.1502421855122425e-06, "loss": 0.3314, "step": 24208 }, { "epoch": 79.37377049180328, "grad_norm": 4.953159332275391, "learning_rate": 2.149584362694428e-06, "loss": 0.2132, "step": 24209 }, { "epoch": 79.37704918032787, "grad_norm": 6.194108963012695, "learning_rate": 2.1489266283982147e-06, "loss": 0.1714, "step": 24210 }, { "epoch": 79.38032786885246, "grad_norm": 5.710176467895508, "learning_rate": 2.1482689826310177e-06, "loss": 0.368, "step": 24211 }, { "epoch": 79.38360655737705, "grad_norm": 5.826442718505859, "learning_rate": 2.147611425400248e-06, "loss": 0.7363, "step": 24212 }, { "epoch": 79.38688524590164, "grad_norm": 4.933194160461426, "learning_rate": 2.146953956713327e-06, "loss": 0.4721, "step": 24213 }, { "epoch": 79.39016393442623, "grad_norm": 7.061183929443359, "learning_rate": 2.1462965765776646e-06, "loss": 0.3792, "step": 24214 }, { "epoch": 79.39344262295081, "grad_norm": 4.700014114379883, "learning_rate": 2.1456392850006725e-06, "loss": 0.2513, "step": 24215 }, { "epoch": 79.3967213114754, "grad_norm": 10.634115219116211, "learning_rate": 2.144982081989766e-06, "loss": 0.634, "step": 24216 }, { "epoch": 79.4, "grad_norm": 5.8918633460998535, "learning_rate": 2.1443249675523536e-06, "loss": 0.2817, "step": 24217 }, { "epoch": 79.4032786885246, "grad_norm": 5.114095211029053, "learning_rate": 2.143667941695845e-06, "loss": 0.2734, "step": 24218 }, { "epoch": 79.40655737704918, "grad_norm": 5.773759841918945, "learning_rate": 2.1430110044276464e-06, "loss": 0.4062, "step": 24219 }, { "epoch": 79.40983606557377, "grad_norm": 6.593662738800049, "learning_rate": 2.142354155755171e-06, "loss": 0.3228, "step": 24220 }, { "epoch": 79.41311475409836, "grad_norm": 4.181230068206787, "learning_rate": 2.1416973956858224e-06, "loss": 0.2515, "step": 24221 }, { "epoch": 79.41639344262295, "grad_norm": 4.967438697814941, "learning_rate": 2.141040724227006e-06, "loss": 0.3193, "step": 24222 }, { "epoch": 79.41967213114754, "grad_norm": 4.653805732727051, "learning_rate": 2.1403841413861236e-06, "loss": 0.382, "step": 24223 }, { "epoch": 79.42295081967212, "grad_norm": 4.277749061584473, "learning_rate": 2.1397276471705853e-06, "loss": 0.3238, "step": 24224 }, { "epoch": 79.42622950819673, "grad_norm": 9.342290878295898, "learning_rate": 2.13907124158779e-06, "loss": 0.4248, "step": 24225 }, { "epoch": 79.42950819672132, "grad_norm": 5.528289318084717, "learning_rate": 2.13841492464514e-06, "loss": 0.3964, "step": 24226 }, { "epoch": 79.4327868852459, "grad_norm": 6.57073450088501, "learning_rate": 2.137758696350033e-06, "loss": 0.2561, "step": 24227 }, { "epoch": 79.43606557377049, "grad_norm": 5.801220893859863, "learning_rate": 2.1371025567098735e-06, "loss": 0.3774, "step": 24228 }, { "epoch": 79.43934426229508, "grad_norm": 5.648161888122559, "learning_rate": 2.1364465057320584e-06, "loss": 0.3819, "step": 24229 }, { "epoch": 79.44262295081967, "grad_norm": 5.476319789886475, "learning_rate": 2.1357905434239858e-06, "loss": 0.5428, "step": 24230 }, { "epoch": 79.44590163934426, "grad_norm": 5.2657575607299805, "learning_rate": 2.1351346697930507e-06, "loss": 0.1559, "step": 24231 }, { "epoch": 79.44918032786886, "grad_norm": 5.397482395172119, "learning_rate": 2.134478884846647e-06, "loss": 0.413, "step": 24232 }, { "epoch": 79.45245901639345, "grad_norm": 4.517059326171875, "learning_rate": 2.1338231885921743e-06, "loss": 0.264, "step": 24233 }, { "epoch": 79.45573770491804, "grad_norm": 4.6627197265625, "learning_rate": 2.1331675810370244e-06, "loss": 0.3863, "step": 24234 }, { "epoch": 79.45901639344262, "grad_norm": 5.0117411613464355, "learning_rate": 2.1325120621885896e-06, "loss": 0.475, "step": 24235 }, { "epoch": 79.46229508196721, "grad_norm": 5.7011332511901855, "learning_rate": 2.131856632054259e-06, "loss": 0.5573, "step": 24236 }, { "epoch": 79.4655737704918, "grad_norm": 5.40597677230835, "learning_rate": 2.1312012906414282e-06, "loss": 0.2509, "step": 24237 }, { "epoch": 79.46885245901639, "grad_norm": 4.692899703979492, "learning_rate": 2.1305460379574837e-06, "loss": 0.501, "step": 24238 }, { "epoch": 79.47213114754098, "grad_norm": 4.760856628417969, "learning_rate": 2.1298908740098157e-06, "loss": 0.332, "step": 24239 }, { "epoch": 79.47540983606558, "grad_norm": 4.391260147094727, "learning_rate": 2.1292357988058078e-06, "loss": 0.2229, "step": 24240 }, { "epoch": 79.47868852459017, "grad_norm": 5.230528354644775, "learning_rate": 2.1285808123528516e-06, "loss": 0.3597, "step": 24241 }, { "epoch": 79.48196721311476, "grad_norm": 6.0137739181518555, "learning_rate": 2.127925914658332e-06, "loss": 0.329, "step": 24242 }, { "epoch": 79.48524590163935, "grad_norm": 5.993948936462402, "learning_rate": 2.1272711057296325e-06, "loss": 0.538, "step": 24243 }, { "epoch": 79.48852459016393, "grad_norm": 4.303866386413574, "learning_rate": 2.1266163855741373e-06, "loss": 0.436, "step": 24244 }, { "epoch": 79.49180327868852, "grad_norm": 4.487898349761963, "learning_rate": 2.125961754199225e-06, "loss": 0.2775, "step": 24245 }, { "epoch": 79.49508196721311, "grad_norm": 4.154772758483887, "learning_rate": 2.1253072116122843e-06, "loss": 0.1873, "step": 24246 }, { "epoch": 79.4983606557377, "grad_norm": 5.182215690612793, "learning_rate": 2.124652757820692e-06, "loss": 0.2682, "step": 24247 }, { "epoch": 79.5016393442623, "grad_norm": 5.97282600402832, "learning_rate": 2.1239983928318287e-06, "loss": 0.5893, "step": 24248 }, { "epoch": 79.50491803278689, "grad_norm": 5.52943754196167, "learning_rate": 2.1233441166530688e-06, "loss": 0.3539, "step": 24249 }, { "epoch": 79.50819672131148, "grad_norm": 5.88778829574585, "learning_rate": 2.1226899292917967e-06, "loss": 0.4955, "step": 24250 }, { "epoch": 79.51147540983607, "grad_norm": 6.883354663848877, "learning_rate": 2.122035830755387e-06, "loss": 0.4515, "step": 24251 }, { "epoch": 79.51475409836065, "grad_norm": 5.2579569816589355, "learning_rate": 2.121381821051214e-06, "loss": 0.3013, "step": 24252 }, { "epoch": 79.51803278688524, "grad_norm": 4.280639171600342, "learning_rate": 2.1207279001866487e-06, "loss": 0.3237, "step": 24253 }, { "epoch": 79.52131147540983, "grad_norm": 6.367513179779053, "learning_rate": 2.1200740681690722e-06, "loss": 0.4975, "step": 24254 }, { "epoch": 79.52459016393442, "grad_norm": 4.6973724365234375, "learning_rate": 2.119420325005854e-06, "loss": 0.4059, "step": 24255 }, { "epoch": 79.52786885245902, "grad_norm": 6.661221981048584, "learning_rate": 2.1187666707043654e-06, "loss": 0.3106, "step": 24256 }, { "epoch": 79.53114754098361, "grad_norm": 6.129277229309082, "learning_rate": 2.1181131052719773e-06, "loss": 0.4446, "step": 24257 }, { "epoch": 79.5344262295082, "grad_norm": 5.390417575836182, "learning_rate": 2.1174596287160555e-06, "loss": 0.3307, "step": 24258 }, { "epoch": 79.53770491803279, "grad_norm": 5.6570634841918945, "learning_rate": 2.116806241043975e-06, "loss": 0.2708, "step": 24259 }, { "epoch": 79.54098360655738, "grad_norm": 4.512884140014648, "learning_rate": 2.116152942263101e-06, "loss": 0.3495, "step": 24260 }, { "epoch": 79.54426229508196, "grad_norm": 5.009425163269043, "learning_rate": 2.115499732380797e-06, "loss": 0.2687, "step": 24261 }, { "epoch": 79.54754098360655, "grad_norm": 6.058065414428711, "learning_rate": 2.114846611404433e-06, "loss": 0.4929, "step": 24262 }, { "epoch": 79.55081967213114, "grad_norm": 4.46851110458374, "learning_rate": 2.1141935793413726e-06, "loss": 0.3071, "step": 24263 }, { "epoch": 79.55409836065574, "grad_norm": 4.241330146789551, "learning_rate": 2.1135406361989763e-06, "loss": 0.3107, "step": 24264 }, { "epoch": 79.55737704918033, "grad_norm": 8.259147644042969, "learning_rate": 2.112887781984613e-06, "loss": 0.5391, "step": 24265 }, { "epoch": 79.56065573770492, "grad_norm": 5.449702262878418, "learning_rate": 2.1122350167056384e-06, "loss": 0.3856, "step": 24266 }, { "epoch": 79.56393442622951, "grad_norm": 3.7964212894439697, "learning_rate": 2.111582340369417e-06, "loss": 0.2962, "step": 24267 }, { "epoch": 79.5672131147541, "grad_norm": 6.1192827224731445, "learning_rate": 2.1109297529833027e-06, "loss": 0.4361, "step": 24268 }, { "epoch": 79.57049180327868, "grad_norm": 4.394861698150635, "learning_rate": 2.110277254554661e-06, "loss": 0.266, "step": 24269 }, { "epoch": 79.57377049180327, "grad_norm": 7.172664165496826, "learning_rate": 2.1096248450908463e-06, "loss": 0.2495, "step": 24270 }, { "epoch": 79.57704918032788, "grad_norm": 7.653151988983154, "learning_rate": 2.108972524599213e-06, "loss": 0.3359, "step": 24271 }, { "epoch": 79.58032786885246, "grad_norm": 6.954565048217773, "learning_rate": 2.1083202930871216e-06, "loss": 0.3461, "step": 24272 }, { "epoch": 79.58360655737705, "grad_norm": 5.072975158691406, "learning_rate": 2.1076681505619247e-06, "loss": 0.4541, "step": 24273 }, { "epoch": 79.58688524590164, "grad_norm": 4.911689758300781, "learning_rate": 2.107016097030975e-06, "loss": 0.5388, "step": 24274 }, { "epoch": 79.59016393442623, "grad_norm": 7.58889627456665, "learning_rate": 2.106364132501623e-06, "loss": 0.2926, "step": 24275 }, { "epoch": 79.59344262295082, "grad_norm": 4.846299171447754, "learning_rate": 2.105712256981225e-06, "loss": 0.2953, "step": 24276 }, { "epoch": 79.5967213114754, "grad_norm": 5.3340325355529785, "learning_rate": 2.1050604704771294e-06, "loss": 0.5192, "step": 24277 }, { "epoch": 79.6, "grad_norm": 5.065394401550293, "learning_rate": 2.1044087729966856e-06, "loss": 0.3303, "step": 24278 }, { "epoch": 79.6032786885246, "grad_norm": 5.595554351806641, "learning_rate": 2.1037571645472397e-06, "loss": 0.3504, "step": 24279 }, { "epoch": 79.60655737704919, "grad_norm": 3.652911901473999, "learning_rate": 2.103105645136145e-06, "loss": 0.3485, "step": 24280 }, { "epoch": 79.60983606557377, "grad_norm": 4.904077053070068, "learning_rate": 2.102454214770745e-06, "loss": 0.2763, "step": 24281 }, { "epoch": 79.61311475409836, "grad_norm": 22.001930236816406, "learning_rate": 2.101802873458384e-06, "loss": 0.2048, "step": 24282 }, { "epoch": 79.61639344262295, "grad_norm": 4.642014026641846, "learning_rate": 2.101151621206409e-06, "loss": 0.4921, "step": 24283 }, { "epoch": 79.61967213114754, "grad_norm": 5.314323902130127, "learning_rate": 2.1005004580221578e-06, "loss": 0.3787, "step": 24284 }, { "epoch": 79.62295081967213, "grad_norm": 5.602720260620117, "learning_rate": 2.0998493839129807e-06, "loss": 0.3058, "step": 24285 }, { "epoch": 79.62622950819672, "grad_norm": 8.136082649230957, "learning_rate": 2.0991983988862163e-06, "loss": 0.3652, "step": 24286 }, { "epoch": 79.62950819672132, "grad_norm": 4.863259315490723, "learning_rate": 2.098547502949205e-06, "loss": 0.2288, "step": 24287 }, { "epoch": 79.6327868852459, "grad_norm": 4.809487819671631, "learning_rate": 2.0978966961092826e-06, "loss": 0.3958, "step": 24288 }, { "epoch": 79.6360655737705, "grad_norm": 5.257741451263428, "learning_rate": 2.097245978373794e-06, "loss": 0.3851, "step": 24289 }, { "epoch": 79.63934426229508, "grad_norm": 4.953583240509033, "learning_rate": 2.0965953497500747e-06, "loss": 0.4288, "step": 24290 }, { "epoch": 79.64262295081967, "grad_norm": 4.783683776855469, "learning_rate": 2.0959448102454594e-06, "loss": 0.1706, "step": 24291 }, { "epoch": 79.64590163934426, "grad_norm": 4.769247531890869, "learning_rate": 2.0952943598672847e-06, "loss": 0.4673, "step": 24292 }, { "epoch": 79.64918032786885, "grad_norm": 6.858333110809326, "learning_rate": 2.0946439986228817e-06, "loss": 0.2728, "step": 24293 }, { "epoch": 79.65245901639344, "grad_norm": 11.03083610534668, "learning_rate": 2.093993726519591e-06, "loss": 0.3974, "step": 24294 }, { "epoch": 79.65573770491804, "grad_norm": 3.5880844593048096, "learning_rate": 2.0933435435647398e-06, "loss": 0.302, "step": 24295 }, { "epoch": 79.65901639344263, "grad_norm": 7.119924068450928, "learning_rate": 2.0926934497656616e-06, "loss": 0.528, "step": 24296 }, { "epoch": 79.66229508196722, "grad_norm": 5.982209205627441, "learning_rate": 2.0920434451296845e-06, "loss": 0.3683, "step": 24297 }, { "epoch": 79.6655737704918, "grad_norm": 4.806993007659912, "learning_rate": 2.091393529664141e-06, "loss": 0.2769, "step": 24298 }, { "epoch": 79.66885245901639, "grad_norm": 8.431428909301758, "learning_rate": 2.0907437033763587e-06, "loss": 0.3009, "step": 24299 }, { "epoch": 79.67213114754098, "grad_norm": 6.149631977081299, "learning_rate": 2.0900939662736654e-06, "loss": 0.3808, "step": 24300 }, { "epoch": 79.67540983606557, "grad_norm": 4.945957183837891, "learning_rate": 2.089444318363384e-06, "loss": 0.298, "step": 24301 }, { "epoch": 79.67868852459016, "grad_norm": 8.525816917419434, "learning_rate": 2.0887947596528455e-06, "loss": 0.4419, "step": 24302 }, { "epoch": 79.68196721311476, "grad_norm": 6.182198524475098, "learning_rate": 2.0881452901493714e-06, "loss": 0.4827, "step": 24303 }, { "epoch": 79.68524590163935, "grad_norm": 4.993375301361084, "learning_rate": 2.0874959098602854e-06, "loss": 0.345, "step": 24304 }, { "epoch": 79.68852459016394, "grad_norm": 5.871800422668457, "learning_rate": 2.0868466187929105e-06, "loss": 0.5192, "step": 24305 }, { "epoch": 79.69180327868852, "grad_norm": 4.093712329864502, "learning_rate": 2.086197416954564e-06, "loss": 0.3267, "step": 24306 }, { "epoch": 79.69508196721311, "grad_norm": 5.355624675750732, "learning_rate": 2.0855483043525737e-06, "loss": 0.3861, "step": 24307 }, { "epoch": 79.6983606557377, "grad_norm": 3.806098461151123, "learning_rate": 2.0848992809942537e-06, "loss": 0.3727, "step": 24308 }, { "epoch": 79.70163934426229, "grad_norm": 5.08809757232666, "learning_rate": 2.084250346886926e-06, "loss": 0.437, "step": 24309 }, { "epoch": 79.70491803278688, "grad_norm": 5.4373860359191895, "learning_rate": 2.0836015020379018e-06, "loss": 0.344, "step": 24310 }, { "epoch": 79.70819672131148, "grad_norm": 16.408309936523438, "learning_rate": 2.082952746454504e-06, "loss": 0.4727, "step": 24311 }, { "epoch": 79.71147540983607, "grad_norm": 5.65498161315918, "learning_rate": 2.0823040801440464e-06, "loss": 0.3699, "step": 24312 }, { "epoch": 79.71475409836066, "grad_norm": 4.770843982696533, "learning_rate": 2.081655503113843e-06, "loss": 0.2789, "step": 24313 }, { "epoch": 79.71803278688525, "grad_norm": 4.1294660568237305, "learning_rate": 2.0810070153712035e-06, "loss": 0.7479, "step": 24314 }, { "epoch": 79.72131147540983, "grad_norm": 4.660440444946289, "learning_rate": 2.080358616923447e-06, "loss": 0.2948, "step": 24315 }, { "epoch": 79.72459016393442, "grad_norm": 4.9540863037109375, "learning_rate": 2.0797103077778803e-06, "loss": 0.4189, "step": 24316 }, { "epoch": 79.72786885245901, "grad_norm": 6.924975872039795, "learning_rate": 2.0790620879418133e-06, "loss": 0.3094, "step": 24317 }, { "epoch": 79.73114754098361, "grad_norm": 6.817123889923096, "learning_rate": 2.0784139574225593e-06, "loss": 0.2676, "step": 24318 }, { "epoch": 79.7344262295082, "grad_norm": 3.8318581581115723, "learning_rate": 2.0777659162274244e-06, "loss": 0.3024, "step": 24319 }, { "epoch": 79.73770491803279, "grad_norm": 7.037481784820557, "learning_rate": 2.077117964363713e-06, "loss": 0.3017, "step": 24320 }, { "epoch": 79.74098360655738, "grad_norm": 5.59904670715332, "learning_rate": 2.076470101838737e-06, "loss": 0.3178, "step": 24321 }, { "epoch": 79.74426229508197, "grad_norm": 4.599860191345215, "learning_rate": 2.075822328659799e-06, "loss": 0.325, "step": 24322 }, { "epoch": 79.74754098360656, "grad_norm": 5.573529243469238, "learning_rate": 2.0751746448342004e-06, "loss": 0.4863, "step": 24323 }, { "epoch": 79.75081967213114, "grad_norm": 5.329012870788574, "learning_rate": 2.0745270503692503e-06, "loss": 0.3296, "step": 24324 }, { "epoch": 79.75409836065573, "grad_norm": 4.675894260406494, "learning_rate": 2.0738795452722482e-06, "loss": 0.4259, "step": 24325 }, { "epoch": 79.75737704918033, "grad_norm": 4.795365333557129, "learning_rate": 2.0732321295504955e-06, "loss": 0.4431, "step": 24326 }, { "epoch": 79.76065573770492, "grad_norm": 4.531769275665283, "learning_rate": 2.0725848032112893e-06, "loss": 0.5201, "step": 24327 }, { "epoch": 79.76393442622951, "grad_norm": 28.5762996673584, "learning_rate": 2.0719375662619345e-06, "loss": 0.369, "step": 24328 }, { "epoch": 79.7672131147541, "grad_norm": 3.8340914249420166, "learning_rate": 2.071290418709727e-06, "loss": 0.4054, "step": 24329 }, { "epoch": 79.77049180327869, "grad_norm": 4.20127534866333, "learning_rate": 2.0706433605619635e-06, "loss": 0.3386, "step": 24330 }, { "epoch": 79.77377049180328, "grad_norm": 4.58323335647583, "learning_rate": 2.069996391825941e-06, "loss": 0.4053, "step": 24331 }, { "epoch": 79.77704918032786, "grad_norm": 4.384117603302002, "learning_rate": 2.0693495125089515e-06, "loss": 0.3773, "step": 24332 }, { "epoch": 79.78032786885245, "grad_norm": 4.747466087341309, "learning_rate": 2.0687027226182944e-06, "loss": 0.2183, "step": 24333 }, { "epoch": 79.78360655737706, "grad_norm": 4.187624454498291, "learning_rate": 2.068056022161261e-06, "loss": 0.2984, "step": 24334 }, { "epoch": 79.78688524590164, "grad_norm": 15.430399894714355, "learning_rate": 2.0674094111451436e-06, "loss": 0.4776, "step": 24335 }, { "epoch": 79.79016393442623, "grad_norm": 7.875360012054443, "learning_rate": 2.0667628895772295e-06, "loss": 0.4345, "step": 24336 }, { "epoch": 79.79344262295082, "grad_norm": 4.610375881195068, "learning_rate": 2.066116457464815e-06, "loss": 0.387, "step": 24337 }, { "epoch": 79.79672131147541, "grad_norm": 4.634967803955078, "learning_rate": 2.065470114815187e-06, "loss": 0.4068, "step": 24338 }, { "epoch": 79.8, "grad_norm": 5.553009033203125, "learning_rate": 2.064823861635633e-06, "loss": 0.3476, "step": 24339 }, { "epoch": 79.80327868852459, "grad_norm": 5.129706859588623, "learning_rate": 2.064177697933437e-06, "loss": 0.4774, "step": 24340 }, { "epoch": 79.80655737704917, "grad_norm": 5.317697525024414, "learning_rate": 2.063531623715893e-06, "loss": 0.243, "step": 24341 }, { "epoch": 79.80983606557378, "grad_norm": 5.601022720336914, "learning_rate": 2.0628856389902806e-06, "loss": 0.5331, "step": 24342 }, { "epoch": 79.81311475409836, "grad_norm": 11.593999862670898, "learning_rate": 2.0622397437638854e-06, "loss": 0.4669, "step": 24343 }, { "epoch": 79.81639344262295, "grad_norm": 4.688983917236328, "learning_rate": 2.0615939380439908e-06, "loss": 0.363, "step": 24344 }, { "epoch": 79.81967213114754, "grad_norm": 5.235318183898926, "learning_rate": 2.060948221837875e-06, "loss": 0.4557, "step": 24345 }, { "epoch": 79.82295081967213, "grad_norm": 4.727092742919922, "learning_rate": 2.0603025951528257e-06, "loss": 0.378, "step": 24346 }, { "epoch": 79.82622950819672, "grad_norm": 5.339601039886475, "learning_rate": 2.0596570579961196e-06, "loss": 0.2821, "step": 24347 }, { "epoch": 79.8295081967213, "grad_norm": 3.8934450149536133, "learning_rate": 2.0590116103750366e-06, "loss": 0.3876, "step": 24348 }, { "epoch": 79.8327868852459, "grad_norm": 5.208271503448486, "learning_rate": 2.0583662522968508e-06, "loss": 0.3349, "step": 24349 }, { "epoch": 79.8360655737705, "grad_norm": 5.556578159332275, "learning_rate": 2.057720983768846e-06, "loss": 0.4008, "step": 24350 }, { "epoch": 79.83934426229509, "grad_norm": 5.1251959800720215, "learning_rate": 2.0570758047982943e-06, "loss": 0.4516, "step": 24351 }, { "epoch": 79.84262295081967, "grad_norm": 6.327174663543701, "learning_rate": 2.0564307153924723e-06, "loss": 0.3576, "step": 24352 }, { "epoch": 79.84590163934426, "grad_norm": 6.046069622039795, "learning_rate": 2.0557857155586502e-06, "loss": 0.4818, "step": 24353 }, { "epoch": 79.84918032786885, "grad_norm": 4.83763313293457, "learning_rate": 2.0551408053041066e-06, "loss": 0.3794, "step": 24354 }, { "epoch": 79.85245901639344, "grad_norm": 6.557338714599609, "learning_rate": 2.0544959846361114e-06, "loss": 0.2719, "step": 24355 }, { "epoch": 79.85573770491803, "grad_norm": 4.1509199142456055, "learning_rate": 2.053851253561935e-06, "loss": 0.4255, "step": 24356 }, { "epoch": 79.85901639344263, "grad_norm": 4.323642730712891, "learning_rate": 2.0532066120888473e-06, "loss": 0.3877, "step": 24357 }, { "epoch": 79.86229508196722, "grad_norm": 4.935496807098389, "learning_rate": 2.0525620602241157e-06, "loss": 0.2575, "step": 24358 }, { "epoch": 79.8655737704918, "grad_norm": 3.7032530307769775, "learning_rate": 2.0519175979750116e-06, "loss": 0.2941, "step": 24359 }, { "epoch": 79.8688524590164, "grad_norm": 4.217994689941406, "learning_rate": 2.051273225348802e-06, "loss": 0.3991, "step": 24360 }, { "epoch": 79.87213114754098, "grad_norm": 7.945991039276123, "learning_rate": 2.0506289423527503e-06, "loss": 0.4336, "step": 24361 }, { "epoch": 79.87540983606557, "grad_norm": 4.535737991333008, "learning_rate": 2.0499847489941207e-06, "loss": 0.2261, "step": 24362 }, { "epoch": 79.87868852459016, "grad_norm": 5.9957098960876465, "learning_rate": 2.049340645280181e-06, "loss": 0.6356, "step": 24363 }, { "epoch": 79.88196721311475, "grad_norm": 9.195682525634766, "learning_rate": 2.048696631218192e-06, "loss": 0.4312, "step": 24364 }, { "epoch": 79.88524590163935, "grad_norm": 5.605373382568359, "learning_rate": 2.0480527068154167e-06, "loss": 0.6721, "step": 24365 }, { "epoch": 79.88852459016394, "grad_norm": 5.5453009605407715, "learning_rate": 2.0474088720791117e-06, "loss": 0.5555, "step": 24366 }, { "epoch": 79.89180327868853, "grad_norm": 5.37545108795166, "learning_rate": 2.0467651270165433e-06, "loss": 0.3174, "step": 24367 }, { "epoch": 79.89508196721312, "grad_norm": 5.279775142669678, "learning_rate": 2.0461214716349675e-06, "loss": 0.5879, "step": 24368 }, { "epoch": 79.8983606557377, "grad_norm": 15.412894248962402, "learning_rate": 2.045477905941642e-06, "loss": 0.3021, "step": 24369 }, { "epoch": 79.90163934426229, "grad_norm": 9.414788246154785, "learning_rate": 2.0448344299438206e-06, "loss": 0.4025, "step": 24370 }, { "epoch": 79.90491803278688, "grad_norm": 5.534676551818848, "learning_rate": 2.0441910436487646e-06, "loss": 0.3684, "step": 24371 }, { "epoch": 79.90819672131147, "grad_norm": 4.4094767570495605, "learning_rate": 2.043547747063728e-06, "loss": 0.4886, "step": 24372 }, { "epoch": 79.91147540983607, "grad_norm": 6.712964057922363, "learning_rate": 2.042904540195959e-06, "loss": 0.3748, "step": 24373 }, { "epoch": 79.91475409836066, "grad_norm": 4.441682815551758, "learning_rate": 2.0422614230527183e-06, "loss": 0.5873, "step": 24374 }, { "epoch": 79.91803278688525, "grad_norm": 5.483354568481445, "learning_rate": 2.041618395641254e-06, "loss": 0.6258, "step": 24375 }, { "epoch": 79.92131147540984, "grad_norm": 4.953818321228027, "learning_rate": 2.0409754579688137e-06, "loss": 0.4417, "step": 24376 }, { "epoch": 79.92459016393443, "grad_norm": 4.164646148681641, "learning_rate": 2.0403326100426533e-06, "loss": 0.2679, "step": 24377 }, { "epoch": 79.92786885245901, "grad_norm": 4.409585475921631, "learning_rate": 2.0396898518700183e-06, "loss": 0.4321, "step": 24378 }, { "epoch": 79.9311475409836, "grad_norm": 6.320054531097412, "learning_rate": 2.039047183458155e-06, "loss": 0.4838, "step": 24379 }, { "epoch": 79.93442622950819, "grad_norm": 4.207515716552734, "learning_rate": 2.0384046048143146e-06, "loss": 0.4867, "step": 24380 }, { "epoch": 79.9377049180328, "grad_norm": 4.645687103271484, "learning_rate": 2.0377621159457395e-06, "loss": 0.1992, "step": 24381 }, { "epoch": 79.94098360655738, "grad_norm": 5.441115856170654, "learning_rate": 2.0371197168596757e-06, "loss": 0.4942, "step": 24382 }, { "epoch": 79.94426229508197, "grad_norm": 4.532219886779785, "learning_rate": 2.0364774075633665e-06, "loss": 0.3655, "step": 24383 }, { "epoch": 79.94754098360656, "grad_norm": 5.674887180328369, "learning_rate": 2.0358351880640516e-06, "loss": 0.3546, "step": 24384 }, { "epoch": 79.95081967213115, "grad_norm": 7.114583492279053, "learning_rate": 2.035193058368978e-06, "loss": 0.4784, "step": 24385 }, { "epoch": 79.95409836065573, "grad_norm": 5.901174068450928, "learning_rate": 2.0345510184853846e-06, "loss": 0.474, "step": 24386 }, { "epoch": 79.95737704918032, "grad_norm": 4.391219139099121, "learning_rate": 2.0339090684205108e-06, "loss": 0.3907, "step": 24387 }, { "epoch": 79.96065573770491, "grad_norm": 6.936789035797119, "learning_rate": 2.0332672081815917e-06, "loss": 0.6542, "step": 24388 }, { "epoch": 79.96393442622951, "grad_norm": 6.047386646270752, "learning_rate": 2.0326254377758704e-06, "loss": 0.3482, "step": 24389 }, { "epoch": 79.9672131147541, "grad_norm": 5.317279815673828, "learning_rate": 2.0319837572105817e-06, "loss": 0.47, "step": 24390 }, { "epoch": 79.97049180327869, "grad_norm": 6.945305824279785, "learning_rate": 2.031342166492961e-06, "loss": 0.2886, "step": 24391 }, { "epoch": 79.97377049180328, "grad_norm": 5.327325344085693, "learning_rate": 2.0307006656302396e-06, "loss": 0.2804, "step": 24392 }, { "epoch": 79.97704918032787, "grad_norm": 4.5051093101501465, "learning_rate": 2.0300592546296572e-06, "loss": 0.4648, "step": 24393 }, { "epoch": 79.98032786885246, "grad_norm": 6.183778285980225, "learning_rate": 2.029417933498443e-06, "loss": 0.3731, "step": 24394 }, { "epoch": 79.98360655737704, "grad_norm": 7.154295444488525, "learning_rate": 2.02877670224383e-06, "loss": 0.3139, "step": 24395 }, { "epoch": 79.98688524590163, "grad_norm": 5.2367401123046875, "learning_rate": 2.028135560873047e-06, "loss": 0.4281, "step": 24396 }, { "epoch": 79.99016393442623, "grad_norm": 6.242121696472168, "learning_rate": 2.0274945093933205e-06, "loss": 0.2578, "step": 24397 }, { "epoch": 79.99344262295082, "grad_norm": 5.472814083099365, "learning_rate": 2.0268535478118868e-06, "loss": 0.3289, "step": 24398 }, { "epoch": 79.99672131147541, "grad_norm": 4.474648475646973, "learning_rate": 2.026212676135969e-06, "loss": 0.2683, "step": 24399 }, { "epoch": 80.0, "grad_norm": 5.129579544067383, "learning_rate": 2.025571894372794e-06, "loss": 0.402, "step": 24400 }, { "epoch": 80.00327868852459, "grad_norm": 19.99497413635254, "learning_rate": 2.0249312025295842e-06, "loss": 0.442, "step": 24401 }, { "epoch": 80.00655737704918, "grad_norm": 6.505887985229492, "learning_rate": 2.02429060061357e-06, "loss": 0.4388, "step": 24402 }, { "epoch": 80.00983606557377, "grad_norm": 3.9088683128356934, "learning_rate": 2.023650088631972e-06, "loss": 0.3366, "step": 24403 }, { "epoch": 80.01311475409837, "grad_norm": 6.5207085609436035, "learning_rate": 2.0230096665920117e-06, "loss": 0.3668, "step": 24404 }, { "epoch": 80.01639344262296, "grad_norm": 7.858559608459473, "learning_rate": 2.0223693345009097e-06, "loss": 0.2702, "step": 24405 }, { "epoch": 80.01967213114754, "grad_norm": 4.684046268463135, "learning_rate": 2.0217290923658904e-06, "loss": 0.3221, "step": 24406 }, { "epoch": 80.02295081967213, "grad_norm": 4.553437232971191, "learning_rate": 2.0210889401941714e-06, "loss": 0.2975, "step": 24407 }, { "epoch": 80.02622950819672, "grad_norm": 7.651905536651611, "learning_rate": 2.02044887799297e-06, "loss": 0.3004, "step": 24408 }, { "epoch": 80.02950819672131, "grad_norm": 5.565485000610352, "learning_rate": 2.0198089057695046e-06, "loss": 0.4954, "step": 24409 }, { "epoch": 80.0327868852459, "grad_norm": 5.333937168121338, "learning_rate": 2.019169023530988e-06, "loss": 0.3143, "step": 24410 }, { "epoch": 80.03606557377049, "grad_norm": 7.105400085449219, "learning_rate": 2.0185292312846417e-06, "loss": 0.6014, "step": 24411 }, { "epoch": 80.03934426229509, "grad_norm": 3.794275999069214, "learning_rate": 2.0178895290376767e-06, "loss": 0.2141, "step": 24412 }, { "epoch": 80.04262295081968, "grad_norm": 5.243277072906494, "learning_rate": 2.0172499167973068e-06, "loss": 0.326, "step": 24413 }, { "epoch": 80.04590163934427, "grad_norm": 6.701414585113525, "learning_rate": 2.0166103945707415e-06, "loss": 0.4444, "step": 24414 }, { "epoch": 80.04918032786885, "grad_norm": 4.8849101066589355, "learning_rate": 2.0159709623651967e-06, "loss": 0.2667, "step": 24415 }, { "epoch": 80.05245901639344, "grad_norm": 7.037072658538818, "learning_rate": 2.0153316201878816e-06, "loss": 0.5175, "step": 24416 }, { "epoch": 80.05573770491803, "grad_norm": 5.045742511749268, "learning_rate": 2.014692368046003e-06, "loss": 0.568, "step": 24417 }, { "epoch": 80.05901639344262, "grad_norm": 6.853798866271973, "learning_rate": 2.014053205946769e-06, "loss": 0.2403, "step": 24418 }, { "epoch": 80.0622950819672, "grad_norm": 5.064899921417236, "learning_rate": 2.013414133897391e-06, "loss": 0.3316, "step": 24419 }, { "epoch": 80.06557377049181, "grad_norm": 4.764613151550293, "learning_rate": 2.012775151905072e-06, "loss": 0.6329, "step": 24420 }, { "epoch": 80.0688524590164, "grad_norm": 7.319243431091309, "learning_rate": 2.0121362599770187e-06, "loss": 0.5712, "step": 24421 }, { "epoch": 80.07213114754099, "grad_norm": 6.182982444763184, "learning_rate": 2.0114974581204303e-06, "loss": 0.5598, "step": 24422 }, { "epoch": 80.07540983606557, "grad_norm": 5.520605087280273, "learning_rate": 2.0108587463425187e-06, "loss": 0.5138, "step": 24423 }, { "epoch": 80.07868852459016, "grad_norm": 4.339900970458984, "learning_rate": 2.0102201246504806e-06, "loss": 0.2569, "step": 24424 }, { "epoch": 80.08196721311475, "grad_norm": 4.938859939575195, "learning_rate": 2.009581593051514e-06, "loss": 0.4623, "step": 24425 }, { "epoch": 80.08524590163934, "grad_norm": 5.37555456161499, "learning_rate": 2.008943151552827e-06, "loss": 0.5705, "step": 24426 }, { "epoch": 80.08852459016393, "grad_norm": 5.209061622619629, "learning_rate": 2.0083048001616134e-06, "loss": 0.3256, "step": 24427 }, { "epoch": 80.09180327868853, "grad_norm": 4.0987091064453125, "learning_rate": 2.0076665388850734e-06, "loss": 0.149, "step": 24428 }, { "epoch": 80.09508196721312, "grad_norm": 5.254971504211426, "learning_rate": 2.0070283677304004e-06, "loss": 0.3457, "step": 24429 }, { "epoch": 80.09836065573771, "grad_norm": 9.374889373779297, "learning_rate": 2.006390286704796e-06, "loss": 0.3802, "step": 24430 }, { "epoch": 80.1016393442623, "grad_norm": 5.259250164031982, "learning_rate": 2.005752295815452e-06, "loss": 0.3797, "step": 24431 }, { "epoch": 80.10491803278688, "grad_norm": 9.545734405517578, "learning_rate": 2.0051143950695595e-06, "loss": 0.4365, "step": 24432 }, { "epoch": 80.10819672131147, "grad_norm": 12.316039085388184, "learning_rate": 2.0044765844743175e-06, "loss": 0.5445, "step": 24433 }, { "epoch": 80.11147540983606, "grad_norm": 5.553407669067383, "learning_rate": 2.003838864036917e-06, "loss": 0.3993, "step": 24434 }, { "epoch": 80.11475409836065, "grad_norm": 4.648059844970703, "learning_rate": 2.0032012337645458e-06, "loss": 0.5236, "step": 24435 }, { "epoch": 80.11803278688525, "grad_norm": 5.68349027633667, "learning_rate": 2.0025636936643923e-06, "loss": 0.4092, "step": 24436 }, { "epoch": 80.12131147540984, "grad_norm": 5.1444830894470215, "learning_rate": 2.0019262437436516e-06, "loss": 0.322, "step": 24437 }, { "epoch": 80.12459016393443, "grad_norm": 5.553797245025635, "learning_rate": 2.001288884009509e-06, "loss": 0.0973, "step": 24438 }, { "epoch": 80.12786885245902, "grad_norm": 4.321649551391602, "learning_rate": 2.00065161446915e-06, "loss": 0.5702, "step": 24439 }, { "epoch": 80.1311475409836, "grad_norm": 4.8526482582092285, "learning_rate": 2.000014435129759e-06, "loss": 0.2744, "step": 24440 }, { "epoch": 80.1344262295082, "grad_norm": 4.499276638031006, "learning_rate": 1.9993773459985254e-06, "loss": 0.2208, "step": 24441 }, { "epoch": 80.13770491803278, "grad_norm": 5.054052829742432, "learning_rate": 1.9987403470826306e-06, "loss": 0.2842, "step": 24442 }, { "epoch": 80.14098360655737, "grad_norm": 6.7967610359191895, "learning_rate": 1.998103438389258e-06, "loss": 0.4654, "step": 24443 }, { "epoch": 80.14426229508197, "grad_norm": 11.338094711303711, "learning_rate": 1.9974666199255864e-06, "loss": 0.4453, "step": 24444 }, { "epoch": 80.14754098360656, "grad_norm": 4.33989953994751, "learning_rate": 1.996829891698803e-06, "loss": 0.3676, "step": 24445 }, { "epoch": 80.15081967213115, "grad_norm": 7.611995220184326, "learning_rate": 1.996193253716082e-06, "loss": 0.2731, "step": 24446 }, { "epoch": 80.15409836065574, "grad_norm": 4.021815299987793, "learning_rate": 1.9955567059846046e-06, "loss": 0.3696, "step": 24447 }, { "epoch": 80.15737704918033, "grad_norm": 4.734577655792236, "learning_rate": 1.994920248511548e-06, "loss": 0.2786, "step": 24448 }, { "epoch": 80.16065573770491, "grad_norm": 4.080777645111084, "learning_rate": 1.9942838813040857e-06, "loss": 0.4256, "step": 24449 }, { "epoch": 80.1639344262295, "grad_norm": 16.832456588745117, "learning_rate": 1.9936476043693997e-06, "loss": 0.2168, "step": 24450 }, { "epoch": 80.1672131147541, "grad_norm": 5.010598182678223, "learning_rate": 1.993011417714661e-06, "loss": 0.4823, "step": 24451 }, { "epoch": 80.1704918032787, "grad_norm": 4.568327903747559, "learning_rate": 1.992375321347044e-06, "loss": 0.2271, "step": 24452 }, { "epoch": 80.17377049180328, "grad_norm": 17.38738441467285, "learning_rate": 1.9917393152737186e-06, "loss": 0.433, "step": 24453 }, { "epoch": 80.17704918032787, "grad_norm": 3.807884454727173, "learning_rate": 1.9911033995018615e-06, "loss": 0.2104, "step": 24454 }, { "epoch": 80.18032786885246, "grad_norm": 5.104324817657471, "learning_rate": 1.9904675740386405e-06, "loss": 0.4384, "step": 24455 }, { "epoch": 80.18360655737705, "grad_norm": 4.625618934631348, "learning_rate": 1.9898318388912265e-06, "loss": 0.2846, "step": 24456 }, { "epoch": 80.18688524590164, "grad_norm": 5.608620643615723, "learning_rate": 1.9891961940667825e-06, "loss": 0.3502, "step": 24457 }, { "epoch": 80.19016393442622, "grad_norm": 6.1503005027771, "learning_rate": 1.9885606395724845e-06, "loss": 0.336, "step": 24458 }, { "epoch": 80.19344262295083, "grad_norm": 5.267137050628662, "learning_rate": 1.987925175415495e-06, "loss": 0.4281, "step": 24459 }, { "epoch": 80.19672131147541, "grad_norm": 5.343198299407959, "learning_rate": 1.9872898016029796e-06, "loss": 0.3658, "step": 24460 }, { "epoch": 80.2, "grad_norm": 5.771115303039551, "learning_rate": 1.9866545181421016e-06, "loss": 0.2922, "step": 24461 }, { "epoch": 80.20327868852459, "grad_norm": 5.312379837036133, "learning_rate": 1.986019325040024e-06, "loss": 0.4222, "step": 24462 }, { "epoch": 80.20655737704918, "grad_norm": 4.749899387359619, "learning_rate": 1.9853842223039144e-06, "loss": 0.3563, "step": 24463 }, { "epoch": 80.20983606557377, "grad_norm": 5.637993335723877, "learning_rate": 1.9847492099409294e-06, "loss": 0.4765, "step": 24464 }, { "epoch": 80.21311475409836, "grad_norm": 7.455258846282959, "learning_rate": 1.984114287958232e-06, "loss": 0.5503, "step": 24465 }, { "epoch": 80.21639344262294, "grad_norm": 8.532328605651855, "learning_rate": 1.9834794563629767e-06, "loss": 0.4106, "step": 24466 }, { "epoch": 80.21967213114755, "grad_norm": 11.457383155822754, "learning_rate": 1.9828447151623288e-06, "loss": 0.3425, "step": 24467 }, { "epoch": 80.22295081967214, "grad_norm": 4.597899436950684, "learning_rate": 1.9822100643634436e-06, "loss": 0.3875, "step": 24468 }, { "epoch": 80.22622950819672, "grad_norm": 5.43859338760376, "learning_rate": 1.981575503973474e-06, "loss": 0.4534, "step": 24469 }, { "epoch": 80.22950819672131, "grad_norm": 6.301974296569824, "learning_rate": 1.9809410339995773e-06, "loss": 0.2645, "step": 24470 }, { "epoch": 80.2327868852459, "grad_norm": 4.89599084854126, "learning_rate": 1.980306654448909e-06, "loss": 0.31, "step": 24471 }, { "epoch": 80.23606557377049, "grad_norm": 4.7145233154296875, "learning_rate": 1.9796723653286233e-06, "loss": 0.5645, "step": 24472 }, { "epoch": 80.23934426229508, "grad_norm": 3.730905771255493, "learning_rate": 1.979038166645869e-06, "loss": 0.3862, "step": 24473 }, { "epoch": 80.24262295081967, "grad_norm": 5.062283992767334, "learning_rate": 1.9784040584078003e-06, "loss": 0.3672, "step": 24474 }, { "epoch": 80.24590163934427, "grad_norm": 6.160156726837158, "learning_rate": 1.9777700406215626e-06, "loss": 0.392, "step": 24475 }, { "epoch": 80.24918032786886, "grad_norm": 4.898171901702881, "learning_rate": 1.9771361132943123e-06, "loss": 0.5391, "step": 24476 }, { "epoch": 80.25245901639344, "grad_norm": 4.569100379943848, "learning_rate": 1.9765022764331932e-06, "loss": 0.3578, "step": 24477 }, { "epoch": 80.25573770491803, "grad_norm": 5.381184101104736, "learning_rate": 1.975868530045352e-06, "loss": 0.4246, "step": 24478 }, { "epoch": 80.25901639344262, "grad_norm": 6.462708950042725, "learning_rate": 1.9752348741379366e-06, "loss": 0.3094, "step": 24479 }, { "epoch": 80.26229508196721, "grad_norm": 6.387297630310059, "learning_rate": 1.9746013087180936e-06, "loss": 0.5998, "step": 24480 }, { "epoch": 80.2655737704918, "grad_norm": 6.051181793212891, "learning_rate": 1.9739678337929615e-06, "loss": 0.5481, "step": 24481 }, { "epoch": 80.26885245901639, "grad_norm": 5.900330066680908, "learning_rate": 1.9733344493696902e-06, "loss": 0.3345, "step": 24482 }, { "epoch": 80.27213114754099, "grad_norm": 4.775456428527832, "learning_rate": 1.9727011554554177e-06, "loss": 0.3666, "step": 24483 }, { "epoch": 80.27540983606558, "grad_norm": 4.109919548034668, "learning_rate": 1.9720679520572848e-06, "loss": 0.3251, "step": 24484 }, { "epoch": 80.27868852459017, "grad_norm": 5.6192803382873535, "learning_rate": 1.9714348391824345e-06, "loss": 0.4294, "step": 24485 }, { "epoch": 80.28196721311475, "grad_norm": 5.139535903930664, "learning_rate": 1.970801816838004e-06, "loss": 0.2594, "step": 24486 }, { "epoch": 80.28524590163934, "grad_norm": 8.077205657958984, "learning_rate": 1.970168885031131e-06, "loss": 0.729, "step": 24487 }, { "epoch": 80.28852459016393, "grad_norm": 5.046250820159912, "learning_rate": 1.9695360437689504e-06, "loss": 0.3947, "step": 24488 }, { "epoch": 80.29180327868852, "grad_norm": 5.34006404876709, "learning_rate": 1.968903293058604e-06, "loss": 0.5417, "step": 24489 }, { "epoch": 80.29508196721312, "grad_norm": 6.584179878234863, "learning_rate": 1.968270632907222e-06, "loss": 0.4122, "step": 24490 }, { "epoch": 80.29836065573771, "grad_norm": 5.308058261871338, "learning_rate": 1.9676380633219396e-06, "loss": 0.3605, "step": 24491 }, { "epoch": 80.3016393442623, "grad_norm": 5.14036226272583, "learning_rate": 1.9670055843098877e-06, "loss": 0.241, "step": 24492 }, { "epoch": 80.30491803278689, "grad_norm": 4.736240386962891, "learning_rate": 1.966373195878202e-06, "loss": 0.2505, "step": 24493 }, { "epoch": 80.30819672131148, "grad_norm": 4.855616569519043, "learning_rate": 1.965740898034012e-06, "loss": 0.4001, "step": 24494 }, { "epoch": 80.31147540983606, "grad_norm": 6.907907009124756, "learning_rate": 1.965108690784446e-06, "loss": 0.2436, "step": 24495 }, { "epoch": 80.31475409836065, "grad_norm": 4.970593452453613, "learning_rate": 1.9644765741366323e-06, "loss": 0.3372, "step": 24496 }, { "epoch": 80.31803278688524, "grad_norm": 5.423051357269287, "learning_rate": 1.963844548097702e-06, "loss": 0.4469, "step": 24497 }, { "epoch": 80.32131147540984, "grad_norm": 4.210341930389404, "learning_rate": 1.9632126126747796e-06, "loss": 0.4287, "step": 24498 }, { "epoch": 80.32459016393443, "grad_norm": 4.605563640594482, "learning_rate": 1.962580767874992e-06, "loss": 0.407, "step": 24499 }, { "epoch": 80.32786885245902, "grad_norm": 4.693596839904785, "learning_rate": 1.9619490137054633e-06, "loss": 0.2461, "step": 24500 }, { "epoch": 80.33114754098361, "grad_norm": 4.616114616394043, "learning_rate": 1.961317350173313e-06, "loss": 0.231, "step": 24501 }, { "epoch": 80.3344262295082, "grad_norm": 6.580508708953857, "learning_rate": 1.9606857772856713e-06, "loss": 0.5423, "step": 24502 }, { "epoch": 80.33770491803278, "grad_norm": 4.452738285064697, "learning_rate": 1.960054295049656e-06, "loss": 0.3568, "step": 24503 }, { "epoch": 80.34098360655737, "grad_norm": 5.801731586456299, "learning_rate": 1.9594229034723877e-06, "loss": 0.3086, "step": 24504 }, { "epoch": 80.34426229508196, "grad_norm": 5.430866241455078, "learning_rate": 1.958791602560983e-06, "loss": 0.3277, "step": 24505 }, { "epoch": 80.34754098360656, "grad_norm": 6.95399808883667, "learning_rate": 1.958160392322568e-06, "loss": 0.3402, "step": 24506 }, { "epoch": 80.35081967213115, "grad_norm": 6.718459129333496, "learning_rate": 1.9575292727642547e-06, "loss": 0.4791, "step": 24507 }, { "epoch": 80.35409836065574, "grad_norm": 5.485462665557861, "learning_rate": 1.9568982438931614e-06, "loss": 0.3502, "step": 24508 }, { "epoch": 80.35737704918033, "grad_norm": 5.614391803741455, "learning_rate": 1.9562673057164007e-06, "loss": 0.3593, "step": 24509 }, { "epoch": 80.36065573770492, "grad_norm": 4.765326023101807, "learning_rate": 1.9556364582410925e-06, "loss": 0.241, "step": 24510 }, { "epoch": 80.3639344262295, "grad_norm": 3.7110657691955566, "learning_rate": 1.9550057014743462e-06, "loss": 0.345, "step": 24511 }, { "epoch": 80.3672131147541, "grad_norm": 4.844849109649658, "learning_rate": 1.954375035423276e-06, "loss": 0.4088, "step": 24512 }, { "epoch": 80.37049180327868, "grad_norm": 4.64362096786499, "learning_rate": 1.953744460094993e-06, "loss": 0.4084, "step": 24513 }, { "epoch": 80.37377049180328, "grad_norm": 7.521331310272217, "learning_rate": 1.953113975496603e-06, "loss": 0.2633, "step": 24514 }, { "epoch": 80.37704918032787, "grad_norm": 7.390109539031982, "learning_rate": 1.952483581635224e-06, "loss": 0.3212, "step": 24515 }, { "epoch": 80.38032786885246, "grad_norm": 4.555109977722168, "learning_rate": 1.951853278517959e-06, "loss": 0.2845, "step": 24516 }, { "epoch": 80.38360655737705, "grad_norm": 4.519309043884277, "learning_rate": 1.951223066151917e-06, "loss": 0.3899, "step": 24517 }, { "epoch": 80.38688524590164, "grad_norm": 5.380904197692871, "learning_rate": 1.9505929445442007e-06, "loss": 0.2194, "step": 24518 }, { "epoch": 80.39016393442623, "grad_norm": 7.761627197265625, "learning_rate": 1.9499629137019205e-06, "loss": 0.4807, "step": 24519 }, { "epoch": 80.39344262295081, "grad_norm": 5.309587478637695, "learning_rate": 1.9493329736321786e-06, "loss": 0.4482, "step": 24520 }, { "epoch": 80.3967213114754, "grad_norm": 3.6655781269073486, "learning_rate": 1.948703124342077e-06, "loss": 0.2602, "step": 24521 }, { "epoch": 80.4, "grad_norm": 5.300726413726807, "learning_rate": 1.9480733658387175e-06, "loss": 0.3627, "step": 24522 }, { "epoch": 80.4032786885246, "grad_norm": 3.7473678588867188, "learning_rate": 1.9474436981292057e-06, "loss": 0.1895, "step": 24523 }, { "epoch": 80.40655737704918, "grad_norm": 4.457822799682617, "learning_rate": 1.946814121220637e-06, "loss": 0.331, "step": 24524 }, { "epoch": 80.40983606557377, "grad_norm": 4.6037278175354, "learning_rate": 1.9461846351201143e-06, "loss": 0.4025, "step": 24525 }, { "epoch": 80.41311475409836, "grad_norm": 4.39584493637085, "learning_rate": 1.9455552398347323e-06, "loss": 0.308, "step": 24526 }, { "epoch": 80.41639344262295, "grad_norm": 6.63163948059082, "learning_rate": 1.944925935371588e-06, "loss": 0.3369, "step": 24527 }, { "epoch": 80.41967213114754, "grad_norm": 4.871188640594482, "learning_rate": 1.9442967217377805e-06, "loss": 0.3693, "step": 24528 }, { "epoch": 80.42295081967212, "grad_norm": 4.781863689422607, "learning_rate": 1.943667598940404e-06, "loss": 0.4285, "step": 24529 }, { "epoch": 80.42622950819673, "grad_norm": 5.555623531341553, "learning_rate": 1.9430385669865513e-06, "loss": 0.3492, "step": 24530 }, { "epoch": 80.42950819672132, "grad_norm": 5.082584381103516, "learning_rate": 1.942409625883314e-06, "loss": 0.4238, "step": 24531 }, { "epoch": 80.4327868852459, "grad_norm": 40.16790008544922, "learning_rate": 1.941780775637787e-06, "loss": 0.3127, "step": 24532 }, { "epoch": 80.43606557377049, "grad_norm": 5.788175106048584, "learning_rate": 1.941152016257062e-06, "loss": 0.2997, "step": 24533 }, { "epoch": 80.43934426229508, "grad_norm": 5.137372970581055, "learning_rate": 1.940523347748223e-06, "loss": 0.2786, "step": 24534 }, { "epoch": 80.44262295081967, "grad_norm": 5.331644058227539, "learning_rate": 1.9398947701183666e-06, "loss": 0.3538, "step": 24535 }, { "epoch": 80.44590163934426, "grad_norm": 6.147644996643066, "learning_rate": 1.939266283374578e-06, "loss": 0.3282, "step": 24536 }, { "epoch": 80.44918032786886, "grad_norm": 17.789562225341797, "learning_rate": 1.938637887523939e-06, "loss": 0.6891, "step": 24537 }, { "epoch": 80.45245901639345, "grad_norm": 5.34458589553833, "learning_rate": 1.9380095825735423e-06, "loss": 0.2771, "step": 24538 }, { "epoch": 80.45573770491804, "grad_norm": 6.061081409454346, "learning_rate": 1.9373813685304697e-06, "loss": 0.2747, "step": 24539 }, { "epoch": 80.45901639344262, "grad_norm": 5.301626205444336, "learning_rate": 1.9367532454018036e-06, "loss": 0.4411, "step": 24540 }, { "epoch": 80.46229508196721, "grad_norm": 5.809657096862793, "learning_rate": 1.9361252131946307e-06, "loss": 0.3727, "step": 24541 }, { "epoch": 80.4655737704918, "grad_norm": 5.681722640991211, "learning_rate": 1.9354972719160304e-06, "loss": 0.4838, "step": 24542 }, { "epoch": 80.46885245901639, "grad_norm": 4.669218063354492, "learning_rate": 1.9348694215730824e-06, "loss": 0.4656, "step": 24543 }, { "epoch": 80.47213114754098, "grad_norm": 4.6498122215271, "learning_rate": 1.9342416621728656e-06, "loss": 0.3646, "step": 24544 }, { "epoch": 80.47540983606558, "grad_norm": 5.801912307739258, "learning_rate": 1.9336139937224618e-06, "loss": 0.3892, "step": 24545 }, { "epoch": 80.47868852459017, "grad_norm": 7.599240303039551, "learning_rate": 1.932986416228949e-06, "loss": 0.4987, "step": 24546 }, { "epoch": 80.48196721311476, "grad_norm": 5.58673620223999, "learning_rate": 1.9323589296994005e-06, "loss": 0.4011, "step": 24547 }, { "epoch": 80.48524590163935, "grad_norm": 4.7933430671691895, "learning_rate": 1.9317315341408915e-06, "loss": 0.2834, "step": 24548 }, { "epoch": 80.48852459016393, "grad_norm": 4.354794979095459, "learning_rate": 1.9311042295605e-06, "loss": 0.2952, "step": 24549 }, { "epoch": 80.49180327868852, "grad_norm": 5.410614013671875, "learning_rate": 1.9304770159652984e-06, "loss": 0.5545, "step": 24550 }, { "epoch": 80.49508196721311, "grad_norm": 3.9712982177734375, "learning_rate": 1.9298498933623588e-06, "loss": 0.3763, "step": 24551 }, { "epoch": 80.4983606557377, "grad_norm": 4.4284186363220215, "learning_rate": 1.9292228617587525e-06, "loss": 0.3173, "step": 24552 }, { "epoch": 80.5016393442623, "grad_norm": 6.0685224533081055, "learning_rate": 1.928595921161548e-06, "loss": 0.3635, "step": 24553 }, { "epoch": 80.50491803278689, "grad_norm": 6.171198844909668, "learning_rate": 1.9279690715778176e-06, "loss": 0.3931, "step": 24554 }, { "epoch": 80.50819672131148, "grad_norm": 9.032899856567383, "learning_rate": 1.9273423130146298e-06, "loss": 0.3839, "step": 24555 }, { "epoch": 80.51147540983607, "grad_norm": 7.803059101104736, "learning_rate": 1.9267156454790514e-06, "loss": 0.3298, "step": 24556 }, { "epoch": 80.51475409836065, "grad_norm": 7.450284957885742, "learning_rate": 1.926089068978144e-06, "loss": 0.0992, "step": 24557 }, { "epoch": 80.51803278688524, "grad_norm": 22.72575569152832, "learning_rate": 1.9254625835189813e-06, "loss": 0.371, "step": 24558 }, { "epoch": 80.52131147540983, "grad_norm": 6.954463958740234, "learning_rate": 1.924836189108622e-06, "loss": 0.6415, "step": 24559 }, { "epoch": 80.52459016393442, "grad_norm": 5.573513507843018, "learning_rate": 1.9242098857541315e-06, "loss": 0.5139, "step": 24560 }, { "epoch": 80.52786885245902, "grad_norm": 5.121615409851074, "learning_rate": 1.923583673462569e-06, "loss": 0.2965, "step": 24561 }, { "epoch": 80.53114754098361, "grad_norm": 5.058235168457031, "learning_rate": 1.9229575522410006e-06, "loss": 0.3553, "step": 24562 }, { "epoch": 80.5344262295082, "grad_norm": 9.359959602355957, "learning_rate": 1.9223315220964834e-06, "loss": 0.3183, "step": 24563 }, { "epoch": 80.53770491803279, "grad_norm": 4.436875343322754, "learning_rate": 1.9217055830360766e-06, "loss": 0.2708, "step": 24564 }, { "epoch": 80.54098360655738, "grad_norm": 7.156558513641357, "learning_rate": 1.9210797350668385e-06, "loss": 0.3058, "step": 24565 }, { "epoch": 80.54426229508196, "grad_norm": 5.091914176940918, "learning_rate": 1.920453978195824e-06, "loss": 0.6791, "step": 24566 }, { "epoch": 80.54754098360655, "grad_norm": 5.822409629821777, "learning_rate": 1.9198283124300954e-06, "loss": 0.294, "step": 24567 }, { "epoch": 80.55081967213114, "grad_norm": 5.284563064575195, "learning_rate": 1.919202737776702e-06, "loss": 0.2613, "step": 24568 }, { "epoch": 80.55409836065574, "grad_norm": 5.119543075561523, "learning_rate": 1.9185772542427008e-06, "loss": 0.3459, "step": 24569 }, { "epoch": 80.55737704918033, "grad_norm": 4.311409950256348, "learning_rate": 1.9179518618351413e-06, "loss": 0.4379, "step": 24570 }, { "epoch": 80.56065573770492, "grad_norm": 4.664435386657715, "learning_rate": 1.9173265605610793e-06, "loss": 0.6327, "step": 24571 }, { "epoch": 80.56393442622951, "grad_norm": 4.645366668701172, "learning_rate": 1.9167013504275643e-06, "loss": 0.414, "step": 24572 }, { "epoch": 80.5672131147541, "grad_norm": 4.428897857666016, "learning_rate": 1.916076231441647e-06, "loss": 0.6398, "step": 24573 }, { "epoch": 80.57049180327868, "grad_norm": 4.037278652191162, "learning_rate": 1.915451203610372e-06, "loss": 0.3496, "step": 24574 }, { "epoch": 80.57377049180327, "grad_norm": 5.16827917098999, "learning_rate": 1.9148262669407936e-06, "loss": 0.2648, "step": 24575 }, { "epoch": 80.57704918032788, "grad_norm": 5.998560428619385, "learning_rate": 1.914201421439955e-06, "loss": 0.4143, "step": 24576 }, { "epoch": 80.58032786885246, "grad_norm": 4.870880126953125, "learning_rate": 1.9135766671149025e-06, "loss": 0.2594, "step": 24577 }, { "epoch": 80.58360655737705, "grad_norm": 7.254086017608643, "learning_rate": 1.912952003972681e-06, "loss": 0.4196, "step": 24578 }, { "epoch": 80.58688524590164, "grad_norm": 5.362941741943359, "learning_rate": 1.912327432020332e-06, "loss": 0.4888, "step": 24579 }, { "epoch": 80.59016393442623, "grad_norm": 4.095503807067871, "learning_rate": 1.911702951264902e-06, "loss": 0.1689, "step": 24580 }, { "epoch": 80.59344262295082, "grad_norm": 5.29813814163208, "learning_rate": 1.911078561713432e-06, "loss": 0.5522, "step": 24581 }, { "epoch": 80.5967213114754, "grad_norm": 3.887080192565918, "learning_rate": 1.9104542633729604e-06, "loss": 0.6789, "step": 24582 }, { "epoch": 80.6, "grad_norm": 5.093245029449463, "learning_rate": 1.9098300562505266e-06, "loss": 0.2435, "step": 24583 }, { "epoch": 80.6032786885246, "grad_norm": 5.150585174560547, "learning_rate": 1.9092059403531727e-06, "loss": 0.3942, "step": 24584 }, { "epoch": 80.60655737704919, "grad_norm": 4.8273606300354, "learning_rate": 1.908581915687934e-06, "loss": 0.4357, "step": 24585 }, { "epoch": 80.60983606557377, "grad_norm": 4.60538911819458, "learning_rate": 1.907957982261844e-06, "loss": 0.2311, "step": 24586 }, { "epoch": 80.61311475409836, "grad_norm": 5.671760559082031, "learning_rate": 1.9073341400819444e-06, "loss": 0.3775, "step": 24587 }, { "epoch": 80.61639344262295, "grad_norm": 4.81899881362915, "learning_rate": 1.9067103891552675e-06, "loss": 0.4636, "step": 24588 }, { "epoch": 80.61967213114754, "grad_norm": 5.948966026306152, "learning_rate": 1.9060867294888419e-06, "loss": 0.3933, "step": 24589 }, { "epoch": 80.62295081967213, "grad_norm": 5.399540424346924, "learning_rate": 1.9054631610897079e-06, "loss": 0.4009, "step": 24590 }, { "epoch": 80.62622950819672, "grad_norm": 4.998879432678223, "learning_rate": 1.9048396839648919e-06, "loss": 0.4864, "step": 24591 }, { "epoch": 80.62950819672132, "grad_norm": 5.285756587982178, "learning_rate": 1.9042162981214264e-06, "loss": 0.5395, "step": 24592 }, { "epoch": 80.6327868852459, "grad_norm": 4.096707344055176, "learning_rate": 1.9035930035663364e-06, "loss": 0.2228, "step": 24593 }, { "epoch": 80.6360655737705, "grad_norm": 4.653067111968994, "learning_rate": 1.9029698003066555e-06, "loss": 0.2981, "step": 24594 }, { "epoch": 80.63934426229508, "grad_norm": 13.938518524169922, "learning_rate": 1.902346688349409e-06, "loss": 0.5562, "step": 24595 }, { "epoch": 80.64262295081967, "grad_norm": 6.466781139373779, "learning_rate": 1.9017236677016215e-06, "loss": 0.2704, "step": 24596 }, { "epoch": 80.64590163934426, "grad_norm": 9.132320404052734, "learning_rate": 1.9011007383703218e-06, "loss": 0.3881, "step": 24597 }, { "epoch": 80.64918032786885, "grad_norm": 5.393002510070801, "learning_rate": 1.9004779003625317e-06, "loss": 0.3264, "step": 24598 }, { "epoch": 80.65245901639344, "grad_norm": 7.913574695587158, "learning_rate": 1.899855153685275e-06, "loss": 0.323, "step": 24599 }, { "epoch": 80.65573770491804, "grad_norm": 5.620388984680176, "learning_rate": 1.89923249834557e-06, "loss": 0.3458, "step": 24600 }, { "epoch": 80.65901639344263, "grad_norm": 6.435213565826416, "learning_rate": 1.898609934350445e-06, "loss": 0.52, "step": 24601 }, { "epoch": 80.66229508196722, "grad_norm": 7.137232303619385, "learning_rate": 1.8979874617069161e-06, "loss": 0.1488, "step": 24602 }, { "epoch": 80.6655737704918, "grad_norm": 4.434338569641113, "learning_rate": 1.8973650804220024e-06, "loss": 0.426, "step": 24603 }, { "epoch": 80.66885245901639, "grad_norm": 4.463388442993164, "learning_rate": 1.8967427905027225e-06, "loss": 0.3175, "step": 24604 }, { "epoch": 80.67213114754098, "grad_norm": 5.00559139251709, "learning_rate": 1.89612059195609e-06, "loss": 0.3195, "step": 24605 }, { "epoch": 80.67540983606557, "grad_norm": 7.124659061431885, "learning_rate": 1.8954984847891257e-06, "loss": 0.5379, "step": 24606 }, { "epoch": 80.67868852459016, "grad_norm": 4.818629741668701, "learning_rate": 1.8948764690088427e-06, "loss": 0.295, "step": 24607 }, { "epoch": 80.68196721311476, "grad_norm": 4.539249897003174, "learning_rate": 1.8942545446222548e-06, "loss": 0.426, "step": 24608 }, { "epoch": 80.68524590163935, "grad_norm": 4.265788555145264, "learning_rate": 1.8936327116363728e-06, "loss": 0.3834, "step": 24609 }, { "epoch": 80.68852459016394, "grad_norm": 4.819830417633057, "learning_rate": 1.8930109700582133e-06, "loss": 0.1524, "step": 24610 }, { "epoch": 80.69180327868852, "grad_norm": 5.099092960357666, "learning_rate": 1.892389319894783e-06, "loss": 0.3218, "step": 24611 }, { "epoch": 80.69508196721311, "grad_norm": 6.306713104248047, "learning_rate": 1.8917677611530939e-06, "loss": 0.3978, "step": 24612 }, { "epoch": 80.6983606557377, "grad_norm": 5.309069633483887, "learning_rate": 1.8911462938401503e-06, "loss": 0.3649, "step": 24613 }, { "epoch": 80.70163934426229, "grad_norm": 5.107009410858154, "learning_rate": 1.890524917962967e-06, "loss": 0.3543, "step": 24614 }, { "epoch": 80.70491803278688, "grad_norm": 5.727121353149414, "learning_rate": 1.889903633528547e-06, "loss": 0.3935, "step": 24615 }, { "epoch": 80.70819672131148, "grad_norm": 4.558255195617676, "learning_rate": 1.8892824405438948e-06, "loss": 0.1639, "step": 24616 }, { "epoch": 80.71147540983607, "grad_norm": 3.4911975860595703, "learning_rate": 1.8886613390160168e-06, "loss": 0.269, "step": 24617 }, { "epoch": 80.71475409836066, "grad_norm": 7.146574974060059, "learning_rate": 1.8880403289519133e-06, "loss": 0.4879, "step": 24618 }, { "epoch": 80.71803278688525, "grad_norm": 4.664901256561279, "learning_rate": 1.8874194103585918e-06, "loss": 0.3116, "step": 24619 }, { "epoch": 80.72131147540983, "grad_norm": 4.981675148010254, "learning_rate": 1.8867985832430514e-06, "loss": 0.324, "step": 24620 }, { "epoch": 80.72459016393442, "grad_norm": 4.841719627380371, "learning_rate": 1.8861778476122926e-06, "loss": 0.3721, "step": 24621 }, { "epoch": 80.72786885245901, "grad_norm": 4.944794654846191, "learning_rate": 1.8855572034733128e-06, "loss": 0.2597, "step": 24622 }, { "epoch": 80.73114754098361, "grad_norm": 5.449227333068848, "learning_rate": 1.8849366508331146e-06, "loss": 0.3582, "step": 24623 }, { "epoch": 80.7344262295082, "grad_norm": 5.017332077026367, "learning_rate": 1.8843161896986928e-06, "loss": 0.4487, "step": 24624 }, { "epoch": 80.73770491803279, "grad_norm": 5.048268795013428, "learning_rate": 1.883695820077045e-06, "loss": 0.4496, "step": 24625 }, { "epoch": 80.74098360655738, "grad_norm": 4.66359281539917, "learning_rate": 1.8830755419751623e-06, "loss": 0.2969, "step": 24626 }, { "epoch": 80.74426229508197, "grad_norm": 5.591133117675781, "learning_rate": 1.8824553554000457e-06, "loss": 0.2979, "step": 24627 }, { "epoch": 80.74754098360656, "grad_norm": 4.5571064949035645, "learning_rate": 1.8818352603586843e-06, "loss": 0.3975, "step": 24628 }, { "epoch": 80.75081967213114, "grad_norm": 5.089902400970459, "learning_rate": 1.881215256858071e-06, "loss": 0.3692, "step": 24629 }, { "epoch": 80.75409836065573, "grad_norm": 5.878654956817627, "learning_rate": 1.8805953449051984e-06, "loss": 0.3918, "step": 24630 }, { "epoch": 80.75737704918033, "grad_norm": 4.7873311042785645, "learning_rate": 1.8799755245070516e-06, "loss": 0.4474, "step": 24631 }, { "epoch": 80.76065573770492, "grad_norm": 3.907346725463867, "learning_rate": 1.8793557956706265e-06, "loss": 0.3451, "step": 24632 }, { "epoch": 80.76393442622951, "grad_norm": 6.626347064971924, "learning_rate": 1.8787361584029084e-06, "loss": 0.2069, "step": 24633 }, { "epoch": 80.7672131147541, "grad_norm": 6.659648418426514, "learning_rate": 1.878116612710883e-06, "loss": 0.5537, "step": 24634 }, { "epoch": 80.77049180327869, "grad_norm": 4.53923225402832, "learning_rate": 1.8774971586015356e-06, "loss": 0.4466, "step": 24635 }, { "epoch": 80.77377049180328, "grad_norm": 8.163318634033203, "learning_rate": 1.8768777960818563e-06, "loss": 0.3786, "step": 24636 }, { "epoch": 80.77704918032786, "grad_norm": 6.457895755767822, "learning_rate": 1.876258525158825e-06, "loss": 0.1979, "step": 24637 }, { "epoch": 80.78032786885245, "grad_norm": 3.987277030944824, "learning_rate": 1.875639345839425e-06, "loss": 0.3892, "step": 24638 }, { "epoch": 80.78360655737706, "grad_norm": 5.106279373168945, "learning_rate": 1.8750202581306365e-06, "loss": 0.2997, "step": 24639 }, { "epoch": 80.78688524590164, "grad_norm": 4.484580993652344, "learning_rate": 1.8744012620394458e-06, "loss": 0.42, "step": 24640 }, { "epoch": 80.79016393442623, "grad_norm": 9.479813575744629, "learning_rate": 1.8737823575728287e-06, "loss": 0.5057, "step": 24641 }, { "epoch": 80.79344262295082, "grad_norm": 7.067941665649414, "learning_rate": 1.8731635447377617e-06, "loss": 0.522, "step": 24642 }, { "epoch": 80.79672131147541, "grad_norm": 5.2426581382751465, "learning_rate": 1.872544823541228e-06, "loss": 0.3019, "step": 24643 }, { "epoch": 80.8, "grad_norm": 4.6594929695129395, "learning_rate": 1.8719261939902023e-06, "loss": 0.3465, "step": 24644 }, { "epoch": 80.80327868852459, "grad_norm": 6.873448848724365, "learning_rate": 1.8713076560916577e-06, "loss": 0.5209, "step": 24645 }, { "epoch": 80.80655737704917, "grad_norm": 4.187926769256592, "learning_rate": 1.870689209852573e-06, "loss": 0.3262, "step": 24646 }, { "epoch": 80.80983606557378, "grad_norm": 7.089010715484619, "learning_rate": 1.8700708552799196e-06, "loss": 0.3245, "step": 24647 }, { "epoch": 80.81311475409836, "grad_norm": 10.448962211608887, "learning_rate": 1.8694525923806683e-06, "loss": 0.3369, "step": 24648 }, { "epoch": 80.81639344262295, "grad_norm": 5.790935039520264, "learning_rate": 1.8688344211617948e-06, "loss": 0.2667, "step": 24649 }, { "epoch": 80.81967213114754, "grad_norm": 4.82123327255249, "learning_rate": 1.868216341630268e-06, "loss": 0.4483, "step": 24650 }, { "epoch": 80.82295081967213, "grad_norm": 4.855632305145264, "learning_rate": 1.8675983537930564e-06, "loss": 0.3014, "step": 24651 }, { "epoch": 80.82622950819672, "grad_norm": 5.625643253326416, "learning_rate": 1.8669804576571271e-06, "loss": 0.4794, "step": 24652 }, { "epoch": 80.8295081967213, "grad_norm": 5.417882442474365, "learning_rate": 1.866362653229451e-06, "loss": 0.3235, "step": 24653 }, { "epoch": 80.8327868852459, "grad_norm": 5.324858665466309, "learning_rate": 1.8657449405169937e-06, "loss": 0.4566, "step": 24654 }, { "epoch": 80.8360655737705, "grad_norm": 5.001082897186279, "learning_rate": 1.8651273195267184e-06, "loss": 0.2638, "step": 24655 }, { "epoch": 80.83934426229509, "grad_norm": 5.120519161224365, "learning_rate": 1.8645097902655917e-06, "loss": 0.4434, "step": 24656 }, { "epoch": 80.84262295081967, "grad_norm": 5.185694694519043, "learning_rate": 1.8638923527405728e-06, "loss": 0.4908, "step": 24657 }, { "epoch": 80.84590163934426, "grad_norm": 13.610136032104492, "learning_rate": 1.8632750069586304e-06, "loss": 0.5694, "step": 24658 }, { "epoch": 80.84918032786885, "grad_norm": 5.068897724151611, "learning_rate": 1.862657752926722e-06, "loss": 0.5589, "step": 24659 }, { "epoch": 80.85245901639344, "grad_norm": 6.731620788574219, "learning_rate": 1.862040590651808e-06, "loss": 0.3027, "step": 24660 }, { "epoch": 80.85573770491803, "grad_norm": 15.737356185913086, "learning_rate": 1.861423520140846e-06, "loss": 0.3878, "step": 24661 }, { "epoch": 80.85901639344263, "grad_norm": 8.344366073608398, "learning_rate": 1.8608065414007969e-06, "loss": 0.4281, "step": 24662 }, { "epoch": 80.86229508196722, "grad_norm": 6.345864295959473, "learning_rate": 1.8601896544386177e-06, "loss": 0.5343, "step": 24663 }, { "epoch": 80.8655737704918, "grad_norm": 5.163136959075928, "learning_rate": 1.8595728592612627e-06, "loss": 0.4186, "step": 24664 }, { "epoch": 80.8688524590164, "grad_norm": 4.384865760803223, "learning_rate": 1.8589561558756864e-06, "loss": 0.6348, "step": 24665 }, { "epoch": 80.87213114754098, "grad_norm": 3.7476136684417725, "learning_rate": 1.8583395442888452e-06, "loss": 0.3017, "step": 24666 }, { "epoch": 80.87540983606557, "grad_norm": 5.509026050567627, "learning_rate": 1.8577230245076915e-06, "loss": 0.366, "step": 24667 }, { "epoch": 80.87868852459016, "grad_norm": 5.148995399475098, "learning_rate": 1.8571065965391767e-06, "loss": 0.4603, "step": 24668 }, { "epoch": 80.88196721311475, "grad_norm": 5.35368013381958, "learning_rate": 1.85649026039025e-06, "loss": 0.2671, "step": 24669 }, { "epoch": 80.88524590163935, "grad_norm": 4.909246921539307, "learning_rate": 1.8558740160678622e-06, "loss": 0.2782, "step": 24670 }, { "epoch": 80.88852459016394, "grad_norm": 5.173598766326904, "learning_rate": 1.8552578635789642e-06, "loss": 0.2384, "step": 24671 }, { "epoch": 80.89180327868853, "grad_norm": 5.623385429382324, "learning_rate": 1.8546418029305023e-06, "loss": 0.7613, "step": 24672 }, { "epoch": 80.89508196721312, "grad_norm": 4.228702545166016, "learning_rate": 1.8540258341294227e-06, "loss": 0.2183, "step": 24673 }, { "epoch": 80.8983606557377, "grad_norm": 5.1425299644470215, "learning_rate": 1.8534099571826702e-06, "loss": 0.3313, "step": 24674 }, { "epoch": 80.90163934426229, "grad_norm": 8.86436939239502, "learning_rate": 1.852794172097192e-06, "loss": 0.3524, "step": 24675 }, { "epoch": 80.90491803278688, "grad_norm": 6.223690986633301, "learning_rate": 1.8521784788799314e-06, "loss": 0.4735, "step": 24676 }, { "epoch": 80.90819672131147, "grad_norm": 6.482858657836914, "learning_rate": 1.8515628775378292e-06, "loss": 0.3513, "step": 24677 }, { "epoch": 80.91147540983607, "grad_norm": 5.221504211425781, "learning_rate": 1.8509473680778256e-06, "loss": 0.4683, "step": 24678 }, { "epoch": 80.91475409836066, "grad_norm": 6.57187032699585, "learning_rate": 1.8503319505068662e-06, "loss": 0.3596, "step": 24679 }, { "epoch": 80.91803278688525, "grad_norm": 5.258632659912109, "learning_rate": 1.8497166248318876e-06, "loss": 0.3714, "step": 24680 }, { "epoch": 80.92131147540984, "grad_norm": 6.51783561706543, "learning_rate": 1.8491013910598277e-06, "loss": 0.1772, "step": 24681 }, { "epoch": 80.92459016393443, "grad_norm": 4.606077194213867, "learning_rate": 1.8484862491976252e-06, "loss": 0.1853, "step": 24682 }, { "epoch": 80.92786885245901, "grad_norm": 4.229990005493164, "learning_rate": 1.8478711992522125e-06, "loss": 0.413, "step": 24683 }, { "epoch": 80.9311475409836, "grad_norm": 4.78536319732666, "learning_rate": 1.8472562412305307e-06, "loss": 0.527, "step": 24684 }, { "epoch": 80.93442622950819, "grad_norm": 7.5479936599731445, "learning_rate": 1.8466413751395117e-06, "loss": 0.2824, "step": 24685 }, { "epoch": 80.9377049180328, "grad_norm": 5.068553447723389, "learning_rate": 1.8460266009860884e-06, "loss": 0.3359, "step": 24686 }, { "epoch": 80.94098360655738, "grad_norm": 4.830589771270752, "learning_rate": 1.8454119187771912e-06, "loss": 0.334, "step": 24687 }, { "epoch": 80.94426229508197, "grad_norm": 4.62663459777832, "learning_rate": 1.844797328519755e-06, "loss": 0.4761, "step": 24688 }, { "epoch": 80.94754098360656, "grad_norm": 10.059842109680176, "learning_rate": 1.8441828302207089e-06, "loss": 0.2545, "step": 24689 }, { "epoch": 80.95081967213115, "grad_norm": 5.823269844055176, "learning_rate": 1.843568423886981e-06, "loss": 0.4353, "step": 24690 }, { "epoch": 80.95409836065573, "grad_norm": 4.786978244781494, "learning_rate": 1.8429541095254965e-06, "loss": 0.4867, "step": 24691 }, { "epoch": 80.95737704918032, "grad_norm": 4.315085411071777, "learning_rate": 1.8423398871431897e-06, "loss": 0.3731, "step": 24692 }, { "epoch": 80.96065573770491, "grad_norm": 10.21085262298584, "learning_rate": 1.8417257567469815e-06, "loss": 0.3263, "step": 24693 }, { "epoch": 80.96393442622951, "grad_norm": 18.0612850189209, "learning_rate": 1.8411117183437977e-06, "loss": 0.5508, "step": 24694 }, { "epoch": 80.9672131147541, "grad_norm": 5.722143173217773, "learning_rate": 1.8404977719405603e-06, "loss": 0.506, "step": 24695 }, { "epoch": 80.97049180327869, "grad_norm": 31.52726173400879, "learning_rate": 1.8398839175441962e-06, "loss": 0.3603, "step": 24696 }, { "epoch": 80.97377049180328, "grad_norm": 5.088210582733154, "learning_rate": 1.8392701551616255e-06, "loss": 0.271, "step": 24697 }, { "epoch": 80.97704918032787, "grad_norm": 5.413014888763428, "learning_rate": 1.8386564847997668e-06, "loss": 0.5228, "step": 24698 }, { "epoch": 80.98032786885246, "grad_norm": 5.084234237670898, "learning_rate": 1.8380429064655448e-06, "loss": 0.2963, "step": 24699 }, { "epoch": 80.98360655737704, "grad_norm": 5.573196887969971, "learning_rate": 1.8374294201658738e-06, "loss": 0.4956, "step": 24700 }, { "epoch": 80.98688524590163, "grad_norm": 4.7119574546813965, "learning_rate": 1.8368160259076718e-06, "loss": 0.5259, "step": 24701 }, { "epoch": 80.99016393442623, "grad_norm": 7.899916648864746, "learning_rate": 1.8362027236978585e-06, "loss": 0.447, "step": 24702 }, { "epoch": 80.99344262295082, "grad_norm": 5.433050632476807, "learning_rate": 1.8355895135433488e-06, "loss": 0.4506, "step": 24703 }, { "epoch": 80.99672131147541, "grad_norm": 4.782585144042969, "learning_rate": 1.8349763954510525e-06, "loss": 0.1968, "step": 24704 }, { "epoch": 81.0, "grad_norm": 5.214920520782471, "learning_rate": 1.8343633694278895e-06, "loss": 0.5434, "step": 24705 }, { "epoch": 81.00327868852459, "grad_norm": 6.9660749435424805, "learning_rate": 1.83375043548077e-06, "loss": 0.5434, "step": 24706 }, { "epoch": 81.00655737704918, "grad_norm": 4.401575565338135, "learning_rate": 1.8331375936166052e-06, "loss": 0.3557, "step": 24707 }, { "epoch": 81.00983606557377, "grad_norm": 5.2620744705200195, "learning_rate": 1.832524843842306e-06, "loss": 0.4594, "step": 24708 }, { "epoch": 81.01311475409837, "grad_norm": 5.409432888031006, "learning_rate": 1.831912186164777e-06, "loss": 0.4429, "step": 24709 }, { "epoch": 81.01639344262296, "grad_norm": 4.4425249099731445, "learning_rate": 1.8312996205909351e-06, "loss": 0.1831, "step": 24710 }, { "epoch": 81.01967213114754, "grad_norm": 5.0635576248168945, "learning_rate": 1.8306871471276821e-06, "loss": 0.4243, "step": 24711 }, { "epoch": 81.02295081967213, "grad_norm": 5.525841236114502, "learning_rate": 1.8300747657819263e-06, "loss": 0.5057, "step": 24712 }, { "epoch": 81.02622950819672, "grad_norm": 6.026971817016602, "learning_rate": 1.8294624765605684e-06, "loss": 0.2832, "step": 24713 }, { "epoch": 81.02950819672131, "grad_norm": 5.802945613861084, "learning_rate": 1.82885027947052e-06, "loss": 0.3847, "step": 24714 }, { "epoch": 81.0327868852459, "grad_norm": 5.242325305938721, "learning_rate": 1.8282381745186805e-06, "loss": 0.4895, "step": 24715 }, { "epoch": 81.03606557377049, "grad_norm": 3.86346697807312, "learning_rate": 1.8276261617119517e-06, "loss": 0.5135, "step": 24716 }, { "epoch": 81.03934426229509, "grad_norm": 4.267590045928955, "learning_rate": 1.8270142410572344e-06, "loss": 0.5032, "step": 24717 }, { "epoch": 81.04262295081968, "grad_norm": 5.034181118011475, "learning_rate": 1.8264024125614277e-06, "loss": 0.4128, "step": 24718 }, { "epoch": 81.04590163934427, "grad_norm": 6.9457573890686035, "learning_rate": 1.8257906762314348e-06, "loss": 0.3566, "step": 24719 }, { "epoch": 81.04918032786885, "grad_norm": 3.7788729667663574, "learning_rate": 1.8251790320741502e-06, "loss": 0.4097, "step": 24720 }, { "epoch": 81.05245901639344, "grad_norm": 5.003811359405518, "learning_rate": 1.824567480096473e-06, "loss": 0.2579, "step": 24721 }, { "epoch": 81.05573770491803, "grad_norm": 5.867173194885254, "learning_rate": 1.8239560203052941e-06, "loss": 0.2493, "step": 24722 }, { "epoch": 81.05901639344262, "grad_norm": 4.086030960083008, "learning_rate": 1.823344652707515e-06, "loss": 0.7745, "step": 24723 }, { "epoch": 81.0622950819672, "grad_norm": 4.614166736602783, "learning_rate": 1.8227333773100263e-06, "loss": 0.426, "step": 24724 }, { "epoch": 81.06557377049181, "grad_norm": 4.989224433898926, "learning_rate": 1.822122194119722e-06, "loss": 0.2697, "step": 24725 }, { "epoch": 81.0688524590164, "grad_norm": 4.945592403411865, "learning_rate": 1.8215111031434895e-06, "loss": 0.6495, "step": 24726 }, { "epoch": 81.07213114754099, "grad_norm": 4.278903484344482, "learning_rate": 1.8209001043882246e-06, "loss": 0.3745, "step": 24727 }, { "epoch": 81.07540983606557, "grad_norm": 5.101582050323486, "learning_rate": 1.8202891978608161e-06, "loss": 0.4427, "step": 24728 }, { "epoch": 81.07868852459016, "grad_norm": 4.827156066894531, "learning_rate": 1.819678383568152e-06, "loss": 0.3229, "step": 24729 }, { "epoch": 81.08196721311475, "grad_norm": 13.168139457702637, "learning_rate": 1.8190676615171187e-06, "loss": 0.383, "step": 24730 }, { "epoch": 81.08524590163934, "grad_norm": 4.0316243171691895, "learning_rate": 1.8184570317146012e-06, "loss": 0.3141, "step": 24731 }, { "epoch": 81.08852459016393, "grad_norm": 20.330793380737305, "learning_rate": 1.81784649416749e-06, "loss": 0.5342, "step": 24732 }, { "epoch": 81.09180327868853, "grad_norm": 4.614210605621338, "learning_rate": 1.8172360488826668e-06, "loss": 0.3675, "step": 24733 }, { "epoch": 81.09508196721312, "grad_norm": 4.4659223556518555, "learning_rate": 1.8166256958670147e-06, "loss": 0.2847, "step": 24734 }, { "epoch": 81.09836065573771, "grad_norm": 3.854895830154419, "learning_rate": 1.816015435127415e-06, "loss": 0.2014, "step": 24735 }, { "epoch": 81.1016393442623, "grad_norm": 5.496379852294922, "learning_rate": 1.8154052666707523e-06, "loss": 0.5928, "step": 24736 }, { "epoch": 81.10491803278688, "grad_norm": 4.610098361968994, "learning_rate": 1.814795190503905e-06, "loss": 0.3561, "step": 24737 }, { "epoch": 81.10819672131147, "grad_norm": 8.514187812805176, "learning_rate": 1.8141852066337529e-06, "loss": 0.319, "step": 24738 }, { "epoch": 81.11147540983606, "grad_norm": 5.542542934417725, "learning_rate": 1.8135753150671708e-06, "loss": 0.5161, "step": 24739 }, { "epoch": 81.11475409836065, "grad_norm": 4.141446590423584, "learning_rate": 1.8129655158110415e-06, "loss": 0.4737, "step": 24740 }, { "epoch": 81.11803278688525, "grad_norm": 5.139791965484619, "learning_rate": 1.812355808872238e-06, "loss": 0.1219, "step": 24741 }, { "epoch": 81.12131147540984, "grad_norm": 4.535854816436768, "learning_rate": 1.8117461942576353e-06, "loss": 0.4336, "step": 24742 }, { "epoch": 81.12459016393443, "grad_norm": 8.327536582946777, "learning_rate": 1.8111366719741085e-06, "loss": 0.4507, "step": 24743 }, { "epoch": 81.12786885245902, "grad_norm": 7.41295051574707, "learning_rate": 1.810527242028528e-06, "loss": 0.3883, "step": 24744 }, { "epoch": 81.1311475409836, "grad_norm": 5.316961765289307, "learning_rate": 1.8099179044277704e-06, "loss": 0.3247, "step": 24745 }, { "epoch": 81.1344262295082, "grad_norm": 5.419201374053955, "learning_rate": 1.8093086591787036e-06, "loss": 0.3268, "step": 24746 }, { "epoch": 81.13770491803278, "grad_norm": 4.523679733276367, "learning_rate": 1.8086995062881952e-06, "loss": 0.3452, "step": 24747 }, { "epoch": 81.14098360655737, "grad_norm": 4.779476165771484, "learning_rate": 1.8080904457631187e-06, "loss": 0.5059, "step": 24748 }, { "epoch": 81.14426229508197, "grad_norm": 6.424264430999756, "learning_rate": 1.80748147761034e-06, "loss": 0.3625, "step": 24749 }, { "epoch": 81.14754098360656, "grad_norm": 4.556501865386963, "learning_rate": 1.8068726018367244e-06, "loss": 0.2876, "step": 24750 }, { "epoch": 81.15081967213115, "grad_norm": 7.765811920166016, "learning_rate": 1.8062638184491399e-06, "loss": 0.5027, "step": 24751 }, { "epoch": 81.15409836065574, "grad_norm": 5.476548194885254, "learning_rate": 1.8056551274544508e-06, "loss": 0.6911, "step": 24752 }, { "epoch": 81.15737704918033, "grad_norm": 5.133529186248779, "learning_rate": 1.8050465288595177e-06, "loss": 0.3962, "step": 24753 }, { "epoch": 81.16065573770491, "grad_norm": 4.9960126876831055, "learning_rate": 1.804438022671209e-06, "loss": 0.3826, "step": 24754 }, { "epoch": 81.1639344262295, "grad_norm": 4.878946304321289, "learning_rate": 1.8038296088963813e-06, "loss": 0.356, "step": 24755 }, { "epoch": 81.1672131147541, "grad_norm": 6.022042751312256, "learning_rate": 1.8032212875418976e-06, "loss": 0.3208, "step": 24756 }, { "epoch": 81.1704918032787, "grad_norm": 4.7334794998168945, "learning_rate": 1.802613058614614e-06, "loss": 0.344, "step": 24757 }, { "epoch": 81.17377049180328, "grad_norm": 12.097679138183594, "learning_rate": 1.802004922121393e-06, "loss": 0.3791, "step": 24758 }, { "epoch": 81.17704918032787, "grad_norm": 4.402233123779297, "learning_rate": 1.8013968780690905e-06, "loss": 0.4046, "step": 24759 }, { "epoch": 81.18032786885246, "grad_norm": 6.526089668273926, "learning_rate": 1.8007889264645629e-06, "loss": 0.5513, "step": 24760 }, { "epoch": 81.18360655737705, "grad_norm": 4.987417221069336, "learning_rate": 1.8001810673146625e-06, "loss": 0.2849, "step": 24761 }, { "epoch": 81.18688524590164, "grad_norm": 4.470285415649414, "learning_rate": 1.7995733006262494e-06, "loss": 0.3623, "step": 24762 }, { "epoch": 81.19016393442622, "grad_norm": 5.446788311004639, "learning_rate": 1.7989656264061727e-06, "loss": 0.4656, "step": 24763 }, { "epoch": 81.19344262295083, "grad_norm": 5.1717095375061035, "learning_rate": 1.7983580446612859e-06, "loss": 0.2948, "step": 24764 }, { "epoch": 81.19672131147541, "grad_norm": 6.14178991317749, "learning_rate": 1.7977505553984376e-06, "loss": 0.3356, "step": 24765 }, { "epoch": 81.2, "grad_norm": 5.2071533203125, "learning_rate": 1.7971431586244814e-06, "loss": 0.3019, "step": 24766 }, { "epoch": 81.20327868852459, "grad_norm": 5.168248176574707, "learning_rate": 1.7965358543462663e-06, "loss": 0.2635, "step": 24767 }, { "epoch": 81.20655737704918, "grad_norm": 5.536091327667236, "learning_rate": 1.795928642570638e-06, "loss": 0.4426, "step": 24768 }, { "epoch": 81.20983606557377, "grad_norm": 5.700603008270264, "learning_rate": 1.795321523304444e-06, "loss": 0.4759, "step": 24769 }, { "epoch": 81.21311475409836, "grad_norm": 4.13986873626709, "learning_rate": 1.7947144965545294e-06, "loss": 0.2891, "step": 24770 }, { "epoch": 81.21639344262294, "grad_norm": 3.821798801422119, "learning_rate": 1.7941075623277416e-06, "loss": 0.2653, "step": 24771 }, { "epoch": 81.21967213114755, "grad_norm": 6.361691951751709, "learning_rate": 1.793500720630923e-06, "loss": 0.2562, "step": 24772 }, { "epoch": 81.22295081967214, "grad_norm": 4.362610816955566, "learning_rate": 1.7928939714709171e-06, "loss": 0.2779, "step": 24773 }, { "epoch": 81.22622950819672, "grad_norm": 30.45277976989746, "learning_rate": 1.7922873148545617e-06, "loss": 0.1832, "step": 24774 }, { "epoch": 81.22950819672131, "grad_norm": 5.194215774536133, "learning_rate": 1.7916807507887035e-06, "loss": 0.4005, "step": 24775 }, { "epoch": 81.2327868852459, "grad_norm": 5.444872856140137, "learning_rate": 1.7910742792801793e-06, "loss": 0.3487, "step": 24776 }, { "epoch": 81.23606557377049, "grad_norm": 4.297277927398682, "learning_rate": 1.7904679003358283e-06, "loss": 0.3985, "step": 24777 }, { "epoch": 81.23934426229508, "grad_norm": 5.679680824279785, "learning_rate": 1.7898616139624848e-06, "loss": 0.3899, "step": 24778 }, { "epoch": 81.24262295081967, "grad_norm": 4.758969306945801, "learning_rate": 1.7892554201669898e-06, "loss": 0.3924, "step": 24779 }, { "epoch": 81.24590163934427, "grad_norm": 5.059562683105469, "learning_rate": 1.7886493189561783e-06, "loss": 0.4004, "step": 24780 }, { "epoch": 81.24918032786886, "grad_norm": 6.775383472442627, "learning_rate": 1.7880433103368822e-06, "loss": 0.6153, "step": 24781 }, { "epoch": 81.25245901639344, "grad_norm": 5.051005840301514, "learning_rate": 1.7874373943159362e-06, "loss": 0.348, "step": 24782 }, { "epoch": 81.25573770491803, "grad_norm": 4.881005764007568, "learning_rate": 1.7868315709001704e-06, "loss": 0.4848, "step": 24783 }, { "epoch": 81.25901639344262, "grad_norm": 4.826909065246582, "learning_rate": 1.7862258400964206e-06, "loss": 0.4021, "step": 24784 }, { "epoch": 81.26229508196721, "grad_norm": 4.715978145599365, "learning_rate": 1.7856202019115144e-06, "loss": 0.3153, "step": 24785 }, { "epoch": 81.2655737704918, "grad_norm": 5.748363971710205, "learning_rate": 1.7850146563522809e-06, "loss": 0.1549, "step": 24786 }, { "epoch": 81.26885245901639, "grad_norm": 7.327149868011475, "learning_rate": 1.7844092034255466e-06, "loss": 0.3756, "step": 24787 }, { "epoch": 81.27213114754099, "grad_norm": 8.13970947265625, "learning_rate": 1.7838038431381433e-06, "loss": 0.5582, "step": 24788 }, { "epoch": 81.27540983606558, "grad_norm": 6.19744348526001, "learning_rate": 1.7831985754968938e-06, "loss": 0.2743, "step": 24789 }, { "epoch": 81.27868852459017, "grad_norm": 5.132174015045166, "learning_rate": 1.7825934005086243e-06, "loss": 0.3246, "step": 24790 }, { "epoch": 81.28196721311475, "grad_norm": 8.20767879486084, "learning_rate": 1.7819883181801557e-06, "loss": 0.2758, "step": 24791 }, { "epoch": 81.28524590163934, "grad_norm": 4.280643463134766, "learning_rate": 1.7813833285183156e-06, "loss": 0.2597, "step": 24792 }, { "epoch": 81.28852459016393, "grad_norm": 4.628956317901611, "learning_rate": 1.7807784315299237e-06, "loss": 0.397, "step": 24793 }, { "epoch": 81.29180327868852, "grad_norm": 9.52250862121582, "learning_rate": 1.7801736272218007e-06, "loss": 0.3852, "step": 24794 }, { "epoch": 81.29508196721312, "grad_norm": 7.527112007141113, "learning_rate": 1.7795689156007667e-06, "loss": 0.3984, "step": 24795 }, { "epoch": 81.29836065573771, "grad_norm": 5.9219746589660645, "learning_rate": 1.7789642966736376e-06, "loss": 0.3872, "step": 24796 }, { "epoch": 81.3016393442623, "grad_norm": 5.589074611663818, "learning_rate": 1.7783597704472365e-06, "loss": 0.4991, "step": 24797 }, { "epoch": 81.30491803278689, "grad_norm": 5.941618919372559, "learning_rate": 1.7777553369283773e-06, "loss": 0.2783, "step": 24798 }, { "epoch": 81.30819672131148, "grad_norm": 5.3997321128845215, "learning_rate": 1.7771509961238754e-06, "loss": 0.4119, "step": 24799 }, { "epoch": 81.31147540983606, "grad_norm": 8.835878372192383, "learning_rate": 1.776546748040544e-06, "loss": 0.3509, "step": 24800 }, { "epoch": 81.31475409836065, "grad_norm": 4.615081310272217, "learning_rate": 1.7759425926852002e-06, "loss": 0.2241, "step": 24801 }, { "epoch": 81.31803278688524, "grad_norm": 7.869420051574707, "learning_rate": 1.7753385300646542e-06, "loss": 0.4655, "step": 24802 }, { "epoch": 81.32131147540984, "grad_norm": 4.906815052032471, "learning_rate": 1.7747345601857157e-06, "loss": 0.2114, "step": 24803 }, { "epoch": 81.32459016393443, "grad_norm": 4.939545154571533, "learning_rate": 1.7741306830551996e-06, "loss": 0.2944, "step": 24804 }, { "epoch": 81.32786885245902, "grad_norm": 4.828052520751953, "learning_rate": 1.7735268986799125e-06, "loss": 0.5101, "step": 24805 }, { "epoch": 81.33114754098361, "grad_norm": 5.685340881347656, "learning_rate": 1.7729232070666602e-06, "loss": 0.3485, "step": 24806 }, { "epoch": 81.3344262295082, "grad_norm": 4.209393501281738, "learning_rate": 1.772319608222256e-06, "loss": 0.4327, "step": 24807 }, { "epoch": 81.33770491803278, "grad_norm": 6.110722541809082, "learning_rate": 1.7717161021535034e-06, "loss": 0.3922, "step": 24808 }, { "epoch": 81.34098360655737, "grad_norm": 6.285175800323486, "learning_rate": 1.7711126888672037e-06, "loss": 0.46, "step": 24809 }, { "epoch": 81.34426229508196, "grad_norm": 6.665340423583984, "learning_rate": 1.7705093683701669e-06, "loss": 0.4848, "step": 24810 }, { "epoch": 81.34754098360656, "grad_norm": 5.159748077392578, "learning_rate": 1.7699061406691931e-06, "loss": 0.2655, "step": 24811 }, { "epoch": 81.35081967213115, "grad_norm": 3.719359874725342, "learning_rate": 1.7693030057710847e-06, "loss": 0.2509, "step": 24812 }, { "epoch": 81.35409836065574, "grad_norm": 18.741863250732422, "learning_rate": 1.7686999636826407e-06, "loss": 0.3139, "step": 24813 }, { "epoch": 81.35737704918033, "grad_norm": 10.381089210510254, "learning_rate": 1.7680970144106657e-06, "loss": 0.4562, "step": 24814 }, { "epoch": 81.36065573770492, "grad_norm": 5.5923237800598145, "learning_rate": 1.7674941579619553e-06, "loss": 0.2746, "step": 24815 }, { "epoch": 81.3639344262295, "grad_norm": 5.2221760749816895, "learning_rate": 1.7668913943433087e-06, "loss": 0.2986, "step": 24816 }, { "epoch": 81.3672131147541, "grad_norm": 5.403482913970947, "learning_rate": 1.7662887235615189e-06, "loss": 0.4072, "step": 24817 }, { "epoch": 81.37049180327868, "grad_norm": 5.087378025054932, "learning_rate": 1.7656861456233876e-06, "loss": 0.6058, "step": 24818 }, { "epoch": 81.37377049180328, "grad_norm": 5.101686000823975, "learning_rate": 1.7650836605357058e-06, "loss": 0.3397, "step": 24819 }, { "epoch": 81.37704918032787, "grad_norm": 5.9150519371032715, "learning_rate": 1.7644812683052682e-06, "loss": 0.366, "step": 24820 }, { "epoch": 81.38032786885246, "grad_norm": 7.890542507171631, "learning_rate": 1.7638789689388669e-06, "loss": 0.4298, "step": 24821 }, { "epoch": 81.38360655737705, "grad_norm": 5.571421146392822, "learning_rate": 1.7632767624432923e-06, "loss": 0.3279, "step": 24822 }, { "epoch": 81.38688524590164, "grad_norm": 5.826022148132324, "learning_rate": 1.7626746488253377e-06, "loss": 0.5596, "step": 24823 }, { "epoch": 81.39016393442623, "grad_norm": 5.383939743041992, "learning_rate": 1.7620726280917911e-06, "loss": 0.6077, "step": 24824 }, { "epoch": 81.39344262295081, "grad_norm": 5.203976154327393, "learning_rate": 1.7614707002494413e-06, "loss": 0.3604, "step": 24825 }, { "epoch": 81.3967213114754, "grad_norm": 4.870658874511719, "learning_rate": 1.760868865305072e-06, "loss": 0.3849, "step": 24826 }, { "epoch": 81.4, "grad_norm": 5.121240615844727, "learning_rate": 1.7602671232654755e-06, "loss": 0.4139, "step": 24827 }, { "epoch": 81.4032786885246, "grad_norm": 6.7558207511901855, "learning_rate": 1.7596654741374353e-06, "loss": 0.3083, "step": 24828 }, { "epoch": 81.40655737704918, "grad_norm": 4.342887878417969, "learning_rate": 1.7590639179277335e-06, "loss": 0.4268, "step": 24829 }, { "epoch": 81.40983606557377, "grad_norm": 3.916545867919922, "learning_rate": 1.7584624546431527e-06, "loss": 0.5007, "step": 24830 }, { "epoch": 81.41311475409836, "grad_norm": 4.7588090896606445, "learning_rate": 1.7578610842904798e-06, "loss": 0.2683, "step": 24831 }, { "epoch": 81.41639344262295, "grad_norm": 4.89968204498291, "learning_rate": 1.7572598068764913e-06, "loss": 0.2533, "step": 24832 }, { "epoch": 81.41967213114754, "grad_norm": 3.4359726905822754, "learning_rate": 1.7566586224079695e-06, "loss": 0.3006, "step": 24833 }, { "epoch": 81.42295081967212, "grad_norm": 4.173172473907471, "learning_rate": 1.756057530891694e-06, "loss": 0.22, "step": 24834 }, { "epoch": 81.42622950819673, "grad_norm": 5.718389511108398, "learning_rate": 1.7554565323344375e-06, "loss": 0.3603, "step": 24835 }, { "epoch": 81.42950819672132, "grad_norm": 5.681128978729248, "learning_rate": 1.7548556267429829e-06, "loss": 0.3414, "step": 24836 }, { "epoch": 81.4327868852459, "grad_norm": 4.951589584350586, "learning_rate": 1.7542548141241044e-06, "loss": 0.2666, "step": 24837 }, { "epoch": 81.43606557377049, "grad_norm": 5.377659797668457, "learning_rate": 1.753654094484577e-06, "loss": 0.4656, "step": 24838 }, { "epoch": 81.43934426229508, "grad_norm": 6.423050880432129, "learning_rate": 1.7530534678311706e-06, "loss": 0.5434, "step": 24839 }, { "epoch": 81.44262295081967, "grad_norm": 5.081338405609131, "learning_rate": 1.752452934170663e-06, "loss": 0.1667, "step": 24840 }, { "epoch": 81.44590163934426, "grad_norm": 4.60821533203125, "learning_rate": 1.7518524935098247e-06, "loss": 0.3072, "step": 24841 }, { "epoch": 81.44918032786886, "grad_norm": 4.50534200668335, "learning_rate": 1.7512521458554254e-06, "loss": 0.2443, "step": 24842 }, { "epoch": 81.45245901639345, "grad_norm": 6.253002166748047, "learning_rate": 1.7506518912142313e-06, "loss": 0.6077, "step": 24843 }, { "epoch": 81.45573770491804, "grad_norm": 3.6564786434173584, "learning_rate": 1.7500517295930174e-06, "loss": 0.2468, "step": 24844 }, { "epoch": 81.45901639344262, "grad_norm": 4.564825057983398, "learning_rate": 1.749451660998548e-06, "loss": 0.4031, "step": 24845 }, { "epoch": 81.46229508196721, "grad_norm": 5.9770426750183105, "learning_rate": 1.7488516854375904e-06, "loss": 0.3699, "step": 24846 }, { "epoch": 81.4655737704918, "grad_norm": 4.603392601013184, "learning_rate": 1.7482518029169082e-06, "loss": 0.3356, "step": 24847 }, { "epoch": 81.46885245901639, "grad_norm": 4.051689147949219, "learning_rate": 1.7476520134432639e-06, "loss": 0.1953, "step": 24848 }, { "epoch": 81.47213114754098, "grad_norm": 4.00428581237793, "learning_rate": 1.7470523170234265e-06, "loss": 0.3978, "step": 24849 }, { "epoch": 81.47540983606558, "grad_norm": 4.977463245391846, "learning_rate": 1.7464527136641552e-06, "loss": 0.3364, "step": 24850 }, { "epoch": 81.47868852459017, "grad_norm": 7.28819465637207, "learning_rate": 1.7458532033722098e-06, "loss": 0.5094, "step": 24851 }, { "epoch": 81.48196721311476, "grad_norm": 4.872305393218994, "learning_rate": 1.7452537861543507e-06, "loss": 0.4607, "step": 24852 }, { "epoch": 81.48524590163935, "grad_norm": 5.135038375854492, "learning_rate": 1.74465446201734e-06, "loss": 0.2418, "step": 24853 }, { "epoch": 81.48852459016393, "grad_norm": 4.199606895446777, "learning_rate": 1.7440552309679337e-06, "loss": 0.3773, "step": 24854 }, { "epoch": 81.49180327868852, "grad_norm": 16.187641143798828, "learning_rate": 1.7434560930128853e-06, "loss": 0.366, "step": 24855 }, { "epoch": 81.49508196721311, "grad_norm": 4.02344274520874, "learning_rate": 1.742857048158958e-06, "loss": 0.5086, "step": 24856 }, { "epoch": 81.4983606557377, "grad_norm": 4.66011381149292, "learning_rate": 1.7422580964129022e-06, "loss": 0.3438, "step": 24857 }, { "epoch": 81.5016393442623, "grad_norm": 4.093557357788086, "learning_rate": 1.7416592377814722e-06, "loss": 0.28, "step": 24858 }, { "epoch": 81.50491803278689, "grad_norm": 5.932847023010254, "learning_rate": 1.7410604722714187e-06, "loss": 0.3555, "step": 24859 }, { "epoch": 81.50819672131148, "grad_norm": 15.97655200958252, "learning_rate": 1.7404617998894967e-06, "loss": 0.4733, "step": 24860 }, { "epoch": 81.51147540983607, "grad_norm": 9.824732780456543, "learning_rate": 1.739863220642457e-06, "loss": 0.5197, "step": 24861 }, { "epoch": 81.51475409836065, "grad_norm": 15.469843864440918, "learning_rate": 1.739264734537045e-06, "loss": 0.2893, "step": 24862 }, { "epoch": 81.51803278688524, "grad_norm": 4.894447326660156, "learning_rate": 1.7386663415800142e-06, "loss": 0.5048, "step": 24863 }, { "epoch": 81.52131147540983, "grad_norm": 5.948063373565674, "learning_rate": 1.7380680417781104e-06, "loss": 0.5183, "step": 24864 }, { "epoch": 81.52459016393442, "grad_norm": 3.8914942741394043, "learning_rate": 1.737469835138078e-06, "loss": 0.5762, "step": 24865 }, { "epoch": 81.52786885245902, "grad_norm": 5.1337995529174805, "learning_rate": 1.7368717216666654e-06, "loss": 0.3675, "step": 24866 }, { "epoch": 81.53114754098361, "grad_norm": 4.419267654418945, "learning_rate": 1.7362737013706165e-06, "loss": 0.4238, "step": 24867 }, { "epoch": 81.5344262295082, "grad_norm": 4.914438724517822, "learning_rate": 1.7356757742566732e-06, "loss": 0.3703, "step": 24868 }, { "epoch": 81.53770491803279, "grad_norm": 7.216773986816406, "learning_rate": 1.7350779403315754e-06, "loss": 0.456, "step": 24869 }, { "epoch": 81.54098360655738, "grad_norm": 4.478941917419434, "learning_rate": 1.7344801996020699e-06, "loss": 0.2995, "step": 24870 }, { "epoch": 81.54426229508196, "grad_norm": 5.330139636993408, "learning_rate": 1.7338825520748947e-06, "loss": 0.5611, "step": 24871 }, { "epoch": 81.54754098360655, "grad_norm": 8.207845687866211, "learning_rate": 1.7332849977567878e-06, "loss": 0.4631, "step": 24872 }, { "epoch": 81.55081967213114, "grad_norm": 5.415322780609131, "learning_rate": 1.7326875366544882e-06, "loss": 0.2245, "step": 24873 }, { "epoch": 81.55409836065574, "grad_norm": 5.2691330909729, "learning_rate": 1.7320901687747294e-06, "loss": 0.2213, "step": 24874 }, { "epoch": 81.55737704918033, "grad_norm": 5.363638877868652, "learning_rate": 1.7314928941242537e-06, "loss": 0.3104, "step": 24875 }, { "epoch": 81.56065573770492, "grad_norm": 5.536300182342529, "learning_rate": 1.7308957127097926e-06, "loss": 0.3884, "step": 24876 }, { "epoch": 81.56393442622951, "grad_norm": 5.731686592102051, "learning_rate": 1.7302986245380792e-06, "loss": 0.3396, "step": 24877 }, { "epoch": 81.5672131147541, "grad_norm": 4.719399929046631, "learning_rate": 1.7297016296158453e-06, "loss": 0.2041, "step": 24878 }, { "epoch": 81.57049180327868, "grad_norm": 6.677804470062256, "learning_rate": 1.7291047279498274e-06, "loss": 0.5377, "step": 24879 }, { "epoch": 81.57377049180327, "grad_norm": 6.262526988983154, "learning_rate": 1.7285079195467524e-06, "loss": 0.7215, "step": 24880 }, { "epoch": 81.57704918032788, "grad_norm": 4.860306262969971, "learning_rate": 1.7279112044133516e-06, "loss": 0.4382, "step": 24881 }, { "epoch": 81.58032786885246, "grad_norm": 4.464200973510742, "learning_rate": 1.7273145825563498e-06, "loss": 0.2554, "step": 24882 }, { "epoch": 81.58360655737705, "grad_norm": 5.9385905265808105, "learning_rate": 1.7267180539824802e-06, "loss": 0.3888, "step": 24883 }, { "epoch": 81.58688524590164, "grad_norm": 5.479262828826904, "learning_rate": 1.7261216186984665e-06, "loss": 0.4283, "step": 24884 }, { "epoch": 81.59016393442623, "grad_norm": 4.123634338378906, "learning_rate": 1.7255252767110342e-06, "loss": 0.4268, "step": 24885 }, { "epoch": 81.59344262295082, "grad_norm": 6.063730239868164, "learning_rate": 1.7249290280269071e-06, "loss": 0.3043, "step": 24886 }, { "epoch": 81.5967213114754, "grad_norm": 4.938747406005859, "learning_rate": 1.7243328726528074e-06, "loss": 0.4649, "step": 24887 }, { "epoch": 81.6, "grad_norm": 4.625741481781006, "learning_rate": 1.723736810595461e-06, "loss": 0.3723, "step": 24888 }, { "epoch": 81.6032786885246, "grad_norm": 4.622920513153076, "learning_rate": 1.723140841861588e-06, "loss": 0.5175, "step": 24889 }, { "epoch": 81.60655737704919, "grad_norm": 7.635303020477295, "learning_rate": 1.7225449664579076e-06, "loss": 0.5272, "step": 24890 }, { "epoch": 81.60983606557377, "grad_norm": 6.126079559326172, "learning_rate": 1.7219491843911362e-06, "loss": 0.3312, "step": 24891 }, { "epoch": 81.61311475409836, "grad_norm": 6.210977077484131, "learning_rate": 1.7213534956679978e-06, "loss": 0.3599, "step": 24892 }, { "epoch": 81.61639344262295, "grad_norm": 5.395608901977539, "learning_rate": 1.7207579002952057e-06, "loss": 0.4863, "step": 24893 }, { "epoch": 81.61967213114754, "grad_norm": 5.178171634674072, "learning_rate": 1.7201623982794769e-06, "loss": 0.3768, "step": 24894 }, { "epoch": 81.62295081967213, "grad_norm": 6.81563663482666, "learning_rate": 1.7195669896275235e-06, "loss": 0.365, "step": 24895 }, { "epoch": 81.62622950819672, "grad_norm": 4.245861053466797, "learning_rate": 1.718971674346064e-06, "loss": 0.3207, "step": 24896 }, { "epoch": 81.62950819672132, "grad_norm": 5.084503650665283, "learning_rate": 1.718376452441809e-06, "loss": 0.2932, "step": 24897 }, { "epoch": 81.6327868852459, "grad_norm": 8.016884803771973, "learning_rate": 1.7177813239214702e-06, "loss": 0.3421, "step": 24898 }, { "epoch": 81.6360655737705, "grad_norm": 8.390631675720215, "learning_rate": 1.717186288791759e-06, "loss": 0.2176, "step": 24899 }, { "epoch": 81.63934426229508, "grad_norm": 6.1167988777160645, "learning_rate": 1.7165913470593809e-06, "loss": 0.47, "step": 24900 }, { "epoch": 81.64262295081967, "grad_norm": 5.459200859069824, "learning_rate": 1.7159964987310519e-06, "loss": 0.3495, "step": 24901 }, { "epoch": 81.64590163934426, "grad_norm": 9.43826961517334, "learning_rate": 1.715401743813474e-06, "loss": 0.482, "step": 24902 }, { "epoch": 81.64918032786885, "grad_norm": 4.730371475219727, "learning_rate": 1.7148070823133555e-06, "loss": 0.3791, "step": 24903 }, { "epoch": 81.65245901639344, "grad_norm": 7.861002445220947, "learning_rate": 1.7142125142374e-06, "loss": 0.3113, "step": 24904 }, { "epoch": 81.65573770491804, "grad_norm": 5.344272136688232, "learning_rate": 1.7136180395923152e-06, "loss": 0.4531, "step": 24905 }, { "epoch": 81.65901639344263, "grad_norm": 6.4230427742004395, "learning_rate": 1.7130236583848026e-06, "loss": 0.2989, "step": 24906 }, { "epoch": 81.66229508196722, "grad_norm": 4.20864725112915, "learning_rate": 1.7124293706215656e-06, "loss": 0.4849, "step": 24907 }, { "epoch": 81.6655737704918, "grad_norm": 5.053653240203857, "learning_rate": 1.711835176309301e-06, "loss": 0.4464, "step": 24908 }, { "epoch": 81.66885245901639, "grad_norm": 4.963048934936523, "learning_rate": 1.7112410754547149e-06, "loss": 0.2753, "step": 24909 }, { "epoch": 81.67213114754098, "grad_norm": 4.516168594360352, "learning_rate": 1.7106470680645037e-06, "loss": 0.2257, "step": 24910 }, { "epoch": 81.67540983606557, "grad_norm": 4.8927507400512695, "learning_rate": 1.7100531541453623e-06, "loss": 0.3816, "step": 24911 }, { "epoch": 81.67868852459016, "grad_norm": 8.796947479248047, "learning_rate": 1.7094593337039956e-06, "loss": 0.4822, "step": 24912 }, { "epoch": 81.68196721311476, "grad_norm": 5.043408393859863, "learning_rate": 1.7088656067470932e-06, "loss": 0.487, "step": 24913 }, { "epoch": 81.68524590163935, "grad_norm": 5.014036655426025, "learning_rate": 1.7082719732813501e-06, "loss": 0.1903, "step": 24914 }, { "epoch": 81.68852459016394, "grad_norm": 7.941771507263184, "learning_rate": 1.7076784333134634e-06, "loss": 0.6403, "step": 24915 }, { "epoch": 81.69180327868852, "grad_norm": 5.655672550201416, "learning_rate": 1.707084986850125e-06, "loss": 0.2573, "step": 24916 }, { "epoch": 81.69508196721311, "grad_norm": 6.842977523803711, "learning_rate": 1.706491633898023e-06, "loss": 0.2602, "step": 24917 }, { "epoch": 81.6983606557377, "grad_norm": 4.960270404815674, "learning_rate": 1.7058983744638535e-06, "loss": 0.5348, "step": 24918 }, { "epoch": 81.70163934426229, "grad_norm": 10.444258689880371, "learning_rate": 1.705305208554303e-06, "loss": 0.6381, "step": 24919 }, { "epoch": 81.70491803278688, "grad_norm": 4.921958923339844, "learning_rate": 1.704712136176061e-06, "loss": 0.3681, "step": 24920 }, { "epoch": 81.70819672131148, "grad_norm": 5.307465076446533, "learning_rate": 1.7041191573358118e-06, "loss": 0.2262, "step": 24921 }, { "epoch": 81.71147540983607, "grad_norm": 4.14699649810791, "learning_rate": 1.703526272040248e-06, "loss": 0.2742, "step": 24922 }, { "epoch": 81.71475409836066, "grad_norm": 6.824187755584717, "learning_rate": 1.7029334802960507e-06, "loss": 0.4415, "step": 24923 }, { "epoch": 81.71803278688525, "grad_norm": 6.828197479248047, "learning_rate": 1.7023407821099058e-06, "loss": 0.3375, "step": 24924 }, { "epoch": 81.72131147540983, "grad_norm": 4.192390441894531, "learning_rate": 1.7017481774884959e-06, "loss": 0.392, "step": 24925 }, { "epoch": 81.72459016393442, "grad_norm": 9.274173736572266, "learning_rate": 1.701155666438501e-06, "loss": 0.6652, "step": 24926 }, { "epoch": 81.72786885245901, "grad_norm": 3.835151195526123, "learning_rate": 1.7005632489666068e-06, "loss": 0.3529, "step": 24927 }, { "epoch": 81.73114754098361, "grad_norm": 5.305703163146973, "learning_rate": 1.6999709250794916e-06, "loss": 0.5286, "step": 24928 }, { "epoch": 81.7344262295082, "grad_norm": 10.298686981201172, "learning_rate": 1.699378694783833e-06, "loss": 0.2996, "step": 24929 }, { "epoch": 81.73770491803279, "grad_norm": 6.128426551818848, "learning_rate": 1.6987865580863083e-06, "loss": 0.3461, "step": 24930 }, { "epoch": 81.74098360655738, "grad_norm": 5.064950466156006, "learning_rate": 1.6981945149935996e-06, "loss": 0.3153, "step": 24931 }, { "epoch": 81.74426229508197, "grad_norm": 5.700026035308838, "learning_rate": 1.6976025655123784e-06, "loss": 0.5145, "step": 24932 }, { "epoch": 81.74754098360656, "grad_norm": 6.722548484802246, "learning_rate": 1.6970107096493204e-06, "loss": 0.5171, "step": 24933 }, { "epoch": 81.75081967213114, "grad_norm": 5.373387336730957, "learning_rate": 1.6964189474110981e-06, "loss": 0.4756, "step": 24934 }, { "epoch": 81.75409836065573, "grad_norm": 5.126096248626709, "learning_rate": 1.6958272788043872e-06, "loss": 0.6452, "step": 24935 }, { "epoch": 81.75737704918033, "grad_norm": 12.018157958984375, "learning_rate": 1.695235703835858e-06, "loss": 0.2537, "step": 24936 }, { "epoch": 81.76065573770492, "grad_norm": 6.3762431144714355, "learning_rate": 1.6946442225121817e-06, "loss": 0.4072, "step": 24937 }, { "epoch": 81.76393442622951, "grad_norm": 16.41640853881836, "learning_rate": 1.6940528348400276e-06, "loss": 0.2213, "step": 24938 }, { "epoch": 81.7672131147541, "grad_norm": 5.164290904998779, "learning_rate": 1.6934615408260602e-06, "loss": 0.2317, "step": 24939 }, { "epoch": 81.77049180327869, "grad_norm": 6.699151515960693, "learning_rate": 1.6928703404769531e-06, "loss": 0.3639, "step": 24940 }, { "epoch": 81.77377049180328, "grad_norm": 5.739553928375244, "learning_rate": 1.692279233799371e-06, "loss": 0.1534, "step": 24941 }, { "epoch": 81.77704918032786, "grad_norm": 8.092121124267578, "learning_rate": 1.6916882207999774e-06, "loss": 0.4197, "step": 24942 }, { "epoch": 81.78032786885245, "grad_norm": 6.185072422027588, "learning_rate": 1.691097301485436e-06, "loss": 0.4929, "step": 24943 }, { "epoch": 81.78360655737706, "grad_norm": 4.620674133300781, "learning_rate": 1.6905064758624134e-06, "loss": 0.2668, "step": 24944 }, { "epoch": 81.78688524590164, "grad_norm": 5.777376651763916, "learning_rate": 1.6899157439375714e-06, "loss": 0.2722, "step": 24945 }, { "epoch": 81.79016393442623, "grad_norm": 4.616820812225342, "learning_rate": 1.6893251057175675e-06, "loss": 0.4967, "step": 24946 }, { "epoch": 81.79344262295082, "grad_norm": 5.67905330657959, "learning_rate": 1.6887345612090633e-06, "loss": 0.5865, "step": 24947 }, { "epoch": 81.79672131147541, "grad_norm": 4.053490161895752, "learning_rate": 1.6881441104187203e-06, "loss": 0.155, "step": 24948 }, { "epoch": 81.8, "grad_norm": 5.667828559875488, "learning_rate": 1.687553753353195e-06, "loss": 0.416, "step": 24949 }, { "epoch": 81.80327868852459, "grad_norm": 4.55912971496582, "learning_rate": 1.6869634900191434e-06, "loss": 0.3653, "step": 24950 }, { "epoch": 81.80655737704917, "grad_norm": 4.616132736206055, "learning_rate": 1.6863733204232213e-06, "loss": 0.3736, "step": 24951 }, { "epoch": 81.80983606557378, "grad_norm": 4.55237340927124, "learning_rate": 1.6857832445720823e-06, "loss": 0.2527, "step": 24952 }, { "epoch": 81.81311475409836, "grad_norm": 5.431637287139893, "learning_rate": 1.6851932624723844e-06, "loss": 0.3733, "step": 24953 }, { "epoch": 81.81639344262295, "grad_norm": 3.7217071056365967, "learning_rate": 1.6846033741307778e-06, "loss": 0.1387, "step": 24954 }, { "epoch": 81.81967213114754, "grad_norm": 4.675744533538818, "learning_rate": 1.6840135795539137e-06, "loss": 0.34, "step": 24955 }, { "epoch": 81.82295081967213, "grad_norm": 4.524510383605957, "learning_rate": 1.6834238787484403e-06, "loss": 0.3621, "step": 24956 }, { "epoch": 81.82622950819672, "grad_norm": 7.056583404541016, "learning_rate": 1.6828342717210121e-06, "loss": 0.2884, "step": 24957 }, { "epoch": 81.8295081967213, "grad_norm": 9.903937339782715, "learning_rate": 1.6822447584782754e-06, "loss": 0.5038, "step": 24958 }, { "epoch": 81.8327868852459, "grad_norm": 3.9995083808898926, "learning_rate": 1.6816553390268774e-06, "loss": 0.2709, "step": 24959 }, { "epoch": 81.8360655737705, "grad_norm": 4.023221015930176, "learning_rate": 1.6810660133734625e-06, "loss": 0.4218, "step": 24960 }, { "epoch": 81.83934426229509, "grad_norm": 5.310999393463135, "learning_rate": 1.680476781524679e-06, "loss": 0.3217, "step": 24961 }, { "epoch": 81.84262295081967, "grad_norm": 6.048392295837402, "learning_rate": 1.6798876434871703e-06, "loss": 0.3257, "step": 24962 }, { "epoch": 81.84590163934426, "grad_norm": 6.011876106262207, "learning_rate": 1.6792985992675803e-06, "loss": 0.4063, "step": 24963 }, { "epoch": 81.84918032786885, "grad_norm": 5.037203311920166, "learning_rate": 1.6787096488725462e-06, "loss": 0.2789, "step": 24964 }, { "epoch": 81.85245901639344, "grad_norm": 5.245901584625244, "learning_rate": 1.678120792308716e-06, "loss": 0.4152, "step": 24965 }, { "epoch": 81.85573770491803, "grad_norm": 4.393086910247803, "learning_rate": 1.6775320295827257e-06, "loss": 0.2057, "step": 24966 }, { "epoch": 81.85901639344263, "grad_norm": 6.83512020111084, "learning_rate": 1.6769433607012132e-06, "loss": 0.3718, "step": 24967 }, { "epoch": 81.86229508196722, "grad_norm": 5.214875221252441, "learning_rate": 1.6763547856708206e-06, "loss": 0.3124, "step": 24968 }, { "epoch": 81.8655737704918, "grad_norm": 5.050097942352295, "learning_rate": 1.675766304498182e-06, "loss": 0.4355, "step": 24969 }, { "epoch": 81.8688524590164, "grad_norm": 6.1422247886657715, "learning_rate": 1.6751779171899307e-06, "loss": 0.4548, "step": 24970 }, { "epoch": 81.87213114754098, "grad_norm": 5.429065704345703, "learning_rate": 1.6745896237527071e-06, "loss": 0.3618, "step": 24971 }, { "epoch": 81.87540983606557, "grad_norm": 5.8490424156188965, "learning_rate": 1.6740014241931414e-06, "loss": 0.2373, "step": 24972 }, { "epoch": 81.87868852459016, "grad_norm": 5.136557102203369, "learning_rate": 1.6734133185178636e-06, "loss": 0.5139, "step": 24973 }, { "epoch": 81.88196721311475, "grad_norm": 4.746736526489258, "learning_rate": 1.672825306733511e-06, "loss": 0.4109, "step": 24974 }, { "epoch": 81.88524590163935, "grad_norm": 5.233725547790527, "learning_rate": 1.6722373888467102e-06, "loss": 0.3774, "step": 24975 }, { "epoch": 81.88852459016394, "grad_norm": 5.2324395179748535, "learning_rate": 1.6716495648640928e-06, "loss": 0.3248, "step": 24976 }, { "epoch": 81.89180327868853, "grad_norm": 4.71321439743042, "learning_rate": 1.6710618347922858e-06, "loss": 0.2075, "step": 24977 }, { "epoch": 81.89508196721312, "grad_norm": 4.629823684692383, "learning_rate": 1.6704741986379136e-06, "loss": 0.254, "step": 24978 }, { "epoch": 81.8983606557377, "grad_norm": 6.53681755065918, "learning_rate": 1.6698866564076077e-06, "loss": 0.5228, "step": 24979 }, { "epoch": 81.90163934426229, "grad_norm": 4.848870754241943, "learning_rate": 1.6692992081079918e-06, "loss": 0.3593, "step": 24980 }, { "epoch": 81.90491803278688, "grad_norm": 4.834438323974609, "learning_rate": 1.6687118537456881e-06, "loss": 0.3315, "step": 24981 }, { "epoch": 81.90819672131147, "grad_norm": 6.173121452331543, "learning_rate": 1.6681245933273182e-06, "loss": 0.468, "step": 24982 }, { "epoch": 81.91147540983607, "grad_norm": 4.538995265960693, "learning_rate": 1.667537426859509e-06, "loss": 0.2947, "step": 24983 }, { "epoch": 81.91475409836066, "grad_norm": 4.46865701675415, "learning_rate": 1.6669503543488797e-06, "loss": 0.3313, "step": 24984 }, { "epoch": 81.91803278688525, "grad_norm": 7.513923645019531, "learning_rate": 1.6663633758020482e-06, "loss": 0.4697, "step": 24985 }, { "epoch": 81.92131147540984, "grad_norm": 4.756322383880615, "learning_rate": 1.6657764912256324e-06, "loss": 0.3316, "step": 24986 }, { "epoch": 81.92459016393443, "grad_norm": 5.0146708488464355, "learning_rate": 1.6651897006262552e-06, "loss": 0.5355, "step": 24987 }, { "epoch": 81.92786885245901, "grad_norm": 5.980052947998047, "learning_rate": 1.66460300401053e-06, "loss": 0.3264, "step": 24988 }, { "epoch": 81.9311475409836, "grad_norm": 4.312601089477539, "learning_rate": 1.6640164013850724e-06, "loss": 0.45, "step": 24989 }, { "epoch": 81.93442622950819, "grad_norm": 6.346894264221191, "learning_rate": 1.6634298927564962e-06, "loss": 0.3798, "step": 24990 }, { "epoch": 81.9377049180328, "grad_norm": 21.76016616821289, "learning_rate": 1.6628434781314152e-06, "loss": 0.5116, "step": 24991 }, { "epoch": 81.94098360655738, "grad_norm": 5.075520992279053, "learning_rate": 1.6622571575164437e-06, "loss": 0.5452, "step": 24992 }, { "epoch": 81.94426229508197, "grad_norm": 4.950448989868164, "learning_rate": 1.661670930918191e-06, "loss": 0.6111, "step": 24993 }, { "epoch": 81.94754098360656, "grad_norm": 4.877209663391113, "learning_rate": 1.6610847983432698e-06, "loss": 0.3178, "step": 24994 }, { "epoch": 81.95081967213115, "grad_norm": 5.587578296661377, "learning_rate": 1.6604987597982846e-06, "loss": 0.2901, "step": 24995 }, { "epoch": 81.95409836065573, "grad_norm": 6.514734268188477, "learning_rate": 1.6599128152898492e-06, "loss": 0.251, "step": 24996 }, { "epoch": 81.95737704918032, "grad_norm": 5.588383674621582, "learning_rate": 1.6593269648245691e-06, "loss": 0.5006, "step": 24997 }, { "epoch": 81.96065573770491, "grad_norm": 6.491786956787109, "learning_rate": 1.6587412084090492e-06, "loss": 0.3321, "step": 24998 }, { "epoch": 81.96393442622951, "grad_norm": 42.56300735473633, "learning_rate": 1.658155546049892e-06, "loss": 0.4082, "step": 24999 }, { "epoch": 81.9672131147541, "grad_norm": 6.846665859222412, "learning_rate": 1.657569977753707e-06, "loss": 0.339, "step": 25000 }, { "epoch": 81.97049180327869, "grad_norm": 4.7674150466918945, "learning_rate": 1.6569845035270948e-06, "loss": 0.3559, "step": 25001 }, { "epoch": 81.97377049180328, "grad_norm": 7.032192230224609, "learning_rate": 1.6563991233766563e-06, "loss": 0.6502, "step": 25002 }, { "epoch": 81.97704918032787, "grad_norm": 3.91174054145813, "learning_rate": 1.6558138373089927e-06, "loss": 0.2703, "step": 25003 }, { "epoch": 81.98032786885246, "grad_norm": 4.299664497375488, "learning_rate": 1.655228645330702e-06, "loss": 0.4082, "step": 25004 }, { "epoch": 81.98360655737704, "grad_norm": 4.954019546508789, "learning_rate": 1.654643547448388e-06, "loss": 0.2392, "step": 25005 }, { "epoch": 81.98688524590163, "grad_norm": 7.776699542999268, "learning_rate": 1.654058543668644e-06, "loss": 0.4772, "step": 25006 }, { "epoch": 81.99016393442623, "grad_norm": 5.290733337402344, "learning_rate": 1.6534736339980673e-06, "loss": 0.2831, "step": 25007 }, { "epoch": 81.99344262295082, "grad_norm": 4.821288585662842, "learning_rate": 1.6528888184432513e-06, "loss": 0.2695, "step": 25008 }, { "epoch": 81.99672131147541, "grad_norm": 5.064499855041504, "learning_rate": 1.652304097010795e-06, "loss": 0.3042, "step": 25009 }, { "epoch": 82.0, "grad_norm": 6.09384298324585, "learning_rate": 1.6517194697072903e-06, "loss": 0.4983, "step": 25010 }, { "epoch": 82.00327868852459, "grad_norm": 4.60113000869751, "learning_rate": 1.6511349365393282e-06, "loss": 0.3313, "step": 25011 }, { "epoch": 82.00655737704918, "grad_norm": 6.954416751861572, "learning_rate": 1.6505504975134968e-06, "loss": 0.4437, "step": 25012 }, { "epoch": 82.00983606557377, "grad_norm": 9.199953079223633, "learning_rate": 1.6499661526363931e-06, "loss": 0.6169, "step": 25013 }, { "epoch": 82.01311475409837, "grad_norm": 5.530235290527344, "learning_rate": 1.6493819019146028e-06, "loss": 0.5994, "step": 25014 }, { "epoch": 82.01639344262296, "grad_norm": 5.861674785614014, "learning_rate": 1.6487977453547143e-06, "loss": 0.3743, "step": 25015 }, { "epoch": 82.01967213114754, "grad_norm": 4.379199504852295, "learning_rate": 1.6482136829633122e-06, "loss": 0.2557, "step": 25016 }, { "epoch": 82.02295081967213, "grad_norm": 4.523035526275635, "learning_rate": 1.6476297147469866e-06, "loss": 0.3027, "step": 25017 }, { "epoch": 82.02622950819672, "grad_norm": 5.415533542633057, "learning_rate": 1.64704584071232e-06, "loss": 0.4395, "step": 25018 }, { "epoch": 82.02950819672131, "grad_norm": 5.400857925415039, "learning_rate": 1.6464620608658943e-06, "loss": 0.3237, "step": 25019 }, { "epoch": 82.0327868852459, "grad_norm": 4.45985746383667, "learning_rate": 1.645878375214296e-06, "loss": 0.224, "step": 25020 }, { "epoch": 82.03606557377049, "grad_norm": 5.837567329406738, "learning_rate": 1.6452947837641054e-06, "loss": 0.3914, "step": 25021 }, { "epoch": 82.03934426229509, "grad_norm": 5.8253583908081055, "learning_rate": 1.644711286521904e-06, "loss": 0.4268, "step": 25022 }, { "epoch": 82.04262295081968, "grad_norm": 4.599496364593506, "learning_rate": 1.6441278834942665e-06, "loss": 0.4933, "step": 25023 }, { "epoch": 82.04590163934427, "grad_norm": 4.047767639160156, "learning_rate": 1.6435445746877776e-06, "loss": 0.37, "step": 25024 }, { "epoch": 82.04918032786885, "grad_norm": 4.315886974334717, "learning_rate": 1.6429613601090123e-06, "loss": 0.3683, "step": 25025 }, { "epoch": 82.05245901639344, "grad_norm": 5.076103210449219, "learning_rate": 1.642378239764545e-06, "loss": 0.2673, "step": 25026 }, { "epoch": 82.05573770491803, "grad_norm": 20.836557388305664, "learning_rate": 1.641795213660955e-06, "loss": 0.3467, "step": 25027 }, { "epoch": 82.05901639344262, "grad_norm": 6.244696140289307, "learning_rate": 1.6412122818048137e-06, "loss": 0.3339, "step": 25028 }, { "epoch": 82.0622950819672, "grad_norm": 4.858902931213379, "learning_rate": 1.6406294442026949e-06, "loss": 0.3079, "step": 25029 }, { "epoch": 82.06557377049181, "grad_norm": 6.397939682006836, "learning_rate": 1.6400467008611686e-06, "loss": 0.291, "step": 25030 }, { "epoch": 82.0688524590164, "grad_norm": 5.017876148223877, "learning_rate": 1.639464051786811e-06, "loss": 0.3475, "step": 25031 }, { "epoch": 82.07213114754099, "grad_norm": 5.973368167877197, "learning_rate": 1.6388814969861876e-06, "loss": 0.4593, "step": 25032 }, { "epoch": 82.07540983606557, "grad_norm": 4.652606964111328, "learning_rate": 1.6382990364658702e-06, "loss": 0.389, "step": 25033 }, { "epoch": 82.07868852459016, "grad_norm": 5.071335792541504, "learning_rate": 1.6377166702324209e-06, "loss": 0.5589, "step": 25034 }, { "epoch": 82.08196721311475, "grad_norm": 6.325332164764404, "learning_rate": 1.6371343982924148e-06, "loss": 0.2169, "step": 25035 }, { "epoch": 82.08524590163934, "grad_norm": 4.799328327178955, "learning_rate": 1.6365522206524132e-06, "loss": 0.492, "step": 25036 }, { "epoch": 82.08852459016393, "grad_norm": 6.165915489196777, "learning_rate": 1.6359701373189806e-06, "loss": 0.5096, "step": 25037 }, { "epoch": 82.09180327868853, "grad_norm": 4.690781593322754, "learning_rate": 1.6353881482986789e-06, "loss": 0.3731, "step": 25038 }, { "epoch": 82.09508196721312, "grad_norm": 5.1557745933532715, "learning_rate": 1.6348062535980757e-06, "loss": 0.3985, "step": 25039 }, { "epoch": 82.09836065573771, "grad_norm": 5.423150062561035, "learning_rate": 1.6342244532237294e-06, "loss": 0.3263, "step": 25040 }, { "epoch": 82.1016393442623, "grad_norm": 4.6713056564331055, "learning_rate": 1.6336427471822002e-06, "loss": 0.4033, "step": 25041 }, { "epoch": 82.10491803278688, "grad_norm": 7.684961795806885, "learning_rate": 1.6330611354800485e-06, "loss": 0.3892, "step": 25042 }, { "epoch": 82.10819672131147, "grad_norm": 5.976787567138672, "learning_rate": 1.632479618123829e-06, "loss": 0.5848, "step": 25043 }, { "epoch": 82.11147540983606, "grad_norm": 6.201546669006348, "learning_rate": 1.631898195120104e-06, "loss": 0.4237, "step": 25044 }, { "epoch": 82.11475409836065, "grad_norm": 5.622354030609131, "learning_rate": 1.6313168664754287e-06, "loss": 0.4046, "step": 25045 }, { "epoch": 82.11803278688525, "grad_norm": 5.112272262573242, "learning_rate": 1.6307356321963551e-06, "loss": 0.5927, "step": 25046 }, { "epoch": 82.12131147540984, "grad_norm": 5.0665388107299805, "learning_rate": 1.6301544922894385e-06, "loss": 0.3696, "step": 25047 }, { "epoch": 82.12459016393443, "grad_norm": 4.400571346282959, "learning_rate": 1.6295734467612334e-06, "loss": 0.3303, "step": 25048 }, { "epoch": 82.12786885245902, "grad_norm": 4.51556396484375, "learning_rate": 1.6289924956182923e-06, "loss": 0.2806, "step": 25049 }, { "epoch": 82.1311475409836, "grad_norm": 4.1563849449157715, "learning_rate": 1.6284116388671633e-06, "loss": 0.32, "step": 25050 }, { "epoch": 82.1344262295082, "grad_norm": 7.007330417633057, "learning_rate": 1.6278308765143958e-06, "loss": 0.4218, "step": 25051 }, { "epoch": 82.13770491803278, "grad_norm": 4.506392478942871, "learning_rate": 1.627250208566542e-06, "loss": 0.3244, "step": 25052 }, { "epoch": 82.14098360655737, "grad_norm": 4.787752628326416, "learning_rate": 1.626669635030147e-06, "loss": 0.2957, "step": 25053 }, { "epoch": 82.14426229508197, "grad_norm": 6.615737438201904, "learning_rate": 1.6260891559117587e-06, "loss": 0.4765, "step": 25054 }, { "epoch": 82.14754098360656, "grad_norm": 5.243069171905518, "learning_rate": 1.6255087712179218e-06, "loss": 0.4623, "step": 25055 }, { "epoch": 82.15081967213115, "grad_norm": 7.472012519836426, "learning_rate": 1.624928480955178e-06, "loss": 0.3171, "step": 25056 }, { "epoch": 82.15409836065574, "grad_norm": 5.52097225189209, "learning_rate": 1.6243482851300763e-06, "loss": 0.4472, "step": 25057 }, { "epoch": 82.15737704918033, "grad_norm": 6.105013847351074, "learning_rate": 1.6237681837491559e-06, "loss": 0.5876, "step": 25058 }, { "epoch": 82.16065573770491, "grad_norm": 5.0812273025512695, "learning_rate": 1.6231881768189583e-06, "loss": 0.2287, "step": 25059 }, { "epoch": 82.1639344262295, "grad_norm": 4.931200981140137, "learning_rate": 1.6226082643460206e-06, "loss": 0.5031, "step": 25060 }, { "epoch": 82.1672131147541, "grad_norm": 5.622974395751953, "learning_rate": 1.6220284463368885e-06, "loss": 0.2786, "step": 25061 }, { "epoch": 82.1704918032787, "grad_norm": 7.308785438537598, "learning_rate": 1.6214487227980969e-06, "loss": 0.4468, "step": 25062 }, { "epoch": 82.17377049180328, "grad_norm": 4.317833423614502, "learning_rate": 1.6208690937361816e-06, "loss": 0.4428, "step": 25063 }, { "epoch": 82.17704918032787, "grad_norm": 8.182398796081543, "learning_rate": 1.6202895591576772e-06, "loss": 0.3777, "step": 25064 }, { "epoch": 82.18032786885246, "grad_norm": 4.156322479248047, "learning_rate": 1.6197101190691233e-06, "loss": 0.4058, "step": 25065 }, { "epoch": 82.18360655737705, "grad_norm": 7.172354221343994, "learning_rate": 1.619130773477051e-06, "loss": 0.5114, "step": 25066 }, { "epoch": 82.18688524590164, "grad_norm": 4.151605606079102, "learning_rate": 1.6185515223879934e-06, "loss": 0.2811, "step": 25067 }, { "epoch": 82.19016393442622, "grad_norm": 5.579764366149902, "learning_rate": 1.6179723658084811e-06, "loss": 0.371, "step": 25068 }, { "epoch": 82.19344262295083, "grad_norm": 4.778337001800537, "learning_rate": 1.617393303745044e-06, "loss": 0.4385, "step": 25069 }, { "epoch": 82.19672131147541, "grad_norm": 4.416961669921875, "learning_rate": 1.6168143362042144e-06, "loss": 0.2281, "step": 25070 }, { "epoch": 82.2, "grad_norm": 4.5830559730529785, "learning_rate": 1.6162354631925203e-06, "loss": 0.4294, "step": 25071 }, { "epoch": 82.20327868852459, "grad_norm": 5.438065528869629, "learning_rate": 1.6156566847164868e-06, "loss": 0.3151, "step": 25072 }, { "epoch": 82.20655737704918, "grad_norm": 6.789941310882568, "learning_rate": 1.6150780007826428e-06, "loss": 0.4215, "step": 25073 }, { "epoch": 82.20983606557377, "grad_norm": 5.156021595001221, "learning_rate": 1.6144994113975122e-06, "loss": 0.1817, "step": 25074 }, { "epoch": 82.21311475409836, "grad_norm": 4.3441972732543945, "learning_rate": 1.6139209165676184e-06, "loss": 0.5214, "step": 25075 }, { "epoch": 82.21639344262294, "grad_norm": 5.42153263092041, "learning_rate": 1.6133425162994864e-06, "loss": 0.5824, "step": 25076 }, { "epoch": 82.21967213114755, "grad_norm": 5.694153785705566, "learning_rate": 1.6127642105996388e-06, "loss": 0.3201, "step": 25077 }, { "epoch": 82.22295081967214, "grad_norm": 3.8140718936920166, "learning_rate": 1.6121859994745925e-06, "loss": 0.363, "step": 25078 }, { "epoch": 82.22622950819672, "grad_norm": 6.185452461242676, "learning_rate": 1.6116078829308724e-06, "loss": 0.3514, "step": 25079 }, { "epoch": 82.22950819672131, "grad_norm": 4.863508224487305, "learning_rate": 1.6110298609749953e-06, "loss": 0.4173, "step": 25080 }, { "epoch": 82.2327868852459, "grad_norm": 8.843549728393555, "learning_rate": 1.6104519336134794e-06, "loss": 0.2261, "step": 25081 }, { "epoch": 82.23606557377049, "grad_norm": 6.541079998016357, "learning_rate": 1.6098741008528373e-06, "loss": 0.3514, "step": 25082 }, { "epoch": 82.23934426229508, "grad_norm": 4.731276512145996, "learning_rate": 1.6092963626995918e-06, "loss": 0.5546, "step": 25083 }, { "epoch": 82.24262295081967, "grad_norm": 6.024446487426758, "learning_rate": 1.6087187191602527e-06, "loss": 0.4137, "step": 25084 }, { "epoch": 82.24590163934427, "grad_norm": 5.229333877563477, "learning_rate": 1.6081411702413364e-06, "loss": 0.4667, "step": 25085 }, { "epoch": 82.24918032786886, "grad_norm": 8.565899848937988, "learning_rate": 1.6075637159493495e-06, "loss": 0.3946, "step": 25086 }, { "epoch": 82.25245901639344, "grad_norm": 5.772932529449463, "learning_rate": 1.6069863562908117e-06, "loss": 0.4741, "step": 25087 }, { "epoch": 82.25573770491803, "grad_norm": 18.347400665283203, "learning_rate": 1.6064090912722286e-06, "loss": 0.2424, "step": 25088 }, { "epoch": 82.25901639344262, "grad_norm": 4.3323469161987305, "learning_rate": 1.6058319209001105e-06, "loss": 0.2749, "step": 25089 }, { "epoch": 82.26229508196721, "grad_norm": 5.227607727050781, "learning_rate": 1.6052548451809623e-06, "loss": 0.2913, "step": 25090 }, { "epoch": 82.2655737704918, "grad_norm": 4.4934492111206055, "learning_rate": 1.6046778641212968e-06, "loss": 0.3457, "step": 25091 }, { "epoch": 82.26885245901639, "grad_norm": 6.477654933929443, "learning_rate": 1.6041009777276173e-06, "loss": 0.3364, "step": 25092 }, { "epoch": 82.27213114754099, "grad_norm": 4.67807149887085, "learning_rate": 1.6035241860064278e-06, "loss": 0.3221, "step": 25093 }, { "epoch": 82.27540983606558, "grad_norm": 5.730904579162598, "learning_rate": 1.6029474889642348e-06, "loss": 0.3404, "step": 25094 }, { "epoch": 82.27868852459017, "grad_norm": 6.777076244354248, "learning_rate": 1.602370886607536e-06, "loss": 0.3183, "step": 25095 }, { "epoch": 82.28196721311475, "grad_norm": 4.412967205047607, "learning_rate": 1.6017943789428402e-06, "loss": 0.2846, "step": 25096 }, { "epoch": 82.28524590163934, "grad_norm": 5.184345245361328, "learning_rate": 1.6012179659766447e-06, "loss": 0.2442, "step": 25097 }, { "epoch": 82.28852459016393, "grad_norm": 4.29900598526001, "learning_rate": 1.6006416477154485e-06, "loss": 0.4269, "step": 25098 }, { "epoch": 82.29180327868852, "grad_norm": 5.669738292694092, "learning_rate": 1.6000654241657488e-06, "loss": 0.3975, "step": 25099 }, { "epoch": 82.29508196721312, "grad_norm": 4.316452980041504, "learning_rate": 1.5994892953340468e-06, "loss": 0.3938, "step": 25100 }, { "epoch": 82.29836065573771, "grad_norm": 4.9147491455078125, "learning_rate": 1.5989132612268388e-06, "loss": 0.3687, "step": 25101 }, { "epoch": 82.3016393442623, "grad_norm": 6.525640487670898, "learning_rate": 1.5983373218506171e-06, "loss": 0.4471, "step": 25102 }, { "epoch": 82.30491803278689, "grad_norm": 7.152563571929932, "learning_rate": 1.5977614772118765e-06, "loss": 0.2214, "step": 25103 }, { "epoch": 82.30819672131148, "grad_norm": 4.270219326019287, "learning_rate": 1.5971857273171132e-06, "loss": 0.3754, "step": 25104 }, { "epoch": 82.31147540983606, "grad_norm": 7.271642208099365, "learning_rate": 1.5966100721728184e-06, "loss": 0.2291, "step": 25105 }, { "epoch": 82.31475409836065, "grad_norm": 6.36660623550415, "learning_rate": 1.5960345117854814e-06, "loss": 0.3932, "step": 25106 }, { "epoch": 82.31803278688524, "grad_norm": 5.828049182891846, "learning_rate": 1.5954590461615937e-06, "loss": 0.291, "step": 25107 }, { "epoch": 82.32131147540984, "grad_norm": 4.918052673339844, "learning_rate": 1.5948836753076412e-06, "loss": 0.4143, "step": 25108 }, { "epoch": 82.32459016393443, "grad_norm": 5.064399719238281, "learning_rate": 1.5943083992301166e-06, "loss": 0.5171, "step": 25109 }, { "epoch": 82.32786885245902, "grad_norm": 4.932799816131592, "learning_rate": 1.5937332179355047e-06, "loss": 0.8373, "step": 25110 }, { "epoch": 82.33114754098361, "grad_norm": 4.607056617736816, "learning_rate": 1.5931581314302914e-06, "loss": 0.2878, "step": 25111 }, { "epoch": 82.3344262295082, "grad_norm": 5.697445392608643, "learning_rate": 1.592583139720958e-06, "loss": 0.3178, "step": 25112 }, { "epoch": 82.33770491803278, "grad_norm": 4.675144672393799, "learning_rate": 1.5920082428139938e-06, "loss": 0.3248, "step": 25113 }, { "epoch": 82.34098360655737, "grad_norm": 4.364222526550293, "learning_rate": 1.5914334407158793e-06, "loss": 0.2644, "step": 25114 }, { "epoch": 82.34426229508196, "grad_norm": 4.908178806304932, "learning_rate": 1.590858733433095e-06, "loss": 0.3741, "step": 25115 }, { "epoch": 82.34754098360656, "grad_norm": 5.986622333526611, "learning_rate": 1.5902841209721198e-06, "loss": 0.2759, "step": 25116 }, { "epoch": 82.35081967213115, "grad_norm": 4.711810111999512, "learning_rate": 1.5897096033394377e-06, "loss": 0.5743, "step": 25117 }, { "epoch": 82.35409836065574, "grad_norm": 6.2973551750183105, "learning_rate": 1.5891351805415233e-06, "loss": 0.3469, "step": 25118 }, { "epoch": 82.35737704918033, "grad_norm": 4.76272439956665, "learning_rate": 1.5885608525848561e-06, "loss": 0.304, "step": 25119 }, { "epoch": 82.36065573770492, "grad_norm": 4.717286586761475, "learning_rate": 1.5879866194759096e-06, "loss": 0.4633, "step": 25120 }, { "epoch": 82.3639344262295, "grad_norm": 4.242273807525635, "learning_rate": 1.587412481221159e-06, "loss": 0.4249, "step": 25121 }, { "epoch": 82.3672131147541, "grad_norm": 5.21420431137085, "learning_rate": 1.5868384378270817e-06, "loss": 0.331, "step": 25122 }, { "epoch": 82.37049180327868, "grad_norm": 4.571977138519287, "learning_rate": 1.5862644893001478e-06, "loss": 0.2353, "step": 25123 }, { "epoch": 82.37377049180328, "grad_norm": 4.944120407104492, "learning_rate": 1.5856906356468305e-06, "loss": 0.423, "step": 25124 }, { "epoch": 82.37704918032787, "grad_norm": 4.632366180419922, "learning_rate": 1.5851168768735981e-06, "loss": 0.6179, "step": 25125 }, { "epoch": 82.38032786885246, "grad_norm": 5.029324531555176, "learning_rate": 1.584543212986923e-06, "loss": 0.2611, "step": 25126 }, { "epoch": 82.38360655737705, "grad_norm": 5.020160675048828, "learning_rate": 1.583969643993275e-06, "loss": 0.2836, "step": 25127 }, { "epoch": 82.38688524590164, "grad_norm": 4.493772983551025, "learning_rate": 1.5833961698991163e-06, "loss": 0.191, "step": 25128 }, { "epoch": 82.39016393442623, "grad_norm": 6.198846340179443, "learning_rate": 1.5828227907109194e-06, "loss": 0.4465, "step": 25129 }, { "epoch": 82.39344262295081, "grad_norm": 4.285780429840088, "learning_rate": 1.5822495064351474e-06, "loss": 0.5131, "step": 25130 }, { "epoch": 82.3967213114754, "grad_norm": 4.830199718475342, "learning_rate": 1.5816763170782612e-06, "loss": 0.259, "step": 25131 }, { "epoch": 82.4, "grad_norm": 5.394554138183594, "learning_rate": 1.5811032226467304e-06, "loss": 0.2264, "step": 25132 }, { "epoch": 82.4032786885246, "grad_norm": 4.7088623046875, "learning_rate": 1.5805302231470144e-06, "loss": 0.6299, "step": 25133 }, { "epoch": 82.40655737704918, "grad_norm": 7.428195953369141, "learning_rate": 1.5799573185855722e-06, "loss": 0.3234, "step": 25134 }, { "epoch": 82.40983606557377, "grad_norm": 3.9113786220550537, "learning_rate": 1.5793845089688676e-06, "loss": 0.2356, "step": 25135 }, { "epoch": 82.41311475409836, "grad_norm": 5.459858417510986, "learning_rate": 1.5788117943033577e-06, "loss": 0.3469, "step": 25136 }, { "epoch": 82.41639344262295, "grad_norm": 9.375993728637695, "learning_rate": 1.5782391745955006e-06, "loss": 0.408, "step": 25137 }, { "epoch": 82.41967213114754, "grad_norm": 13.749466896057129, "learning_rate": 1.5776666498517511e-06, "loss": 0.136, "step": 25138 }, { "epoch": 82.42295081967212, "grad_norm": 6.943284034729004, "learning_rate": 1.57709422007857e-06, "loss": 0.5022, "step": 25139 }, { "epoch": 82.42622950819673, "grad_norm": 3.95076847076416, "learning_rate": 1.5765218852824094e-06, "loss": 0.2927, "step": 25140 }, { "epoch": 82.42950819672132, "grad_norm": 6.4614410400390625, "learning_rate": 1.5759496454697221e-06, "loss": 0.5097, "step": 25141 }, { "epoch": 82.4327868852459, "grad_norm": 5.653584003448486, "learning_rate": 1.5753775006469607e-06, "loss": 0.4044, "step": 25142 }, { "epoch": 82.43606557377049, "grad_norm": 5.631644248962402, "learning_rate": 1.5748054508205767e-06, "loss": 0.284, "step": 25143 }, { "epoch": 82.43934426229508, "grad_norm": 4.372064113616943, "learning_rate": 1.574233495997022e-06, "loss": 0.3267, "step": 25144 }, { "epoch": 82.44262295081967, "grad_norm": 4.974976062774658, "learning_rate": 1.5736616361827461e-06, "loss": 0.7592, "step": 25145 }, { "epoch": 82.44590163934426, "grad_norm": 4.918408393859863, "learning_rate": 1.5730898713841968e-06, "loss": 0.6554, "step": 25146 }, { "epoch": 82.44918032786886, "grad_norm": 4.5975141525268555, "learning_rate": 1.5725182016078178e-06, "loss": 0.3021, "step": 25147 }, { "epoch": 82.45245901639345, "grad_norm": 3.912869453430176, "learning_rate": 1.5719466268600615e-06, "loss": 0.3215, "step": 25148 }, { "epoch": 82.45573770491804, "grad_norm": 7.0180864334106445, "learning_rate": 1.5713751471473692e-06, "loss": 0.4819, "step": 25149 }, { "epoch": 82.45901639344262, "grad_norm": 5.480571746826172, "learning_rate": 1.5708037624761862e-06, "loss": 0.3326, "step": 25150 }, { "epoch": 82.46229508196721, "grad_norm": 5.577145099639893, "learning_rate": 1.5702324728529527e-06, "loss": 0.2844, "step": 25151 }, { "epoch": 82.4655737704918, "grad_norm": 7.241253852844238, "learning_rate": 1.5696612782841147e-06, "loss": 0.37, "step": 25152 }, { "epoch": 82.46885245901639, "grad_norm": 5.916701316833496, "learning_rate": 1.5690901787761114e-06, "loss": 0.6294, "step": 25153 }, { "epoch": 82.47213114754098, "grad_norm": 4.913863658905029, "learning_rate": 1.5685191743353823e-06, "loss": 0.4009, "step": 25154 }, { "epoch": 82.47540983606558, "grad_norm": 5.577663898468018, "learning_rate": 1.5679482649683664e-06, "loss": 0.6556, "step": 25155 }, { "epoch": 82.47868852459017, "grad_norm": 4.073611736297607, "learning_rate": 1.5673774506814977e-06, "loss": 0.177, "step": 25156 }, { "epoch": 82.48196721311476, "grad_norm": 5.044681072235107, "learning_rate": 1.5668067314812197e-06, "loss": 0.4308, "step": 25157 }, { "epoch": 82.48524590163935, "grad_norm": 4.154664993286133, "learning_rate": 1.5662361073739629e-06, "loss": 0.3479, "step": 25158 }, { "epoch": 82.48852459016393, "grad_norm": 5.417768955230713, "learning_rate": 1.5656655783661634e-06, "loss": 0.303, "step": 25159 }, { "epoch": 82.49180327868852, "grad_norm": 5.377200603485107, "learning_rate": 1.5650951444642516e-06, "loss": 0.4264, "step": 25160 }, { "epoch": 82.49508196721311, "grad_norm": 7.406689643859863, "learning_rate": 1.5645248056746642e-06, "loss": 0.4623, "step": 25161 }, { "epoch": 82.4983606557377, "grad_norm": 5.032729148864746, "learning_rate": 1.563954562003831e-06, "loss": 0.2042, "step": 25162 }, { "epoch": 82.5016393442623, "grad_norm": 15.95809268951416, "learning_rate": 1.5633844134581811e-06, "loss": 0.3014, "step": 25163 }, { "epoch": 82.50491803278689, "grad_norm": 5.833241939544678, "learning_rate": 1.5628143600441415e-06, "loss": 0.356, "step": 25164 }, { "epoch": 82.50819672131148, "grad_norm": 5.634994029998779, "learning_rate": 1.5622444017681438e-06, "loss": 0.6364, "step": 25165 }, { "epoch": 82.51147540983607, "grad_norm": 3.9560532569885254, "learning_rate": 1.561674538636615e-06, "loss": 0.4311, "step": 25166 }, { "epoch": 82.51475409836065, "grad_norm": 5.1816630363464355, "learning_rate": 1.561104770655979e-06, "loss": 0.4225, "step": 25167 }, { "epoch": 82.51803278688524, "grad_norm": 5.297311782836914, "learning_rate": 1.5605350978326606e-06, "loss": 0.356, "step": 25168 }, { "epoch": 82.52131147540983, "grad_norm": 4.243213176727295, "learning_rate": 1.5599655201730812e-06, "loss": 0.5135, "step": 25169 }, { "epoch": 82.52459016393442, "grad_norm": 4.040510177612305, "learning_rate": 1.5593960376836693e-06, "loss": 0.3436, "step": 25170 }, { "epoch": 82.52786885245902, "grad_norm": 5.334184646606445, "learning_rate": 1.5588266503708428e-06, "loss": 0.58, "step": 25171 }, { "epoch": 82.53114754098361, "grad_norm": 6.242084980010986, "learning_rate": 1.558257358241022e-06, "loss": 0.2986, "step": 25172 }, { "epoch": 82.5344262295082, "grad_norm": 4.183608531951904, "learning_rate": 1.5576881613006246e-06, "loss": 0.5534, "step": 25173 }, { "epoch": 82.53770491803279, "grad_norm": 4.5391974449157715, "learning_rate": 1.5571190595560736e-06, "loss": 0.306, "step": 25174 }, { "epoch": 82.54098360655738, "grad_norm": 5.965123653411865, "learning_rate": 1.5565500530137834e-06, "loss": 0.3646, "step": 25175 }, { "epoch": 82.54426229508196, "grad_norm": 6.9853034019470215, "learning_rate": 1.5559811416801695e-06, "loss": 0.3311, "step": 25176 }, { "epoch": 82.54754098360655, "grad_norm": 5.661866664886475, "learning_rate": 1.555412325561646e-06, "loss": 0.2882, "step": 25177 }, { "epoch": 82.55081967213114, "grad_norm": 3.6756250858306885, "learning_rate": 1.554843604664632e-06, "loss": 0.3114, "step": 25178 }, { "epoch": 82.55409836065574, "grad_norm": 4.757260799407959, "learning_rate": 1.5542749789955358e-06, "loss": 0.3404, "step": 25179 }, { "epoch": 82.55737704918033, "grad_norm": 4.582414150238037, "learning_rate": 1.553706448560769e-06, "loss": 0.3571, "step": 25180 }, { "epoch": 82.56065573770492, "grad_norm": 4.2360053062438965, "learning_rate": 1.5531380133667461e-06, "loss": 0.2905, "step": 25181 }, { "epoch": 82.56393442622951, "grad_norm": 4.744228839874268, "learning_rate": 1.5525696734198748e-06, "loss": 0.4548, "step": 25182 }, { "epoch": 82.5672131147541, "grad_norm": 4.79449987411499, "learning_rate": 1.5520014287265605e-06, "loss": 0.547, "step": 25183 }, { "epoch": 82.57049180327868, "grad_norm": 6.659020900726318, "learning_rate": 1.5514332792932174e-06, "loss": 0.4528, "step": 25184 }, { "epoch": 82.57377049180327, "grad_norm": 3.7958202362060547, "learning_rate": 1.550865225126248e-06, "loss": 0.1146, "step": 25185 }, { "epoch": 82.57704918032788, "grad_norm": 4.791402339935303, "learning_rate": 1.550297266232057e-06, "loss": 0.2766, "step": 25186 }, { "epoch": 82.58032786885246, "grad_norm": 10.316519737243652, "learning_rate": 1.5497294026170484e-06, "loss": 0.4414, "step": 25187 }, { "epoch": 82.58360655737705, "grad_norm": 3.9037420749664307, "learning_rate": 1.5491616342876292e-06, "loss": 0.4369, "step": 25188 }, { "epoch": 82.58688524590164, "grad_norm": 5.293684482574463, "learning_rate": 1.5485939612501988e-06, "loss": 0.3837, "step": 25189 }, { "epoch": 82.59016393442623, "grad_norm": 5.742382049560547, "learning_rate": 1.5480263835111564e-06, "loss": 0.2616, "step": 25190 }, { "epoch": 82.59344262295082, "grad_norm": 4.2060065269470215, "learning_rate": 1.5474589010769082e-06, "loss": 0.3696, "step": 25191 }, { "epoch": 82.5967213114754, "grad_norm": 4.461858749389648, "learning_rate": 1.5468915139538476e-06, "loss": 0.4648, "step": 25192 }, { "epoch": 82.6, "grad_norm": 9.346540451049805, "learning_rate": 1.5463242221483742e-06, "loss": 0.4941, "step": 25193 }, { "epoch": 82.6032786885246, "grad_norm": 7.15645170211792, "learning_rate": 1.5457570256668864e-06, "loss": 0.5338, "step": 25194 }, { "epoch": 82.60655737704919, "grad_norm": 5.037214279174805, "learning_rate": 1.5451899245157742e-06, "loss": 0.4764, "step": 25195 }, { "epoch": 82.60983606557377, "grad_norm": 6.372318744659424, "learning_rate": 1.5446229187014393e-06, "loss": 0.3091, "step": 25196 }, { "epoch": 82.61311475409836, "grad_norm": 4.127138137817383, "learning_rate": 1.5440560082302725e-06, "loss": 0.3253, "step": 25197 }, { "epoch": 82.61639344262295, "grad_norm": 4.811924457550049, "learning_rate": 1.5434891931086671e-06, "loss": 0.5167, "step": 25198 }, { "epoch": 82.61967213114754, "grad_norm": 4.224198818206787, "learning_rate": 1.5429224733430104e-06, "loss": 0.4887, "step": 25199 }, { "epoch": 82.62295081967213, "grad_norm": 5.866121768951416, "learning_rate": 1.5423558489396983e-06, "loss": 0.2517, "step": 25200 }, { "epoch": 82.62622950819672, "grad_norm": 7.176183223724365, "learning_rate": 1.541789319905117e-06, "loss": 0.2783, "step": 25201 }, { "epoch": 82.62950819672132, "grad_norm": 4.033351421356201, "learning_rate": 1.5412228862456569e-06, "loss": 0.4289, "step": 25202 }, { "epoch": 82.6327868852459, "grad_norm": 3.8529598712921143, "learning_rate": 1.5406565479677006e-06, "loss": 0.3169, "step": 25203 }, { "epoch": 82.6360655737705, "grad_norm": 4.243241786956787, "learning_rate": 1.5400903050776394e-06, "loss": 0.3349, "step": 25204 }, { "epoch": 82.63934426229508, "grad_norm": 6.497984409332275, "learning_rate": 1.5395241575818565e-06, "loss": 0.4341, "step": 25205 }, { "epoch": 82.64262295081967, "grad_norm": 4.756158828735352, "learning_rate": 1.5389581054867354e-06, "loss": 0.2153, "step": 25206 }, { "epoch": 82.64590163934426, "grad_norm": 4.696633815765381, "learning_rate": 1.5383921487986585e-06, "loss": 0.5569, "step": 25207 }, { "epoch": 82.64918032786885, "grad_norm": 5.600113868713379, "learning_rate": 1.5378262875240058e-06, "loss": 0.5312, "step": 25208 }, { "epoch": 82.65245901639344, "grad_norm": 7.706052303314209, "learning_rate": 1.5372605216691627e-06, "loss": 0.2809, "step": 25209 }, { "epoch": 82.65573770491804, "grad_norm": 5.193806171417236, "learning_rate": 1.5366948512405066e-06, "loss": 0.2882, "step": 25210 }, { "epoch": 82.65901639344263, "grad_norm": 4.044198513031006, "learning_rate": 1.5361292762444157e-06, "loss": 0.3877, "step": 25211 }, { "epoch": 82.66229508196722, "grad_norm": 3.928013801574707, "learning_rate": 1.5355637966872638e-06, "loss": 0.2529, "step": 25212 }, { "epoch": 82.6655737704918, "grad_norm": 4.682987689971924, "learning_rate": 1.5349984125754347e-06, "loss": 0.4581, "step": 25213 }, { "epoch": 82.66885245901639, "grad_norm": 5.298662185668945, "learning_rate": 1.5344331239152998e-06, "loss": 0.3623, "step": 25214 }, { "epoch": 82.67213114754098, "grad_norm": 4.027653694152832, "learning_rate": 1.533867930713232e-06, "loss": 0.3815, "step": 25215 }, { "epoch": 82.67540983606557, "grad_norm": 5.588288307189941, "learning_rate": 1.533302832975605e-06, "loss": 0.367, "step": 25216 }, { "epoch": 82.67868852459016, "grad_norm": 5.138272762298584, "learning_rate": 1.5327378307087937e-06, "loss": 0.3229, "step": 25217 }, { "epoch": 82.68196721311476, "grad_norm": 6.276293754577637, "learning_rate": 1.5321729239191663e-06, "loss": 0.3609, "step": 25218 }, { "epoch": 82.68524590163935, "grad_norm": 4.781484127044678, "learning_rate": 1.5316081126130944e-06, "loss": 0.3283, "step": 25219 }, { "epoch": 82.68852459016394, "grad_norm": 4.82063102722168, "learning_rate": 1.5310433967969463e-06, "loss": 0.4423, "step": 25220 }, { "epoch": 82.69180327868852, "grad_norm": 5.226006507873535, "learning_rate": 1.5304787764770857e-06, "loss": 0.3285, "step": 25221 }, { "epoch": 82.69508196721311, "grad_norm": 5.546745777130127, "learning_rate": 1.5299142516598864e-06, "loss": 0.5283, "step": 25222 }, { "epoch": 82.6983606557377, "grad_norm": 4.895414352416992, "learning_rate": 1.5293498223517101e-06, "loss": 0.4109, "step": 25223 }, { "epoch": 82.70163934426229, "grad_norm": 4.2981414794921875, "learning_rate": 1.5287854885589227e-06, "loss": 0.3702, "step": 25224 }, { "epoch": 82.70491803278688, "grad_norm": 5.893007755279541, "learning_rate": 1.5282212502878835e-06, "loss": 0.4378, "step": 25225 }, { "epoch": 82.70819672131148, "grad_norm": 3.7297821044921875, "learning_rate": 1.5276571075449608e-06, "loss": 0.171, "step": 25226 }, { "epoch": 82.71147540983607, "grad_norm": 4.0163421630859375, "learning_rate": 1.527093060336513e-06, "loss": 0.348, "step": 25227 }, { "epoch": 82.71475409836066, "grad_norm": 5.683448314666748, "learning_rate": 1.5265291086689005e-06, "loss": 0.3011, "step": 25228 }, { "epoch": 82.71803278688525, "grad_norm": 4.211265563964844, "learning_rate": 1.5259652525484803e-06, "loss": 0.4734, "step": 25229 }, { "epoch": 82.72131147540983, "grad_norm": 4.644227981567383, "learning_rate": 1.525401491981615e-06, "loss": 0.3188, "step": 25230 }, { "epoch": 82.72459016393442, "grad_norm": 6.156368255615234, "learning_rate": 1.52483782697466e-06, "loss": 0.479, "step": 25231 }, { "epoch": 82.72786885245901, "grad_norm": 5.768285274505615, "learning_rate": 1.5242742575339696e-06, "loss": 0.3994, "step": 25232 }, { "epoch": 82.73114754098361, "grad_norm": 6.334478378295898, "learning_rate": 1.523710783665897e-06, "loss": 0.3438, "step": 25233 }, { "epoch": 82.7344262295082, "grad_norm": 5.694327354431152, "learning_rate": 1.5231474053768013e-06, "loss": 0.3369, "step": 25234 }, { "epoch": 82.73770491803279, "grad_norm": 4.6637349128723145, "learning_rate": 1.522584122673032e-06, "loss": 0.4548, "step": 25235 }, { "epoch": 82.74098360655738, "grad_norm": 6.316756725311279, "learning_rate": 1.5220209355609383e-06, "loss": 0.3041, "step": 25236 }, { "epoch": 82.74426229508197, "grad_norm": 5.073029041290283, "learning_rate": 1.5214578440468764e-06, "loss": 0.3586, "step": 25237 }, { "epoch": 82.74754098360656, "grad_norm": 4.909664154052734, "learning_rate": 1.5208948481371932e-06, "loss": 0.2375, "step": 25238 }, { "epoch": 82.75081967213114, "grad_norm": 5.265125274658203, "learning_rate": 1.520331947838234e-06, "loss": 0.3997, "step": 25239 }, { "epoch": 82.75409836065573, "grad_norm": 3.997286796569824, "learning_rate": 1.519769143156351e-06, "loss": 0.1982, "step": 25240 }, { "epoch": 82.75737704918033, "grad_norm": 3.902639865875244, "learning_rate": 1.5192064340978874e-06, "loss": 0.1348, "step": 25241 }, { "epoch": 82.76065573770492, "grad_norm": 6.599041938781738, "learning_rate": 1.5186438206691879e-06, "loss": 0.2495, "step": 25242 }, { "epoch": 82.76393442622951, "grad_norm": 4.185823440551758, "learning_rate": 1.5180813028765995e-06, "loss": 0.4192, "step": 25243 }, { "epoch": 82.7672131147541, "grad_norm": 4.933048248291016, "learning_rate": 1.5175188807264628e-06, "loss": 0.4404, "step": 25244 }, { "epoch": 82.77049180327869, "grad_norm": 4.381196975708008, "learning_rate": 1.5169565542251208e-06, "loss": 0.2217, "step": 25245 }, { "epoch": 82.77377049180328, "grad_norm": 9.404826164245605, "learning_rate": 1.5163943233789147e-06, "loss": 0.5548, "step": 25246 }, { "epoch": 82.77704918032786, "grad_norm": 4.413273334503174, "learning_rate": 1.5158321881941796e-06, "loss": 0.2736, "step": 25247 }, { "epoch": 82.78032786885245, "grad_norm": 4.225705146789551, "learning_rate": 1.5152701486772613e-06, "loss": 0.2454, "step": 25248 }, { "epoch": 82.78360655737706, "grad_norm": 6.929009437561035, "learning_rate": 1.514708204834493e-06, "loss": 0.2234, "step": 25249 }, { "epoch": 82.78688524590164, "grad_norm": 7.266702175140381, "learning_rate": 1.5141463566722126e-06, "loss": 0.3563, "step": 25250 }, { "epoch": 82.79016393442623, "grad_norm": 5.6436662673950195, "learning_rate": 1.5135846041967529e-06, "loss": 0.4548, "step": 25251 }, { "epoch": 82.79344262295082, "grad_norm": 5.420115947723389, "learning_rate": 1.513022947414453e-06, "loss": 0.2312, "step": 25252 }, { "epoch": 82.79672131147541, "grad_norm": 4.686547756195068, "learning_rate": 1.5124613863316429e-06, "loss": 0.3553, "step": 25253 }, { "epoch": 82.8, "grad_norm": 5.115795135498047, "learning_rate": 1.511899920954656e-06, "loss": 0.3761, "step": 25254 }, { "epoch": 82.80327868852459, "grad_norm": 6.285800933837891, "learning_rate": 1.5113385512898204e-06, "loss": 0.6149, "step": 25255 }, { "epoch": 82.80655737704917, "grad_norm": 5.741426944732666, "learning_rate": 1.5107772773434715e-06, "loss": 0.2181, "step": 25256 }, { "epoch": 82.80983606557378, "grad_norm": 6.886508941650391, "learning_rate": 1.510216099121935e-06, "loss": 0.5188, "step": 25257 }, { "epoch": 82.81311475409836, "grad_norm": 8.277022361755371, "learning_rate": 1.5096550166315393e-06, "loss": 0.4065, "step": 25258 }, { "epoch": 82.81639344262295, "grad_norm": 4.6855621337890625, "learning_rate": 1.5090940298786105e-06, "loss": 0.2745, "step": 25259 }, { "epoch": 82.81967213114754, "grad_norm": 6.187777519226074, "learning_rate": 1.5085331388694736e-06, "loss": 0.3225, "step": 25260 }, { "epoch": 82.82295081967213, "grad_norm": 6.829029560089111, "learning_rate": 1.5079723436104553e-06, "loss": 0.5019, "step": 25261 }, { "epoch": 82.82622950819672, "grad_norm": 4.3175554275512695, "learning_rate": 1.507411644107879e-06, "loss": 0.4653, "step": 25262 }, { "epoch": 82.8295081967213, "grad_norm": 4.296903610229492, "learning_rate": 1.506851040368067e-06, "loss": 0.2079, "step": 25263 }, { "epoch": 82.8327868852459, "grad_norm": 4.811232089996338, "learning_rate": 1.5062905323973375e-06, "loss": 0.4431, "step": 25264 }, { "epoch": 82.8360655737705, "grad_norm": 5.746710777282715, "learning_rate": 1.505730120202016e-06, "loss": 0.2591, "step": 25265 }, { "epoch": 82.83934426229509, "grad_norm": 5.660295486450195, "learning_rate": 1.5051698037884187e-06, "loss": 0.6673, "step": 25266 }, { "epoch": 82.84262295081967, "grad_norm": 5.9236884117126465, "learning_rate": 1.5046095831628638e-06, "loss": 0.2777, "step": 25267 }, { "epoch": 82.84590163934426, "grad_norm": 4.053123950958252, "learning_rate": 1.5040494583316678e-06, "loss": 0.2279, "step": 25268 }, { "epoch": 82.84918032786885, "grad_norm": 16.074176788330078, "learning_rate": 1.5034894293011492e-06, "loss": 0.3082, "step": 25269 }, { "epoch": 82.85245901639344, "grad_norm": 5.076273441314697, "learning_rate": 1.502929496077622e-06, "loss": 0.4544, "step": 25270 }, { "epoch": 82.85573770491803, "grad_norm": 4.362079620361328, "learning_rate": 1.5023696586673985e-06, "loss": 0.5697, "step": 25271 }, { "epoch": 82.85901639344263, "grad_norm": 5.3401336669921875, "learning_rate": 1.5018099170767942e-06, "loss": 0.3578, "step": 25272 }, { "epoch": 82.86229508196722, "grad_norm": 4.478420257568359, "learning_rate": 1.5012502713121145e-06, "loss": 0.4058, "step": 25273 }, { "epoch": 82.8655737704918, "grad_norm": 5.913166522979736, "learning_rate": 1.5006907213796785e-06, "loss": 0.4001, "step": 25274 }, { "epoch": 82.8688524590164, "grad_norm": 3.6568033695220947, "learning_rate": 1.5001312672857915e-06, "loss": 0.2998, "step": 25275 }, { "epoch": 82.87213114754098, "grad_norm": 4.264822006225586, "learning_rate": 1.499571909036761e-06, "loss": 0.4344, "step": 25276 }, { "epoch": 82.87540983606557, "grad_norm": 19.326791763305664, "learning_rate": 1.4990126466388944e-06, "loss": 0.323, "step": 25277 }, { "epoch": 82.87868852459016, "grad_norm": 6.424370765686035, "learning_rate": 1.4984534800985008e-06, "loss": 0.4917, "step": 25278 }, { "epoch": 82.88196721311475, "grad_norm": 7.33903694152832, "learning_rate": 1.4978944094218828e-06, "loss": 0.5907, "step": 25279 }, { "epoch": 82.88524590163935, "grad_norm": 4.435788631439209, "learning_rate": 1.4973354346153468e-06, "loss": 0.2337, "step": 25280 }, { "epoch": 82.88852459016394, "grad_norm": 4.263982772827148, "learning_rate": 1.496776555685191e-06, "loss": 0.4876, "step": 25281 }, { "epoch": 82.89180327868853, "grad_norm": 5.975342750549316, "learning_rate": 1.4962177726377235e-06, "loss": 0.2201, "step": 25282 }, { "epoch": 82.89508196721312, "grad_norm": 5.216200351715088, "learning_rate": 1.495659085479242e-06, "loss": 0.2603, "step": 25283 }, { "epoch": 82.8983606557377, "grad_norm": 4.300319671630859, "learning_rate": 1.4951004942160463e-06, "loss": 0.3063, "step": 25284 }, { "epoch": 82.90163934426229, "grad_norm": 7.19598388671875, "learning_rate": 1.494541998854434e-06, "loss": 0.5198, "step": 25285 }, { "epoch": 82.90491803278688, "grad_norm": 4.054668426513672, "learning_rate": 1.4939835994007058e-06, "loss": 0.2346, "step": 25286 }, { "epoch": 82.90819672131147, "grad_norm": 3.8276278972625732, "learning_rate": 1.493425295861156e-06, "loss": 0.2634, "step": 25287 }, { "epoch": 82.91147540983607, "grad_norm": 4.456614971160889, "learning_rate": 1.4928670882420816e-06, "loss": 0.4079, "step": 25288 }, { "epoch": 82.91475409836066, "grad_norm": 4.288290023803711, "learning_rate": 1.4923089765497733e-06, "loss": 0.2189, "step": 25289 }, { "epoch": 82.91803278688525, "grad_norm": 6.092576026916504, "learning_rate": 1.49175096079053e-06, "loss": 0.2899, "step": 25290 }, { "epoch": 82.92131147540984, "grad_norm": 5.605123043060303, "learning_rate": 1.4911930409706398e-06, "loss": 0.5734, "step": 25291 }, { "epoch": 82.92459016393443, "grad_norm": 4.907416343688965, "learning_rate": 1.4906352170963933e-06, "loss": 0.2863, "step": 25292 }, { "epoch": 82.92786885245901, "grad_norm": 4.910012722015381, "learning_rate": 1.4900774891740855e-06, "loss": 0.4291, "step": 25293 }, { "epoch": 82.9311475409836, "grad_norm": 7.734903335571289, "learning_rate": 1.489519857210001e-06, "loss": 0.2604, "step": 25294 }, { "epoch": 82.93442622950819, "grad_norm": 4.422749042510986, "learning_rate": 1.4889623212104266e-06, "loss": 0.4092, "step": 25295 }, { "epoch": 82.9377049180328, "grad_norm": 4.101255893707275, "learning_rate": 1.4884048811816532e-06, "loss": 0.4021, "step": 25296 }, { "epoch": 82.94098360655738, "grad_norm": 6.812615394592285, "learning_rate": 1.487847537129965e-06, "loss": 0.2245, "step": 25297 }, { "epoch": 82.94426229508197, "grad_norm": 4.451093673706055, "learning_rate": 1.487290289061647e-06, "loss": 0.4647, "step": 25298 }, { "epoch": 82.94754098360656, "grad_norm": 4.681985378265381, "learning_rate": 1.4867331369829786e-06, "loss": 0.5347, "step": 25299 }, { "epoch": 82.95081967213115, "grad_norm": 6.852635383605957, "learning_rate": 1.486176080900248e-06, "loss": 0.4111, "step": 25300 }, { "epoch": 82.95409836065573, "grad_norm": 5.462512016296387, "learning_rate": 1.4856191208197347e-06, "loss": 0.4028, "step": 25301 }, { "epoch": 82.95737704918032, "grad_norm": 5.43936824798584, "learning_rate": 1.485062256747719e-06, "loss": 0.2589, "step": 25302 }, { "epoch": 82.96065573770491, "grad_norm": 4.320892810821533, "learning_rate": 1.4845054886904764e-06, "loss": 0.4305, "step": 25303 }, { "epoch": 82.96393442622951, "grad_norm": 5.156314373016357, "learning_rate": 1.4839488166542914e-06, "loss": 0.4765, "step": 25304 }, { "epoch": 82.9672131147541, "grad_norm": 5.032286167144775, "learning_rate": 1.483392240645437e-06, "loss": 0.2995, "step": 25305 }, { "epoch": 82.97049180327869, "grad_norm": 3.7645771503448486, "learning_rate": 1.4828357606701915e-06, "loss": 0.2009, "step": 25306 }, { "epoch": 82.97377049180328, "grad_norm": 5.3838701248168945, "learning_rate": 1.4822793767348253e-06, "loss": 0.5144, "step": 25307 }, { "epoch": 82.97704918032787, "grad_norm": 4.573257923126221, "learning_rate": 1.481723088845617e-06, "loss": 0.35, "step": 25308 }, { "epoch": 82.98032786885246, "grad_norm": 5.250638484954834, "learning_rate": 1.4811668970088388e-06, "loss": 0.414, "step": 25309 }, { "epoch": 82.98360655737704, "grad_norm": 5.472866535186768, "learning_rate": 1.4806108012307607e-06, "loss": 0.338, "step": 25310 }, { "epoch": 82.98688524590163, "grad_norm": 5.436770915985107, "learning_rate": 1.480054801517654e-06, "loss": 0.6259, "step": 25311 }, { "epoch": 82.99016393442623, "grad_norm": 5.419683456420898, "learning_rate": 1.4794988978757852e-06, "loss": 0.4983, "step": 25312 }, { "epoch": 82.99344262295082, "grad_norm": 4.036950588226318, "learning_rate": 1.4789430903114277e-06, "loss": 0.2054, "step": 25313 }, { "epoch": 82.99672131147541, "grad_norm": 4.601530075073242, "learning_rate": 1.4783873788308466e-06, "loss": 0.4091, "step": 25314 }, { "epoch": 83.0, "grad_norm": 4.7271246910095215, "learning_rate": 1.4778317634403082e-06, "loss": 0.3826, "step": 25315 }, { "epoch": 83.00327868852459, "grad_norm": 5.404736042022705, "learning_rate": 1.4772762441460764e-06, "loss": 0.2588, "step": 25316 }, { "epoch": 83.00655737704918, "grad_norm": 5.391903877258301, "learning_rate": 1.476720820954417e-06, "loss": 0.2392, "step": 25317 }, { "epoch": 83.00983606557377, "grad_norm": 4.8129496574401855, "learning_rate": 1.4761654938715931e-06, "loss": 0.3963, "step": 25318 }, { "epoch": 83.01311475409837, "grad_norm": 5.930535793304443, "learning_rate": 1.475610262903865e-06, "loss": 0.3697, "step": 25319 }, { "epoch": 83.01639344262296, "grad_norm": 4.459763526916504, "learning_rate": 1.4750551280574931e-06, "loss": 0.3456, "step": 25320 }, { "epoch": 83.01967213114754, "grad_norm": 4.575133323669434, "learning_rate": 1.4745000893387395e-06, "loss": 0.6742, "step": 25321 }, { "epoch": 83.02295081967213, "grad_norm": 5.65281343460083, "learning_rate": 1.4739451467538634e-06, "loss": 0.4662, "step": 25322 }, { "epoch": 83.02622950819672, "grad_norm": 4.871890544891357, "learning_rate": 1.4733903003091189e-06, "loss": 0.3238, "step": 25323 }, { "epoch": 83.02950819672131, "grad_norm": 6.451268196105957, "learning_rate": 1.472835550010765e-06, "loss": 0.2232, "step": 25324 }, { "epoch": 83.0327868852459, "grad_norm": 4.81025505065918, "learning_rate": 1.4722808958650537e-06, "loss": 0.4468, "step": 25325 }, { "epoch": 83.03606557377049, "grad_norm": 5.48989200592041, "learning_rate": 1.4717263378782442e-06, "loss": 0.1525, "step": 25326 }, { "epoch": 83.03934426229509, "grad_norm": 3.9042463302612305, "learning_rate": 1.4711718760565884e-06, "loss": 0.6163, "step": 25327 }, { "epoch": 83.04262295081968, "grad_norm": 5.474270820617676, "learning_rate": 1.4706175104063369e-06, "loss": 0.5383, "step": 25328 }, { "epoch": 83.04590163934427, "grad_norm": 6.4916534423828125, "learning_rate": 1.470063240933739e-06, "loss": 0.4307, "step": 25329 }, { "epoch": 83.04918032786885, "grad_norm": 4.055896759033203, "learning_rate": 1.4695090676450484e-06, "loss": 0.435, "step": 25330 }, { "epoch": 83.05245901639344, "grad_norm": 4.8263373374938965, "learning_rate": 1.468954990546514e-06, "loss": 0.3155, "step": 25331 }, { "epoch": 83.05573770491803, "grad_norm": 5.194811820983887, "learning_rate": 1.4684010096443813e-06, "loss": 0.5288, "step": 25332 }, { "epoch": 83.05901639344262, "grad_norm": 4.546694278717041, "learning_rate": 1.4678471249448955e-06, "loss": 0.3803, "step": 25333 }, { "epoch": 83.0622950819672, "grad_norm": 5.272706508636475, "learning_rate": 1.4672933364543062e-06, "loss": 0.3642, "step": 25334 }, { "epoch": 83.06557377049181, "grad_norm": 6.055649280548096, "learning_rate": 1.4667396441788572e-06, "loss": 0.2942, "step": 25335 }, { "epoch": 83.0688524590164, "grad_norm": 5.079578876495361, "learning_rate": 1.4661860481247913e-06, "loss": 0.3122, "step": 25336 }, { "epoch": 83.07213114754099, "grad_norm": 6.419730186462402, "learning_rate": 1.4656325482983503e-06, "loss": 0.467, "step": 25337 }, { "epoch": 83.07540983606557, "grad_norm": 8.758450508117676, "learning_rate": 1.4650791447057734e-06, "loss": 0.3026, "step": 25338 }, { "epoch": 83.07868852459016, "grad_norm": 5.112332820892334, "learning_rate": 1.4645258373533056e-06, "loss": 0.4329, "step": 25339 }, { "epoch": 83.08196721311475, "grad_norm": 4.869410514831543, "learning_rate": 1.4639726262471842e-06, "loss": 0.2579, "step": 25340 }, { "epoch": 83.08524590163934, "grad_norm": 4.862540245056152, "learning_rate": 1.4634195113936446e-06, "loss": 0.3332, "step": 25341 }, { "epoch": 83.08852459016393, "grad_norm": 5.042625904083252, "learning_rate": 1.4628664927989268e-06, "loss": 0.2918, "step": 25342 }, { "epoch": 83.09180327868853, "grad_norm": 5.307054042816162, "learning_rate": 1.4623135704692658e-06, "loss": 0.5798, "step": 25343 }, { "epoch": 83.09508196721312, "grad_norm": 4.665453910827637, "learning_rate": 1.461760744410895e-06, "loss": 0.3544, "step": 25344 }, { "epoch": 83.09836065573771, "grad_norm": 4.6380391120910645, "learning_rate": 1.4612080146300512e-06, "loss": 0.6198, "step": 25345 }, { "epoch": 83.1016393442623, "grad_norm": 4.259033203125, "learning_rate": 1.460655381132965e-06, "loss": 0.1906, "step": 25346 }, { "epoch": 83.10491803278688, "grad_norm": 4.319397926330566, "learning_rate": 1.4601028439258657e-06, "loss": 0.2395, "step": 25347 }, { "epoch": 83.10819672131147, "grad_norm": 5.479097366333008, "learning_rate": 1.4595504030149888e-06, "loss": 0.4659, "step": 25348 }, { "epoch": 83.11147540983606, "grad_norm": 5.070423126220703, "learning_rate": 1.458998058406561e-06, "loss": 0.5069, "step": 25349 }, { "epoch": 83.11475409836065, "grad_norm": 4.989224910736084, "learning_rate": 1.45844581010681e-06, "loss": 0.1606, "step": 25350 }, { "epoch": 83.11803278688525, "grad_norm": 6.066772937774658, "learning_rate": 1.4578936581219616e-06, "loss": 0.281, "step": 25351 }, { "epoch": 83.12131147540984, "grad_norm": 5.268829822540283, "learning_rate": 1.4573416024582465e-06, "loss": 0.4236, "step": 25352 }, { "epoch": 83.12459016393443, "grad_norm": 5.660773754119873, "learning_rate": 1.4567896431218863e-06, "loss": 0.5368, "step": 25353 }, { "epoch": 83.12786885245902, "grad_norm": 5.434036731719971, "learning_rate": 1.4562377801191053e-06, "loss": 0.3264, "step": 25354 }, { "epoch": 83.1311475409836, "grad_norm": 3.9128506183624268, "learning_rate": 1.4556860134561246e-06, "loss": 0.2373, "step": 25355 }, { "epoch": 83.1344262295082, "grad_norm": 4.5529022216796875, "learning_rate": 1.4551343431391707e-06, "loss": 0.4726, "step": 25356 }, { "epoch": 83.13770491803278, "grad_norm": 5.050436496734619, "learning_rate": 1.4545827691744607e-06, "loss": 0.6175, "step": 25357 }, { "epoch": 83.14098360655737, "grad_norm": 5.430278301239014, "learning_rate": 1.4540312915682154e-06, "loss": 0.2464, "step": 25358 }, { "epoch": 83.14426229508197, "grad_norm": 7.5738606452941895, "learning_rate": 1.4534799103266505e-06, "loss": 0.4343, "step": 25359 }, { "epoch": 83.14754098360656, "grad_norm": 5.20313835144043, "learning_rate": 1.4529286254559882e-06, "loss": 0.4128, "step": 25360 }, { "epoch": 83.15081967213115, "grad_norm": 16.535463333129883, "learning_rate": 1.4523774369624422e-06, "loss": 0.6485, "step": 25361 }, { "epoch": 83.15409836065574, "grad_norm": 6.111084461212158, "learning_rate": 1.4518263448522285e-06, "loss": 0.2438, "step": 25362 }, { "epoch": 83.15737704918033, "grad_norm": 4.522833824157715, "learning_rate": 1.4512753491315601e-06, "loss": 0.3688, "step": 25363 }, { "epoch": 83.16065573770491, "grad_norm": 4.564185619354248, "learning_rate": 1.4507244498066485e-06, "loss": 0.3864, "step": 25364 }, { "epoch": 83.1639344262295, "grad_norm": 3.5961923599243164, "learning_rate": 1.4501736468837101e-06, "loss": 0.2439, "step": 25365 }, { "epoch": 83.1672131147541, "grad_norm": 5.731735706329346, "learning_rate": 1.4496229403689532e-06, "loss": 0.36, "step": 25366 }, { "epoch": 83.1704918032787, "grad_norm": 5.353298664093018, "learning_rate": 1.4490723302685872e-06, "loss": 0.376, "step": 25367 }, { "epoch": 83.17377049180328, "grad_norm": 4.703145503997803, "learning_rate": 1.4485218165888204e-06, "loss": 0.3148, "step": 25368 }, { "epoch": 83.17704918032787, "grad_norm": 4.448554515838623, "learning_rate": 1.4479713993358624e-06, "loss": 0.3031, "step": 25369 }, { "epoch": 83.18032786885246, "grad_norm": 9.047577857971191, "learning_rate": 1.4474210785159205e-06, "loss": 0.4092, "step": 25370 }, { "epoch": 83.18360655737705, "grad_norm": 9.830069541931152, "learning_rate": 1.4468708541351973e-06, "loss": 0.4812, "step": 25371 }, { "epoch": 83.18688524590164, "grad_norm": 4.576605319976807, "learning_rate": 1.4463207261998958e-06, "loss": 0.243, "step": 25372 }, { "epoch": 83.19016393442622, "grad_norm": 7.036559581756592, "learning_rate": 1.4457706947162242e-06, "loss": 0.3268, "step": 25373 }, { "epoch": 83.19344262295083, "grad_norm": 5.519000053405762, "learning_rate": 1.4452207596903822e-06, "loss": 0.4023, "step": 25374 }, { "epoch": 83.19672131147541, "grad_norm": 5.493109226226807, "learning_rate": 1.4446709211285703e-06, "loss": 0.3992, "step": 25375 }, { "epoch": 83.2, "grad_norm": 4.846986293792725, "learning_rate": 1.4441211790369892e-06, "loss": 0.5484, "step": 25376 }, { "epoch": 83.20327868852459, "grad_norm": 12.15268611907959, "learning_rate": 1.443571533421836e-06, "loss": 0.1592, "step": 25377 }, { "epoch": 83.20655737704918, "grad_norm": 4.618360996246338, "learning_rate": 1.4430219842893123e-06, "loss": 0.2788, "step": 25378 }, { "epoch": 83.20983606557377, "grad_norm": 6.894974231719971, "learning_rate": 1.4424725316456133e-06, "loss": 0.2446, "step": 25379 }, { "epoch": 83.21311475409836, "grad_norm": 5.0407490730285645, "learning_rate": 1.441923175496933e-06, "loss": 0.2884, "step": 25380 }, { "epoch": 83.21639344262294, "grad_norm": 5.896736145019531, "learning_rate": 1.4413739158494654e-06, "loss": 0.4095, "step": 25381 }, { "epoch": 83.21967213114755, "grad_norm": 4.7097859382629395, "learning_rate": 1.4408247527094077e-06, "loss": 0.2981, "step": 25382 }, { "epoch": 83.22295081967214, "grad_norm": 8.891352653503418, "learning_rate": 1.4402756860829503e-06, "loss": 0.3281, "step": 25383 }, { "epoch": 83.22622950819672, "grad_norm": 4.585860729217529, "learning_rate": 1.439726715976285e-06, "loss": 0.485, "step": 25384 }, { "epoch": 83.22950819672131, "grad_norm": 4.070919990539551, "learning_rate": 1.4391778423955983e-06, "loss": 0.2726, "step": 25385 }, { "epoch": 83.2327868852459, "grad_norm": 5.018887042999268, "learning_rate": 1.4386290653470859e-06, "loss": 0.3838, "step": 25386 }, { "epoch": 83.23606557377049, "grad_norm": 9.077392578125, "learning_rate": 1.4380803848369306e-06, "loss": 0.5085, "step": 25387 }, { "epoch": 83.23934426229508, "grad_norm": 4.108853340148926, "learning_rate": 1.4375318008713224e-06, "loss": 0.2684, "step": 25388 }, { "epoch": 83.24262295081967, "grad_norm": 5.721518039703369, "learning_rate": 1.4369833134564458e-06, "loss": 0.2275, "step": 25389 }, { "epoch": 83.24590163934427, "grad_norm": 9.227551460266113, "learning_rate": 1.436434922598483e-06, "loss": 0.3905, "step": 25390 }, { "epoch": 83.24918032786886, "grad_norm": 5.258607864379883, "learning_rate": 1.4358866283036233e-06, "loss": 0.4208, "step": 25391 }, { "epoch": 83.25245901639344, "grad_norm": 3.835674524307251, "learning_rate": 1.435338430578046e-06, "loss": 0.2034, "step": 25392 }, { "epoch": 83.25573770491803, "grad_norm": 4.6178765296936035, "learning_rate": 1.434790329427932e-06, "loss": 0.3759, "step": 25393 }, { "epoch": 83.25901639344262, "grad_norm": 5.4126787185668945, "learning_rate": 1.4342423248594616e-06, "loss": 0.3558, "step": 25394 }, { "epoch": 83.26229508196721, "grad_norm": 13.683808326721191, "learning_rate": 1.433694416878817e-06, "loss": 0.4135, "step": 25395 }, { "epoch": 83.2655737704918, "grad_norm": 6.372555255889893, "learning_rate": 1.4331466054921739e-06, "loss": 0.2498, "step": 25396 }, { "epoch": 83.26885245901639, "grad_norm": 5.280959606170654, "learning_rate": 1.4325988907057087e-06, "loss": 0.4705, "step": 25397 }, { "epoch": 83.27213114754099, "grad_norm": 5.006741523742676, "learning_rate": 1.4320512725256007e-06, "loss": 0.3341, "step": 25398 }, { "epoch": 83.27540983606558, "grad_norm": 3.742563486099243, "learning_rate": 1.431503750958023e-06, "loss": 0.2426, "step": 25399 }, { "epoch": 83.27868852459017, "grad_norm": 4.969178199768066, "learning_rate": 1.4309563260091474e-06, "loss": 0.2857, "step": 25400 }, { "epoch": 83.28196721311475, "grad_norm": 5.108144283294678, "learning_rate": 1.430408997685151e-06, "loss": 0.4859, "step": 25401 }, { "epoch": 83.28524590163934, "grad_norm": 4.200159549713135, "learning_rate": 1.4298617659922031e-06, "loss": 0.5684, "step": 25402 }, { "epoch": 83.28852459016393, "grad_norm": 5.030449390411377, "learning_rate": 1.4293146309364726e-06, "loss": 0.5814, "step": 25403 }, { "epoch": 83.29180327868852, "grad_norm": 4.455667018890381, "learning_rate": 1.428767592524133e-06, "loss": 0.3653, "step": 25404 }, { "epoch": 83.29508196721312, "grad_norm": 5.7245259284973145, "learning_rate": 1.4282206507613506e-06, "loss": 0.4251, "step": 25405 }, { "epoch": 83.29836065573771, "grad_norm": 4.604767799377441, "learning_rate": 1.4276738056542928e-06, "loss": 0.284, "step": 25406 }, { "epoch": 83.3016393442623, "grad_norm": 4.603463649749756, "learning_rate": 1.4271270572091234e-06, "loss": 0.2457, "step": 25407 }, { "epoch": 83.30491803278689, "grad_norm": 4.949476718902588, "learning_rate": 1.426580405432013e-06, "loss": 0.4101, "step": 25408 }, { "epoch": 83.30819672131148, "grad_norm": 6.564693927764893, "learning_rate": 1.4260338503291216e-06, "loss": 0.4578, "step": 25409 }, { "epoch": 83.31147540983606, "grad_norm": 38.557071685791016, "learning_rate": 1.4254873919066137e-06, "loss": 0.3489, "step": 25410 }, { "epoch": 83.31475409836065, "grad_norm": 4.8608503341674805, "learning_rate": 1.424941030170649e-06, "loss": 0.3229, "step": 25411 }, { "epoch": 83.31803278688524, "grad_norm": 5.739535331726074, "learning_rate": 1.4243947651273915e-06, "loss": 0.4968, "step": 25412 }, { "epoch": 83.32131147540984, "grad_norm": 4.770941734313965, "learning_rate": 1.4238485967829995e-06, "loss": 0.2948, "step": 25413 }, { "epoch": 83.32459016393443, "grad_norm": 3.4187846183776855, "learning_rate": 1.4233025251436317e-06, "loss": 0.1993, "step": 25414 }, { "epoch": 83.32786885245902, "grad_norm": 5.347785949707031, "learning_rate": 1.4227565502154461e-06, "loss": 0.3116, "step": 25415 }, { "epoch": 83.33114754098361, "grad_norm": 5.511544704437256, "learning_rate": 1.4222106720045959e-06, "loss": 0.3137, "step": 25416 }, { "epoch": 83.3344262295082, "grad_norm": 4.999880790710449, "learning_rate": 1.4216648905172402e-06, "loss": 0.4926, "step": 25417 }, { "epoch": 83.33770491803278, "grad_norm": 5.154884338378906, "learning_rate": 1.4211192057595335e-06, "loss": 0.4479, "step": 25418 }, { "epoch": 83.34098360655737, "grad_norm": 4.678302764892578, "learning_rate": 1.420573617737626e-06, "loss": 0.4006, "step": 25419 }, { "epoch": 83.34426229508196, "grad_norm": 4.661776542663574, "learning_rate": 1.4200281264576709e-06, "loss": 0.3557, "step": 25420 }, { "epoch": 83.34754098360656, "grad_norm": 16.948745727539062, "learning_rate": 1.4194827319258208e-06, "loss": 0.2558, "step": 25421 }, { "epoch": 83.35081967213115, "grad_norm": 4.866575241088867, "learning_rate": 1.4189374341482243e-06, "loss": 0.4087, "step": 25422 }, { "epoch": 83.35409836065574, "grad_norm": 5.188984394073486, "learning_rate": 1.4183922331310306e-06, "loss": 0.3109, "step": 25423 }, { "epoch": 83.35737704918033, "grad_norm": 3.867412567138672, "learning_rate": 1.4178471288803852e-06, "loss": 0.1694, "step": 25424 }, { "epoch": 83.36065573770492, "grad_norm": 5.857673645019531, "learning_rate": 1.4173021214024384e-06, "loss": 0.4236, "step": 25425 }, { "epoch": 83.3639344262295, "grad_norm": 4.241668701171875, "learning_rate": 1.4167572107033346e-06, "loss": 0.5255, "step": 25426 }, { "epoch": 83.3672131147541, "grad_norm": 15.51539421081543, "learning_rate": 1.4162123967892161e-06, "loss": 0.4665, "step": 25427 }, { "epoch": 83.37049180327868, "grad_norm": 5.922938823699951, "learning_rate": 1.4156676796662293e-06, "loss": 0.3168, "step": 25428 }, { "epoch": 83.37377049180328, "grad_norm": 6.456819534301758, "learning_rate": 1.4151230593405118e-06, "loss": 0.3007, "step": 25429 }, { "epoch": 83.37704918032787, "grad_norm": 5.981853485107422, "learning_rate": 1.4145785358182107e-06, "loss": 0.5006, "step": 25430 }, { "epoch": 83.38032786885246, "grad_norm": 4.774538516998291, "learning_rate": 1.414034109105462e-06, "loss": 0.1944, "step": 25431 }, { "epoch": 83.38360655737705, "grad_norm": 4.1283278465271, "learning_rate": 1.4134897792084067e-06, "loss": 0.388, "step": 25432 }, { "epoch": 83.38688524590164, "grad_norm": 5.912039279937744, "learning_rate": 1.4129455461331797e-06, "loss": 0.3371, "step": 25433 }, { "epoch": 83.39016393442623, "grad_norm": 5.085849761962891, "learning_rate": 1.412401409885923e-06, "loss": 0.4547, "step": 25434 }, { "epoch": 83.39344262295081, "grad_norm": 7.874063014984131, "learning_rate": 1.4118573704727678e-06, "loss": 0.2744, "step": 25435 }, { "epoch": 83.3967213114754, "grad_norm": 6.393005847930908, "learning_rate": 1.4113134278998508e-06, "loss": 0.4776, "step": 25436 }, { "epoch": 83.4, "grad_norm": 5.7953033447265625, "learning_rate": 1.4107695821733026e-06, "loss": 0.4193, "step": 25437 }, { "epoch": 83.4032786885246, "grad_norm": 7.231961250305176, "learning_rate": 1.4102258332992602e-06, "loss": 0.5018, "step": 25438 }, { "epoch": 83.40655737704918, "grad_norm": 4.957704067230225, "learning_rate": 1.4096821812838525e-06, "loss": 0.3777, "step": 25439 }, { "epoch": 83.40983606557377, "grad_norm": 5.535904884338379, "learning_rate": 1.4091386261332107e-06, "loss": 0.4656, "step": 25440 }, { "epoch": 83.41311475409836, "grad_norm": 23.950786590576172, "learning_rate": 1.4085951678534627e-06, "loss": 0.5183, "step": 25441 }, { "epoch": 83.41639344262295, "grad_norm": 3.8734545707702637, "learning_rate": 1.4080518064507342e-06, "loss": 0.3026, "step": 25442 }, { "epoch": 83.41967213114754, "grad_norm": 4.643672466278076, "learning_rate": 1.4075085419311573e-06, "loss": 0.394, "step": 25443 }, { "epoch": 83.42295081967212, "grad_norm": 5.118135929107666, "learning_rate": 1.406965374300856e-06, "loss": 0.4071, "step": 25444 }, { "epoch": 83.42622950819673, "grad_norm": 5.526844024658203, "learning_rate": 1.406422303565954e-06, "loss": 0.3042, "step": 25445 }, { "epoch": 83.42950819672132, "grad_norm": 5.654446125030518, "learning_rate": 1.4058793297325745e-06, "loss": 0.4467, "step": 25446 }, { "epoch": 83.4327868852459, "grad_norm": 5.041749477386475, "learning_rate": 1.4053364528068425e-06, "loss": 0.4245, "step": 25447 }, { "epoch": 83.43606557377049, "grad_norm": 4.837209224700928, "learning_rate": 1.4047936727948786e-06, "loss": 0.4703, "step": 25448 }, { "epoch": 83.43934426229508, "grad_norm": 4.550757884979248, "learning_rate": 1.4042509897028e-06, "loss": 0.3115, "step": 25449 }, { "epoch": 83.44262295081967, "grad_norm": 5.4627532958984375, "learning_rate": 1.403708403536731e-06, "loss": 0.247, "step": 25450 }, { "epoch": 83.44590163934426, "grad_norm": 4.284688949584961, "learning_rate": 1.4031659143027886e-06, "loss": 0.4024, "step": 25451 }, { "epoch": 83.44918032786886, "grad_norm": 8.077570915222168, "learning_rate": 1.4026235220070883e-06, "loss": 0.4456, "step": 25452 }, { "epoch": 83.45245901639345, "grad_norm": 4.5553083419799805, "learning_rate": 1.402081226655745e-06, "loss": 0.4287, "step": 25453 }, { "epoch": 83.45573770491804, "grad_norm": 6.116387844085693, "learning_rate": 1.4015390282548779e-06, "loss": 0.2907, "step": 25454 }, { "epoch": 83.45901639344262, "grad_norm": 3.8831396102905273, "learning_rate": 1.4009969268105973e-06, "loss": 0.195, "step": 25455 }, { "epoch": 83.46229508196721, "grad_norm": 7.215624809265137, "learning_rate": 1.4004549223290165e-06, "loss": 0.4494, "step": 25456 }, { "epoch": 83.4655737704918, "grad_norm": 6.4209136962890625, "learning_rate": 1.3999130148162487e-06, "loss": 0.3865, "step": 25457 }, { "epoch": 83.46885245901639, "grad_norm": 6.319340229034424, "learning_rate": 1.3993712042784035e-06, "loss": 0.3775, "step": 25458 }, { "epoch": 83.47213114754098, "grad_norm": 4.362370491027832, "learning_rate": 1.3988294907215883e-06, "loss": 0.3067, "step": 25459 }, { "epoch": 83.47540983606558, "grad_norm": 4.623932361602783, "learning_rate": 1.3982878741519167e-06, "loss": 0.2019, "step": 25460 }, { "epoch": 83.47868852459017, "grad_norm": 6.783864498138428, "learning_rate": 1.3977463545754922e-06, "loss": 0.4424, "step": 25461 }, { "epoch": 83.48196721311476, "grad_norm": 5.261396408081055, "learning_rate": 1.3972049319984216e-06, "loss": 0.2774, "step": 25462 }, { "epoch": 83.48524590163935, "grad_norm": 5.936288833618164, "learning_rate": 1.3966636064268068e-06, "loss": 0.4239, "step": 25463 }, { "epoch": 83.48852459016393, "grad_norm": 4.888272762298584, "learning_rate": 1.3961223778667587e-06, "loss": 0.2154, "step": 25464 }, { "epoch": 83.49180327868852, "grad_norm": 5.131629467010498, "learning_rate": 1.3955812463243767e-06, "loss": 0.5333, "step": 25465 }, { "epoch": 83.49508196721311, "grad_norm": 5.45512580871582, "learning_rate": 1.3950402118057615e-06, "loss": 0.3347, "step": 25466 }, { "epoch": 83.4983606557377, "grad_norm": 4.371094226837158, "learning_rate": 1.3944992743170149e-06, "loss": 0.3506, "step": 25467 }, { "epoch": 83.5016393442623, "grad_norm": 4.266334533691406, "learning_rate": 1.3939584338642353e-06, "loss": 0.4622, "step": 25468 }, { "epoch": 83.50491803278689, "grad_norm": 4.621413230895996, "learning_rate": 1.3934176904535235e-06, "loss": 0.2448, "step": 25469 }, { "epoch": 83.50819672131148, "grad_norm": 4.240844249725342, "learning_rate": 1.3928770440909766e-06, "loss": 0.3819, "step": 25470 }, { "epoch": 83.51147540983607, "grad_norm": 6.884177207946777, "learning_rate": 1.39233649478269e-06, "loss": 0.4494, "step": 25471 }, { "epoch": 83.51475409836065, "grad_norm": 3.44437575340271, "learning_rate": 1.3917960425347565e-06, "loss": 0.3981, "step": 25472 }, { "epoch": 83.51803278688524, "grad_norm": 6.580263614654541, "learning_rate": 1.3912556873532756e-06, "loss": 0.2359, "step": 25473 }, { "epoch": 83.52131147540983, "grad_norm": 5.73331356048584, "learning_rate": 1.3907154292443381e-06, "loss": 0.3139, "step": 25474 }, { "epoch": 83.52459016393442, "grad_norm": 4.1294097900390625, "learning_rate": 1.3901752682140345e-06, "loss": 0.4747, "step": 25475 }, { "epoch": 83.52786885245902, "grad_norm": 5.3252458572387695, "learning_rate": 1.3896352042684546e-06, "loss": 0.3902, "step": 25476 }, { "epoch": 83.53114754098361, "grad_norm": 6.3436598777771, "learning_rate": 1.3890952374136934e-06, "loss": 0.4013, "step": 25477 }, { "epoch": 83.5344262295082, "grad_norm": 4.292317867279053, "learning_rate": 1.3885553676558361e-06, "loss": 0.1506, "step": 25478 }, { "epoch": 83.53770491803279, "grad_norm": 5.700450420379639, "learning_rate": 1.3880155950009699e-06, "loss": 0.3255, "step": 25479 }, { "epoch": 83.54098360655738, "grad_norm": 6.79884672164917, "learning_rate": 1.3874759194551835e-06, "loss": 0.6194, "step": 25480 }, { "epoch": 83.54426229508196, "grad_norm": 8.557575225830078, "learning_rate": 1.3869363410245574e-06, "loss": 0.4078, "step": 25481 }, { "epoch": 83.54754098360655, "grad_norm": 5.441761493682861, "learning_rate": 1.3863968597151822e-06, "loss": 0.4101, "step": 25482 }, { "epoch": 83.55081967213114, "grad_norm": 3.722407579421997, "learning_rate": 1.3858574755331388e-06, "loss": 0.2332, "step": 25483 }, { "epoch": 83.55409836065574, "grad_norm": 5.382960796356201, "learning_rate": 1.385318188484508e-06, "loss": 0.192, "step": 25484 }, { "epoch": 83.55737704918033, "grad_norm": 6.182711124420166, "learning_rate": 1.3847789985753701e-06, "loss": 0.5009, "step": 25485 }, { "epoch": 83.56065573770492, "grad_norm": 4.1076273918151855, "learning_rate": 1.3842399058118083e-06, "loss": 0.4938, "step": 25486 }, { "epoch": 83.56393442622951, "grad_norm": 3.3724894523620605, "learning_rate": 1.3837009101998999e-06, "loss": 0.205, "step": 25487 }, { "epoch": 83.5672131147541, "grad_norm": 5.016980171203613, "learning_rate": 1.3831620117457222e-06, "loss": 0.4493, "step": 25488 }, { "epoch": 83.57049180327868, "grad_norm": 3.5990562438964844, "learning_rate": 1.3826232104553505e-06, "loss": 0.1404, "step": 25489 }, { "epoch": 83.57377049180327, "grad_norm": 4.72454309463501, "learning_rate": 1.382084506334863e-06, "loss": 0.2088, "step": 25490 }, { "epoch": 83.57704918032788, "grad_norm": 4.233213424682617, "learning_rate": 1.3815458993903341e-06, "loss": 0.3035, "step": 25491 }, { "epoch": 83.58032786885246, "grad_norm": 4.438563823699951, "learning_rate": 1.3810073896278352e-06, "loss": 0.3349, "step": 25492 }, { "epoch": 83.58360655737705, "grad_norm": 5.504570484161377, "learning_rate": 1.3804689770534408e-06, "loss": 0.42, "step": 25493 }, { "epoch": 83.58688524590164, "grad_norm": 5.429894924163818, "learning_rate": 1.3799306616732178e-06, "loss": 0.3391, "step": 25494 }, { "epoch": 83.59016393442623, "grad_norm": 3.654841661453247, "learning_rate": 1.3793924434932416e-06, "loss": 0.1513, "step": 25495 }, { "epoch": 83.59344262295082, "grad_norm": 5.211432456970215, "learning_rate": 1.3788543225195782e-06, "loss": 0.3621, "step": 25496 }, { "epoch": 83.5967213114754, "grad_norm": 9.04981803894043, "learning_rate": 1.3783162987582965e-06, "loss": 0.5743, "step": 25497 }, { "epoch": 83.6, "grad_norm": 5.63754415512085, "learning_rate": 1.3777783722154603e-06, "loss": 0.2837, "step": 25498 }, { "epoch": 83.6032786885246, "grad_norm": 5.762155055999756, "learning_rate": 1.3772405428971403e-06, "loss": 0.5517, "step": 25499 }, { "epoch": 83.60655737704919, "grad_norm": 5.340451240539551, "learning_rate": 1.3767028108093994e-06, "loss": 0.3715, "step": 25500 }, { "epoch": 83.60983606557377, "grad_norm": 4.1809401512146, "learning_rate": 1.3761651759582994e-06, "loss": 0.3733, "step": 25501 }, { "epoch": 83.61311475409836, "grad_norm": 4.1346025466918945, "learning_rate": 1.3756276383499012e-06, "loss": 0.3063, "step": 25502 }, { "epoch": 83.61639344262295, "grad_norm": 5.011256217956543, "learning_rate": 1.375090197990271e-06, "loss": 0.4241, "step": 25503 }, { "epoch": 83.61967213114754, "grad_norm": 4.527877330780029, "learning_rate": 1.3745528548854658e-06, "loss": 0.3902, "step": 25504 }, { "epoch": 83.62295081967213, "grad_norm": 5.1001877784729, "learning_rate": 1.3740156090415447e-06, "loss": 0.5512, "step": 25505 }, { "epoch": 83.62622950819672, "grad_norm": 6.324408531188965, "learning_rate": 1.3734784604645667e-06, "loss": 0.5545, "step": 25506 }, { "epoch": 83.62950819672132, "grad_norm": 4.76460599899292, "learning_rate": 1.3729414091605898e-06, "loss": 0.3008, "step": 25507 }, { "epoch": 83.6327868852459, "grad_norm": 4.5358052253723145, "learning_rate": 1.3724044551356662e-06, "loss": 0.2704, "step": 25508 }, { "epoch": 83.6360655737705, "grad_norm": 5.248152732849121, "learning_rate": 1.371867598395854e-06, "loss": 0.3772, "step": 25509 }, { "epoch": 83.63934426229508, "grad_norm": 6.764976978302002, "learning_rate": 1.3713308389472068e-06, "loss": 0.4126, "step": 25510 }, { "epoch": 83.64262295081967, "grad_norm": 6.305974960327148, "learning_rate": 1.3707941767957734e-06, "loss": 0.3002, "step": 25511 }, { "epoch": 83.64590163934426, "grad_norm": 6.1723103523254395, "learning_rate": 1.3702576119476098e-06, "loss": 0.3933, "step": 25512 }, { "epoch": 83.64918032786885, "grad_norm": 7.565112590789795, "learning_rate": 1.3697211444087644e-06, "loss": 0.2444, "step": 25513 }, { "epoch": 83.65245901639344, "grad_norm": 5.1198554039001465, "learning_rate": 1.369184774185286e-06, "loss": 0.5509, "step": 25514 }, { "epoch": 83.65573770491804, "grad_norm": 4.2968430519104, "learning_rate": 1.3686485012832207e-06, "loss": 0.3802, "step": 25515 }, { "epoch": 83.65901639344263, "grad_norm": 4.480171203613281, "learning_rate": 1.3681123257086204e-06, "loss": 0.3801, "step": 25516 }, { "epoch": 83.66229508196722, "grad_norm": 5.307529449462891, "learning_rate": 1.3675762474675291e-06, "loss": 0.3887, "step": 25517 }, { "epoch": 83.6655737704918, "grad_norm": 7.026603698730469, "learning_rate": 1.367040266565991e-06, "loss": 0.4276, "step": 25518 }, { "epoch": 83.66885245901639, "grad_norm": 5.485440254211426, "learning_rate": 1.3665043830100489e-06, "loss": 0.4926, "step": 25519 }, { "epoch": 83.67213114754098, "grad_norm": 7.041551113128662, "learning_rate": 1.3659685968057457e-06, "loss": 0.4519, "step": 25520 }, { "epoch": 83.67540983606557, "grad_norm": 4.993297100067139, "learning_rate": 1.3654329079591243e-06, "loss": 0.3221, "step": 25521 }, { "epoch": 83.67868852459016, "grad_norm": 4.787394046783447, "learning_rate": 1.364897316476226e-06, "loss": 0.4051, "step": 25522 }, { "epoch": 83.68196721311476, "grad_norm": 4.0369768142700195, "learning_rate": 1.3643618223630883e-06, "loss": 0.1606, "step": 25523 }, { "epoch": 83.68524590163935, "grad_norm": 5.33909273147583, "learning_rate": 1.3638264256257473e-06, "loss": 0.2879, "step": 25524 }, { "epoch": 83.68852459016394, "grad_norm": 5.520630359649658, "learning_rate": 1.3632911262702454e-06, "loss": 0.2834, "step": 25525 }, { "epoch": 83.69180327868852, "grad_norm": 5.08116340637207, "learning_rate": 1.3627559243026155e-06, "loss": 0.4059, "step": 25526 }, { "epoch": 83.69508196721311, "grad_norm": 12.214090347290039, "learning_rate": 1.3622208197288933e-06, "loss": 0.365, "step": 25527 }, { "epoch": 83.6983606557377, "grad_norm": 4.914585113525391, "learning_rate": 1.3616858125551092e-06, "loss": 0.3382, "step": 25528 }, { "epoch": 83.70163934426229, "grad_norm": 5.887566089630127, "learning_rate": 1.3611509027873027e-06, "loss": 0.6878, "step": 25529 }, { "epoch": 83.70491803278688, "grad_norm": 12.35983657836914, "learning_rate": 1.3606160904315013e-06, "loss": 0.5724, "step": 25530 }, { "epoch": 83.70819672131148, "grad_norm": 4.1742377281188965, "learning_rate": 1.360081375493737e-06, "loss": 0.3652, "step": 25531 }, { "epoch": 83.71147540983607, "grad_norm": 8.807738304138184, "learning_rate": 1.359546757980037e-06, "loss": 0.3614, "step": 25532 }, { "epoch": 83.71475409836066, "grad_norm": 5.583738803863525, "learning_rate": 1.3590122378964299e-06, "loss": 0.6054, "step": 25533 }, { "epoch": 83.71803278688525, "grad_norm": 4.35371732711792, "learning_rate": 1.3584778152489465e-06, "loss": 0.4799, "step": 25534 }, { "epoch": 83.72131147540983, "grad_norm": 4.802356243133545, "learning_rate": 1.3579434900436105e-06, "loss": 0.3379, "step": 25535 }, { "epoch": 83.72459016393442, "grad_norm": 5.009592533111572, "learning_rate": 1.3574092622864465e-06, "loss": 0.3626, "step": 25536 }, { "epoch": 83.72786885245901, "grad_norm": 3.8369979858398438, "learning_rate": 1.3568751319834783e-06, "loss": 0.5747, "step": 25537 }, { "epoch": 83.73114754098361, "grad_norm": 6.537738800048828, "learning_rate": 1.35634109914073e-06, "loss": 0.3578, "step": 25538 }, { "epoch": 83.7344262295082, "grad_norm": 5.116487503051758, "learning_rate": 1.3558071637642245e-06, "loss": 0.4188, "step": 25539 }, { "epoch": 83.73770491803279, "grad_norm": 4.517775535583496, "learning_rate": 1.3552733258599804e-06, "loss": 0.2215, "step": 25540 }, { "epoch": 83.74098360655738, "grad_norm": 4.542948246002197, "learning_rate": 1.354739585434015e-06, "loss": 0.225, "step": 25541 }, { "epoch": 83.74426229508197, "grad_norm": 5.176174163818359, "learning_rate": 1.3542059424923526e-06, "loss": 0.1956, "step": 25542 }, { "epoch": 83.74754098360656, "grad_norm": 4.263942718505859, "learning_rate": 1.353672397041007e-06, "loss": 0.2289, "step": 25543 }, { "epoch": 83.75081967213114, "grad_norm": 3.823838472366333, "learning_rate": 1.3531389490859958e-06, "loss": 0.479, "step": 25544 }, { "epoch": 83.75409836065573, "grad_norm": 4.705795764923096, "learning_rate": 1.352605598633333e-06, "loss": 0.1679, "step": 25545 }, { "epoch": 83.75737704918033, "grad_norm": 4.819622993469238, "learning_rate": 1.3520723456890305e-06, "loss": 0.3747, "step": 25546 }, { "epoch": 83.76065573770492, "grad_norm": 4.553578853607178, "learning_rate": 1.351539190259107e-06, "loss": 0.3186, "step": 25547 }, { "epoch": 83.76393442622951, "grad_norm": 4.787629127502441, "learning_rate": 1.3510061323495704e-06, "loss": 0.2509, "step": 25548 }, { "epoch": 83.7672131147541, "grad_norm": 5.572867393493652, "learning_rate": 1.3504731719664333e-06, "loss": 0.4763, "step": 25549 }, { "epoch": 83.77049180327869, "grad_norm": 4.804939270019531, "learning_rate": 1.3499403091157015e-06, "loss": 0.3568, "step": 25550 }, { "epoch": 83.77377049180328, "grad_norm": 4.542148113250732, "learning_rate": 1.3494075438033882e-06, "loss": 0.3218, "step": 25551 }, { "epoch": 83.77704918032786, "grad_norm": 4.036242485046387, "learning_rate": 1.3488748760355009e-06, "loss": 0.227, "step": 25552 }, { "epoch": 83.78032786885245, "grad_norm": 4.026574611663818, "learning_rate": 1.3483423058180423e-06, "loss": 0.2376, "step": 25553 }, { "epoch": 83.78360655737706, "grad_norm": 4.208667755126953, "learning_rate": 1.3478098331570188e-06, "loss": 0.3314, "step": 25554 }, { "epoch": 83.78688524590164, "grad_norm": 4.441809177398682, "learning_rate": 1.347277458058437e-06, "loss": 0.3163, "step": 25555 }, { "epoch": 83.79016393442623, "grad_norm": 4.3533806800842285, "learning_rate": 1.3467451805282995e-06, "loss": 0.6099, "step": 25556 }, { "epoch": 83.79344262295082, "grad_norm": 3.47062349319458, "learning_rate": 1.346213000572606e-06, "loss": 0.3283, "step": 25557 }, { "epoch": 83.79672131147541, "grad_norm": 4.776370525360107, "learning_rate": 1.3456809181973573e-06, "loss": 0.2548, "step": 25558 }, { "epoch": 83.8, "grad_norm": 3.9881136417388916, "learning_rate": 1.3451489334085555e-06, "loss": 0.3919, "step": 25559 }, { "epoch": 83.80327868852459, "grad_norm": 4.543819904327393, "learning_rate": 1.3446170462121987e-06, "loss": 0.2725, "step": 25560 }, { "epoch": 83.80655737704917, "grad_norm": 3.9647490978240967, "learning_rate": 1.3440852566142825e-06, "loss": 0.3842, "step": 25561 }, { "epoch": 83.80983606557378, "grad_norm": 4.330167293548584, "learning_rate": 1.3435535646208076e-06, "loss": 0.4009, "step": 25562 }, { "epoch": 83.81311475409836, "grad_norm": 5.83281135559082, "learning_rate": 1.3430219702377655e-06, "loss": 0.3821, "step": 25563 }, { "epoch": 83.81639344262295, "grad_norm": 6.33535099029541, "learning_rate": 1.3424904734711497e-06, "loss": 0.4661, "step": 25564 }, { "epoch": 83.81967213114754, "grad_norm": 5.029538631439209, "learning_rate": 1.3419590743269573e-06, "loss": 0.4906, "step": 25565 }, { "epoch": 83.82295081967213, "grad_norm": 5.171255111694336, "learning_rate": 1.341427772811179e-06, "loss": 0.4736, "step": 25566 }, { "epoch": 83.82622950819672, "grad_norm": 4.768851280212402, "learning_rate": 1.3408965689298037e-06, "loss": 0.2056, "step": 25567 }, { "epoch": 83.8295081967213, "grad_norm": 4.033571720123291, "learning_rate": 1.3403654626888241e-06, "loss": 0.2051, "step": 25568 }, { "epoch": 83.8327868852459, "grad_norm": 4.646294593811035, "learning_rate": 1.3398344540942277e-06, "loss": 0.2668, "step": 25569 }, { "epoch": 83.8360655737705, "grad_norm": 4.328646659851074, "learning_rate": 1.3393035431520018e-06, "loss": 0.4107, "step": 25570 }, { "epoch": 83.83934426229509, "grad_norm": 4.761571407318115, "learning_rate": 1.338772729868134e-06, "loss": 0.2053, "step": 25571 }, { "epoch": 83.84262295081967, "grad_norm": 5.387983798980713, "learning_rate": 1.3382420142486064e-06, "loss": 0.3607, "step": 25572 }, { "epoch": 83.84590163934426, "grad_norm": 4.5813517570495605, "learning_rate": 1.3377113962994081e-06, "loss": 0.3338, "step": 25573 }, { "epoch": 83.84918032786885, "grad_norm": 8.373992919921875, "learning_rate": 1.3371808760265214e-06, "loss": 0.5131, "step": 25574 }, { "epoch": 83.85245901639344, "grad_norm": 4.201138496398926, "learning_rate": 1.336650453435926e-06, "loss": 0.2474, "step": 25575 }, { "epoch": 83.85573770491803, "grad_norm": 9.73258113861084, "learning_rate": 1.3361201285336034e-06, "loss": 0.5482, "step": 25576 }, { "epoch": 83.85901639344263, "grad_norm": 3.825058698654175, "learning_rate": 1.3355899013255358e-06, "loss": 0.3156, "step": 25577 }, { "epoch": 83.86229508196722, "grad_norm": 3.9481353759765625, "learning_rate": 1.3350597718177017e-06, "loss": 0.3158, "step": 25578 }, { "epoch": 83.8655737704918, "grad_norm": 4.298294544219971, "learning_rate": 1.3345297400160773e-06, "loss": 0.3293, "step": 25579 }, { "epoch": 83.8688524590164, "grad_norm": 4.8024516105651855, "learning_rate": 1.3339998059266402e-06, "loss": 0.3199, "step": 25580 }, { "epoch": 83.87213114754098, "grad_norm": 5.0996904373168945, "learning_rate": 1.3334699695553633e-06, "loss": 0.4543, "step": 25581 }, { "epoch": 83.87540983606557, "grad_norm": 4.570189476013184, "learning_rate": 1.3329402309082252e-06, "loss": 0.5733, "step": 25582 }, { "epoch": 83.87868852459016, "grad_norm": 6.181427478790283, "learning_rate": 1.3324105899911977e-06, "loss": 0.4319, "step": 25583 }, { "epoch": 83.88196721311475, "grad_norm": 5.413463115692139, "learning_rate": 1.3318810468102528e-06, "loss": 0.2898, "step": 25584 }, { "epoch": 83.88524590163935, "grad_norm": 5.165477752685547, "learning_rate": 1.3313516013713602e-06, "loss": 0.2074, "step": 25585 }, { "epoch": 83.88852459016394, "grad_norm": 5.184732913970947, "learning_rate": 1.3308222536804927e-06, "loss": 0.4194, "step": 25586 }, { "epoch": 83.89180327868853, "grad_norm": 5.625340461730957, "learning_rate": 1.330293003743619e-06, "loss": 0.339, "step": 25587 }, { "epoch": 83.89508196721312, "grad_norm": 4.945412635803223, "learning_rate": 1.3297638515667055e-06, "loss": 0.4055, "step": 25588 }, { "epoch": 83.8983606557377, "grad_norm": 4.588108539581299, "learning_rate": 1.3292347971557162e-06, "loss": 0.5802, "step": 25589 }, { "epoch": 83.90163934426229, "grad_norm": 4.620436191558838, "learning_rate": 1.328705840516623e-06, "loss": 0.2236, "step": 25590 }, { "epoch": 83.90491803278688, "grad_norm": 4.975064277648926, "learning_rate": 1.328176981655388e-06, "loss": 0.4797, "step": 25591 }, { "epoch": 83.90819672131147, "grad_norm": 3.335923671722412, "learning_rate": 1.3276482205779727e-06, "loss": 0.4341, "step": 25592 }, { "epoch": 83.91147540983607, "grad_norm": 4.401823043823242, "learning_rate": 1.3271195572903418e-06, "loss": 0.557, "step": 25593 }, { "epoch": 83.91475409836066, "grad_norm": 5.202513694763184, "learning_rate": 1.326590991798452e-06, "loss": 0.4086, "step": 25594 }, { "epoch": 83.91803278688525, "grad_norm": 18.410511016845703, "learning_rate": 1.3260625241082704e-06, "loss": 0.4187, "step": 25595 }, { "epoch": 83.92131147540984, "grad_norm": 5.648209095001221, "learning_rate": 1.3255341542257515e-06, "loss": 0.2853, "step": 25596 }, { "epoch": 83.92459016393443, "grad_norm": 6.220576763153076, "learning_rate": 1.3250058821568546e-06, "loss": 0.1891, "step": 25597 }, { "epoch": 83.92786885245901, "grad_norm": 5.05501127243042, "learning_rate": 1.3244777079075332e-06, "loss": 0.3385, "step": 25598 }, { "epoch": 83.9311475409836, "grad_norm": 5.161456108093262, "learning_rate": 1.3239496314837486e-06, "loss": 0.2259, "step": 25599 }, { "epoch": 83.93442622950819, "grad_norm": 2.915011167526245, "learning_rate": 1.3234216528914534e-06, "loss": 0.2993, "step": 25600 }, { "epoch": 83.9377049180328, "grad_norm": 5.493282318115234, "learning_rate": 1.3228937721365997e-06, "loss": 0.2298, "step": 25601 }, { "epoch": 83.94098360655738, "grad_norm": 9.95486831665039, "learning_rate": 1.3223659892251384e-06, "loss": 0.2754, "step": 25602 }, { "epoch": 83.94426229508197, "grad_norm": 4.764378547668457, "learning_rate": 1.3218383041630257e-06, "loss": 0.3892, "step": 25603 }, { "epoch": 83.94754098360656, "grad_norm": 5.989047050476074, "learning_rate": 1.321310716956209e-06, "loss": 0.5664, "step": 25604 }, { "epoch": 83.95081967213115, "grad_norm": 4.655505180358887, "learning_rate": 1.320783227610637e-06, "loss": 0.6151, "step": 25605 }, { "epoch": 83.95409836065573, "grad_norm": 4.066325664520264, "learning_rate": 1.3202558361322593e-06, "loss": 0.3443, "step": 25606 }, { "epoch": 83.95737704918032, "grad_norm": 4.823190689086914, "learning_rate": 1.319728542527019e-06, "loss": 0.3189, "step": 25607 }, { "epoch": 83.96065573770491, "grad_norm": 11.440642356872559, "learning_rate": 1.3192013468008659e-06, "loss": 0.4739, "step": 25608 }, { "epoch": 83.96393442622951, "grad_norm": 4.6424102783203125, "learning_rate": 1.3186742489597448e-06, "loss": 0.367, "step": 25609 }, { "epoch": 83.9672131147541, "grad_norm": 9.545269966125488, "learning_rate": 1.3181472490095949e-06, "loss": 0.5068, "step": 25610 }, { "epoch": 83.97049180327869, "grad_norm": 7.1749958992004395, "learning_rate": 1.3176203469563641e-06, "loss": 0.4674, "step": 25611 }, { "epoch": 83.97377049180328, "grad_norm": 4.573359489440918, "learning_rate": 1.3170935428059905e-06, "loss": 0.4, "step": 25612 }, { "epoch": 83.97704918032787, "grad_norm": 4.783077239990234, "learning_rate": 1.3165668365644136e-06, "loss": 0.3903, "step": 25613 }, { "epoch": 83.98032786885246, "grad_norm": 4.3059515953063965, "learning_rate": 1.3160402282375762e-06, "loss": 0.3833, "step": 25614 }, { "epoch": 83.98360655737704, "grad_norm": 6.047746181488037, "learning_rate": 1.3155137178314148e-06, "loss": 0.4963, "step": 25615 }, { "epoch": 83.98688524590163, "grad_norm": 6.686910629272461, "learning_rate": 1.3149873053518659e-06, "loss": 0.3902, "step": 25616 }, { "epoch": 83.99016393442623, "grad_norm": 4.968437671661377, "learning_rate": 1.3144609908048622e-06, "loss": 0.4256, "step": 25617 }, { "epoch": 83.99344262295082, "grad_norm": 5.585901737213135, "learning_rate": 1.313934774196345e-06, "loss": 0.305, "step": 25618 }, { "epoch": 83.99672131147541, "grad_norm": 4.072494029998779, "learning_rate": 1.3134086555322435e-06, "loss": 0.222, "step": 25619 }, { "epoch": 84.0, "grad_norm": 4.995166778564453, "learning_rate": 1.3128826348184886e-06, "loss": 0.2503, "step": 25620 }, { "epoch": 84.00327868852459, "grad_norm": 11.031171798706055, "learning_rate": 1.312356712061017e-06, "loss": 0.3758, "step": 25621 }, { "epoch": 84.00655737704918, "grad_norm": 4.100174427032471, "learning_rate": 1.311830887265757e-06, "loss": 0.4004, "step": 25622 }, { "epoch": 84.00983606557377, "grad_norm": 4.977358341217041, "learning_rate": 1.3113051604386361e-06, "loss": 0.409, "step": 25623 }, { "epoch": 84.01311475409837, "grad_norm": 5.557905197143555, "learning_rate": 1.310779531585582e-06, "loss": 0.3926, "step": 25624 }, { "epoch": 84.01639344262296, "grad_norm": 3.992457151412964, "learning_rate": 1.3102540007125254e-06, "loss": 0.4838, "step": 25625 }, { "epoch": 84.01967213114754, "grad_norm": 4.899331092834473, "learning_rate": 1.309728567825389e-06, "loss": 0.4311, "step": 25626 }, { "epoch": 84.02295081967213, "grad_norm": 3.373844623565674, "learning_rate": 1.3092032329300997e-06, "loss": 0.2633, "step": 25627 }, { "epoch": 84.02622950819672, "grad_norm": 5.165089130401611, "learning_rate": 1.308677996032578e-06, "loss": 0.281, "step": 25628 }, { "epoch": 84.02950819672131, "grad_norm": 95.26359558105469, "learning_rate": 1.3081528571387504e-06, "loss": 0.5189, "step": 25629 }, { "epoch": 84.0327868852459, "grad_norm": 8.028388023376465, "learning_rate": 1.3076278162545365e-06, "loss": 0.4061, "step": 25630 }, { "epoch": 84.03606557377049, "grad_norm": 5.257213115692139, "learning_rate": 1.307102873385857e-06, "loss": 0.4183, "step": 25631 }, { "epoch": 84.03934426229509, "grad_norm": 5.222053050994873, "learning_rate": 1.3065780285386308e-06, "loss": 0.3804, "step": 25632 }, { "epoch": 84.04262295081968, "grad_norm": 11.487505912780762, "learning_rate": 1.3060532817187743e-06, "loss": 0.3365, "step": 25633 }, { "epoch": 84.04590163934427, "grad_norm": 6.250740051269531, "learning_rate": 1.3055286329322082e-06, "loss": 0.2807, "step": 25634 }, { "epoch": 84.04918032786885, "grad_norm": 5.4043450355529785, "learning_rate": 1.3050040821848476e-06, "loss": 0.344, "step": 25635 }, { "epoch": 84.05245901639344, "grad_norm": 6.449398040771484, "learning_rate": 1.3044796294826056e-06, "loss": 0.2279, "step": 25636 }, { "epoch": 84.05573770491803, "grad_norm": 4.338031768798828, "learning_rate": 1.3039552748313945e-06, "loss": 0.0919, "step": 25637 }, { "epoch": 84.05901639344262, "grad_norm": 4.698768615722656, "learning_rate": 1.3034310182371323e-06, "loss": 0.4286, "step": 25638 }, { "epoch": 84.0622950819672, "grad_norm": 6.803460121154785, "learning_rate": 1.3029068597057272e-06, "loss": 0.5405, "step": 25639 }, { "epoch": 84.06557377049181, "grad_norm": 5.350253105163574, "learning_rate": 1.3023827992430904e-06, "loss": 0.2909, "step": 25640 }, { "epoch": 84.0688524590164, "grad_norm": 5.3028764724731445, "learning_rate": 1.3018588368551278e-06, "loss": 0.4723, "step": 25641 }, { "epoch": 84.07213114754099, "grad_norm": 4.312860488891602, "learning_rate": 1.301334972547753e-06, "loss": 0.3049, "step": 25642 }, { "epoch": 84.07540983606557, "grad_norm": 4.989782333374023, "learning_rate": 1.3008112063268707e-06, "loss": 0.4017, "step": 25643 }, { "epoch": 84.07868852459016, "grad_norm": 6.727983474731445, "learning_rate": 1.300287538198387e-06, "loss": 0.4676, "step": 25644 }, { "epoch": 84.08196721311475, "grad_norm": 4.4801459312438965, "learning_rate": 1.2997639681682072e-06, "loss": 0.2231, "step": 25645 }, { "epoch": 84.08524590163934, "grad_norm": 4.43047571182251, "learning_rate": 1.2992404962422323e-06, "loss": 0.3336, "step": 25646 }, { "epoch": 84.08852459016393, "grad_norm": 4.226130485534668, "learning_rate": 1.2987171224263695e-06, "loss": 0.3111, "step": 25647 }, { "epoch": 84.09180327868853, "grad_norm": 4.41124153137207, "learning_rate": 1.2981938467265176e-06, "loss": 0.3284, "step": 25648 }, { "epoch": 84.09508196721312, "grad_norm": 5.2924299240112305, "learning_rate": 1.2976706691485786e-06, "loss": 0.3861, "step": 25649 }, { "epoch": 84.09836065573771, "grad_norm": 4.831655025482178, "learning_rate": 1.2971475896984475e-06, "loss": 0.3472, "step": 25650 }, { "epoch": 84.1016393442623, "grad_norm": 5.03205680847168, "learning_rate": 1.29662460838203e-06, "loss": 0.393, "step": 25651 }, { "epoch": 84.10491803278688, "grad_norm": 3.382037878036499, "learning_rate": 1.2961017252052176e-06, "loss": 0.2743, "step": 25652 }, { "epoch": 84.10819672131147, "grad_norm": 7.689664363861084, "learning_rate": 1.2955789401739094e-06, "loss": 0.4058, "step": 25653 }, { "epoch": 84.11147540983606, "grad_norm": 3.8154635429382324, "learning_rate": 1.295056253293996e-06, "loss": 0.3151, "step": 25654 }, { "epoch": 84.11475409836065, "grad_norm": 9.31859016418457, "learning_rate": 1.2945336645713758e-06, "loss": 0.4498, "step": 25655 }, { "epoch": 84.11803278688525, "grad_norm": 4.58242130279541, "learning_rate": 1.294011174011941e-06, "loss": 0.2416, "step": 25656 }, { "epoch": 84.12131147540984, "grad_norm": 4.855349063873291, "learning_rate": 1.2934887816215825e-06, "loss": 0.6251, "step": 25657 }, { "epoch": 84.12459016393443, "grad_norm": 7.647076606750488, "learning_rate": 1.2929664874061898e-06, "loss": 0.5166, "step": 25658 }, { "epoch": 84.12786885245902, "grad_norm": 3.9963231086730957, "learning_rate": 1.2924442913716507e-06, "loss": 0.3994, "step": 25659 }, { "epoch": 84.1311475409836, "grad_norm": 6.62930965423584, "learning_rate": 1.291922193523858e-06, "loss": 0.2794, "step": 25660 }, { "epoch": 84.1344262295082, "grad_norm": 9.04755687713623, "learning_rate": 1.291400193868697e-06, "loss": 0.6243, "step": 25661 }, { "epoch": 84.13770491803278, "grad_norm": 5.795501708984375, "learning_rate": 1.2908782924120534e-06, "loss": 0.3744, "step": 25662 }, { "epoch": 84.14098360655737, "grad_norm": 4.6989521980285645, "learning_rate": 1.2903564891598097e-06, "loss": 0.2917, "step": 25663 }, { "epoch": 84.14426229508197, "grad_norm": 4.502335548400879, "learning_rate": 1.289834784117855e-06, "loss": 0.3245, "step": 25664 }, { "epoch": 84.14754098360656, "grad_norm": 10.495039939880371, "learning_rate": 1.2893131772920685e-06, "loss": 0.517, "step": 25665 }, { "epoch": 84.15081967213115, "grad_norm": 5.693197727203369, "learning_rate": 1.2887916686883317e-06, "loss": 0.4537, "step": 25666 }, { "epoch": 84.15409836065574, "grad_norm": 6.674521446228027, "learning_rate": 1.2882702583125284e-06, "loss": 0.3326, "step": 25667 }, { "epoch": 84.15737704918033, "grad_norm": 5.329165935516357, "learning_rate": 1.2877489461705351e-06, "loss": 0.3277, "step": 25668 }, { "epoch": 84.16065573770491, "grad_norm": 4.394134044647217, "learning_rate": 1.2872277322682292e-06, "loss": 0.4384, "step": 25669 }, { "epoch": 84.1639344262295, "grad_norm": 3.804273843765259, "learning_rate": 1.2867066166114917e-06, "loss": 0.4829, "step": 25670 }, { "epoch": 84.1672131147541, "grad_norm": 5.940616607666016, "learning_rate": 1.2861855992061966e-06, "loss": 0.2783, "step": 25671 }, { "epoch": 84.1704918032787, "grad_norm": 3.8449628353118896, "learning_rate": 1.2856646800582172e-06, "loss": 0.5212, "step": 25672 }, { "epoch": 84.17377049180328, "grad_norm": 4.895927906036377, "learning_rate": 1.285143859173431e-06, "loss": 0.2881, "step": 25673 }, { "epoch": 84.17704918032787, "grad_norm": 3.756709337234497, "learning_rate": 1.284623136557709e-06, "loss": 0.2661, "step": 25674 }, { "epoch": 84.18032786885246, "grad_norm": 11.166712760925293, "learning_rate": 1.2841025122169225e-06, "loss": 0.3217, "step": 25675 }, { "epoch": 84.18360655737705, "grad_norm": 5.211026668548584, "learning_rate": 1.283581986156941e-06, "loss": 0.2662, "step": 25676 }, { "epoch": 84.18688524590164, "grad_norm": 7.631934642791748, "learning_rate": 1.283061558383637e-06, "loss": 0.3931, "step": 25677 }, { "epoch": 84.19016393442622, "grad_norm": 5.550719738006592, "learning_rate": 1.282541228902877e-06, "loss": 0.3553, "step": 25678 }, { "epoch": 84.19344262295083, "grad_norm": 7.1370673179626465, "learning_rate": 1.28202099772053e-06, "loss": 0.4381, "step": 25679 }, { "epoch": 84.19672131147541, "grad_norm": 32.31490707397461, "learning_rate": 1.2815008648424565e-06, "loss": 0.3696, "step": 25680 }, { "epoch": 84.2, "grad_norm": 4.755892276763916, "learning_rate": 1.2809808302745298e-06, "loss": 0.2467, "step": 25681 }, { "epoch": 84.20327868852459, "grad_norm": 6.3397111892700195, "learning_rate": 1.2804608940226082e-06, "loss": 0.7612, "step": 25682 }, { "epoch": 84.20655737704918, "grad_norm": 4.2397894859313965, "learning_rate": 1.2799410560925573e-06, "loss": 0.4619, "step": 25683 }, { "epoch": 84.20983606557377, "grad_norm": 10.031548500061035, "learning_rate": 1.2794213164902368e-06, "loss": 0.3936, "step": 25684 }, { "epoch": 84.21311475409836, "grad_norm": 6.294593334197998, "learning_rate": 1.2789016752215055e-06, "loss": 0.3863, "step": 25685 }, { "epoch": 84.21639344262294, "grad_norm": 4.710256576538086, "learning_rate": 1.2783821322922286e-06, "loss": 0.1671, "step": 25686 }, { "epoch": 84.21967213114755, "grad_norm": 4.666799068450928, "learning_rate": 1.2778626877082611e-06, "loss": 0.4404, "step": 25687 }, { "epoch": 84.22295081967214, "grad_norm": 4.149724006652832, "learning_rate": 1.27734334147546e-06, "loss": 0.507, "step": 25688 }, { "epoch": 84.22622950819672, "grad_norm": 4.006430625915527, "learning_rate": 1.27682409359968e-06, "loss": 0.3901, "step": 25689 }, { "epoch": 84.22950819672131, "grad_norm": 4.69501256942749, "learning_rate": 1.2763049440867814e-06, "loss": 0.3632, "step": 25690 }, { "epoch": 84.2327868852459, "grad_norm": 4.602898597717285, "learning_rate": 1.2757858929426136e-06, "loss": 0.2144, "step": 25691 }, { "epoch": 84.23606557377049, "grad_norm": 6.032453536987305, "learning_rate": 1.2752669401730321e-06, "loss": 0.4728, "step": 25692 }, { "epoch": 84.23934426229508, "grad_norm": 4.29262113571167, "learning_rate": 1.2747480857838846e-06, "loss": 0.3141, "step": 25693 }, { "epoch": 84.24262295081967, "grad_norm": 5.972306728363037, "learning_rate": 1.274229329781026e-06, "loss": 0.5476, "step": 25694 }, { "epoch": 84.24590163934427, "grad_norm": 6.066045761108398, "learning_rate": 1.2737106721703042e-06, "loss": 0.5914, "step": 25695 }, { "epoch": 84.24918032786886, "grad_norm": 4.936239242553711, "learning_rate": 1.2731921129575685e-06, "loss": 0.4445, "step": 25696 }, { "epoch": 84.25245901639344, "grad_norm": 5.109858989715576, "learning_rate": 1.272673652148665e-06, "loss": 0.3075, "step": 25697 }, { "epoch": 84.25573770491803, "grad_norm": 14.837797164916992, "learning_rate": 1.2721552897494372e-06, "loss": 0.24, "step": 25698 }, { "epoch": 84.25901639344262, "grad_norm": 5.074519634246826, "learning_rate": 1.2716370257657362e-06, "loss": 0.3349, "step": 25699 }, { "epoch": 84.26229508196721, "grad_norm": 5.667101860046387, "learning_rate": 1.2711188602034031e-06, "loss": 0.2003, "step": 25700 }, { "epoch": 84.2655737704918, "grad_norm": 5.102837085723877, "learning_rate": 1.2706007930682795e-06, "loss": 0.3777, "step": 25701 }, { "epoch": 84.26885245901639, "grad_norm": 5.201500415802002, "learning_rate": 1.2700828243662078e-06, "loss": 0.2671, "step": 25702 }, { "epoch": 84.27213114754099, "grad_norm": 4.416220664978027, "learning_rate": 1.2695649541030297e-06, "loss": 0.3621, "step": 25703 }, { "epoch": 84.27540983606558, "grad_norm": 4.018189430236816, "learning_rate": 1.2690471822845852e-06, "loss": 0.4922, "step": 25704 }, { "epoch": 84.27868852459017, "grad_norm": 7.03521728515625, "learning_rate": 1.2685295089167115e-06, "loss": 0.5474, "step": 25705 }, { "epoch": 84.28196721311475, "grad_norm": 8.389995574951172, "learning_rate": 1.2680119340052432e-06, "loss": 0.2761, "step": 25706 }, { "epoch": 84.28524590163934, "grad_norm": 6.547142028808594, "learning_rate": 1.2674944575560221e-06, "loss": 0.4574, "step": 25707 }, { "epoch": 84.28852459016393, "grad_norm": 4.205887794494629, "learning_rate": 1.2669770795748803e-06, "loss": 0.3352, "step": 25708 }, { "epoch": 84.29180327868852, "grad_norm": 5.4885663986206055, "learning_rate": 1.266459800067652e-06, "loss": 0.3796, "step": 25709 }, { "epoch": 84.29508196721312, "grad_norm": 5.660494327545166, "learning_rate": 1.2659426190401703e-06, "loss": 0.2424, "step": 25710 }, { "epoch": 84.29836065573771, "grad_norm": 7.805741786956787, "learning_rate": 1.2654255364982636e-06, "loss": 0.34, "step": 25711 }, { "epoch": 84.3016393442623, "grad_norm": 8.692988395690918, "learning_rate": 1.264908552447769e-06, "loss": 0.3526, "step": 25712 }, { "epoch": 84.30491803278689, "grad_norm": 5.02939510345459, "learning_rate": 1.2643916668945123e-06, "loss": 0.3364, "step": 25713 }, { "epoch": 84.30819672131148, "grad_norm": 6.236544609069824, "learning_rate": 1.2638748798443224e-06, "loss": 0.4512, "step": 25714 }, { "epoch": 84.31147540983606, "grad_norm": 4.916502952575684, "learning_rate": 1.2633581913030236e-06, "loss": 0.3202, "step": 25715 }, { "epoch": 84.31475409836065, "grad_norm": 8.121291160583496, "learning_rate": 1.2628416012764477e-06, "loss": 0.443, "step": 25716 }, { "epoch": 84.31803278688524, "grad_norm": 5.225851535797119, "learning_rate": 1.262325109770418e-06, "loss": 0.2944, "step": 25717 }, { "epoch": 84.32131147540984, "grad_norm": 4.464503765106201, "learning_rate": 1.2618087167907567e-06, "loss": 0.4565, "step": 25718 }, { "epoch": 84.32459016393443, "grad_norm": 3.794154167175293, "learning_rate": 1.2612924223432854e-06, "loss": 0.414, "step": 25719 }, { "epoch": 84.32786885245902, "grad_norm": 4.674829483032227, "learning_rate": 1.2607762264338297e-06, "loss": 0.4153, "step": 25720 }, { "epoch": 84.33114754098361, "grad_norm": 5.154465198516846, "learning_rate": 1.2602601290682094e-06, "loss": 0.2924, "step": 25721 }, { "epoch": 84.3344262295082, "grad_norm": 19.019861221313477, "learning_rate": 1.2597441302522407e-06, "loss": 0.4295, "step": 25722 }, { "epoch": 84.33770491803278, "grad_norm": 6.423211574554443, "learning_rate": 1.2592282299917468e-06, "loss": 0.3538, "step": 25723 }, { "epoch": 84.34098360655737, "grad_norm": 5.787572383880615, "learning_rate": 1.2587124282925435e-06, "loss": 0.3947, "step": 25724 }, { "epoch": 84.34426229508196, "grad_norm": 23.963348388671875, "learning_rate": 1.2581967251604422e-06, "loss": 0.2295, "step": 25725 }, { "epoch": 84.34754098360656, "grad_norm": 4.191034317016602, "learning_rate": 1.257681120601265e-06, "loss": 0.3549, "step": 25726 }, { "epoch": 84.35081967213115, "grad_norm": 5.447690486907959, "learning_rate": 1.2571656146208233e-06, "loss": 0.3424, "step": 25727 }, { "epoch": 84.35409836065574, "grad_norm": 5.755520820617676, "learning_rate": 1.2566502072249276e-06, "loss": 0.3313, "step": 25728 }, { "epoch": 84.35737704918033, "grad_norm": 3.797227382659912, "learning_rate": 1.2561348984193932e-06, "loss": 0.1511, "step": 25729 }, { "epoch": 84.36065573770492, "grad_norm": 4.150360584259033, "learning_rate": 1.2556196882100302e-06, "loss": 0.3928, "step": 25730 }, { "epoch": 84.3639344262295, "grad_norm": 6.070572853088379, "learning_rate": 1.2551045766026459e-06, "loss": 0.3341, "step": 25731 }, { "epoch": 84.3672131147541, "grad_norm": 3.916727066040039, "learning_rate": 1.254589563603048e-06, "loss": 0.3684, "step": 25732 }, { "epoch": 84.37049180327868, "grad_norm": 6.101372241973877, "learning_rate": 1.2540746492170476e-06, "loss": 0.5286, "step": 25733 }, { "epoch": 84.37377049180328, "grad_norm": 24.58010482788086, "learning_rate": 1.2535598334504496e-06, "loss": 0.4311, "step": 25734 }, { "epoch": 84.37704918032787, "grad_norm": 6.778799057006836, "learning_rate": 1.2530451163090585e-06, "loss": 0.4076, "step": 25735 }, { "epoch": 84.38032786885246, "grad_norm": 6.79254150390625, "learning_rate": 1.2525304977986784e-06, "loss": 0.5036, "step": 25736 }, { "epoch": 84.38360655737705, "grad_norm": 4.609060287475586, "learning_rate": 1.2520159779251096e-06, "loss": 0.2453, "step": 25737 }, { "epoch": 84.38688524590164, "grad_norm": 4.8507795333862305, "learning_rate": 1.251501556694158e-06, "loss": 0.4853, "step": 25738 }, { "epoch": 84.39016393442623, "grad_norm": 3.6591713428497314, "learning_rate": 1.2509872341116225e-06, "loss": 0.2813, "step": 25739 }, { "epoch": 84.39344262295081, "grad_norm": 4.783587455749512, "learning_rate": 1.2504730101833029e-06, "loss": 0.3162, "step": 25740 }, { "epoch": 84.3967213114754, "grad_norm": 7.409036636352539, "learning_rate": 1.2499588849149957e-06, "loss": 0.2973, "step": 25741 }, { "epoch": 84.4, "grad_norm": 3.9754748344421387, "learning_rate": 1.249444858312502e-06, "loss": 0.146, "step": 25742 }, { "epoch": 84.4032786885246, "grad_norm": 10.499122619628906, "learning_rate": 1.2489309303816144e-06, "loss": 0.5877, "step": 25743 }, { "epoch": 84.40655737704918, "grad_norm": 8.728487014770508, "learning_rate": 1.248417101128131e-06, "loss": 0.248, "step": 25744 }, { "epoch": 84.40983606557377, "grad_norm": 4.923343658447266, "learning_rate": 1.2479033705578414e-06, "loss": 0.294, "step": 25745 }, { "epoch": 84.41311475409836, "grad_norm": 11.259298324584961, "learning_rate": 1.2473897386765432e-06, "loss": 0.2912, "step": 25746 }, { "epoch": 84.41639344262295, "grad_norm": 5.365102291107178, "learning_rate": 1.2468762054900264e-06, "loss": 0.5361, "step": 25747 }, { "epoch": 84.41967213114754, "grad_norm": 5.667903900146484, "learning_rate": 1.2463627710040816e-06, "loss": 0.5045, "step": 25748 }, { "epoch": 84.42295081967212, "grad_norm": 6.015373229980469, "learning_rate": 1.2458494352244966e-06, "loss": 0.3585, "step": 25749 }, { "epoch": 84.42622950819673, "grad_norm": 5.0006866455078125, "learning_rate": 1.245336198157061e-06, "loss": 0.337, "step": 25750 }, { "epoch": 84.42950819672132, "grad_norm": 4.522140026092529, "learning_rate": 1.2448230598075627e-06, "loss": 0.5602, "step": 25751 }, { "epoch": 84.4327868852459, "grad_norm": 5.339937210083008, "learning_rate": 1.2443100201817892e-06, "loss": 0.4752, "step": 25752 }, { "epoch": 84.43606557377049, "grad_norm": 5.629638195037842, "learning_rate": 1.2437970792855225e-06, "loss": 0.3716, "step": 25753 }, { "epoch": 84.43934426229508, "grad_norm": 6.395479679107666, "learning_rate": 1.2432842371245468e-06, "loss": 0.4809, "step": 25754 }, { "epoch": 84.44262295081967, "grad_norm": 3.972200870513916, "learning_rate": 1.2427714937046476e-06, "loss": 0.3667, "step": 25755 }, { "epoch": 84.44590163934426, "grad_norm": 4.585238456726074, "learning_rate": 1.2422588490316056e-06, "loss": 0.2441, "step": 25756 }, { "epoch": 84.44918032786886, "grad_norm": 4.935293197631836, "learning_rate": 1.2417463031111998e-06, "loss": 0.5784, "step": 25757 }, { "epoch": 84.45245901639345, "grad_norm": 3.7201077938079834, "learning_rate": 1.2412338559492099e-06, "loss": 0.4392, "step": 25758 }, { "epoch": 84.45573770491804, "grad_norm": 4.599332809448242, "learning_rate": 1.2407215075514157e-06, "loss": 0.3795, "step": 25759 }, { "epoch": 84.45901639344262, "grad_norm": 7.033146381378174, "learning_rate": 1.2402092579235948e-06, "loss": 0.2445, "step": 25760 }, { "epoch": 84.46229508196721, "grad_norm": 5.428157329559326, "learning_rate": 1.2396971070715226e-06, "loss": 0.5779, "step": 25761 }, { "epoch": 84.4655737704918, "grad_norm": 5.2020134925842285, "learning_rate": 1.2391850550009743e-06, "loss": 0.4977, "step": 25762 }, { "epoch": 84.46885245901639, "grad_norm": 4.758561134338379, "learning_rate": 1.23867310171772e-06, "loss": 0.3553, "step": 25763 }, { "epoch": 84.47213114754098, "grad_norm": 4.6314215660095215, "learning_rate": 1.2381612472275395e-06, "loss": 0.1383, "step": 25764 }, { "epoch": 84.47540983606558, "grad_norm": 4.772921562194824, "learning_rate": 1.2376494915362003e-06, "loss": 0.2755, "step": 25765 }, { "epoch": 84.47868852459017, "grad_norm": 6.474032402038574, "learning_rate": 1.2371378346494733e-06, "loss": 0.3853, "step": 25766 }, { "epoch": 84.48196721311476, "grad_norm": 4.943482875823975, "learning_rate": 1.2366262765731264e-06, "loss": 0.39, "step": 25767 }, { "epoch": 84.48524590163935, "grad_norm": 5.538001537322998, "learning_rate": 1.2361148173129323e-06, "loss": 0.3157, "step": 25768 }, { "epoch": 84.48852459016393, "grad_norm": 5.1340155601501465, "learning_rate": 1.2356034568746554e-06, "loss": 0.3077, "step": 25769 }, { "epoch": 84.49180327868852, "grad_norm": 4.08870792388916, "learning_rate": 1.2350921952640627e-06, "loss": 0.563, "step": 25770 }, { "epoch": 84.49508196721311, "grad_norm": 4.553900718688965, "learning_rate": 1.2345810324869156e-06, "loss": 0.5193, "step": 25771 }, { "epoch": 84.4983606557377, "grad_norm": 4.800388336181641, "learning_rate": 1.2340699685489844e-06, "loss": 0.2823, "step": 25772 }, { "epoch": 84.5016393442623, "grad_norm": 5.507325649261475, "learning_rate": 1.2335590034560285e-06, "loss": 0.4755, "step": 25773 }, { "epoch": 84.50491803278689, "grad_norm": 4.300413608551025, "learning_rate": 1.233048137213807e-06, "loss": 0.3533, "step": 25774 }, { "epoch": 84.50819672131148, "grad_norm": 4.583999156951904, "learning_rate": 1.2325373698280852e-06, "loss": 0.3843, "step": 25775 }, { "epoch": 84.51147540983607, "grad_norm": 5.846457004547119, "learning_rate": 1.2320267013046206e-06, "loss": 0.3669, "step": 25776 }, { "epoch": 84.51475409836065, "grad_norm": 9.43199348449707, "learning_rate": 1.2315161316491685e-06, "loss": 0.466, "step": 25777 }, { "epoch": 84.51803278688524, "grad_norm": 5.052135944366455, "learning_rate": 1.2310056608674925e-06, "loss": 0.3001, "step": 25778 }, { "epoch": 84.52131147540983, "grad_norm": 4.099625587463379, "learning_rate": 1.2304952889653444e-06, "loss": 0.3385, "step": 25779 }, { "epoch": 84.52459016393442, "grad_norm": 4.72567081451416, "learning_rate": 1.2299850159484794e-06, "loss": 0.5401, "step": 25780 }, { "epoch": 84.52786885245902, "grad_norm": 4.485829830169678, "learning_rate": 1.229474841822651e-06, "loss": 0.3921, "step": 25781 }, { "epoch": 84.53114754098361, "grad_norm": 4.60252046585083, "learning_rate": 1.2289647665936143e-06, "loss": 0.2982, "step": 25782 }, { "epoch": 84.5344262295082, "grad_norm": 5.400003910064697, "learning_rate": 1.2284547902671195e-06, "loss": 0.2535, "step": 25783 }, { "epoch": 84.53770491803279, "grad_norm": 5.22202730178833, "learning_rate": 1.227944912848914e-06, "loss": 0.227, "step": 25784 }, { "epoch": 84.54098360655738, "grad_norm": 4.942856311798096, "learning_rate": 1.2274351343447533e-06, "loss": 0.438, "step": 25785 }, { "epoch": 84.54426229508196, "grad_norm": 7.337334632873535, "learning_rate": 1.2269254547603826e-06, "loss": 0.4975, "step": 25786 }, { "epoch": 84.54754098360655, "grad_norm": 5.585437297821045, "learning_rate": 1.2264158741015497e-06, "loss": 0.49, "step": 25787 }, { "epoch": 84.55081967213114, "grad_norm": 4.294212341308594, "learning_rate": 1.2259063923739988e-06, "loss": 0.521, "step": 25788 }, { "epoch": 84.55409836065574, "grad_norm": 4.127471923828125, "learning_rate": 1.2253970095834744e-06, "loss": 0.2299, "step": 25789 }, { "epoch": 84.55737704918033, "grad_norm": 5.510618209838867, "learning_rate": 1.224887725735725e-06, "loss": 0.5413, "step": 25790 }, { "epoch": 84.56065573770492, "grad_norm": 4.4232635498046875, "learning_rate": 1.2243785408364895e-06, "loss": 0.2956, "step": 25791 }, { "epoch": 84.56393442622951, "grad_norm": 5.097114562988281, "learning_rate": 1.2238694548915109e-06, "loss": 0.2989, "step": 25792 }, { "epoch": 84.5672131147541, "grad_norm": 5.025040149688721, "learning_rate": 1.2233604679065259e-06, "loss": 0.3956, "step": 25793 }, { "epoch": 84.57049180327868, "grad_norm": 4.245690822601318, "learning_rate": 1.2228515798872797e-06, "loss": 0.3113, "step": 25794 }, { "epoch": 84.57377049180327, "grad_norm": 17.79958724975586, "learning_rate": 1.222342790839508e-06, "loss": 0.3795, "step": 25795 }, { "epoch": 84.57704918032788, "grad_norm": 4.992424011230469, "learning_rate": 1.2218341007689483e-06, "loss": 0.3357, "step": 25796 }, { "epoch": 84.58032786885246, "grad_norm": 4.789069652557373, "learning_rate": 1.2213255096813325e-06, "loss": 0.2707, "step": 25797 }, { "epoch": 84.58360655737705, "grad_norm": 6.047327518463135, "learning_rate": 1.220817017582403e-06, "loss": 0.235, "step": 25798 }, { "epoch": 84.58688524590164, "grad_norm": 4.430704593658447, "learning_rate": 1.2203086244778883e-06, "loss": 0.4586, "step": 25799 }, { "epoch": 84.59016393442623, "grad_norm": 4.101248741149902, "learning_rate": 1.219800330373524e-06, "loss": 0.3761, "step": 25800 }, { "epoch": 84.59344262295082, "grad_norm": 4.814366817474365, "learning_rate": 1.2192921352750387e-06, "loss": 0.4174, "step": 25801 }, { "epoch": 84.5967213114754, "grad_norm": 4.914118766784668, "learning_rate": 1.2187840391881623e-06, "loss": 0.3647, "step": 25802 }, { "epoch": 84.6, "grad_norm": 7.959709167480469, "learning_rate": 1.218276042118629e-06, "loss": 0.4928, "step": 25803 }, { "epoch": 84.6032786885246, "grad_norm": 6.310919284820557, "learning_rate": 1.2177681440721635e-06, "loss": 0.3494, "step": 25804 }, { "epoch": 84.60655737704919, "grad_norm": 4.83306360244751, "learning_rate": 1.2172603450544928e-06, "loss": 0.1976, "step": 25805 }, { "epoch": 84.60983606557377, "grad_norm": 4.913966655731201, "learning_rate": 1.2167526450713418e-06, "loss": 0.2974, "step": 25806 }, { "epoch": 84.61311475409836, "grad_norm": 6.315331935882568, "learning_rate": 1.216245044128439e-06, "loss": 0.3868, "step": 25807 }, { "epoch": 84.61639344262295, "grad_norm": 4.234615802764893, "learning_rate": 1.2157375422315065e-06, "loss": 0.1864, "step": 25808 }, { "epoch": 84.61967213114754, "grad_norm": 4.780302047729492, "learning_rate": 1.2152301393862665e-06, "loss": 0.4058, "step": 25809 }, { "epoch": 84.62295081967213, "grad_norm": 4.087810039520264, "learning_rate": 1.2147228355984387e-06, "loss": 0.3092, "step": 25810 }, { "epoch": 84.62622950819672, "grad_norm": 4.338942050933838, "learning_rate": 1.2142156308737464e-06, "loss": 0.4017, "step": 25811 }, { "epoch": 84.62950819672132, "grad_norm": 3.9759740829467773, "learning_rate": 1.2137085252179092e-06, "loss": 0.235, "step": 25812 }, { "epoch": 84.6327868852459, "grad_norm": 5.661607265472412, "learning_rate": 1.213201518636643e-06, "loss": 0.2285, "step": 25813 }, { "epoch": 84.6360655737705, "grad_norm": 4.697462558746338, "learning_rate": 1.2126946111356651e-06, "loss": 0.4433, "step": 25814 }, { "epoch": 84.63934426229508, "grad_norm": 4.905675411224365, "learning_rate": 1.2121878027206912e-06, "loss": 0.3706, "step": 25815 }, { "epoch": 84.64262295081967, "grad_norm": 6.460015296936035, "learning_rate": 1.2116810933974377e-06, "loss": 0.4141, "step": 25816 }, { "epoch": 84.64590163934426, "grad_norm": 5.567919731140137, "learning_rate": 1.2111744831716188e-06, "loss": 0.4366, "step": 25817 }, { "epoch": 84.64918032786885, "grad_norm": 5.3162360191345215, "learning_rate": 1.2106679720489445e-06, "loss": 0.362, "step": 25818 }, { "epoch": 84.65245901639344, "grad_norm": 4.325389862060547, "learning_rate": 1.2101615600351258e-06, "loss": 0.251, "step": 25819 }, { "epoch": 84.65573770491804, "grad_norm": 8.302382469177246, "learning_rate": 1.2096552471358768e-06, "loss": 0.3293, "step": 25820 }, { "epoch": 84.65901639344263, "grad_norm": 5.417243480682373, "learning_rate": 1.2091490333569044e-06, "loss": 0.3475, "step": 25821 }, { "epoch": 84.66229508196722, "grad_norm": 4.028064727783203, "learning_rate": 1.2086429187039172e-06, "loss": 0.1987, "step": 25822 }, { "epoch": 84.6655737704918, "grad_norm": 4.7746262550354, "learning_rate": 1.2081369031826185e-06, "loss": 0.3148, "step": 25823 }, { "epoch": 84.66885245901639, "grad_norm": 4.282886981964111, "learning_rate": 1.2076309867987212e-06, "loss": 0.5409, "step": 25824 }, { "epoch": 84.67213114754098, "grad_norm": 4.8269219398498535, "learning_rate": 1.2071251695579255e-06, "loss": 0.5098, "step": 25825 }, { "epoch": 84.67540983606557, "grad_norm": 4.5658183097839355, "learning_rate": 1.2066194514659356e-06, "loss": 0.4378, "step": 25826 }, { "epoch": 84.67868852459016, "grad_norm": 5.357815265655518, "learning_rate": 1.2061138325284528e-06, "loss": 0.4166, "step": 25827 }, { "epoch": 84.68196721311476, "grad_norm": 40.53517150878906, "learning_rate": 1.2056083127511808e-06, "loss": 0.385, "step": 25828 }, { "epoch": 84.68524590163935, "grad_norm": 4.6165971755981445, "learning_rate": 1.20510289213982e-06, "loss": 0.5303, "step": 25829 }, { "epoch": 84.68852459016394, "grad_norm": 5.207580089569092, "learning_rate": 1.2045975707000657e-06, "loss": 0.4181, "step": 25830 }, { "epoch": 84.69180327868852, "grad_norm": 9.763522148132324, "learning_rate": 1.2040923484376221e-06, "loss": 0.3523, "step": 25831 }, { "epoch": 84.69508196721311, "grad_norm": 4.681564807891846, "learning_rate": 1.2035872253581816e-06, "loss": 0.507, "step": 25832 }, { "epoch": 84.6983606557377, "grad_norm": 6.208982467651367, "learning_rate": 1.2030822014674392e-06, "loss": 0.2824, "step": 25833 }, { "epoch": 84.70163934426229, "grad_norm": 7.555271625518799, "learning_rate": 1.2025772767710931e-06, "loss": 0.5632, "step": 25834 }, { "epoch": 84.70491803278688, "grad_norm": 5.245825290679932, "learning_rate": 1.2020724512748362e-06, "loss": 0.1776, "step": 25835 }, { "epoch": 84.70819672131148, "grad_norm": 5.978392601013184, "learning_rate": 1.2015677249843572e-06, "loss": 0.4961, "step": 25836 }, { "epoch": 84.71147540983607, "grad_norm": 5.661495208740234, "learning_rate": 1.2010630979053527e-06, "loss": 0.4585, "step": 25837 }, { "epoch": 84.71475409836066, "grad_norm": 7.4936017990112305, "learning_rate": 1.2005585700435096e-06, "loss": 0.399, "step": 25838 }, { "epoch": 84.71803278688525, "grad_norm": 10.260628700256348, "learning_rate": 1.2000541414045185e-06, "loss": 0.5197, "step": 25839 }, { "epoch": 84.72131147540983, "grad_norm": 4.316810131072998, "learning_rate": 1.1995498119940663e-06, "loss": 0.3806, "step": 25840 }, { "epoch": 84.72459016393442, "grad_norm": 4.5689697265625, "learning_rate": 1.1990455818178382e-06, "loss": 0.5478, "step": 25841 }, { "epoch": 84.72786885245901, "grad_norm": 4.651968002319336, "learning_rate": 1.198541450881524e-06, "loss": 0.3539, "step": 25842 }, { "epoch": 84.73114754098361, "grad_norm": 4.00513219833374, "learning_rate": 1.1980374191908061e-06, "loss": 0.6583, "step": 25843 }, { "epoch": 84.7344262295082, "grad_norm": 3.6463348865509033, "learning_rate": 1.1975334867513687e-06, "loss": 0.3214, "step": 25844 }, { "epoch": 84.73770491803279, "grad_norm": 4.37913179397583, "learning_rate": 1.1970296535688909e-06, "loss": 0.2401, "step": 25845 }, { "epoch": 84.74098360655738, "grad_norm": 4.858579635620117, "learning_rate": 1.1965259196490574e-06, "loss": 0.2963, "step": 25846 }, { "epoch": 84.74426229508197, "grad_norm": 3.9739222526550293, "learning_rate": 1.1960222849975488e-06, "loss": 0.5843, "step": 25847 }, { "epoch": 84.74754098360656, "grad_norm": 4.090814590454102, "learning_rate": 1.1955187496200427e-06, "loss": 0.3552, "step": 25848 }, { "epoch": 84.75081967213114, "grad_norm": 4.530030250549316, "learning_rate": 1.1950153135222152e-06, "loss": 0.3357, "step": 25849 }, { "epoch": 84.75409836065573, "grad_norm": 5.257428169250488, "learning_rate": 1.194511976709747e-06, "loss": 0.3134, "step": 25850 }, { "epoch": 84.75737704918033, "grad_norm": 4.29077672958374, "learning_rate": 1.1940087391883104e-06, "loss": 0.3449, "step": 25851 }, { "epoch": 84.76065573770492, "grad_norm": 7.532749176025391, "learning_rate": 1.1935056009635826e-06, "loss": 0.395, "step": 25852 }, { "epoch": 84.76393442622951, "grad_norm": 6.135979175567627, "learning_rate": 1.1930025620412355e-06, "loss": 0.4258, "step": 25853 }, { "epoch": 84.7672131147541, "grad_norm": 7.897559642791748, "learning_rate": 1.192499622426938e-06, "loss": 0.3508, "step": 25854 }, { "epoch": 84.77049180327869, "grad_norm": 4.898955345153809, "learning_rate": 1.191996782126369e-06, "loss": 0.4665, "step": 25855 }, { "epoch": 84.77377049180328, "grad_norm": 6.807145595550537, "learning_rate": 1.1914940411451925e-06, "loss": 0.3099, "step": 25856 }, { "epoch": 84.77704918032786, "grad_norm": 4.379804611206055, "learning_rate": 1.1909913994890797e-06, "loss": 0.3278, "step": 25857 }, { "epoch": 84.78032786885245, "grad_norm": 4.643850803375244, "learning_rate": 1.1904888571636963e-06, "loss": 0.4499, "step": 25858 }, { "epoch": 84.78360655737706, "grad_norm": 4.894213676452637, "learning_rate": 1.1899864141747131e-06, "loss": 0.2981, "step": 25859 }, { "epoch": 84.78688524590164, "grad_norm": 9.9577054977417, "learning_rate": 1.1894840705277922e-06, "loss": 0.3304, "step": 25860 }, { "epoch": 84.79016393442623, "grad_norm": 4.742063522338867, "learning_rate": 1.188981826228599e-06, "loss": 0.3863, "step": 25861 }, { "epoch": 84.79344262295082, "grad_norm": 4.228828430175781, "learning_rate": 1.188479681282796e-06, "loss": 0.5932, "step": 25862 }, { "epoch": 84.79672131147541, "grad_norm": 4.422536849975586, "learning_rate": 1.187977635696047e-06, "loss": 0.2138, "step": 25863 }, { "epoch": 84.8, "grad_norm": 7.858107566833496, "learning_rate": 1.1874756894740137e-06, "loss": 0.337, "step": 25864 }, { "epoch": 84.80327868852459, "grad_norm": 4.613705635070801, "learning_rate": 1.1869738426223532e-06, "loss": 0.339, "step": 25865 }, { "epoch": 84.80655737704917, "grad_norm": 5.016792297363281, "learning_rate": 1.1864720951467267e-06, "loss": 0.5037, "step": 25866 }, { "epoch": 84.80983606557378, "grad_norm": 5.726505756378174, "learning_rate": 1.1859704470527888e-06, "loss": 0.3936, "step": 25867 }, { "epoch": 84.81311475409836, "grad_norm": 4.082520008087158, "learning_rate": 1.1854688983462003e-06, "loss": 0.5508, "step": 25868 }, { "epoch": 84.81639344262295, "grad_norm": 4.781201362609863, "learning_rate": 1.1849674490326157e-06, "loss": 0.4716, "step": 25869 }, { "epoch": 84.81967213114754, "grad_norm": 4.426141262054443, "learning_rate": 1.1844660991176882e-06, "loss": 0.5355, "step": 25870 }, { "epoch": 84.82295081967213, "grad_norm": 4.954982280731201, "learning_rate": 1.1839648486070687e-06, "loss": 0.2596, "step": 25871 }, { "epoch": 84.82622950819672, "grad_norm": 8.410066604614258, "learning_rate": 1.183463697506414e-06, "loss": 0.2191, "step": 25872 }, { "epoch": 84.8295081967213, "grad_norm": 7.3607096672058105, "learning_rate": 1.1829626458213738e-06, "loss": 0.331, "step": 25873 }, { "epoch": 84.8327868852459, "grad_norm": 6.102473258972168, "learning_rate": 1.182461693557596e-06, "loss": 0.2576, "step": 25874 }, { "epoch": 84.8360655737705, "grad_norm": 4.768650531768799, "learning_rate": 1.1819608407207294e-06, "loss": 0.3521, "step": 25875 }, { "epoch": 84.83934426229509, "grad_norm": 5.306131362915039, "learning_rate": 1.181460087316424e-06, "loss": 0.3919, "step": 25876 }, { "epoch": 84.84262295081967, "grad_norm": 5.400144577026367, "learning_rate": 1.180959433350326e-06, "loss": 0.3644, "step": 25877 }, { "epoch": 84.84590163934426, "grad_norm": 5.357058525085449, "learning_rate": 1.1804588788280792e-06, "loss": 0.3212, "step": 25878 }, { "epoch": 84.84918032786885, "grad_norm": 4.461447715759277, "learning_rate": 1.1799584237553274e-06, "loss": 0.3729, "step": 25879 }, { "epoch": 84.85245901639344, "grad_norm": 5.797774314880371, "learning_rate": 1.1794580681377155e-06, "loss": 0.2003, "step": 25880 }, { "epoch": 84.85573770491803, "grad_norm": 4.455497741699219, "learning_rate": 1.1789578119808864e-06, "loss": 0.2889, "step": 25881 }, { "epoch": 84.85901639344263, "grad_norm": 5.603047847747803, "learning_rate": 1.1784576552904792e-06, "loss": 0.3908, "step": 25882 }, { "epoch": 84.86229508196722, "grad_norm": 3.5869052410125732, "learning_rate": 1.1779575980721313e-06, "loss": 0.1761, "step": 25883 }, { "epoch": 84.8655737704918, "grad_norm": 9.032719612121582, "learning_rate": 1.1774576403314864e-06, "loss": 0.2834, "step": 25884 }, { "epoch": 84.8688524590164, "grad_norm": 5.150653839111328, "learning_rate": 1.1769577820741807e-06, "loss": 0.3801, "step": 25885 }, { "epoch": 84.87213114754098, "grad_norm": 4.620018005371094, "learning_rate": 1.1764580233058464e-06, "loss": 0.3592, "step": 25886 }, { "epoch": 84.87540983606557, "grad_norm": 3.958559989929199, "learning_rate": 1.1759583640321248e-06, "loss": 0.3032, "step": 25887 }, { "epoch": 84.87868852459016, "grad_norm": 5.918869972229004, "learning_rate": 1.1754588042586469e-06, "loss": 0.1921, "step": 25888 }, { "epoch": 84.88196721311475, "grad_norm": 6.240801811218262, "learning_rate": 1.1749593439910444e-06, "loss": 0.4121, "step": 25889 }, { "epoch": 84.88524590163935, "grad_norm": 14.372726440429688, "learning_rate": 1.1744599832349535e-06, "loss": 0.347, "step": 25890 }, { "epoch": 84.88852459016394, "grad_norm": 4.563727378845215, "learning_rate": 1.1739607219960026e-06, "loss": 0.2599, "step": 25891 }, { "epoch": 84.89180327868853, "grad_norm": 3.9898457527160645, "learning_rate": 1.1734615602798205e-06, "loss": 0.319, "step": 25892 }, { "epoch": 84.89508196721312, "grad_norm": 3.9542782306671143, "learning_rate": 1.1729624980920352e-06, "loss": 0.3393, "step": 25893 }, { "epoch": 84.8983606557377, "grad_norm": 6.029454231262207, "learning_rate": 1.1724635354382775e-06, "loss": 0.4923, "step": 25894 }, { "epoch": 84.90163934426229, "grad_norm": 3.9195847511291504, "learning_rate": 1.1719646723241707e-06, "loss": 0.2574, "step": 25895 }, { "epoch": 84.90491803278688, "grad_norm": 4.478836536407471, "learning_rate": 1.1714659087553426e-06, "loss": 0.4898, "step": 25896 }, { "epoch": 84.90819672131147, "grad_norm": 11.899362564086914, "learning_rate": 1.1709672447374132e-06, "loss": 0.2018, "step": 25897 }, { "epoch": 84.91147540983607, "grad_norm": 5.779626369476318, "learning_rate": 1.17046868027601e-06, "loss": 0.3298, "step": 25898 }, { "epoch": 84.91475409836066, "grad_norm": 6.121345043182373, "learning_rate": 1.1699702153767523e-06, "loss": 0.3616, "step": 25899 }, { "epoch": 84.91803278688525, "grad_norm": 4.749801158905029, "learning_rate": 1.1694718500452618e-06, "loss": 0.2539, "step": 25900 }, { "epoch": 84.92131147540984, "grad_norm": 5.154409885406494, "learning_rate": 1.1689735842871552e-06, "loss": 0.2251, "step": 25901 }, { "epoch": 84.92459016393443, "grad_norm": 6.50679349899292, "learning_rate": 1.1684754181080559e-06, "loss": 0.2617, "step": 25902 }, { "epoch": 84.92786885245901, "grad_norm": 4.814182281494141, "learning_rate": 1.1679773515135796e-06, "loss": 0.5321, "step": 25903 }, { "epoch": 84.9311475409836, "grad_norm": 5.34353494644165, "learning_rate": 1.1674793845093402e-06, "loss": 0.3647, "step": 25904 }, { "epoch": 84.93442622950819, "grad_norm": 4.177707195281982, "learning_rate": 1.1669815171009557e-06, "loss": 0.4281, "step": 25905 }, { "epoch": 84.9377049180328, "grad_norm": 4.77972936630249, "learning_rate": 1.166483749294035e-06, "loss": 0.3418, "step": 25906 }, { "epoch": 84.94098360655738, "grad_norm": 4.436031341552734, "learning_rate": 1.165986081094198e-06, "loss": 0.4209, "step": 25907 }, { "epoch": 84.94426229508197, "grad_norm": 6.716009140014648, "learning_rate": 1.1654885125070525e-06, "loss": 0.5299, "step": 25908 }, { "epoch": 84.94754098360656, "grad_norm": 7.487445831298828, "learning_rate": 1.1649910435382095e-06, "loss": 0.178, "step": 25909 }, { "epoch": 84.95081967213115, "grad_norm": 5.256946086883545, "learning_rate": 1.1644936741932755e-06, "loss": 0.5744, "step": 25910 }, { "epoch": 84.95409836065573, "grad_norm": 9.147467613220215, "learning_rate": 1.1639964044778652e-06, "loss": 0.3362, "step": 25911 }, { "epoch": 84.95737704918032, "grad_norm": 6.616178035736084, "learning_rate": 1.1634992343975826e-06, "loss": 0.2231, "step": 25912 }, { "epoch": 84.96065573770491, "grad_norm": 5.960622310638428, "learning_rate": 1.1630021639580335e-06, "loss": 0.3275, "step": 25913 }, { "epoch": 84.96393442622951, "grad_norm": 11.888482093811035, "learning_rate": 1.1625051931648212e-06, "loss": 0.4027, "step": 25914 }, { "epoch": 84.9672131147541, "grad_norm": 5.491804599761963, "learning_rate": 1.1620083220235534e-06, "loss": 0.3795, "step": 25915 }, { "epoch": 84.97049180327869, "grad_norm": 4.410815715789795, "learning_rate": 1.1615115505398323e-06, "loss": 0.3021, "step": 25916 }, { "epoch": 84.97377049180328, "grad_norm": 4.315524101257324, "learning_rate": 1.1610148787192565e-06, "loss": 0.3452, "step": 25917 }, { "epoch": 84.97704918032787, "grad_norm": 5.566204071044922, "learning_rate": 1.1605183065674285e-06, "loss": 0.4846, "step": 25918 }, { "epoch": 84.98032786885246, "grad_norm": 5.438114643096924, "learning_rate": 1.1600218340899461e-06, "loss": 0.3292, "step": 25919 }, { "epoch": 84.98360655737704, "grad_norm": 6.549191474914551, "learning_rate": 1.15952546129241e-06, "loss": 0.6, "step": 25920 }, { "epoch": 84.98688524590163, "grad_norm": 6.058956146240234, "learning_rate": 1.1590291881804162e-06, "loss": 0.2944, "step": 25921 }, { "epoch": 84.99016393442623, "grad_norm": 4.000202655792236, "learning_rate": 1.1585330147595608e-06, "loss": 0.3843, "step": 25922 }, { "epoch": 84.99344262295082, "grad_norm": 6.019015789031982, "learning_rate": 1.1580369410354365e-06, "loss": 0.3936, "step": 25923 }, { "epoch": 84.99672131147541, "grad_norm": 7.6180877685546875, "learning_rate": 1.1575409670136417e-06, "loss": 0.3256, "step": 25924 }, { "epoch": 85.0, "grad_norm": 4.134662628173828, "learning_rate": 1.1570450926997657e-06, "loss": 0.4401, "step": 25925 }, { "epoch": 85.00327868852459, "grad_norm": 5.474493980407715, "learning_rate": 1.1565493180994002e-06, "loss": 0.3617, "step": 25926 }, { "epoch": 85.00655737704918, "grad_norm": 4.91357421875, "learning_rate": 1.1560536432181346e-06, "loss": 0.4201, "step": 25927 }, { "epoch": 85.00983606557377, "grad_norm": 4.536875247955322, "learning_rate": 1.1555580680615608e-06, "loss": 0.4094, "step": 25928 }, { "epoch": 85.01311475409837, "grad_norm": 6.343522071838379, "learning_rate": 1.1550625926352665e-06, "loss": 0.4781, "step": 25929 }, { "epoch": 85.01639344262296, "grad_norm": 4.422089099884033, "learning_rate": 1.1545672169448375e-06, "loss": 0.2855, "step": 25930 }, { "epoch": 85.01967213114754, "grad_norm": 4.430703639984131, "learning_rate": 1.1540719409958612e-06, "loss": 0.3017, "step": 25931 }, { "epoch": 85.02295081967213, "grad_norm": 4.885659694671631, "learning_rate": 1.1535767647939177e-06, "loss": 0.3015, "step": 25932 }, { "epoch": 85.02622950819672, "grad_norm": 5.182833671569824, "learning_rate": 1.1530816883445972e-06, "loss": 0.3816, "step": 25933 }, { "epoch": 85.02950819672131, "grad_norm": 4.1739654541015625, "learning_rate": 1.1525867116534782e-06, "loss": 0.2298, "step": 25934 }, { "epoch": 85.0327868852459, "grad_norm": 5.256150245666504, "learning_rate": 1.1520918347261412e-06, "loss": 0.206, "step": 25935 }, { "epoch": 85.03606557377049, "grad_norm": 5.427680015563965, "learning_rate": 1.1515970575681712e-06, "loss": 0.3234, "step": 25936 }, { "epoch": 85.03934426229509, "grad_norm": 4.066925525665283, "learning_rate": 1.151102380185144e-06, "loss": 0.2501, "step": 25937 }, { "epoch": 85.04262295081968, "grad_norm": 5.900191307067871, "learning_rate": 1.150607802582635e-06, "loss": 0.44, "step": 25938 }, { "epoch": 85.04590163934427, "grad_norm": 5.173207759857178, "learning_rate": 1.1501133247662278e-06, "loss": 0.3415, "step": 25939 }, { "epoch": 85.04918032786885, "grad_norm": 6.098185062408447, "learning_rate": 1.1496189467414932e-06, "loss": 0.3146, "step": 25940 }, { "epoch": 85.05245901639344, "grad_norm": 4.250857353210449, "learning_rate": 1.1491246685140078e-06, "loss": 0.3865, "step": 25941 }, { "epoch": 85.05573770491803, "grad_norm": 5.776094913482666, "learning_rate": 1.1486304900893418e-06, "loss": 0.3995, "step": 25942 }, { "epoch": 85.05901639344262, "grad_norm": 7.180326461791992, "learning_rate": 1.148136411473072e-06, "loss": 0.4909, "step": 25943 }, { "epoch": 85.0622950819672, "grad_norm": 4.91526460647583, "learning_rate": 1.147642432670768e-06, "loss": 0.1797, "step": 25944 }, { "epoch": 85.06557377049181, "grad_norm": 10.236978530883789, "learning_rate": 1.147148553687998e-06, "loss": 0.3342, "step": 25945 }, { "epoch": 85.0688524590164, "grad_norm": 5.304696559906006, "learning_rate": 1.1466547745303348e-06, "loss": 0.2913, "step": 25946 }, { "epoch": 85.07213114754099, "grad_norm": 7.188737392425537, "learning_rate": 1.1461610952033442e-06, "loss": 0.2848, "step": 25947 }, { "epoch": 85.07540983606557, "grad_norm": 4.2025675773620605, "learning_rate": 1.1456675157125918e-06, "loss": 0.3917, "step": 25948 }, { "epoch": 85.07868852459016, "grad_norm": 4.972183704376221, "learning_rate": 1.1451740360636432e-06, "loss": 0.167, "step": 25949 }, { "epoch": 85.08196721311475, "grad_norm": 7.026518821716309, "learning_rate": 1.144680656262066e-06, "loss": 0.434, "step": 25950 }, { "epoch": 85.08524590163934, "grad_norm": 8.820362091064453, "learning_rate": 1.1441873763134227e-06, "loss": 0.4068, "step": 25951 }, { "epoch": 85.08852459016393, "grad_norm": 5.5461835861206055, "learning_rate": 1.1436941962232729e-06, "loss": 0.4014, "step": 25952 }, { "epoch": 85.09180327868853, "grad_norm": 4.486217021942139, "learning_rate": 1.1432011159971778e-06, "loss": 0.2887, "step": 25953 }, { "epoch": 85.09508196721312, "grad_norm": 5.35001277923584, "learning_rate": 1.142708135640701e-06, "loss": 0.1963, "step": 25954 }, { "epoch": 85.09836065573771, "grad_norm": 6.041323661804199, "learning_rate": 1.1422152551593991e-06, "loss": 0.1358, "step": 25955 }, { "epoch": 85.1016393442623, "grad_norm": 5.854676723480225, "learning_rate": 1.1417224745588306e-06, "loss": 0.5576, "step": 25956 }, { "epoch": 85.10491803278688, "grad_norm": 6.6298418045043945, "learning_rate": 1.1412297938445505e-06, "loss": 0.2527, "step": 25957 }, { "epoch": 85.10819672131147, "grad_norm": 12.532767295837402, "learning_rate": 1.1407372130221138e-06, "loss": 0.3847, "step": 25958 }, { "epoch": 85.11147540983606, "grad_norm": 4.955127716064453, "learning_rate": 1.1402447320970788e-06, "loss": 0.4085, "step": 25959 }, { "epoch": 85.11475409836065, "grad_norm": 5.544297695159912, "learning_rate": 1.1397523510749952e-06, "loss": 0.3971, "step": 25960 }, { "epoch": 85.11803278688525, "grad_norm": 5.227926254272461, "learning_rate": 1.1392600699614175e-06, "loss": 0.3891, "step": 25961 }, { "epoch": 85.12131147540984, "grad_norm": 5.579463005065918, "learning_rate": 1.1387678887618926e-06, "loss": 0.2045, "step": 25962 }, { "epoch": 85.12459016393443, "grad_norm": 5.147097110748291, "learning_rate": 1.1382758074819744e-06, "loss": 0.2179, "step": 25963 }, { "epoch": 85.12786885245902, "grad_norm": 4.34424352645874, "learning_rate": 1.1377838261272111e-06, "loss": 0.4032, "step": 25964 }, { "epoch": 85.1311475409836, "grad_norm": 5.492557525634766, "learning_rate": 1.1372919447031505e-06, "loss": 0.4427, "step": 25965 }, { "epoch": 85.1344262295082, "grad_norm": 4.852896213531494, "learning_rate": 1.1368001632153348e-06, "loss": 0.378, "step": 25966 }, { "epoch": 85.13770491803278, "grad_norm": 6.34457540512085, "learning_rate": 1.1363084816693148e-06, "loss": 0.3529, "step": 25967 }, { "epoch": 85.14098360655737, "grad_norm": 113.60517120361328, "learning_rate": 1.1358169000706331e-06, "loss": 0.5074, "step": 25968 }, { "epoch": 85.14426229508197, "grad_norm": 4.9930644035339355, "learning_rate": 1.135325418424832e-06, "loss": 0.4541, "step": 25969 }, { "epoch": 85.14754098360656, "grad_norm": 6.805826663970947, "learning_rate": 1.1348340367374543e-06, "loss": 0.437, "step": 25970 }, { "epoch": 85.15081967213115, "grad_norm": 4.728977680206299, "learning_rate": 1.1343427550140373e-06, "loss": 0.4625, "step": 25971 }, { "epoch": 85.15409836065574, "grad_norm": 5.033139705657959, "learning_rate": 1.1338515732601262e-06, "loss": 0.6535, "step": 25972 }, { "epoch": 85.15737704918033, "grad_norm": 4.874757766723633, "learning_rate": 1.133360491481258e-06, "loss": 0.4555, "step": 25973 }, { "epoch": 85.16065573770491, "grad_norm": 6.512134075164795, "learning_rate": 1.1328695096829678e-06, "loss": 0.3657, "step": 25974 }, { "epoch": 85.1639344262295, "grad_norm": 6.13377571105957, "learning_rate": 1.1323786278707916e-06, "loss": 0.1835, "step": 25975 }, { "epoch": 85.1672131147541, "grad_norm": 7.097603797912598, "learning_rate": 1.1318878460502692e-06, "loss": 0.3595, "step": 25976 }, { "epoch": 85.1704918032787, "grad_norm": 5.851254463195801, "learning_rate": 1.1313971642269317e-06, "loss": 0.4704, "step": 25977 }, { "epoch": 85.17377049180328, "grad_norm": 6.284117221832275, "learning_rate": 1.1309065824063115e-06, "loss": 0.446, "step": 25978 }, { "epoch": 85.17704918032787, "grad_norm": 4.9774556159973145, "learning_rate": 1.1304161005939397e-06, "loss": 0.2923, "step": 25979 }, { "epoch": 85.18032786885246, "grad_norm": 4.840638160705566, "learning_rate": 1.1299257187953505e-06, "loss": 0.4492, "step": 25980 }, { "epoch": 85.18360655737705, "grad_norm": 5.566284656524658, "learning_rate": 1.129435437016071e-06, "loss": 0.4336, "step": 25981 }, { "epoch": 85.18688524590164, "grad_norm": 5.656106948852539, "learning_rate": 1.12894525526163e-06, "loss": 0.3461, "step": 25982 }, { "epoch": 85.19016393442622, "grad_norm": 4.149112224578857, "learning_rate": 1.1284551735375548e-06, "loss": 0.374, "step": 25983 }, { "epoch": 85.19344262295083, "grad_norm": 5.4714789390563965, "learning_rate": 1.1279651918493706e-06, "loss": 0.4528, "step": 25984 }, { "epoch": 85.19672131147541, "grad_norm": 5.624484062194824, "learning_rate": 1.1274753102026037e-06, "loss": 0.28, "step": 25985 }, { "epoch": 85.2, "grad_norm": 8.066879272460938, "learning_rate": 1.1269855286027798e-06, "loss": 0.3568, "step": 25986 }, { "epoch": 85.20327868852459, "grad_norm": 5.483297348022461, "learning_rate": 1.1264958470554178e-06, "loss": 0.3442, "step": 25987 }, { "epoch": 85.20655737704918, "grad_norm": 4.151820182800293, "learning_rate": 1.1260062655660408e-06, "loss": 0.2522, "step": 25988 }, { "epoch": 85.20983606557377, "grad_norm": 4.418091297149658, "learning_rate": 1.1255167841401704e-06, "loss": 0.3061, "step": 25989 }, { "epoch": 85.21311475409836, "grad_norm": 4.9782562255859375, "learning_rate": 1.1250274027833264e-06, "loss": 0.3272, "step": 25990 }, { "epoch": 85.21639344262294, "grad_norm": 5.430209159851074, "learning_rate": 1.1245381215010243e-06, "loss": 0.2923, "step": 25991 }, { "epoch": 85.21967213114755, "grad_norm": 4.8675923347473145, "learning_rate": 1.1240489402987841e-06, "loss": 0.4482, "step": 25992 }, { "epoch": 85.22295081967214, "grad_norm": 5.157901287078857, "learning_rate": 1.1235598591821217e-06, "loss": 0.3554, "step": 25993 }, { "epoch": 85.22622950819672, "grad_norm": 5.819310188293457, "learning_rate": 1.1230708781565481e-06, "loss": 0.3859, "step": 25994 }, { "epoch": 85.22950819672131, "grad_norm": 6.1295857429504395, "learning_rate": 1.122581997227583e-06, "loss": 0.4963, "step": 25995 }, { "epoch": 85.2327868852459, "grad_norm": 5.310833930969238, "learning_rate": 1.122093216400736e-06, "loss": 0.4835, "step": 25996 }, { "epoch": 85.23606557377049, "grad_norm": 5.22252082824707, "learning_rate": 1.1216045356815153e-06, "loss": 0.2066, "step": 25997 }, { "epoch": 85.23934426229508, "grad_norm": 7.203972339630127, "learning_rate": 1.121115955075438e-06, "loss": 0.4858, "step": 25998 }, { "epoch": 85.24262295081967, "grad_norm": 4.60439920425415, "learning_rate": 1.1206274745880097e-06, "loss": 0.4705, "step": 25999 }, { "epoch": 85.24590163934427, "grad_norm": 4.782002925872803, "learning_rate": 1.1201390942247392e-06, "loss": 0.2864, "step": 26000 }, { "epoch": 85.24918032786886, "grad_norm": 5.082082748413086, "learning_rate": 1.119650813991131e-06, "loss": 0.4264, "step": 26001 }, { "epoch": 85.25245901639344, "grad_norm": 5.408503532409668, "learning_rate": 1.1191626338926943e-06, "loss": 0.3469, "step": 26002 }, { "epoch": 85.25573770491803, "grad_norm": 3.9185280799865723, "learning_rate": 1.118674553934934e-06, "loss": 0.4523, "step": 26003 }, { "epoch": 85.25901639344262, "grad_norm": 3.780302047729492, "learning_rate": 1.118186574123351e-06, "loss": 0.2639, "step": 26004 }, { "epoch": 85.26229508196721, "grad_norm": 4.685503959655762, "learning_rate": 1.1176986944634505e-06, "loss": 0.2513, "step": 26005 }, { "epoch": 85.2655737704918, "grad_norm": 4.267528057098389, "learning_rate": 1.1172109149607292e-06, "loss": 0.498, "step": 26006 }, { "epoch": 85.26885245901639, "grad_norm": 5.054806232452393, "learning_rate": 1.1167232356206936e-06, "loss": 0.41, "step": 26007 }, { "epoch": 85.27213114754099, "grad_norm": 6.112119674682617, "learning_rate": 1.1162356564488398e-06, "loss": 0.5827, "step": 26008 }, { "epoch": 85.27540983606558, "grad_norm": 5.554141044616699, "learning_rate": 1.115748177450665e-06, "loss": 0.2837, "step": 26009 }, { "epoch": 85.27868852459017, "grad_norm": 4.505380630493164, "learning_rate": 1.1152607986316655e-06, "loss": 0.4746, "step": 26010 }, { "epoch": 85.28196721311475, "grad_norm": 3.1234583854675293, "learning_rate": 1.1147735199973397e-06, "loss": 0.1704, "step": 26011 }, { "epoch": 85.28524590163934, "grad_norm": 5.284109115600586, "learning_rate": 1.1142863415531813e-06, "loss": 0.1694, "step": 26012 }, { "epoch": 85.28852459016393, "grad_norm": 5.760908603668213, "learning_rate": 1.1137992633046835e-06, "loss": 0.3266, "step": 26013 }, { "epoch": 85.29180327868852, "grad_norm": 6.329810619354248, "learning_rate": 1.1133122852573352e-06, "loss": 0.4369, "step": 26014 }, { "epoch": 85.29508196721312, "grad_norm": 3.6520302295684814, "learning_rate": 1.1128254074166334e-06, "loss": 0.3474, "step": 26015 }, { "epoch": 85.29836065573771, "grad_norm": 4.398014068603516, "learning_rate": 1.1123386297880657e-06, "loss": 0.2605, "step": 26016 }, { "epoch": 85.3016393442623, "grad_norm": 4.863844871520996, "learning_rate": 1.111851952377121e-06, "loss": 0.4844, "step": 26017 }, { "epoch": 85.30491803278689, "grad_norm": 13.712925910949707, "learning_rate": 1.1113653751892862e-06, "loss": 0.4003, "step": 26018 }, { "epoch": 85.30819672131148, "grad_norm": 4.801918029785156, "learning_rate": 1.1108788982300467e-06, "loss": 0.5265, "step": 26019 }, { "epoch": 85.31147540983606, "grad_norm": 4.9843974113464355, "learning_rate": 1.1103925215048927e-06, "loss": 0.4153, "step": 26020 }, { "epoch": 85.31475409836065, "grad_norm": 12.199984550476074, "learning_rate": 1.1099062450193054e-06, "loss": 0.258, "step": 26021 }, { "epoch": 85.31803278688524, "grad_norm": 7.525475978851318, "learning_rate": 1.109420068778768e-06, "loss": 0.5814, "step": 26022 }, { "epoch": 85.32131147540984, "grad_norm": 4.197147846221924, "learning_rate": 1.108933992788762e-06, "loss": 0.3517, "step": 26023 }, { "epoch": 85.32459016393443, "grad_norm": 5.523581504821777, "learning_rate": 1.1084480170547718e-06, "loss": 0.2502, "step": 26024 }, { "epoch": 85.32786885245902, "grad_norm": 5.368575096130371, "learning_rate": 1.107962141582275e-06, "loss": 0.4374, "step": 26025 }, { "epoch": 85.33114754098361, "grad_norm": 4.194260120391846, "learning_rate": 1.1074763663767497e-06, "loss": 0.3118, "step": 26026 }, { "epoch": 85.3344262295082, "grad_norm": 6.208241939544678, "learning_rate": 1.1069906914436735e-06, "loss": 0.5011, "step": 26027 }, { "epoch": 85.33770491803278, "grad_norm": 6.730416297912598, "learning_rate": 1.1065051167885244e-06, "loss": 0.4354, "step": 26028 }, { "epoch": 85.34098360655737, "grad_norm": 5.3957624435424805, "learning_rate": 1.1060196424167779e-06, "loss": 0.2923, "step": 26029 }, { "epoch": 85.34426229508196, "grad_norm": 4.923326015472412, "learning_rate": 1.1055342683339066e-06, "loss": 0.417, "step": 26030 }, { "epoch": 85.34754098360656, "grad_norm": 5.285682201385498, "learning_rate": 1.1050489945453847e-06, "loss": 0.4354, "step": 26031 }, { "epoch": 85.35081967213115, "grad_norm": 5.671515941619873, "learning_rate": 1.1045638210566823e-06, "loss": 0.5173, "step": 26032 }, { "epoch": 85.35409836065574, "grad_norm": 4.095366954803467, "learning_rate": 1.1040787478732728e-06, "loss": 0.3168, "step": 26033 }, { "epoch": 85.35737704918033, "grad_norm": 4.951163291931152, "learning_rate": 1.1035937750006254e-06, "loss": 0.5249, "step": 26034 }, { "epoch": 85.36065573770492, "grad_norm": 6.340353488922119, "learning_rate": 1.1031089024442088e-06, "loss": 0.4544, "step": 26035 }, { "epoch": 85.3639344262295, "grad_norm": 5.766030311584473, "learning_rate": 1.1026241302094864e-06, "loss": 0.2801, "step": 26036 }, { "epoch": 85.3672131147541, "grad_norm": 6.256205081939697, "learning_rate": 1.1021394583019306e-06, "loss": 0.4685, "step": 26037 }, { "epoch": 85.37049180327868, "grad_norm": 4.0463995933532715, "learning_rate": 1.1016548867270037e-06, "loss": 0.3616, "step": 26038 }, { "epoch": 85.37377049180328, "grad_norm": 4.864731788635254, "learning_rate": 1.1011704154901704e-06, "loss": 0.3277, "step": 26039 }, { "epoch": 85.37704918032787, "grad_norm": 5.198459148406982, "learning_rate": 1.1006860445968902e-06, "loss": 0.5954, "step": 26040 }, { "epoch": 85.38032786885246, "grad_norm": 5.197267055511475, "learning_rate": 1.1002017740526305e-06, "loss": 0.3537, "step": 26041 }, { "epoch": 85.38360655737705, "grad_norm": 4.782561779022217, "learning_rate": 1.0997176038628498e-06, "loss": 0.4452, "step": 26042 }, { "epoch": 85.38688524590164, "grad_norm": 4.199326992034912, "learning_rate": 1.0992335340330062e-06, "loss": 0.3535, "step": 26043 }, { "epoch": 85.39016393442623, "grad_norm": 5.039383888244629, "learning_rate": 1.0987495645685575e-06, "loss": 0.4316, "step": 26044 }, { "epoch": 85.39344262295081, "grad_norm": 4.433802604675293, "learning_rate": 1.0982656954749637e-06, "loss": 0.7171, "step": 26045 }, { "epoch": 85.3967213114754, "grad_norm": 4.884158134460449, "learning_rate": 1.0977819267576807e-06, "loss": 0.3886, "step": 26046 }, { "epoch": 85.4, "grad_norm": 3.9373321533203125, "learning_rate": 1.0972982584221592e-06, "loss": 0.1515, "step": 26047 }, { "epoch": 85.4032786885246, "grad_norm": 4.965339660644531, "learning_rate": 1.0968146904738596e-06, "loss": 0.1941, "step": 26048 }, { "epoch": 85.40655737704918, "grad_norm": 5.833566188812256, "learning_rate": 1.096331222918231e-06, "loss": 0.5235, "step": 26049 }, { "epoch": 85.40983606557377, "grad_norm": 3.899517059326172, "learning_rate": 1.0958478557607222e-06, "loss": 0.3074, "step": 26050 }, { "epoch": 85.41311475409836, "grad_norm": 5.287352085113525, "learning_rate": 1.09536458900679e-06, "loss": 0.5264, "step": 26051 }, { "epoch": 85.41639344262295, "grad_norm": 4.102189064025879, "learning_rate": 1.094881422661881e-06, "loss": 0.2436, "step": 26052 }, { "epoch": 85.41967213114754, "grad_norm": 5.8237810134887695, "learning_rate": 1.0943983567314399e-06, "loss": 0.4234, "step": 26053 }, { "epoch": 85.42295081967212, "grad_norm": 5.335268020629883, "learning_rate": 1.0939153912209187e-06, "loss": 0.4996, "step": 26054 }, { "epoch": 85.42622950819673, "grad_norm": 5.817290306091309, "learning_rate": 1.0934325261357625e-06, "loss": 0.4137, "step": 26055 }, { "epoch": 85.42950819672132, "grad_norm": 5.496476650238037, "learning_rate": 1.0929497614814145e-06, "loss": 0.642, "step": 26056 }, { "epoch": 85.4327868852459, "grad_norm": 5.25567626953125, "learning_rate": 1.092467097263319e-06, "loss": 0.2843, "step": 26057 }, { "epoch": 85.43606557377049, "grad_norm": 5.661688804626465, "learning_rate": 1.091984533486916e-06, "loss": 0.2122, "step": 26058 }, { "epoch": 85.43934426229508, "grad_norm": 21.505178451538086, "learning_rate": 1.0915020701576529e-06, "loss": 0.3788, "step": 26059 }, { "epoch": 85.44262295081967, "grad_norm": 5.47282600402832, "learning_rate": 1.0910197072809647e-06, "loss": 0.4151, "step": 26060 }, { "epoch": 85.44590163934426, "grad_norm": 4.331573009490967, "learning_rate": 1.090537444862293e-06, "loss": 0.3726, "step": 26061 }, { "epoch": 85.44918032786886, "grad_norm": 4.228154182434082, "learning_rate": 1.0900552829070731e-06, "loss": 0.538, "step": 26062 }, { "epoch": 85.45245901639345, "grad_norm": 11.3573637008667, "learning_rate": 1.0895732214207465e-06, "loss": 0.3743, "step": 26063 }, { "epoch": 85.45573770491804, "grad_norm": 4.274728298187256, "learning_rate": 1.0890912604087456e-06, "loss": 0.3079, "step": 26064 }, { "epoch": 85.45901639344262, "grad_norm": 4.092423915863037, "learning_rate": 1.0886093998765069e-06, "loss": 0.5145, "step": 26065 }, { "epoch": 85.46229508196721, "grad_norm": 4.669205665588379, "learning_rate": 1.0881276398294593e-06, "loss": 0.3285, "step": 26066 }, { "epoch": 85.4655737704918, "grad_norm": 5.53082799911499, "learning_rate": 1.087645980273041e-06, "loss": 0.4458, "step": 26067 }, { "epoch": 85.46885245901639, "grad_norm": 4.565452575683594, "learning_rate": 1.0871644212126808e-06, "loss": 0.2317, "step": 26068 }, { "epoch": 85.47213114754098, "grad_norm": 4.544345855712891, "learning_rate": 1.086682962653809e-06, "loss": 0.4376, "step": 26069 }, { "epoch": 85.47540983606558, "grad_norm": 4.1100358963012695, "learning_rate": 1.0862016046018541e-06, "loss": 0.1966, "step": 26070 }, { "epoch": 85.47868852459017, "grad_norm": 4.171047210693359, "learning_rate": 1.0857203470622424e-06, "loss": 0.3032, "step": 26071 }, { "epoch": 85.48196721311476, "grad_norm": 4.3379716873168945, "learning_rate": 1.0852391900404046e-06, "loss": 0.311, "step": 26072 }, { "epoch": 85.48524590163935, "grad_norm": 6.507368564605713, "learning_rate": 1.0847581335417634e-06, "loss": 0.2628, "step": 26073 }, { "epoch": 85.48852459016393, "grad_norm": 5.522449970245361, "learning_rate": 1.0842771775717443e-06, "loss": 0.2117, "step": 26074 }, { "epoch": 85.49180327868852, "grad_norm": 5.371657848358154, "learning_rate": 1.0837963221357672e-06, "loss": 0.4479, "step": 26075 }, { "epoch": 85.49508196721311, "grad_norm": 13.577816009521484, "learning_rate": 1.0833155672392592e-06, "loss": 0.3332, "step": 26076 }, { "epoch": 85.4983606557377, "grad_norm": 4.975970268249512, "learning_rate": 1.0828349128876404e-06, "loss": 0.3094, "step": 26077 }, { "epoch": 85.5016393442623, "grad_norm": 5.31635856628418, "learning_rate": 1.0823543590863283e-06, "loss": 0.6597, "step": 26078 }, { "epoch": 85.50491803278689, "grad_norm": 4.412354946136475, "learning_rate": 1.0818739058407413e-06, "loss": 0.463, "step": 26079 }, { "epoch": 85.50819672131148, "grad_norm": 5.538721561431885, "learning_rate": 1.0813935531563002e-06, "loss": 0.5379, "step": 26080 }, { "epoch": 85.51147540983607, "grad_norm": 3.898191213607788, "learning_rate": 1.08091330103842e-06, "loss": 0.2478, "step": 26081 }, { "epoch": 85.51475409836065, "grad_norm": 6.164646148681641, "learning_rate": 1.0804331494925157e-06, "loss": 0.2977, "step": 26082 }, { "epoch": 85.51803278688524, "grad_norm": 6.200227737426758, "learning_rate": 1.0799530985240025e-06, "loss": 0.3687, "step": 26083 }, { "epoch": 85.52131147540983, "grad_norm": 5.8217453956604, "learning_rate": 1.0794731481382902e-06, "loss": 0.4065, "step": 26084 }, { "epoch": 85.52459016393442, "grad_norm": 5.494593620300293, "learning_rate": 1.0789932983407946e-06, "loss": 0.2924, "step": 26085 }, { "epoch": 85.52786885245902, "grad_norm": 4.835808277130127, "learning_rate": 1.0785135491369259e-06, "loss": 0.44, "step": 26086 }, { "epoch": 85.53114754098361, "grad_norm": 5.17691707611084, "learning_rate": 1.0780339005320917e-06, "loss": 0.4082, "step": 26087 }, { "epoch": 85.5344262295082, "grad_norm": 4.911744594573975, "learning_rate": 1.077554352531701e-06, "loss": 0.4412, "step": 26088 }, { "epoch": 85.53770491803279, "grad_norm": 9.056374549865723, "learning_rate": 1.0770749051411633e-06, "loss": 0.5258, "step": 26089 }, { "epoch": 85.54098360655738, "grad_norm": 6.2166829109191895, "learning_rate": 1.076595558365884e-06, "loss": 0.3963, "step": 26090 }, { "epoch": 85.54426229508196, "grad_norm": 7.08190393447876, "learning_rate": 1.0761163122112672e-06, "loss": 0.4059, "step": 26091 }, { "epoch": 85.54754098360655, "grad_norm": 4.813568592071533, "learning_rate": 1.075637166682717e-06, "loss": 0.3088, "step": 26092 }, { "epoch": 85.55081967213114, "grad_norm": 5.1482625007629395, "learning_rate": 1.0751581217856378e-06, "loss": 0.4538, "step": 26093 }, { "epoch": 85.55409836065574, "grad_norm": 3.5422415733337402, "learning_rate": 1.0746791775254296e-06, "loss": 0.0878, "step": 26094 }, { "epoch": 85.55737704918033, "grad_norm": 5.511166572570801, "learning_rate": 1.0742003339074946e-06, "loss": 0.3544, "step": 26095 }, { "epoch": 85.56065573770492, "grad_norm": 4.524912357330322, "learning_rate": 1.073721590937229e-06, "loss": 0.4232, "step": 26096 }, { "epoch": 85.56393442622951, "grad_norm": 5.3491010665893555, "learning_rate": 1.0732429486200346e-06, "loss": 0.4414, "step": 26097 }, { "epoch": 85.5672131147541, "grad_norm": 5.703125476837158, "learning_rate": 1.0727644069613085e-06, "loss": 0.2969, "step": 26098 }, { "epoch": 85.57049180327868, "grad_norm": 4.625288963317871, "learning_rate": 1.0722859659664442e-06, "loss": 0.246, "step": 26099 }, { "epoch": 85.57377049180327, "grad_norm": 4.641609191894531, "learning_rate": 1.0718076256408394e-06, "loss": 0.2206, "step": 26100 }, { "epoch": 85.57704918032788, "grad_norm": 5.126869201660156, "learning_rate": 1.0713293859898865e-06, "loss": 0.3471, "step": 26101 }, { "epoch": 85.58032786885246, "grad_norm": 6.279023170471191, "learning_rate": 1.070851247018977e-06, "loss": 0.2931, "step": 26102 }, { "epoch": 85.58360655737705, "grad_norm": 4.750313758850098, "learning_rate": 1.070373208733505e-06, "loss": 0.3094, "step": 26103 }, { "epoch": 85.58688524590164, "grad_norm": 3.5444815158843994, "learning_rate": 1.0698952711388588e-06, "loss": 0.4412, "step": 26104 }, { "epoch": 85.59016393442623, "grad_norm": 5.711530685424805, "learning_rate": 1.0694174342404295e-06, "loss": 0.338, "step": 26105 }, { "epoch": 85.59344262295082, "grad_norm": 9.185612678527832, "learning_rate": 1.0689396980436017e-06, "loss": 0.5421, "step": 26106 }, { "epoch": 85.5967213114754, "grad_norm": 6.180932521820068, "learning_rate": 1.0684620625537677e-06, "loss": 0.7303, "step": 26107 }, { "epoch": 85.6, "grad_norm": 5.209171295166016, "learning_rate": 1.067984527776309e-06, "loss": 0.283, "step": 26108 }, { "epoch": 85.6032786885246, "grad_norm": 8.953025817871094, "learning_rate": 1.0675070937166131e-06, "loss": 0.2265, "step": 26109 }, { "epoch": 85.60655737704919, "grad_norm": 5.7257080078125, "learning_rate": 1.0670297603800595e-06, "loss": 0.5224, "step": 26110 }, { "epoch": 85.60983606557377, "grad_norm": 4.831215858459473, "learning_rate": 1.0665525277720345e-06, "loss": 0.4533, "step": 26111 }, { "epoch": 85.61311475409836, "grad_norm": 8.029955863952637, "learning_rate": 1.0660753958979198e-06, "loss": 0.659, "step": 26112 }, { "epoch": 85.61639344262295, "grad_norm": 4.552441120147705, "learning_rate": 1.065598364763093e-06, "loss": 0.1901, "step": 26113 }, { "epoch": 85.61967213114754, "grad_norm": 4.48699951171875, "learning_rate": 1.065121434372932e-06, "loss": 0.4856, "step": 26114 }, { "epoch": 85.62295081967213, "grad_norm": 11.435239791870117, "learning_rate": 1.0646446047328186e-06, "loss": 0.312, "step": 26115 }, { "epoch": 85.62622950819672, "grad_norm": 4.659153461456299, "learning_rate": 1.064167875848129e-06, "loss": 0.3013, "step": 26116 }, { "epoch": 85.62950819672132, "grad_norm": 4.135838031768799, "learning_rate": 1.0636912477242367e-06, "loss": 0.3645, "step": 26117 }, { "epoch": 85.6327868852459, "grad_norm": 5.164205074310303, "learning_rate": 1.0632147203665144e-06, "loss": 0.2047, "step": 26118 }, { "epoch": 85.6360655737705, "grad_norm": 4.791226387023926, "learning_rate": 1.062738293780341e-06, "loss": 0.3491, "step": 26119 }, { "epoch": 85.63934426229508, "grad_norm": 3.7421460151672363, "learning_rate": 1.0622619679710856e-06, "loss": 0.2643, "step": 26120 }, { "epoch": 85.64262295081967, "grad_norm": 4.724806785583496, "learning_rate": 1.0617857429441191e-06, "loss": 0.3541, "step": 26121 }, { "epoch": 85.64590163934426, "grad_norm": 4.922976493835449, "learning_rate": 1.061309618704811e-06, "loss": 0.2167, "step": 26122 }, { "epoch": 85.64918032786885, "grad_norm": 5.058835983276367, "learning_rate": 1.0608335952585302e-06, "loss": 0.5299, "step": 26123 }, { "epoch": 85.65245901639344, "grad_norm": 5.046523094177246, "learning_rate": 1.0603576726106468e-06, "loss": 0.4055, "step": 26124 }, { "epoch": 85.65573770491804, "grad_norm": 3.7723617553710938, "learning_rate": 1.0598818507665255e-06, "loss": 0.3285, "step": 26125 }, { "epoch": 85.65901639344263, "grad_norm": 4.351907253265381, "learning_rate": 1.0594061297315316e-06, "loss": 0.1548, "step": 26126 }, { "epoch": 85.66229508196722, "grad_norm": 5.89356803894043, "learning_rate": 1.058930509511027e-06, "loss": 0.317, "step": 26127 }, { "epoch": 85.6655737704918, "grad_norm": 4.8748087882995605, "learning_rate": 1.058454990110379e-06, "loss": 0.3375, "step": 26128 }, { "epoch": 85.66885245901639, "grad_norm": 4.928168773651123, "learning_rate": 1.0579795715349494e-06, "loss": 0.2681, "step": 26129 }, { "epoch": 85.67213114754098, "grad_norm": 4.42640495300293, "learning_rate": 1.057504253790096e-06, "loss": 0.3251, "step": 26130 }, { "epoch": 85.67540983606557, "grad_norm": 4.937267303466797, "learning_rate": 1.0570290368811786e-06, "loss": 0.3898, "step": 26131 }, { "epoch": 85.67868852459016, "grad_norm": 5.686552047729492, "learning_rate": 1.05655392081356e-06, "loss": 0.4203, "step": 26132 }, { "epoch": 85.68196721311476, "grad_norm": 4.1343994140625, "learning_rate": 1.0560789055925935e-06, "loss": 0.2904, "step": 26133 }, { "epoch": 85.68524590163935, "grad_norm": 5.342308044433594, "learning_rate": 1.0556039912236371e-06, "loss": 0.4862, "step": 26134 }, { "epoch": 85.68852459016394, "grad_norm": 5.179203510284424, "learning_rate": 1.0551291777120465e-06, "loss": 0.2921, "step": 26135 }, { "epoch": 85.69180327868852, "grad_norm": 5.919584274291992, "learning_rate": 1.0546544650631719e-06, "loss": 0.4291, "step": 26136 }, { "epoch": 85.69508196721311, "grad_norm": 4.454458713531494, "learning_rate": 1.054179853282371e-06, "loss": 0.405, "step": 26137 }, { "epoch": 85.6983606557377, "grad_norm": 4.679348945617676, "learning_rate": 1.0537053423749932e-06, "loss": 0.3894, "step": 26138 }, { "epoch": 85.70163934426229, "grad_norm": 5.538600921630859, "learning_rate": 1.0532309323463896e-06, "loss": 0.4823, "step": 26139 }, { "epoch": 85.70491803278688, "grad_norm": 5.768784046173096, "learning_rate": 1.0527566232019083e-06, "loss": 0.2637, "step": 26140 }, { "epoch": 85.70819672131148, "grad_norm": 5.971961498260498, "learning_rate": 1.0522824149469003e-06, "loss": 0.3807, "step": 26141 }, { "epoch": 85.71147540983607, "grad_norm": 7.010079383850098, "learning_rate": 1.0518083075867113e-06, "loss": 0.3109, "step": 26142 }, { "epoch": 85.71475409836066, "grad_norm": 5.29196834564209, "learning_rate": 1.0513343011266873e-06, "loss": 0.3792, "step": 26143 }, { "epoch": 85.71803278688525, "grad_norm": 4.79551887512207, "learning_rate": 1.0508603955721718e-06, "loss": 0.4475, "step": 26144 }, { "epoch": 85.72131147540983, "grad_norm": 3.551974296569824, "learning_rate": 1.0503865909285116e-06, "loss": 0.3268, "step": 26145 }, { "epoch": 85.72459016393442, "grad_norm": 5.713612079620361, "learning_rate": 1.049912887201049e-06, "loss": 0.2492, "step": 26146 }, { "epoch": 85.72786885245901, "grad_norm": 3.5985822677612305, "learning_rate": 1.049439284395123e-06, "loss": 0.2146, "step": 26147 }, { "epoch": 85.73114754098361, "grad_norm": 4.571593761444092, "learning_rate": 1.048965782516076e-06, "loss": 0.2859, "step": 26148 }, { "epoch": 85.7344262295082, "grad_norm": 4.45146369934082, "learning_rate": 1.0484923815692449e-06, "loss": 0.3462, "step": 26149 }, { "epoch": 85.73770491803279, "grad_norm": 4.802957534790039, "learning_rate": 1.04801908155997e-06, "loss": 0.45, "step": 26150 }, { "epoch": 85.74098360655738, "grad_norm": 4.763085842132568, "learning_rate": 1.047545882493589e-06, "loss": 0.4232, "step": 26151 }, { "epoch": 85.74426229508197, "grad_norm": 3.7720727920532227, "learning_rate": 1.0470727843754336e-06, "loss": 0.2542, "step": 26152 }, { "epoch": 85.74754098360656, "grad_norm": 4.06545877456665, "learning_rate": 1.0465997872108447e-06, "loss": 0.2645, "step": 26153 }, { "epoch": 85.75081967213114, "grad_norm": 3.736016273498535, "learning_rate": 1.0461268910051515e-06, "loss": 0.228, "step": 26154 }, { "epoch": 85.75409836065573, "grad_norm": 5.034539699554443, "learning_rate": 1.045654095763684e-06, "loss": 0.4817, "step": 26155 }, { "epoch": 85.75737704918033, "grad_norm": 4.494002819061279, "learning_rate": 1.0451814014917805e-06, "loss": 0.4844, "step": 26156 }, { "epoch": 85.76065573770492, "grad_norm": 4.181623458862305, "learning_rate": 1.0447088081947664e-06, "loss": 0.3022, "step": 26157 }, { "epoch": 85.76393442622951, "grad_norm": 11.701102256774902, "learning_rate": 1.04423631587797e-06, "loss": 0.3579, "step": 26158 }, { "epoch": 85.7672131147541, "grad_norm": 4.821500301361084, "learning_rate": 1.0437639245467234e-06, "loss": 0.401, "step": 26159 }, { "epoch": 85.77049180327869, "grad_norm": 6.969939708709717, "learning_rate": 1.0432916342063503e-06, "loss": 0.4266, "step": 26160 }, { "epoch": 85.77377049180328, "grad_norm": 11.225218772888184, "learning_rate": 1.0428194448621764e-06, "loss": 0.3294, "step": 26161 }, { "epoch": 85.77704918032786, "grad_norm": 5.877706527709961, "learning_rate": 1.042347356519524e-06, "loss": 0.2585, "step": 26162 }, { "epoch": 85.78032786885245, "grad_norm": 4.57657527923584, "learning_rate": 1.0418753691837213e-06, "loss": 0.2366, "step": 26163 }, { "epoch": 85.78360655737706, "grad_norm": 7.119013786315918, "learning_rate": 1.0414034828600883e-06, "loss": 0.2961, "step": 26164 }, { "epoch": 85.78688524590164, "grad_norm": 3.6538257598876953, "learning_rate": 1.040931697553945e-06, "loss": 0.2606, "step": 26165 }, { "epoch": 85.79016393442623, "grad_norm": 5.735857963562012, "learning_rate": 1.04046001327061e-06, "loss": 0.3901, "step": 26166 }, { "epoch": 85.79344262295082, "grad_norm": 4.32105827331543, "learning_rate": 1.0399884300154062e-06, "loss": 0.6305, "step": 26167 }, { "epoch": 85.79672131147541, "grad_norm": 4.565007209777832, "learning_rate": 1.0395169477936495e-06, "loss": 0.4305, "step": 26168 }, { "epoch": 85.8, "grad_norm": 23.791351318359375, "learning_rate": 1.0390455666106547e-06, "loss": 0.4869, "step": 26169 }, { "epoch": 85.80327868852459, "grad_norm": 4.639367580413818, "learning_rate": 1.0385742864717364e-06, "loss": 0.4827, "step": 26170 }, { "epoch": 85.80655737704917, "grad_norm": 4.642697334289551, "learning_rate": 1.0381031073822135e-06, "loss": 0.4308, "step": 26171 }, { "epoch": 85.80983606557378, "grad_norm": 4.792637348175049, "learning_rate": 1.0376320293473952e-06, "loss": 0.2603, "step": 26172 }, { "epoch": 85.81311475409836, "grad_norm": 10.274195671081543, "learning_rate": 1.0371610523725939e-06, "loss": 0.4976, "step": 26173 }, { "epoch": 85.81639344262295, "grad_norm": 4.419618606567383, "learning_rate": 1.0366901764631221e-06, "loss": 0.3313, "step": 26174 }, { "epoch": 85.81967213114754, "grad_norm": 5.325417518615723, "learning_rate": 1.0362194016242843e-06, "loss": 0.3509, "step": 26175 }, { "epoch": 85.82295081967213, "grad_norm": 4.52438497543335, "learning_rate": 1.0357487278613964e-06, "loss": 0.3847, "step": 26176 }, { "epoch": 85.82622950819672, "grad_norm": 7.358208179473877, "learning_rate": 1.0352781551797608e-06, "loss": 0.2308, "step": 26177 }, { "epoch": 85.8295081967213, "grad_norm": 4.522105693817139, "learning_rate": 1.0348076835846866e-06, "loss": 0.2115, "step": 26178 }, { "epoch": 85.8327868852459, "grad_norm": 3.7478482723236084, "learning_rate": 1.0343373130814737e-06, "loss": 0.2354, "step": 26179 }, { "epoch": 85.8360655737705, "grad_norm": 4.319435119628906, "learning_rate": 1.0338670436754316e-06, "loss": 0.244, "step": 26180 }, { "epoch": 85.83934426229509, "grad_norm": 4.848761558532715, "learning_rate": 1.0333968753718616e-06, "loss": 0.5897, "step": 26181 }, { "epoch": 85.84262295081967, "grad_norm": 7.695873260498047, "learning_rate": 1.0329268081760646e-06, "loss": 0.3541, "step": 26182 }, { "epoch": 85.84590163934426, "grad_norm": 5.801889419555664, "learning_rate": 1.03245684209334e-06, "loss": 0.3119, "step": 26183 }, { "epoch": 85.84918032786885, "grad_norm": 6.527378082275391, "learning_rate": 1.0319869771289893e-06, "loss": 0.289, "step": 26184 }, { "epoch": 85.85245901639344, "grad_norm": 6.214911937713623, "learning_rate": 1.03151721328831e-06, "loss": 0.4396, "step": 26185 }, { "epoch": 85.85573770491803, "grad_norm": 6.213937759399414, "learning_rate": 1.0310475505765993e-06, "loss": 0.3159, "step": 26186 }, { "epoch": 85.85901639344263, "grad_norm": 7.3441267013549805, "learning_rate": 1.030577988999153e-06, "loss": 0.409, "step": 26187 }, { "epoch": 85.86229508196722, "grad_norm": 4.595002174377441, "learning_rate": 1.0301085285612632e-06, "loss": 0.3551, "step": 26188 }, { "epoch": 85.8655737704918, "grad_norm": 4.461104393005371, "learning_rate": 1.0296391692682284e-06, "loss": 0.3049, "step": 26189 }, { "epoch": 85.8688524590164, "grad_norm": 8.904492378234863, "learning_rate": 1.0291699111253395e-06, "loss": 0.3542, "step": 26190 }, { "epoch": 85.87213114754098, "grad_norm": 6.8409833908081055, "learning_rate": 1.0287007541378857e-06, "loss": 0.4236, "step": 26191 }, { "epoch": 85.87540983606557, "grad_norm": 4.78737211227417, "learning_rate": 1.0282316983111584e-06, "loss": 0.6053, "step": 26192 }, { "epoch": 85.87868852459016, "grad_norm": 6.974626064300537, "learning_rate": 1.0277627436504478e-06, "loss": 0.3594, "step": 26193 }, { "epoch": 85.88196721311475, "grad_norm": 5.4585795402526855, "learning_rate": 1.0272938901610408e-06, "loss": 0.3368, "step": 26194 }, { "epoch": 85.88524590163935, "grad_norm": 5.838704586029053, "learning_rate": 1.0268251378482252e-06, "loss": 0.5034, "step": 26195 }, { "epoch": 85.88852459016394, "grad_norm": 4.7218780517578125, "learning_rate": 1.0263564867172838e-06, "loss": 0.5596, "step": 26196 }, { "epoch": 85.89180327868853, "grad_norm": 5.484736919403076, "learning_rate": 1.0258879367735053e-06, "loss": 0.204, "step": 26197 }, { "epoch": 85.89508196721312, "grad_norm": 5.6158833503723145, "learning_rate": 1.0254194880221712e-06, "loss": 0.4257, "step": 26198 }, { "epoch": 85.8983606557377, "grad_norm": 4.756710052490234, "learning_rate": 1.024951140468563e-06, "loss": 0.3729, "step": 26199 }, { "epoch": 85.90163934426229, "grad_norm": 5.6870598793029785, "learning_rate": 1.024482894117963e-06, "loss": 0.424, "step": 26200 }, { "epoch": 85.90491803278688, "grad_norm": 5.422687530517578, "learning_rate": 1.0240147489756481e-06, "loss": 0.3534, "step": 26201 }, { "epoch": 85.90819672131147, "grad_norm": 5.7624192237854, "learning_rate": 1.0235467050469016e-06, "loss": 0.4896, "step": 26202 }, { "epoch": 85.91147540983607, "grad_norm": 4.3602423667907715, "learning_rate": 1.0230787623369997e-06, "loss": 0.3176, "step": 26203 }, { "epoch": 85.91475409836066, "grad_norm": 5.181311130523682, "learning_rate": 1.0226109208512158e-06, "loss": 0.5445, "step": 26204 }, { "epoch": 85.91803278688525, "grad_norm": 7.19341516494751, "learning_rate": 1.02214318059483e-06, "loss": 0.3982, "step": 26205 }, { "epoch": 85.92131147540984, "grad_norm": 4.55354642868042, "learning_rate": 1.021675541573115e-06, "loss": 0.4418, "step": 26206 }, { "epoch": 85.92459016393443, "grad_norm": 4.780168056488037, "learning_rate": 1.021208003791343e-06, "loss": 0.1289, "step": 26207 }, { "epoch": 85.92786885245901, "grad_norm": 5.129991054534912, "learning_rate": 1.0207405672547842e-06, "loss": 0.4401, "step": 26208 }, { "epoch": 85.9311475409836, "grad_norm": 6.301767826080322, "learning_rate": 1.0202732319687147e-06, "loss": 0.581, "step": 26209 }, { "epoch": 85.93442622950819, "grad_norm": 5.697660446166992, "learning_rate": 1.0198059979384e-06, "loss": 0.6395, "step": 26210 }, { "epoch": 85.9377049180328, "grad_norm": 4.481351375579834, "learning_rate": 1.0193388651691082e-06, "loss": 0.5208, "step": 26211 }, { "epoch": 85.94098360655738, "grad_norm": 5.427488803863525, "learning_rate": 1.0188718336661096e-06, "loss": 0.3704, "step": 26212 }, { "epoch": 85.94426229508197, "grad_norm": 6.3692708015441895, "learning_rate": 1.01840490343467e-06, "loss": 0.4401, "step": 26213 }, { "epoch": 85.94754098360656, "grad_norm": 6.3454179763793945, "learning_rate": 1.0179380744800505e-06, "loss": 0.2449, "step": 26214 }, { "epoch": 85.95081967213115, "grad_norm": 5.672575950622559, "learning_rate": 1.0174713468075214e-06, "loss": 0.2227, "step": 26215 }, { "epoch": 85.95409836065573, "grad_norm": 4.169752597808838, "learning_rate": 1.017004720422341e-06, "loss": 0.326, "step": 26216 }, { "epoch": 85.95737704918032, "grad_norm": 8.961065292358398, "learning_rate": 1.0165381953297737e-06, "loss": 0.6219, "step": 26217 }, { "epoch": 85.96065573770491, "grad_norm": 4.939879417419434, "learning_rate": 1.0160717715350755e-06, "loss": 0.1704, "step": 26218 }, { "epoch": 85.96393442622951, "grad_norm": 5.88884973526001, "learning_rate": 1.015605449043512e-06, "loss": 0.4306, "step": 26219 }, { "epoch": 85.9672131147541, "grad_norm": 3.8487656116485596, "learning_rate": 1.0151392278603378e-06, "loss": 0.3254, "step": 26220 }, { "epoch": 85.97049180327869, "grad_norm": 4.6692938804626465, "learning_rate": 1.0146731079908112e-06, "loss": 0.4027, "step": 26221 }, { "epoch": 85.97377049180328, "grad_norm": 4.569010257720947, "learning_rate": 1.0142070894401856e-06, "loss": 0.3264, "step": 26222 }, { "epoch": 85.97704918032787, "grad_norm": 5.480956077575684, "learning_rate": 1.0137411722137202e-06, "loss": 0.2911, "step": 26223 }, { "epoch": 85.98032786885246, "grad_norm": 3.975266695022583, "learning_rate": 1.0132753563166663e-06, "loss": 0.3101, "step": 26224 }, { "epoch": 85.98360655737704, "grad_norm": 5.484348297119141, "learning_rate": 1.0128096417542765e-06, "loss": 0.2804, "step": 26225 }, { "epoch": 85.98688524590163, "grad_norm": 4.0924577713012695, "learning_rate": 1.012344028531802e-06, "loss": 0.4394, "step": 26226 }, { "epoch": 85.99016393442623, "grad_norm": 4.661812782287598, "learning_rate": 1.011878516654492e-06, "loss": 0.2339, "step": 26227 }, { "epoch": 85.99344262295082, "grad_norm": 4.966808319091797, "learning_rate": 1.0114131061275988e-06, "loss": 0.4578, "step": 26228 }, { "epoch": 85.99672131147541, "grad_norm": 4.993566989898682, "learning_rate": 1.0109477969563685e-06, "loss": 0.3008, "step": 26229 }, { "epoch": 86.0, "grad_norm": 3.882283926010132, "learning_rate": 1.010482589146048e-06, "loss": 0.4184, "step": 26230 }, { "epoch": 86.00327868852459, "grad_norm": 4.0728607177734375, "learning_rate": 1.0100174827018815e-06, "loss": 0.4531, "step": 26231 }, { "epoch": 86.00655737704918, "grad_norm": 5.229101657867432, "learning_rate": 1.0095524776291165e-06, "loss": 0.4026, "step": 26232 }, { "epoch": 86.00983606557377, "grad_norm": 4.30343770980835, "learning_rate": 1.0090875739329953e-06, "loss": 0.4139, "step": 26233 }, { "epoch": 86.01311475409837, "grad_norm": 6.311957836151123, "learning_rate": 1.0086227716187602e-06, "loss": 0.2914, "step": 26234 }, { "epoch": 86.01639344262296, "grad_norm": 7.97554349899292, "learning_rate": 1.0081580706916493e-06, "loss": 0.7476, "step": 26235 }, { "epoch": 86.01967213114754, "grad_norm": 4.672484397888184, "learning_rate": 1.0076934711569076e-06, "loss": 0.2979, "step": 26236 }, { "epoch": 86.02295081967213, "grad_norm": 4.883879661560059, "learning_rate": 1.0072289730197725e-06, "loss": 0.5123, "step": 26237 }, { "epoch": 86.02622950819672, "grad_norm": 4.058013916015625, "learning_rate": 1.0067645762854794e-06, "loss": 0.3788, "step": 26238 }, { "epoch": 86.02950819672131, "grad_norm": 4.005636692047119, "learning_rate": 1.0063002809592682e-06, "loss": 0.2096, "step": 26239 }, { "epoch": 86.0327868852459, "grad_norm": 6.505403995513916, "learning_rate": 1.005836087046369e-06, "loss": 0.2512, "step": 26240 }, { "epoch": 86.03606557377049, "grad_norm": 5.097951889038086, "learning_rate": 1.0053719945520213e-06, "loss": 0.5054, "step": 26241 }, { "epoch": 86.03934426229509, "grad_norm": 6.270565032958984, "learning_rate": 1.0049080034814574e-06, "loss": 0.3966, "step": 26242 }, { "epoch": 86.04262295081968, "grad_norm": 6.215964317321777, "learning_rate": 1.0044441138399074e-06, "loss": 0.2749, "step": 26243 }, { "epoch": 86.04590163934427, "grad_norm": 4.032972812652588, "learning_rate": 1.003980325632602e-06, "loss": 0.2874, "step": 26244 }, { "epoch": 86.04918032786885, "grad_norm": 4.693368911743164, "learning_rate": 1.0035166388647732e-06, "loss": 0.3721, "step": 26245 }, { "epoch": 86.05245901639344, "grad_norm": 4.954008102416992, "learning_rate": 1.0030530535416494e-06, "loss": 0.4693, "step": 26246 }, { "epoch": 86.05573770491803, "grad_norm": 5.691200256347656, "learning_rate": 1.0025895696684563e-06, "loss": 0.4698, "step": 26247 }, { "epoch": 86.05901639344262, "grad_norm": 5.104306221008301, "learning_rate": 1.0021261872504184e-06, "loss": 0.3301, "step": 26248 }, { "epoch": 86.0622950819672, "grad_norm": 6.260163307189941, "learning_rate": 1.0016629062927653e-06, "loss": 0.2038, "step": 26249 }, { "epoch": 86.06557377049181, "grad_norm": 9.928205490112305, "learning_rate": 1.001199726800719e-06, "loss": 0.3361, "step": 26250 }, { "epoch": 86.0688524590164, "grad_norm": 4.314095973968506, "learning_rate": 1.0007366487795024e-06, "loss": 0.363, "step": 26251 }, { "epoch": 86.07213114754099, "grad_norm": 5.734046936035156, "learning_rate": 1.0002736722343365e-06, "loss": 0.4256, "step": 26252 }, { "epoch": 86.07540983606557, "grad_norm": 5.081588268280029, "learning_rate": 9.998107971704396e-07, "loss": 0.3314, "step": 26253 }, { "epoch": 86.07868852459016, "grad_norm": 5.563052177429199, "learning_rate": 9.993480235930376e-07, "loss": 0.6815, "step": 26254 }, { "epoch": 86.08196721311475, "grad_norm": 4.120784282684326, "learning_rate": 9.988853515073437e-07, "loss": 0.2583, "step": 26255 }, { "epoch": 86.08524590163934, "grad_norm": 4.093113422393799, "learning_rate": 9.984227809185764e-07, "loss": 0.5548, "step": 26256 }, { "epoch": 86.08852459016393, "grad_norm": 4.532381534576416, "learning_rate": 9.979603118319504e-07, "loss": 0.3217, "step": 26257 }, { "epoch": 86.09180327868853, "grad_norm": 19.1622371673584, "learning_rate": 9.974979442526821e-07, "loss": 0.4194, "step": 26258 }, { "epoch": 86.09508196721312, "grad_norm": 5.848634243011475, "learning_rate": 9.970356781859858e-07, "loss": 0.4391, "step": 26259 }, { "epoch": 86.09836065573771, "grad_norm": 5.993618965148926, "learning_rate": 9.965735136370713e-07, "loss": 0.4488, "step": 26260 }, { "epoch": 86.1016393442623, "grad_norm": 4.8959174156188965, "learning_rate": 9.961114506111537e-07, "loss": 0.4021, "step": 26261 }, { "epoch": 86.10491803278688, "grad_norm": 3.788585662841797, "learning_rate": 9.956494891134405e-07, "loss": 0.2309, "step": 26262 }, { "epoch": 86.10819672131147, "grad_norm": 5.2754292488098145, "learning_rate": 9.951876291491391e-07, "loss": 0.3422, "step": 26263 }, { "epoch": 86.11147540983606, "grad_norm": 5.011423110961914, "learning_rate": 9.947258707234631e-07, "loss": 0.1859, "step": 26264 }, { "epoch": 86.11475409836065, "grad_norm": 5.221798896789551, "learning_rate": 9.942642138416147e-07, "loss": 0.5888, "step": 26265 }, { "epoch": 86.11803278688525, "grad_norm": 4.809025287628174, "learning_rate": 9.938026585088e-07, "loss": 0.332, "step": 26266 }, { "epoch": 86.12131147540984, "grad_norm": 4.579117774963379, "learning_rate": 9.93341204730226e-07, "loss": 0.1584, "step": 26267 }, { "epoch": 86.12459016393443, "grad_norm": 4.136543273925781, "learning_rate": 9.928798525110938e-07, "loss": 0.3913, "step": 26268 }, { "epoch": 86.12786885245902, "grad_norm": 6.173383712768555, "learning_rate": 9.924186018566072e-07, "loss": 0.4387, "step": 26269 }, { "epoch": 86.1311475409836, "grad_norm": 18.718229293823242, "learning_rate": 9.919574527719645e-07, "loss": 0.2631, "step": 26270 }, { "epoch": 86.1344262295082, "grad_norm": 3.983386278152466, "learning_rate": 9.914964052623688e-07, "loss": 0.4417, "step": 26271 }, { "epoch": 86.13770491803278, "grad_norm": 3.713886260986328, "learning_rate": 9.910354593330185e-07, "loss": 0.2847, "step": 26272 }, { "epoch": 86.14098360655737, "grad_norm": 5.032954216003418, "learning_rate": 9.905746149891104e-07, "loss": 0.3576, "step": 26273 }, { "epoch": 86.14426229508197, "grad_norm": 4.869852066040039, "learning_rate": 9.901138722358383e-07, "loss": 0.3644, "step": 26274 }, { "epoch": 86.14754098360656, "grad_norm": 6.656705856323242, "learning_rate": 9.896532310784023e-07, "loss": 0.2482, "step": 26275 }, { "epoch": 86.15081967213115, "grad_norm": 17.10618782043457, "learning_rate": 9.89192691521994e-07, "loss": 0.4194, "step": 26276 }, { "epoch": 86.15409836065574, "grad_norm": 5.287468433380127, "learning_rate": 9.88732253571808e-07, "loss": 0.4568, "step": 26277 }, { "epoch": 86.15737704918033, "grad_norm": 4.426985740661621, "learning_rate": 9.882719172330347e-07, "loss": 0.6446, "step": 26278 }, { "epoch": 86.16065573770491, "grad_norm": 8.368118286132812, "learning_rate": 9.878116825108641e-07, "loss": 0.3623, "step": 26279 }, { "epoch": 86.1639344262295, "grad_norm": 5.664261817932129, "learning_rate": 9.87351549410488e-07, "loss": 0.4144, "step": 26280 }, { "epoch": 86.1672131147541, "grad_norm": 6.983009338378906, "learning_rate": 9.868915179370953e-07, "loss": 0.3693, "step": 26281 }, { "epoch": 86.1704918032787, "grad_norm": 4.390824317932129, "learning_rate": 9.864315880958708e-07, "loss": 0.418, "step": 26282 }, { "epoch": 86.17377049180328, "grad_norm": 4.497113227844238, "learning_rate": 9.859717598920003e-07, "loss": 0.3433, "step": 26283 }, { "epoch": 86.17704918032787, "grad_norm": 7.073866844177246, "learning_rate": 9.855120333306722e-07, "loss": 0.3275, "step": 26284 }, { "epoch": 86.18032786885246, "grad_norm": 6.088232517242432, "learning_rate": 9.850524084170698e-07, "loss": 0.5917, "step": 26285 }, { "epoch": 86.18360655737705, "grad_norm": 5.9847564697265625, "learning_rate": 9.845928851563735e-07, "loss": 0.4239, "step": 26286 }, { "epoch": 86.18688524590164, "grad_norm": 4.821468830108643, "learning_rate": 9.841334635537647e-07, "loss": 0.2854, "step": 26287 }, { "epoch": 86.19016393442622, "grad_norm": 5.778138160705566, "learning_rate": 9.83674143614427e-07, "loss": 0.2803, "step": 26288 }, { "epoch": 86.19344262295083, "grad_norm": 4.011406421661377, "learning_rate": 9.832149253435375e-07, "loss": 0.1816, "step": 26289 }, { "epoch": 86.19672131147541, "grad_norm": 4.709205627441406, "learning_rate": 9.827558087462751e-07, "loss": 0.3373, "step": 26290 }, { "epoch": 86.2, "grad_norm": 4.633603096008301, "learning_rate": 9.822967938278172e-07, "loss": 0.3553, "step": 26291 }, { "epoch": 86.20327868852459, "grad_norm": 5.209685802459717, "learning_rate": 9.81837880593336e-07, "loss": 0.5307, "step": 26292 }, { "epoch": 86.20655737704918, "grad_norm": 10.761858940124512, "learning_rate": 9.813790690480118e-07, "loss": 0.7049, "step": 26293 }, { "epoch": 86.20983606557377, "grad_norm": 4.458265781402588, "learning_rate": 9.809203591970163e-07, "loss": 0.3182, "step": 26294 }, { "epoch": 86.21311475409836, "grad_norm": 5.695396423339844, "learning_rate": 9.804617510455194e-07, "loss": 0.3772, "step": 26295 }, { "epoch": 86.21639344262294, "grad_norm": 4.120445728302002, "learning_rate": 9.80003244598694e-07, "loss": 0.3077, "step": 26296 }, { "epoch": 86.21967213114755, "grad_norm": 4.756192207336426, "learning_rate": 9.795448398617114e-07, "loss": 0.3747, "step": 26297 }, { "epoch": 86.22295081967214, "grad_norm": 4.524580955505371, "learning_rate": 9.790865368397406e-07, "loss": 0.748, "step": 26298 }, { "epoch": 86.22622950819672, "grad_norm": 4.419925212860107, "learning_rate": 9.786283355379477e-07, "loss": 0.3419, "step": 26299 }, { "epoch": 86.22950819672131, "grad_norm": 4.550134181976318, "learning_rate": 9.781702359614986e-07, "loss": 0.5504, "step": 26300 }, { "epoch": 86.2327868852459, "grad_norm": 5.208734512329102, "learning_rate": 9.777122381155623e-07, "loss": 0.6071, "step": 26301 }, { "epoch": 86.23606557377049, "grad_norm": 4.644250869750977, "learning_rate": 9.772543420053015e-07, "loss": 0.3274, "step": 26302 }, { "epoch": 86.23934426229508, "grad_norm": 5.990630149841309, "learning_rate": 9.767965476358786e-07, "loss": 0.2429, "step": 26303 }, { "epoch": 86.24262295081967, "grad_norm": 4.566065788269043, "learning_rate": 9.763388550124564e-07, "loss": 0.3254, "step": 26304 }, { "epoch": 86.24590163934427, "grad_norm": 5.809558391571045, "learning_rate": 9.758812641401948e-07, "loss": 0.3274, "step": 26305 }, { "epoch": 86.24918032786886, "grad_norm": 4.0937886238098145, "learning_rate": 9.754237750242569e-07, "loss": 0.5664, "step": 26306 }, { "epoch": 86.25245901639344, "grad_norm": 5.519715309143066, "learning_rate": 9.74966387669798e-07, "loss": 0.392, "step": 26307 }, { "epoch": 86.25573770491803, "grad_norm": 5.116183280944824, "learning_rate": 9.745091020819775e-07, "loss": 0.4877, "step": 26308 }, { "epoch": 86.25901639344262, "grad_norm": 5.987769603729248, "learning_rate": 9.74051918265948e-07, "loss": 0.2955, "step": 26309 }, { "epoch": 86.26229508196721, "grad_norm": 4.64520788192749, "learning_rate": 9.73594836226871e-07, "loss": 0.4109, "step": 26310 }, { "epoch": 86.2655737704918, "grad_norm": 4.830352783203125, "learning_rate": 9.731378559698968e-07, "loss": 0.4507, "step": 26311 }, { "epoch": 86.26885245901639, "grad_norm": 4.515933990478516, "learning_rate": 9.726809775001788e-07, "loss": 0.2065, "step": 26312 }, { "epoch": 86.27213114754099, "grad_norm": 5.137685298919678, "learning_rate": 9.722242008228676e-07, "loss": 0.4195, "step": 26313 }, { "epoch": 86.27540983606558, "grad_norm": 4.8345184326171875, "learning_rate": 9.71767525943116e-07, "loss": 0.3068, "step": 26314 }, { "epoch": 86.27868852459017, "grad_norm": 4.130196571350098, "learning_rate": 9.713109528660737e-07, "loss": 0.1826, "step": 26315 }, { "epoch": 86.28196721311475, "grad_norm": 4.976320743560791, "learning_rate": 9.708544815968845e-07, "loss": 0.1862, "step": 26316 }, { "epoch": 86.28524590163934, "grad_norm": 4.963348865509033, "learning_rate": 9.703981121407013e-07, "loss": 0.3344, "step": 26317 }, { "epoch": 86.28852459016393, "grad_norm": 4.230684280395508, "learning_rate": 9.699418445026687e-07, "loss": 0.3434, "step": 26318 }, { "epoch": 86.29180327868852, "grad_norm": 4.835719108581543, "learning_rate": 9.69485678687928e-07, "loss": 0.5158, "step": 26319 }, { "epoch": 86.29508196721312, "grad_norm": 5.3583879470825195, "learning_rate": 9.690296147016277e-07, "loss": 0.6126, "step": 26320 }, { "epoch": 86.29836065573771, "grad_norm": 5.721220970153809, "learning_rate": 9.685736525489088e-07, "loss": 0.4107, "step": 26321 }, { "epoch": 86.3016393442623, "grad_norm": 4.304614543914795, "learning_rate": 9.681177922349105e-07, "loss": 0.3914, "step": 26322 }, { "epoch": 86.30491803278689, "grad_norm": 4.8107733726501465, "learning_rate": 9.676620337647758e-07, "loss": 0.4029, "step": 26323 }, { "epoch": 86.30819672131148, "grad_norm": 4.242916107177734, "learning_rate": 9.672063771436434e-07, "loss": 0.2306, "step": 26324 }, { "epoch": 86.31147540983606, "grad_norm": 5.760313510894775, "learning_rate": 9.66750822376651e-07, "loss": 0.266, "step": 26325 }, { "epoch": 86.31475409836065, "grad_norm": 4.904837608337402, "learning_rate": 9.662953694689337e-07, "loss": 0.1105, "step": 26326 }, { "epoch": 86.31803278688524, "grad_norm": 4.397872447967529, "learning_rate": 9.6584001842563e-07, "loss": 0.2887, "step": 26327 }, { "epoch": 86.32131147540984, "grad_norm": 4.915557384490967, "learning_rate": 9.653847692518737e-07, "loss": 0.2141, "step": 26328 }, { "epoch": 86.32459016393443, "grad_norm": 4.042927265167236, "learning_rate": 9.649296219527982e-07, "loss": 0.397, "step": 26329 }, { "epoch": 86.32786885245902, "grad_norm": 6.842466831207275, "learning_rate": 9.64474576533534e-07, "loss": 0.2391, "step": 26330 }, { "epoch": 86.33114754098361, "grad_norm": 4.219193935394287, "learning_rate": 9.640196329992124e-07, "loss": 0.2414, "step": 26331 }, { "epoch": 86.3344262295082, "grad_norm": 5.732675552368164, "learning_rate": 9.635647913549672e-07, "loss": 0.2822, "step": 26332 }, { "epoch": 86.33770491803278, "grad_norm": 7.030012607574463, "learning_rate": 9.631100516059234e-07, "loss": 0.2996, "step": 26333 }, { "epoch": 86.34098360655737, "grad_norm": 4.783864974975586, "learning_rate": 9.626554137572109e-07, "loss": 0.3117, "step": 26334 }, { "epoch": 86.34426229508196, "grad_norm": 5.021545886993408, "learning_rate": 9.622008778139514e-07, "loss": 0.3666, "step": 26335 }, { "epoch": 86.34754098360656, "grad_norm": 5.498166561126709, "learning_rate": 9.617464437812774e-07, "loss": 0.4657, "step": 26336 }, { "epoch": 86.35081967213115, "grad_norm": 5.136632442474365, "learning_rate": 9.612921116643092e-07, "loss": 0.3728, "step": 26337 }, { "epoch": 86.35409836065574, "grad_norm": 4.65880012512207, "learning_rate": 9.608378814681706e-07, "loss": 0.2913, "step": 26338 }, { "epoch": 86.35737704918033, "grad_norm": 8.016154289245605, "learning_rate": 9.60383753197981e-07, "loss": 0.4093, "step": 26339 }, { "epoch": 86.36065573770492, "grad_norm": 12.123343467712402, "learning_rate": 9.599297268588647e-07, "loss": 0.665, "step": 26340 }, { "epoch": 86.3639344262295, "grad_norm": 4.318464279174805, "learning_rate": 9.594758024559415e-07, "loss": 0.2888, "step": 26341 }, { "epoch": 86.3672131147541, "grad_norm": 3.53796648979187, "learning_rate": 9.590219799943278e-07, "loss": 0.2658, "step": 26342 }, { "epoch": 86.37049180327868, "grad_norm": 11.804732322692871, "learning_rate": 9.585682594791413e-07, "loss": 0.382, "step": 26343 }, { "epoch": 86.37377049180328, "grad_norm": 5.185533046722412, "learning_rate": 9.581146409154962e-07, "loss": 0.2943, "step": 26344 }, { "epoch": 86.37704918032787, "grad_norm": 4.946727275848389, "learning_rate": 9.57661124308512e-07, "loss": 0.3989, "step": 26345 }, { "epoch": 86.38032786885246, "grad_norm": 4.6625847816467285, "learning_rate": 9.572077096633003e-07, "loss": 0.4769, "step": 26346 }, { "epoch": 86.38360655737705, "grad_norm": 5.735645771026611, "learning_rate": 9.567543969849746e-07, "loss": 0.263, "step": 26347 }, { "epoch": 86.38688524590164, "grad_norm": 4.9016008377075195, "learning_rate": 9.56301186278643e-07, "loss": 0.3251, "step": 26348 }, { "epoch": 86.39016393442623, "grad_norm": 5.236457824707031, "learning_rate": 9.558480775494205e-07, "loss": 0.5652, "step": 26349 }, { "epoch": 86.39344262295081, "grad_norm": 4.2007951736450195, "learning_rate": 9.55395070802414e-07, "loss": 0.2206, "step": 26350 }, { "epoch": 86.3967213114754, "grad_norm": 4.921142578125, "learning_rate": 9.549421660427326e-07, "loss": 0.3962, "step": 26351 }, { "epoch": 86.4, "grad_norm": 5.494514465332031, "learning_rate": 9.544893632754816e-07, "loss": 0.1265, "step": 26352 }, { "epoch": 86.4032786885246, "grad_norm": 8.978959083557129, "learning_rate": 9.540366625057683e-07, "loss": 0.3031, "step": 26353 }, { "epoch": 86.40655737704918, "grad_norm": 6.913906574249268, "learning_rate": 9.535840637386983e-07, "loss": 0.3691, "step": 26354 }, { "epoch": 86.40983606557377, "grad_norm": 4.4970855712890625, "learning_rate": 9.531315669793739e-07, "loss": 0.3729, "step": 26355 }, { "epoch": 86.41311475409836, "grad_norm": 5.700615406036377, "learning_rate": 9.526791722328977e-07, "loss": 0.3462, "step": 26356 }, { "epoch": 86.41639344262295, "grad_norm": 5.125598907470703, "learning_rate": 9.522268795043676e-07, "loss": 0.453, "step": 26357 }, { "epoch": 86.41967213114754, "grad_norm": 5.247246265411377, "learning_rate": 9.517746887988899e-07, "loss": 0.308, "step": 26358 }, { "epoch": 86.42295081967212, "grad_norm": 4.869114398956299, "learning_rate": 9.513226001215592e-07, "loss": 0.4867, "step": 26359 }, { "epoch": 86.42622950819673, "grad_norm": 4.825100421905518, "learning_rate": 9.508706134774748e-07, "loss": 0.2803, "step": 26360 }, { "epoch": 86.42950819672132, "grad_norm": 7.527291297912598, "learning_rate": 9.504187288717315e-07, "loss": 0.3825, "step": 26361 }, { "epoch": 86.4327868852459, "grad_norm": 10.906586647033691, "learning_rate": 9.499669463094285e-07, "loss": 0.3533, "step": 26362 }, { "epoch": 86.43606557377049, "grad_norm": 4.828794002532959, "learning_rate": 9.495152657956574e-07, "loss": 0.3583, "step": 26363 }, { "epoch": 86.43934426229508, "grad_norm": 4.800424098968506, "learning_rate": 9.490636873355108e-07, "loss": 0.1587, "step": 26364 }, { "epoch": 86.44262295081967, "grad_norm": 3.906602382659912, "learning_rate": 9.486122109340812e-07, "loss": 0.4743, "step": 26365 }, { "epoch": 86.44590163934426, "grad_norm": 5.744691371917725, "learning_rate": 9.481608365964612e-07, "loss": 0.4513, "step": 26366 }, { "epoch": 86.44918032786886, "grad_norm": 4.200473308563232, "learning_rate": 9.4770956432774e-07, "loss": 0.278, "step": 26367 }, { "epoch": 86.45245901639345, "grad_norm": 4.588695049285889, "learning_rate": 9.472583941330027e-07, "loss": 0.3815, "step": 26368 }, { "epoch": 86.45573770491804, "grad_norm": 5.943088054656982, "learning_rate": 9.468073260173427e-07, "loss": 0.3098, "step": 26369 }, { "epoch": 86.45901639344262, "grad_norm": 3.81394624710083, "learning_rate": 9.463563599858417e-07, "loss": 0.3377, "step": 26370 }, { "epoch": 86.46229508196721, "grad_norm": 4.735942840576172, "learning_rate": 9.459054960435865e-07, "loss": 0.3345, "step": 26371 }, { "epoch": 86.4655737704918, "grad_norm": 5.34171199798584, "learning_rate": 9.4545473419566e-07, "loss": 0.3389, "step": 26372 }, { "epoch": 86.46885245901639, "grad_norm": 4.442264080047607, "learning_rate": 9.450040744471467e-07, "loss": 0.3429, "step": 26373 }, { "epoch": 86.47213114754098, "grad_norm": 3.6391730308532715, "learning_rate": 9.445535168031273e-07, "loss": 0.2176, "step": 26374 }, { "epoch": 86.47540983606558, "grad_norm": 4.1271772384643555, "learning_rate": 9.441030612686797e-07, "loss": 0.4961, "step": 26375 }, { "epoch": 86.47868852459017, "grad_norm": 9.368424415588379, "learning_rate": 9.436527078488888e-07, "loss": 0.4461, "step": 26376 }, { "epoch": 86.48196721311476, "grad_norm": 5.240837097167969, "learning_rate": 9.432024565488295e-07, "loss": 0.4123, "step": 26377 }, { "epoch": 86.48524590163935, "grad_norm": 5.222430229187012, "learning_rate": 9.427523073735767e-07, "loss": 0.2884, "step": 26378 }, { "epoch": 86.48852459016393, "grad_norm": 6.830661773681641, "learning_rate": 9.423022603282117e-07, "loss": 0.5453, "step": 26379 }, { "epoch": 86.49180327868852, "grad_norm": 4.508813381195068, "learning_rate": 9.418523154178061e-07, "loss": 0.3254, "step": 26380 }, { "epoch": 86.49508196721311, "grad_norm": 6.154411315917969, "learning_rate": 9.414024726474335e-07, "loss": 0.4631, "step": 26381 }, { "epoch": 86.4983606557377, "grad_norm": 5.333332061767578, "learning_rate": 9.409527320221668e-07, "loss": 0.2964, "step": 26382 }, { "epoch": 86.5016393442623, "grad_norm": 4.621151924133301, "learning_rate": 9.405030935470749e-07, "loss": 0.3745, "step": 26383 }, { "epoch": 86.50491803278689, "grad_norm": 5.209977626800537, "learning_rate": 9.400535572272329e-07, "loss": 0.4469, "step": 26384 }, { "epoch": 86.50819672131148, "grad_norm": 5.384149074554443, "learning_rate": 9.396041230677056e-07, "loss": 0.4933, "step": 26385 }, { "epoch": 86.51147540983607, "grad_norm": 5.44691801071167, "learning_rate": 9.391547910735632e-07, "loss": 0.3169, "step": 26386 }, { "epoch": 86.51475409836065, "grad_norm": 4.901487350463867, "learning_rate": 9.387055612498697e-07, "loss": 0.5944, "step": 26387 }, { "epoch": 86.51803278688524, "grad_norm": 4.032243728637695, "learning_rate": 9.382564336016942e-07, "loss": 0.3319, "step": 26388 }, { "epoch": 86.52131147540983, "grad_norm": 5.9889726638793945, "learning_rate": 9.378074081340983e-07, "loss": 0.3991, "step": 26389 }, { "epoch": 86.52459016393442, "grad_norm": 5.679990768432617, "learning_rate": 9.373584848521477e-07, "loss": 0.3926, "step": 26390 }, { "epoch": 86.52786885245902, "grad_norm": 4.235228538513184, "learning_rate": 9.369096637608998e-07, "loss": 0.3513, "step": 26391 }, { "epoch": 86.53114754098361, "grad_norm": 5.6253790855407715, "learning_rate": 9.364609448654217e-07, "loss": 0.4329, "step": 26392 }, { "epoch": 86.5344262295082, "grad_norm": 4.5063018798828125, "learning_rate": 9.360123281707689e-07, "loss": 0.2561, "step": 26393 }, { "epoch": 86.53770491803279, "grad_norm": 10.374996185302734, "learning_rate": 9.355638136820022e-07, "loss": 0.27, "step": 26394 }, { "epoch": 86.54098360655738, "grad_norm": 4.891600131988525, "learning_rate": 9.351154014041775e-07, "loss": 0.2006, "step": 26395 }, { "epoch": 86.54426229508196, "grad_norm": 5.44216775894165, "learning_rate": 9.346670913423506e-07, "loss": 0.1724, "step": 26396 }, { "epoch": 86.54754098360655, "grad_norm": 6.826213359832764, "learning_rate": 9.342188835015798e-07, "loss": 0.4461, "step": 26397 }, { "epoch": 86.55081967213114, "grad_norm": 6.291494846343994, "learning_rate": 9.337707778869165e-07, "loss": 0.6037, "step": 26398 }, { "epoch": 86.55409836065574, "grad_norm": 5.303937911987305, "learning_rate": 9.333227745034146e-07, "loss": 0.5234, "step": 26399 }, { "epoch": 86.55737704918033, "grad_norm": 4.51319694519043, "learning_rate": 9.328748733561233e-07, "loss": 0.2564, "step": 26400 }, { "epoch": 86.56065573770492, "grad_norm": 3.573975086212158, "learning_rate": 9.324270744500973e-07, "loss": 0.4809, "step": 26401 }, { "epoch": 86.56393442622951, "grad_norm": 5.4721174240112305, "learning_rate": 9.319793777903829e-07, "loss": 0.2533, "step": 26402 }, { "epoch": 86.5672131147541, "grad_norm": 4.849509239196777, "learning_rate": 9.315317833820303e-07, "loss": 0.2724, "step": 26403 }, { "epoch": 86.57049180327868, "grad_norm": 4.598644733428955, "learning_rate": 9.310842912300844e-07, "loss": 0.3451, "step": 26404 }, { "epoch": 86.57377049180327, "grad_norm": 4.630107402801514, "learning_rate": 9.306369013395933e-07, "loss": 0.2669, "step": 26405 }, { "epoch": 86.57704918032788, "grad_norm": 3.882683277130127, "learning_rate": 9.301896137156019e-07, "loss": 0.3752, "step": 26406 }, { "epoch": 86.58032786885246, "grad_norm": 4.4005818367004395, "learning_rate": 9.297424283631517e-07, "loss": 0.2621, "step": 26407 }, { "epoch": 86.58360655737705, "grad_norm": 5.345098972320557, "learning_rate": 9.292953452872877e-07, "loss": 0.2684, "step": 26408 }, { "epoch": 86.58688524590164, "grad_norm": 4.518368244171143, "learning_rate": 9.288483644930469e-07, "loss": 0.3768, "step": 26409 }, { "epoch": 86.59016393442623, "grad_norm": 4.790369033813477, "learning_rate": 9.28401485985474e-07, "loss": 0.2497, "step": 26410 }, { "epoch": 86.59344262295082, "grad_norm": 4.106083393096924, "learning_rate": 9.279547097696073e-07, "loss": 0.3371, "step": 26411 }, { "epoch": 86.5967213114754, "grad_norm": 4.501544952392578, "learning_rate": 9.27508035850484e-07, "loss": 0.2844, "step": 26412 }, { "epoch": 86.6, "grad_norm": 5.685571193695068, "learning_rate": 9.270614642331377e-07, "loss": 0.3766, "step": 26413 }, { "epoch": 86.6032786885246, "grad_norm": 5.237213611602783, "learning_rate": 9.2661499492261e-07, "loss": 0.2229, "step": 26414 }, { "epoch": 86.60655737704919, "grad_norm": 4.359432697296143, "learning_rate": 9.261686279239313e-07, "loss": 0.3001, "step": 26415 }, { "epoch": 86.60983606557377, "grad_norm": 7.627599239349365, "learning_rate": 9.257223632421353e-07, "loss": 0.3518, "step": 26416 }, { "epoch": 86.61311475409836, "grad_norm": 4.510677814483643, "learning_rate": 9.252762008822535e-07, "loss": 0.331, "step": 26417 }, { "epoch": 86.61639344262295, "grad_norm": 6.366597652435303, "learning_rate": 9.248301408493199e-07, "loss": 0.3402, "step": 26418 }, { "epoch": 86.61967213114754, "grad_norm": 6.443331718444824, "learning_rate": 9.243841831483613e-07, "loss": 0.5035, "step": 26419 }, { "epoch": 86.62295081967213, "grad_norm": 5.059694766998291, "learning_rate": 9.239383277844083e-07, "loss": 0.3141, "step": 26420 }, { "epoch": 86.62622950819672, "grad_norm": 5.443604946136475, "learning_rate": 9.234925747624857e-07, "loss": 0.4936, "step": 26421 }, { "epoch": 86.62950819672132, "grad_norm": 4.104730129241943, "learning_rate": 9.230469240876228e-07, "loss": 0.3197, "step": 26422 }, { "epoch": 86.6327868852459, "grad_norm": 10.482548713684082, "learning_rate": 9.226013757648433e-07, "loss": 0.4938, "step": 26423 }, { "epoch": 86.6360655737705, "grad_norm": 5.978575706481934, "learning_rate": 9.2215592979917e-07, "loss": 0.5778, "step": 26424 }, { "epoch": 86.63934426229508, "grad_norm": 4.390432357788086, "learning_rate": 9.217105861956288e-07, "loss": 0.2235, "step": 26425 }, { "epoch": 86.64262295081967, "grad_norm": 4.110462188720703, "learning_rate": 9.21265344959239e-07, "loss": 0.2521, "step": 26426 }, { "epoch": 86.64590163934426, "grad_norm": 4.410324573516846, "learning_rate": 9.208202060950211e-07, "loss": 0.3923, "step": 26427 }, { "epoch": 86.64918032786885, "grad_norm": 6.209941387176514, "learning_rate": 9.203751696079976e-07, "loss": 0.281, "step": 26428 }, { "epoch": 86.65245901639344, "grad_norm": 5.48280143737793, "learning_rate": 9.199302355031836e-07, "loss": 0.4653, "step": 26429 }, { "epoch": 86.65573770491804, "grad_norm": 3.9861273765563965, "learning_rate": 9.194854037855982e-07, "loss": 0.2555, "step": 26430 }, { "epoch": 86.65901639344263, "grad_norm": 3.4728405475616455, "learning_rate": 9.190406744602531e-07, "loss": 0.2304, "step": 26431 }, { "epoch": 86.66229508196722, "grad_norm": 5.610238552093506, "learning_rate": 9.185960475321687e-07, "loss": 0.3568, "step": 26432 }, { "epoch": 86.6655737704918, "grad_norm": 6.146344184875488, "learning_rate": 9.181515230063564e-07, "loss": 0.2217, "step": 26433 }, { "epoch": 86.66885245901639, "grad_norm": 6.194145679473877, "learning_rate": 9.17707100887828e-07, "loss": 0.3714, "step": 26434 }, { "epoch": 86.67213114754098, "grad_norm": 5.699002742767334, "learning_rate": 9.172627811815938e-07, "loss": 0.4207, "step": 26435 }, { "epoch": 86.67540983606557, "grad_norm": 7.925821304321289, "learning_rate": 9.168185638926664e-07, "loss": 0.3392, "step": 26436 }, { "epoch": 86.67868852459016, "grad_norm": 4.60178279876709, "learning_rate": 9.16374449026054e-07, "loss": 0.209, "step": 26437 }, { "epoch": 86.68196721311476, "grad_norm": 3.75382137298584, "learning_rate": 9.15930436586765e-07, "loss": 0.2713, "step": 26438 }, { "epoch": 86.68524590163935, "grad_norm": 4.876315593719482, "learning_rate": 9.154865265798029e-07, "loss": 0.376, "step": 26439 }, { "epoch": 86.68852459016394, "grad_norm": 5.610429286956787, "learning_rate": 9.150427190101785e-07, "loss": 0.3015, "step": 26440 }, { "epoch": 86.69180327868852, "grad_norm": 4.871074199676514, "learning_rate": 9.145990138828931e-07, "loss": 0.4599, "step": 26441 }, { "epoch": 86.69508196721311, "grad_norm": 4.502925872802734, "learning_rate": 9.141554112029494e-07, "loss": 0.3728, "step": 26442 }, { "epoch": 86.6983606557377, "grad_norm": 5.0104804039001465, "learning_rate": 9.137119109753512e-07, "loss": 0.4735, "step": 26443 }, { "epoch": 86.70163934426229, "grad_norm": 8.8840913772583, "learning_rate": 9.132685132050967e-07, "loss": 0.4255, "step": 26444 }, { "epoch": 86.70491803278688, "grad_norm": 5.629513740539551, "learning_rate": 9.128252178971896e-07, "loss": 0.2104, "step": 26445 }, { "epoch": 86.70819672131148, "grad_norm": 8.549947738647461, "learning_rate": 9.123820250566262e-07, "loss": 0.3133, "step": 26446 }, { "epoch": 86.71147540983607, "grad_norm": 4.822473049163818, "learning_rate": 9.119389346884034e-07, "loss": 0.3306, "step": 26447 }, { "epoch": 86.71475409836066, "grad_norm": 4.5675435066223145, "learning_rate": 9.114959467975171e-07, "loss": 0.304, "step": 26448 }, { "epoch": 86.71803278688525, "grad_norm": 5.010459899902344, "learning_rate": 9.110530613889656e-07, "loss": 0.3455, "step": 26449 }, { "epoch": 86.72131147540983, "grad_norm": 4.745097637176514, "learning_rate": 9.106102784677418e-07, "loss": 0.4228, "step": 26450 }, { "epoch": 86.72459016393442, "grad_norm": 6.421722412109375, "learning_rate": 9.10167598038837e-07, "loss": 0.3448, "step": 26451 }, { "epoch": 86.72786885245901, "grad_norm": 3.396876573562622, "learning_rate": 9.097250201072405e-07, "loss": 0.4865, "step": 26452 }, { "epoch": 86.73114754098361, "grad_norm": 6.543011665344238, "learning_rate": 9.092825446779496e-07, "loss": 0.2398, "step": 26453 }, { "epoch": 86.7344262295082, "grad_norm": 4.521341800689697, "learning_rate": 9.088401717559492e-07, "loss": 0.3737, "step": 26454 }, { "epoch": 86.73770491803279, "grad_norm": 6.284567832946777, "learning_rate": 9.083979013462285e-07, "loss": 0.4445, "step": 26455 }, { "epoch": 86.74098360655738, "grad_norm": 4.93441915512085, "learning_rate": 9.079557334537736e-07, "loss": 0.2623, "step": 26456 }, { "epoch": 86.74426229508197, "grad_norm": 4.671170711517334, "learning_rate": 9.075136680835705e-07, "loss": 0.4888, "step": 26457 }, { "epoch": 86.74754098360656, "grad_norm": 6.161104679107666, "learning_rate": 9.070717052406052e-07, "loss": 0.4488, "step": 26458 }, { "epoch": 86.75081967213114, "grad_norm": 4.743947505950928, "learning_rate": 9.066298449298616e-07, "loss": 0.3604, "step": 26459 }, { "epoch": 86.75409836065573, "grad_norm": 4.0386834144592285, "learning_rate": 9.061880871563211e-07, "loss": 0.3738, "step": 26460 }, { "epoch": 86.75737704918033, "grad_norm": 7.651838779449463, "learning_rate": 9.057464319249631e-07, "loss": 0.2836, "step": 26461 }, { "epoch": 86.76065573770492, "grad_norm": 3.579942464828491, "learning_rate": 9.053048792407715e-07, "loss": 0.3899, "step": 26462 }, { "epoch": 86.76393442622951, "grad_norm": 3.4334664344787598, "learning_rate": 9.048634291087244e-07, "loss": 0.2566, "step": 26463 }, { "epoch": 86.7672131147541, "grad_norm": 4.383077621459961, "learning_rate": 9.044220815337979e-07, "loss": 0.4308, "step": 26464 }, { "epoch": 86.77049180327869, "grad_norm": 12.499256134033203, "learning_rate": 9.039808365209668e-07, "loss": 0.391, "step": 26465 }, { "epoch": 86.77377049180328, "grad_norm": 4.742086887359619, "learning_rate": 9.035396940752128e-07, "loss": 0.406, "step": 26466 }, { "epoch": 86.77704918032786, "grad_norm": 4.367757320404053, "learning_rate": 9.030986542015052e-07, "loss": 0.1853, "step": 26467 }, { "epoch": 86.78032786885245, "grad_norm": 6.935666084289551, "learning_rate": 9.026577169048201e-07, "loss": 0.4802, "step": 26468 }, { "epoch": 86.78360655737706, "grad_norm": 4.302635669708252, "learning_rate": 9.022168821901267e-07, "loss": 0.4338, "step": 26469 }, { "epoch": 86.78688524590164, "grad_norm": 5.574473857879639, "learning_rate": 9.017761500623968e-07, "loss": 0.2726, "step": 26470 }, { "epoch": 86.79016393442623, "grad_norm": 6.704622745513916, "learning_rate": 9.013355205266017e-07, "loss": 0.2508, "step": 26471 }, { "epoch": 86.79344262295082, "grad_norm": 5.484628677368164, "learning_rate": 9.008949935877087e-07, "loss": 0.2507, "step": 26472 }, { "epoch": 86.79672131147541, "grad_norm": 5.035774230957031, "learning_rate": 9.00454569250685e-07, "loss": 0.3836, "step": 26473 }, { "epoch": 86.8, "grad_norm": 4.8076958656311035, "learning_rate": 9.000142475204965e-07, "loss": 0.4152, "step": 26474 }, { "epoch": 86.80327868852459, "grad_norm": 4.0001959800720215, "learning_rate": 8.995740284021104e-07, "loss": 0.7195, "step": 26475 }, { "epoch": 86.80655737704917, "grad_norm": 5.47381067276001, "learning_rate": 8.991339119004882e-07, "loss": 0.5653, "step": 26476 }, { "epoch": 86.80983606557378, "grad_norm": 4.357806205749512, "learning_rate": 8.986938980205928e-07, "loss": 0.2991, "step": 26477 }, { "epoch": 86.81311475409836, "grad_norm": 4.920276165008545, "learning_rate": 8.98253986767389e-07, "loss": 0.5694, "step": 26478 }, { "epoch": 86.81639344262295, "grad_norm": 4.42660665512085, "learning_rate": 8.978141781458339e-07, "loss": 0.2178, "step": 26479 }, { "epoch": 86.81967213114754, "grad_norm": 5.55787467956543, "learning_rate": 8.973744721608857e-07, "loss": 0.2712, "step": 26480 }, { "epoch": 86.82295081967213, "grad_norm": 5.164229393005371, "learning_rate": 8.969348688175073e-07, "loss": 0.2342, "step": 26481 }, { "epoch": 86.82622950819672, "grad_norm": 10.288384437561035, "learning_rate": 8.964953681206534e-07, "loss": 0.6275, "step": 26482 }, { "epoch": 86.8295081967213, "grad_norm": 4.840834140777588, "learning_rate": 8.960559700752769e-07, "loss": 0.3864, "step": 26483 }, { "epoch": 86.8327868852459, "grad_norm": 10.177099227905273, "learning_rate": 8.956166746863371e-07, "loss": 0.3344, "step": 26484 }, { "epoch": 86.8360655737705, "grad_norm": 6.397878646850586, "learning_rate": 8.951774819587855e-07, "loss": 0.4298, "step": 26485 }, { "epoch": 86.83934426229509, "grad_norm": 4.2786736488342285, "learning_rate": 8.947383918975749e-07, "loss": 0.4994, "step": 26486 }, { "epoch": 86.84262295081967, "grad_norm": 5.148370265960693, "learning_rate": 8.942994045076536e-07, "loss": 0.3497, "step": 26487 }, { "epoch": 86.84590163934426, "grad_norm": 4.89304780960083, "learning_rate": 8.938605197939765e-07, "loss": 0.2404, "step": 26488 }, { "epoch": 86.84918032786885, "grad_norm": 4.205660820007324, "learning_rate": 8.934217377614896e-07, "loss": 0.1839, "step": 26489 }, { "epoch": 86.85245901639344, "grad_norm": 4.480020046234131, "learning_rate": 8.929830584151411e-07, "loss": 0.4555, "step": 26490 }, { "epoch": 86.85573770491803, "grad_norm": 4.448550224304199, "learning_rate": 8.92544481759876e-07, "loss": 0.4511, "step": 26491 }, { "epoch": 86.85901639344263, "grad_norm": 4.255406379699707, "learning_rate": 8.921060078006427e-07, "loss": 0.2911, "step": 26492 }, { "epoch": 86.86229508196722, "grad_norm": 4.182330131530762, "learning_rate": 8.916676365423848e-07, "loss": 0.4335, "step": 26493 }, { "epoch": 86.8655737704918, "grad_norm": 5.312692642211914, "learning_rate": 8.91229367990043e-07, "loss": 0.3089, "step": 26494 }, { "epoch": 86.8688524590164, "grad_norm": 20.501699447631836, "learning_rate": 8.90791202148562e-07, "loss": 0.3522, "step": 26495 }, { "epoch": 86.87213114754098, "grad_norm": 5.532691955566406, "learning_rate": 8.903531390228792e-07, "loss": 0.5077, "step": 26496 }, { "epoch": 86.87540983606557, "grad_norm": 4.764894485473633, "learning_rate": 8.899151786179383e-07, "loss": 0.3581, "step": 26497 }, { "epoch": 86.87868852459016, "grad_norm": 5.253335475921631, "learning_rate": 8.894773209386764e-07, "loss": 0.3498, "step": 26498 }, { "epoch": 86.88196721311475, "grad_norm": 4.755031108856201, "learning_rate": 8.890395659900297e-07, "loss": 0.247, "step": 26499 }, { "epoch": 86.88524590163935, "grad_norm": 4.197247505187988, "learning_rate": 8.88601913776933e-07, "loss": 0.4343, "step": 26500 }, { "epoch": 86.88852459016394, "grad_norm": 4.813003063201904, "learning_rate": 8.881643643043258e-07, "loss": 0.3904, "step": 26501 }, { "epoch": 86.89180327868853, "grad_norm": 5.794186115264893, "learning_rate": 8.877269175771386e-07, "loss": 0.3396, "step": 26502 }, { "epoch": 86.89508196721312, "grad_norm": 5.292295932769775, "learning_rate": 8.872895736003051e-07, "loss": 0.4101, "step": 26503 }, { "epoch": 86.8983606557377, "grad_norm": 5.609420299530029, "learning_rate": 8.868523323787548e-07, "loss": 0.2749, "step": 26504 }, { "epoch": 86.90163934426229, "grad_norm": 5.089348316192627, "learning_rate": 8.864151939174204e-07, "loss": 0.4547, "step": 26505 }, { "epoch": 86.90491803278688, "grad_norm": 4.195580959320068, "learning_rate": 8.859781582212323e-07, "loss": 0.4523, "step": 26506 }, { "epoch": 86.90819672131147, "grad_norm": 4.6276068687438965, "learning_rate": 8.855412252951157e-07, "loss": 0.42, "step": 26507 }, { "epoch": 86.91147540983607, "grad_norm": 4.7630133628845215, "learning_rate": 8.851043951439975e-07, "loss": 0.3444, "step": 26508 }, { "epoch": 86.91475409836066, "grad_norm": 4.668262004852295, "learning_rate": 8.846676677728039e-07, "loss": 0.4062, "step": 26509 }, { "epoch": 86.91803278688525, "grad_norm": 23.362462997436523, "learning_rate": 8.84231043186461e-07, "loss": 0.4479, "step": 26510 }, { "epoch": 86.92131147540984, "grad_norm": 5.155488014221191, "learning_rate": 8.837945213898924e-07, "loss": 0.398, "step": 26511 }, { "epoch": 86.92459016393443, "grad_norm": 5.098143100738525, "learning_rate": 8.833581023880178e-07, "loss": 0.3223, "step": 26512 }, { "epoch": 86.92786885245901, "grad_norm": 4.358339786529541, "learning_rate": 8.829217861857575e-07, "loss": 0.6512, "step": 26513 }, { "epoch": 86.9311475409836, "grad_norm": 4.2035088539123535, "learning_rate": 8.824855727880366e-07, "loss": 0.3012, "step": 26514 }, { "epoch": 86.93442622950819, "grad_norm": 4.2534589767456055, "learning_rate": 8.820494621997699e-07, "loss": 0.3891, "step": 26515 }, { "epoch": 86.9377049180328, "grad_norm": 8.978649139404297, "learning_rate": 8.816134544258748e-07, "loss": 0.4677, "step": 26516 }, { "epoch": 86.94098360655738, "grad_norm": 4.816768646240234, "learning_rate": 8.811775494712682e-07, "loss": 0.2242, "step": 26517 }, { "epoch": 86.94426229508197, "grad_norm": 4.766095161437988, "learning_rate": 8.807417473408675e-07, "loss": 0.4624, "step": 26518 }, { "epoch": 86.94754098360656, "grad_norm": 4.7971320152282715, "learning_rate": 8.803060480395853e-07, "loss": 0.5106, "step": 26519 }, { "epoch": 86.95081967213115, "grad_norm": 5.7096967697143555, "learning_rate": 8.798704515723344e-07, "loss": 0.3951, "step": 26520 }, { "epoch": 86.95409836065573, "grad_norm": 5.24428653717041, "learning_rate": 8.794349579440264e-07, "loss": 0.2581, "step": 26521 }, { "epoch": 86.95737704918032, "grad_norm": 4.091702938079834, "learning_rate": 8.789995671595708e-07, "loss": 0.4331, "step": 26522 }, { "epoch": 86.96065573770491, "grad_norm": 16.63051986694336, "learning_rate": 8.785642792238814e-07, "loss": 0.3032, "step": 26523 }, { "epoch": 86.96393442622951, "grad_norm": 5.2419514656066895, "learning_rate": 8.781290941418619e-07, "loss": 0.4163, "step": 26524 }, { "epoch": 86.9672131147541, "grad_norm": 4.402784824371338, "learning_rate": 8.776940119184219e-07, "loss": 0.3379, "step": 26525 }, { "epoch": 86.97049180327869, "grad_norm": 5.048679351806641, "learning_rate": 8.772590325584651e-07, "loss": 0.2021, "step": 26526 }, { "epoch": 86.97377049180328, "grad_norm": 4.344703197479248, "learning_rate": 8.768241560669e-07, "loss": 0.5621, "step": 26527 }, { "epoch": 86.97704918032787, "grad_norm": 5.433610916137695, "learning_rate": 8.76389382448628e-07, "loss": 0.2769, "step": 26528 }, { "epoch": 86.98032786885246, "grad_norm": 3.784315586090088, "learning_rate": 8.759547117085498e-07, "loss": 0.3379, "step": 26529 }, { "epoch": 86.98360655737704, "grad_norm": 5.029212474822998, "learning_rate": 8.755201438515703e-07, "loss": 0.4869, "step": 26530 }, { "epoch": 86.98688524590163, "grad_norm": 4.55474853515625, "learning_rate": 8.75085678882589e-07, "loss": 0.3417, "step": 26531 }, { "epoch": 86.99016393442623, "grad_norm": 4.321817398071289, "learning_rate": 8.746513168065019e-07, "loss": 0.214, "step": 26532 }, { "epoch": 86.99344262295082, "grad_norm": 3.853060007095337, "learning_rate": 8.742170576282116e-07, "loss": 0.3072, "step": 26533 }, { "epoch": 86.99672131147541, "grad_norm": 4.6966872215271, "learning_rate": 8.737829013526122e-07, "loss": 0.3377, "step": 26534 }, { "epoch": 87.0, "grad_norm": 4.607462406158447, "learning_rate": 8.733488479845997e-07, "loss": 0.3817, "step": 26535 }, { "epoch": 87.00327868852459, "grad_norm": 4.087272644042969, "learning_rate": 8.729148975290658e-07, "loss": 0.2606, "step": 26536 }, { "epoch": 87.00655737704918, "grad_norm": 4.465076923370361, "learning_rate": 8.724810499909087e-07, "loss": 0.3672, "step": 26537 }, { "epoch": 87.00983606557377, "grad_norm": 10.17539119720459, "learning_rate": 8.720473053750178e-07, "loss": 0.3972, "step": 26538 }, { "epoch": 87.01311475409837, "grad_norm": 4.633932590484619, "learning_rate": 8.716136636862815e-07, "loss": 0.2866, "step": 26539 }, { "epoch": 87.01639344262296, "grad_norm": 4.72941255569458, "learning_rate": 8.711801249295959e-07, "loss": 0.1982, "step": 26540 }, { "epoch": 87.01967213114754, "grad_norm": 4.721251010894775, "learning_rate": 8.707466891098449e-07, "loss": 0.4098, "step": 26541 }, { "epoch": 87.02295081967213, "grad_norm": 3.8962008953094482, "learning_rate": 8.703133562319166e-07, "loss": 0.4371, "step": 26542 }, { "epoch": 87.02622950819672, "grad_norm": 4.091716289520264, "learning_rate": 8.698801263006962e-07, "loss": 0.4972, "step": 26543 }, { "epoch": 87.02950819672131, "grad_norm": 3.5167994499206543, "learning_rate": 8.69446999321073e-07, "loss": 0.5255, "step": 26544 }, { "epoch": 87.0327868852459, "grad_norm": 4.5571513175964355, "learning_rate": 8.690139752979277e-07, "loss": 0.441, "step": 26545 }, { "epoch": 87.03606557377049, "grad_norm": 14.807920455932617, "learning_rate": 8.685810542361429e-07, "loss": 0.2622, "step": 26546 }, { "epoch": 87.03934426229509, "grad_norm": 6.3699469566345215, "learning_rate": 8.681482361406024e-07, "loss": 0.3165, "step": 26547 }, { "epoch": 87.04262295081968, "grad_norm": 4.500926971435547, "learning_rate": 8.677155210161825e-07, "loss": 0.2608, "step": 26548 }, { "epoch": 87.04590163934427, "grad_norm": 6.0002970695495605, "learning_rate": 8.67282908867767e-07, "loss": 0.5286, "step": 26549 }, { "epoch": 87.04918032786885, "grad_norm": 6.01070499420166, "learning_rate": 8.668503997002331e-07, "loss": 0.3412, "step": 26550 }, { "epoch": 87.05245901639344, "grad_norm": 23.978137969970703, "learning_rate": 8.664179935184569e-07, "loss": 0.3877, "step": 26551 }, { "epoch": 87.05573770491803, "grad_norm": 4.6005330085754395, "learning_rate": 8.659856903273123e-07, "loss": 0.3303, "step": 26552 }, { "epoch": 87.05901639344262, "grad_norm": 4.989238739013672, "learning_rate": 8.655534901316776e-07, "loss": 0.3643, "step": 26553 }, { "epoch": 87.0622950819672, "grad_norm": 4.448715686798096, "learning_rate": 8.651213929364244e-07, "loss": 0.3376, "step": 26554 }, { "epoch": 87.06557377049181, "grad_norm": 10.467167854309082, "learning_rate": 8.646893987464266e-07, "loss": 0.423, "step": 26555 }, { "epoch": 87.0688524590164, "grad_norm": 4.998315334320068, "learning_rate": 8.642575075665516e-07, "loss": 0.497, "step": 26556 }, { "epoch": 87.07213114754099, "grad_norm": 3.9090688228607178, "learning_rate": 8.638257194016741e-07, "loss": 0.3671, "step": 26557 }, { "epoch": 87.07540983606557, "grad_norm": 4.930181980133057, "learning_rate": 8.633940342566604e-07, "loss": 0.5788, "step": 26558 }, { "epoch": 87.07868852459016, "grad_norm": 5.437631130218506, "learning_rate": 8.629624521363789e-07, "loss": 0.3452, "step": 26559 }, { "epoch": 87.08196721311475, "grad_norm": 5.459812641143799, "learning_rate": 8.625309730456966e-07, "loss": 0.3962, "step": 26560 }, { "epoch": 87.08524590163934, "grad_norm": 4.86305570602417, "learning_rate": 8.620995969894752e-07, "loss": 0.4897, "step": 26561 }, { "epoch": 87.08852459016393, "grad_norm": 4.089590072631836, "learning_rate": 8.616683239725842e-07, "loss": 0.2907, "step": 26562 }, { "epoch": 87.09180327868853, "grad_norm": 3.9171204566955566, "learning_rate": 8.612371539998843e-07, "loss": 0.2914, "step": 26563 }, { "epoch": 87.09508196721312, "grad_norm": 3.2968733310699463, "learning_rate": 8.60806087076238e-07, "loss": 0.3018, "step": 26564 }, { "epoch": 87.09836065573771, "grad_norm": 5.314922332763672, "learning_rate": 8.603751232065027e-07, "loss": 0.3164, "step": 26565 }, { "epoch": 87.1016393442623, "grad_norm": 4.373353958129883, "learning_rate": 8.599442623955423e-07, "loss": 0.4634, "step": 26566 }, { "epoch": 87.10491803278688, "grad_norm": 5.33411169052124, "learning_rate": 8.59513504648215e-07, "loss": 0.4669, "step": 26567 }, { "epoch": 87.10819672131147, "grad_norm": 6.85769510269165, "learning_rate": 8.59082849969376e-07, "loss": 0.3727, "step": 26568 }, { "epoch": 87.11147540983606, "grad_norm": 7.098616600036621, "learning_rate": 8.586522983638801e-07, "loss": 0.5946, "step": 26569 }, { "epoch": 87.11475409836065, "grad_norm": 4.8000311851501465, "learning_rate": 8.582218498365857e-07, "loss": 0.3293, "step": 26570 }, { "epoch": 87.11803278688525, "grad_norm": 4.224015712738037, "learning_rate": 8.577915043923457e-07, "loss": 0.6346, "step": 26571 }, { "epoch": 87.12131147540984, "grad_norm": 4.376277923583984, "learning_rate": 8.573612620360106e-07, "loss": 0.3851, "step": 26572 }, { "epoch": 87.12459016393443, "grad_norm": 5.404353141784668, "learning_rate": 8.569311227724342e-07, "loss": 0.4147, "step": 26573 }, { "epoch": 87.12786885245902, "grad_norm": 9.922849655151367, "learning_rate": 8.565010866064649e-07, "loss": 0.2539, "step": 26574 }, { "epoch": 87.1311475409836, "grad_norm": 4.949301242828369, "learning_rate": 8.560711535429533e-07, "loss": 0.4238, "step": 26575 }, { "epoch": 87.1344262295082, "grad_norm": 5.331554412841797, "learning_rate": 8.556413235867467e-07, "loss": 0.4985, "step": 26576 }, { "epoch": 87.13770491803278, "grad_norm": 5.001744270324707, "learning_rate": 8.552115967426922e-07, "loss": 0.5488, "step": 26577 }, { "epoch": 87.14098360655737, "grad_norm": 5.029689311981201, "learning_rate": 8.547819730156337e-07, "loss": 0.3052, "step": 26578 }, { "epoch": 87.14426229508197, "grad_norm": 4.1619791984558105, "learning_rate": 8.543524524104185e-07, "loss": 0.2238, "step": 26579 }, { "epoch": 87.14754098360656, "grad_norm": 4.002050399780273, "learning_rate": 8.539230349318883e-07, "loss": 0.2754, "step": 26580 }, { "epoch": 87.15081967213115, "grad_norm": 11.116683959960938, "learning_rate": 8.534937205848859e-07, "loss": 0.2781, "step": 26581 }, { "epoch": 87.15409836065574, "grad_norm": 5.366584777832031, "learning_rate": 8.530645093742506e-07, "loss": 0.3908, "step": 26582 }, { "epoch": 87.15737704918033, "grad_norm": 6.605829238891602, "learning_rate": 8.526354013048244e-07, "loss": 0.3336, "step": 26583 }, { "epoch": 87.16065573770491, "grad_norm": 4.775393962860107, "learning_rate": 8.522063963814442e-07, "loss": 0.3174, "step": 26584 }, { "epoch": 87.1639344262295, "grad_norm": 5.2499613761901855, "learning_rate": 8.517774946089475e-07, "loss": 0.339, "step": 26585 }, { "epoch": 87.1672131147541, "grad_norm": 6.6185431480407715, "learning_rate": 8.513486959921724e-07, "loss": 0.5722, "step": 26586 }, { "epoch": 87.1704918032787, "grad_norm": 4.990525722503662, "learning_rate": 8.50920000535953e-07, "loss": 0.3549, "step": 26587 }, { "epoch": 87.17377049180328, "grad_norm": 4.34505558013916, "learning_rate": 8.504914082451221e-07, "loss": 0.4408, "step": 26588 }, { "epoch": 87.17704918032787, "grad_norm": 15.828421592712402, "learning_rate": 8.500629191245157e-07, "loss": 0.2367, "step": 26589 }, { "epoch": 87.18032786885246, "grad_norm": 3.874091386795044, "learning_rate": 8.496345331789624e-07, "loss": 0.2507, "step": 26590 }, { "epoch": 87.18360655737705, "grad_norm": 5.303983211517334, "learning_rate": 8.492062504132925e-07, "loss": 0.2776, "step": 26591 }, { "epoch": 87.18688524590164, "grad_norm": 5.165563106536865, "learning_rate": 8.487780708323379e-07, "loss": 0.4761, "step": 26592 }, { "epoch": 87.19016393442622, "grad_norm": 5.457754611968994, "learning_rate": 8.483499944409257e-07, "loss": 0.4744, "step": 26593 }, { "epoch": 87.19344262295083, "grad_norm": 5.160457611083984, "learning_rate": 8.479220212438832e-07, "loss": 0.4386, "step": 26594 }, { "epoch": 87.19672131147541, "grad_norm": 7.186300754547119, "learning_rate": 8.474941512460333e-07, "loss": 0.1979, "step": 26595 }, { "epoch": 87.2, "grad_norm": 3.901019811630249, "learning_rate": 8.470663844522053e-07, "loss": 0.4879, "step": 26596 }, { "epoch": 87.20327868852459, "grad_norm": 6.171709060668945, "learning_rate": 8.4663872086722e-07, "loss": 0.3066, "step": 26597 }, { "epoch": 87.20655737704918, "grad_norm": 4.309901714324951, "learning_rate": 8.462111604959e-07, "loss": 0.1709, "step": 26598 }, { "epoch": 87.20983606557377, "grad_norm": 7.91340970993042, "learning_rate": 8.457837033430672e-07, "loss": 0.4608, "step": 26599 }, { "epoch": 87.21311475409836, "grad_norm": 5.667435646057129, "learning_rate": 8.453563494135397e-07, "loss": 0.3153, "step": 26600 }, { "epoch": 87.21639344262294, "grad_norm": 5.07412576675415, "learning_rate": 8.449290987121395e-07, "loss": 0.519, "step": 26601 }, { "epoch": 87.21967213114755, "grad_norm": 4.357193946838379, "learning_rate": 8.445019512436814e-07, "loss": 0.1636, "step": 26602 }, { "epoch": 87.22295081967214, "grad_norm": 4.424288749694824, "learning_rate": 8.440749070129839e-07, "loss": 0.4627, "step": 26603 }, { "epoch": 87.22622950819672, "grad_norm": 6.790322303771973, "learning_rate": 8.436479660248608e-07, "loss": 0.2519, "step": 26604 }, { "epoch": 87.22950819672131, "grad_norm": 5.205329895019531, "learning_rate": 8.432211282841274e-07, "loss": 0.4591, "step": 26605 }, { "epoch": 87.2327868852459, "grad_norm": 3.9311323165893555, "learning_rate": 8.427943937955974e-07, "loss": 0.216, "step": 26606 }, { "epoch": 87.23606557377049, "grad_norm": 4.730581760406494, "learning_rate": 8.423677625640814e-07, "loss": 0.5432, "step": 26607 }, { "epoch": 87.23934426229508, "grad_norm": 4.87673807144165, "learning_rate": 8.41941234594389e-07, "loss": 0.407, "step": 26608 }, { "epoch": 87.24262295081967, "grad_norm": 5.041359901428223, "learning_rate": 8.415148098913318e-07, "loss": 0.4067, "step": 26609 }, { "epoch": 87.24590163934427, "grad_norm": 6.842828273773193, "learning_rate": 8.410884884597182e-07, "loss": 0.3656, "step": 26610 }, { "epoch": 87.24918032786886, "grad_norm": 10.456765174865723, "learning_rate": 8.406622703043554e-07, "loss": 0.389, "step": 26611 }, { "epoch": 87.25245901639344, "grad_norm": 4.3765645027160645, "learning_rate": 8.402361554300475e-07, "loss": 0.3239, "step": 26612 }, { "epoch": 87.25573770491803, "grad_norm": 4.735754013061523, "learning_rate": 8.398101438416007e-07, "loss": 0.4374, "step": 26613 }, { "epoch": 87.25901639344262, "grad_norm": 6.451127052307129, "learning_rate": 8.393842355438186e-07, "loss": 0.2637, "step": 26614 }, { "epoch": 87.26229508196721, "grad_norm": 4.003920555114746, "learning_rate": 8.389584305415055e-07, "loss": 0.3899, "step": 26615 }, { "epoch": 87.2655737704918, "grad_norm": 5.674853324890137, "learning_rate": 8.385327288394607e-07, "loss": 0.3754, "step": 26616 }, { "epoch": 87.26885245901639, "grad_norm": 4.24168586730957, "learning_rate": 8.381071304424826e-07, "loss": 0.3487, "step": 26617 }, { "epoch": 87.27213114754099, "grad_norm": 7.170236587524414, "learning_rate": 8.376816353553751e-07, "loss": 0.2594, "step": 26618 }, { "epoch": 87.27540983606558, "grad_norm": 3.803853750228882, "learning_rate": 8.372562435829335e-07, "loss": 0.2268, "step": 26619 }, { "epoch": 87.27868852459017, "grad_norm": 6.276350498199463, "learning_rate": 8.368309551299536e-07, "loss": 0.5367, "step": 26620 }, { "epoch": 87.28196721311475, "grad_norm": 5.275786399841309, "learning_rate": 8.364057700012318e-07, "loss": 0.5914, "step": 26621 }, { "epoch": 87.28524590163934, "grad_norm": 4.419055938720703, "learning_rate": 8.359806882015631e-07, "loss": 0.3275, "step": 26622 }, { "epoch": 87.28852459016393, "grad_norm": 4.800491809844971, "learning_rate": 8.355557097357414e-07, "loss": 0.3959, "step": 26623 }, { "epoch": 87.29180327868852, "grad_norm": 4.520431995391846, "learning_rate": 8.351308346085562e-07, "loss": 0.2061, "step": 26624 }, { "epoch": 87.29508196721312, "grad_norm": 4.880702495574951, "learning_rate": 8.347060628248016e-07, "loss": 0.5398, "step": 26625 }, { "epoch": 87.29836065573771, "grad_norm": 3.884854555130005, "learning_rate": 8.342813943892625e-07, "loss": 0.1177, "step": 26626 }, { "epoch": 87.3016393442623, "grad_norm": 4.386462211608887, "learning_rate": 8.33856829306734e-07, "loss": 0.2894, "step": 26627 }, { "epoch": 87.30491803278689, "grad_norm": 5.169862270355225, "learning_rate": 8.334323675819989e-07, "loss": 0.3845, "step": 26628 }, { "epoch": 87.30819672131148, "grad_norm": 5.598898887634277, "learning_rate": 8.330080092198445e-07, "loss": 0.3246, "step": 26629 }, { "epoch": 87.31147540983606, "grad_norm": 4.537067890167236, "learning_rate": 8.325837542250548e-07, "loss": 0.3204, "step": 26630 }, { "epoch": 87.31475409836065, "grad_norm": 3.985062837600708, "learning_rate": 8.32159602602417e-07, "loss": 0.4777, "step": 26631 }, { "epoch": 87.31803278688524, "grad_norm": 5.481588363647461, "learning_rate": 8.317355543567119e-07, "loss": 0.3528, "step": 26632 }, { "epoch": 87.32131147540984, "grad_norm": 7.4755024909973145, "learning_rate": 8.313116094927209e-07, "loss": 0.5604, "step": 26633 }, { "epoch": 87.32459016393443, "grad_norm": 5.895224571228027, "learning_rate": 8.308877680152227e-07, "loss": 0.4086, "step": 26634 }, { "epoch": 87.32786885245902, "grad_norm": 4.8337202072143555, "learning_rate": 8.304640299290001e-07, "loss": 0.4692, "step": 26635 }, { "epoch": 87.33114754098361, "grad_norm": 3.1787848472595215, "learning_rate": 8.300403952388292e-07, "loss": 0.1844, "step": 26636 }, { "epoch": 87.3344262295082, "grad_norm": 5.562046051025391, "learning_rate": 8.296168639494872e-07, "loss": 0.6905, "step": 26637 }, { "epoch": 87.33770491803278, "grad_norm": 5.506570339202881, "learning_rate": 8.291934360657494e-07, "loss": 0.422, "step": 26638 }, { "epoch": 87.34098360655737, "grad_norm": 4.239671230316162, "learning_rate": 8.287701115923907e-07, "loss": 0.232, "step": 26639 }, { "epoch": 87.34426229508196, "grad_norm": 3.6869428157806396, "learning_rate": 8.283468905341862e-07, "loss": 0.4464, "step": 26640 }, { "epoch": 87.34754098360656, "grad_norm": 5.009697437286377, "learning_rate": 8.279237728959044e-07, "loss": 0.2036, "step": 26641 }, { "epoch": 87.35081967213115, "grad_norm": 4.593070030212402, "learning_rate": 8.275007586823203e-07, "loss": 0.27, "step": 26642 }, { "epoch": 87.35409836065574, "grad_norm": 4.773484230041504, "learning_rate": 8.270778478982022e-07, "loss": 0.398, "step": 26643 }, { "epoch": 87.35737704918033, "grad_norm": 5.493470191955566, "learning_rate": 8.266550405483164e-07, "loss": 0.3428, "step": 26644 }, { "epoch": 87.36065573770492, "grad_norm": 4.594645023345947, "learning_rate": 8.262323366374358e-07, "loss": 0.6046, "step": 26645 }, { "epoch": 87.3639344262295, "grad_norm": 4.205593585968018, "learning_rate": 8.258097361703232e-07, "loss": 0.5205, "step": 26646 }, { "epoch": 87.3672131147541, "grad_norm": 4.024164199829102, "learning_rate": 8.253872391517426e-07, "loss": 0.3368, "step": 26647 }, { "epoch": 87.37049180327868, "grad_norm": 4.89554500579834, "learning_rate": 8.249648455864623e-07, "loss": 0.2992, "step": 26648 }, { "epoch": 87.37377049180328, "grad_norm": 5.0186662673950195, "learning_rate": 8.24542555479243e-07, "loss": 0.4043, "step": 26649 }, { "epoch": 87.37704918032787, "grad_norm": 5.576961994171143, "learning_rate": 8.241203688348464e-07, "loss": 0.4598, "step": 26650 }, { "epoch": 87.38032786885246, "grad_norm": 5.151280403137207, "learning_rate": 8.236982856580333e-07, "loss": 0.3959, "step": 26651 }, { "epoch": 87.38360655737705, "grad_norm": 5.243416786193848, "learning_rate": 8.232763059535609e-07, "loss": 0.5821, "step": 26652 }, { "epoch": 87.38688524590164, "grad_norm": 5.427786350250244, "learning_rate": 8.22854429726192e-07, "loss": 0.3781, "step": 26653 }, { "epoch": 87.39016393442623, "grad_norm": 4.815378665924072, "learning_rate": 8.224326569806806e-07, "loss": 0.4961, "step": 26654 }, { "epoch": 87.39344262295081, "grad_norm": 7.64705753326416, "learning_rate": 8.220109877217842e-07, "loss": 0.5425, "step": 26655 }, { "epoch": 87.3967213114754, "grad_norm": 5.7765607833862305, "learning_rate": 8.215894219542541e-07, "loss": 0.3523, "step": 26656 }, { "epoch": 87.4, "grad_norm": 6.547125339508057, "learning_rate": 8.211679596828481e-07, "loss": 0.4474, "step": 26657 }, { "epoch": 87.4032786885246, "grad_norm": 4.875455856323242, "learning_rate": 8.207466009123166e-07, "loss": 0.4021, "step": 26658 }, { "epoch": 87.40655737704918, "grad_norm": 5.507096290588379, "learning_rate": 8.203253456474114e-07, "loss": 0.4334, "step": 26659 }, { "epoch": 87.40983606557377, "grad_norm": 4.833586692810059, "learning_rate": 8.199041938928809e-07, "loss": 0.2275, "step": 26660 }, { "epoch": 87.41311475409836, "grad_norm": 3.3942925930023193, "learning_rate": 8.19483145653478e-07, "loss": 0.2083, "step": 26661 }, { "epoch": 87.41639344262295, "grad_norm": 8.95760440826416, "learning_rate": 8.190622009339466e-07, "loss": 0.1942, "step": 26662 }, { "epoch": 87.41967213114754, "grad_norm": 6.415674686431885, "learning_rate": 8.186413597390353e-07, "loss": 0.2916, "step": 26663 }, { "epoch": 87.42295081967212, "grad_norm": 4.830106735229492, "learning_rate": 8.182206220734889e-07, "loss": 0.3354, "step": 26664 }, { "epoch": 87.42622950819673, "grad_norm": 5.278558254241943, "learning_rate": 8.177999879420507e-07, "loss": 0.5078, "step": 26665 }, { "epoch": 87.42950819672132, "grad_norm": 5.323668003082275, "learning_rate": 8.173794573494654e-07, "loss": 0.4065, "step": 26666 }, { "epoch": 87.4327868852459, "grad_norm": 5.310962200164795, "learning_rate": 8.169590303004749e-07, "loss": 0.3161, "step": 26667 }, { "epoch": 87.43606557377049, "grad_norm": 6.912222385406494, "learning_rate": 8.165387067998187e-07, "loss": 0.2591, "step": 26668 }, { "epoch": 87.43934426229508, "grad_norm": 4.947505950927734, "learning_rate": 8.161184868522354e-07, "loss": 0.2596, "step": 26669 }, { "epoch": 87.44262295081967, "grad_norm": 5.145961761474609, "learning_rate": 8.156983704624665e-07, "loss": 0.3737, "step": 26670 }, { "epoch": 87.44590163934426, "grad_norm": 5.230339527130127, "learning_rate": 8.152783576352486e-07, "loss": 0.2857, "step": 26671 }, { "epoch": 87.44918032786886, "grad_norm": 5.927516937255859, "learning_rate": 8.148584483753163e-07, "loss": 0.3772, "step": 26672 }, { "epoch": 87.45245901639345, "grad_norm": 4.816779136657715, "learning_rate": 8.144386426874029e-07, "loss": 0.4522, "step": 26673 }, { "epoch": 87.45573770491804, "grad_norm": 4.988564968109131, "learning_rate": 8.140189405762478e-07, "loss": 0.368, "step": 26674 }, { "epoch": 87.45901639344262, "grad_norm": 3.7704269886016846, "learning_rate": 8.135993420465782e-07, "loss": 0.3848, "step": 26675 }, { "epoch": 87.46229508196721, "grad_norm": 3.703369140625, "learning_rate": 8.131798471031294e-07, "loss": 0.2753, "step": 26676 }, { "epoch": 87.4655737704918, "grad_norm": 3.8515264987945557, "learning_rate": 8.127604557506285e-07, "loss": 0.2947, "step": 26677 }, { "epoch": 87.46885245901639, "grad_norm": 4.510996341705322, "learning_rate": 8.123411679938043e-07, "loss": 0.3178, "step": 26678 }, { "epoch": 87.47213114754098, "grad_norm": 4.422416687011719, "learning_rate": 8.119219838373871e-07, "loss": 0.3054, "step": 26679 }, { "epoch": 87.47540983606558, "grad_norm": 15.05803394317627, "learning_rate": 8.115029032861044e-07, "loss": 0.4863, "step": 26680 }, { "epoch": 87.47868852459017, "grad_norm": 4.52681303024292, "learning_rate": 8.110839263446791e-07, "loss": 0.2862, "step": 26681 }, { "epoch": 87.48196721311476, "grad_norm": 4.6000542640686035, "learning_rate": 8.10665053017835e-07, "loss": 0.194, "step": 26682 }, { "epoch": 87.48524590163935, "grad_norm": 3.743159294128418, "learning_rate": 8.102462833102986e-07, "loss": 0.2674, "step": 26683 }, { "epoch": 87.48852459016393, "grad_norm": 3.5246379375457764, "learning_rate": 8.098276172267905e-07, "loss": 0.2945, "step": 26684 }, { "epoch": 87.49180327868852, "grad_norm": 8.279446601867676, "learning_rate": 8.094090547720312e-07, "loss": 0.4027, "step": 26685 }, { "epoch": 87.49508196721311, "grad_norm": 4.07603645324707, "learning_rate": 8.089905959507394e-07, "loss": 0.5393, "step": 26686 }, { "epoch": 87.4983606557377, "grad_norm": 11.572628021240234, "learning_rate": 8.085722407676355e-07, "loss": 0.26, "step": 26687 }, { "epoch": 87.5016393442623, "grad_norm": 4.120908737182617, "learning_rate": 8.08153989227437e-07, "loss": 0.1609, "step": 26688 }, { "epoch": 87.50491803278689, "grad_norm": 4.7778639793396, "learning_rate": 8.077358413348602e-07, "loss": 0.1999, "step": 26689 }, { "epoch": 87.50819672131148, "grad_norm": 4.780117988586426, "learning_rate": 8.073177970946167e-07, "loss": 0.3421, "step": 26690 }, { "epoch": 87.51147540983607, "grad_norm": 4.225348949432373, "learning_rate": 8.06899856511425e-07, "loss": 0.3395, "step": 26691 }, { "epoch": 87.51475409836065, "grad_norm": 5.376717567443848, "learning_rate": 8.064820195899958e-07, "loss": 0.258, "step": 26692 }, { "epoch": 87.51803278688524, "grad_norm": 8.205437660217285, "learning_rate": 8.060642863350387e-07, "loss": 0.3371, "step": 26693 }, { "epoch": 87.52131147540983, "grad_norm": 6.6598381996154785, "learning_rate": 8.056466567512677e-07, "loss": 0.326, "step": 26694 }, { "epoch": 87.52459016393442, "grad_norm": 4.3522210121154785, "learning_rate": 8.052291308433901e-07, "loss": 0.3061, "step": 26695 }, { "epoch": 87.52786885245902, "grad_norm": 5.214644908905029, "learning_rate": 8.048117086161134e-07, "loss": 0.4715, "step": 26696 }, { "epoch": 87.53114754098361, "grad_norm": 5.780490875244141, "learning_rate": 8.043943900741469e-07, "loss": 0.2687, "step": 26697 }, { "epoch": 87.5344262295082, "grad_norm": 4.378376007080078, "learning_rate": 8.039771752221948e-07, "loss": 0.2604, "step": 26698 }, { "epoch": 87.53770491803279, "grad_norm": 5.240731239318848, "learning_rate": 8.03560064064961e-07, "loss": 0.3939, "step": 26699 }, { "epoch": 87.54098360655738, "grad_norm": 10.45470142364502, "learning_rate": 8.031430566071474e-07, "loss": 0.4, "step": 26700 }, { "epoch": 87.54426229508196, "grad_norm": 4.401015758514404, "learning_rate": 8.027261528534602e-07, "loss": 0.2125, "step": 26701 }, { "epoch": 87.54754098360655, "grad_norm": 5.446710109710693, "learning_rate": 8.02309352808599e-07, "loss": 0.378, "step": 26702 }, { "epoch": 87.55081967213114, "grad_norm": 7.281991004943848, "learning_rate": 8.018926564772622e-07, "loss": 0.4115, "step": 26703 }, { "epoch": 87.55409836065574, "grad_norm": 6.1483964920043945, "learning_rate": 8.014760638641484e-07, "loss": 0.3658, "step": 26704 }, { "epoch": 87.55737704918033, "grad_norm": 4.069131851196289, "learning_rate": 8.01059574973957e-07, "loss": 0.5859, "step": 26705 }, { "epoch": 87.56065573770492, "grad_norm": 5.183969974517822, "learning_rate": 8.006431898113843e-07, "loss": 0.4037, "step": 26706 }, { "epoch": 87.56393442622951, "grad_norm": 5.349430561065674, "learning_rate": 8.002269083811232e-07, "loss": 0.4433, "step": 26707 }, { "epoch": 87.5672131147541, "grad_norm": 4.608757019042969, "learning_rate": 7.998107306878688e-07, "loss": 0.407, "step": 26708 }, { "epoch": 87.57049180327868, "grad_norm": 4.373920440673828, "learning_rate": 7.993946567363154e-07, "loss": 0.4462, "step": 26709 }, { "epoch": 87.57377049180327, "grad_norm": 4.476964950561523, "learning_rate": 7.989786865311533e-07, "loss": 0.4674, "step": 26710 }, { "epoch": 87.57704918032788, "grad_norm": 4.082366943359375, "learning_rate": 7.985628200770724e-07, "loss": 0.4426, "step": 26711 }, { "epoch": 87.58032786885246, "grad_norm": 4.817340850830078, "learning_rate": 7.98147057378762e-07, "loss": 0.3411, "step": 26712 }, { "epoch": 87.58360655737705, "grad_norm": 4.672024250030518, "learning_rate": 7.977313984409129e-07, "loss": 0.3452, "step": 26713 }, { "epoch": 87.58688524590164, "grad_norm": 4.123124599456787, "learning_rate": 7.973158432682104e-07, "loss": 0.317, "step": 26714 }, { "epoch": 87.59016393442623, "grad_norm": 5.574371337890625, "learning_rate": 7.969003918653395e-07, "loss": 0.2834, "step": 26715 }, { "epoch": 87.59344262295082, "grad_norm": 4.797244548797607, "learning_rate": 7.964850442369854e-07, "loss": 0.1665, "step": 26716 }, { "epoch": 87.5967213114754, "grad_norm": 4.786236763000488, "learning_rate": 7.960698003878309e-07, "loss": 0.1305, "step": 26717 }, { "epoch": 87.6, "grad_norm": 6.359014987945557, "learning_rate": 7.956546603225601e-07, "loss": 0.677, "step": 26718 }, { "epoch": 87.6032786885246, "grad_norm": 4.468616485595703, "learning_rate": 7.952396240458538e-07, "loss": 0.275, "step": 26719 }, { "epoch": 87.60655737704919, "grad_norm": 5.1010661125183105, "learning_rate": 7.948246915623903e-07, "loss": 0.343, "step": 26720 }, { "epoch": 87.60983606557377, "grad_norm": 3.6661438941955566, "learning_rate": 7.944098628768481e-07, "loss": 0.2371, "step": 26721 }, { "epoch": 87.61311475409836, "grad_norm": 4.7787861824035645, "learning_rate": 7.939951379939081e-07, "loss": 0.3295, "step": 26722 }, { "epoch": 87.61639344262295, "grad_norm": 6.448612213134766, "learning_rate": 7.935805169182442e-07, "loss": 0.3197, "step": 26723 }, { "epoch": 87.61967213114754, "grad_norm": 3.6039090156555176, "learning_rate": 7.931659996545326e-07, "loss": 0.3036, "step": 26724 }, { "epoch": 87.62295081967213, "grad_norm": 4.65488338470459, "learning_rate": 7.927515862074453e-07, "loss": 0.4858, "step": 26725 }, { "epoch": 87.62622950819672, "grad_norm": 4.1100382804870605, "learning_rate": 7.923372765816584e-07, "loss": 0.2959, "step": 26726 }, { "epoch": 87.62950819672132, "grad_norm": 4.701299667358398, "learning_rate": 7.919230707818426e-07, "loss": 0.4174, "step": 26727 }, { "epoch": 87.6327868852459, "grad_norm": 10.690401077270508, "learning_rate": 7.915089688126687e-07, "loss": 0.4215, "step": 26728 }, { "epoch": 87.6360655737705, "grad_norm": 4.073456287384033, "learning_rate": 7.910949706788051e-07, "loss": 0.2363, "step": 26729 }, { "epoch": 87.63934426229508, "grad_norm": 5.0393853187561035, "learning_rate": 7.906810763849182e-07, "loss": 0.5852, "step": 26730 }, { "epoch": 87.64262295081967, "grad_norm": 7.059525012969971, "learning_rate": 7.902672859356808e-07, "loss": 0.2502, "step": 26731 }, { "epoch": 87.64590163934426, "grad_norm": 4.719656944274902, "learning_rate": 7.898535993357537e-07, "loss": 0.2607, "step": 26732 }, { "epoch": 87.64918032786885, "grad_norm": 4.751197338104248, "learning_rate": 7.894400165898042e-07, "loss": 0.3824, "step": 26733 }, { "epoch": 87.65245901639344, "grad_norm": 4.826012134552002, "learning_rate": 7.890265377024942e-07, "loss": 0.4405, "step": 26734 }, { "epoch": 87.65573770491804, "grad_norm": 18.573522567749023, "learning_rate": 7.886131626784876e-07, "loss": 0.3275, "step": 26735 }, { "epoch": 87.65901639344263, "grad_norm": 4.051462173461914, "learning_rate": 7.881998915224453e-07, "loss": 0.3683, "step": 26736 }, { "epoch": 87.66229508196722, "grad_norm": 4.907174587249756, "learning_rate": 7.877867242390269e-07, "loss": 0.4565, "step": 26737 }, { "epoch": 87.6655737704918, "grad_norm": 5.341179847717285, "learning_rate": 7.873736608328896e-07, "loss": 0.3959, "step": 26738 }, { "epoch": 87.66885245901639, "grad_norm": 5.098751544952393, "learning_rate": 7.869607013086955e-07, "loss": 0.4608, "step": 26739 }, { "epoch": 87.67213114754098, "grad_norm": 5.343831539154053, "learning_rate": 7.865478456710984e-07, "loss": 0.4559, "step": 26740 }, { "epoch": 87.67540983606557, "grad_norm": 4.638710021972656, "learning_rate": 7.861350939247536e-07, "loss": 0.2767, "step": 26741 }, { "epoch": 87.67868852459016, "grad_norm": 6.472879409790039, "learning_rate": 7.857224460743163e-07, "loss": 0.274, "step": 26742 }, { "epoch": 87.68196721311476, "grad_norm": 5.5648345947265625, "learning_rate": 7.85309902124436e-07, "loss": 0.1714, "step": 26743 }, { "epoch": 87.68524590163935, "grad_norm": 4.388840675354004, "learning_rate": 7.848974620797701e-07, "loss": 0.3606, "step": 26744 }, { "epoch": 87.68852459016394, "grad_norm": 5.261516571044922, "learning_rate": 7.844851259449659e-07, "loss": 0.4665, "step": 26745 }, { "epoch": 87.69180327868852, "grad_norm": 4.35664176940918, "learning_rate": 7.840728937246733e-07, "loss": 0.307, "step": 26746 }, { "epoch": 87.69508196721311, "grad_norm": 4.4089741706848145, "learning_rate": 7.836607654235418e-07, "loss": 0.3235, "step": 26747 }, { "epoch": 87.6983606557377, "grad_norm": 4.844711780548096, "learning_rate": 7.832487410462175e-07, "loss": 0.2427, "step": 26748 }, { "epoch": 87.70163934426229, "grad_norm": 4.13380765914917, "learning_rate": 7.828368205973447e-07, "loss": 0.4617, "step": 26749 }, { "epoch": 87.70491803278688, "grad_norm": 10.367474555969238, "learning_rate": 7.824250040815729e-07, "loss": 0.4318, "step": 26750 }, { "epoch": 87.70819672131148, "grad_norm": 5.161655426025391, "learning_rate": 7.820132915035428e-07, "loss": 0.3569, "step": 26751 }, { "epoch": 87.71147540983607, "grad_norm": 4.322249412536621, "learning_rate": 7.816016828678952e-07, "loss": 0.306, "step": 26752 }, { "epoch": 87.71475409836066, "grad_norm": 4.6946587562561035, "learning_rate": 7.811901781792741e-07, "loss": 0.2497, "step": 26753 }, { "epoch": 87.71803278688525, "grad_norm": 5.508293628692627, "learning_rate": 7.807787774423204e-07, "loss": 0.3323, "step": 26754 }, { "epoch": 87.72131147540983, "grad_norm": 3.9930944442749023, "learning_rate": 7.803674806616712e-07, "loss": 0.2688, "step": 26755 }, { "epoch": 87.72459016393442, "grad_norm": 5.665664196014404, "learning_rate": 7.79956287841962e-07, "loss": 0.3876, "step": 26756 }, { "epoch": 87.72786885245901, "grad_norm": 5.932664394378662, "learning_rate": 7.795451989878355e-07, "loss": 0.4465, "step": 26757 }, { "epoch": 87.73114754098361, "grad_norm": 5.017866611480713, "learning_rate": 7.791342141039227e-07, "loss": 0.5482, "step": 26758 }, { "epoch": 87.7344262295082, "grad_norm": 5.486343860626221, "learning_rate": 7.787233331948584e-07, "loss": 0.3372, "step": 26759 }, { "epoch": 87.73770491803279, "grad_norm": 5.62187385559082, "learning_rate": 7.78312556265276e-07, "loss": 0.4718, "step": 26760 }, { "epoch": 87.74098360655738, "grad_norm": 6.535943031311035, "learning_rate": 7.779018833198082e-07, "loss": 0.5075, "step": 26761 }, { "epoch": 87.74426229508197, "grad_norm": 9.447089195251465, "learning_rate": 7.774913143630858e-07, "loss": 0.2392, "step": 26762 }, { "epoch": 87.74754098360656, "grad_norm": 4.874966621398926, "learning_rate": 7.770808493997372e-07, "loss": 0.4991, "step": 26763 }, { "epoch": 87.75081967213114, "grad_norm": 5.663644313812256, "learning_rate": 7.7667048843439e-07, "loss": 0.3673, "step": 26764 }, { "epoch": 87.75409836065573, "grad_norm": 4.882282257080078, "learning_rate": 7.76260231471675e-07, "loss": 0.5829, "step": 26765 }, { "epoch": 87.75737704918033, "grad_norm": 4.909481048583984, "learning_rate": 7.75850078516216e-07, "loss": 0.615, "step": 26766 }, { "epoch": 87.76065573770492, "grad_norm": 4.256840229034424, "learning_rate": 7.754400295726383e-07, "loss": 0.4048, "step": 26767 }, { "epoch": 87.76393442622951, "grad_norm": 4.6183929443359375, "learning_rate": 7.75030084645565e-07, "loss": 0.3706, "step": 26768 }, { "epoch": 87.7672131147541, "grad_norm": 5.099254131317139, "learning_rate": 7.746202437396178e-07, "loss": 0.2482, "step": 26769 }, { "epoch": 87.77049180327869, "grad_norm": 5.21540641784668, "learning_rate": 7.742105068594208e-07, "loss": 0.3508, "step": 26770 }, { "epoch": 87.77377049180328, "grad_norm": 4.998854160308838, "learning_rate": 7.738008740095925e-07, "loss": 0.4101, "step": 26771 }, { "epoch": 87.77704918032786, "grad_norm": 4.518822193145752, "learning_rate": 7.733913451947528e-07, "loss": 0.3272, "step": 26772 }, { "epoch": 87.78032786885245, "grad_norm": 5.5944976806640625, "learning_rate": 7.729819204195166e-07, "loss": 0.3386, "step": 26773 }, { "epoch": 87.78360655737706, "grad_norm": 4.114341735839844, "learning_rate": 7.725725996885047e-07, "loss": 0.1289, "step": 26774 }, { "epoch": 87.78688524590164, "grad_norm": 4.189088821411133, "learning_rate": 7.721633830063313e-07, "loss": 0.2308, "step": 26775 }, { "epoch": 87.79016393442623, "grad_norm": 4.6214680671691895, "learning_rate": 7.717542703776105e-07, "loss": 0.2068, "step": 26776 }, { "epoch": 87.79344262295082, "grad_norm": 4.109823226928711, "learning_rate": 7.713452618069528e-07, "loss": 0.6638, "step": 26777 }, { "epoch": 87.79672131147541, "grad_norm": 5.418787479400635, "learning_rate": 7.709363572989747e-07, "loss": 0.3075, "step": 26778 }, { "epoch": 87.8, "grad_norm": 4.209824562072754, "learning_rate": 7.705275568582848e-07, "loss": 0.2537, "step": 26779 }, { "epoch": 87.80327868852459, "grad_norm": 5.915404796600342, "learning_rate": 7.701188604894927e-07, "loss": 0.4237, "step": 26780 }, { "epoch": 87.80655737704917, "grad_norm": 4.183737754821777, "learning_rate": 7.69710268197208e-07, "loss": 0.2235, "step": 26781 }, { "epoch": 87.80983606557378, "grad_norm": 6.647652626037598, "learning_rate": 7.693017799860347e-07, "loss": 0.5054, "step": 26782 }, { "epoch": 87.81311475409836, "grad_norm": 6.638562202453613, "learning_rate": 7.688933958605837e-07, "loss": 0.5118, "step": 26783 }, { "epoch": 87.81639344262295, "grad_norm": 4.963425159454346, "learning_rate": 7.684851158254569e-07, "loss": 0.7205, "step": 26784 }, { "epoch": 87.81967213114754, "grad_norm": 5.7501020431518555, "learning_rate": 7.680769398852594e-07, "loss": 0.3977, "step": 26785 }, { "epoch": 87.82295081967213, "grad_norm": 5.10010290145874, "learning_rate": 7.67668868044591e-07, "loss": 0.4882, "step": 26786 }, { "epoch": 87.82622950819672, "grad_norm": 14.734933853149414, "learning_rate": 7.672609003080578e-07, "loss": 0.3535, "step": 26787 }, { "epoch": 87.8295081967213, "grad_norm": 4.640099048614502, "learning_rate": 7.668530366802562e-07, "loss": 0.6506, "step": 26788 }, { "epoch": 87.8327868852459, "grad_norm": 7.490758895874023, "learning_rate": 7.664452771657882e-07, "loss": 0.5573, "step": 26789 }, { "epoch": 87.8360655737705, "grad_norm": 4.732056140899658, "learning_rate": 7.660376217692477e-07, "loss": 0.2954, "step": 26790 }, { "epoch": 87.83934426229509, "grad_norm": 4.325979709625244, "learning_rate": 7.656300704952358e-07, "loss": 0.3453, "step": 26791 }, { "epoch": 87.84262295081967, "grad_norm": 6.459318161010742, "learning_rate": 7.652226233483462e-07, "loss": 0.116, "step": 26792 }, { "epoch": 87.84590163934426, "grad_norm": 5.2942705154418945, "learning_rate": 7.648152803331732e-07, "loss": 0.3327, "step": 26793 }, { "epoch": 87.84918032786885, "grad_norm": 14.205565452575684, "learning_rate": 7.644080414543098e-07, "loss": 0.3469, "step": 26794 }, { "epoch": 87.85245901639344, "grad_norm": 6.657988548278809, "learning_rate": 7.640009067163468e-07, "loss": 0.5973, "step": 26795 }, { "epoch": 87.85573770491803, "grad_norm": 5.217187404632568, "learning_rate": 7.635938761238781e-07, "loss": 0.1353, "step": 26796 }, { "epoch": 87.85901639344263, "grad_norm": 4.250675201416016, "learning_rate": 7.631869496814926e-07, "loss": 0.2286, "step": 26797 }, { "epoch": 87.86229508196722, "grad_norm": 5.779541492462158, "learning_rate": 7.627801273937762e-07, "loss": 0.4283, "step": 26798 }, { "epoch": 87.8655737704918, "grad_norm": 7.079283714294434, "learning_rate": 7.623734092653201e-07, "loss": 0.2961, "step": 26799 }, { "epoch": 87.8688524590164, "grad_norm": 6.943876266479492, "learning_rate": 7.619667953007081e-07, "loss": 0.341, "step": 26800 }, { "epoch": 87.87213114754098, "grad_norm": 5.080809593200684, "learning_rate": 7.615602855045256e-07, "loss": 0.3871, "step": 26801 }, { "epoch": 87.87540983606557, "grad_norm": 5.228437423706055, "learning_rate": 7.611538798813545e-07, "loss": 0.4584, "step": 26802 }, { "epoch": 87.87868852459016, "grad_norm": 4.329170227050781, "learning_rate": 7.60747578435782e-07, "loss": 0.3473, "step": 26803 }, { "epoch": 87.88196721311475, "grad_norm": 5.1718034744262695, "learning_rate": 7.603413811723858e-07, "loss": 0.4835, "step": 26804 }, { "epoch": 87.88524590163935, "grad_norm": 6.929427146911621, "learning_rate": 7.599352880957467e-07, "loss": 0.3541, "step": 26805 }, { "epoch": 87.88852459016394, "grad_norm": 6.192427635192871, "learning_rate": 7.595292992104453e-07, "loss": 0.4001, "step": 26806 }, { "epoch": 87.89180327868853, "grad_norm": 5.7568769454956055, "learning_rate": 7.591234145210591e-07, "loss": 0.3068, "step": 26807 }, { "epoch": 87.89508196721312, "grad_norm": 5.701220512390137, "learning_rate": 7.587176340321634e-07, "loss": 0.3566, "step": 26808 }, { "epoch": 87.8983606557377, "grad_norm": 6.240152359008789, "learning_rate": 7.583119577483356e-07, "loss": 0.4248, "step": 26809 }, { "epoch": 87.90163934426229, "grad_norm": 5.7952728271484375, "learning_rate": 7.579063856741498e-07, "loss": 0.2696, "step": 26810 }, { "epoch": 87.90491803278688, "grad_norm": 5.466262340545654, "learning_rate": 7.57500917814179e-07, "loss": 0.3313, "step": 26811 }, { "epoch": 87.90819672131147, "grad_norm": 5.785993576049805, "learning_rate": 7.570955541729941e-07, "loss": 0.342, "step": 26812 }, { "epoch": 87.91147540983607, "grad_norm": 5.527697563171387, "learning_rate": 7.566902947551679e-07, "loss": 0.3606, "step": 26813 }, { "epoch": 87.91475409836066, "grad_norm": 9.085844993591309, "learning_rate": 7.562851395652693e-07, "loss": 0.3874, "step": 26814 }, { "epoch": 87.91803278688525, "grad_norm": 4.794114589691162, "learning_rate": 7.558800886078665e-07, "loss": 0.4908, "step": 26815 }, { "epoch": 87.92131147540984, "grad_norm": 4.602921009063721, "learning_rate": 7.554751418875261e-07, "loss": 0.4041, "step": 26816 }, { "epoch": 87.92459016393443, "grad_norm": 4.771955490112305, "learning_rate": 7.550702994088177e-07, "loss": 0.3002, "step": 26817 }, { "epoch": 87.92786885245901, "grad_norm": 6.584268093109131, "learning_rate": 7.546655611763032e-07, "loss": 0.4932, "step": 26818 }, { "epoch": 87.9311475409836, "grad_norm": 4.79629373550415, "learning_rate": 7.542609271945467e-07, "loss": 0.3583, "step": 26819 }, { "epoch": 87.93442622950819, "grad_norm": 5.612736225128174, "learning_rate": 7.538563974681123e-07, "loss": 0.5676, "step": 26820 }, { "epoch": 87.9377049180328, "grad_norm": 4.568350315093994, "learning_rate": 7.534519720015576e-07, "loss": 0.2966, "step": 26821 }, { "epoch": 87.94098360655738, "grad_norm": 4.132085800170898, "learning_rate": 7.530476507994488e-07, "loss": 0.2161, "step": 26822 }, { "epoch": 87.94426229508197, "grad_norm": 5.496664047241211, "learning_rate": 7.526434338663424e-07, "loss": 0.322, "step": 26823 }, { "epoch": 87.94754098360656, "grad_norm": 5.0275092124938965, "learning_rate": 7.522393212067958e-07, "loss": 0.4141, "step": 26824 }, { "epoch": 87.95081967213115, "grad_norm": 14.26405143737793, "learning_rate": 7.518353128253642e-07, "loss": 0.3351, "step": 26825 }, { "epoch": 87.95409836065573, "grad_norm": 4.93070125579834, "learning_rate": 7.514314087266062e-07, "loss": 0.2626, "step": 26826 }, { "epoch": 87.95737704918032, "grad_norm": 8.099761009216309, "learning_rate": 7.510276089150758e-07, "loss": 0.5685, "step": 26827 }, { "epoch": 87.96065573770491, "grad_norm": 5.126265048980713, "learning_rate": 7.506239133953264e-07, "loss": 0.3041, "step": 26828 }, { "epoch": 87.96393442622951, "grad_norm": 5.846761226654053, "learning_rate": 7.502203221719062e-07, "loss": 0.3133, "step": 26829 }, { "epoch": 87.9672131147541, "grad_norm": 5.303978443145752, "learning_rate": 7.498168352493718e-07, "loss": 0.238, "step": 26830 }, { "epoch": 87.97049180327869, "grad_norm": 5.726813793182373, "learning_rate": 7.494134526322705e-07, "loss": 0.3669, "step": 26831 }, { "epoch": 87.97377049180328, "grad_norm": 6.502087116241455, "learning_rate": 7.490101743251499e-07, "loss": 0.2674, "step": 26832 }, { "epoch": 87.97704918032787, "grad_norm": 5.316159248352051, "learning_rate": 7.486070003325585e-07, "loss": 0.3116, "step": 26833 }, { "epoch": 87.98032786885246, "grad_norm": 4.499611854553223, "learning_rate": 7.482039306590405e-07, "loss": 0.4095, "step": 26834 }, { "epoch": 87.98360655737704, "grad_norm": 4.66576623916626, "learning_rate": 7.478009653091444e-07, "loss": 0.2983, "step": 26835 }, { "epoch": 87.98688524590163, "grad_norm": 5.3317742347717285, "learning_rate": 7.473981042874135e-07, "loss": 0.3193, "step": 26836 }, { "epoch": 87.99016393442623, "grad_norm": 5.264640808105469, "learning_rate": 7.469953475983871e-07, "loss": 0.3303, "step": 26837 }, { "epoch": 87.99344262295082, "grad_norm": 4.918723106384277, "learning_rate": 7.465926952466085e-07, "loss": 0.2823, "step": 26838 }, { "epoch": 87.99672131147541, "grad_norm": 6.675366401672363, "learning_rate": 7.461901472366195e-07, "loss": 0.4035, "step": 26839 }, { "epoch": 88.0, "grad_norm": 5.606700420379639, "learning_rate": 7.457877035729588e-07, "loss": 0.2532, "step": 26840 }, { "epoch": 88.00327868852459, "grad_norm": 4.784417629241943, "learning_rate": 7.453853642601638e-07, "loss": 0.3905, "step": 26841 }, { "epoch": 88.00655737704918, "grad_norm": 3.600137710571289, "learning_rate": 7.449831293027687e-07, "loss": 0.3496, "step": 26842 }, { "epoch": 88.00983606557377, "grad_norm": 4.9226179122924805, "learning_rate": 7.445809987053143e-07, "loss": 0.2592, "step": 26843 }, { "epoch": 88.01311475409837, "grad_norm": 4.329571723937988, "learning_rate": 7.441789724723314e-07, "loss": 0.4816, "step": 26844 }, { "epoch": 88.01639344262296, "grad_norm": 5.066388130187988, "learning_rate": 7.437770506083542e-07, "loss": 0.5022, "step": 26845 }, { "epoch": 88.01967213114754, "grad_norm": 4.244696140289307, "learning_rate": 7.433752331179156e-07, "loss": 0.2593, "step": 26846 }, { "epoch": 88.02295081967213, "grad_norm": 5.7242112159729, "learning_rate": 7.429735200055432e-07, "loss": 0.2101, "step": 26847 }, { "epoch": 88.02622950819672, "grad_norm": 4.881803035736084, "learning_rate": 7.425719112757723e-07, "loss": 0.4799, "step": 26848 }, { "epoch": 88.02950819672131, "grad_norm": 5.7022013664245605, "learning_rate": 7.42170406933127e-07, "loss": 0.3536, "step": 26849 }, { "epoch": 88.0327868852459, "grad_norm": 4.580362319946289, "learning_rate": 7.417690069821371e-07, "loss": 0.3767, "step": 26850 }, { "epoch": 88.03606557377049, "grad_norm": 4.0706562995910645, "learning_rate": 7.413677114273255e-07, "loss": 0.325, "step": 26851 }, { "epoch": 88.03934426229509, "grad_norm": 4.4982709884643555, "learning_rate": 7.409665202732208e-07, "loss": 0.444, "step": 26852 }, { "epoch": 88.04262295081968, "grad_norm": 6.6486897468566895, "learning_rate": 7.405654335243461e-07, "loss": 0.3358, "step": 26853 }, { "epoch": 88.04590163934427, "grad_norm": 4.656204700469971, "learning_rate": 7.401644511852224e-07, "loss": 0.2758, "step": 26854 }, { "epoch": 88.04918032786885, "grad_norm": 5.124514579772949, "learning_rate": 7.397635732603725e-07, "loss": 0.3025, "step": 26855 }, { "epoch": 88.05245901639344, "grad_norm": 8.282803535461426, "learning_rate": 7.393627997543184e-07, "loss": 0.402, "step": 26856 }, { "epoch": 88.05573770491803, "grad_norm": 4.528599262237549, "learning_rate": 7.389621306715744e-07, "loss": 0.2392, "step": 26857 }, { "epoch": 88.05901639344262, "grad_norm": 5.2745866775512695, "learning_rate": 7.385615660166634e-07, "loss": 0.6252, "step": 26858 }, { "epoch": 88.0622950819672, "grad_norm": 5.194652557373047, "learning_rate": 7.381611057941007e-07, "loss": 0.2464, "step": 26859 }, { "epoch": 88.06557377049181, "grad_norm": 5.383783340454102, "learning_rate": 7.377607500083994e-07, "loss": 0.5258, "step": 26860 }, { "epoch": 88.0688524590164, "grad_norm": 4.91303014755249, "learning_rate": 7.37360498664077e-07, "loss": 0.2482, "step": 26861 }, { "epoch": 88.07213114754099, "grad_norm": 7.0704240798950195, "learning_rate": 7.369603517656465e-07, "loss": 0.4972, "step": 26862 }, { "epoch": 88.07540983606557, "grad_norm": 7.04500675201416, "learning_rate": 7.365603093176188e-07, "loss": 0.5191, "step": 26863 }, { "epoch": 88.07868852459016, "grad_norm": 5.281407356262207, "learning_rate": 7.361603713245036e-07, "loss": 0.5404, "step": 26864 }, { "epoch": 88.08196721311475, "grad_norm": 5.106075763702393, "learning_rate": 7.357605377908139e-07, "loss": 0.3961, "step": 26865 }, { "epoch": 88.08524590163934, "grad_norm": 5.393221855163574, "learning_rate": 7.353608087210573e-07, "loss": 0.292, "step": 26866 }, { "epoch": 88.08852459016393, "grad_norm": 4.6265764236450195, "learning_rate": 7.349611841197391e-07, "loss": 0.4332, "step": 26867 }, { "epoch": 88.09180327868853, "grad_norm": 4.732553005218506, "learning_rate": 7.345616639913678e-07, "loss": 0.3044, "step": 26868 }, { "epoch": 88.09508196721312, "grad_norm": 4.717343330383301, "learning_rate": 7.341622483404454e-07, "loss": 0.2964, "step": 26869 }, { "epoch": 88.09836065573771, "grad_norm": 4.599440574645996, "learning_rate": 7.337629371714794e-07, "loss": 0.2376, "step": 26870 }, { "epoch": 88.1016393442623, "grad_norm": 4.043966293334961, "learning_rate": 7.333637304889707e-07, "loss": 0.3564, "step": 26871 }, { "epoch": 88.10491803278688, "grad_norm": 8.436348915100098, "learning_rate": 7.329646282974201e-07, "loss": 0.2947, "step": 26872 }, { "epoch": 88.10819672131147, "grad_norm": 5.738617420196533, "learning_rate": 7.325656306013274e-07, "loss": 0.3037, "step": 26873 }, { "epoch": 88.11147540983606, "grad_norm": 5.335603713989258, "learning_rate": 7.321667374051955e-07, "loss": 0.2197, "step": 26874 }, { "epoch": 88.11475409836065, "grad_norm": 15.109969139099121, "learning_rate": 7.317679487135188e-07, "loss": 0.344, "step": 26875 }, { "epoch": 88.11803278688525, "grad_norm": 6.961419105529785, "learning_rate": 7.313692645307946e-07, "loss": 0.2886, "step": 26876 }, { "epoch": 88.12131147540984, "grad_norm": 4.473926544189453, "learning_rate": 7.309706848615183e-07, "loss": 0.2929, "step": 26877 }, { "epoch": 88.12459016393443, "grad_norm": 5.2060418128967285, "learning_rate": 7.305722097101864e-07, "loss": 0.5168, "step": 26878 }, { "epoch": 88.12786885245902, "grad_norm": 5.580639362335205, "learning_rate": 7.301738390812907e-07, "loss": 0.5577, "step": 26879 }, { "epoch": 88.1311475409836, "grad_norm": 5.47620964050293, "learning_rate": 7.297755729793221e-07, "loss": 0.299, "step": 26880 }, { "epoch": 88.1344262295082, "grad_norm": 10.43008804321289, "learning_rate": 7.293774114087737e-07, "loss": 0.6002, "step": 26881 }, { "epoch": 88.13770491803278, "grad_norm": 3.711383581161499, "learning_rate": 7.289793543741319e-07, "loss": 0.3193, "step": 26882 }, { "epoch": 88.14098360655737, "grad_norm": 4.254181861877441, "learning_rate": 7.285814018798887e-07, "loss": 0.3027, "step": 26883 }, { "epoch": 88.14426229508197, "grad_norm": 6.273804664611816, "learning_rate": 7.281835539305304e-07, "loss": 0.2787, "step": 26884 }, { "epoch": 88.14754098360656, "grad_norm": 3.846623659133911, "learning_rate": 7.277858105305436e-07, "loss": 0.2129, "step": 26885 }, { "epoch": 88.15081967213115, "grad_norm": 3.3226847648620605, "learning_rate": 7.273881716844089e-07, "loss": 0.2515, "step": 26886 }, { "epoch": 88.15409836065574, "grad_norm": 3.798032522201538, "learning_rate": 7.269906373966174e-07, "loss": 0.2311, "step": 26887 }, { "epoch": 88.15737704918033, "grad_norm": 6.509669303894043, "learning_rate": 7.265932076716464e-07, "loss": 0.2567, "step": 26888 }, { "epoch": 88.16065573770491, "grad_norm": 3.9484076499938965, "learning_rate": 7.261958825139792e-07, "loss": 0.3877, "step": 26889 }, { "epoch": 88.1639344262295, "grad_norm": 6.7548322677612305, "learning_rate": 7.257986619280943e-07, "loss": 0.4126, "step": 26890 }, { "epoch": 88.1672131147541, "grad_norm": 5.5843586921691895, "learning_rate": 7.254015459184748e-07, "loss": 0.594, "step": 26891 }, { "epoch": 88.1704918032787, "grad_norm": 4.882465839385986, "learning_rate": 7.250045344895951e-07, "loss": 0.3106, "step": 26892 }, { "epoch": 88.17377049180328, "grad_norm": 3.955449104309082, "learning_rate": 7.246076276459324e-07, "loss": 0.262, "step": 26893 }, { "epoch": 88.17704918032787, "grad_norm": 5.263370037078857, "learning_rate": 7.242108253919633e-07, "loss": 0.2119, "step": 26894 }, { "epoch": 88.18032786885246, "grad_norm": 5.062334060668945, "learning_rate": 7.238141277321608e-07, "loss": 0.4087, "step": 26895 }, { "epoch": 88.18360655737705, "grad_norm": 5.458135604858398, "learning_rate": 7.234175346709993e-07, "loss": 0.4292, "step": 26896 }, { "epoch": 88.18688524590164, "grad_norm": 4.300048828125, "learning_rate": 7.230210462129505e-07, "loss": 0.3118, "step": 26897 }, { "epoch": 88.19016393442622, "grad_norm": 4.866457939147949, "learning_rate": 7.226246623624844e-07, "loss": 0.4479, "step": 26898 }, { "epoch": 88.19344262295083, "grad_norm": 6.2568840980529785, "learning_rate": 7.222283831240706e-07, "loss": 0.1421, "step": 26899 }, { "epoch": 88.19672131147541, "grad_norm": 10.708992004394531, "learning_rate": 7.218322085021801e-07, "loss": 0.4695, "step": 26900 }, { "epoch": 88.2, "grad_norm": 4.706027030944824, "learning_rate": 7.21436138501278e-07, "loss": 0.246, "step": 26901 }, { "epoch": 88.20327868852459, "grad_norm": 4.6453857421875, "learning_rate": 7.210401731258298e-07, "loss": 0.2804, "step": 26902 }, { "epoch": 88.20655737704918, "grad_norm": 5.323269844055176, "learning_rate": 7.206443123803009e-07, "loss": 0.3764, "step": 26903 }, { "epoch": 88.20983606557377, "grad_norm": 4.020177364349365, "learning_rate": 7.202485562691563e-07, "loss": 0.1597, "step": 26904 }, { "epoch": 88.21311475409836, "grad_norm": 4.491880893707275, "learning_rate": 7.198529047968583e-07, "loss": 0.2187, "step": 26905 }, { "epoch": 88.21639344262294, "grad_norm": 5.837386131286621, "learning_rate": 7.194573579678677e-07, "loss": 0.3086, "step": 26906 }, { "epoch": 88.21967213114755, "grad_norm": 4.237581729888916, "learning_rate": 7.190619157866429e-07, "loss": 0.2688, "step": 26907 }, { "epoch": 88.22295081967214, "grad_norm": 4.561394214630127, "learning_rate": 7.186665782576474e-07, "loss": 0.3897, "step": 26908 }, { "epoch": 88.22622950819672, "grad_norm": 4.779336452484131, "learning_rate": 7.182713453853352e-07, "loss": 0.4581, "step": 26909 }, { "epoch": 88.22950819672131, "grad_norm": 5.733036041259766, "learning_rate": 7.178762171741626e-07, "loss": 0.3241, "step": 26910 }, { "epoch": 88.2327868852459, "grad_norm": 4.115889549255371, "learning_rate": 7.174811936285886e-07, "loss": 0.4756, "step": 26911 }, { "epoch": 88.23606557377049, "grad_norm": 4.04171895980835, "learning_rate": 7.170862747530649e-07, "loss": 0.5144, "step": 26912 }, { "epoch": 88.23934426229508, "grad_norm": 29.78754234313965, "learning_rate": 7.166914605520447e-07, "loss": 0.1949, "step": 26913 }, { "epoch": 88.24262295081967, "grad_norm": 5.425118446350098, "learning_rate": 7.162967510299811e-07, "loss": 0.445, "step": 26914 }, { "epoch": 88.24590163934427, "grad_norm": 4.250611305236816, "learning_rate": 7.159021461913251e-07, "loss": 0.2961, "step": 26915 }, { "epoch": 88.24918032786886, "grad_norm": 8.092888832092285, "learning_rate": 7.155076460405231e-07, "loss": 0.3123, "step": 26916 }, { "epoch": 88.25245901639344, "grad_norm": 4.740816116333008, "learning_rate": 7.151132505820279e-07, "loss": 0.2607, "step": 26917 }, { "epoch": 88.25573770491803, "grad_norm": 4.000370502471924, "learning_rate": 7.147189598202853e-07, "loss": 0.2491, "step": 26918 }, { "epoch": 88.25901639344262, "grad_norm": 5.6458964347839355, "learning_rate": 7.143247737597392e-07, "loss": 0.1755, "step": 26919 }, { "epoch": 88.26229508196721, "grad_norm": 7.249646186828613, "learning_rate": 7.139306924048373e-07, "loss": 0.2694, "step": 26920 }, { "epoch": 88.2655737704918, "grad_norm": 7.666624546051025, "learning_rate": 7.135367157600193e-07, "loss": 0.4793, "step": 26921 }, { "epoch": 88.26885245901639, "grad_norm": 7.296714782714844, "learning_rate": 7.131428438297327e-07, "loss": 0.3022, "step": 26922 }, { "epoch": 88.27213114754099, "grad_norm": 4.806179046630859, "learning_rate": 7.127490766184164e-07, "loss": 0.3468, "step": 26923 }, { "epoch": 88.27540983606558, "grad_norm": 5.1757941246032715, "learning_rate": 7.1235541413051e-07, "loss": 0.3345, "step": 26924 }, { "epoch": 88.27868852459017, "grad_norm": 7.485350608825684, "learning_rate": 7.119618563704522e-07, "loss": 0.4479, "step": 26925 }, { "epoch": 88.28196721311475, "grad_norm": 4.526852130889893, "learning_rate": 7.115684033426829e-07, "loss": 0.3884, "step": 26926 }, { "epoch": 88.28524590163934, "grad_norm": 4.156457424163818, "learning_rate": 7.111750550516372e-07, "loss": 0.2841, "step": 26927 }, { "epoch": 88.28852459016393, "grad_norm": 4.351293087005615, "learning_rate": 7.107818115017507e-07, "loss": 0.4346, "step": 26928 }, { "epoch": 88.29180327868852, "grad_norm": 4.743964195251465, "learning_rate": 7.103886726974562e-07, "loss": 0.533, "step": 26929 }, { "epoch": 88.29508196721312, "grad_norm": 7.105353355407715, "learning_rate": 7.099956386431894e-07, "loss": 0.201, "step": 26930 }, { "epoch": 88.29836065573771, "grad_norm": 4.996912002563477, "learning_rate": 7.09602709343381e-07, "loss": 0.3636, "step": 26931 }, { "epoch": 88.3016393442623, "grad_norm": 3.170649766921997, "learning_rate": 7.09209884802462e-07, "loss": 0.3554, "step": 26932 }, { "epoch": 88.30491803278689, "grad_norm": 5.289580821990967, "learning_rate": 7.088171650248621e-07, "loss": 0.2862, "step": 26933 }, { "epoch": 88.30819672131148, "grad_norm": 4.9650797843933105, "learning_rate": 7.084245500150066e-07, "loss": 0.3574, "step": 26934 }, { "epoch": 88.31147540983606, "grad_norm": 6.742478847503662, "learning_rate": 7.080320397773266e-07, "loss": 0.3544, "step": 26935 }, { "epoch": 88.31475409836065, "grad_norm": 5.176868438720703, "learning_rate": 7.076396343162473e-07, "loss": 0.4013, "step": 26936 }, { "epoch": 88.31803278688524, "grad_norm": 5.602933883666992, "learning_rate": 7.072473336361929e-07, "loss": 0.5134, "step": 26937 }, { "epoch": 88.32131147540984, "grad_norm": 6.158091068267822, "learning_rate": 7.068551377415844e-07, "loss": 0.2487, "step": 26938 }, { "epoch": 88.32459016393443, "grad_norm": 4.3246169090271, "learning_rate": 7.064630466368483e-07, "loss": 0.3885, "step": 26939 }, { "epoch": 88.32786885245902, "grad_norm": 4.878698825836182, "learning_rate": 7.060710603264054e-07, "loss": 0.4121, "step": 26940 }, { "epoch": 88.33114754098361, "grad_norm": 9.262742042541504, "learning_rate": 7.056791788146733e-07, "loss": 0.5129, "step": 26941 }, { "epoch": 88.3344262295082, "grad_norm": 5.594508647918701, "learning_rate": 7.052874021060707e-07, "loss": 0.4748, "step": 26942 }, { "epoch": 88.33770491803278, "grad_norm": 9.625886917114258, "learning_rate": 7.048957302050186e-07, "loss": 0.5036, "step": 26943 }, { "epoch": 88.34098360655737, "grad_norm": 4.286397457122803, "learning_rate": 7.04504163115931e-07, "loss": 0.3887, "step": 26944 }, { "epoch": 88.34426229508196, "grad_norm": 5.292749881744385, "learning_rate": 7.041127008432247e-07, "loss": 0.5201, "step": 26945 }, { "epoch": 88.34754098360656, "grad_norm": 5.826927185058594, "learning_rate": 7.037213433913126e-07, "loss": 0.6117, "step": 26946 }, { "epoch": 88.35081967213115, "grad_norm": 4.659241199493408, "learning_rate": 7.033300907646068e-07, "loss": 0.4274, "step": 26947 }, { "epoch": 88.35409836065574, "grad_norm": 6.516834259033203, "learning_rate": 7.029389429675215e-07, "loss": 0.4469, "step": 26948 }, { "epoch": 88.35737704918033, "grad_norm": 7.305557727813721, "learning_rate": 7.025479000044666e-07, "loss": 0.3233, "step": 26949 }, { "epoch": 88.36065573770492, "grad_norm": 5.2352776527404785, "learning_rate": 7.021569618798507e-07, "loss": 0.3366, "step": 26950 }, { "epoch": 88.3639344262295, "grad_norm": 6.314229488372803, "learning_rate": 7.017661285980814e-07, "loss": 0.5204, "step": 26951 }, { "epoch": 88.3672131147541, "grad_norm": 4.345345497131348, "learning_rate": 7.013754001635676e-07, "loss": 0.405, "step": 26952 }, { "epoch": 88.37049180327868, "grad_norm": 4.831668853759766, "learning_rate": 7.009847765807143e-07, "loss": 0.2798, "step": 26953 }, { "epoch": 88.37377049180328, "grad_norm": 9.045868873596191, "learning_rate": 7.005942578539271e-07, "loss": 0.4251, "step": 26954 }, { "epoch": 88.37704918032787, "grad_norm": 6.023303031921387, "learning_rate": 7.002038439876057e-07, "loss": 0.5993, "step": 26955 }, { "epoch": 88.38032786885246, "grad_norm": 5.750218391418457, "learning_rate": 6.998135349861579e-07, "loss": 0.3815, "step": 26956 }, { "epoch": 88.38360655737705, "grad_norm": 4.537169456481934, "learning_rate": 6.994233308539822e-07, "loss": 0.2364, "step": 26957 }, { "epoch": 88.38688524590164, "grad_norm": 4.248875617980957, "learning_rate": 6.990332315954784e-07, "loss": 0.4284, "step": 26958 }, { "epoch": 88.39016393442623, "grad_norm": 5.075209617614746, "learning_rate": 6.986432372150431e-07, "loss": 0.2419, "step": 26959 }, { "epoch": 88.39344262295081, "grad_norm": 5.7503132820129395, "learning_rate": 6.982533477170795e-07, "loss": 0.3509, "step": 26960 }, { "epoch": 88.3967213114754, "grad_norm": 5.233653545379639, "learning_rate": 6.978635631059794e-07, "loss": 0.3325, "step": 26961 }, { "epoch": 88.4, "grad_norm": 6.261904239654541, "learning_rate": 6.974738833861383e-07, "loss": 0.3098, "step": 26962 }, { "epoch": 88.4032786885246, "grad_norm": 4.583286762237549, "learning_rate": 6.970843085619528e-07, "loss": 0.4929, "step": 26963 }, { "epoch": 88.40655737704918, "grad_norm": 4.5272345542907715, "learning_rate": 6.966948386378147e-07, "loss": 0.3647, "step": 26964 }, { "epoch": 88.40983606557377, "grad_norm": 4.247230529785156, "learning_rate": 6.963054736181152e-07, "loss": 0.4089, "step": 26965 }, { "epoch": 88.41311475409836, "grad_norm": 4.095798492431641, "learning_rate": 6.959162135072428e-07, "loss": 0.4052, "step": 26966 }, { "epoch": 88.41639344262295, "grad_norm": 3.862395763397217, "learning_rate": 6.955270583095919e-07, "loss": 0.4482, "step": 26967 }, { "epoch": 88.41967213114754, "grad_norm": 5.740139007568359, "learning_rate": 6.951380080295467e-07, "loss": 0.2656, "step": 26968 }, { "epoch": 88.42295081967212, "grad_norm": 5.578222751617432, "learning_rate": 6.947490626714926e-07, "loss": 0.4831, "step": 26969 }, { "epoch": 88.42622950819673, "grad_norm": 4.745235443115234, "learning_rate": 6.943602222398204e-07, "loss": 0.3087, "step": 26970 }, { "epoch": 88.42950819672132, "grad_norm": 3.422739028930664, "learning_rate": 6.939714867389124e-07, "loss": 0.4891, "step": 26971 }, { "epoch": 88.4327868852459, "grad_norm": 29.407320022583008, "learning_rate": 6.935828561731517e-07, "loss": 0.4795, "step": 26972 }, { "epoch": 88.43606557377049, "grad_norm": 4.864736557006836, "learning_rate": 6.93194330546918e-07, "loss": 0.2906, "step": 26973 }, { "epoch": 88.43934426229508, "grad_norm": 5.269002914428711, "learning_rate": 6.928059098645979e-07, "loss": 0.487, "step": 26974 }, { "epoch": 88.44262295081967, "grad_norm": 7.338691234588623, "learning_rate": 6.924175941305666e-07, "loss": 0.3385, "step": 26975 }, { "epoch": 88.44590163934426, "grad_norm": 5.455846786499023, "learning_rate": 6.920293833492053e-07, "loss": 0.312, "step": 26976 }, { "epoch": 88.44918032786886, "grad_norm": 4.533163547515869, "learning_rate": 6.916412775248893e-07, "loss": 0.3258, "step": 26977 }, { "epoch": 88.45245901639345, "grad_norm": 5.579282283782959, "learning_rate": 6.912532766619973e-07, "loss": 0.4887, "step": 26978 }, { "epoch": 88.45573770491804, "grad_norm": 3.959348678588867, "learning_rate": 6.908653807649035e-07, "loss": 0.4134, "step": 26979 }, { "epoch": 88.45901639344262, "grad_norm": 5.448296070098877, "learning_rate": 6.904775898379811e-07, "loss": 0.4052, "step": 26980 }, { "epoch": 88.46229508196721, "grad_norm": 3.855590581893921, "learning_rate": 6.900899038856024e-07, "loss": 0.6167, "step": 26981 }, { "epoch": 88.4655737704918, "grad_norm": 6.5838141441345215, "learning_rate": 6.897023229121413e-07, "loss": 0.2891, "step": 26982 }, { "epoch": 88.46885245901639, "grad_norm": 4.573240280151367, "learning_rate": 6.893148469219669e-07, "loss": 0.3609, "step": 26983 }, { "epoch": 88.47213114754098, "grad_norm": 4.806645393371582, "learning_rate": 6.889274759194475e-07, "loss": 0.4762, "step": 26984 }, { "epoch": 88.47540983606558, "grad_norm": 4.414767265319824, "learning_rate": 6.885402099089533e-07, "loss": 0.4289, "step": 26985 }, { "epoch": 88.47868852459017, "grad_norm": 4.480599880218506, "learning_rate": 6.881530488948474e-07, "loss": 0.3912, "step": 26986 }, { "epoch": 88.48196721311476, "grad_norm": 4.773974895477295, "learning_rate": 6.877659928814984e-07, "loss": 0.454, "step": 26987 }, { "epoch": 88.48524590163935, "grad_norm": 4.648745059967041, "learning_rate": 6.873790418732718e-07, "loss": 0.2757, "step": 26988 }, { "epoch": 88.48852459016393, "grad_norm": 6.845466136932373, "learning_rate": 6.869921958745285e-07, "loss": 0.4401, "step": 26989 }, { "epoch": 88.49180327868852, "grad_norm": 4.786999225616455, "learning_rate": 6.866054548896295e-07, "loss": 0.488, "step": 26990 }, { "epoch": 88.49508196721311, "grad_norm": 17.373865127563477, "learning_rate": 6.86218818922939e-07, "loss": 0.2797, "step": 26991 }, { "epoch": 88.4983606557377, "grad_norm": 12.995606422424316, "learning_rate": 6.858322879788148e-07, "loss": 0.4762, "step": 26992 }, { "epoch": 88.5016393442623, "grad_norm": 7.142528533935547, "learning_rate": 6.854458620616166e-07, "loss": 0.3411, "step": 26993 }, { "epoch": 88.50491803278689, "grad_norm": 4.369643211364746, "learning_rate": 6.850595411756999e-07, "loss": 0.1697, "step": 26994 }, { "epoch": 88.50819672131148, "grad_norm": 5.594306468963623, "learning_rate": 6.846733253254223e-07, "loss": 0.3403, "step": 26995 }, { "epoch": 88.51147540983607, "grad_norm": 4.238091468811035, "learning_rate": 6.842872145151391e-07, "loss": 0.2913, "step": 26996 }, { "epoch": 88.51475409836065, "grad_norm": 6.750340938568115, "learning_rate": 6.839012087492037e-07, "loss": 0.4061, "step": 26997 }, { "epoch": 88.51803278688524, "grad_norm": 4.838461875915527, "learning_rate": 6.83515308031969e-07, "loss": 0.3185, "step": 26998 }, { "epoch": 88.52131147540983, "grad_norm": 4.456482887268066, "learning_rate": 6.831295123677829e-07, "loss": 0.1956, "step": 26999 }, { "epoch": 88.52459016393442, "grad_norm": 7.124014854431152, "learning_rate": 6.827438217610016e-07, "loss": 0.3169, "step": 27000 }, { "epoch": 88.52786885245902, "grad_norm": 8.263039588928223, "learning_rate": 6.823582362159697e-07, "loss": 0.4359, "step": 27001 }, { "epoch": 88.53114754098361, "grad_norm": 3.7180190086364746, "learning_rate": 6.819727557370381e-07, "loss": 0.111, "step": 27002 }, { "epoch": 88.5344262295082, "grad_norm": 5.07220983505249, "learning_rate": 6.8158738032855e-07, "loss": 0.3911, "step": 27003 }, { "epoch": 88.53770491803279, "grad_norm": 4.1410040855407715, "learning_rate": 6.81202109994854e-07, "loss": 0.3925, "step": 27004 }, { "epoch": 88.54098360655738, "grad_norm": 5.915829181671143, "learning_rate": 6.808169447402935e-07, "loss": 0.4884, "step": 27005 }, { "epoch": 88.54426229508196, "grad_norm": 5.099154472351074, "learning_rate": 6.804318845692115e-07, "loss": 0.3644, "step": 27006 }, { "epoch": 88.54754098360655, "grad_norm": 5.01785945892334, "learning_rate": 6.800469294859491e-07, "loss": 0.5461, "step": 27007 }, { "epoch": 88.55081967213114, "grad_norm": 5.9590020179748535, "learning_rate": 6.796620794948483e-07, "loss": 0.2485, "step": 27008 }, { "epoch": 88.55409836065574, "grad_norm": 5.856152534484863, "learning_rate": 6.79277334600249e-07, "loss": 0.5447, "step": 27009 }, { "epoch": 88.55737704918033, "grad_norm": 4.837955951690674, "learning_rate": 6.788926948064889e-07, "loss": 0.6225, "step": 27010 }, { "epoch": 88.56065573770492, "grad_norm": 4.899770259857178, "learning_rate": 6.785081601179044e-07, "loss": 0.2617, "step": 27011 }, { "epoch": 88.56393442622951, "grad_norm": 4.367698669433594, "learning_rate": 6.78123730538831e-07, "loss": 0.4479, "step": 27012 }, { "epoch": 88.5672131147541, "grad_norm": 3.8790090084075928, "learning_rate": 6.777394060736076e-07, "loss": 0.2078, "step": 27013 }, { "epoch": 88.57049180327868, "grad_norm": 4.295159816741943, "learning_rate": 6.773551867265637e-07, "loss": 0.3286, "step": 27014 }, { "epoch": 88.57377049180327, "grad_norm": 4.906009674072266, "learning_rate": 6.769710725020329e-07, "loss": 0.3296, "step": 27015 }, { "epoch": 88.57704918032788, "grad_norm": 4.115455150604248, "learning_rate": 6.765870634043469e-07, "loss": 0.3591, "step": 27016 }, { "epoch": 88.58032786885246, "grad_norm": 7.1959381103515625, "learning_rate": 6.76203159437837e-07, "loss": 0.3889, "step": 27017 }, { "epoch": 88.58360655737705, "grad_norm": 4.349888801574707, "learning_rate": 6.758193606068286e-07, "loss": 0.5549, "step": 27018 }, { "epoch": 88.58688524590164, "grad_norm": 4.544242858886719, "learning_rate": 6.754356669156526e-07, "loss": 0.3425, "step": 27019 }, { "epoch": 88.59016393442623, "grad_norm": 4.706412315368652, "learning_rate": 6.750520783686354e-07, "loss": 0.5429, "step": 27020 }, { "epoch": 88.59344262295082, "grad_norm": 8.431591987609863, "learning_rate": 6.746685949700993e-07, "loss": 0.2993, "step": 27021 }, { "epoch": 88.5967213114754, "grad_norm": 4.831925392150879, "learning_rate": 6.742852167243729e-07, "loss": 0.4104, "step": 27022 }, { "epoch": 88.6, "grad_norm": 4.810810565948486, "learning_rate": 6.739019436357774e-07, "loss": 0.5283, "step": 27023 }, { "epoch": 88.6032786885246, "grad_norm": 5.389037609100342, "learning_rate": 6.735187757086337e-07, "loss": 0.256, "step": 27024 }, { "epoch": 88.60655737704919, "grad_norm": 6.58470344543457, "learning_rate": 6.731357129472605e-07, "loss": 0.367, "step": 27025 }, { "epoch": 88.60983606557377, "grad_norm": 7.3883233070373535, "learning_rate": 6.727527553559821e-07, "loss": 0.3879, "step": 27026 }, { "epoch": 88.61311475409836, "grad_norm": 4.276110649108887, "learning_rate": 6.72369902939114e-07, "loss": 0.4471, "step": 27027 }, { "epoch": 88.61639344262295, "grad_norm": 6.39464807510376, "learning_rate": 6.719871557009738e-07, "loss": 0.4608, "step": 27028 }, { "epoch": 88.61967213114754, "grad_norm": 14.033141136169434, "learning_rate": 6.71604513645876e-07, "loss": 0.3512, "step": 27029 }, { "epoch": 88.62295081967213, "grad_norm": 5.305404186248779, "learning_rate": 6.712219767781369e-07, "loss": 0.5177, "step": 27030 }, { "epoch": 88.62622950819672, "grad_norm": 5.611781597137451, "learning_rate": 6.7083954510207e-07, "loss": 0.2744, "step": 27031 }, { "epoch": 88.62950819672132, "grad_norm": 8.228352546691895, "learning_rate": 6.704572186219871e-07, "loss": 0.2231, "step": 27032 }, { "epoch": 88.6327868852459, "grad_norm": 6.412230014801025, "learning_rate": 6.700749973421982e-07, "loss": 0.2396, "step": 27033 }, { "epoch": 88.6360655737705, "grad_norm": 5.9623847007751465, "learning_rate": 6.696928812670156e-07, "loss": 0.307, "step": 27034 }, { "epoch": 88.63934426229508, "grad_norm": 4.409096717834473, "learning_rate": 6.693108704007467e-07, "loss": 0.4055, "step": 27035 }, { "epoch": 88.64262295081967, "grad_norm": 7.641373634338379, "learning_rate": 6.689289647476993e-07, "loss": 0.5709, "step": 27036 }, { "epoch": 88.64590163934426, "grad_norm": 4.955871105194092, "learning_rate": 6.685471643121799e-07, "loss": 0.3345, "step": 27037 }, { "epoch": 88.64918032786885, "grad_norm": 4.281665325164795, "learning_rate": 6.681654690984917e-07, "loss": 0.2789, "step": 27038 }, { "epoch": 88.65245901639344, "grad_norm": 5.959794044494629, "learning_rate": 6.677838791109425e-07, "loss": 0.3453, "step": 27039 }, { "epoch": 88.65573770491804, "grad_norm": 13.712775230407715, "learning_rate": 6.674023943538333e-07, "loss": 0.4867, "step": 27040 }, { "epoch": 88.65901639344263, "grad_norm": 5.769227027893066, "learning_rate": 6.670210148314648e-07, "loss": 0.2506, "step": 27041 }, { "epoch": 88.66229508196722, "grad_norm": 5.1837029457092285, "learning_rate": 6.666397405481373e-07, "loss": 0.2819, "step": 27042 }, { "epoch": 88.6655737704918, "grad_norm": 6.429954528808594, "learning_rate": 6.662585715081515e-07, "loss": 0.5087, "step": 27043 }, { "epoch": 88.66885245901639, "grad_norm": 4.408247470855713, "learning_rate": 6.658775077158064e-07, "loss": 0.3172, "step": 27044 }, { "epoch": 88.67213114754098, "grad_norm": 6.082093238830566, "learning_rate": 6.654965491753962e-07, "loss": 0.4814, "step": 27045 }, { "epoch": 88.67540983606557, "grad_norm": 4.108407974243164, "learning_rate": 6.651156958912175e-07, "loss": 0.1174, "step": 27046 }, { "epoch": 88.67868852459016, "grad_norm": 4.550446510314941, "learning_rate": 6.647349478675658e-07, "loss": 0.5877, "step": 27047 }, { "epoch": 88.68196721311476, "grad_norm": 9.06989574432373, "learning_rate": 6.643543051087342e-07, "loss": 0.3545, "step": 27048 }, { "epoch": 88.68524590163935, "grad_norm": 4.043239116668701, "learning_rate": 6.639737676190138e-07, "loss": 0.4027, "step": 27049 }, { "epoch": 88.68852459016394, "grad_norm": 5.434812545776367, "learning_rate": 6.635933354026969e-07, "loss": 0.3681, "step": 27050 }, { "epoch": 88.69180327868852, "grad_norm": 4.404420375823975, "learning_rate": 6.632130084640708e-07, "loss": 0.3403, "step": 27051 }, { "epoch": 88.69508196721311, "grad_norm": 5.542692184448242, "learning_rate": 6.62832786807428e-07, "loss": 0.4118, "step": 27052 }, { "epoch": 88.6983606557377, "grad_norm": 5.506571292877197, "learning_rate": 6.624526704370526e-07, "loss": 0.4801, "step": 27053 }, { "epoch": 88.70163934426229, "grad_norm": 6.778221130371094, "learning_rate": 6.620726593572324e-07, "loss": 0.4806, "step": 27054 }, { "epoch": 88.70491803278688, "grad_norm": 4.098108768463135, "learning_rate": 6.616927535722506e-07, "loss": 0.2574, "step": 27055 }, { "epoch": 88.70819672131148, "grad_norm": 8.177924156188965, "learning_rate": 6.613129530863948e-07, "loss": 0.2571, "step": 27056 }, { "epoch": 88.71147540983607, "grad_norm": 4.366252422332764, "learning_rate": 6.609332579039441e-07, "loss": 0.5878, "step": 27057 }, { "epoch": 88.71475409836066, "grad_norm": 3.4202969074249268, "learning_rate": 6.605536680291813e-07, "loss": 0.3189, "step": 27058 }, { "epoch": 88.71803278688525, "grad_norm": 8.378449440002441, "learning_rate": 6.601741834663855e-07, "loss": 0.3857, "step": 27059 }, { "epoch": 88.72131147540983, "grad_norm": 5.971685886383057, "learning_rate": 6.597948042198377e-07, "loss": 0.2668, "step": 27060 }, { "epoch": 88.72459016393442, "grad_norm": 7.608409404754639, "learning_rate": 6.594155302938143e-07, "loss": 0.1712, "step": 27061 }, { "epoch": 88.72786885245901, "grad_norm": 4.797069072723389, "learning_rate": 6.590363616925933e-07, "loss": 0.2811, "step": 27062 }, { "epoch": 88.73114754098361, "grad_norm": 4.8008503913879395, "learning_rate": 6.586572984204498e-07, "loss": 0.4544, "step": 27063 }, { "epoch": 88.7344262295082, "grad_norm": 9.635512351989746, "learning_rate": 6.582783404816562e-07, "loss": 0.4519, "step": 27064 }, { "epoch": 88.73770491803279, "grad_norm": 4.145665168762207, "learning_rate": 6.578994878804878e-07, "loss": 0.2882, "step": 27065 }, { "epoch": 88.74098360655738, "grad_norm": 4.457791805267334, "learning_rate": 6.575207406212169e-07, "loss": 0.321, "step": 27066 }, { "epoch": 88.74426229508197, "grad_norm": 4.260960102081299, "learning_rate": 6.571420987081134e-07, "loss": 0.3714, "step": 27067 }, { "epoch": 88.74754098360656, "grad_norm": 5.145694255828857, "learning_rate": 6.56763562145446e-07, "loss": 0.303, "step": 27068 }, { "epoch": 88.75081967213114, "grad_norm": 3.877821683883667, "learning_rate": 6.563851309374847e-07, "loss": 0.2544, "step": 27069 }, { "epoch": 88.75409836065573, "grad_norm": 5.116362571716309, "learning_rate": 6.560068050884961e-07, "loss": 0.3696, "step": 27070 }, { "epoch": 88.75737704918033, "grad_norm": 8.108296394348145, "learning_rate": 6.556285846027444e-07, "loss": 0.5088, "step": 27071 }, { "epoch": 88.76065573770492, "grad_norm": 5.522564888000488, "learning_rate": 6.552504694844974e-07, "loss": 0.6588, "step": 27072 }, { "epoch": 88.76393442622951, "grad_norm": 4.907853126525879, "learning_rate": 6.548724597380174e-07, "loss": 0.2547, "step": 27073 }, { "epoch": 88.7672131147541, "grad_norm": 5.3267598152160645, "learning_rate": 6.544945553675663e-07, "loss": 0.3256, "step": 27074 }, { "epoch": 88.77049180327869, "grad_norm": 4.879147052764893, "learning_rate": 6.541167563774065e-07, "loss": 0.2146, "step": 27075 }, { "epoch": 88.77377049180328, "grad_norm": 5.540156364440918, "learning_rate": 6.537390627717977e-07, "loss": 0.34, "step": 27076 }, { "epoch": 88.77704918032786, "grad_norm": 4.518277168273926, "learning_rate": 6.533614745549977e-07, "loss": 0.2527, "step": 27077 }, { "epoch": 88.78032786885245, "grad_norm": 98.16169738769531, "learning_rate": 6.529839917312664e-07, "loss": 0.2861, "step": 27078 }, { "epoch": 88.78360655737706, "grad_norm": 8.884652137756348, "learning_rate": 6.526066143048593e-07, "loss": 0.3805, "step": 27079 }, { "epoch": 88.78688524590164, "grad_norm": 4.29354190826416, "learning_rate": 6.522293422800308e-07, "loss": 0.2183, "step": 27080 }, { "epoch": 88.79016393442623, "grad_norm": 7.268932819366455, "learning_rate": 6.518521756610352e-07, "loss": 0.2062, "step": 27081 }, { "epoch": 88.79344262295082, "grad_norm": 6.46542501449585, "learning_rate": 6.51475114452127e-07, "loss": 0.3736, "step": 27082 }, { "epoch": 88.79672131147541, "grad_norm": 4.558140277862549, "learning_rate": 6.51098158657556e-07, "loss": 0.3411, "step": 27083 }, { "epoch": 88.8, "grad_norm": 5.88116979598999, "learning_rate": 6.507213082815745e-07, "loss": 0.3059, "step": 27084 }, { "epoch": 88.80327868852459, "grad_norm": 6.923539161682129, "learning_rate": 6.5034456332843e-07, "loss": 0.4076, "step": 27085 }, { "epoch": 88.80655737704917, "grad_norm": 6.4198174476623535, "learning_rate": 6.499679238023726e-07, "loss": 0.4049, "step": 27086 }, { "epoch": 88.80983606557378, "grad_norm": 4.374819278717041, "learning_rate": 6.495913897076489e-07, "loss": 0.4825, "step": 27087 }, { "epoch": 88.81311475409836, "grad_norm": 6.263607025146484, "learning_rate": 6.492149610485032e-07, "loss": 0.3632, "step": 27088 }, { "epoch": 88.81639344262295, "grad_norm": 3.8092331886291504, "learning_rate": 6.488386378291823e-07, "loss": 0.3292, "step": 27089 }, { "epoch": 88.81967213114754, "grad_norm": 10.474151611328125, "learning_rate": 6.48462420053927e-07, "loss": 0.4721, "step": 27090 }, { "epoch": 88.82295081967213, "grad_norm": 4.918569087982178, "learning_rate": 6.480863077269827e-07, "loss": 0.3632, "step": 27091 }, { "epoch": 88.82622950819672, "grad_norm": 4.654695987701416, "learning_rate": 6.477103008525875e-07, "loss": 0.3129, "step": 27092 }, { "epoch": 88.8295081967213, "grad_norm": 4.947610378265381, "learning_rate": 6.473343994349845e-07, "loss": 0.3829, "step": 27093 }, { "epoch": 88.8327868852459, "grad_norm": 5.091281890869141, "learning_rate": 6.46958603478407e-07, "loss": 0.4677, "step": 27094 }, { "epoch": 88.8360655737705, "grad_norm": 4.6181488037109375, "learning_rate": 6.465829129870993e-07, "loss": 0.4265, "step": 27095 }, { "epoch": 88.83934426229509, "grad_norm": 8.608736038208008, "learning_rate": 6.462073279652936e-07, "loss": 0.3844, "step": 27096 }, { "epoch": 88.84262295081967, "grad_norm": 5.597352981567383, "learning_rate": 6.458318484172255e-07, "loss": 0.3966, "step": 27097 }, { "epoch": 88.84590163934426, "grad_norm": 4.145051002502441, "learning_rate": 6.454564743471281e-07, "loss": 0.4326, "step": 27098 }, { "epoch": 88.84918032786885, "grad_norm": 4.599108695983887, "learning_rate": 6.45081205759236e-07, "loss": 0.4298, "step": 27099 }, { "epoch": 88.85245901639344, "grad_norm": 4.775196075439453, "learning_rate": 6.447060426577812e-07, "loss": 0.2921, "step": 27100 }, { "epoch": 88.85573770491803, "grad_norm": 5.3563385009765625, "learning_rate": 6.443309850469915e-07, "loss": 0.2301, "step": 27101 }, { "epoch": 88.85901639344263, "grad_norm": 3.3838179111480713, "learning_rate": 6.43956032931099e-07, "loss": 0.1578, "step": 27102 }, { "epoch": 88.86229508196722, "grad_norm": 4.685023784637451, "learning_rate": 6.435811863143271e-07, "loss": 0.3133, "step": 27103 }, { "epoch": 88.8655737704918, "grad_norm": 4.432811260223389, "learning_rate": 6.432064452009079e-07, "loss": 0.4404, "step": 27104 }, { "epoch": 88.8688524590164, "grad_norm": 4.600676536560059, "learning_rate": 6.428318095950648e-07, "loss": 0.3339, "step": 27105 }, { "epoch": 88.87213114754098, "grad_norm": 4.838573455810547, "learning_rate": 6.424572795010209e-07, "loss": 0.4356, "step": 27106 }, { "epoch": 88.87540983606557, "grad_norm": 4.157340049743652, "learning_rate": 6.420828549229996e-07, "loss": 0.4269, "step": 27107 }, { "epoch": 88.87868852459016, "grad_norm": 5.588371276855469, "learning_rate": 6.417085358652264e-07, "loss": 0.5417, "step": 27108 }, { "epoch": 88.88196721311475, "grad_norm": 4.466794490814209, "learning_rate": 6.413343223319191e-07, "loss": 0.2759, "step": 27109 }, { "epoch": 88.88524590163935, "grad_norm": 4.044095993041992, "learning_rate": 6.409602143272975e-07, "loss": 0.192, "step": 27110 }, { "epoch": 88.88852459016394, "grad_norm": 4.684042453765869, "learning_rate": 6.405862118555784e-07, "loss": 0.3649, "step": 27111 }, { "epoch": 88.89180327868853, "grad_norm": 6.1217193603515625, "learning_rate": 6.402123149209838e-07, "loss": 0.4092, "step": 27112 }, { "epoch": 88.89508196721312, "grad_norm": 4.5438127517700195, "learning_rate": 6.398385235277271e-07, "loss": 0.1458, "step": 27113 }, { "epoch": 88.8983606557377, "grad_norm": 4.646644115447998, "learning_rate": 6.394648376800217e-07, "loss": 0.3492, "step": 27114 }, { "epoch": 88.90163934426229, "grad_norm": 4.20334529876709, "learning_rate": 6.39091257382084e-07, "loss": 0.2604, "step": 27115 }, { "epoch": 88.90491803278688, "grad_norm": 5.81234073638916, "learning_rate": 6.387177826381241e-07, "loss": 0.465, "step": 27116 }, { "epoch": 88.90819672131147, "grad_norm": 6.421924114227295, "learning_rate": 6.383444134523554e-07, "loss": 0.4565, "step": 27117 }, { "epoch": 88.91147540983607, "grad_norm": 5.545001029968262, "learning_rate": 6.379711498289864e-07, "loss": 0.4219, "step": 27118 }, { "epoch": 88.91475409836066, "grad_norm": 4.6396894454956055, "learning_rate": 6.375979917722286e-07, "loss": 0.6159, "step": 27119 }, { "epoch": 88.91803278688525, "grad_norm": 9.051016807556152, "learning_rate": 6.37224939286285e-07, "loss": 0.4949, "step": 27120 }, { "epoch": 88.92131147540984, "grad_norm": 3.834057569503784, "learning_rate": 6.368519923753669e-07, "loss": 0.2795, "step": 27121 }, { "epoch": 88.92459016393443, "grad_norm": 5.862525939941406, "learning_rate": 6.364791510436774e-07, "loss": 0.3971, "step": 27122 }, { "epoch": 88.92786885245901, "grad_norm": 5.421848773956299, "learning_rate": 6.361064152954199e-07, "loss": 0.3694, "step": 27123 }, { "epoch": 88.9311475409836, "grad_norm": 4.715566635131836, "learning_rate": 6.357337851348e-07, "loss": 0.3339, "step": 27124 }, { "epoch": 88.93442622950819, "grad_norm": 11.334310531616211, "learning_rate": 6.353612605660186e-07, "loss": 0.5526, "step": 27125 }, { "epoch": 88.9377049180328, "grad_norm": 4.642408847808838, "learning_rate": 6.349888415932737e-07, "loss": 0.3048, "step": 27126 }, { "epoch": 88.94098360655738, "grad_norm": 4.001103401184082, "learning_rate": 6.346165282207684e-07, "loss": 0.1427, "step": 27127 }, { "epoch": 88.94426229508197, "grad_norm": 8.471722602844238, "learning_rate": 6.342443204526993e-07, "loss": 0.2902, "step": 27128 }, { "epoch": 88.94754098360656, "grad_norm": 5.785065174102783, "learning_rate": 6.338722182932632e-07, "loss": 0.3069, "step": 27129 }, { "epoch": 88.95081967213115, "grad_norm": 4.415701866149902, "learning_rate": 6.335002217466557e-07, "loss": 0.3075, "step": 27130 }, { "epoch": 88.95409836065573, "grad_norm": 4.7169413566589355, "learning_rate": 6.331283308170721e-07, "loss": 0.3022, "step": 27131 }, { "epoch": 88.95737704918032, "grad_norm": 6.714138031005859, "learning_rate": 6.32756545508707e-07, "loss": 0.4413, "step": 27132 }, { "epoch": 88.96065573770491, "grad_norm": 5.713794231414795, "learning_rate": 6.323848658257493e-07, "loss": 0.6092, "step": 27133 }, { "epoch": 88.96393442622951, "grad_norm": 5.416301727294922, "learning_rate": 6.320132917723931e-07, "loss": 0.4255, "step": 27134 }, { "epoch": 88.9672131147541, "grad_norm": 4.722077369689941, "learning_rate": 6.316418233528277e-07, "loss": 0.7356, "step": 27135 }, { "epoch": 88.97049180327869, "grad_norm": 4.245290279388428, "learning_rate": 6.312704605712417e-07, "loss": 0.218, "step": 27136 }, { "epoch": 88.97377049180328, "grad_norm": 6.701041221618652, "learning_rate": 6.308992034318196e-07, "loss": 0.5801, "step": 27137 }, { "epoch": 88.97704918032787, "grad_norm": 4.971004486083984, "learning_rate": 6.305280519387525e-07, "loss": 0.3132, "step": 27138 }, { "epoch": 88.98032786885246, "grad_norm": 5.682275295257568, "learning_rate": 6.301570060962237e-07, "loss": 0.4225, "step": 27139 }, { "epoch": 88.98360655737704, "grad_norm": 4.007609844207764, "learning_rate": 6.297860659084176e-07, "loss": 0.2012, "step": 27140 }, { "epoch": 88.98688524590163, "grad_norm": 4.478569507598877, "learning_rate": 6.294152313795155e-07, "loss": 0.3379, "step": 27141 }, { "epoch": 88.99016393442623, "grad_norm": 5.326605796813965, "learning_rate": 6.290445025136971e-07, "loss": 0.3027, "step": 27142 }, { "epoch": 88.99344262295082, "grad_norm": 5.856112003326416, "learning_rate": 6.286738793151482e-07, "loss": 0.3621, "step": 27143 }, { "epoch": 88.99672131147541, "grad_norm": 4.704978942871094, "learning_rate": 6.283033617880441e-07, "loss": 0.1693, "step": 27144 }, { "epoch": 89.0, "grad_norm": 14.113816261291504, "learning_rate": 6.279329499365649e-07, "loss": 0.3962, "step": 27145 }, { "epoch": 89.00327868852459, "grad_norm": 4.857480049133301, "learning_rate": 6.27562643764883e-07, "loss": 0.1869, "step": 27146 }, { "epoch": 89.00655737704918, "grad_norm": 7.043089389801025, "learning_rate": 6.271924432771803e-07, "loss": 0.35, "step": 27147 }, { "epoch": 89.00983606557377, "grad_norm": 7.988589763641357, "learning_rate": 6.26822348477627e-07, "loss": 0.303, "step": 27148 }, { "epoch": 89.01311475409837, "grad_norm": 4.723181247711182, "learning_rate": 6.264523593703975e-07, "loss": 0.3679, "step": 27149 }, { "epoch": 89.01639344262296, "grad_norm": 4.531446933746338, "learning_rate": 6.260824759596629e-07, "loss": 0.419, "step": 27150 }, { "epoch": 89.01967213114754, "grad_norm": 4.499668598175049, "learning_rate": 6.257126982495965e-07, "loss": 0.3456, "step": 27151 }, { "epoch": 89.02295081967213, "grad_norm": 4.308209419250488, "learning_rate": 6.25343026244365e-07, "loss": 0.4495, "step": 27152 }, { "epoch": 89.02622950819672, "grad_norm": 5.086734294891357, "learning_rate": 6.249734599481394e-07, "loss": 0.3942, "step": 27153 }, { "epoch": 89.02950819672131, "grad_norm": 4.706677436828613, "learning_rate": 6.246039993650844e-07, "loss": 0.2443, "step": 27154 }, { "epoch": 89.0327868852459, "grad_norm": 4.165597438812256, "learning_rate": 6.242346444993664e-07, "loss": 0.2752, "step": 27155 }, { "epoch": 89.03606557377049, "grad_norm": 4.788019180297852, "learning_rate": 6.238653953551521e-07, "loss": 0.3236, "step": 27156 }, { "epoch": 89.03934426229509, "grad_norm": 5.102344036102295, "learning_rate": 6.23496251936605e-07, "loss": 0.4615, "step": 27157 }, { "epoch": 89.04262295081968, "grad_norm": 4.7394938468933105, "learning_rate": 6.231272142478862e-07, "loss": 0.34, "step": 27158 }, { "epoch": 89.04590163934427, "grad_norm": 4.951510906219482, "learning_rate": 6.227582822931566e-07, "loss": 0.436, "step": 27159 }, { "epoch": 89.04918032786885, "grad_norm": 3.8155694007873535, "learning_rate": 6.223894560765786e-07, "loss": 0.2422, "step": 27160 }, { "epoch": 89.05245901639344, "grad_norm": 6.494748592376709, "learning_rate": 6.220207356023101e-07, "loss": 0.4591, "step": 27161 }, { "epoch": 89.05573770491803, "grad_norm": 6.754678726196289, "learning_rate": 6.216521208745074e-07, "loss": 0.4444, "step": 27162 }, { "epoch": 89.05901639344262, "grad_norm": 4.767094612121582, "learning_rate": 6.212836118973276e-07, "loss": 0.2206, "step": 27163 }, { "epoch": 89.0622950819672, "grad_norm": 5.102421760559082, "learning_rate": 6.20915208674927e-07, "loss": 0.4844, "step": 27164 }, { "epoch": 89.06557377049181, "grad_norm": 5.9871015548706055, "learning_rate": 6.205469112114603e-07, "loss": 0.5563, "step": 27165 }, { "epoch": 89.0688524590164, "grad_norm": 4.642480373382568, "learning_rate": 6.201787195110787e-07, "loss": 0.2723, "step": 27166 }, { "epoch": 89.07213114754099, "grad_norm": 5.477230072021484, "learning_rate": 6.198106335779342e-07, "loss": 0.3667, "step": 27167 }, { "epoch": 89.07540983606557, "grad_norm": 4.5639142990112305, "learning_rate": 6.194426534161768e-07, "loss": 0.3428, "step": 27168 }, { "epoch": 89.07868852459016, "grad_norm": 19.59790802001953, "learning_rate": 6.190747790299589e-07, "loss": 0.3623, "step": 27169 }, { "epoch": 89.08196721311475, "grad_norm": 10.820808410644531, "learning_rate": 6.187070104234261e-07, "loss": 0.2877, "step": 27170 }, { "epoch": 89.08524590163934, "grad_norm": 7.227452754974365, "learning_rate": 6.183393476007248e-07, "loss": 0.2853, "step": 27171 }, { "epoch": 89.08852459016393, "grad_norm": 6.088459491729736, "learning_rate": 6.179717905660021e-07, "loss": 0.2904, "step": 27172 }, { "epoch": 89.09180327868853, "grad_norm": 5.279173851013184, "learning_rate": 6.176043393234021e-07, "loss": 0.3894, "step": 27173 }, { "epoch": 89.09508196721312, "grad_norm": 4.807445049285889, "learning_rate": 6.172369938770695e-07, "loss": 0.2412, "step": 27174 }, { "epoch": 89.09836065573771, "grad_norm": 4.708181381225586, "learning_rate": 6.168697542311453e-07, "loss": 0.258, "step": 27175 }, { "epoch": 89.1016393442623, "grad_norm": 5.24395227432251, "learning_rate": 6.165026203897695e-07, "loss": 0.1953, "step": 27176 }, { "epoch": 89.10491803278688, "grad_norm": 5.1705498695373535, "learning_rate": 6.161355923570844e-07, "loss": 0.3458, "step": 27177 }, { "epoch": 89.10819672131147, "grad_norm": 4.057231903076172, "learning_rate": 6.157686701372267e-07, "loss": 0.3771, "step": 27178 }, { "epoch": 89.11147540983606, "grad_norm": 4.929867267608643, "learning_rate": 6.154018537343331e-07, "loss": 0.3549, "step": 27179 }, { "epoch": 89.11475409836065, "grad_norm": 5.316378593444824, "learning_rate": 6.150351431525425e-07, "loss": 0.2914, "step": 27180 }, { "epoch": 89.11803278688525, "grad_norm": 4.680260181427002, "learning_rate": 6.146685383959894e-07, "loss": 0.4851, "step": 27181 }, { "epoch": 89.12131147540984, "grad_norm": 4.88744592666626, "learning_rate": 6.143020394688049e-07, "loss": 0.6355, "step": 27182 }, { "epoch": 89.12459016393443, "grad_norm": 6.598453044891357, "learning_rate": 6.139356463751245e-07, "loss": 0.4208, "step": 27183 }, { "epoch": 89.12786885245902, "grad_norm": 9.148674011230469, "learning_rate": 6.135693591190795e-07, "loss": 0.3665, "step": 27184 }, { "epoch": 89.1311475409836, "grad_norm": 3.9640913009643555, "learning_rate": 6.132031777047976e-07, "loss": 0.3168, "step": 27185 }, { "epoch": 89.1344262295082, "grad_norm": 6.067571640014648, "learning_rate": 6.12837102136411e-07, "loss": 0.3719, "step": 27186 }, { "epoch": 89.13770491803278, "grad_norm": 5.482446670532227, "learning_rate": 6.124711324180466e-07, "loss": 0.4141, "step": 27187 }, { "epoch": 89.14098360655737, "grad_norm": 4.536748886108398, "learning_rate": 6.121052685538298e-07, "loss": 0.2626, "step": 27188 }, { "epoch": 89.14426229508197, "grad_norm": 3.7854645252227783, "learning_rate": 6.117395105478863e-07, "loss": 0.3205, "step": 27189 }, { "epoch": 89.14754098360656, "grad_norm": 4.47396993637085, "learning_rate": 6.113738584043427e-07, "loss": 0.5243, "step": 27190 }, { "epoch": 89.15081967213115, "grad_norm": 8.221010208129883, "learning_rate": 6.110083121273214e-07, "loss": 0.3503, "step": 27191 }, { "epoch": 89.15409836065574, "grad_norm": 4.585372447967529, "learning_rate": 6.106428717209423e-07, "loss": 0.307, "step": 27192 }, { "epoch": 89.15737704918033, "grad_norm": 4.320398807525635, "learning_rate": 6.102775371893277e-07, "loss": 0.2808, "step": 27193 }, { "epoch": 89.16065573770491, "grad_norm": 4.531191349029541, "learning_rate": 6.099123085365954e-07, "loss": 0.2588, "step": 27194 }, { "epoch": 89.1639344262295, "grad_norm": 6.197641849517822, "learning_rate": 6.095471857668667e-07, "loss": 0.3048, "step": 27195 }, { "epoch": 89.1672131147541, "grad_norm": 4.594581604003906, "learning_rate": 6.09182168884257e-07, "loss": 0.3075, "step": 27196 }, { "epoch": 89.1704918032787, "grad_norm": 3.208811044692993, "learning_rate": 6.088172578928819e-07, "loss": 0.1932, "step": 27197 }, { "epoch": 89.17377049180328, "grad_norm": 5.09036922454834, "learning_rate": 6.084524527968549e-07, "loss": 0.4386, "step": 27198 }, { "epoch": 89.17704918032787, "grad_norm": 4.302351474761963, "learning_rate": 6.080877536002938e-07, "loss": 0.3302, "step": 27199 }, { "epoch": 89.18032786885246, "grad_norm": 4.616005897521973, "learning_rate": 6.077231603073075e-07, "loss": 0.2989, "step": 27200 }, { "epoch": 89.18360655737705, "grad_norm": 3.7255771160125732, "learning_rate": 6.073586729220082e-07, "loss": 0.2518, "step": 27201 }, { "epoch": 89.18688524590164, "grad_norm": 4.701780796051025, "learning_rate": 6.06994291448505e-07, "loss": 0.3901, "step": 27202 }, { "epoch": 89.19016393442622, "grad_norm": 4.886799335479736, "learning_rate": 6.066300158909077e-07, "loss": 0.2903, "step": 27203 }, { "epoch": 89.19344262295083, "grad_norm": 3.8537683486938477, "learning_rate": 6.062658462533244e-07, "loss": 0.584, "step": 27204 }, { "epoch": 89.19672131147541, "grad_norm": 12.17794132232666, "learning_rate": 6.059017825398606e-07, "loss": 0.4251, "step": 27205 }, { "epoch": 89.2, "grad_norm": 7.862053871154785, "learning_rate": 6.055378247546217e-07, "loss": 0.2923, "step": 27206 }, { "epoch": 89.20327868852459, "grad_norm": 5.21627140045166, "learning_rate": 6.051739729017103e-07, "loss": 0.3565, "step": 27207 }, { "epoch": 89.20655737704918, "grad_norm": 12.38406753540039, "learning_rate": 6.048102269852318e-07, "loss": 0.5596, "step": 27208 }, { "epoch": 89.20983606557377, "grad_norm": 14.782525062561035, "learning_rate": 6.044465870092863e-07, "loss": 0.2906, "step": 27209 }, { "epoch": 89.21311475409836, "grad_norm": 4.4035868644714355, "learning_rate": 6.040830529779751e-07, "loss": 0.4799, "step": 27210 }, { "epoch": 89.21639344262294, "grad_norm": 9.421091079711914, "learning_rate": 6.037196248953947e-07, "loss": 0.4631, "step": 27211 }, { "epoch": 89.21967213114755, "grad_norm": 7.581618309020996, "learning_rate": 6.033563027656475e-07, "loss": 0.3033, "step": 27212 }, { "epoch": 89.22295081967214, "grad_norm": 8.381294250488281, "learning_rate": 6.029930865928268e-07, "loss": 0.5409, "step": 27213 }, { "epoch": 89.22622950819672, "grad_norm": 5.631507396697998, "learning_rate": 6.026299763810306e-07, "loss": 0.5116, "step": 27214 }, { "epoch": 89.22950819672131, "grad_norm": 3.991769790649414, "learning_rate": 6.022669721343499e-07, "loss": 0.2808, "step": 27215 }, { "epoch": 89.2327868852459, "grad_norm": 6.0971198081970215, "learning_rate": 6.019040738568826e-07, "loss": 0.3213, "step": 27216 }, { "epoch": 89.23606557377049, "grad_norm": 4.599724769592285, "learning_rate": 6.015412815527177e-07, "loss": 0.3365, "step": 27217 }, { "epoch": 89.23934426229508, "grad_norm": 6.385594367980957, "learning_rate": 6.011785952259474e-07, "loss": 0.2018, "step": 27218 }, { "epoch": 89.24262295081967, "grad_norm": 4.723684787750244, "learning_rate": 6.008160148806596e-07, "loss": 0.3908, "step": 27219 }, { "epoch": 89.24590163934427, "grad_norm": 4.367820739746094, "learning_rate": 6.004535405209433e-07, "loss": 0.3896, "step": 27220 }, { "epoch": 89.24918032786886, "grad_norm": 5.232367992401123, "learning_rate": 6.000911721508884e-07, "loss": 0.4356, "step": 27221 }, { "epoch": 89.25245901639344, "grad_norm": 4.114328384399414, "learning_rate": 5.997289097745784e-07, "loss": 0.3438, "step": 27222 }, { "epoch": 89.25573770491803, "grad_norm": 4.531684875488281, "learning_rate": 5.99366753396099e-07, "loss": 0.4883, "step": 27223 }, { "epoch": 89.25901639344262, "grad_norm": 4.788871765136719, "learning_rate": 5.990047030195323e-07, "loss": 0.2969, "step": 27224 }, { "epoch": 89.26229508196721, "grad_norm": 3.446112632751465, "learning_rate": 5.986427586489629e-07, "loss": 0.1401, "step": 27225 }, { "epoch": 89.2655737704918, "grad_norm": 5.0351057052612305, "learning_rate": 5.982809202884721e-07, "loss": 0.3653, "step": 27226 }, { "epoch": 89.26885245901639, "grad_norm": 4.0366740226745605, "learning_rate": 5.979191879421386e-07, "loss": 0.3009, "step": 27227 }, { "epoch": 89.27213114754099, "grad_norm": 4.289886951446533, "learning_rate": 5.975575616140406e-07, "loss": 0.3029, "step": 27228 }, { "epoch": 89.27540983606558, "grad_norm": 7.68772554397583, "learning_rate": 5.971960413082589e-07, "loss": 0.2174, "step": 27229 }, { "epoch": 89.27868852459017, "grad_norm": 5.830977439880371, "learning_rate": 5.968346270288683e-07, "loss": 0.5314, "step": 27230 }, { "epoch": 89.28196721311475, "grad_norm": 4.36475133895874, "learning_rate": 5.964733187799444e-07, "loss": 0.4741, "step": 27231 }, { "epoch": 89.28524590163934, "grad_norm": 5.02277135848999, "learning_rate": 5.961121165655592e-07, "loss": 0.4703, "step": 27232 }, { "epoch": 89.28852459016393, "grad_norm": 8.749576568603516, "learning_rate": 5.957510203897898e-07, "loss": 0.2838, "step": 27233 }, { "epoch": 89.29180327868852, "grad_norm": 5.071040153503418, "learning_rate": 5.953900302567039e-07, "loss": 0.5232, "step": 27234 }, { "epoch": 89.29508196721312, "grad_norm": 5.035222053527832, "learning_rate": 5.950291461703739e-07, "loss": 0.3004, "step": 27235 }, { "epoch": 89.29836065573771, "grad_norm": 8.261739730834961, "learning_rate": 5.946683681348697e-07, "loss": 0.4334, "step": 27236 }, { "epoch": 89.3016393442623, "grad_norm": 4.658605575561523, "learning_rate": 5.943076961542594e-07, "loss": 0.15, "step": 27237 }, { "epoch": 89.30491803278689, "grad_norm": 5.025073051452637, "learning_rate": 5.939471302326072e-07, "loss": 0.35, "step": 27238 }, { "epoch": 89.30819672131148, "grad_norm": 4.525402545928955, "learning_rate": 5.935866703739824e-07, "loss": 0.1979, "step": 27239 }, { "epoch": 89.31147540983606, "grad_norm": 5.461381912231445, "learning_rate": 5.932263165824481e-07, "loss": 0.1322, "step": 27240 }, { "epoch": 89.31475409836065, "grad_norm": 4.778416156768799, "learning_rate": 5.928660688620658e-07, "loss": 0.3071, "step": 27241 }, { "epoch": 89.31803278688524, "grad_norm": 5.216557025909424, "learning_rate": 5.92505927216902e-07, "loss": 0.3268, "step": 27242 }, { "epoch": 89.32131147540984, "grad_norm": 4.637772560119629, "learning_rate": 5.921458916510147e-07, "loss": 0.2441, "step": 27243 }, { "epoch": 89.32459016393443, "grad_norm": 6.129844665527344, "learning_rate": 5.91785962168464e-07, "loss": 0.3952, "step": 27244 }, { "epoch": 89.32786885245902, "grad_norm": 4.73242712020874, "learning_rate": 5.914261387733089e-07, "loss": 0.2537, "step": 27245 }, { "epoch": 89.33114754098361, "grad_norm": 4.9885358810424805, "learning_rate": 5.910664214696049e-07, "loss": 0.3259, "step": 27246 }, { "epoch": 89.3344262295082, "grad_norm": 4.640655994415283, "learning_rate": 5.907068102614122e-07, "loss": 0.5221, "step": 27247 }, { "epoch": 89.33770491803278, "grad_norm": 5.190974712371826, "learning_rate": 5.903473051527831e-07, "loss": 0.4916, "step": 27248 }, { "epoch": 89.34098360655737, "grad_norm": 127.23004150390625, "learning_rate": 5.899879061477709e-07, "loss": 0.2145, "step": 27249 }, { "epoch": 89.34426229508196, "grad_norm": 5.671969890594482, "learning_rate": 5.896286132504281e-07, "loss": 0.2323, "step": 27250 }, { "epoch": 89.34754098360656, "grad_norm": 5.461749076843262, "learning_rate": 5.89269426464808e-07, "loss": 0.3586, "step": 27251 }, { "epoch": 89.35081967213115, "grad_norm": 11.08254337310791, "learning_rate": 5.889103457949608e-07, "loss": 0.5548, "step": 27252 }, { "epoch": 89.35409836065574, "grad_norm": 6.125171661376953, "learning_rate": 5.885513712449331e-07, "loss": 0.4292, "step": 27253 }, { "epoch": 89.35737704918033, "grad_norm": 7.296778678894043, "learning_rate": 5.881925028187741e-07, "loss": 0.3793, "step": 27254 }, { "epoch": 89.36065573770492, "grad_norm": 6.368175983428955, "learning_rate": 5.878337405205314e-07, "loss": 0.4241, "step": 27255 }, { "epoch": 89.3639344262295, "grad_norm": 6.4248456954956055, "learning_rate": 5.874750843542487e-07, "loss": 0.4266, "step": 27256 }, { "epoch": 89.3672131147541, "grad_norm": 9.967083930969238, "learning_rate": 5.871165343239726e-07, "loss": 0.5965, "step": 27257 }, { "epoch": 89.37049180327868, "grad_norm": 4.732659816741943, "learning_rate": 5.867580904337433e-07, "loss": 0.56, "step": 27258 }, { "epoch": 89.37377049180328, "grad_norm": 5.925509452819824, "learning_rate": 5.863997526876019e-07, "loss": 0.3007, "step": 27259 }, { "epoch": 89.37704918032787, "grad_norm": 3.6283931732177734, "learning_rate": 5.860415210895942e-07, "loss": 0.3888, "step": 27260 }, { "epoch": 89.38032786885246, "grad_norm": 4.906674385070801, "learning_rate": 5.856833956437546e-07, "loss": 0.2211, "step": 27261 }, { "epoch": 89.38360655737705, "grad_norm": 6.046024322509766, "learning_rate": 5.853253763541244e-07, "loss": 0.3259, "step": 27262 }, { "epoch": 89.38688524590164, "grad_norm": 5.800114631652832, "learning_rate": 5.849674632247382e-07, "loss": 0.2542, "step": 27263 }, { "epoch": 89.39016393442623, "grad_norm": 4.617380619049072, "learning_rate": 5.846096562596338e-07, "loss": 0.4444, "step": 27264 }, { "epoch": 89.39344262295081, "grad_norm": 4.002053260803223, "learning_rate": 5.842519554628445e-07, "loss": 0.2409, "step": 27265 }, { "epoch": 89.3967213114754, "grad_norm": 8.681689262390137, "learning_rate": 5.838943608384051e-07, "loss": 0.4474, "step": 27266 }, { "epoch": 89.4, "grad_norm": 5.121934413909912, "learning_rate": 5.835368723903456e-07, "loss": 0.3836, "step": 27267 }, { "epoch": 89.4032786885246, "grad_norm": 6.110713481903076, "learning_rate": 5.831794901226995e-07, "loss": 0.4219, "step": 27268 }, { "epoch": 89.40655737704918, "grad_norm": 4.5927653312683105, "learning_rate": 5.828222140394957e-07, "loss": 0.4317, "step": 27269 }, { "epoch": 89.40983606557377, "grad_norm": 4.188055515289307, "learning_rate": 5.824650441447632e-07, "loss": 0.4046, "step": 27270 }, { "epoch": 89.41311475409836, "grad_norm": 7.95428466796875, "learning_rate": 5.821079804425301e-07, "loss": 0.4746, "step": 27271 }, { "epoch": 89.41639344262295, "grad_norm": 4.989520072937012, "learning_rate": 5.817510229368184e-07, "loss": 0.3497, "step": 27272 }, { "epoch": 89.41967213114754, "grad_norm": 4.874398708343506, "learning_rate": 5.813941716316585e-07, "loss": 0.2063, "step": 27273 }, { "epoch": 89.42295081967212, "grad_norm": 6.599970817565918, "learning_rate": 5.810374265310726e-07, "loss": 0.315, "step": 27274 }, { "epoch": 89.42622950819673, "grad_norm": 4.813044548034668, "learning_rate": 5.80680787639083e-07, "loss": 0.247, "step": 27275 }, { "epoch": 89.42950819672132, "grad_norm": 9.483183860778809, "learning_rate": 5.803242549597099e-07, "loss": 0.3629, "step": 27276 }, { "epoch": 89.4327868852459, "grad_norm": 4.737753391265869, "learning_rate": 5.799678284969757e-07, "loss": 0.2538, "step": 27277 }, { "epoch": 89.43606557377049, "grad_norm": 7.493487358093262, "learning_rate": 5.796115082548981e-07, "loss": 0.2676, "step": 27278 }, { "epoch": 89.43934426229508, "grad_norm": 5.067250728607178, "learning_rate": 5.792552942374962e-07, "loss": 0.2582, "step": 27279 }, { "epoch": 89.44262295081967, "grad_norm": 4.486485481262207, "learning_rate": 5.788991864487847e-07, "loss": 0.4649, "step": 27280 }, { "epoch": 89.44590163934426, "grad_norm": 6.1081862449646, "learning_rate": 5.785431848927814e-07, "loss": 0.5297, "step": 27281 }, { "epoch": 89.44918032786886, "grad_norm": 5.637501239776611, "learning_rate": 5.781872895735008e-07, "loss": 0.3847, "step": 27282 }, { "epoch": 89.45245901639345, "grad_norm": 4.645248889923096, "learning_rate": 5.77831500494953e-07, "loss": 0.3968, "step": 27283 }, { "epoch": 89.45573770491804, "grad_norm": 4.928693771362305, "learning_rate": 5.774758176611505e-07, "loss": 0.3752, "step": 27284 }, { "epoch": 89.45901639344262, "grad_norm": 7.207882404327393, "learning_rate": 5.771202410761079e-07, "loss": 0.3981, "step": 27285 }, { "epoch": 89.46229508196721, "grad_norm": 4.30542516708374, "learning_rate": 5.767647707438306e-07, "loss": 0.4393, "step": 27286 }, { "epoch": 89.4655737704918, "grad_norm": 6.787459850311279, "learning_rate": 5.764094066683268e-07, "loss": 0.3104, "step": 27287 }, { "epoch": 89.46885245901639, "grad_norm": 5.870171070098877, "learning_rate": 5.760541488536076e-07, "loss": 0.5681, "step": 27288 }, { "epoch": 89.47213114754098, "grad_norm": 5.842123508453369, "learning_rate": 5.756989973036753e-07, "loss": 0.4252, "step": 27289 }, { "epoch": 89.47540983606558, "grad_norm": 12.02243709564209, "learning_rate": 5.753439520225356e-07, "loss": 0.4242, "step": 27290 }, { "epoch": 89.47868852459017, "grad_norm": 3.8542957305908203, "learning_rate": 5.74989013014191e-07, "loss": 0.5412, "step": 27291 }, { "epoch": 89.48196721311476, "grad_norm": 7.2708868980407715, "learning_rate": 5.746341802826461e-07, "loss": 0.3495, "step": 27292 }, { "epoch": 89.48524590163935, "grad_norm": 4.959430694580078, "learning_rate": 5.742794538319008e-07, "loss": 0.2486, "step": 27293 }, { "epoch": 89.48852459016393, "grad_norm": 4.8995513916015625, "learning_rate": 5.739248336659531e-07, "loss": 0.3575, "step": 27294 }, { "epoch": 89.49180327868852, "grad_norm": 5.947575569152832, "learning_rate": 5.735703197888054e-07, "loss": 0.264, "step": 27295 }, { "epoch": 89.49508196721311, "grad_norm": 4.471481800079346, "learning_rate": 5.732159122044534e-07, "loss": 0.361, "step": 27296 }, { "epoch": 89.4983606557377, "grad_norm": 4.593481540679932, "learning_rate": 5.728616109168938e-07, "loss": 0.4696, "step": 27297 }, { "epoch": 89.5016393442623, "grad_norm": 4.645969390869141, "learning_rate": 5.725074159301192e-07, "loss": 0.2001, "step": 27298 }, { "epoch": 89.50491803278689, "grad_norm": 4.623683929443359, "learning_rate": 5.721533272481272e-07, "loss": 0.3287, "step": 27299 }, { "epoch": 89.50819672131148, "grad_norm": 4.6476521492004395, "learning_rate": 5.717993448749093e-07, "loss": 0.3529, "step": 27300 }, { "epoch": 89.51147540983607, "grad_norm": 4.435469150543213, "learning_rate": 5.714454688144556e-07, "loss": 0.5168, "step": 27301 }, { "epoch": 89.51475409836065, "grad_norm": 4.369414329528809, "learning_rate": 5.710916990707571e-07, "loss": 0.3099, "step": 27302 }, { "epoch": 89.51803278688524, "grad_norm": 5.353812217712402, "learning_rate": 5.707380356478042e-07, "loss": 0.2708, "step": 27303 }, { "epoch": 89.52131147540983, "grad_norm": 4.662478446960449, "learning_rate": 5.703844785495838e-07, "loss": 0.4368, "step": 27304 }, { "epoch": 89.52459016393442, "grad_norm": 3.705674648284912, "learning_rate": 5.700310277800836e-07, "loss": 0.2048, "step": 27305 }, { "epoch": 89.52786885245902, "grad_norm": 5.137757301330566, "learning_rate": 5.696776833432882e-07, "loss": 0.3967, "step": 27306 }, { "epoch": 89.53114754098361, "grad_norm": 10.869466781616211, "learning_rate": 5.693244452431801e-07, "loss": 0.3507, "step": 27307 }, { "epoch": 89.5344262295082, "grad_norm": 4.9288859367370605, "learning_rate": 5.68971313483745e-07, "loss": 0.5862, "step": 27308 }, { "epoch": 89.53770491803279, "grad_norm": 4.555391788482666, "learning_rate": 5.68618288068965e-07, "loss": 0.4517, "step": 27309 }, { "epoch": 89.54098360655738, "grad_norm": 4.096534729003906, "learning_rate": 5.682653690028206e-07, "loss": 0.2937, "step": 27310 }, { "epoch": 89.54426229508196, "grad_norm": 4.3204545974731445, "learning_rate": 5.679125562892884e-07, "loss": 0.3487, "step": 27311 }, { "epoch": 89.54754098360655, "grad_norm": 4.04057502746582, "learning_rate": 5.675598499323509e-07, "loss": 0.3348, "step": 27312 }, { "epoch": 89.55081967213114, "grad_norm": 5.453087329864502, "learning_rate": 5.672072499359826e-07, "loss": 0.3177, "step": 27313 }, { "epoch": 89.55409836065574, "grad_norm": 4.400705814361572, "learning_rate": 5.668547563041604e-07, "loss": 0.2681, "step": 27314 }, { "epoch": 89.55737704918033, "grad_norm": 5.7674994468688965, "learning_rate": 5.665023690408577e-07, "loss": 0.3941, "step": 27315 }, { "epoch": 89.56065573770492, "grad_norm": 9.057226181030273, "learning_rate": 5.661500881500514e-07, "loss": 0.4769, "step": 27316 }, { "epoch": 89.56393442622951, "grad_norm": 6.69070291519165, "learning_rate": 5.657979136357106e-07, "loss": 0.3631, "step": 27317 }, { "epoch": 89.5672131147541, "grad_norm": 7.5469770431518555, "learning_rate": 5.654458455018075e-07, "loss": 0.3348, "step": 27318 }, { "epoch": 89.57049180327868, "grad_norm": 7.592597007751465, "learning_rate": 5.650938837523124e-07, "loss": 0.4041, "step": 27319 }, { "epoch": 89.57377049180327, "grad_norm": 5.543453693389893, "learning_rate": 5.64742028391192e-07, "loss": 0.456, "step": 27320 }, { "epoch": 89.57704918032788, "grad_norm": 3.619352102279663, "learning_rate": 5.643902794224165e-07, "loss": 0.29, "step": 27321 }, { "epoch": 89.58032786885246, "grad_norm": 7.8152241706848145, "learning_rate": 5.640386368499517e-07, "loss": 0.3737, "step": 27322 }, { "epoch": 89.58360655737705, "grad_norm": 5.291548728942871, "learning_rate": 5.636871006777622e-07, "loss": 0.4348, "step": 27323 }, { "epoch": 89.58688524590164, "grad_norm": 5.37535285949707, "learning_rate": 5.633356709098103e-07, "loss": 0.369, "step": 27324 }, { "epoch": 89.59016393442623, "grad_norm": 6.199463844299316, "learning_rate": 5.629843475500618e-07, "loss": 0.3908, "step": 27325 }, { "epoch": 89.59344262295082, "grad_norm": 5.008688926696777, "learning_rate": 5.626331306024768e-07, "loss": 0.4301, "step": 27326 }, { "epoch": 89.5967213114754, "grad_norm": 4.1205973625183105, "learning_rate": 5.622820200710156e-07, "loss": 0.1291, "step": 27327 }, { "epoch": 89.6, "grad_norm": 6.470931529998779, "learning_rate": 5.619310159596358e-07, "loss": 0.2391, "step": 27328 }, { "epoch": 89.6032786885246, "grad_norm": 5.873848915100098, "learning_rate": 5.615801182722979e-07, "loss": 0.4257, "step": 27329 }, { "epoch": 89.60655737704919, "grad_norm": 4.290688991546631, "learning_rate": 5.612293270129588e-07, "loss": 0.3656, "step": 27330 }, { "epoch": 89.60983606557377, "grad_norm": 5.727921485900879, "learning_rate": 5.608786421855728e-07, "loss": 0.4769, "step": 27331 }, { "epoch": 89.61311475409836, "grad_norm": 5.698098659515381, "learning_rate": 5.605280637940935e-07, "loss": 0.3388, "step": 27332 }, { "epoch": 89.61639344262295, "grad_norm": 6.597499370574951, "learning_rate": 5.601775918424745e-07, "loss": 0.2639, "step": 27333 }, { "epoch": 89.61967213114754, "grad_norm": 7.748355388641357, "learning_rate": 5.598272263346682e-07, "loss": 0.3763, "step": 27334 }, { "epoch": 89.62295081967213, "grad_norm": 4.863282680511475, "learning_rate": 5.594769672746259e-07, "loss": 0.2812, "step": 27335 }, { "epoch": 89.62622950819672, "grad_norm": 5.411890506744385, "learning_rate": 5.591268146662975e-07, "loss": 0.4054, "step": 27336 }, { "epoch": 89.62950819672132, "grad_norm": 3.8646442890167236, "learning_rate": 5.58776768513628e-07, "loss": 0.2493, "step": 27337 }, { "epoch": 89.6327868852459, "grad_norm": 4.313838481903076, "learning_rate": 5.584268288205674e-07, "loss": 0.6199, "step": 27338 }, { "epoch": 89.6360655737705, "grad_norm": 4.436397075653076, "learning_rate": 5.580769955910625e-07, "loss": 0.5678, "step": 27339 }, { "epoch": 89.63934426229508, "grad_norm": 4.892858505249023, "learning_rate": 5.577272688290547e-07, "loss": 0.2804, "step": 27340 }, { "epoch": 89.64262295081967, "grad_norm": 5.225571155548096, "learning_rate": 5.573776485384908e-07, "loss": 0.1712, "step": 27341 }, { "epoch": 89.64590163934426, "grad_norm": 4.703813552856445, "learning_rate": 5.570281347233109e-07, "loss": 0.2358, "step": 27342 }, { "epoch": 89.64918032786885, "grad_norm": 4.299859046936035, "learning_rate": 5.566787273874563e-07, "loss": 0.4249, "step": 27343 }, { "epoch": 89.65245901639344, "grad_norm": 4.738345146179199, "learning_rate": 5.563294265348695e-07, "loss": 0.4827, "step": 27344 }, { "epoch": 89.65573770491804, "grad_norm": 4.382874965667725, "learning_rate": 5.559802321694874e-07, "loss": 0.264, "step": 27345 }, { "epoch": 89.65901639344263, "grad_norm": 4.5654401779174805, "learning_rate": 5.556311442952455e-07, "loss": 0.2401, "step": 27346 }, { "epoch": 89.66229508196722, "grad_norm": 5.073394298553467, "learning_rate": 5.552821629160842e-07, "loss": 0.1697, "step": 27347 }, { "epoch": 89.6655737704918, "grad_norm": 4.654282093048096, "learning_rate": 5.549332880359359e-07, "loss": 0.4501, "step": 27348 }, { "epoch": 89.66885245901639, "grad_norm": 10.737448692321777, "learning_rate": 5.545845196587352e-07, "loss": 0.3276, "step": 27349 }, { "epoch": 89.67213114754098, "grad_norm": 6.174493789672852, "learning_rate": 5.542358577884144e-07, "loss": 0.3852, "step": 27350 }, { "epoch": 89.67540983606557, "grad_norm": 5.41402006149292, "learning_rate": 5.538873024289059e-07, "loss": 0.391, "step": 27351 }, { "epoch": 89.67868852459016, "grad_norm": 6.066370487213135, "learning_rate": 5.535388535841391e-07, "loss": 0.3009, "step": 27352 }, { "epoch": 89.68196721311476, "grad_norm": 5.329108715057373, "learning_rate": 5.531905112580449e-07, "loss": 0.2764, "step": 27353 }, { "epoch": 89.68524590163935, "grad_norm": 4.3059210777282715, "learning_rate": 5.528422754545471e-07, "loss": 0.4683, "step": 27354 }, { "epoch": 89.68852459016394, "grad_norm": 8.76142692565918, "learning_rate": 5.524941461775779e-07, "loss": 0.3853, "step": 27355 }, { "epoch": 89.69180327868852, "grad_norm": 4.891282081604004, "learning_rate": 5.521461234310599e-07, "loss": 0.6115, "step": 27356 }, { "epoch": 89.69508196721311, "grad_norm": 4.722187042236328, "learning_rate": 5.517982072189165e-07, "loss": 0.3222, "step": 27357 }, { "epoch": 89.6983606557377, "grad_norm": 5.306504726409912, "learning_rate": 5.514503975450735e-07, "loss": 0.4508, "step": 27358 }, { "epoch": 89.70163934426229, "grad_norm": 5.536418437957764, "learning_rate": 5.51102694413449e-07, "loss": 0.4305, "step": 27359 }, { "epoch": 89.70491803278688, "grad_norm": 4.492210865020752, "learning_rate": 5.507550978279674e-07, "loss": 0.2209, "step": 27360 }, { "epoch": 89.70819672131148, "grad_norm": 5.733145236968994, "learning_rate": 5.504076077925468e-07, "loss": 0.5046, "step": 27361 }, { "epoch": 89.71147540983607, "grad_norm": 5.767312049865723, "learning_rate": 5.500602243111064e-07, "loss": 0.4237, "step": 27362 }, { "epoch": 89.71475409836066, "grad_norm": 4.683509826660156, "learning_rate": 5.497129473875606e-07, "loss": 0.4731, "step": 27363 }, { "epoch": 89.71803278688525, "grad_norm": 6.3237504959106445, "learning_rate": 5.493657770258287e-07, "loss": 0.3802, "step": 27364 }, { "epoch": 89.72131147540983, "grad_norm": 4.2594757080078125, "learning_rate": 5.49018713229823e-07, "loss": 0.353, "step": 27365 }, { "epoch": 89.72459016393442, "grad_norm": 4.275397777557373, "learning_rate": 5.486717560034582e-07, "loss": 0.2764, "step": 27366 }, { "epoch": 89.72786885245901, "grad_norm": 4.257629871368408, "learning_rate": 5.483249053506456e-07, "loss": 0.532, "step": 27367 }, { "epoch": 89.73114754098361, "grad_norm": 7.489462852478027, "learning_rate": 5.479781612752976e-07, "loss": 0.4578, "step": 27368 }, { "epoch": 89.7344262295082, "grad_norm": 4.47251033782959, "learning_rate": 5.476315237813235e-07, "loss": 0.4527, "step": 27369 }, { "epoch": 89.73770491803279, "grad_norm": 7.235543727874756, "learning_rate": 5.47284992872632e-07, "loss": 0.4495, "step": 27370 }, { "epoch": 89.74098360655738, "grad_norm": 4.061992645263672, "learning_rate": 5.469385685531314e-07, "loss": 0.2963, "step": 27371 }, { "epoch": 89.74426229508197, "grad_norm": 5.152210712432861, "learning_rate": 5.465922508267252e-07, "loss": 0.4451, "step": 27372 }, { "epoch": 89.74754098360656, "grad_norm": 4.738142013549805, "learning_rate": 5.462460396973212e-07, "loss": 0.301, "step": 27373 }, { "epoch": 89.75081967213114, "grad_norm": 5.433000564575195, "learning_rate": 5.458999351688232e-07, "loss": 0.2609, "step": 27374 }, { "epoch": 89.75409836065573, "grad_norm": 3.9055533409118652, "learning_rate": 5.455539372451335e-07, "loss": 0.2828, "step": 27375 }, { "epoch": 89.75737704918033, "grad_norm": 3.957503318786621, "learning_rate": 5.452080459301512e-07, "loss": 0.372, "step": 27376 }, { "epoch": 89.76065573770492, "grad_norm": 10.030702590942383, "learning_rate": 5.44862261227781e-07, "loss": 0.3574, "step": 27377 }, { "epoch": 89.76393442622951, "grad_norm": 4.172642707824707, "learning_rate": 5.445165831419186e-07, "loss": 0.3624, "step": 27378 }, { "epoch": 89.7672131147541, "grad_norm": 4.0024542808532715, "learning_rate": 5.441710116764642e-07, "loss": 0.2694, "step": 27379 }, { "epoch": 89.77049180327869, "grad_norm": 8.867588996887207, "learning_rate": 5.438255468353104e-07, "loss": 0.2792, "step": 27380 }, { "epoch": 89.77377049180328, "grad_norm": 3.7211906909942627, "learning_rate": 5.434801886223584e-07, "loss": 0.4535, "step": 27381 }, { "epoch": 89.77704918032786, "grad_norm": 4.6531662940979, "learning_rate": 5.431349370414984e-07, "loss": 0.3884, "step": 27382 }, { "epoch": 89.78032786885245, "grad_norm": 4.349542617797852, "learning_rate": 5.427897920966252e-07, "loss": 0.1082, "step": 27383 }, { "epoch": 89.78360655737706, "grad_norm": 4.866032600402832, "learning_rate": 5.4244475379163e-07, "loss": 0.3419, "step": 27384 }, { "epoch": 89.78688524590164, "grad_norm": 3.225527763366699, "learning_rate": 5.42099822130403e-07, "loss": 0.2738, "step": 27385 }, { "epoch": 89.79016393442623, "grad_norm": 6.73557186126709, "learning_rate": 5.417549971168345e-07, "loss": 0.4923, "step": 27386 }, { "epoch": 89.79344262295082, "grad_norm": 4.991814613342285, "learning_rate": 5.414102787548126e-07, "loss": 0.2233, "step": 27387 }, { "epoch": 89.79672131147541, "grad_norm": 7.176562786102295, "learning_rate": 5.41065667048225e-07, "loss": 0.5259, "step": 27388 }, { "epoch": 89.8, "grad_norm": 4.840157508850098, "learning_rate": 5.407211620009545e-07, "loss": 0.2705, "step": 27389 }, { "epoch": 89.80327868852459, "grad_norm": 5.29144287109375, "learning_rate": 5.4037676361689e-07, "loss": 0.3293, "step": 27390 }, { "epoch": 89.80655737704917, "grad_norm": 8.932157516479492, "learning_rate": 5.400324718999139e-07, "loss": 0.2237, "step": 27391 }, { "epoch": 89.80983606557378, "grad_norm": 5.657495975494385, "learning_rate": 5.396882868539044e-07, "loss": 0.2435, "step": 27392 }, { "epoch": 89.81311475409836, "grad_norm": 12.139570236206055, "learning_rate": 5.393442084827482e-07, "loss": 0.4214, "step": 27393 }, { "epoch": 89.81639344262295, "grad_norm": 3.8805150985717773, "learning_rate": 5.390002367903225e-07, "loss": 0.5151, "step": 27394 }, { "epoch": 89.81967213114754, "grad_norm": 4.98209810256958, "learning_rate": 5.38656371780506e-07, "loss": 0.3546, "step": 27395 }, { "epoch": 89.82295081967213, "grad_norm": 5.397681713104248, "learning_rate": 5.383126134571748e-07, "loss": 0.3424, "step": 27396 }, { "epoch": 89.82622950819672, "grad_norm": 4.324080467224121, "learning_rate": 5.379689618242089e-07, "loss": 0.4467, "step": 27397 }, { "epoch": 89.8295081967213, "grad_norm": 6.246337413787842, "learning_rate": 5.3762541688548e-07, "loss": 0.2848, "step": 27398 }, { "epoch": 89.8327868852459, "grad_norm": 4.501016139984131, "learning_rate": 5.372819786448613e-07, "loss": 0.2243, "step": 27399 }, { "epoch": 89.8360655737705, "grad_norm": 39.0427131652832, "learning_rate": 5.369386471062287e-07, "loss": 0.333, "step": 27400 }, { "epoch": 89.83934426229509, "grad_norm": 4.355088233947754, "learning_rate": 5.365954222734526e-07, "loss": 0.4438, "step": 27401 }, { "epoch": 89.84262295081967, "grad_norm": 5.079152584075928, "learning_rate": 5.362523041504009e-07, "loss": 0.5576, "step": 27402 }, { "epoch": 89.84590163934426, "grad_norm": 5.057651042938232, "learning_rate": 5.359092927409459e-07, "loss": 0.1309, "step": 27403 }, { "epoch": 89.84918032786885, "grad_norm": 5.194307327270508, "learning_rate": 5.355663880489537e-07, "loss": 0.4543, "step": 27404 }, { "epoch": 89.85245901639344, "grad_norm": 4.942205429077148, "learning_rate": 5.352235900782899e-07, "loss": 0.3605, "step": 27405 }, { "epoch": 89.85573770491803, "grad_norm": 4.10479211807251, "learning_rate": 5.348808988328213e-07, "loss": 0.167, "step": 27406 }, { "epoch": 89.85901639344263, "grad_norm": 6.571002006530762, "learning_rate": 5.345383143164118e-07, "loss": 0.6703, "step": 27407 }, { "epoch": 89.86229508196722, "grad_norm": 4.488156795501709, "learning_rate": 5.341958365329247e-07, "loss": 0.2627, "step": 27408 }, { "epoch": 89.8655737704918, "grad_norm": 4.6313276290893555, "learning_rate": 5.338534654862226e-07, "loss": 0.497, "step": 27409 }, { "epoch": 89.8688524590164, "grad_norm": 5.6994194984436035, "learning_rate": 5.335112011801635e-07, "loss": 0.4436, "step": 27410 }, { "epoch": 89.87213114754098, "grad_norm": 5.44511079788208, "learning_rate": 5.331690436186076e-07, "loss": 0.525, "step": 27411 }, { "epoch": 89.87540983606557, "grad_norm": 5.048524379730225, "learning_rate": 5.328269928054164e-07, "loss": 0.5162, "step": 27412 }, { "epoch": 89.87868852459016, "grad_norm": 4.228927135467529, "learning_rate": 5.324850487444422e-07, "loss": 0.2998, "step": 27413 }, { "epoch": 89.88196721311475, "grad_norm": 4.807658672332764, "learning_rate": 5.321432114395441e-07, "loss": 0.3231, "step": 27414 }, { "epoch": 89.88524590163935, "grad_norm": 4.1222381591796875, "learning_rate": 5.318014808945737e-07, "loss": 0.306, "step": 27415 }, { "epoch": 89.88852459016394, "grad_norm": 4.632772922515869, "learning_rate": 5.314598571133867e-07, "loss": 0.2434, "step": 27416 }, { "epoch": 89.89180327868853, "grad_norm": 6.264381408691406, "learning_rate": 5.311183400998355e-07, "loss": 0.4646, "step": 27417 }, { "epoch": 89.89508196721312, "grad_norm": 4.173925876617432, "learning_rate": 5.307769298577703e-07, "loss": 0.4277, "step": 27418 }, { "epoch": 89.8983606557377, "grad_norm": 4.857818126678467, "learning_rate": 5.304356263910393e-07, "loss": 0.4923, "step": 27419 }, { "epoch": 89.90163934426229, "grad_norm": 5.571343421936035, "learning_rate": 5.300944297034927e-07, "loss": 0.5176, "step": 27420 }, { "epoch": 89.90491803278688, "grad_norm": 12.510440826416016, "learning_rate": 5.297533397989785e-07, "loss": 0.3583, "step": 27421 }, { "epoch": 89.90819672131147, "grad_norm": 4.844504356384277, "learning_rate": 5.294123566813425e-07, "loss": 0.3523, "step": 27422 }, { "epoch": 89.91147540983607, "grad_norm": 4.2505035400390625, "learning_rate": 5.290714803544284e-07, "loss": 0.5127, "step": 27423 }, { "epoch": 89.91475409836066, "grad_norm": 3.6274712085723877, "learning_rate": 5.287307108220796e-07, "loss": 0.5376, "step": 27424 }, { "epoch": 89.91803278688525, "grad_norm": 6.583784580230713, "learning_rate": 5.28390048088141e-07, "loss": 0.4644, "step": 27425 }, { "epoch": 89.92131147540984, "grad_norm": 6.513114929199219, "learning_rate": 5.280494921564527e-07, "loss": 0.2365, "step": 27426 }, { "epoch": 89.92459016393443, "grad_norm": 4.4800333976745605, "learning_rate": 5.27709043030854e-07, "loss": 0.4083, "step": 27427 }, { "epoch": 89.92786885245901, "grad_norm": 4.966362476348877, "learning_rate": 5.27368700715184e-07, "loss": 0.2789, "step": 27428 }, { "epoch": 89.9311475409836, "grad_norm": 8.32819938659668, "learning_rate": 5.270284652132829e-07, "loss": 0.3256, "step": 27429 }, { "epoch": 89.93442622950819, "grad_norm": 4.820769309997559, "learning_rate": 5.266883365289844e-07, "loss": 0.3296, "step": 27430 }, { "epoch": 89.9377049180328, "grad_norm": 14.655757904052734, "learning_rate": 5.263483146661242e-07, "loss": 0.5834, "step": 27431 }, { "epoch": 89.94098360655738, "grad_norm": 4.639509677886963, "learning_rate": 5.260083996285359e-07, "loss": 0.3367, "step": 27432 }, { "epoch": 89.94426229508197, "grad_norm": 5.593489646911621, "learning_rate": 5.256685914200555e-07, "loss": 0.4802, "step": 27433 }, { "epoch": 89.94754098360656, "grad_norm": 4.586453437805176, "learning_rate": 5.25328890044512e-07, "loss": 0.3538, "step": 27434 }, { "epoch": 89.95081967213115, "grad_norm": 4.573610782623291, "learning_rate": 5.249892955057368e-07, "loss": 0.5107, "step": 27435 }, { "epoch": 89.95409836065573, "grad_norm": 8.227558135986328, "learning_rate": 5.246498078075579e-07, "loss": 0.397, "step": 27436 }, { "epoch": 89.95737704918032, "grad_norm": 4.43222713470459, "learning_rate": 5.243104269538035e-07, "loss": 0.4397, "step": 27437 }, { "epoch": 89.96065573770491, "grad_norm": 7.943042278289795, "learning_rate": 5.239711529483027e-07, "loss": 0.1978, "step": 27438 }, { "epoch": 89.96393442622951, "grad_norm": 5.048036575317383, "learning_rate": 5.236319857948802e-07, "loss": 0.1757, "step": 27439 }, { "epoch": 89.9672131147541, "grad_norm": 4.787603378295898, "learning_rate": 5.232929254973595e-07, "loss": 0.2617, "step": 27440 }, { "epoch": 89.97049180327869, "grad_norm": 3.9056153297424316, "learning_rate": 5.229539720595634e-07, "loss": 0.195, "step": 27441 }, { "epoch": 89.97377049180328, "grad_norm": 6.734726428985596, "learning_rate": 5.226151254853152e-07, "loss": 0.2286, "step": 27442 }, { "epoch": 89.97704918032787, "grad_norm": 5.11092472076416, "learning_rate": 5.222763857784364e-07, "loss": 0.4372, "step": 27443 }, { "epoch": 89.98032786885246, "grad_norm": 6.098991394042969, "learning_rate": 5.219377529427461e-07, "loss": 0.3386, "step": 27444 }, { "epoch": 89.98360655737704, "grad_norm": 4.773082733154297, "learning_rate": 5.215992269820602e-07, "loss": 0.228, "step": 27445 }, { "epoch": 89.98688524590163, "grad_norm": 3.8481950759887695, "learning_rate": 5.212608079001991e-07, "loss": 0.3995, "step": 27446 }, { "epoch": 89.99016393442623, "grad_norm": 4.576786994934082, "learning_rate": 5.209224957009785e-07, "loss": 0.4546, "step": 27447 }, { "epoch": 89.99344262295082, "grad_norm": 3.8094515800476074, "learning_rate": 5.205842903882108e-07, "loss": 0.3855, "step": 27448 }, { "epoch": 89.99672131147541, "grad_norm": 4.115564823150635, "learning_rate": 5.202461919657131e-07, "loss": 0.4397, "step": 27449 }, { "epoch": 90.0, "grad_norm": 5.482054710388184, "learning_rate": 5.199082004372958e-07, "loss": 0.2602, "step": 27450 }, { "epoch": 90.00327868852459, "grad_norm": 4.2777862548828125, "learning_rate": 5.195703158067689e-07, "loss": 0.5695, "step": 27451 }, { "epoch": 90.00655737704918, "grad_norm": 14.152124404907227, "learning_rate": 5.192325380779461e-07, "loss": 0.2182, "step": 27452 }, { "epoch": 90.00983606557377, "grad_norm": 4.86186408996582, "learning_rate": 5.188948672546335e-07, "loss": 0.2983, "step": 27453 }, { "epoch": 90.01311475409837, "grad_norm": 4.566562175750732, "learning_rate": 5.185573033406388e-07, "loss": 0.4234, "step": 27454 }, { "epoch": 90.01639344262296, "grad_norm": 3.881220817565918, "learning_rate": 5.18219846339767e-07, "loss": 0.5106, "step": 27455 }, { "epoch": 90.01967213114754, "grad_norm": 4.032906532287598, "learning_rate": 5.178824962558271e-07, "loss": 0.1239, "step": 27456 }, { "epoch": 90.02295081967213, "grad_norm": 3.555163860321045, "learning_rate": 5.175452530926206e-07, "loss": 0.2033, "step": 27457 }, { "epoch": 90.02622950819672, "grad_norm": 4.485861301422119, "learning_rate": 5.1720811685395e-07, "loss": 0.218, "step": 27458 }, { "epoch": 90.02950819672131, "grad_norm": 6.218567371368408, "learning_rate": 5.168710875436178e-07, "loss": 0.2474, "step": 27459 }, { "epoch": 90.0327868852459, "grad_norm": 4.216720104217529, "learning_rate": 5.165341651654243e-07, "loss": 0.3407, "step": 27460 }, { "epoch": 90.03606557377049, "grad_norm": 11.786297798156738, "learning_rate": 5.161973497231687e-07, "loss": 0.3751, "step": 27461 }, { "epoch": 90.03934426229509, "grad_norm": 4.120304584503174, "learning_rate": 5.158606412206491e-07, "loss": 0.25, "step": 27462 }, { "epoch": 90.04262295081968, "grad_norm": 5.020786285400391, "learning_rate": 5.155240396616601e-07, "loss": 0.3707, "step": 27463 }, { "epoch": 90.04590163934427, "grad_norm": 5.855714797973633, "learning_rate": 5.151875450499999e-07, "loss": 0.4546, "step": 27464 }, { "epoch": 90.04918032786885, "grad_norm": 4.30082893371582, "learning_rate": 5.148511573894621e-07, "loss": 0.4206, "step": 27465 }, { "epoch": 90.05245901639344, "grad_norm": 8.334004402160645, "learning_rate": 5.145148766838404e-07, "loss": 0.2772, "step": 27466 }, { "epoch": 90.05573770491803, "grad_norm": 3.1832120418548584, "learning_rate": 5.141787029369238e-07, "loss": 0.3267, "step": 27467 }, { "epoch": 90.05901639344262, "grad_norm": 6.313509941101074, "learning_rate": 5.138426361525062e-07, "loss": 0.4656, "step": 27468 }, { "epoch": 90.0622950819672, "grad_norm": 4.344094276428223, "learning_rate": 5.135066763343765e-07, "loss": 0.3593, "step": 27469 }, { "epoch": 90.06557377049181, "grad_norm": 4.639011859893799, "learning_rate": 5.13170823486322e-07, "loss": 0.4399, "step": 27470 }, { "epoch": 90.0688524590164, "grad_norm": 12.215433120727539, "learning_rate": 5.128350776121294e-07, "loss": 0.3526, "step": 27471 }, { "epoch": 90.07213114754099, "grad_norm": 5.390096187591553, "learning_rate": 5.124994387155868e-07, "loss": 0.3741, "step": 27472 }, { "epoch": 90.07540983606557, "grad_norm": 3.747168779373169, "learning_rate": 5.121639068004769e-07, "loss": 0.5976, "step": 27473 }, { "epoch": 90.07868852459016, "grad_norm": 4.053986072540283, "learning_rate": 5.118284818705843e-07, "loss": 0.353, "step": 27474 }, { "epoch": 90.08196721311475, "grad_norm": 10.369637489318848, "learning_rate": 5.114931639296916e-07, "loss": 0.3272, "step": 27475 }, { "epoch": 90.08524590163934, "grad_norm": 6.659965991973877, "learning_rate": 5.111579529815768e-07, "loss": 0.3356, "step": 27476 }, { "epoch": 90.08852459016393, "grad_norm": 5.84329891204834, "learning_rate": 5.108228490300227e-07, "loss": 0.5223, "step": 27477 }, { "epoch": 90.09180327868853, "grad_norm": 5.378194332122803, "learning_rate": 5.104878520788082e-07, "loss": 0.4702, "step": 27478 }, { "epoch": 90.09508196721312, "grad_norm": 4.917609214782715, "learning_rate": 5.101529621317103e-07, "loss": 0.3512, "step": 27479 }, { "epoch": 90.09836065573771, "grad_norm": 6.333014488220215, "learning_rate": 5.098181791925016e-07, "loss": 0.4575, "step": 27480 }, { "epoch": 90.1016393442623, "grad_norm": 4.618563652038574, "learning_rate": 5.094835032649637e-07, "loss": 0.4068, "step": 27481 }, { "epoch": 90.10491803278688, "grad_norm": 4.54977560043335, "learning_rate": 5.091489343528655e-07, "loss": 0.5444, "step": 27482 }, { "epoch": 90.10819672131147, "grad_norm": 4.701599597930908, "learning_rate": 5.08814472459982e-07, "loss": 0.506, "step": 27483 }, { "epoch": 90.11147540983606, "grad_norm": 3.6937201023101807, "learning_rate": 5.084801175900811e-07, "loss": 0.3345, "step": 27484 }, { "epoch": 90.11475409836065, "grad_norm": 4.421031475067139, "learning_rate": 5.081458697469377e-07, "loss": 0.4328, "step": 27485 }, { "epoch": 90.11803278688525, "grad_norm": 4.544961929321289, "learning_rate": 5.078117289343188e-07, "loss": 0.4627, "step": 27486 }, { "epoch": 90.12131147540984, "grad_norm": 4.972862720489502, "learning_rate": 5.074776951559923e-07, "loss": 0.3004, "step": 27487 }, { "epoch": 90.12459016393443, "grad_norm": 4.490538120269775, "learning_rate": 5.071437684157243e-07, "loss": 0.3472, "step": 27488 }, { "epoch": 90.12786885245902, "grad_norm": 4.3977274894714355, "learning_rate": 5.068099487172785e-07, "loss": 0.31, "step": 27489 }, { "epoch": 90.1311475409836, "grad_norm": 7.577133655548096, "learning_rate": 5.064762360644226e-07, "loss": 0.1606, "step": 27490 }, { "epoch": 90.1344262295082, "grad_norm": 4.437901496887207, "learning_rate": 5.061426304609184e-07, "loss": 0.3843, "step": 27491 }, { "epoch": 90.13770491803278, "grad_norm": 4.837281227111816, "learning_rate": 5.058091319105263e-07, "loss": 0.4654, "step": 27492 }, { "epoch": 90.14098360655737, "grad_norm": 3.841754913330078, "learning_rate": 5.054757404170074e-07, "loss": 0.3298, "step": 27493 }, { "epoch": 90.14426229508197, "grad_norm": 6.676450729370117, "learning_rate": 5.051424559841223e-07, "loss": 0.3679, "step": 27494 }, { "epoch": 90.14754098360656, "grad_norm": 4.427374362945557, "learning_rate": 5.048092786156278e-07, "loss": 0.3902, "step": 27495 }, { "epoch": 90.15081967213115, "grad_norm": 4.229529857635498, "learning_rate": 5.044762083152821e-07, "loss": 0.4862, "step": 27496 }, { "epoch": 90.15409836065574, "grad_norm": 13.148717880249023, "learning_rate": 5.041432450868377e-07, "loss": 0.5068, "step": 27497 }, { "epoch": 90.15737704918033, "grad_norm": 5.712875843048096, "learning_rate": 5.038103889340529e-07, "loss": 0.4673, "step": 27498 }, { "epoch": 90.16065573770491, "grad_norm": 8.838539123535156, "learning_rate": 5.0347763986068e-07, "loss": 0.4338, "step": 27499 }, { "epoch": 90.1639344262295, "grad_norm": 7.9928202629089355, "learning_rate": 5.031449978704705e-07, "loss": 0.3648, "step": 27500 }, { "epoch": 90.1672131147541, "grad_norm": 5.115351676940918, "learning_rate": 5.028124629671737e-07, "loss": 0.3178, "step": 27501 }, { "epoch": 90.1704918032787, "grad_norm": 4.562191009521484, "learning_rate": 5.024800351545423e-07, "loss": 0.3914, "step": 27502 }, { "epoch": 90.17377049180328, "grad_norm": 6.150172710418701, "learning_rate": 5.02147714436324e-07, "loss": 0.3915, "step": 27503 }, { "epoch": 90.17704918032787, "grad_norm": 5.178626537322998, "learning_rate": 5.01815500816264e-07, "loss": 0.2658, "step": 27504 }, { "epoch": 90.18032786885246, "grad_norm": 6.185421466827393, "learning_rate": 5.014833942981112e-07, "loss": 0.3275, "step": 27505 }, { "epoch": 90.18360655737705, "grad_norm": 6.854031562805176, "learning_rate": 5.011513948856083e-07, "loss": 0.4198, "step": 27506 }, { "epoch": 90.18688524590164, "grad_norm": 6.216830730438232, "learning_rate": 5.008195025824991e-07, "loss": 0.4099, "step": 27507 }, { "epoch": 90.19016393442622, "grad_norm": 7.003665447235107, "learning_rate": 5.004877173925282e-07, "loss": 0.1335, "step": 27508 }, { "epoch": 90.19344262295083, "grad_norm": 4.313722133636475, "learning_rate": 5.00156039319436e-07, "loss": 0.5216, "step": 27509 }, { "epoch": 90.19672131147541, "grad_norm": 6.076152801513672, "learning_rate": 4.998244683669595e-07, "loss": 0.3957, "step": 27510 }, { "epoch": 90.2, "grad_norm": 4.235598564147949, "learning_rate": 4.994930045388414e-07, "loss": 0.3729, "step": 27511 }, { "epoch": 90.20327868852459, "grad_norm": 4.8283538818359375, "learning_rate": 4.991616478388173e-07, "loss": 0.1569, "step": 27512 }, { "epoch": 90.20655737704918, "grad_norm": 4.39716100692749, "learning_rate": 4.988303982706244e-07, "loss": 0.3386, "step": 27513 }, { "epoch": 90.20983606557377, "grad_norm": 8.760913848876953, "learning_rate": 4.984992558379976e-07, "loss": 0.4161, "step": 27514 }, { "epoch": 90.21311475409836, "grad_norm": 4.335661888122559, "learning_rate": 4.981682205446692e-07, "loss": 0.3007, "step": 27515 }, { "epoch": 90.21639344262294, "grad_norm": 5.19720458984375, "learning_rate": 4.978372923943742e-07, "loss": 0.2864, "step": 27516 }, { "epoch": 90.21967213114755, "grad_norm": 3.845165491104126, "learning_rate": 4.975064713908451e-07, "loss": 0.2562, "step": 27517 }, { "epoch": 90.22295081967214, "grad_norm": 4.504781723022461, "learning_rate": 4.971757575378089e-07, "loss": 0.3791, "step": 27518 }, { "epoch": 90.22622950819672, "grad_norm": 4.725109577178955, "learning_rate": 4.96845150838996e-07, "loss": 0.2462, "step": 27519 }, { "epoch": 90.22950819672131, "grad_norm": 5.320695400238037, "learning_rate": 4.965146512981367e-07, "loss": 0.3116, "step": 27520 }, { "epoch": 90.2327868852459, "grad_norm": 14.577814102172852, "learning_rate": 4.961842589189559e-07, "loss": 0.3028, "step": 27521 }, { "epoch": 90.23606557377049, "grad_norm": 6.158327102661133, "learning_rate": 4.958539737051782e-07, "loss": 0.4596, "step": 27522 }, { "epoch": 90.23934426229508, "grad_norm": 5.008234977722168, "learning_rate": 4.955237956605274e-07, "loss": 0.3563, "step": 27523 }, { "epoch": 90.24262295081967, "grad_norm": 4.638070583343506, "learning_rate": 4.951937247887295e-07, "loss": 0.283, "step": 27524 }, { "epoch": 90.24590163934427, "grad_norm": 5.398006439208984, "learning_rate": 4.948637610935058e-07, "loss": 0.3909, "step": 27525 }, { "epoch": 90.24918032786886, "grad_norm": 4.094966888427734, "learning_rate": 4.945339045785747e-07, "loss": 0.2845, "step": 27526 }, { "epoch": 90.25245901639344, "grad_norm": 5.238771915435791, "learning_rate": 4.942041552476585e-07, "loss": 0.3156, "step": 27527 }, { "epoch": 90.25573770491803, "grad_norm": 5.427196025848389, "learning_rate": 4.938745131044708e-07, "loss": 0.5434, "step": 27528 }, { "epoch": 90.25901639344262, "grad_norm": 4.360825538635254, "learning_rate": 4.935449781527346e-07, "loss": 0.4221, "step": 27529 }, { "epoch": 90.26229508196721, "grad_norm": 3.7292778491973877, "learning_rate": 4.932155503961621e-07, "loss": 0.5227, "step": 27530 }, { "epoch": 90.2655737704918, "grad_norm": 5.1008830070495605, "learning_rate": 4.928862298384695e-07, "loss": 0.3672, "step": 27531 }, { "epoch": 90.26885245901639, "grad_norm": 5.179539203643799, "learning_rate": 4.925570164833681e-07, "loss": 0.3928, "step": 27532 }, { "epoch": 90.27213114754099, "grad_norm": 4.463090419769287, "learning_rate": 4.922279103345729e-07, "loss": 0.5053, "step": 27533 }, { "epoch": 90.27540983606558, "grad_norm": 4.7480034828186035, "learning_rate": 4.918989113957939e-07, "loss": 0.2997, "step": 27534 }, { "epoch": 90.27868852459017, "grad_norm": 4.033162593841553, "learning_rate": 4.915700196707407e-07, "loss": 0.3232, "step": 27535 }, { "epoch": 90.28196721311475, "grad_norm": 4.019626617431641, "learning_rate": 4.912412351631202e-07, "loss": 0.3509, "step": 27536 }, { "epoch": 90.28524590163934, "grad_norm": 8.505782127380371, "learning_rate": 4.909125578766427e-07, "loss": 0.4176, "step": 27537 }, { "epoch": 90.28852459016393, "grad_norm": 4.806117534637451, "learning_rate": 4.905839878150131e-07, "loss": 0.2822, "step": 27538 }, { "epoch": 90.29180327868852, "grad_norm": 4.119276523590088, "learning_rate": 4.902555249819363e-07, "loss": 0.4552, "step": 27539 }, { "epoch": 90.29508196721312, "grad_norm": 8.095861434936523, "learning_rate": 4.899271693811159e-07, "loss": 0.5342, "step": 27540 }, { "epoch": 90.29836065573771, "grad_norm": 6.96613073348999, "learning_rate": 4.895989210162532e-07, "loss": 0.3708, "step": 27541 }, { "epoch": 90.3016393442623, "grad_norm": 6.8494768142700195, "learning_rate": 4.892707798910535e-07, "loss": 0.3353, "step": 27542 }, { "epoch": 90.30491803278689, "grad_norm": 6.36618709564209, "learning_rate": 4.889427460092133e-07, "loss": 0.4354, "step": 27543 }, { "epoch": 90.30819672131148, "grad_norm": 4.610544681549072, "learning_rate": 4.886148193744333e-07, "loss": 0.3308, "step": 27544 }, { "epoch": 90.31147540983606, "grad_norm": 15.898433685302734, "learning_rate": 4.882869999904083e-07, "loss": 0.4024, "step": 27545 }, { "epoch": 90.31475409836065, "grad_norm": 5.446386337280273, "learning_rate": 4.879592878608396e-07, "loss": 0.3857, "step": 27546 }, { "epoch": 90.31803278688524, "grad_norm": 6.2056169509887695, "learning_rate": 4.87631682989419e-07, "loss": 0.193, "step": 27547 }, { "epoch": 90.32131147540984, "grad_norm": 4.170953273773193, "learning_rate": 4.873041853798421e-07, "loss": 0.1295, "step": 27548 }, { "epoch": 90.32459016393443, "grad_norm": 4.280185699462891, "learning_rate": 4.869767950357995e-07, "loss": 0.2035, "step": 27549 }, { "epoch": 90.32786885245902, "grad_norm": 5.199851036071777, "learning_rate": 4.866495119609871e-07, "loss": 0.307, "step": 27550 }, { "epoch": 90.33114754098361, "grad_norm": 4.47821044921875, "learning_rate": 4.863223361590919e-07, "loss": 0.3387, "step": 27551 }, { "epoch": 90.3344262295082, "grad_norm": 4.993767738342285, "learning_rate": 4.859952676338042e-07, "loss": 0.3132, "step": 27552 }, { "epoch": 90.33770491803278, "grad_norm": 3.8560235500335693, "learning_rate": 4.856683063888101e-07, "loss": 0.3666, "step": 27553 }, { "epoch": 90.34098360655737, "grad_norm": 8.713979721069336, "learning_rate": 4.853414524278e-07, "loss": 0.3897, "step": 27554 }, { "epoch": 90.34426229508196, "grad_norm": 4.7484283447265625, "learning_rate": 4.850147057544585e-07, "loss": 0.3432, "step": 27555 }, { "epoch": 90.34754098360656, "grad_norm": 5.004576206207275, "learning_rate": 4.846880663724685e-07, "loss": 0.2805, "step": 27556 }, { "epoch": 90.35081967213115, "grad_norm": 5.363656520843506, "learning_rate": 4.843615342855123e-07, "loss": 0.3484, "step": 27557 }, { "epoch": 90.35409836065574, "grad_norm": 3.6037282943725586, "learning_rate": 4.840351094972761e-07, "loss": 0.2816, "step": 27558 }, { "epoch": 90.35737704918033, "grad_norm": 5.064136028289795, "learning_rate": 4.837087920114369e-07, "loss": 0.2697, "step": 27559 }, { "epoch": 90.36065573770492, "grad_norm": 7.548642635345459, "learning_rate": 4.833825818316751e-07, "loss": 0.3586, "step": 27560 }, { "epoch": 90.3639344262295, "grad_norm": 5.3463873863220215, "learning_rate": 4.83056478961671e-07, "loss": 0.3237, "step": 27561 }, { "epoch": 90.3672131147541, "grad_norm": 9.7382173538208, "learning_rate": 4.827304834050994e-07, "loss": 0.3368, "step": 27562 }, { "epoch": 90.37049180327868, "grad_norm": 3.8883824348449707, "learning_rate": 4.824045951656364e-07, "loss": 0.3203, "step": 27563 }, { "epoch": 90.37377049180328, "grad_norm": 6.156789779663086, "learning_rate": 4.820788142469579e-07, "loss": 0.3832, "step": 27564 }, { "epoch": 90.37704918032787, "grad_norm": 5.051287651062012, "learning_rate": 4.817531406527376e-07, "loss": 0.533, "step": 27565 }, { "epoch": 90.38032786885246, "grad_norm": 6.659017562866211, "learning_rate": 4.81427574386647e-07, "loss": 0.4957, "step": 27566 }, { "epoch": 90.38360655737705, "grad_norm": 4.75103759765625, "learning_rate": 4.811021154523566e-07, "loss": 0.3911, "step": 27567 }, { "epoch": 90.38688524590164, "grad_norm": 4.379805088043213, "learning_rate": 4.807767638535376e-07, "loss": 0.508, "step": 27568 }, { "epoch": 90.39016393442623, "grad_norm": 3.7782723903656006, "learning_rate": 4.804515195938586e-07, "loss": 0.1648, "step": 27569 }, { "epoch": 90.39344262295081, "grad_norm": 5.642185688018799, "learning_rate": 4.801263826769864e-07, "loss": 0.3417, "step": 27570 }, { "epoch": 90.3967213114754, "grad_norm": 5.369298934936523, "learning_rate": 4.798013531065859e-07, "loss": 0.3372, "step": 27571 }, { "epoch": 90.4, "grad_norm": 7.2862372398376465, "learning_rate": 4.794764308863242e-07, "loss": 0.4293, "step": 27572 }, { "epoch": 90.4032786885246, "grad_norm": 10.621467590332031, "learning_rate": 4.791516160198661e-07, "loss": 0.49, "step": 27573 }, { "epoch": 90.40655737704918, "grad_norm": 5.140732288360596, "learning_rate": 4.788269085108721e-07, "loss": 0.4543, "step": 27574 }, { "epoch": 90.40983606557377, "grad_norm": 4.88139009475708, "learning_rate": 4.785023083630025e-07, "loss": 0.482, "step": 27575 }, { "epoch": 90.41311475409836, "grad_norm": 4.325371742248535, "learning_rate": 4.78177815579921e-07, "loss": 0.2487, "step": 27576 }, { "epoch": 90.41639344262295, "grad_norm": 4.117870330810547, "learning_rate": 4.778534301652849e-07, "loss": 0.4803, "step": 27577 }, { "epoch": 90.41967213114754, "grad_norm": 5.633523464202881, "learning_rate": 4.775291521227521e-07, "loss": 0.3017, "step": 27578 }, { "epoch": 90.42295081967212, "grad_norm": 3.858571767807007, "learning_rate": 4.772049814559787e-07, "loss": 0.1502, "step": 27579 }, { "epoch": 90.42622950819673, "grad_norm": 6.810392379760742, "learning_rate": 4.768809181686185e-07, "loss": 0.2691, "step": 27580 }, { "epoch": 90.42950819672132, "grad_norm": 4.703730583190918, "learning_rate": 4.7655696226432957e-07, "loss": 0.5135, "step": 27581 }, { "epoch": 90.4327868852459, "grad_norm": 5.613104820251465, "learning_rate": 4.762331137467624e-07, "loss": 0.3214, "step": 27582 }, { "epoch": 90.43606557377049, "grad_norm": 5.905898571014404, "learning_rate": 4.759093726195696e-07, "loss": 0.2698, "step": 27583 }, { "epoch": 90.43934426229508, "grad_norm": 6.096116542816162, "learning_rate": 4.7558573888639937e-07, "loss": 0.3988, "step": 27584 }, { "epoch": 90.44262295081967, "grad_norm": 5.514699459075928, "learning_rate": 4.752622125509043e-07, "loss": 0.3081, "step": 27585 }, { "epoch": 90.44590163934426, "grad_norm": 5.309741973876953, "learning_rate": 4.749387936167316e-07, "loss": 0.4551, "step": 27586 }, { "epoch": 90.44918032786886, "grad_norm": 4.9116387367248535, "learning_rate": 4.7461548208752706e-07, "loss": 0.5434, "step": 27587 }, { "epoch": 90.45245901639345, "grad_norm": 5.209381580352783, "learning_rate": 4.7429227796693564e-07, "loss": 0.3577, "step": 27588 }, { "epoch": 90.45573770491804, "grad_norm": 4.349350452423096, "learning_rate": 4.7396918125860445e-07, "loss": 0.2129, "step": 27589 }, { "epoch": 90.45901639344262, "grad_norm": 5.614696979522705, "learning_rate": 4.73646191966175e-07, "loss": 0.2333, "step": 27590 }, { "epoch": 90.46229508196721, "grad_norm": 8.131402015686035, "learning_rate": 4.7332331009328993e-07, "loss": 0.4087, "step": 27591 }, { "epoch": 90.4655737704918, "grad_norm": 10.118456840515137, "learning_rate": 4.730005356435896e-07, "loss": 0.248, "step": 27592 }, { "epoch": 90.46885245901639, "grad_norm": 6.918341159820557, "learning_rate": 4.726778686207123e-07, "loss": 0.2571, "step": 27593 }, { "epoch": 90.47213114754098, "grad_norm": 4.53534460067749, "learning_rate": 4.7235530902829954e-07, "loss": 0.2481, "step": 27594 }, { "epoch": 90.47540983606558, "grad_norm": 5.249360084533691, "learning_rate": 4.7203285686998723e-07, "loss": 0.3172, "step": 27595 }, { "epoch": 90.47868852459017, "grad_norm": 4.221401214599609, "learning_rate": 4.7171051214941146e-07, "loss": 0.3321, "step": 27596 }, { "epoch": 90.48196721311476, "grad_norm": 5.572314739227295, "learning_rate": 4.713882748702048e-07, "loss": 0.2846, "step": 27597 }, { "epoch": 90.48524590163935, "grad_norm": 4.234503746032715, "learning_rate": 4.7106614503600323e-07, "loss": 0.3921, "step": 27598 }, { "epoch": 90.48852459016393, "grad_norm": 5.317947864532471, "learning_rate": 4.707441226504395e-07, "loss": 0.2031, "step": 27599 }, { "epoch": 90.49180327868852, "grad_norm": 4.403347492218018, "learning_rate": 4.7042220771714273e-07, "loss": 0.3437, "step": 27600 }, { "epoch": 90.49508196721311, "grad_norm": 4.809566497802734, "learning_rate": 4.7010040023974355e-07, "loss": 0.2498, "step": 27601 }, { "epoch": 90.4983606557377, "grad_norm": 4.91471004486084, "learning_rate": 4.6977870022187124e-07, "loss": 0.5176, "step": 27602 }, { "epoch": 90.5016393442623, "grad_norm": 5.372771739959717, "learning_rate": 4.694571076671539e-07, "loss": 0.2802, "step": 27603 }, { "epoch": 90.50491803278689, "grad_norm": 3.728893280029297, "learning_rate": 4.691356225792165e-07, "loss": 0.4393, "step": 27604 }, { "epoch": 90.50819672131148, "grad_norm": 4.977473735809326, "learning_rate": 4.68814244961685e-07, "loss": 0.3815, "step": 27605 }, { "epoch": 90.51147540983607, "grad_norm": 4.5807318687438965, "learning_rate": 4.68492974818181e-07, "loss": 0.4314, "step": 27606 }, { "epoch": 90.51475409836065, "grad_norm": 5.40330171585083, "learning_rate": 4.681718121523304e-07, "loss": 0.4313, "step": 27607 }, { "epoch": 90.51803278688524, "grad_norm": 4.769661903381348, "learning_rate": 4.678507569677537e-07, "loss": 0.4259, "step": 27608 }, { "epoch": 90.52131147540983, "grad_norm": 6.230536937713623, "learning_rate": 4.6752980926806915e-07, "loss": 0.3803, "step": 27609 }, { "epoch": 90.52459016393442, "grad_norm": 4.506417274475098, "learning_rate": 4.6720896905689815e-07, "loss": 0.2394, "step": 27610 }, { "epoch": 90.52786885245902, "grad_norm": 5.3375349044799805, "learning_rate": 4.6688823633785796e-07, "loss": 0.5191, "step": 27611 }, { "epoch": 90.53114754098361, "grad_norm": 5.598742961883545, "learning_rate": 4.6656761111456337e-07, "loss": 0.3388, "step": 27612 }, { "epoch": 90.5344262295082, "grad_norm": 4.517755508422852, "learning_rate": 4.6624709339063267e-07, "loss": 0.3629, "step": 27613 }, { "epoch": 90.53770491803279, "grad_norm": 4.206028938293457, "learning_rate": 4.659266831696796e-07, "loss": 0.3432, "step": 27614 }, { "epoch": 90.54098360655738, "grad_norm": 4.533386707305908, "learning_rate": 4.656063804553135e-07, "loss": 0.3467, "step": 27615 }, { "epoch": 90.54426229508196, "grad_norm": 7.8000640869140625, "learning_rate": 4.6528618525115034e-07, "loss": 0.224, "step": 27616 }, { "epoch": 90.54754098360655, "grad_norm": 10.999534606933594, "learning_rate": 4.649660975607995e-07, "loss": 0.2824, "step": 27617 }, { "epoch": 90.55081967213114, "grad_norm": 3.9035043716430664, "learning_rate": 4.646461173878691e-07, "loss": 0.5131, "step": 27618 }, { "epoch": 90.55409836065574, "grad_norm": 4.456297874450684, "learning_rate": 4.6432624473596756e-07, "loss": 0.3277, "step": 27619 }, { "epoch": 90.55737704918033, "grad_norm": 16.380714416503906, "learning_rate": 4.6400647960870294e-07, "loss": 0.3632, "step": 27620 }, { "epoch": 90.56065573770492, "grad_norm": 3.9322659969329834, "learning_rate": 4.6368682200968016e-07, "loss": 0.3034, "step": 27621 }, { "epoch": 90.56393442622951, "grad_norm": 3.947786331176758, "learning_rate": 4.633672719425042e-07, "loss": 0.3474, "step": 27622 }, { "epoch": 90.5672131147541, "grad_norm": 4.200923919677734, "learning_rate": 4.6304782941077654e-07, "loss": 0.2398, "step": 27623 }, { "epoch": 90.57049180327868, "grad_norm": 4.402974605560303, "learning_rate": 4.6272849441810097e-07, "loss": 0.2467, "step": 27624 }, { "epoch": 90.57377049180327, "grad_norm": 5.447577953338623, "learning_rate": 4.624092669680791e-07, "loss": 0.4326, "step": 27625 }, { "epoch": 90.57704918032788, "grad_norm": 4.37024450302124, "learning_rate": 4.620901470643091e-07, "loss": 0.2574, "step": 27626 }, { "epoch": 90.58032786885246, "grad_norm": 5.483648300170898, "learning_rate": 4.6177113471038813e-07, "loss": 0.2941, "step": 27627 }, { "epoch": 90.58360655737705, "grad_norm": 5.475935935974121, "learning_rate": 4.6145222990991554e-07, "loss": 0.4403, "step": 27628 }, { "epoch": 90.58688524590164, "grad_norm": 6.668384552001953, "learning_rate": 4.6113343266648735e-07, "loss": 0.6618, "step": 27629 }, { "epoch": 90.59016393442623, "grad_norm": 4.713696002960205, "learning_rate": 4.608147429836973e-07, "loss": 0.3825, "step": 27630 }, { "epoch": 90.59344262295082, "grad_norm": 4.360854625701904, "learning_rate": 4.6049616086513925e-07, "loss": 0.1647, "step": 27631 }, { "epoch": 90.5967213114754, "grad_norm": 8.637747764587402, "learning_rate": 4.601776863144047e-07, "loss": 0.4634, "step": 27632 }, { "epoch": 90.6, "grad_norm": 3.5760128498077393, "learning_rate": 4.5985931933508757e-07, "loss": 0.4825, "step": 27633 }, { "epoch": 90.6032786885246, "grad_norm": 4.850333213806152, "learning_rate": 4.595410599307748e-07, "loss": 0.3942, "step": 27634 }, { "epoch": 90.60655737704919, "grad_norm": 5.423864364624023, "learning_rate": 4.592229081050559e-07, "loss": 0.4073, "step": 27635 }, { "epoch": 90.60983606557377, "grad_norm": 6.1520490646362305, "learning_rate": 4.5890486386151787e-07, "loss": 0.3419, "step": 27636 }, { "epoch": 90.61311475409836, "grad_norm": 4.1980156898498535, "learning_rate": 4.5858692720374907e-07, "loss": 0.6555, "step": 27637 }, { "epoch": 90.61639344262295, "grad_norm": 4.893970966339111, "learning_rate": 4.5826909813533326e-07, "loss": 0.2192, "step": 27638 }, { "epoch": 90.61967213114754, "grad_norm": 6.629705429077148, "learning_rate": 4.5795137665985424e-07, "loss": 0.3564, "step": 27639 }, { "epoch": 90.62295081967213, "grad_norm": 3.725003480911255, "learning_rate": 4.5763376278089353e-07, "loss": 0.3069, "step": 27640 }, { "epoch": 90.62622950819672, "grad_norm": 5.328713417053223, "learning_rate": 4.5731625650203504e-07, "loss": 0.2427, "step": 27641 }, { "epoch": 90.62950819672132, "grad_norm": 4.4285759925842285, "learning_rate": 4.5699885782685806e-07, "loss": 0.3014, "step": 27642 }, { "epoch": 90.6327868852459, "grad_norm": 3.999969244003296, "learning_rate": 4.5668156675894083e-07, "loss": 0.1844, "step": 27643 }, { "epoch": 90.6360655737705, "grad_norm": 5.0165605545043945, "learning_rate": 4.5636438330186053e-07, "loss": 0.3525, "step": 27644 }, { "epoch": 90.63934426229508, "grad_norm": 4.552615165710449, "learning_rate": 4.560473074591942e-07, "loss": 0.3748, "step": 27645 }, { "epoch": 90.64262295081967, "grad_norm": 7.176437854766846, "learning_rate": 4.5573033923451915e-07, "loss": 0.3978, "step": 27646 }, { "epoch": 90.64590163934426, "grad_norm": 4.353165626525879, "learning_rate": 4.5541347863140794e-07, "loss": 0.4558, "step": 27647 }, { "epoch": 90.64918032786885, "grad_norm": 6.4234209060668945, "learning_rate": 4.5509672565343443e-07, "loss": 0.4466, "step": 27648 }, { "epoch": 90.65245901639344, "grad_norm": 8.483325004577637, "learning_rate": 4.5478008030416686e-07, "loss": 0.3309, "step": 27649 }, { "epoch": 90.65573770491804, "grad_norm": 5.088569641113281, "learning_rate": 4.5446354258718017e-07, "loss": 0.3442, "step": 27650 }, { "epoch": 90.65901639344263, "grad_norm": 7.289646625518799, "learning_rate": 4.541471125060426e-07, "loss": 0.3952, "step": 27651 }, { "epoch": 90.66229508196722, "grad_norm": 4.373498916625977, "learning_rate": 4.538307900643213e-07, "loss": 0.1973, "step": 27652 }, { "epoch": 90.6655737704918, "grad_norm": 6.584164142608643, "learning_rate": 4.5351457526558116e-07, "loss": 0.4808, "step": 27653 }, { "epoch": 90.66885245901639, "grad_norm": 5.230125427246094, "learning_rate": 4.531984681133916e-07, "loss": 0.3908, "step": 27654 }, { "epoch": 90.67213114754098, "grad_norm": 4.527294635772705, "learning_rate": 4.5288246861131646e-07, "loss": 0.3627, "step": 27655 }, { "epoch": 90.67540983606557, "grad_norm": 4.354671955108643, "learning_rate": 4.525665767629173e-07, "loss": 0.2748, "step": 27656 }, { "epoch": 90.67868852459016, "grad_norm": 7.747053146362305, "learning_rate": 4.5225079257175677e-07, "loss": 0.2344, "step": 27657 }, { "epoch": 90.68196721311476, "grad_norm": 6.359946250915527, "learning_rate": 4.5193511604139426e-07, "loss": 0.3601, "step": 27658 }, { "epoch": 90.68524590163935, "grad_norm": 4.476337909698486, "learning_rate": 4.516195471753926e-07, "loss": 0.4345, "step": 27659 }, { "epoch": 90.68852459016394, "grad_norm": 4.299811840057373, "learning_rate": 4.513040859773088e-07, "loss": 0.4894, "step": 27660 }, { "epoch": 90.69180327868852, "grad_norm": 3.5877625942230225, "learning_rate": 4.50988732450699e-07, "loss": 0.36, "step": 27661 }, { "epoch": 90.69508196721311, "grad_norm": 8.005793571472168, "learning_rate": 4.5067348659911804e-07, "loss": 0.4966, "step": 27662 }, { "epoch": 90.6983606557377, "grad_norm": 5.877671718597412, "learning_rate": 4.5035834842612423e-07, "loss": 0.4045, "step": 27663 }, { "epoch": 90.70163934426229, "grad_norm": 4.657781600952148, "learning_rate": 4.5004331793526926e-07, "loss": 0.5214, "step": 27664 }, { "epoch": 90.70491803278688, "grad_norm": 5.283817768096924, "learning_rate": 4.4972839513010346e-07, "loss": 0.4049, "step": 27665 }, { "epoch": 90.70819672131148, "grad_norm": 5.171114921569824, "learning_rate": 4.494135800141808e-07, "loss": 0.3447, "step": 27666 }, { "epoch": 90.71147540983607, "grad_norm": 4.722936630249023, "learning_rate": 4.4909887259105165e-07, "loss": 0.2407, "step": 27667 }, { "epoch": 90.71475409836066, "grad_norm": 6.253888130187988, "learning_rate": 4.4878427286425997e-07, "loss": 0.2019, "step": 27668 }, { "epoch": 90.71803278688525, "grad_norm": 4.41489315032959, "learning_rate": 4.484697808373595e-07, "loss": 0.2752, "step": 27669 }, { "epoch": 90.72131147540983, "grad_norm": 5.423416614532471, "learning_rate": 4.4815539651389186e-07, "loss": 0.3518, "step": 27670 }, { "epoch": 90.72459016393442, "grad_norm": 4.294651508331299, "learning_rate": 4.478411198974031e-07, "loss": 0.2744, "step": 27671 }, { "epoch": 90.72786885245901, "grad_norm": 4.2064619064331055, "learning_rate": 4.475269509914382e-07, "loss": 0.4063, "step": 27672 }, { "epoch": 90.73114754098361, "grad_norm": 3.8813083171844482, "learning_rate": 4.472128897995398e-07, "loss": 0.3664, "step": 27673 }, { "epoch": 90.7344262295082, "grad_norm": 10.441813468933105, "learning_rate": 4.468989363252485e-07, "loss": 0.2598, "step": 27674 }, { "epoch": 90.73770491803279, "grad_norm": 5.376674175262451, "learning_rate": 4.465850905721025e-07, "loss": 0.3806, "step": 27675 }, { "epoch": 90.74098360655738, "grad_norm": 6.031412124633789, "learning_rate": 4.462713525436435e-07, "loss": 0.443, "step": 27676 }, { "epoch": 90.74426229508197, "grad_norm": 4.352997303009033, "learning_rate": 4.4595772224340974e-07, "loss": 0.4043, "step": 27677 }, { "epoch": 90.74754098360656, "grad_norm": 5.304678916931152, "learning_rate": 4.45644199674935e-07, "loss": 0.5872, "step": 27678 }, { "epoch": 90.75081967213114, "grad_norm": 4.919147491455078, "learning_rate": 4.453307848417554e-07, "loss": 0.3208, "step": 27679 }, { "epoch": 90.75409836065573, "grad_norm": 8.887829780578613, "learning_rate": 4.45017477747407e-07, "loss": 0.4695, "step": 27680 }, { "epoch": 90.75737704918033, "grad_norm": 4.0213704109191895, "learning_rate": 4.447042783954214e-07, "loss": 0.52, "step": 27681 }, { "epoch": 90.76065573770492, "grad_norm": 4.654116153717041, "learning_rate": 4.4439118678932913e-07, "loss": 0.3369, "step": 27682 }, { "epoch": 90.76393442622951, "grad_norm": 5.638422966003418, "learning_rate": 4.440782029326618e-07, "loss": 0.2302, "step": 27683 }, { "epoch": 90.7672131147541, "grad_norm": 6.237642288208008, "learning_rate": 4.437653268289477e-07, "loss": 0.2317, "step": 27684 }, { "epoch": 90.77049180327869, "grad_norm": 21.45613670349121, "learning_rate": 4.434525584817162e-07, "loss": 0.1716, "step": 27685 }, { "epoch": 90.77377049180328, "grad_norm": 4.901851654052734, "learning_rate": 4.431398978944945e-07, "loss": 0.3784, "step": 27686 }, { "epoch": 90.77704918032786, "grad_norm": 5.7632575035095215, "learning_rate": 4.428273450708065e-07, "loss": 0.4795, "step": 27687 }, { "epoch": 90.78032786885245, "grad_norm": 6.406145095825195, "learning_rate": 4.4251490001417594e-07, "loss": 0.2749, "step": 27688 }, { "epoch": 90.78360655737706, "grad_norm": 9.744625091552734, "learning_rate": 4.422025627281279e-07, "loss": 0.4086, "step": 27689 }, { "epoch": 90.78688524590164, "grad_norm": 4.899126052856445, "learning_rate": 4.418903332161839e-07, "loss": 0.3067, "step": 27690 }, { "epoch": 90.79016393442623, "grad_norm": 4.278788089752197, "learning_rate": 4.4157821148186453e-07, "loss": 0.502, "step": 27691 }, { "epoch": 90.79344262295082, "grad_norm": 4.108241081237793, "learning_rate": 4.4126619752868695e-07, "loss": 0.616, "step": 27692 }, { "epoch": 90.79672131147541, "grad_norm": 3.903275966644287, "learning_rate": 4.4095429136017386e-07, "loss": 0.4364, "step": 27693 }, { "epoch": 90.8, "grad_norm": 7.455316543579102, "learning_rate": 4.406424929798403e-07, "loss": 0.5393, "step": 27694 }, { "epoch": 90.80327868852459, "grad_norm": 4.96807861328125, "learning_rate": 4.403308023912012e-07, "loss": 0.3546, "step": 27695 }, { "epoch": 90.80655737704917, "grad_norm": 4.438145160675049, "learning_rate": 4.400192195977715e-07, "loss": 0.3722, "step": 27696 }, { "epoch": 90.80983606557378, "grad_norm": 6.269505500793457, "learning_rate": 4.39707744603064e-07, "loss": 0.2173, "step": 27697 }, { "epoch": 90.81311475409836, "grad_norm": 6.8094801902771, "learning_rate": 4.393963774105936e-07, "loss": 0.3157, "step": 27698 }, { "epoch": 90.81639344262295, "grad_norm": 5.4033098220825195, "learning_rate": 4.390851180238698e-07, "loss": 0.3658, "step": 27699 }, { "epoch": 90.81967213114754, "grad_norm": 4.4311981201171875, "learning_rate": 4.387739664464019e-07, "loss": 0.5162, "step": 27700 }, { "epoch": 90.82295081967213, "grad_norm": 5.355589389801025, "learning_rate": 4.3846292268169723e-07, "loss": 0.5169, "step": 27701 }, { "epoch": 90.82622950819672, "grad_norm": 6.200017929077148, "learning_rate": 4.381519867332651e-07, "loss": 0.3715, "step": 27702 }, { "epoch": 90.8295081967213, "grad_norm": 4.40180778503418, "learning_rate": 4.3784115860461273e-07, "loss": 0.3889, "step": 27703 }, { "epoch": 90.8327868852459, "grad_norm": 4.769243240356445, "learning_rate": 4.375304382992418e-07, "loss": 0.4254, "step": 27704 }, { "epoch": 90.8360655737705, "grad_norm": 7.799971580505371, "learning_rate": 4.3721982582065724e-07, "loss": 0.3607, "step": 27705 }, { "epoch": 90.83934426229509, "grad_norm": 4.047650337219238, "learning_rate": 4.3690932117236404e-07, "loss": 0.2906, "step": 27706 }, { "epoch": 90.84262295081967, "grad_norm": 4.86508846282959, "learning_rate": 4.3659892435786056e-07, "loss": 0.453, "step": 27707 }, { "epoch": 90.84590163934426, "grad_norm": 4.416780948638916, "learning_rate": 4.3628863538064726e-07, "loss": 0.3022, "step": 27708 }, { "epoch": 90.84918032786885, "grad_norm": 4.560404300689697, "learning_rate": 4.359784542442236e-07, "loss": 0.3147, "step": 27709 }, { "epoch": 90.85245901639344, "grad_norm": 4.455760955810547, "learning_rate": 4.356683809520856e-07, "loss": 0.2696, "step": 27710 }, { "epoch": 90.85573770491803, "grad_norm": 7.215854644775391, "learning_rate": 4.3535841550773285e-07, "loss": 0.3583, "step": 27711 }, { "epoch": 90.85901639344263, "grad_norm": 7.121883869171143, "learning_rate": 4.3504855791465797e-07, "loss": 0.2405, "step": 27712 }, { "epoch": 90.86229508196722, "grad_norm": 5.341561794281006, "learning_rate": 4.3473880817635703e-07, "loss": 0.4147, "step": 27713 }, { "epoch": 90.8655737704918, "grad_norm": 4.581457614898682, "learning_rate": 4.344291662963185e-07, "loss": 0.3624, "step": 27714 }, { "epoch": 90.8688524590164, "grad_norm": 5.065124988555908, "learning_rate": 4.341196322780394e-07, "loss": 0.406, "step": 27715 }, { "epoch": 90.87213114754098, "grad_norm": 5.088325500488281, "learning_rate": 4.33810206125006e-07, "loss": 0.3996, "step": 27716 }, { "epoch": 90.87540983606557, "grad_norm": 5.609043121337891, "learning_rate": 4.335008878407088e-07, "loss": 0.431, "step": 27717 }, { "epoch": 90.87868852459016, "grad_norm": 8.032692909240723, "learning_rate": 4.331916774286371e-07, "loss": 0.3284, "step": 27718 }, { "epoch": 90.88196721311475, "grad_norm": 5.182547569274902, "learning_rate": 4.328825748922749e-07, "loss": 0.3012, "step": 27719 }, { "epoch": 90.88524590163935, "grad_norm": 4.544591903686523, "learning_rate": 4.3257358023511054e-07, "loss": 0.2598, "step": 27720 }, { "epoch": 90.88852459016394, "grad_norm": 5.30848503112793, "learning_rate": 4.322646934606245e-07, "loss": 0.2964, "step": 27721 }, { "epoch": 90.89180327868853, "grad_norm": 6.724113464355469, "learning_rate": 4.319559145723029e-07, "loss": 0.5202, "step": 27722 }, { "epoch": 90.89508196721312, "grad_norm": 4.559887886047363, "learning_rate": 4.316472435736274e-07, "loss": 0.3398, "step": 27723 }, { "epoch": 90.8983606557377, "grad_norm": 5.870240688323975, "learning_rate": 4.3133868046807636e-07, "loss": 0.3508, "step": 27724 }, { "epoch": 90.90163934426229, "grad_norm": 5.665439605712891, "learning_rate": 4.310302252591325e-07, "loss": 0.4634, "step": 27725 }, { "epoch": 90.90491803278688, "grad_norm": 6.340301990509033, "learning_rate": 4.3072187795027087e-07, "loss": 0.553, "step": 27726 }, { "epoch": 90.90819672131147, "grad_norm": 4.903650283813477, "learning_rate": 4.3041363854496974e-07, "loss": 0.3235, "step": 27727 }, { "epoch": 90.91147540983607, "grad_norm": 22.37098503112793, "learning_rate": 4.3010550704670416e-07, "loss": 0.4578, "step": 27728 }, { "epoch": 90.91475409836066, "grad_norm": 5.027098655700684, "learning_rate": 4.297974834589502e-07, "loss": 0.5249, "step": 27729 }, { "epoch": 90.91803278688525, "grad_norm": 8.33530044555664, "learning_rate": 4.294895677851807e-07, "loss": 0.1499, "step": 27730 }, { "epoch": 90.92131147540984, "grad_norm": 4.325631141662598, "learning_rate": 4.2918176002886616e-07, "loss": 0.4266, "step": 27731 }, { "epoch": 90.92459016393443, "grad_norm": 5.4231462478637695, "learning_rate": 4.288740601934782e-07, "loss": 0.3258, "step": 27732 }, { "epoch": 90.92786885245901, "grad_norm": 4.167054653167725, "learning_rate": 4.285664682824875e-07, "loss": 0.3013, "step": 27733 }, { "epoch": 90.9311475409836, "grad_norm": 6.41246223449707, "learning_rate": 4.282589842993612e-07, "loss": 0.3979, "step": 27734 }, { "epoch": 90.93442622950819, "grad_norm": 4.485942840576172, "learning_rate": 4.2795160824756764e-07, "loss": 0.2554, "step": 27735 }, { "epoch": 90.9377049180328, "grad_norm": 5.46239709854126, "learning_rate": 4.2764434013057077e-07, "loss": 0.3468, "step": 27736 }, { "epoch": 90.94098360655738, "grad_norm": 4.457671165466309, "learning_rate": 4.2733717995183776e-07, "loss": 0.2655, "step": 27737 }, { "epoch": 90.94426229508197, "grad_norm": 9.808024406433105, "learning_rate": 4.2703012771483034e-07, "loss": 0.3168, "step": 27738 }, { "epoch": 90.94754098360656, "grad_norm": 4.8403449058532715, "learning_rate": 4.2672318342301233e-07, "loss": 0.4018, "step": 27739 }, { "epoch": 90.95081967213115, "grad_norm": 4.352163791656494, "learning_rate": 4.2641634707984324e-07, "loss": 0.4155, "step": 27740 }, { "epoch": 90.95409836065573, "grad_norm": 6.454381465911865, "learning_rate": 4.2610961868878473e-07, "loss": 0.387, "step": 27741 }, { "epoch": 90.95737704918032, "grad_norm": 4.142384052276611, "learning_rate": 4.258029982532952e-07, "loss": 0.2534, "step": 27742 }, { "epoch": 90.96065573770491, "grad_norm": 4.973153114318848, "learning_rate": 4.2549648577683064e-07, "loss": 0.1927, "step": 27743 }, { "epoch": 90.96393442622951, "grad_norm": 5.481851577758789, "learning_rate": 4.2519008126284845e-07, "loss": 0.3202, "step": 27744 }, { "epoch": 90.9672131147541, "grad_norm": 3.9402923583984375, "learning_rate": 4.248837847148024e-07, "loss": 0.2, "step": 27745 }, { "epoch": 90.97049180327869, "grad_norm": 4.198123455047607, "learning_rate": 4.245775961361487e-07, "loss": 0.1925, "step": 27746 }, { "epoch": 90.97377049180328, "grad_norm": 4.99208927154541, "learning_rate": 4.2427151553033783e-07, "loss": 0.4855, "step": 27747 }, { "epoch": 90.97704918032787, "grad_norm": 6.3466081619262695, "learning_rate": 4.239655429008227e-07, "loss": 0.4613, "step": 27748 }, { "epoch": 90.98032786885246, "grad_norm": 4.032442092895508, "learning_rate": 4.236596782510505e-07, "loss": 0.4461, "step": 27749 }, { "epoch": 90.98360655737704, "grad_norm": 5.642721176147461, "learning_rate": 4.233539215844751e-07, "loss": 0.2927, "step": 27750 }, { "epoch": 90.98688524590163, "grad_norm": 4.8782958984375, "learning_rate": 4.2304827290454045e-07, "loss": 0.3402, "step": 27751 }, { "epoch": 90.99016393442623, "grad_norm": 5.28685188293457, "learning_rate": 4.2274273221469373e-07, "loss": 0.2082, "step": 27752 }, { "epoch": 90.99344262295082, "grad_norm": 5.5899577140808105, "learning_rate": 4.22437299518379e-07, "loss": 0.479, "step": 27753 }, { "epoch": 90.99672131147541, "grad_norm": 6.308412075042725, "learning_rate": 4.2213197481904443e-07, "loss": 0.423, "step": 27754 }, { "epoch": 91.0, "grad_norm": 3.8124499320983887, "learning_rate": 4.218267581201296e-07, "loss": 0.4408, "step": 27755 }, { "epoch": 91.00327868852459, "grad_norm": 8.86684513092041, "learning_rate": 4.215216494250773e-07, "loss": 0.4202, "step": 27756 }, { "epoch": 91.00655737704918, "grad_norm": 4.480006694793701, "learning_rate": 4.21216648737327e-07, "loss": 0.2229, "step": 27757 }, { "epoch": 91.00983606557377, "grad_norm": 4.455845355987549, "learning_rate": 4.209117560603171e-07, "loss": 0.196, "step": 27758 }, { "epoch": 91.01311475409837, "grad_norm": 4.958367824554443, "learning_rate": 4.206069713974881e-07, "loss": 0.3049, "step": 27759 }, { "epoch": 91.01639344262296, "grad_norm": 3.8844335079193115, "learning_rate": 4.2030229475227615e-07, "loss": 0.5296, "step": 27760 }, { "epoch": 91.01967213114754, "grad_norm": 6.388569355010986, "learning_rate": 4.199977261281163e-07, "loss": 0.3229, "step": 27761 }, { "epoch": 91.02295081967213, "grad_norm": 16.66637420654297, "learning_rate": 4.1969326552844136e-07, "loss": 0.401, "step": 27762 }, { "epoch": 91.02622950819672, "grad_norm": 5.363528728485107, "learning_rate": 4.1938891295668636e-07, "loss": 0.387, "step": 27763 }, { "epoch": 91.02950819672131, "grad_norm": 4.42648983001709, "learning_rate": 4.1908466841628303e-07, "loss": 0.4083, "step": 27764 }, { "epoch": 91.0327868852459, "grad_norm": 6.943442344665527, "learning_rate": 4.187805319106619e-07, "loss": 0.5205, "step": 27765 }, { "epoch": 91.03606557377049, "grad_norm": 5.5027265548706055, "learning_rate": 4.1847650344325143e-07, "loss": 0.3755, "step": 27766 }, { "epoch": 91.03934426229509, "grad_norm": 4.566113471984863, "learning_rate": 4.181725830174821e-07, "loss": 0.3806, "step": 27767 }, { "epoch": 91.04262295081968, "grad_norm": 5.181573867797852, "learning_rate": 4.17868770636779e-07, "loss": 0.5143, "step": 27768 }, { "epoch": 91.04590163934427, "grad_norm": 4.919901371002197, "learning_rate": 4.1756506630456827e-07, "loss": 0.2403, "step": 27769 }, { "epoch": 91.04918032786885, "grad_norm": 5.441098213195801, "learning_rate": 4.1726147002427385e-07, "loss": 0.2376, "step": 27770 }, { "epoch": 91.05245901639344, "grad_norm": 4.637106418609619, "learning_rate": 4.169579817993208e-07, "loss": 0.4963, "step": 27771 }, { "epoch": 91.05573770491803, "grad_norm": 5.2640485763549805, "learning_rate": 4.1665460163312963e-07, "loss": 0.4122, "step": 27772 }, { "epoch": 91.05901639344262, "grad_norm": 5.253617763519287, "learning_rate": 4.1635132952912216e-07, "loss": 0.4525, "step": 27773 }, { "epoch": 91.0622950819672, "grad_norm": 5.026594161987305, "learning_rate": 4.1604816549071783e-07, "loss": 0.2243, "step": 27774 }, { "epoch": 91.06557377049181, "grad_norm": 3.953904628753662, "learning_rate": 4.1574510952133607e-07, "loss": 0.1751, "step": 27775 }, { "epoch": 91.0688524590164, "grad_norm": 4.129374027252197, "learning_rate": 4.15442161624392e-07, "loss": 0.4184, "step": 27776 }, { "epoch": 91.07213114754099, "grad_norm": 5.448979377746582, "learning_rate": 4.1513932180330505e-07, "loss": 0.2879, "step": 27777 }, { "epoch": 91.07540983606557, "grad_norm": 7.652943134307861, "learning_rate": 4.14836590061487e-07, "loss": 0.3888, "step": 27778 }, { "epoch": 91.07868852459016, "grad_norm": 4.74729061126709, "learning_rate": 4.145339664023507e-07, "loss": 0.3979, "step": 27779 }, { "epoch": 91.08196721311475, "grad_norm": 7.17111873626709, "learning_rate": 4.1423145082931216e-07, "loss": 0.5569, "step": 27780 }, { "epoch": 91.08524590163934, "grad_norm": 6.917560577392578, "learning_rate": 4.13929043345781e-07, "loss": 0.4001, "step": 27781 }, { "epoch": 91.08852459016393, "grad_norm": 4.522705554962158, "learning_rate": 4.136267439551667e-07, "loss": 0.2291, "step": 27782 }, { "epoch": 91.09180327868853, "grad_norm": 4.517162322998047, "learning_rate": 4.1332455266087866e-07, "loss": 0.3177, "step": 27783 }, { "epoch": 91.09508196721312, "grad_norm": 5.417970180511475, "learning_rate": 4.1302246946632206e-07, "loss": 0.3889, "step": 27784 }, { "epoch": 91.09836065573771, "grad_norm": 4.954452037811279, "learning_rate": 4.1272049437490636e-07, "loss": 0.3538, "step": 27785 }, { "epoch": 91.1016393442623, "grad_norm": 14.062862396240234, "learning_rate": 4.1241862739003546e-07, "loss": 0.3862, "step": 27786 }, { "epoch": 91.10491803278688, "grad_norm": 4.801652431488037, "learning_rate": 4.1211686851511333e-07, "loss": 0.1469, "step": 27787 }, { "epoch": 91.10819672131147, "grad_norm": 5.252610683441162, "learning_rate": 4.1181521775354063e-07, "loss": 0.3425, "step": 27788 }, { "epoch": 91.11147540983606, "grad_norm": 4.06960916519165, "learning_rate": 4.115136751087223e-07, "loss": 0.3946, "step": 27789 }, { "epoch": 91.11475409836065, "grad_norm": 24.809181213378906, "learning_rate": 4.1121224058405687e-07, "loss": 0.3133, "step": 27790 }, { "epoch": 91.11803278688525, "grad_norm": 4.762194633483887, "learning_rate": 4.109109141829437e-07, "loss": 0.4348, "step": 27791 }, { "epoch": 91.12131147540984, "grad_norm": 3.6627578735351562, "learning_rate": 4.10609695908778e-07, "loss": 0.5369, "step": 27792 }, { "epoch": 91.12459016393443, "grad_norm": 4.623040676116943, "learning_rate": 4.1030858576496025e-07, "loss": 0.5775, "step": 27793 }, { "epoch": 91.12786885245902, "grad_norm": 5.483898162841797, "learning_rate": 4.1000758375488336e-07, "loss": 0.3561, "step": 27794 }, { "epoch": 91.1311475409836, "grad_norm": 5.290987014770508, "learning_rate": 4.097066898819424e-07, "loss": 0.4125, "step": 27795 }, { "epoch": 91.1344262295082, "grad_norm": 4.30919885635376, "learning_rate": 4.094059041495302e-07, "loss": 0.3057, "step": 27796 }, { "epoch": 91.13770491803278, "grad_norm": 8.324377059936523, "learning_rate": 4.091052265610362e-07, "loss": 0.5633, "step": 27797 }, { "epoch": 91.14098360655737, "grad_norm": 5.04221773147583, "learning_rate": 4.088046571198545e-07, "loss": 0.2507, "step": 27798 }, { "epoch": 91.14426229508197, "grad_norm": 5.408464431762695, "learning_rate": 4.0850419582937227e-07, "loss": 0.2403, "step": 27799 }, { "epoch": 91.14754098360656, "grad_norm": 4.692224025726318, "learning_rate": 4.0820384269297796e-07, "loss": 0.272, "step": 27800 }, { "epoch": 91.15081967213115, "grad_norm": 4.821423053741455, "learning_rate": 4.0790359771405774e-07, "loss": 0.2685, "step": 27801 }, { "epoch": 91.15409836065574, "grad_norm": 5.9327921867370605, "learning_rate": 4.076034608959978e-07, "loss": 0.328, "step": 27802 }, { "epoch": 91.15737704918033, "grad_norm": 6.473835468292236, "learning_rate": 4.0730343224218314e-07, "loss": 0.3349, "step": 27803 }, { "epoch": 91.16065573770491, "grad_norm": 4.856049537658691, "learning_rate": 4.070035117559967e-07, "loss": 0.6374, "step": 27804 }, { "epoch": 91.1639344262295, "grad_norm": 5.733102798461914, "learning_rate": 4.06703699440818e-07, "loss": 0.3063, "step": 27805 }, { "epoch": 91.1672131147541, "grad_norm": 5.557199478149414, "learning_rate": 4.0640399530003095e-07, "loss": 0.4439, "step": 27806 }, { "epoch": 91.1704918032787, "grad_norm": 4.083489418029785, "learning_rate": 4.0610439933701396e-07, "loss": 0.3012, "step": 27807 }, { "epoch": 91.17377049180328, "grad_norm": 3.376344919204712, "learning_rate": 4.0580491155514436e-07, "loss": 0.1747, "step": 27808 }, { "epoch": 91.17704918032787, "grad_norm": 9.191031455993652, "learning_rate": 4.0550553195780053e-07, "loss": 0.2981, "step": 27809 }, { "epoch": 91.18032786885246, "grad_norm": 3.713301658630371, "learning_rate": 4.052062605483564e-07, "loss": 0.3131, "step": 27810 }, { "epoch": 91.18360655737705, "grad_norm": 4.417366027832031, "learning_rate": 4.049070973301883e-07, "loss": 0.3301, "step": 27811 }, { "epoch": 91.18688524590164, "grad_norm": 5.322583198547363, "learning_rate": 4.0460804230667004e-07, "loss": 0.4532, "step": 27812 }, { "epoch": 91.19016393442622, "grad_norm": 4.38964319229126, "learning_rate": 4.0430909548117236e-07, "loss": 0.333, "step": 27813 }, { "epoch": 91.19344262295083, "grad_norm": 5.633648872375488, "learning_rate": 4.0401025685706473e-07, "loss": 0.5143, "step": 27814 }, { "epoch": 91.19672131147541, "grad_norm": 5.130037784576416, "learning_rate": 4.0371152643772003e-07, "loss": 0.4205, "step": 27815 }, { "epoch": 91.2, "grad_norm": 4.064992904663086, "learning_rate": 4.034129042265067e-07, "loss": 0.352, "step": 27816 }, { "epoch": 91.20327868852459, "grad_norm": 4.089390754699707, "learning_rate": 4.031143902267898e-07, "loss": 0.517, "step": 27817 }, { "epoch": 91.20655737704918, "grad_norm": 4.526196002960205, "learning_rate": 4.0281598444193546e-07, "loss": 0.2761, "step": 27818 }, { "epoch": 91.20983606557377, "grad_norm": 7.45630407333374, "learning_rate": 4.0251768687531115e-07, "loss": 0.2349, "step": 27819 }, { "epoch": 91.21311475409836, "grad_norm": 5.038726329803467, "learning_rate": 4.0221949753027845e-07, "loss": 0.3389, "step": 27820 }, { "epoch": 91.21639344262294, "grad_norm": 5.359532833099365, "learning_rate": 4.019214164102003e-07, "loss": 0.3804, "step": 27821 }, { "epoch": 91.21967213114755, "grad_norm": 4.598383903503418, "learning_rate": 4.016234435184374e-07, "loss": 0.2448, "step": 27822 }, { "epoch": 91.22295081967214, "grad_norm": 4.673665523529053, "learning_rate": 4.013255788583492e-07, "loss": 0.3962, "step": 27823 }, { "epoch": 91.22622950819672, "grad_norm": 8.572271347045898, "learning_rate": 4.010278224332953e-07, "loss": 0.2796, "step": 27824 }, { "epoch": 91.22950819672131, "grad_norm": 6.817227840423584, "learning_rate": 4.007301742466341e-07, "loss": 0.5957, "step": 27825 }, { "epoch": 91.2327868852459, "grad_norm": 4.922850131988525, "learning_rate": 4.004326343017195e-07, "loss": 0.3266, "step": 27826 }, { "epoch": 91.23606557377049, "grad_norm": 3.7640771865844727, "learning_rate": 4.0013520260190897e-07, "loss": 0.3545, "step": 27827 }, { "epoch": 91.23934426229508, "grad_norm": 4.518048286437988, "learning_rate": 3.998378791505564e-07, "loss": 0.3916, "step": 27828 }, { "epoch": 91.24262295081967, "grad_norm": 4.683071613311768, "learning_rate": 3.9954066395101134e-07, "loss": 0.4786, "step": 27829 }, { "epoch": 91.24590163934427, "grad_norm": 5.116623401641846, "learning_rate": 3.992435570066278e-07, "loss": 0.3221, "step": 27830 }, { "epoch": 91.24918032786886, "grad_norm": 4.822528839111328, "learning_rate": 3.9894655832075636e-07, "loss": 0.3815, "step": 27831 }, { "epoch": 91.25245901639344, "grad_norm": 7.073688507080078, "learning_rate": 3.986496678967444e-07, "loss": 0.2742, "step": 27832 }, { "epoch": 91.25573770491803, "grad_norm": 4.296003341674805, "learning_rate": 3.983528857379404e-07, "loss": 0.3161, "step": 27833 }, { "epoch": 91.25901639344262, "grad_norm": 5.097870349884033, "learning_rate": 3.980562118476916e-07, "loss": 0.5629, "step": 27834 }, { "epoch": 91.26229508196721, "grad_norm": 20.87700080871582, "learning_rate": 3.9775964622934203e-07, "loss": 0.2411, "step": 27835 }, { "epoch": 91.2655737704918, "grad_norm": 4.520259380340576, "learning_rate": 3.974631888862357e-07, "loss": 0.2606, "step": 27836 }, { "epoch": 91.26885245901639, "grad_norm": 5.184736728668213, "learning_rate": 3.9716683982171653e-07, "loss": 0.2887, "step": 27837 }, { "epoch": 91.27213114754099, "grad_norm": 4.710405349731445, "learning_rate": 3.968705990391253e-07, "loss": 0.3685, "step": 27838 }, { "epoch": 91.27540983606558, "grad_norm": 4.9036760330200195, "learning_rate": 3.9657446654180363e-07, "loss": 0.2949, "step": 27839 }, { "epoch": 91.27868852459017, "grad_norm": 4.044722080230713, "learning_rate": 3.96278442333089e-07, "loss": 0.3372, "step": 27840 }, { "epoch": 91.28196721311475, "grad_norm": 5.420104026794434, "learning_rate": 3.9598252641632086e-07, "loss": 0.2902, "step": 27841 }, { "epoch": 91.28524590163934, "grad_norm": 5.2062249183654785, "learning_rate": 3.9568671879483547e-07, "loss": 0.3335, "step": 27842 }, { "epoch": 91.28852459016393, "grad_norm": 5.950870037078857, "learning_rate": 3.95391019471969e-07, "loss": 0.4143, "step": 27843 }, { "epoch": 91.29180327868852, "grad_norm": 5.227590084075928, "learning_rate": 3.950954284510533e-07, "loss": 0.2459, "step": 27844 }, { "epoch": 91.29508196721312, "grad_norm": 4.0207438468933105, "learning_rate": 3.947999457354246e-07, "loss": 0.4477, "step": 27845 }, { "epoch": 91.29836065573771, "grad_norm": 5.290316104888916, "learning_rate": 3.9450457132841236e-07, "loss": 0.3906, "step": 27846 }, { "epoch": 91.3016393442623, "grad_norm": 5.8372344970703125, "learning_rate": 3.9420930523334953e-07, "loss": 0.6073, "step": 27847 }, { "epoch": 91.30491803278689, "grad_norm": 4.73098611831665, "learning_rate": 3.939141474535646e-07, "loss": 0.3721, "step": 27848 }, { "epoch": 91.30819672131148, "grad_norm": 5.086411476135254, "learning_rate": 3.9361909799238264e-07, "loss": 0.4768, "step": 27849 }, { "epoch": 91.31147540983606, "grad_norm": 5.696443557739258, "learning_rate": 3.9332415685313653e-07, "loss": 0.5829, "step": 27850 }, { "epoch": 91.31475409836065, "grad_norm": 4.305405139923096, "learning_rate": 3.9302932403914807e-07, "loss": 0.2582, "step": 27851 }, { "epoch": 91.31803278688524, "grad_norm": 4.199689865112305, "learning_rate": 3.927345995537424e-07, "loss": 0.317, "step": 27852 }, { "epoch": 91.32131147540984, "grad_norm": 5.136808395385742, "learning_rate": 3.9243998340024237e-07, "loss": 0.643, "step": 27853 }, { "epoch": 91.32459016393443, "grad_norm": 7.097021102905273, "learning_rate": 3.92145475581972e-07, "loss": 0.3265, "step": 27854 }, { "epoch": 91.32786885245902, "grad_norm": 34.86250305175781, "learning_rate": 3.9185107610225095e-07, "loss": 0.219, "step": 27855 }, { "epoch": 91.33114754098361, "grad_norm": 5.180882453918457, "learning_rate": 3.9155678496439977e-07, "loss": 0.529, "step": 27856 }, { "epoch": 91.3344262295082, "grad_norm": 4.549999237060547, "learning_rate": 3.9126260217173475e-07, "loss": 0.4422, "step": 27857 }, { "epoch": 91.33770491803278, "grad_norm": 7.181354522705078, "learning_rate": 3.909685277275743e-07, "loss": 0.3179, "step": 27858 }, { "epoch": 91.34098360655737, "grad_norm": 5.670402526855469, "learning_rate": 3.9067456163523587e-07, "loss": 0.2518, "step": 27859 }, { "epoch": 91.34426229508196, "grad_norm": 5.042319297790527, "learning_rate": 3.903807038980323e-07, "loss": 0.4615, "step": 27860 }, { "epoch": 91.34754098360656, "grad_norm": 5.532441139221191, "learning_rate": 3.900869545192787e-07, "loss": 0.3855, "step": 27861 }, { "epoch": 91.35081967213115, "grad_norm": 6.571128845214844, "learning_rate": 3.8979331350228466e-07, "loss": 0.4161, "step": 27862 }, { "epoch": 91.35409836065574, "grad_norm": 5.28865385055542, "learning_rate": 3.894997808503642e-07, "loss": 0.3816, "step": 27863 }, { "epoch": 91.35737704918033, "grad_norm": 6.336545467376709, "learning_rate": 3.892063565668269e-07, "loss": 0.2959, "step": 27864 }, { "epoch": 91.36065573770492, "grad_norm": 3.3061716556549072, "learning_rate": 3.8891304065498016e-07, "loss": 0.2392, "step": 27865 }, { "epoch": 91.3639344262295, "grad_norm": 4.557370662689209, "learning_rate": 3.886198331181301e-07, "loss": 0.4583, "step": 27866 }, { "epoch": 91.3672131147541, "grad_norm": 4.744729518890381, "learning_rate": 3.883267339595864e-07, "loss": 0.1464, "step": 27867 }, { "epoch": 91.37049180327868, "grad_norm": 5.0456647872924805, "learning_rate": 3.8803374318265195e-07, "loss": 0.3173, "step": 27868 }, { "epoch": 91.37377049180328, "grad_norm": 4.917658805847168, "learning_rate": 3.877408607906319e-07, "loss": 0.4198, "step": 27869 }, { "epoch": 91.37704918032787, "grad_norm": 5.148768901824951, "learning_rate": 3.874480867868269e-07, "loss": 0.2523, "step": 27870 }, { "epoch": 91.38032786885246, "grad_norm": 4.085678577423096, "learning_rate": 3.8715542117453987e-07, "loss": 0.3734, "step": 27871 }, { "epoch": 91.38360655737705, "grad_norm": 4.481315612792969, "learning_rate": 3.8686286395706927e-07, "loss": 0.2887, "step": 27872 }, { "epoch": 91.38688524590164, "grad_norm": 4.245935440063477, "learning_rate": 3.86570415137717e-07, "loss": 0.2132, "step": 27873 }, { "epoch": 91.39016393442623, "grad_norm": 4.5700883865356445, "learning_rate": 3.86278074719777e-07, "loss": 0.3394, "step": 27874 }, { "epoch": 91.39344262295081, "grad_norm": 5.47423791885376, "learning_rate": 3.859858427065477e-07, "loss": 0.2854, "step": 27875 }, { "epoch": 91.3967213114754, "grad_norm": 4.214055061340332, "learning_rate": 3.8569371910132436e-07, "loss": 0.1545, "step": 27876 }, { "epoch": 91.4, "grad_norm": 5.622100353240967, "learning_rate": 3.8540170390740097e-07, "loss": 0.3799, "step": 27877 }, { "epoch": 91.4032786885246, "grad_norm": 4.816583633422852, "learning_rate": 3.851097971280693e-07, "loss": 0.5918, "step": 27878 }, { "epoch": 91.40655737704918, "grad_norm": 10.265629768371582, "learning_rate": 3.848179987666223e-07, "loss": 0.2377, "step": 27879 }, { "epoch": 91.40983606557377, "grad_norm": 3.903118848800659, "learning_rate": 3.845263088263496e-07, "loss": 0.3147, "step": 27880 }, { "epoch": 91.41311475409836, "grad_norm": 22.179462432861328, "learning_rate": 3.842347273105396e-07, "loss": 0.2249, "step": 27881 }, { "epoch": 91.41639344262295, "grad_norm": 8.824228286743164, "learning_rate": 3.839432542224819e-07, "loss": 0.3813, "step": 27882 }, { "epoch": 91.41967213114754, "grad_norm": 6.323551654815674, "learning_rate": 3.836518895654617e-07, "loss": 0.2113, "step": 27883 }, { "epoch": 91.42295081967212, "grad_norm": 6.398455619812012, "learning_rate": 3.833606333427664e-07, "loss": 0.473, "step": 27884 }, { "epoch": 91.42622950819673, "grad_norm": 4.884579181671143, "learning_rate": 3.8306948555767663e-07, "loss": 0.431, "step": 27885 }, { "epoch": 91.42950819672132, "grad_norm": 5.519304275512695, "learning_rate": 3.827784462134787e-07, "loss": 0.5136, "step": 27886 }, { "epoch": 91.4327868852459, "grad_norm": 4.975332260131836, "learning_rate": 3.824875153134522e-07, "loss": 0.3751, "step": 27887 }, { "epoch": 91.43606557377049, "grad_norm": 5.002830982208252, "learning_rate": 3.821966928608789e-07, "loss": 0.2365, "step": 27888 }, { "epoch": 91.43934426229508, "grad_norm": 4.371312141418457, "learning_rate": 3.8190597885903845e-07, "loss": 0.3755, "step": 27889 }, { "epoch": 91.44262295081967, "grad_norm": 6.495502948760986, "learning_rate": 3.816153733112093e-07, "loss": 0.3236, "step": 27890 }, { "epoch": 91.44590163934426, "grad_norm": 4.64435338973999, "learning_rate": 3.8132487622066673e-07, "loss": 0.3586, "step": 27891 }, { "epoch": 91.44918032786886, "grad_norm": 9.335698127746582, "learning_rate": 3.810344875906857e-07, "loss": 0.2474, "step": 27892 }, { "epoch": 91.45245901639345, "grad_norm": 4.87918758392334, "learning_rate": 3.807442074245427e-07, "loss": 0.2843, "step": 27893 }, { "epoch": 91.45573770491804, "grad_norm": 5.109536647796631, "learning_rate": 3.8045403572551153e-07, "loss": 0.3685, "step": 27894 }, { "epoch": 91.45901639344262, "grad_norm": 3.759688138961792, "learning_rate": 3.80163972496862e-07, "loss": 0.279, "step": 27895 }, { "epoch": 91.46229508196721, "grad_norm": 4.8278913497924805, "learning_rate": 3.7987401774186585e-07, "loss": 0.4258, "step": 27896 }, { "epoch": 91.4655737704918, "grad_norm": 4.2758307456970215, "learning_rate": 3.795841714637927e-07, "loss": 0.2344, "step": 27897 }, { "epoch": 91.46885245901639, "grad_norm": 3.507253885269165, "learning_rate": 3.7929443366591104e-07, "loss": 0.2893, "step": 27898 }, { "epoch": 91.47213114754098, "grad_norm": 6.299249649047852, "learning_rate": 3.790048043514871e-07, "loss": 0.237, "step": 27899 }, { "epoch": 91.47540983606558, "grad_norm": 5.2575507164001465, "learning_rate": 3.787152835237884e-07, "loss": 0.1739, "step": 27900 }, { "epoch": 91.47868852459017, "grad_norm": 25.128847122192383, "learning_rate": 3.7842587118607666e-07, "loss": 0.5172, "step": 27901 }, { "epoch": 91.48196721311476, "grad_norm": 7.684408664703369, "learning_rate": 3.781365673416182e-07, "loss": 0.3302, "step": 27902 }, { "epoch": 91.48524590163935, "grad_norm": 6.0111541748046875, "learning_rate": 3.7784737199367373e-07, "loss": 0.468, "step": 27903 }, { "epoch": 91.48852459016393, "grad_norm": 6.197929859161377, "learning_rate": 3.775582851455062e-07, "loss": 0.4007, "step": 27904 }, { "epoch": 91.49180327868852, "grad_norm": 8.620804786682129, "learning_rate": 3.772693068003719e-07, "loss": 0.294, "step": 27905 }, { "epoch": 91.49508196721311, "grad_norm": 8.514121055603027, "learning_rate": 3.7698043696153155e-07, "loss": 0.2808, "step": 27906 }, { "epoch": 91.4983606557377, "grad_norm": 5.02213716506958, "learning_rate": 3.766916756322436e-07, "loss": 0.3036, "step": 27907 }, { "epoch": 91.5016393442623, "grad_norm": 5.435144901275635, "learning_rate": 3.7640302281576225e-07, "loss": 0.5213, "step": 27908 }, { "epoch": 91.50491803278689, "grad_norm": 19.375831604003906, "learning_rate": 3.7611447851534145e-07, "loss": 0.5086, "step": 27909 }, { "epoch": 91.50819672131148, "grad_norm": 6.984311580657959, "learning_rate": 3.7582604273423753e-07, "loss": 0.3623, "step": 27910 }, { "epoch": 91.51147540983607, "grad_norm": 5.584383487701416, "learning_rate": 3.755377154757012e-07, "loss": 0.2938, "step": 27911 }, { "epoch": 91.51475409836065, "grad_norm": 5.694177627563477, "learning_rate": 3.7524949674298427e-07, "loss": 0.4217, "step": 27912 }, { "epoch": 91.51803278688524, "grad_norm": 6.997763633728027, "learning_rate": 3.7496138653933755e-07, "loss": 0.3541, "step": 27913 }, { "epoch": 91.52131147540983, "grad_norm": 5.0965657234191895, "learning_rate": 3.7467338486800617e-07, "loss": 0.4111, "step": 27914 }, { "epoch": 91.52459016393442, "grad_norm": 4.921359539031982, "learning_rate": 3.7438549173224204e-07, "loss": 0.1537, "step": 27915 }, { "epoch": 91.52786885245902, "grad_norm": 3.8048818111419678, "learning_rate": 3.7409770713528915e-07, "loss": 0.2869, "step": 27916 }, { "epoch": 91.53114754098361, "grad_norm": 5.240363597869873, "learning_rate": 3.7381003108039385e-07, "loss": 0.4325, "step": 27917 }, { "epoch": 91.5344262295082, "grad_norm": 4.172729969024658, "learning_rate": 3.735224635707968e-07, "loss": 0.3105, "step": 27918 }, { "epoch": 91.53770491803279, "grad_norm": 6.037522792816162, "learning_rate": 3.7323500460974546e-07, "loss": 0.5346, "step": 27919 }, { "epoch": 91.54098360655738, "grad_norm": 5.363565921783447, "learning_rate": 3.729476542004784e-07, "loss": 0.4785, "step": 27920 }, { "epoch": 91.54426229508196, "grad_norm": 5.898758888244629, "learning_rate": 3.7266041234623627e-07, "loss": 0.529, "step": 27921 }, { "epoch": 91.54754098360655, "grad_norm": 4.669853687286377, "learning_rate": 3.7237327905025545e-07, "loss": 0.2941, "step": 27922 }, { "epoch": 91.55081967213114, "grad_norm": 5.259763717651367, "learning_rate": 3.720862543157788e-07, "loss": 0.213, "step": 27923 }, { "epoch": 91.55409836065574, "grad_norm": 9.664094924926758, "learning_rate": 3.717993381460394e-07, "loss": 0.3802, "step": 27924 }, { "epoch": 91.55737704918033, "grad_norm": 5.078062057495117, "learning_rate": 3.715125305442735e-07, "loss": 0.525, "step": 27925 }, { "epoch": 91.56065573770492, "grad_norm": 10.017766952514648, "learning_rate": 3.7122583151371515e-07, "loss": 0.6586, "step": 27926 }, { "epoch": 91.56393442622951, "grad_norm": 3.95459246635437, "learning_rate": 3.709392410575963e-07, "loss": 0.1131, "step": 27927 }, { "epoch": 91.5672131147541, "grad_norm": 4.150200366973877, "learning_rate": 3.706527591791487e-07, "loss": 0.308, "step": 27928 }, { "epoch": 91.57049180327868, "grad_norm": 8.132591247558594, "learning_rate": 3.7036638588160424e-07, "loss": 0.3574, "step": 27929 }, { "epoch": 91.57377049180327, "grad_norm": 5.825868606567383, "learning_rate": 3.7008012116819147e-07, "loss": 0.411, "step": 27930 }, { "epoch": 91.57704918032788, "grad_norm": 5.06174373626709, "learning_rate": 3.6979396504213673e-07, "loss": 0.2779, "step": 27931 }, { "epoch": 91.58032786885246, "grad_norm": 3.73734188079834, "learning_rate": 3.6950791750666847e-07, "loss": 0.1593, "step": 27932 }, { "epoch": 91.58360655737705, "grad_norm": 4.167530059814453, "learning_rate": 3.6922197856501195e-07, "loss": 0.3276, "step": 27933 }, { "epoch": 91.58688524590164, "grad_norm": 5.731517314910889, "learning_rate": 3.6893614822039016e-07, "loss": 0.2802, "step": 27934 }, { "epoch": 91.59016393442623, "grad_norm": 6.842041492462158, "learning_rate": 3.6865042647602825e-07, "loss": 0.239, "step": 27935 }, { "epoch": 91.59344262295082, "grad_norm": 6.028055191040039, "learning_rate": 3.6836481333514694e-07, "loss": 0.3303, "step": 27936 }, { "epoch": 91.5967213114754, "grad_norm": 4.719414234161377, "learning_rate": 3.6807930880096487e-07, "loss": 0.0939, "step": 27937 }, { "epoch": 91.6, "grad_norm": 5.047988414764404, "learning_rate": 3.67793912876705e-07, "loss": 0.3391, "step": 27938 }, { "epoch": 91.6032786885246, "grad_norm": 5.15741491317749, "learning_rate": 3.675086255655835e-07, "loss": 0.4978, "step": 27939 }, { "epoch": 91.60655737704919, "grad_norm": 5.231904983520508, "learning_rate": 3.672234468708169e-07, "loss": 0.3813, "step": 27940 }, { "epoch": 91.60983606557377, "grad_norm": 4.039738178253174, "learning_rate": 3.669383767956214e-07, "loss": 0.4004, "step": 27941 }, { "epoch": 91.61311475409836, "grad_norm": 6.131674766540527, "learning_rate": 3.666534153432133e-07, "loss": 0.5147, "step": 27942 }, { "epoch": 91.61639344262295, "grad_norm": 5.683681488037109, "learning_rate": 3.6636856251680343e-07, "loss": 0.4809, "step": 27943 }, { "epoch": 91.61967213114754, "grad_norm": 10.762539863586426, "learning_rate": 3.660838183196025e-07, "loss": 0.2652, "step": 27944 }, { "epoch": 91.62295081967213, "grad_norm": 4.985958099365234, "learning_rate": 3.6579918275482574e-07, "loss": 0.3728, "step": 27945 }, { "epoch": 91.62622950819672, "grad_norm": 4.1738200187683105, "learning_rate": 3.6551465582567945e-07, "loss": 0.3275, "step": 27946 }, { "epoch": 91.62950819672132, "grad_norm": 4.939978122711182, "learning_rate": 3.652302375353722e-07, "loss": 0.4785, "step": 27947 }, { "epoch": 91.6327868852459, "grad_norm": 5.546266078948975, "learning_rate": 3.6494592788711147e-07, "loss": 0.4797, "step": 27948 }, { "epoch": 91.6360655737705, "grad_norm": 6.718012809753418, "learning_rate": 3.646617268841046e-07, "loss": 0.5933, "step": 27949 }, { "epoch": 91.63934426229508, "grad_norm": 4.719939708709717, "learning_rate": 3.6437763452955465e-07, "loss": 0.2491, "step": 27950 }, { "epoch": 91.64262295081967, "grad_norm": 4.48192024230957, "learning_rate": 3.640936508266657e-07, "loss": 0.3304, "step": 27951 }, { "epoch": 91.64590163934426, "grad_norm": 5.5083160400390625, "learning_rate": 3.6380977577863965e-07, "loss": 0.4978, "step": 27952 }, { "epoch": 91.64918032786885, "grad_norm": 3.890371561050415, "learning_rate": 3.635260093886761e-07, "loss": 0.3869, "step": 27953 }, { "epoch": 91.65245901639344, "grad_norm": 4.36208438873291, "learning_rate": 3.63242351659977e-07, "loss": 0.4155, "step": 27954 }, { "epoch": 91.65573770491804, "grad_norm": 4.429922580718994, "learning_rate": 3.629588025957409e-07, "loss": 0.2542, "step": 27955 }, { "epoch": 91.65901639344263, "grad_norm": 5.279900074005127, "learning_rate": 3.62675362199163e-07, "loss": 0.2979, "step": 27956 }, { "epoch": 91.66229508196722, "grad_norm": 9.730707168579102, "learning_rate": 3.623920304734407e-07, "loss": 0.4077, "step": 27957 }, { "epoch": 91.6655737704918, "grad_norm": 5.705686569213867, "learning_rate": 3.6210880742176933e-07, "loss": 0.5028, "step": 27958 }, { "epoch": 91.66885245901639, "grad_norm": 5.649582386016846, "learning_rate": 3.6182569304734295e-07, "loss": 0.4473, "step": 27959 }, { "epoch": 91.67213114754098, "grad_norm": 4.400589466094971, "learning_rate": 3.615426873533523e-07, "loss": 0.44, "step": 27960 }, { "epoch": 91.67540983606557, "grad_norm": 4.740395545959473, "learning_rate": 3.612597903429882e-07, "loss": 0.4821, "step": 27961 }, { "epoch": 91.67868852459016, "grad_norm": 5.958409786224365, "learning_rate": 3.609770020194436e-07, "loss": 0.2679, "step": 27962 }, { "epoch": 91.68196721311476, "grad_norm": 4.6336798667907715, "learning_rate": 3.6069432238590497e-07, "loss": 0.4387, "step": 27963 }, { "epoch": 91.68524590163935, "grad_norm": 4.951160907745361, "learning_rate": 3.6041175144556075e-07, "loss": 0.2377, "step": 27964 }, { "epoch": 91.68852459016394, "grad_norm": 5.067210674285889, "learning_rate": 3.6012928920159617e-07, "loss": 0.5125, "step": 27965 }, { "epoch": 91.69180327868852, "grad_norm": 5.6042094230651855, "learning_rate": 3.5984693565719543e-07, "loss": 0.3006, "step": 27966 }, { "epoch": 91.69508196721311, "grad_norm": 4.592560291290283, "learning_rate": 3.595646908155448e-07, "loss": 0.3513, "step": 27967 }, { "epoch": 91.6983606557377, "grad_norm": 6.580898284912109, "learning_rate": 3.592825546798262e-07, "loss": 0.2776, "step": 27968 }, { "epoch": 91.70163934426229, "grad_norm": 5.478140830993652, "learning_rate": 3.590005272532204e-07, "loss": 0.209, "step": 27969 }, { "epoch": 91.70491803278688, "grad_norm": 6.313599109649658, "learning_rate": 3.5871860853890714e-07, "loss": 0.2308, "step": 27970 }, { "epoch": 91.70819672131148, "grad_norm": 4.175234317779541, "learning_rate": 3.584367985400661e-07, "loss": 0.3285, "step": 27971 }, { "epoch": 91.71147540983607, "grad_norm": 4.641498565673828, "learning_rate": 3.581550972598757e-07, "loss": 0.4461, "step": 27972 }, { "epoch": 91.71475409836066, "grad_norm": 5.182548522949219, "learning_rate": 3.578735047015114e-07, "loss": 0.3719, "step": 27973 }, { "epoch": 91.71803278688525, "grad_norm": 6.514684677124023, "learning_rate": 3.5759202086814713e-07, "loss": 0.5124, "step": 27974 }, { "epoch": 91.72131147540983, "grad_norm": 9.639435768127441, "learning_rate": 3.573106457629605e-07, "loss": 0.4532, "step": 27975 }, { "epoch": 91.72459016393442, "grad_norm": 5.545796871185303, "learning_rate": 3.570293793891211e-07, "loss": 0.2229, "step": 27976 }, { "epoch": 91.72786885245901, "grad_norm": 4.294266700744629, "learning_rate": 3.5674822174980195e-07, "loss": 0.4242, "step": 27977 }, { "epoch": 91.73114754098361, "grad_norm": 5.218832969665527, "learning_rate": 3.564671728481739e-07, "loss": 0.4276, "step": 27978 }, { "epoch": 91.7344262295082, "grad_norm": 5.097657203674316, "learning_rate": 3.5618623268740324e-07, "loss": 0.3009, "step": 27979 }, { "epoch": 91.73770491803279, "grad_norm": 5.379403591156006, "learning_rate": 3.5590540127066086e-07, "loss": 0.3827, "step": 27980 }, { "epoch": 91.74098360655738, "grad_norm": 12.118732452392578, "learning_rate": 3.5562467860111306e-07, "loss": 0.286, "step": 27981 }, { "epoch": 91.74426229508197, "grad_norm": 4.407747268676758, "learning_rate": 3.553440646819251e-07, "loss": 0.4459, "step": 27982 }, { "epoch": 91.74754098360656, "grad_norm": 4.7170186042785645, "learning_rate": 3.550635595162588e-07, "loss": 0.1884, "step": 27983 }, { "epoch": 91.75081967213114, "grad_norm": 4.114686965942383, "learning_rate": 3.5478316310727957e-07, "loss": 0.3495, "step": 27984 }, { "epoch": 91.75409836065573, "grad_norm": 4.565091609954834, "learning_rate": 3.545028754581492e-07, "loss": 0.3178, "step": 27985 }, { "epoch": 91.75737704918033, "grad_norm": 6.431108474731445, "learning_rate": 3.5422269657202855e-07, "loss": 0.4413, "step": 27986 }, { "epoch": 91.76065573770492, "grad_norm": 3.9655914306640625, "learning_rate": 3.53942626452074e-07, "loss": 0.408, "step": 27987 }, { "epoch": 91.76393442622951, "grad_norm": 4.786816120147705, "learning_rate": 3.536626651014474e-07, "loss": 0.4015, "step": 27988 }, { "epoch": 91.7672131147541, "grad_norm": 3.7197556495666504, "learning_rate": 3.533828125233041e-07, "loss": 0.3672, "step": 27989 }, { "epoch": 91.77049180327869, "grad_norm": 5.20824670791626, "learning_rate": 3.5310306872079816e-07, "loss": 0.3782, "step": 27990 }, { "epoch": 91.77377049180328, "grad_norm": 5.041345596313477, "learning_rate": 3.5282343369708595e-07, "loss": 0.3651, "step": 27991 }, { "epoch": 91.77704918032786, "grad_norm": 4.392991065979004, "learning_rate": 3.5254390745532054e-07, "loss": 0.4224, "step": 27992 }, { "epoch": 91.78032786885245, "grad_norm": 5.947929859161377, "learning_rate": 3.5226448999865273e-07, "loss": 0.412, "step": 27993 }, { "epoch": 91.78360655737706, "grad_norm": 4.932339191436768, "learning_rate": 3.519851813302344e-07, "loss": 0.1975, "step": 27994 }, { "epoch": 91.78688524590164, "grad_norm": 6.282548427581787, "learning_rate": 3.5170598145321424e-07, "loss": 0.3817, "step": 27995 }, { "epoch": 91.79016393442623, "grad_norm": 5.905345916748047, "learning_rate": 3.514268903707407e-07, "loss": 0.2164, "step": 27996 }, { "epoch": 91.79344262295082, "grad_norm": 5.815830707550049, "learning_rate": 3.5114790808596146e-07, "loss": 0.5557, "step": 27997 }, { "epoch": 91.79672131147541, "grad_norm": 4.739381790161133, "learning_rate": 3.508690346020216e-07, "loss": 0.3651, "step": 27998 }, { "epoch": 91.8, "grad_norm": 4.115779876708984, "learning_rate": 3.5059026992206645e-07, "loss": 0.2387, "step": 27999 }, { "epoch": 91.80327868852459, "grad_norm": 4.484498023986816, "learning_rate": 3.5031161404923797e-07, "loss": 0.1414, "step": 28000 }, { "epoch": 91.80655737704917, "grad_norm": 4.284698009490967, "learning_rate": 3.500330669866803e-07, "loss": 0.2215, "step": 28001 }, { "epoch": 91.80983606557378, "grad_norm": 4.160896301269531, "learning_rate": 3.4975462873753305e-07, "loss": 0.2711, "step": 28002 }, { "epoch": 91.81311475409836, "grad_norm": 7.926340103149414, "learning_rate": 3.494762993049361e-07, "loss": 0.3773, "step": 28003 }, { "epoch": 91.81639344262295, "grad_norm": 5.597655773162842, "learning_rate": 3.491980786920279e-07, "loss": 0.4376, "step": 28004 }, { "epoch": 91.81967213114754, "grad_norm": 5.057161331176758, "learning_rate": 3.4891996690194383e-07, "loss": 0.4172, "step": 28005 }, { "epoch": 91.82295081967213, "grad_norm": 5.235042572021484, "learning_rate": 3.4864196393782355e-07, "loss": 0.2536, "step": 28006 }, { "epoch": 91.82622950819672, "grad_norm": 5.139111518859863, "learning_rate": 3.483640698028001e-07, "loss": 0.2974, "step": 28007 }, { "epoch": 91.8295081967213, "grad_norm": 4.880712985992432, "learning_rate": 3.480862845000066e-07, "loss": 0.2964, "step": 28008 }, { "epoch": 91.8327868852459, "grad_norm": 7.4623637199401855, "learning_rate": 3.478086080325749e-07, "loss": 0.457, "step": 28009 }, { "epoch": 91.8360655737705, "grad_norm": 5.101904392242432, "learning_rate": 3.4753104040363804e-07, "loss": 0.2034, "step": 28010 }, { "epoch": 91.83934426229509, "grad_norm": 4.813209533691406, "learning_rate": 3.4725358161632474e-07, "loss": 0.318, "step": 28011 }, { "epoch": 91.84262295081967, "grad_norm": 4.4735541343688965, "learning_rate": 3.469762316737635e-07, "loss": 0.56, "step": 28012 }, { "epoch": 91.84590163934426, "grad_norm": 4.878197193145752, "learning_rate": 3.4669899057908073e-07, "loss": 0.2586, "step": 28013 }, { "epoch": 91.84918032786885, "grad_norm": 4.936831951141357, "learning_rate": 3.464218583354051e-07, "loss": 0.4167, "step": 28014 }, { "epoch": 91.85245901639344, "grad_norm": 5.08058500289917, "learning_rate": 3.461448349458607e-07, "loss": 0.4582, "step": 28015 }, { "epoch": 91.85573770491803, "grad_norm": 3.899538278579712, "learning_rate": 3.4586792041356954e-07, "loss": 0.2069, "step": 28016 }, { "epoch": 91.85901639344263, "grad_norm": 3.8755571842193604, "learning_rate": 3.4559111474165684e-07, "loss": 0.1879, "step": 28017 }, { "epoch": 91.86229508196722, "grad_norm": 5.969773769378662, "learning_rate": 3.4531441793324014e-07, "loss": 0.4495, "step": 28018 }, { "epoch": 91.8655737704918, "grad_norm": 5.9259352684021, "learning_rate": 3.450378299914425e-07, "loss": 0.4853, "step": 28019 }, { "epoch": 91.8688524590164, "grad_norm": 6.733373641967773, "learning_rate": 3.447613509193826e-07, "loss": 0.2866, "step": 28020 }, { "epoch": 91.87213114754098, "grad_norm": 4.196717739105225, "learning_rate": 3.444849807201778e-07, "loss": 0.6511, "step": 28021 }, { "epoch": 91.87540983606557, "grad_norm": 5.2941813468933105, "learning_rate": 3.4420871939694235e-07, "loss": 0.6252, "step": 28022 }, { "epoch": 91.87868852459016, "grad_norm": 6.035466194152832, "learning_rate": 3.439325669527949e-07, "loss": 0.3173, "step": 28023 }, { "epoch": 91.88196721311475, "grad_norm": 4.503762245178223, "learning_rate": 3.436565233908473e-07, "loss": 0.3775, "step": 28024 }, { "epoch": 91.88524590163935, "grad_norm": 4.907131195068359, "learning_rate": 3.4338058871421163e-07, "loss": 0.3002, "step": 28025 }, { "epoch": 91.88852459016394, "grad_norm": 7.050516605377197, "learning_rate": 3.4310476292599983e-07, "loss": 0.3427, "step": 28026 }, { "epoch": 91.89180327868853, "grad_norm": 8.626043319702148, "learning_rate": 3.4282904602932374e-07, "loss": 0.4648, "step": 28027 }, { "epoch": 91.89508196721312, "grad_norm": 6.543532848358154, "learning_rate": 3.42553438027291e-07, "loss": 0.4268, "step": 28028 }, { "epoch": 91.8983606557377, "grad_norm": 4.732476234436035, "learning_rate": 3.422779389230091e-07, "loss": 0.4444, "step": 28029 }, { "epoch": 91.90163934426229, "grad_norm": 7.410454273223877, "learning_rate": 3.420025487195855e-07, "loss": 0.3859, "step": 28030 }, { "epoch": 91.90491803278688, "grad_norm": 4.765888690948486, "learning_rate": 3.417272674201233e-07, "loss": 0.5222, "step": 28031 }, { "epoch": 91.90819672131147, "grad_norm": 5.192455291748047, "learning_rate": 3.414520950277289e-07, "loss": 0.3738, "step": 28032 }, { "epoch": 91.91147540983607, "grad_norm": 6.271040916442871, "learning_rate": 3.4117703154550544e-07, "loss": 0.1221, "step": 28033 }, { "epoch": 91.91475409836066, "grad_norm": 5.735208511352539, "learning_rate": 3.4090207697655366e-07, "loss": 0.4045, "step": 28034 }, { "epoch": 91.91803278688525, "grad_norm": 5.873988628387451, "learning_rate": 3.406272313239722e-07, "loss": 0.4022, "step": 28035 }, { "epoch": 91.92131147540984, "grad_norm": 4.923246383666992, "learning_rate": 3.403524945908632e-07, "loss": 0.3147, "step": 28036 }, { "epoch": 91.92459016393443, "grad_norm": 5.503738880157471, "learning_rate": 3.4007786678032397e-07, "loss": 0.5555, "step": 28037 }, { "epoch": 91.92786885245901, "grad_norm": 4.605237007141113, "learning_rate": 3.398033478954499e-07, "loss": 0.2389, "step": 28038 }, { "epoch": 91.9311475409836, "grad_norm": 4.21706485748291, "learning_rate": 3.395289379393363e-07, "loss": 0.2792, "step": 28039 }, { "epoch": 91.93442622950819, "grad_norm": 5.887187480926514, "learning_rate": 3.3925463691507956e-07, "loss": 0.2391, "step": 28040 }, { "epoch": 91.9377049180328, "grad_norm": 5.306257724761963, "learning_rate": 3.389804448257705e-07, "loss": 0.3547, "step": 28041 }, { "epoch": 91.94098360655738, "grad_norm": 6.638140678405762, "learning_rate": 3.387063616745023e-07, "loss": 0.4088, "step": 28042 }, { "epoch": 91.94426229508197, "grad_norm": 6.4475274085998535, "learning_rate": 3.3843238746436466e-07, "loss": 0.4001, "step": 28043 }, { "epoch": 91.94754098360656, "grad_norm": 4.782116413116455, "learning_rate": 3.381585221984485e-07, "loss": 0.3784, "step": 28044 }, { "epoch": 91.95081967213115, "grad_norm": 5.129518508911133, "learning_rate": 3.37884765879839e-07, "loss": 0.3264, "step": 28045 }, { "epoch": 91.95409836065573, "grad_norm": 5.028443336486816, "learning_rate": 3.37611118511626e-07, "loss": 0.3268, "step": 28046 }, { "epoch": 91.95737704918032, "grad_norm": 19.85547637939453, "learning_rate": 3.373375800968948e-07, "loss": 0.2979, "step": 28047 }, { "epoch": 91.96065573770491, "grad_norm": 5.004087924957275, "learning_rate": 3.3706415063872843e-07, "loss": 0.492, "step": 28048 }, { "epoch": 91.96393442622951, "grad_norm": 5.681187629699707, "learning_rate": 3.3679083014020897e-07, "loss": 0.2952, "step": 28049 }, { "epoch": 91.9672131147541, "grad_norm": 6.027667045593262, "learning_rate": 3.365176186044228e-07, "loss": 0.3777, "step": 28050 }, { "epoch": 91.97049180327869, "grad_norm": 3.8418853282928467, "learning_rate": 3.362445160344463e-07, "loss": 0.4067, "step": 28051 }, { "epoch": 91.97377049180328, "grad_norm": 8.800506591796875, "learning_rate": 3.359715224333604e-07, "loss": 0.469, "step": 28052 }, { "epoch": 91.97704918032787, "grad_norm": 3.9088945388793945, "learning_rate": 3.356986378042448e-07, "loss": 0.2725, "step": 28053 }, { "epoch": 91.98032786885246, "grad_norm": 3.3976452350616455, "learning_rate": 3.3542586215017603e-07, "loss": 0.2096, "step": 28054 }, { "epoch": 91.98360655737704, "grad_norm": 3.8564445972442627, "learning_rate": 3.351531954742282e-07, "loss": 0.3635, "step": 28055 }, { "epoch": 91.98688524590163, "grad_norm": 6.445557594299316, "learning_rate": 3.3488063777947775e-07, "loss": 0.5961, "step": 28056 }, { "epoch": 91.99016393442623, "grad_norm": 4.477526664733887, "learning_rate": 3.346081890689956e-07, "loss": 0.304, "step": 28057 }, { "epoch": 91.99344262295082, "grad_norm": 5.7678751945495605, "learning_rate": 3.3433584934585704e-07, "loss": 0.4611, "step": 28058 }, { "epoch": 91.99672131147541, "grad_norm": 4.617522716522217, "learning_rate": 3.3406361861313074e-07, "loss": 0.4186, "step": 28059 }, { "epoch": 92.0, "grad_norm": 4.453812122344971, "learning_rate": 3.3379149687388866e-07, "loss": 0.4528, "step": 28060 }, { "epoch": 92.00327868852459, "grad_norm": 3.9859111309051514, "learning_rate": 3.3351948413119616e-07, "loss": 0.2664, "step": 28061 }, { "epoch": 92.00655737704918, "grad_norm": 8.359574317932129, "learning_rate": 3.33247580388123e-07, "loss": 0.6171, "step": 28062 }, { "epoch": 92.00983606557377, "grad_norm": 6.955618381500244, "learning_rate": 3.3297578564773336e-07, "loss": 0.515, "step": 28063 }, { "epoch": 92.01311475409837, "grad_norm": 4.922215461730957, "learning_rate": 3.3270409991309485e-07, "loss": 0.3661, "step": 28064 }, { "epoch": 92.01639344262296, "grad_norm": 5.757460117340088, "learning_rate": 3.3243252318726603e-07, "loss": 0.2245, "step": 28065 }, { "epoch": 92.01967213114754, "grad_norm": 6.5316572189331055, "learning_rate": 3.3216105547331454e-07, "loss": 0.3663, "step": 28066 }, { "epoch": 92.02295081967213, "grad_norm": 5.759649753570557, "learning_rate": 3.3188969677429907e-07, "loss": 0.2484, "step": 28067 }, { "epoch": 92.02622950819672, "grad_norm": 3.691328763961792, "learning_rate": 3.3161844709327927e-07, "loss": 0.1612, "step": 28068 }, { "epoch": 92.02950819672131, "grad_norm": 6.4179768562316895, "learning_rate": 3.313473064333139e-07, "loss": 0.4476, "step": 28069 }, { "epoch": 92.0327868852459, "grad_norm": 6.974039077758789, "learning_rate": 3.310762747974605e-07, "loss": 0.4111, "step": 28070 }, { "epoch": 92.03606557377049, "grad_norm": 6.430889129638672, "learning_rate": 3.3080535218877554e-07, "loss": 0.3533, "step": 28071 }, { "epoch": 92.03934426229509, "grad_norm": 5.877418041229248, "learning_rate": 3.3053453861031316e-07, "loss": 0.2957, "step": 28072 }, { "epoch": 92.04262295081968, "grad_norm": 5.197473049163818, "learning_rate": 3.3026383406512877e-07, "loss": 0.384, "step": 28073 }, { "epoch": 92.04590163934427, "grad_norm": 3.942293643951416, "learning_rate": 3.2999323855627205e-07, "loss": 0.3357, "step": 28074 }, { "epoch": 92.04918032786885, "grad_norm": 3.2727015018463135, "learning_rate": 3.2972275208679625e-07, "loss": 0.0934, "step": 28075 }, { "epoch": 92.05245901639344, "grad_norm": 5.018256664276123, "learning_rate": 3.2945237465975223e-07, "loss": 0.4005, "step": 28076 }, { "epoch": 92.05573770491803, "grad_norm": 4.488689422607422, "learning_rate": 3.291821062781864e-07, "loss": 0.3792, "step": 28077 }, { "epoch": 92.05901639344262, "grad_norm": 4.7750396728515625, "learning_rate": 3.289119469451474e-07, "loss": 0.3193, "step": 28078 }, { "epoch": 92.0622950819672, "grad_norm": 5.782535552978516, "learning_rate": 3.286418966636817e-07, "loss": 0.5717, "step": 28079 }, { "epoch": 92.06557377049181, "grad_norm": 6.006900310516357, "learning_rate": 3.2837195543683476e-07, "loss": 0.5361, "step": 28080 }, { "epoch": 92.0688524590164, "grad_norm": 4.602387428283691, "learning_rate": 3.2810212326765066e-07, "loss": 0.5552, "step": 28081 }, { "epoch": 92.07213114754099, "grad_norm": 3.6497316360473633, "learning_rate": 3.2783240015917037e-07, "loss": 0.2041, "step": 28082 }, { "epoch": 92.07540983606557, "grad_norm": 6.5149054527282715, "learning_rate": 3.2756278611443595e-07, "loss": 0.37, "step": 28083 }, { "epoch": 92.07868852459016, "grad_norm": 4.703108310699463, "learning_rate": 3.272932811364882e-07, "loss": 0.3525, "step": 28084 }, { "epoch": 92.08196721311475, "grad_norm": 4.668316841125488, "learning_rate": 3.270238852283669e-07, "loss": 0.2654, "step": 28085 }, { "epoch": 92.08524590163934, "grad_norm": 4.876288890838623, "learning_rate": 3.267545983931075e-07, "loss": 0.2395, "step": 28086 }, { "epoch": 92.08852459016393, "grad_norm": 5.0827226638793945, "learning_rate": 3.264854206337475e-07, "loss": 0.2365, "step": 28087 }, { "epoch": 92.09180327868853, "grad_norm": 4.522289276123047, "learning_rate": 3.2621635195332236e-07, "loss": 0.3414, "step": 28088 }, { "epoch": 92.09508196721312, "grad_norm": 5.537881374359131, "learning_rate": 3.2594739235486725e-07, "loss": 0.2958, "step": 28089 }, { "epoch": 92.09836065573771, "grad_norm": 4.764278411865234, "learning_rate": 3.2567854184141324e-07, "loss": 0.2279, "step": 28090 }, { "epoch": 92.1016393442623, "grad_norm": 4.432542324066162, "learning_rate": 3.2540980041599113e-07, "loss": 0.2879, "step": 28091 }, { "epoch": 92.10491803278688, "grad_norm": 6.478280067443848, "learning_rate": 3.251411680816341e-07, "loss": 0.2933, "step": 28092 }, { "epoch": 92.10819672131147, "grad_norm": 8.586150169372559, "learning_rate": 3.248726448413686e-07, "loss": 0.2355, "step": 28093 }, { "epoch": 92.11147540983606, "grad_norm": 4.747241973876953, "learning_rate": 3.2460423069822554e-07, "loss": 0.2929, "step": 28094 }, { "epoch": 92.11475409836065, "grad_norm": 7.238321304321289, "learning_rate": 3.24335925655227e-07, "loss": 0.479, "step": 28095 }, { "epoch": 92.11803278688525, "grad_norm": 5.325250625610352, "learning_rate": 3.240677297154027e-07, "loss": 0.352, "step": 28096 }, { "epoch": 92.12131147540984, "grad_norm": 4.603334903717041, "learning_rate": 3.237996428817758e-07, "loss": 0.3761, "step": 28097 }, { "epoch": 92.12459016393443, "grad_norm": 6.550693988800049, "learning_rate": 3.235316651573661e-07, "loss": 0.3515, "step": 28098 }, { "epoch": 92.12786885245902, "grad_norm": 4.488030910491943, "learning_rate": 3.232637965452001e-07, "loss": 0.44, "step": 28099 }, { "epoch": 92.1311475409836, "grad_norm": 4.658565521240234, "learning_rate": 3.2299603704829654e-07, "loss": 0.2633, "step": 28100 }, { "epoch": 92.1344262295082, "grad_norm": 4.070135116577148, "learning_rate": 3.2272838666967177e-07, "loss": 0.3619, "step": 28101 }, { "epoch": 92.13770491803278, "grad_norm": 4.463398456573486, "learning_rate": 3.224608454123479e-07, "loss": 0.2128, "step": 28102 }, { "epoch": 92.14098360655737, "grad_norm": 9.765546798706055, "learning_rate": 3.221934132793403e-07, "loss": 0.3759, "step": 28103 }, { "epoch": 92.14426229508197, "grad_norm": 4.6210246086120605, "learning_rate": 3.219260902736632e-07, "loss": 0.2571, "step": 28104 }, { "epoch": 92.14754098360656, "grad_norm": 4.262594699859619, "learning_rate": 3.2165887639833305e-07, "loss": 0.3646, "step": 28105 }, { "epoch": 92.15081967213115, "grad_norm": 3.861518383026123, "learning_rate": 3.2139177165636304e-07, "loss": 0.3239, "step": 28106 }, { "epoch": 92.15409836065574, "grad_norm": 4.776585102081299, "learning_rate": 3.2112477605076297e-07, "loss": 0.3241, "step": 28107 }, { "epoch": 92.15737704918033, "grad_norm": 4.872154235839844, "learning_rate": 3.208578895845449e-07, "loss": 0.5546, "step": 28108 }, { "epoch": 92.16065573770491, "grad_norm": 4.551274299621582, "learning_rate": 3.2059111226071637e-07, "loss": 0.3123, "step": 28109 }, { "epoch": 92.1639344262295, "grad_norm": 4.884909152984619, "learning_rate": 3.203244440822884e-07, "loss": 0.2807, "step": 28110 }, { "epoch": 92.1672131147541, "grad_norm": 5.5549397468566895, "learning_rate": 3.200578850522673e-07, "loss": 0.2825, "step": 28111 }, { "epoch": 92.1704918032787, "grad_norm": 4.450672626495361, "learning_rate": 3.1979143517365753e-07, "loss": 0.3024, "step": 28112 }, { "epoch": 92.17377049180328, "grad_norm": 5.475885391235352, "learning_rate": 3.195250944494632e-07, "loss": 0.26, "step": 28113 }, { "epoch": 92.17704918032787, "grad_norm": 5.045116901397705, "learning_rate": 3.192588628826898e-07, "loss": 0.3936, "step": 28114 }, { "epoch": 92.18032786885246, "grad_norm": 5.880995750427246, "learning_rate": 3.189927404763382e-07, "loss": 0.5613, "step": 28115 }, { "epoch": 92.18360655737705, "grad_norm": 4.063369274139404, "learning_rate": 3.187267272334083e-07, "loss": 0.2446, "step": 28116 }, { "epoch": 92.18688524590164, "grad_norm": 4.538041591644287, "learning_rate": 3.1846082315690086e-07, "loss": 0.4479, "step": 28117 }, { "epoch": 92.19016393442622, "grad_norm": 4.257616996765137, "learning_rate": 3.181950282498136e-07, "loss": 0.3831, "step": 28118 }, { "epoch": 92.19344262295083, "grad_norm": 5.651158809661865, "learning_rate": 3.179293425151453e-07, "loss": 0.4322, "step": 28119 }, { "epoch": 92.19672131147541, "grad_norm": 6.339466571807861, "learning_rate": 3.1766376595589013e-07, "loss": 0.4104, "step": 28120 }, { "epoch": 92.2, "grad_norm": 7.526499271392822, "learning_rate": 3.1739829857504235e-07, "loss": 0.4075, "step": 28121 }, { "epoch": 92.20327868852459, "grad_norm": 3.6488492488861084, "learning_rate": 3.171329403755963e-07, "loss": 0.5721, "step": 28122 }, { "epoch": 92.20655737704918, "grad_norm": 4.319012641906738, "learning_rate": 3.1686769136054396e-07, "loss": 0.2659, "step": 28123 }, { "epoch": 92.20983606557377, "grad_norm": 4.470583915710449, "learning_rate": 3.166025515328763e-07, "loss": 0.3833, "step": 28124 }, { "epoch": 92.21311475409836, "grad_norm": 4.353234767913818, "learning_rate": 3.1633752089558434e-07, "loss": 0.2986, "step": 28125 }, { "epoch": 92.21639344262294, "grad_norm": 5.022472858428955, "learning_rate": 3.160725994516534e-07, "loss": 0.4314, "step": 28126 }, { "epoch": 92.21967213114755, "grad_norm": 4.005497932434082, "learning_rate": 3.1580778720407325e-07, "loss": 0.2563, "step": 28127 }, { "epoch": 92.22295081967214, "grad_norm": 6.333034038543701, "learning_rate": 3.1554308415583045e-07, "loss": 0.5518, "step": 28128 }, { "epoch": 92.22622950819672, "grad_norm": 5.119424819946289, "learning_rate": 3.152784903099082e-07, "loss": 0.3982, "step": 28129 }, { "epoch": 92.22950819672131, "grad_norm": 5.025073051452637, "learning_rate": 3.150140056692885e-07, "loss": 0.5363, "step": 28130 }, { "epoch": 92.2327868852459, "grad_norm": 4.686275482177734, "learning_rate": 3.147496302369579e-07, "loss": 0.3035, "step": 28131 }, { "epoch": 92.23606557377049, "grad_norm": 4.572582244873047, "learning_rate": 3.144853640158951e-07, "loss": 0.3088, "step": 28132 }, { "epoch": 92.23934426229508, "grad_norm": 3.4978177547454834, "learning_rate": 3.142212070090811e-07, "loss": 0.2516, "step": 28133 }, { "epoch": 92.24262295081967, "grad_norm": 4.052496433258057, "learning_rate": 3.1395715921949234e-07, "loss": 0.1825, "step": 28134 }, { "epoch": 92.24590163934427, "grad_norm": 4.447163105010986, "learning_rate": 3.1369322065010756e-07, "loss": 0.3889, "step": 28135 }, { "epoch": 92.24918032786886, "grad_norm": 6.797759532928467, "learning_rate": 3.134293913039033e-07, "loss": 0.3101, "step": 28136 }, { "epoch": 92.25245901639344, "grad_norm": 5.562158107757568, "learning_rate": 3.1316567118385375e-07, "loss": 0.6786, "step": 28137 }, { "epoch": 92.25573770491803, "grad_norm": 5.557758808135986, "learning_rate": 3.1290206029293444e-07, "loss": 0.3838, "step": 28138 }, { "epoch": 92.25901639344262, "grad_norm": 7.3596882820129395, "learning_rate": 3.126385586341141e-07, "loss": 0.392, "step": 28139 }, { "epoch": 92.26229508196721, "grad_norm": 7.6849284172058105, "learning_rate": 3.1237516621036803e-07, "loss": 0.4463, "step": 28140 }, { "epoch": 92.2655737704918, "grad_norm": 8.133441925048828, "learning_rate": 3.1211188302466386e-07, "loss": 0.2256, "step": 28141 }, { "epoch": 92.26885245901639, "grad_norm": 5.810282230377197, "learning_rate": 3.1184870907997156e-07, "loss": 0.3925, "step": 28142 }, { "epoch": 92.27213114754099, "grad_norm": 4.633133888244629, "learning_rate": 3.1158564437925866e-07, "loss": 0.3151, "step": 28143 }, { "epoch": 92.27540983606558, "grad_norm": 4.375790119171143, "learning_rate": 3.1132268892548945e-07, "loss": 0.1898, "step": 28144 }, { "epoch": 92.27868852459017, "grad_norm": 4.263119697570801, "learning_rate": 3.110598427216327e-07, "loss": 0.2042, "step": 28145 }, { "epoch": 92.28196721311475, "grad_norm": 4.3682541847229, "learning_rate": 3.1079710577064935e-07, "loss": 0.4083, "step": 28146 }, { "epoch": 92.28524590163934, "grad_norm": 5.224263668060303, "learning_rate": 3.105344780755015e-07, "loss": 0.6357, "step": 28147 }, { "epoch": 92.28852459016393, "grad_norm": 6.155447959899902, "learning_rate": 3.102719596391535e-07, "loss": 0.2664, "step": 28148 }, { "epoch": 92.29180327868852, "grad_norm": 4.625361442565918, "learning_rate": 3.100095504645639e-07, "loss": 0.2672, "step": 28149 }, { "epoch": 92.29508196721312, "grad_norm": 5.987818241119385, "learning_rate": 3.0974725055469054e-07, "loss": 0.3325, "step": 28150 }, { "epoch": 92.29836065573771, "grad_norm": 3.6001651287078857, "learning_rate": 3.094850599124932e-07, "loss": 0.3348, "step": 28151 }, { "epoch": 92.3016393442623, "grad_norm": 4.163421630859375, "learning_rate": 3.092229785409273e-07, "loss": 0.2018, "step": 28152 }, { "epoch": 92.30491803278689, "grad_norm": 4.50461483001709, "learning_rate": 3.0896100644294823e-07, "loss": 0.4856, "step": 28153 }, { "epoch": 92.30819672131148, "grad_norm": 6.841460227966309, "learning_rate": 3.0869914362150923e-07, "loss": 0.4598, "step": 28154 }, { "epoch": 92.31147540983606, "grad_norm": 6.248126029968262, "learning_rate": 3.0843739007956454e-07, "loss": 0.3661, "step": 28155 }, { "epoch": 92.31475409836065, "grad_norm": 5.034134387969971, "learning_rate": 3.0817574582006513e-07, "loss": 0.2606, "step": 28156 }, { "epoch": 92.31803278688524, "grad_norm": 45.7055778503418, "learning_rate": 3.0791421084595984e-07, "loss": 0.3209, "step": 28157 }, { "epoch": 92.32131147540984, "grad_norm": 8.97607421875, "learning_rate": 3.076527851602007e-07, "loss": 0.4805, "step": 28158 }, { "epoch": 92.32459016393443, "grad_norm": 5.6979079246521, "learning_rate": 3.073914687657331e-07, "loss": 0.3323, "step": 28159 }, { "epoch": 92.32786885245902, "grad_norm": 5.309817314147949, "learning_rate": 3.0713026166550586e-07, "loss": 0.4371, "step": 28160 }, { "epoch": 92.33114754098361, "grad_norm": 5.3139328956604, "learning_rate": 3.06869163862461e-07, "loss": 0.3651, "step": 28161 }, { "epoch": 92.3344262295082, "grad_norm": 6.614891052246094, "learning_rate": 3.066081753595451e-07, "loss": 0.2325, "step": 28162 }, { "epoch": 92.33770491803278, "grad_norm": 4.412484169006348, "learning_rate": 3.0634729615970136e-07, "loss": 0.2587, "step": 28163 }, { "epoch": 92.34098360655737, "grad_norm": 5.554401397705078, "learning_rate": 3.060865262658708e-07, "loss": 0.4349, "step": 28164 }, { "epoch": 92.34426229508196, "grad_norm": 5.20127010345459, "learning_rate": 3.0582586568099206e-07, "loss": 0.5591, "step": 28165 }, { "epoch": 92.34754098360656, "grad_norm": 4.3145952224731445, "learning_rate": 3.055653144080084e-07, "loss": 0.2793, "step": 28166 }, { "epoch": 92.35081967213115, "grad_norm": 5.603125095367432, "learning_rate": 3.053048724498542e-07, "loss": 0.5431, "step": 28167 }, { "epoch": 92.35409836065574, "grad_norm": 4.544497489929199, "learning_rate": 3.0504453980946813e-07, "loss": 0.2937, "step": 28168 }, { "epoch": 92.35737704918033, "grad_norm": 4.408453941345215, "learning_rate": 3.047843164897857e-07, "loss": 0.5396, "step": 28169 }, { "epoch": 92.36065573770492, "grad_norm": 3.7918310165405273, "learning_rate": 3.045242024937389e-07, "loss": 0.5709, "step": 28170 }, { "epoch": 92.3639344262295, "grad_norm": 5.03483247756958, "learning_rate": 3.042641978242633e-07, "loss": 0.4899, "step": 28171 }, { "epoch": 92.3672131147541, "grad_norm": 5.26340389251709, "learning_rate": 3.0400430248428983e-07, "loss": 0.2912, "step": 28172 }, { "epoch": 92.37049180327868, "grad_norm": 4.935428619384766, "learning_rate": 3.037445164767494e-07, "loss": 0.4023, "step": 28173 }, { "epoch": 92.37377049180328, "grad_norm": 3.9456136226654053, "learning_rate": 3.0348483980457086e-07, "loss": 0.1834, "step": 28174 }, { "epoch": 92.37704918032787, "grad_norm": 4.309652805328369, "learning_rate": 3.03225272470683e-07, "loss": 0.459, "step": 28175 }, { "epoch": 92.38032786885246, "grad_norm": 3.924837350845337, "learning_rate": 3.029658144780123e-07, "loss": 0.2558, "step": 28176 }, { "epoch": 92.38360655737705, "grad_norm": 4.426757335662842, "learning_rate": 3.0270646582948425e-07, "loss": 0.3185, "step": 28177 }, { "epoch": 92.38688524590164, "grad_norm": 5.538518905639648, "learning_rate": 3.0244722652802203e-07, "loss": 0.3984, "step": 28178 }, { "epoch": 92.39016393442623, "grad_norm": 5.053569316864014, "learning_rate": 3.0218809657655226e-07, "loss": 0.341, "step": 28179 }, { "epoch": 92.39344262295081, "grad_norm": 5.175053119659424, "learning_rate": 3.019290759779947e-07, "loss": 0.2481, "step": 28180 }, { "epoch": 92.3967213114754, "grad_norm": 4.443041801452637, "learning_rate": 3.016701647352693e-07, "loss": 0.3084, "step": 28181 }, { "epoch": 92.4, "grad_norm": 5.411262035369873, "learning_rate": 3.0141136285129825e-07, "loss": 0.2731, "step": 28182 }, { "epoch": 92.4032786885246, "grad_norm": 4.183551788330078, "learning_rate": 3.0115267032899577e-07, "loss": 0.5296, "step": 28183 }, { "epoch": 92.40655737704918, "grad_norm": 5.349452018737793, "learning_rate": 3.0089408717128287e-07, "loss": 0.4448, "step": 28184 }, { "epoch": 92.40983606557377, "grad_norm": 5.265002250671387, "learning_rate": 3.006356133810728e-07, "loss": 0.2585, "step": 28185 }, { "epoch": 92.41311475409836, "grad_norm": 5.5158843994140625, "learning_rate": 3.003772489612811e-07, "loss": 0.5612, "step": 28186 }, { "epoch": 92.41639344262295, "grad_norm": 4.770514011383057, "learning_rate": 3.001189939148208e-07, "loss": 0.4252, "step": 28187 }, { "epoch": 92.41967213114754, "grad_norm": 5.236501693725586, "learning_rate": 2.9986084824460527e-07, "loss": 0.3808, "step": 28188 }, { "epoch": 92.42295081967212, "grad_norm": 6.354701995849609, "learning_rate": 2.9960281195354325e-07, "loss": 0.4612, "step": 28189 }, { "epoch": 92.42622950819673, "grad_norm": 7.383362293243408, "learning_rate": 2.9934488504454686e-07, "loss": 0.4322, "step": 28190 }, { "epoch": 92.42950819672132, "grad_norm": 4.516720294952393, "learning_rate": 2.990870675205204e-07, "loss": 0.3382, "step": 28191 }, { "epoch": 92.4327868852459, "grad_norm": 6.141176700592041, "learning_rate": 2.988293593843761e-07, "loss": 0.4321, "step": 28192 }, { "epoch": 92.43606557377049, "grad_norm": 4.424520492553711, "learning_rate": 2.9857176063901593e-07, "loss": 0.1676, "step": 28193 }, { "epoch": 92.43934426229508, "grad_norm": 4.617629051208496, "learning_rate": 2.983142712873477e-07, "loss": 0.5573, "step": 28194 }, { "epoch": 92.44262295081967, "grad_norm": 5.857935905456543, "learning_rate": 2.9805689133227235e-07, "loss": 0.3634, "step": 28195 }, { "epoch": 92.44590163934426, "grad_norm": 5.872451305389404, "learning_rate": 2.97799620776692e-07, "loss": 0.3981, "step": 28196 }, { "epoch": 92.44918032786886, "grad_norm": 5.35484504699707, "learning_rate": 2.9754245962350993e-07, "loss": 0.387, "step": 28197 }, { "epoch": 92.45245901639345, "grad_norm": 6.064253807067871, "learning_rate": 2.9728540787562486e-07, "loss": 0.2674, "step": 28198 }, { "epoch": 92.45573770491804, "grad_norm": 16.840625762939453, "learning_rate": 2.970284655359357e-07, "loss": 0.2883, "step": 28199 }, { "epoch": 92.45901639344262, "grad_norm": 4.05520486831665, "learning_rate": 2.9677163260733667e-07, "loss": 0.2563, "step": 28200 }, { "epoch": 92.46229508196721, "grad_norm": 4.469940185546875, "learning_rate": 2.9651490909272773e-07, "loss": 0.5855, "step": 28201 }, { "epoch": 92.4655737704918, "grad_norm": 5.105506896972656, "learning_rate": 2.9625829499500324e-07, "loss": 0.2735, "step": 28202 }, { "epoch": 92.46885245901639, "grad_norm": 5.209622383117676, "learning_rate": 2.960017903170542e-07, "loss": 0.5811, "step": 28203 }, { "epoch": 92.47213114754098, "grad_norm": 5.533002853393555, "learning_rate": 2.95745395061775e-07, "loss": 0.3619, "step": 28204 }, { "epoch": 92.47540983606558, "grad_norm": 5.098758220672607, "learning_rate": 2.9548910923205776e-07, "loss": 0.3629, "step": 28205 }, { "epoch": 92.47868852459017, "grad_norm": 3.540069341659546, "learning_rate": 2.95232932830789e-07, "loss": 0.2174, "step": 28206 }, { "epoch": 92.48196721311476, "grad_norm": 5.414569854736328, "learning_rate": 2.9497686586085983e-07, "loss": 0.495, "step": 28207 }, { "epoch": 92.48524590163935, "grad_norm": 5.077127933502197, "learning_rate": 2.947209083251579e-07, "loss": 0.3941, "step": 28208 }, { "epoch": 92.48852459016393, "grad_norm": 4.241093635559082, "learning_rate": 2.944650602265686e-07, "loss": 0.3322, "step": 28209 }, { "epoch": 92.49180327868852, "grad_norm": 4.204564571380615, "learning_rate": 2.942093215679764e-07, "loss": 0.2063, "step": 28210 }, { "epoch": 92.49508196721311, "grad_norm": 3.155057430267334, "learning_rate": 2.9395369235226677e-07, "loss": 0.2465, "step": 28211 }, { "epoch": 92.4983606557377, "grad_norm": 5.086771488189697, "learning_rate": 2.936981725823207e-07, "loss": 0.2872, "step": 28212 }, { "epoch": 92.5016393442623, "grad_norm": 4.388247013092041, "learning_rate": 2.934427622610181e-07, "loss": 0.3762, "step": 28213 }, { "epoch": 92.50491803278689, "grad_norm": 6.984582901000977, "learning_rate": 2.9318746139124224e-07, "loss": 0.3619, "step": 28214 }, { "epoch": 92.50819672131148, "grad_norm": 3.774285078048706, "learning_rate": 2.9293226997586966e-07, "loss": 0.2821, "step": 28215 }, { "epoch": 92.51147540983607, "grad_norm": 9.729011535644531, "learning_rate": 2.9267718801777924e-07, "loss": 0.4069, "step": 28216 }, { "epoch": 92.51475409836065, "grad_norm": 5.641967296600342, "learning_rate": 2.924222155198453e-07, "loss": 0.306, "step": 28217 }, { "epoch": 92.51803278688524, "grad_norm": 6.499789714813232, "learning_rate": 2.9216735248494556e-07, "loss": 0.2138, "step": 28218 }, { "epoch": 92.52131147540983, "grad_norm": 5.800482273101807, "learning_rate": 2.919125989159521e-07, "loss": 0.4591, "step": 28219 }, { "epoch": 92.52459016393442, "grad_norm": 7.177245140075684, "learning_rate": 2.9165795481573836e-07, "loss": 0.3418, "step": 28220 }, { "epoch": 92.52786885245902, "grad_norm": 10.681000709533691, "learning_rate": 2.9140342018717516e-07, "loss": 0.4955, "step": 28221 }, { "epoch": 92.53114754098361, "grad_norm": 3.90480899810791, "learning_rate": 2.9114899503313145e-07, "loss": 0.1955, "step": 28222 }, { "epoch": 92.5344262295082, "grad_norm": 8.328878402709961, "learning_rate": 2.908946793564793e-07, "loss": 0.2939, "step": 28223 }, { "epoch": 92.53770491803279, "grad_norm": 5.599551677703857, "learning_rate": 2.9064047316008423e-07, "loss": 0.3638, "step": 28224 }, { "epoch": 92.54098360655738, "grad_norm": 4.59483003616333, "learning_rate": 2.903863764468129e-07, "loss": 0.3565, "step": 28225 }, { "epoch": 92.54426229508196, "grad_norm": 4.650723457336426, "learning_rate": 2.9013238921952955e-07, "loss": 0.4972, "step": 28226 }, { "epoch": 92.54754098360655, "grad_norm": 4.711627006530762, "learning_rate": 2.898785114811009e-07, "loss": 0.3428, "step": 28227 }, { "epoch": 92.55081967213114, "grad_norm": 5.476441860198975, "learning_rate": 2.8962474323438685e-07, "loss": 0.2266, "step": 28228 }, { "epoch": 92.55409836065574, "grad_norm": 6.307588577270508, "learning_rate": 2.893710844822506e-07, "loss": 0.3748, "step": 28229 }, { "epoch": 92.55737704918033, "grad_norm": 8.110177040100098, "learning_rate": 2.8911753522755105e-07, "loss": 0.396, "step": 28230 }, { "epoch": 92.56065573770492, "grad_norm": 4.6461687088012695, "learning_rate": 2.888640954731492e-07, "loss": 0.3964, "step": 28231 }, { "epoch": 92.56393442622951, "grad_norm": 5.767058372497559, "learning_rate": 2.886107652219017e-07, "loss": 0.2615, "step": 28232 }, { "epoch": 92.5672131147541, "grad_norm": 4.285019874572754, "learning_rate": 2.883575444766651e-07, "loss": 0.3672, "step": 28233 }, { "epoch": 92.57049180327868, "grad_norm": 4.7315802574157715, "learning_rate": 2.881044332402949e-07, "loss": 0.3919, "step": 28234 }, { "epoch": 92.57377049180327, "grad_norm": 4.311795234680176, "learning_rate": 2.878514315156433e-07, "loss": 0.231, "step": 28235 }, { "epoch": 92.57704918032788, "grad_norm": 3.6973342895507812, "learning_rate": 2.875985393055669e-07, "loss": 0.4309, "step": 28236 }, { "epoch": 92.58032786885246, "grad_norm": 5.734516620635986, "learning_rate": 2.873457566129145e-07, "loss": 0.2175, "step": 28237 }, { "epoch": 92.58360655737705, "grad_norm": 4.464334011077881, "learning_rate": 2.870930834405372e-07, "loss": 0.3411, "step": 28238 }, { "epoch": 92.58688524590164, "grad_norm": 5.222227573394775, "learning_rate": 2.868405197912838e-07, "loss": 0.3045, "step": 28239 }, { "epoch": 92.59016393442623, "grad_norm": 5.589418888092041, "learning_rate": 2.865880656680042e-07, "loss": 0.4673, "step": 28240 }, { "epoch": 92.59344262295082, "grad_norm": 4.992795944213867, "learning_rate": 2.863357210735429e-07, "loss": 0.6222, "step": 28241 }, { "epoch": 92.5967213114754, "grad_norm": 5.711014747619629, "learning_rate": 2.8608348601074644e-07, "loss": 0.1366, "step": 28242 }, { "epoch": 92.6, "grad_norm": 5.07485818862915, "learning_rate": 2.8583136048245697e-07, "loss": 0.6632, "step": 28243 }, { "epoch": 92.6032786885246, "grad_norm": 4.747380256652832, "learning_rate": 2.8557934449152115e-07, "loss": 0.507, "step": 28244 }, { "epoch": 92.60655737704919, "grad_norm": 4.455488204956055, "learning_rate": 2.853274380407778e-07, "loss": 0.3681, "step": 28245 }, { "epoch": 92.60983606557377, "grad_norm": 10.429494857788086, "learning_rate": 2.8507564113306795e-07, "loss": 0.3238, "step": 28246 }, { "epoch": 92.61311475409836, "grad_norm": 7.838218688964844, "learning_rate": 2.848239537712316e-07, "loss": 0.4258, "step": 28247 }, { "epoch": 92.61639344262295, "grad_norm": 4.611784934997559, "learning_rate": 2.845723759581065e-07, "loss": 0.3828, "step": 28248 }, { "epoch": 92.61967213114754, "grad_norm": 7.9097442626953125, "learning_rate": 2.843209076965292e-07, "loss": 0.3299, "step": 28249 }, { "epoch": 92.62295081967213, "grad_norm": 5.02525520324707, "learning_rate": 2.8406954898933525e-07, "loss": 0.3839, "step": 28250 }, { "epoch": 92.62622950819672, "grad_norm": 4.340909957885742, "learning_rate": 2.8381829983936013e-07, "loss": 0.5149, "step": 28251 }, { "epoch": 92.62950819672132, "grad_norm": 5.060703754425049, "learning_rate": 2.8356716024943385e-07, "loss": 0.3326, "step": 28252 }, { "epoch": 92.6327868852459, "grad_norm": 4.413576126098633, "learning_rate": 2.83316130222393e-07, "loss": 0.2063, "step": 28253 }, { "epoch": 92.6360655737705, "grad_norm": 4.737979412078857, "learning_rate": 2.8306520976106423e-07, "loss": 0.4252, "step": 28254 }, { "epoch": 92.63934426229508, "grad_norm": 5.343011856079102, "learning_rate": 2.8281439886827854e-07, "loss": 0.2401, "step": 28255 }, { "epoch": 92.64262295081967, "grad_norm": 6.3192524909973145, "learning_rate": 2.8256369754686377e-07, "loss": 0.4592, "step": 28256 }, { "epoch": 92.64590163934426, "grad_norm": 7.748781681060791, "learning_rate": 2.8231310579964646e-07, "loss": 0.6568, "step": 28257 }, { "epoch": 92.64918032786885, "grad_norm": 6.819587707519531, "learning_rate": 2.820626236294532e-07, "loss": 0.3784, "step": 28258 }, { "epoch": 92.65245901639344, "grad_norm": 3.90299129486084, "learning_rate": 2.818122510391075e-07, "loss": 0.4194, "step": 28259 }, { "epoch": 92.65573770491804, "grad_norm": 4.7503342628479, "learning_rate": 2.8156198803143355e-07, "loss": 0.4431, "step": 28260 }, { "epoch": 92.65901639344263, "grad_norm": 7.352769374847412, "learning_rate": 2.813118346092536e-07, "loss": 0.1926, "step": 28261 }, { "epoch": 92.66229508196722, "grad_norm": 4.501460075378418, "learning_rate": 2.8106179077538543e-07, "loss": 0.5317, "step": 28262 }, { "epoch": 92.6655737704918, "grad_norm": 5.968021392822266, "learning_rate": 2.8081185653265343e-07, "loss": 0.5411, "step": 28263 }, { "epoch": 92.66885245901639, "grad_norm": 5.049043655395508, "learning_rate": 2.8056203188387197e-07, "loss": 0.3147, "step": 28264 }, { "epoch": 92.67213114754098, "grad_norm": 3.880525827407837, "learning_rate": 2.8031231683185775e-07, "loss": 0.4838, "step": 28265 }, { "epoch": 92.67540983606557, "grad_norm": 5.314162254333496, "learning_rate": 2.8006271137942965e-07, "loss": 0.4163, "step": 28266 }, { "epoch": 92.67868852459016, "grad_norm": 8.652555465698242, "learning_rate": 2.7981321552940086e-07, "loss": 0.3722, "step": 28267 }, { "epoch": 92.68196721311476, "grad_norm": 5.163261890411377, "learning_rate": 2.795638292845848e-07, "loss": 0.3574, "step": 28268 }, { "epoch": 92.68524590163935, "grad_norm": 4.244592666625977, "learning_rate": 2.793145526477914e-07, "loss": 0.2778, "step": 28269 }, { "epoch": 92.68852459016394, "grad_norm": 4.554549217224121, "learning_rate": 2.7906538562183506e-07, "loss": 0.3358, "step": 28270 }, { "epoch": 92.69180327868852, "grad_norm": 3.939649820327759, "learning_rate": 2.788163282095235e-07, "loss": 0.3139, "step": 28271 }, { "epoch": 92.69508196721311, "grad_norm": 4.562446117401123, "learning_rate": 2.785673804136657e-07, "loss": 0.3034, "step": 28272 }, { "epoch": 92.6983606557377, "grad_norm": 4.844270706176758, "learning_rate": 2.7831854223706824e-07, "loss": 0.683, "step": 28273 }, { "epoch": 92.70163934426229, "grad_norm": 9.992208480834961, "learning_rate": 2.780698136825366e-07, "loss": 0.5245, "step": 28274 }, { "epoch": 92.70491803278688, "grad_norm": 5.843481063842773, "learning_rate": 2.7782119475287637e-07, "loss": 0.4089, "step": 28275 }, { "epoch": 92.70819672131148, "grad_norm": 9.030988693237305, "learning_rate": 2.775726854508909e-07, "loss": 0.5318, "step": 28276 }, { "epoch": 92.71147540983607, "grad_norm": 5.360816955566406, "learning_rate": 2.773242857793823e-07, "loss": 0.2168, "step": 28277 }, { "epoch": 92.71475409836066, "grad_norm": 9.154976844787598, "learning_rate": 2.770759957411506e-07, "loss": 0.4669, "step": 28278 }, { "epoch": 92.71803278688525, "grad_norm": 5.068160057067871, "learning_rate": 2.768278153389969e-07, "loss": 0.3555, "step": 28279 }, { "epoch": 92.72131147540983, "grad_norm": 4.942432880401611, "learning_rate": 2.765797445757201e-07, "loss": 0.4474, "step": 28280 }, { "epoch": 92.72459016393442, "grad_norm": 5.232767105102539, "learning_rate": 2.763317834541157e-07, "loss": 0.3884, "step": 28281 }, { "epoch": 92.72786885245901, "grad_norm": 3.9257655143737793, "learning_rate": 2.760839319769792e-07, "loss": 0.2574, "step": 28282 }, { "epoch": 92.73114754098361, "grad_norm": 4.9472880363464355, "learning_rate": 2.758361901471085e-07, "loss": 0.5337, "step": 28283 }, { "epoch": 92.7344262295082, "grad_norm": 4.988960266113281, "learning_rate": 2.755885579672946e-07, "loss": 0.4563, "step": 28284 }, { "epoch": 92.73770491803279, "grad_norm": 4.767604351043701, "learning_rate": 2.7534103544033185e-07, "loss": 0.2749, "step": 28285 }, { "epoch": 92.74098360655738, "grad_norm": 5.10078239440918, "learning_rate": 2.750936225690093e-07, "loss": 0.5131, "step": 28286 }, { "epoch": 92.74426229508197, "grad_norm": 4.599053859710693, "learning_rate": 2.748463193561157e-07, "loss": 0.5767, "step": 28287 }, { "epoch": 92.74754098360656, "grad_norm": 4.2169060707092285, "learning_rate": 2.745991258044434e-07, "loss": 0.3714, "step": 28288 }, { "epoch": 92.75081967213114, "grad_norm": 5.626837253570557, "learning_rate": 2.7435204191677776e-07, "loss": 0.2938, "step": 28289 }, { "epoch": 92.75409836065573, "grad_norm": 5.603160381317139, "learning_rate": 2.741050676959045e-07, "loss": 0.2727, "step": 28290 }, { "epoch": 92.75737704918033, "grad_norm": 6.101134777069092, "learning_rate": 2.73858203144608e-07, "loss": 0.2474, "step": 28291 }, { "epoch": 92.76065573770492, "grad_norm": 4.170542240142822, "learning_rate": 2.736114482656749e-07, "loss": 0.2177, "step": 28292 }, { "epoch": 92.76393442622951, "grad_norm": 4.636363506317139, "learning_rate": 2.733648030618852e-07, "loss": 0.2738, "step": 28293 }, { "epoch": 92.7672131147541, "grad_norm": 5.034291744232178, "learning_rate": 2.7311826753602e-07, "loss": 0.2816, "step": 28294 }, { "epoch": 92.77049180327869, "grad_norm": 4.64035701751709, "learning_rate": 2.728718416908582e-07, "loss": 0.2902, "step": 28295 }, { "epoch": 92.77377049180328, "grad_norm": 4.766271591186523, "learning_rate": 2.726255255291821e-07, "loss": 0.3271, "step": 28296 }, { "epoch": 92.77704918032786, "grad_norm": 5.049580097198486, "learning_rate": 2.7237931905376714e-07, "loss": 0.4522, "step": 28297 }, { "epoch": 92.78032786885245, "grad_norm": 5.1282148361206055, "learning_rate": 2.721332222673889e-07, "loss": 0.3143, "step": 28298 }, { "epoch": 92.78360655737706, "grad_norm": 4.125139236450195, "learning_rate": 2.718872351728241e-07, "loss": 0.4913, "step": 28299 }, { "epoch": 92.78688524590164, "grad_norm": 5.524397373199463, "learning_rate": 2.7164135777284383e-07, "loss": 0.3284, "step": 28300 }, { "epoch": 92.79016393442623, "grad_norm": 5.146827697753906, "learning_rate": 2.713955900702225e-07, "loss": 0.4176, "step": 28301 }, { "epoch": 92.79344262295082, "grad_norm": 4.513060092926025, "learning_rate": 2.711499320677324e-07, "loss": 0.277, "step": 28302 }, { "epoch": 92.79672131147541, "grad_norm": 4.674980640411377, "learning_rate": 2.7090438376814135e-07, "loss": 0.3427, "step": 28303 }, { "epoch": 92.8, "grad_norm": 4.031286239624023, "learning_rate": 2.706589451742181e-07, "loss": 0.2819, "step": 28304 }, { "epoch": 92.80327868852459, "grad_norm": 6.546591758728027, "learning_rate": 2.7041361628873276e-07, "loss": 0.3391, "step": 28305 }, { "epoch": 92.80655737704917, "grad_norm": 4.323649883270264, "learning_rate": 2.7016839711444977e-07, "loss": 0.2637, "step": 28306 }, { "epoch": 92.80983606557378, "grad_norm": 3.8322577476501465, "learning_rate": 2.699232876541347e-07, "loss": 0.2004, "step": 28307 }, { "epoch": 92.81311475409836, "grad_norm": 4.636756896972656, "learning_rate": 2.6967828791055083e-07, "loss": 0.4948, "step": 28308 }, { "epoch": 92.81639344262295, "grad_norm": 4.849422931671143, "learning_rate": 2.6943339788646163e-07, "loss": 0.4242, "step": 28309 }, { "epoch": 92.81967213114754, "grad_norm": 6.760359287261963, "learning_rate": 2.691886175846281e-07, "loss": 0.497, "step": 28310 }, { "epoch": 92.82295081967213, "grad_norm": 5.637084007263184, "learning_rate": 2.689439470078092e-07, "loss": 0.3508, "step": 28311 }, { "epoch": 92.82622950819672, "grad_norm": 4.144420146942139, "learning_rate": 2.6869938615876723e-07, "loss": 0.6094, "step": 28312 }, { "epoch": 92.8295081967213, "grad_norm": 4.883848667144775, "learning_rate": 2.6845493504025657e-07, "loss": 0.3394, "step": 28313 }, { "epoch": 92.8327868852459, "grad_norm": 10.49386215209961, "learning_rate": 2.68210593655035e-07, "loss": 0.3739, "step": 28314 }, { "epoch": 92.8360655737705, "grad_norm": 4.728902339935303, "learning_rate": 2.6796636200585593e-07, "loss": 0.1403, "step": 28315 }, { "epoch": 92.83934426229509, "grad_norm": 3.965238571166992, "learning_rate": 2.6772224009547707e-07, "loss": 0.1754, "step": 28316 }, { "epoch": 92.84262295081967, "grad_norm": 4.490287780761719, "learning_rate": 2.674782279266486e-07, "loss": 0.3469, "step": 28317 }, { "epoch": 92.84590163934426, "grad_norm": 4.843017101287842, "learning_rate": 2.6723432550212146e-07, "loss": 0.2882, "step": 28318 }, { "epoch": 92.84918032786885, "grad_norm": 4.288482189178467, "learning_rate": 2.6699053282464693e-07, "loss": 0.2815, "step": 28319 }, { "epoch": 92.85245901639344, "grad_norm": 5.392947673797607, "learning_rate": 2.6674684989697494e-07, "loss": 0.2487, "step": 28320 }, { "epoch": 92.85573770491803, "grad_norm": 6.343206405639648, "learning_rate": 2.6650327672185115e-07, "loss": 0.3359, "step": 28321 }, { "epoch": 92.85901639344263, "grad_norm": 5.05720853805542, "learning_rate": 2.6625981330202443e-07, "loss": 0.3635, "step": 28322 }, { "epoch": 92.86229508196722, "grad_norm": 4.26544189453125, "learning_rate": 2.6601645964023813e-07, "loss": 0.29, "step": 28323 }, { "epoch": 92.8655737704918, "grad_norm": 5.036895275115967, "learning_rate": 2.657732157392379e-07, "loss": 0.3837, "step": 28324 }, { "epoch": 92.8688524590164, "grad_norm": 4.996725082397461, "learning_rate": 2.6553008160176476e-07, "loss": 0.3989, "step": 28325 }, { "epoch": 92.87213114754098, "grad_norm": 4.236594200134277, "learning_rate": 2.65287057230561e-07, "loss": 0.5433, "step": 28326 }, { "epoch": 92.87540983606557, "grad_norm": 5.772059440612793, "learning_rate": 2.650441426283679e-07, "loss": 0.4959, "step": 28327 }, { "epoch": 92.87868852459016, "grad_norm": 5.600069046020508, "learning_rate": 2.6480133779792417e-07, "loss": 0.4774, "step": 28328 }, { "epoch": 92.88196721311475, "grad_norm": 3.9000563621520996, "learning_rate": 2.6455864274196664e-07, "loss": 0.3894, "step": 28329 }, { "epoch": 92.88524590163935, "grad_norm": 4.654942035675049, "learning_rate": 2.6431605746323196e-07, "loss": 0.2195, "step": 28330 }, { "epoch": 92.88852459016394, "grad_norm": 5.4440436363220215, "learning_rate": 2.64073581964458e-07, "loss": 0.4519, "step": 28331 }, { "epoch": 92.89180327868853, "grad_norm": 4.484406471252441, "learning_rate": 2.638312162483769e-07, "loss": 0.3585, "step": 28332 }, { "epoch": 92.89508196721312, "grad_norm": 4.800130367279053, "learning_rate": 2.635889603177222e-07, "loss": 0.3786, "step": 28333 }, { "epoch": 92.8983606557377, "grad_norm": 16.764482498168945, "learning_rate": 2.6334681417522377e-07, "loss": 0.5393, "step": 28334 }, { "epoch": 92.90163934426229, "grad_norm": 6.043920040130615, "learning_rate": 2.631047778236151e-07, "loss": 0.4656, "step": 28335 }, { "epoch": 92.90491803278688, "grad_norm": 4.874851226806641, "learning_rate": 2.6286285126562395e-07, "loss": 0.3541, "step": 28336 }, { "epoch": 92.90819672131147, "grad_norm": 6.071796894073486, "learning_rate": 2.6262103450397703e-07, "loss": 0.4061, "step": 28337 }, { "epoch": 92.91147540983607, "grad_norm": 5.141813278198242, "learning_rate": 2.623793275414033e-07, "loss": 0.3627, "step": 28338 }, { "epoch": 92.91475409836066, "grad_norm": 4.513612747192383, "learning_rate": 2.621377303806261e-07, "loss": 0.3236, "step": 28339 }, { "epoch": 92.91803278688525, "grad_norm": 4.669010639190674, "learning_rate": 2.618962430243721e-07, "loss": 0.3795, "step": 28340 }, { "epoch": 92.92131147540984, "grad_norm": 5.223010540008545, "learning_rate": 2.6165486547536255e-07, "loss": 0.3819, "step": 28341 }, { "epoch": 92.92459016393443, "grad_norm": 5.196685314178467, "learning_rate": 2.614135977363208e-07, "loss": 0.6433, "step": 28342 }, { "epoch": 92.92786885245901, "grad_norm": 5.68183708190918, "learning_rate": 2.6117243980996356e-07, "loss": 0.3351, "step": 28343 }, { "epoch": 92.9311475409836, "grad_norm": 4.321109771728516, "learning_rate": 2.609313916990153e-07, "loss": 0.4555, "step": 28344 }, { "epoch": 92.93442622950819, "grad_norm": 7.705217361450195, "learning_rate": 2.6069045340619157e-07, "loss": 0.2227, "step": 28345 }, { "epoch": 92.9377049180328, "grad_norm": 4.766512393951416, "learning_rate": 2.604496249342081e-07, "loss": 0.4143, "step": 28346 }, { "epoch": 92.94098360655738, "grad_norm": 5.531805038452148, "learning_rate": 2.6020890628578153e-07, "loss": 0.1987, "step": 28347 }, { "epoch": 92.94426229508197, "grad_norm": 6.9041666984558105, "learning_rate": 2.599682974636275e-07, "loss": 0.4104, "step": 28348 }, { "epoch": 92.94754098360656, "grad_norm": 4.796397686004639, "learning_rate": 2.5972779847045826e-07, "loss": 0.4258, "step": 28349 }, { "epoch": 92.95081967213115, "grad_norm": 4.7000651359558105, "learning_rate": 2.59487409308985e-07, "loss": 0.1946, "step": 28350 }, { "epoch": 92.95409836065573, "grad_norm": 5.4056501388549805, "learning_rate": 2.592471299819188e-07, "loss": 0.3923, "step": 28351 }, { "epoch": 92.95737704918032, "grad_norm": 4.300250053405762, "learning_rate": 2.590069604919687e-07, "loss": 0.1557, "step": 28352 }, { "epoch": 92.96065573770491, "grad_norm": 5.725376129150391, "learning_rate": 2.5876690084184366e-07, "loss": 0.2082, "step": 28353 }, { "epoch": 92.96393442622951, "grad_norm": 5.156146049499512, "learning_rate": 2.585269510342503e-07, "loss": 0.3972, "step": 28354 }, { "epoch": 92.9672131147541, "grad_norm": 3.8248894214630127, "learning_rate": 2.582871110718943e-07, "loss": 0.3829, "step": 28355 }, { "epoch": 92.97049180327869, "grad_norm": 5.234494686126709, "learning_rate": 2.5804738095747793e-07, "loss": 0.2748, "step": 28356 }, { "epoch": 92.97377049180328, "grad_norm": 5.600894451141357, "learning_rate": 2.5780776069370794e-07, "loss": 0.3724, "step": 28357 }, { "epoch": 92.97704918032787, "grad_norm": 6.61281681060791, "learning_rate": 2.5756825028328546e-07, "loss": 0.3337, "step": 28358 }, { "epoch": 92.98032786885246, "grad_norm": 4.229042053222656, "learning_rate": 2.573288497289106e-07, "loss": 0.48, "step": 28359 }, { "epoch": 92.98360655737704, "grad_norm": 6.128028392791748, "learning_rate": 2.5708955903328116e-07, "loss": 0.3143, "step": 28360 }, { "epoch": 92.98688524590163, "grad_norm": 5.589650630950928, "learning_rate": 2.568503781990983e-07, "loss": 0.2844, "step": 28361 }, { "epoch": 92.99016393442623, "grad_norm": 6.271886825561523, "learning_rate": 2.566113072290577e-07, "loss": 0.7133, "step": 28362 }, { "epoch": 92.99344262295082, "grad_norm": 4.9987335205078125, "learning_rate": 2.563723461258549e-07, "loss": 0.2264, "step": 28363 }, { "epoch": 92.99672131147541, "grad_norm": 5.383344650268555, "learning_rate": 2.5613349489218454e-07, "loss": 0.5225, "step": 28364 }, { "epoch": 93.0, "grad_norm": 6.52128267288208, "learning_rate": 2.5589475353073987e-07, "loss": 0.3216, "step": 28365 }, { "epoch": 93.00327868852459, "grad_norm": 4.599434852600098, "learning_rate": 2.556561220442144e-07, "loss": 0.2573, "step": 28366 }, { "epoch": 93.00655737704918, "grad_norm": 5.337768077850342, "learning_rate": 2.5541760043529597e-07, "loss": 0.5348, "step": 28367 }, { "epoch": 93.00983606557377, "grad_norm": 6.93931245803833, "learning_rate": 2.551791887066768e-07, "loss": 0.6507, "step": 28368 }, { "epoch": 93.01311475409837, "grad_norm": 5.015347480773926, "learning_rate": 2.549408868610448e-07, "loss": 0.4992, "step": 28369 }, { "epoch": 93.01639344262296, "grad_norm": 4.644044399261475, "learning_rate": 2.5470269490108556e-07, "loss": 0.2657, "step": 28370 }, { "epoch": 93.01967213114754, "grad_norm": 5.875094890594482, "learning_rate": 2.54464612829487e-07, "loss": 0.426, "step": 28371 }, { "epoch": 93.02295081967213, "grad_norm": 5.305070877075195, "learning_rate": 2.5422664064893244e-07, "loss": 0.3463, "step": 28372 }, { "epoch": 93.02622950819672, "grad_norm": 6.62776517868042, "learning_rate": 2.5398877836210534e-07, "loss": 0.3174, "step": 28373 }, { "epoch": 93.02950819672131, "grad_norm": 4.550739288330078, "learning_rate": 2.53751025971688e-07, "loss": 0.4636, "step": 28374 }, { "epoch": 93.0327868852459, "grad_norm": 4.7096686363220215, "learning_rate": 2.535133834803627e-07, "loss": 0.2947, "step": 28375 }, { "epoch": 93.03606557377049, "grad_norm": 4.455891132354736, "learning_rate": 2.5327585089080733e-07, "loss": 0.3515, "step": 28376 }, { "epoch": 93.03934426229509, "grad_norm": 5.015231609344482, "learning_rate": 2.530384282056997e-07, "loss": 0.3797, "step": 28377 }, { "epoch": 93.04262295081968, "grad_norm": 9.42529296875, "learning_rate": 2.5280111542771877e-07, "loss": 0.4893, "step": 28378 }, { "epoch": 93.04590163934427, "grad_norm": 4.535576343536377, "learning_rate": 2.5256391255953915e-07, "loss": 0.194, "step": 28379 }, { "epoch": 93.04918032786885, "grad_norm": 10.252668380737305, "learning_rate": 2.5232681960383754e-07, "loss": 0.2761, "step": 28380 }, { "epoch": 93.05245901639344, "grad_norm": 28.190954208374023, "learning_rate": 2.5208983656328513e-07, "loss": 0.5802, "step": 28381 }, { "epoch": 93.05573770491803, "grad_norm": 4.353715419769287, "learning_rate": 2.518529634405553e-07, "loss": 0.2516, "step": 28382 }, { "epoch": 93.05901639344262, "grad_norm": 11.694914817810059, "learning_rate": 2.5161620023831823e-07, "loss": 0.2645, "step": 28383 }, { "epoch": 93.0622950819672, "grad_norm": 5.891465663909912, "learning_rate": 2.51379546959245e-07, "loss": 0.293, "step": 28384 }, { "epoch": 93.06557377049181, "grad_norm": 4.45142126083374, "learning_rate": 2.5114300360600363e-07, "loss": 0.2327, "step": 28385 }, { "epoch": 93.0688524590164, "grad_norm": 4.673686981201172, "learning_rate": 2.509065701812607e-07, "loss": 0.2976, "step": 28386 }, { "epoch": 93.07213114754099, "grad_norm": 6.84691858291626, "learning_rate": 2.5067024668768313e-07, "loss": 0.582, "step": 28387 }, { "epoch": 93.07540983606557, "grad_norm": 6.401167392730713, "learning_rate": 2.5043403312793535e-07, "loss": 0.2201, "step": 28388 }, { "epoch": 93.07868852459016, "grad_norm": 5.37109375, "learning_rate": 2.501979295046808e-07, "loss": 0.5052, "step": 28389 }, { "epoch": 93.08196721311475, "grad_norm": 3.901045322418213, "learning_rate": 2.4996193582058183e-07, "loss": 0.4095, "step": 28390 }, { "epoch": 93.08524590163934, "grad_norm": 8.42434310913086, "learning_rate": 2.497260520782985e-07, "loss": 0.2416, "step": 28391 }, { "epoch": 93.08852459016393, "grad_norm": 3.7085299491882324, "learning_rate": 2.49490278280492e-07, "loss": 0.4043, "step": 28392 }, { "epoch": 93.09180327868853, "grad_norm": 6.657487392425537, "learning_rate": 2.4925461442982136e-07, "loss": 0.5362, "step": 28393 }, { "epoch": 93.09508196721312, "grad_norm": 3.605085611343384, "learning_rate": 2.490190605289433e-07, "loss": 0.1819, "step": 28394 }, { "epoch": 93.09836065573771, "grad_norm": 14.839190483093262, "learning_rate": 2.487836165805124e-07, "loss": 0.3208, "step": 28395 }, { "epoch": 93.1016393442623, "grad_norm": 6.663009166717529, "learning_rate": 2.4854828258718653e-07, "loss": 0.3869, "step": 28396 }, { "epoch": 93.10491803278688, "grad_norm": 4.187178134918213, "learning_rate": 2.483130585516169e-07, "loss": 0.5278, "step": 28397 }, { "epoch": 93.10819672131147, "grad_norm": 4.809381008148193, "learning_rate": 2.480779444764569e-07, "loss": 0.3447, "step": 28398 }, { "epoch": 93.11147540983606, "grad_norm": 4.619956016540527, "learning_rate": 2.4784294036435673e-07, "loss": 0.4421, "step": 28399 }, { "epoch": 93.11475409836065, "grad_norm": 6.33757209777832, "learning_rate": 2.476080462179686e-07, "loss": 0.355, "step": 28400 }, { "epoch": 93.11803278688525, "grad_norm": 7.858034133911133, "learning_rate": 2.473732620399394e-07, "loss": 0.4232, "step": 28401 }, { "epoch": 93.12131147540984, "grad_norm": 4.93094539642334, "learning_rate": 2.4713858783291686e-07, "loss": 0.4469, "step": 28402 }, { "epoch": 93.12459016393443, "grad_norm": 4.24246072769165, "learning_rate": 2.469040235995468e-07, "loss": 0.3364, "step": 28403 }, { "epoch": 93.12786885245902, "grad_norm": 4.36464262008667, "learning_rate": 2.466695693424737e-07, "loss": 0.372, "step": 28404 }, { "epoch": 93.1311475409836, "grad_norm": 5.130531311035156, "learning_rate": 2.4643522506434313e-07, "loss": 0.4963, "step": 28405 }, { "epoch": 93.1344262295082, "grad_norm": 4.547822952270508, "learning_rate": 2.462009907677976e-07, "loss": 0.4301, "step": 28406 }, { "epoch": 93.13770491803278, "grad_norm": 4.096851348876953, "learning_rate": 2.4596686645547596e-07, "loss": 0.3616, "step": 28407 }, { "epoch": 93.14098360655737, "grad_norm": 4.945239067077637, "learning_rate": 2.457328521300195e-07, "loss": 0.4001, "step": 28408 }, { "epoch": 93.14426229508197, "grad_norm": 5.177490711212158, "learning_rate": 2.4549894779406725e-07, "loss": 0.6003, "step": 28409 }, { "epoch": 93.14754098360656, "grad_norm": 4.592669486999512, "learning_rate": 2.4526515345025706e-07, "loss": 0.2718, "step": 28410 }, { "epoch": 93.15081967213115, "grad_norm": 6.54791784286499, "learning_rate": 2.4503146910122345e-07, "loss": 0.3964, "step": 28411 }, { "epoch": 93.15409836065574, "grad_norm": 4.206577777862549, "learning_rate": 2.4479789474960325e-07, "loss": 0.4261, "step": 28412 }, { "epoch": 93.15737704918033, "grad_norm": 5.2748870849609375, "learning_rate": 2.4456443039802993e-07, "loss": 0.3197, "step": 28413 }, { "epoch": 93.16065573770491, "grad_norm": 3.8078196048736572, "learning_rate": 2.4433107604913575e-07, "loss": 0.1652, "step": 28414 }, { "epoch": 93.1639344262295, "grad_norm": 3.83294677734375, "learning_rate": 2.44097831705552e-07, "loss": 0.1578, "step": 28415 }, { "epoch": 93.1672131147541, "grad_norm": 4.382152557373047, "learning_rate": 2.438646973699088e-07, "loss": 0.4769, "step": 28416 }, { "epoch": 93.1704918032787, "grad_norm": 4.906617164611816, "learning_rate": 2.4363167304483404e-07, "loss": 0.2559, "step": 28417 }, { "epoch": 93.17377049180328, "grad_norm": 6.064120769500732, "learning_rate": 2.433987587329567e-07, "loss": 0.3919, "step": 28418 }, { "epoch": 93.17704918032787, "grad_norm": 5.0575127601623535, "learning_rate": 2.4316595443690363e-07, "loss": 0.3017, "step": 28419 }, { "epoch": 93.18032786885246, "grad_norm": 4.8201494216918945, "learning_rate": 2.429332601592982e-07, "loss": 0.2881, "step": 28420 }, { "epoch": 93.18360655737705, "grad_norm": 5.318535804748535, "learning_rate": 2.4270067590276505e-07, "loss": 0.2397, "step": 28421 }, { "epoch": 93.18688524590164, "grad_norm": 4.98947811126709, "learning_rate": 2.424682016699276e-07, "loss": 0.3591, "step": 28422 }, { "epoch": 93.19016393442622, "grad_norm": 4.313427925109863, "learning_rate": 2.4223583746340486e-07, "loss": 0.2815, "step": 28423 }, { "epoch": 93.19344262295083, "grad_norm": 4.280611515045166, "learning_rate": 2.420035832858192e-07, "loss": 0.3769, "step": 28424 }, { "epoch": 93.19672131147541, "grad_norm": 4.259096145629883, "learning_rate": 2.417714391397896e-07, "loss": 0.2625, "step": 28425 }, { "epoch": 93.2, "grad_norm": 9.677286148071289, "learning_rate": 2.4153940502793185e-07, "loss": 0.3457, "step": 28426 }, { "epoch": 93.20327868852459, "grad_norm": 4.797944068908691, "learning_rate": 2.4130748095286484e-07, "loss": 0.3663, "step": 28427 }, { "epoch": 93.20655737704918, "grad_norm": 5.54404878616333, "learning_rate": 2.410756669172032e-07, "loss": 0.4122, "step": 28428 }, { "epoch": 93.20983606557377, "grad_norm": 4.427178859710693, "learning_rate": 2.4084396292355814e-07, "loss": 0.5126, "step": 28429 }, { "epoch": 93.21311475409836, "grad_norm": 6.135742664337158, "learning_rate": 2.4061236897454544e-07, "loss": 0.4682, "step": 28430 }, { "epoch": 93.21639344262294, "grad_norm": 4.617827415466309, "learning_rate": 2.4038088507277513e-07, "loss": 0.3644, "step": 28431 }, { "epoch": 93.21967213114755, "grad_norm": 7.3567376136779785, "learning_rate": 2.401495112208585e-07, "loss": 0.2022, "step": 28432 }, { "epoch": 93.22295081967214, "grad_norm": 5.398331165313721, "learning_rate": 2.399182474214035e-07, "loss": 0.4894, "step": 28433 }, { "epoch": 93.22622950819672, "grad_norm": 5.224884986877441, "learning_rate": 2.396870936770168e-07, "loss": 0.339, "step": 28434 }, { "epoch": 93.22950819672131, "grad_norm": 4.3799943923950195, "learning_rate": 2.394560499903087e-07, "loss": 0.4178, "step": 28435 }, { "epoch": 93.2327868852459, "grad_norm": 4.257490634918213, "learning_rate": 2.3922511636388035e-07, "loss": 0.357, "step": 28436 }, { "epoch": 93.23606557377049, "grad_norm": 3.7891077995300293, "learning_rate": 2.3899429280033856e-07, "loss": 0.3339, "step": 28437 }, { "epoch": 93.23934426229508, "grad_norm": 19.577245712280273, "learning_rate": 2.387635793022836e-07, "loss": 0.3851, "step": 28438 }, { "epoch": 93.24262295081967, "grad_norm": 4.030236721038818, "learning_rate": 2.3853297587231984e-07, "loss": 0.3003, "step": 28439 }, { "epoch": 93.24590163934427, "grad_norm": 6.302979946136475, "learning_rate": 2.3830248251304533e-07, "loss": 0.1983, "step": 28440 }, { "epoch": 93.24918032786886, "grad_norm": 4.720974445343018, "learning_rate": 2.3807209922706132e-07, "loss": 0.5428, "step": 28441 }, { "epoch": 93.25245901639344, "grad_norm": 4.194449424743652, "learning_rate": 2.3784182601696236e-07, "loss": 0.3626, "step": 28442 }, { "epoch": 93.25573770491803, "grad_norm": 20.954965591430664, "learning_rate": 2.3761166288534754e-07, "loss": 0.3414, "step": 28443 }, { "epoch": 93.25901639344262, "grad_norm": 5.382585525512695, "learning_rate": 2.373816098348114e-07, "loss": 0.4893, "step": 28444 }, { "epoch": 93.26229508196721, "grad_norm": 4.916491508483887, "learning_rate": 2.3715166686794967e-07, "loss": 0.5086, "step": 28445 }, { "epoch": 93.2655737704918, "grad_norm": 4.229086875915527, "learning_rate": 2.3692183398735246e-07, "loss": 0.4548, "step": 28446 }, { "epoch": 93.26885245901639, "grad_norm": 11.034568786621094, "learning_rate": 2.366921111956122e-07, "loss": 0.5137, "step": 28447 }, { "epoch": 93.27213114754099, "grad_norm": 3.642425060272217, "learning_rate": 2.3646249849532012e-07, "loss": 0.1252, "step": 28448 }, { "epoch": 93.27540983606558, "grad_norm": 4.450253486633301, "learning_rate": 2.3623299588906524e-07, "loss": 0.4075, "step": 28449 }, { "epoch": 93.27868852459017, "grad_norm": 5.443122863769531, "learning_rate": 2.3600360337943552e-07, "loss": 0.2991, "step": 28450 }, { "epoch": 93.28196721311475, "grad_norm": 4.938365459442139, "learning_rate": 2.3577432096901554e-07, "loss": 0.4613, "step": 28451 }, { "epoch": 93.28524590163934, "grad_norm": 3.8876781463623047, "learning_rate": 2.3554514866039325e-07, "loss": 0.2554, "step": 28452 }, { "epoch": 93.28852459016393, "grad_norm": 5.528035640716553, "learning_rate": 2.353160864561521e-07, "loss": 0.2583, "step": 28453 }, { "epoch": 93.29180327868852, "grad_norm": 4.386513710021973, "learning_rate": 2.3508713435887563e-07, "loss": 0.2856, "step": 28454 }, { "epoch": 93.29508196721312, "grad_norm": 24.306346893310547, "learning_rate": 2.34858292371144e-07, "loss": 0.1932, "step": 28455 }, { "epoch": 93.29836065573771, "grad_norm": 4.048255920410156, "learning_rate": 2.346295604955373e-07, "loss": 0.3242, "step": 28456 }, { "epoch": 93.3016393442623, "grad_norm": 4.784690856933594, "learning_rate": 2.3440093873463689e-07, "loss": 0.2715, "step": 28457 }, { "epoch": 93.30491803278689, "grad_norm": 3.4612464904785156, "learning_rate": 2.3417242709101951e-07, "loss": 0.3353, "step": 28458 }, { "epoch": 93.30819672131148, "grad_norm": 5.186161518096924, "learning_rate": 2.3394402556726093e-07, "loss": 0.2805, "step": 28459 }, { "epoch": 93.31147540983606, "grad_norm": 11.117923736572266, "learning_rate": 2.3371573416593795e-07, "loss": 0.2771, "step": 28460 }, { "epoch": 93.31475409836065, "grad_norm": 5.3617963790893555, "learning_rate": 2.334875528896252e-07, "loss": 0.4324, "step": 28461 }, { "epoch": 93.31803278688524, "grad_norm": 5.107254981994629, "learning_rate": 2.3325948174089507e-07, "loss": 0.3702, "step": 28462 }, { "epoch": 93.32131147540984, "grad_norm": 5.447878837585449, "learning_rate": 2.3303152072231883e-07, "loss": 0.2165, "step": 28463 }, { "epoch": 93.32459016393443, "grad_norm": 5.590457439422607, "learning_rate": 2.328036698364655e-07, "loss": 0.2976, "step": 28464 }, { "epoch": 93.32786885245902, "grad_norm": 7.5328874588012695, "learning_rate": 2.3257592908590863e-07, "loss": 0.37, "step": 28465 }, { "epoch": 93.33114754098361, "grad_norm": 5.897007942199707, "learning_rate": 2.3234829847321283e-07, "loss": 0.2457, "step": 28466 }, { "epoch": 93.3344262295082, "grad_norm": 4.704381942749023, "learning_rate": 2.32120778000946e-07, "loss": 0.4519, "step": 28467 }, { "epoch": 93.33770491803278, "grad_norm": 5.145920276641846, "learning_rate": 2.318933676716728e-07, "loss": 0.3918, "step": 28468 }, { "epoch": 93.34098360655737, "grad_norm": 4.815433979034424, "learning_rate": 2.3166606748795782e-07, "loss": 0.3156, "step": 28469 }, { "epoch": 93.34426229508196, "grad_norm": 4.692389011383057, "learning_rate": 2.3143887745236572e-07, "loss": 0.3248, "step": 28470 }, { "epoch": 93.34754098360656, "grad_norm": 4.701801776885986, "learning_rate": 2.312117975674566e-07, "loss": 0.6497, "step": 28471 }, { "epoch": 93.35081967213115, "grad_norm": 5.284509181976318, "learning_rate": 2.3098482783579068e-07, "loss": 0.1981, "step": 28472 }, { "epoch": 93.35409836065574, "grad_norm": 4.26017427444458, "learning_rate": 2.3075796825992924e-07, "loss": 0.2436, "step": 28473 }, { "epoch": 93.35737704918033, "grad_norm": 5.554187297821045, "learning_rate": 2.3053121884242912e-07, "loss": 0.3066, "step": 28474 }, { "epoch": 93.36065573770492, "grad_norm": 4.954455375671387, "learning_rate": 2.3030457958584605e-07, "loss": 0.5546, "step": 28475 }, { "epoch": 93.3639344262295, "grad_norm": 10.302910804748535, "learning_rate": 2.300780504927369e-07, "loss": 0.4073, "step": 28476 }, { "epoch": 93.3672131147541, "grad_norm": 5.30122709274292, "learning_rate": 2.2985163156565736e-07, "loss": 0.4028, "step": 28477 }, { "epoch": 93.37049180327868, "grad_norm": 4.851436614990234, "learning_rate": 2.2962532280715765e-07, "loss": 0.4495, "step": 28478 }, { "epoch": 93.37377049180328, "grad_norm": 3.462228298187256, "learning_rate": 2.2939912421979126e-07, "loss": 0.1958, "step": 28479 }, { "epoch": 93.37704918032787, "grad_norm": 5.333459377288818, "learning_rate": 2.291730358061095e-07, "loss": 0.5381, "step": 28480 }, { "epoch": 93.38032786885246, "grad_norm": 4.006365776062012, "learning_rate": 2.2894705756866032e-07, "loss": 0.366, "step": 28481 }, { "epoch": 93.38360655737705, "grad_norm": 5.506216526031494, "learning_rate": 2.2872118950999168e-07, "loss": 0.4862, "step": 28482 }, { "epoch": 93.38688524590164, "grad_norm": 4.918178558349609, "learning_rate": 2.2849543163265265e-07, "loss": 0.3261, "step": 28483 }, { "epoch": 93.39016393442623, "grad_norm": 4.086008071899414, "learning_rate": 2.2826978393918674e-07, "loss": 0.2265, "step": 28484 }, { "epoch": 93.39344262295081, "grad_norm": 5.03704833984375, "learning_rate": 2.2804424643213974e-07, "loss": 0.51, "step": 28485 }, { "epoch": 93.3967213114754, "grad_norm": 5.3603363037109375, "learning_rate": 2.278188191140529e-07, "loss": 0.1877, "step": 28486 }, { "epoch": 93.4, "grad_norm": 4.8974409103393555, "learning_rate": 2.2759350198746978e-07, "loss": 0.4619, "step": 28487 }, { "epoch": 93.4032786885246, "grad_norm": 7.669370651245117, "learning_rate": 2.2736829505493163e-07, "loss": 0.2591, "step": 28488 }, { "epoch": 93.40655737704918, "grad_norm": 6.048704624176025, "learning_rate": 2.2714319831897648e-07, "loss": 0.4353, "step": 28489 }, { "epoch": 93.40983606557377, "grad_norm": 4.415773391723633, "learning_rate": 2.2691821178214114e-07, "loss": 0.2993, "step": 28490 }, { "epoch": 93.41311475409836, "grad_norm": 3.6809756755828857, "learning_rate": 2.2669333544696693e-07, "loss": 0.4308, "step": 28491 }, { "epoch": 93.41639344262295, "grad_norm": 4.033705711364746, "learning_rate": 2.2646856931598626e-07, "loss": 0.3148, "step": 28492 }, { "epoch": 93.41967213114754, "grad_norm": 3.3898186683654785, "learning_rate": 2.2624391339173379e-07, "loss": 0.3003, "step": 28493 }, { "epoch": 93.42295081967212, "grad_norm": 4.470463752746582, "learning_rate": 2.2601936767674416e-07, "loss": 0.6545, "step": 28494 }, { "epoch": 93.42622950819673, "grad_norm": 4.020241737365723, "learning_rate": 2.2579493217354753e-07, "loss": 0.3336, "step": 28495 }, { "epoch": 93.42950819672132, "grad_norm": 4.626779556274414, "learning_rate": 2.2557060688467748e-07, "loss": 0.2243, "step": 28496 }, { "epoch": 93.4327868852459, "grad_norm": 4.391971588134766, "learning_rate": 2.2534639181265972e-07, "loss": 0.2789, "step": 28497 }, { "epoch": 93.43606557377049, "grad_norm": 4.542663097381592, "learning_rate": 2.2512228696002558e-07, "loss": 0.4362, "step": 28498 }, { "epoch": 93.43934426229508, "grad_norm": 9.02297306060791, "learning_rate": 2.2489829232930082e-07, "loss": 0.3834, "step": 28499 }, { "epoch": 93.44262295081967, "grad_norm": 4.448361396789551, "learning_rate": 2.246744079230112e-07, "loss": 0.4461, "step": 28500 }, { "epoch": 93.44590163934426, "grad_norm": 16.502857208251953, "learning_rate": 2.2445063374368137e-07, "loss": 0.4553, "step": 28501 }, { "epoch": 93.44918032786886, "grad_norm": 4.40971565246582, "learning_rate": 2.2422696979383595e-07, "loss": 0.3062, "step": 28502 }, { "epoch": 93.45245901639345, "grad_norm": 6.060490608215332, "learning_rate": 2.2400341607599296e-07, "loss": 0.3612, "step": 28503 }, { "epoch": 93.45573770491804, "grad_norm": 5.833188056945801, "learning_rate": 2.2377997259267815e-07, "loss": 0.3855, "step": 28504 }, { "epoch": 93.45901639344262, "grad_norm": 4.459880352020264, "learning_rate": 2.2355663934640837e-07, "loss": 0.2654, "step": 28505 }, { "epoch": 93.46229508196721, "grad_norm": 4.287381172180176, "learning_rate": 2.2333341633970273e-07, "loss": 0.3855, "step": 28506 }, { "epoch": 93.4655737704918, "grad_norm": 5.01399040222168, "learning_rate": 2.2311030357507812e-07, "loss": 0.3857, "step": 28507 }, { "epoch": 93.46885245901639, "grad_norm": 6.478662014007568, "learning_rate": 2.2288730105504918e-07, "loss": 0.3186, "step": 28508 }, { "epoch": 93.47213114754098, "grad_norm": 4.336791515350342, "learning_rate": 2.2266440878213168e-07, "loss": 0.3781, "step": 28509 }, { "epoch": 93.47540983606558, "grad_norm": 5.292326927185059, "learning_rate": 2.2244162675883918e-07, "loss": 0.4366, "step": 28510 }, { "epoch": 93.47868852459017, "grad_norm": 4.459080696105957, "learning_rate": 2.222189549876841e-07, "loss": 0.3852, "step": 28511 }, { "epoch": 93.48196721311476, "grad_norm": 4.775395393371582, "learning_rate": 2.2199639347117552e-07, "loss": 0.4349, "step": 28512 }, { "epoch": 93.48524590163935, "grad_norm": 4.9628400802612305, "learning_rate": 2.217739422118248e-07, "loss": 0.381, "step": 28513 }, { "epoch": 93.48852459016393, "grad_norm": 6.994680404663086, "learning_rate": 2.215516012121399e-07, "loss": 0.4754, "step": 28514 }, { "epoch": 93.49180327868852, "grad_norm": 7.089259624481201, "learning_rate": 2.2132937047462777e-07, "loss": 0.4012, "step": 28515 }, { "epoch": 93.49508196721311, "grad_norm": 3.42797589302063, "learning_rate": 2.21107250001793e-07, "loss": 0.1892, "step": 28516 }, { "epoch": 93.4983606557377, "grad_norm": 3.5568575859069824, "learning_rate": 2.2088523979614363e-07, "loss": 0.2584, "step": 28517 }, { "epoch": 93.5016393442623, "grad_norm": 4.33481502532959, "learning_rate": 2.2066333986017986e-07, "loss": 0.2904, "step": 28518 }, { "epoch": 93.50491803278689, "grad_norm": 4.547228813171387, "learning_rate": 2.2044155019640412e-07, "loss": 0.659, "step": 28519 }, { "epoch": 93.50819672131148, "grad_norm": 4.226204872131348, "learning_rate": 2.2021987080732e-07, "loss": 0.2328, "step": 28520 }, { "epoch": 93.51147540983607, "grad_norm": 4.986776351928711, "learning_rate": 2.1999830169542325e-07, "loss": 0.4905, "step": 28521 }, { "epoch": 93.51475409836065, "grad_norm": 5.661397457122803, "learning_rate": 2.197768428632152e-07, "loss": 0.4291, "step": 28522 }, { "epoch": 93.51803278688524, "grad_norm": 5.401371002197266, "learning_rate": 2.1955549431319168e-07, "loss": 0.4514, "step": 28523 }, { "epoch": 93.52131147540983, "grad_norm": 6.020586967468262, "learning_rate": 2.1933425604784953e-07, "loss": 0.4172, "step": 28524 }, { "epoch": 93.52459016393442, "grad_norm": 4.0923967361450195, "learning_rate": 2.1911312806968233e-07, "loss": 0.3114, "step": 28525 }, { "epoch": 93.52786885245902, "grad_norm": 4.2842841148376465, "learning_rate": 2.1889211038118473e-07, "loss": 0.3178, "step": 28526 }, { "epoch": 93.53114754098361, "grad_norm": 5.1258463859558105, "learning_rate": 2.1867120298484924e-07, "loss": 0.3206, "step": 28527 }, { "epoch": 93.5344262295082, "grad_norm": 5.232400894165039, "learning_rate": 2.184504058831638e-07, "loss": 0.3621, "step": 28528 }, { "epoch": 93.53770491803279, "grad_norm": 4.478094100952148, "learning_rate": 2.18229719078622e-07, "loss": 0.1967, "step": 28529 }, { "epoch": 93.54098360655738, "grad_norm": 4.01583194732666, "learning_rate": 2.1800914257371076e-07, "loss": 0.2407, "step": 28530 }, { "epoch": 93.54426229508196, "grad_norm": 4.419412612915039, "learning_rate": 2.1778867637091584e-07, "loss": 0.302, "step": 28531 }, { "epoch": 93.54754098360655, "grad_norm": 4.137817859649658, "learning_rate": 2.1756832047272525e-07, "loss": 0.2556, "step": 28532 }, { "epoch": 93.55081967213114, "grad_norm": 5.194540023803711, "learning_rate": 2.1734807488162368e-07, "loss": 0.2725, "step": 28533 }, { "epoch": 93.55409836065574, "grad_norm": 4.688809394836426, "learning_rate": 2.1712793960009248e-07, "loss": 0.2749, "step": 28534 }, { "epoch": 93.55737704918033, "grad_norm": 11.85545825958252, "learning_rate": 2.1690791463061633e-07, "loss": 0.3807, "step": 28535 }, { "epoch": 93.56065573770492, "grad_norm": 5.024948596954346, "learning_rate": 2.1668799997567548e-07, "loss": 0.2782, "step": 28536 }, { "epoch": 93.56393442622951, "grad_norm": 4.200828552246094, "learning_rate": 2.1646819563774902e-07, "loss": 0.2099, "step": 28537 }, { "epoch": 93.5672131147541, "grad_norm": 7.141528129577637, "learning_rate": 2.162485016193161e-07, "loss": 0.2499, "step": 28538 }, { "epoch": 93.57049180327868, "grad_norm": 5.764453411102295, "learning_rate": 2.1602891792285364e-07, "loss": 0.4042, "step": 28539 }, { "epoch": 93.57377049180327, "grad_norm": 6.128664016723633, "learning_rate": 2.1580944455083852e-07, "loss": 0.4372, "step": 28540 }, { "epoch": 93.57704918032788, "grad_norm": 10.38514232635498, "learning_rate": 2.1559008150574544e-07, "loss": 0.3924, "step": 28541 }, { "epoch": 93.58032786885246, "grad_norm": 5.252259731292725, "learning_rate": 2.1537082879004578e-07, "loss": 0.4188, "step": 28542 }, { "epoch": 93.58360655737705, "grad_norm": 4.597261428833008, "learning_rate": 2.151516864062142e-07, "loss": 0.3498, "step": 28543 }, { "epoch": 93.58688524590164, "grad_norm": 7.376072883605957, "learning_rate": 2.1493265435672205e-07, "loss": 0.3565, "step": 28544 }, { "epoch": 93.59016393442623, "grad_norm": 3.761584758758545, "learning_rate": 2.1471373264403738e-07, "loss": 0.4959, "step": 28545 }, { "epoch": 93.59344262295082, "grad_norm": 6.743519306182861, "learning_rate": 2.1449492127062932e-07, "loss": 0.3341, "step": 28546 }, { "epoch": 93.5967213114754, "grad_norm": 3.795389413833618, "learning_rate": 2.1427622023896587e-07, "loss": 0.3894, "step": 28547 }, { "epoch": 93.6, "grad_norm": 4.645771503448486, "learning_rate": 2.1405762955151178e-07, "loss": 0.3395, "step": 28548 }, { "epoch": 93.6032786885246, "grad_norm": 5.58024787902832, "learning_rate": 2.138391492107339e-07, "loss": 0.3395, "step": 28549 }, { "epoch": 93.60655737704919, "grad_norm": 4.324082374572754, "learning_rate": 2.1362077921909364e-07, "loss": 0.1428, "step": 28550 }, { "epoch": 93.60983606557377, "grad_norm": 4.489170074462891, "learning_rate": 2.1340251957905456e-07, "loss": 0.2778, "step": 28551 }, { "epoch": 93.61311475409836, "grad_norm": 4.793975353240967, "learning_rate": 2.1318437029307804e-07, "loss": 0.2823, "step": 28552 }, { "epoch": 93.61639344262295, "grad_norm": 5.105778217315674, "learning_rate": 2.129663313636232e-07, "loss": 0.3466, "step": 28553 }, { "epoch": 93.61967213114754, "grad_norm": 5.609583854675293, "learning_rate": 2.1274840279314923e-07, "loss": 0.4554, "step": 28554 }, { "epoch": 93.62295081967213, "grad_norm": 5.512486934661865, "learning_rate": 2.1253058458411303e-07, "loss": 0.366, "step": 28555 }, { "epoch": 93.62622950819672, "grad_norm": 3.794027328491211, "learning_rate": 2.123128767389704e-07, "loss": 0.4147, "step": 28556 }, { "epoch": 93.62950819672132, "grad_norm": 5.402811050415039, "learning_rate": 2.1209527926017716e-07, "loss": 0.3338, "step": 28557 }, { "epoch": 93.6327868852459, "grad_norm": 4.913064956665039, "learning_rate": 2.1187779215018688e-07, "loss": 0.518, "step": 28558 }, { "epoch": 93.6360655737705, "grad_norm": 5.859386920928955, "learning_rate": 2.116604154114521e-07, "loss": 0.4635, "step": 28559 }, { "epoch": 93.63934426229508, "grad_norm": 3.2505042552948, "learning_rate": 2.1144314904642194e-07, "loss": 0.3578, "step": 28560 }, { "epoch": 93.64262295081967, "grad_norm": 3.655268669128418, "learning_rate": 2.1122599305754775e-07, "loss": 0.3797, "step": 28561 }, { "epoch": 93.64590163934426, "grad_norm": 4.126946926116943, "learning_rate": 2.1100894744727985e-07, "loss": 0.3189, "step": 28562 }, { "epoch": 93.64918032786885, "grad_norm": 6.490904331207275, "learning_rate": 2.107920122180629e-07, "loss": 0.5003, "step": 28563 }, { "epoch": 93.65245901639344, "grad_norm": 5.592656135559082, "learning_rate": 2.1057518737234383e-07, "loss": 0.2331, "step": 28564 }, { "epoch": 93.65573770491804, "grad_norm": 6.083949089050293, "learning_rate": 2.103584729125696e-07, "loss": 0.2887, "step": 28565 }, { "epoch": 93.65901639344263, "grad_norm": 4.069683074951172, "learning_rate": 2.101418688411816e-07, "loss": 0.4287, "step": 28566 }, { "epoch": 93.66229508196722, "grad_norm": 5.165815830230713, "learning_rate": 2.0992537516062228e-07, "loss": 0.4026, "step": 28567 }, { "epoch": 93.6655737704918, "grad_norm": 4.887598037719727, "learning_rate": 2.0970899187333304e-07, "loss": 0.347, "step": 28568 }, { "epoch": 93.66885245901639, "grad_norm": 4.599757194519043, "learning_rate": 2.0949271898175528e-07, "loss": 0.4854, "step": 28569 }, { "epoch": 93.67213114754098, "grad_norm": 6.385752201080322, "learning_rate": 2.0927655648832702e-07, "loss": 0.5293, "step": 28570 }, { "epoch": 93.67540983606557, "grad_norm": 4.020106315612793, "learning_rate": 2.0906050439548518e-07, "loss": 0.3537, "step": 28571 }, { "epoch": 93.67868852459016, "grad_norm": 5.5397233963012695, "learning_rate": 2.0884456270566676e-07, "loss": 0.2276, "step": 28572 }, { "epoch": 93.68196721311476, "grad_norm": 5.0558695793151855, "learning_rate": 2.0862873142130425e-07, "loss": 0.3731, "step": 28573 }, { "epoch": 93.68524590163935, "grad_norm": 7.699994087219238, "learning_rate": 2.0841301054483453e-07, "loss": 0.4108, "step": 28574 }, { "epoch": 93.68852459016394, "grad_norm": 5.250174522399902, "learning_rate": 2.0819740007868906e-07, "loss": 0.3708, "step": 28575 }, { "epoch": 93.69180327868852, "grad_norm": 4.304084777832031, "learning_rate": 2.0798190002529807e-07, "loss": 0.2785, "step": 28576 }, { "epoch": 93.69508196721311, "grad_norm": 8.212600708007812, "learning_rate": 2.0776651038709184e-07, "loss": 0.3259, "step": 28577 }, { "epoch": 93.6983606557377, "grad_norm": 3.7676808834075928, "learning_rate": 2.0755123116650068e-07, "loss": 0.3498, "step": 28578 }, { "epoch": 93.70163934426229, "grad_norm": 5.868563175201416, "learning_rate": 2.0733606236595038e-07, "loss": 0.3121, "step": 28579 }, { "epoch": 93.70491803278688, "grad_norm": 6.642638206481934, "learning_rate": 2.0712100398786795e-07, "loss": 0.3247, "step": 28580 }, { "epoch": 93.70819672131148, "grad_norm": 4.786386013031006, "learning_rate": 2.0690605603467806e-07, "loss": 0.3385, "step": 28581 }, { "epoch": 93.71147540983607, "grad_norm": 5.576988697052002, "learning_rate": 2.0669121850880547e-07, "loss": 0.339, "step": 28582 }, { "epoch": 93.71475409836066, "grad_norm": 4.9300079345703125, "learning_rate": 2.0647649141267158e-07, "loss": 0.3496, "step": 28583 }, { "epoch": 93.71803278688525, "grad_norm": 4.3492512702941895, "learning_rate": 2.0626187474869662e-07, "loss": 0.5807, "step": 28584 }, { "epoch": 93.72131147540983, "grad_norm": 4.54428243637085, "learning_rate": 2.0604736851930317e-07, "loss": 0.5065, "step": 28585 }, { "epoch": 93.72459016393442, "grad_norm": 6.709577560424805, "learning_rate": 2.0583297272690927e-07, "loss": 0.5692, "step": 28586 }, { "epoch": 93.72786885245901, "grad_norm": 4.854733943939209, "learning_rate": 2.0561868737393075e-07, "loss": 0.2244, "step": 28587 }, { "epoch": 93.73114754098361, "grad_norm": 4.394631385803223, "learning_rate": 2.054045124627857e-07, "loss": 0.1597, "step": 28588 }, { "epoch": 93.7344262295082, "grad_norm": 7.304352283477783, "learning_rate": 2.0519044799588883e-07, "loss": 0.3865, "step": 28589 }, { "epoch": 93.73770491803279, "grad_norm": 5.547492504119873, "learning_rate": 2.0497649397565266e-07, "loss": 0.2079, "step": 28590 }, { "epoch": 93.74098360655738, "grad_norm": 4.283088684082031, "learning_rate": 2.0476265040449195e-07, "loss": 0.2048, "step": 28591 }, { "epoch": 93.74426229508197, "grad_norm": 5.432605266571045, "learning_rate": 2.0454891728481695e-07, "loss": 0.262, "step": 28592 }, { "epoch": 93.74754098360656, "grad_norm": 6.300498008728027, "learning_rate": 2.043352946190369e-07, "loss": 0.3125, "step": 28593 }, { "epoch": 93.75081967213114, "grad_norm": 4.281392574310303, "learning_rate": 2.0412178240956204e-07, "loss": 0.3881, "step": 28594 }, { "epoch": 93.75409836065573, "grad_norm": 4.568778991699219, "learning_rate": 2.0390838065879825e-07, "loss": 0.3391, "step": 28595 }, { "epoch": 93.75737704918033, "grad_norm": 4.341802597045898, "learning_rate": 2.036950893691536e-07, "loss": 0.5341, "step": 28596 }, { "epoch": 93.76065573770492, "grad_norm": 6.994102954864502, "learning_rate": 2.0348190854303285e-07, "loss": 0.5123, "step": 28597 }, { "epoch": 93.76393442622951, "grad_norm": 3.6893537044525146, "learning_rate": 2.0326883818283848e-07, "loss": 0.2506, "step": 28598 }, { "epoch": 93.7672131147541, "grad_norm": 4.564022064208984, "learning_rate": 2.0305587829097418e-07, "loss": 0.4848, "step": 28599 }, { "epoch": 93.77049180327869, "grad_norm": 5.016617774963379, "learning_rate": 2.028430288698413e-07, "loss": 0.2463, "step": 28600 }, { "epoch": 93.77377049180328, "grad_norm": 4.966761112213135, "learning_rate": 2.026302899218402e-07, "loss": 0.4067, "step": 28601 }, { "epoch": 93.77704918032786, "grad_norm": 7.938979625701904, "learning_rate": 2.0241766144936892e-07, "loss": 0.319, "step": 28602 }, { "epoch": 93.78032786885245, "grad_norm": 4.805622577667236, "learning_rate": 2.0220514345482444e-07, "loss": 0.4648, "step": 28603 }, { "epoch": 93.78360655737706, "grad_norm": 3.6217403411865234, "learning_rate": 2.0199273594060597e-07, "loss": 0.2867, "step": 28604 }, { "epoch": 93.78688524590164, "grad_norm": 13.11544132232666, "learning_rate": 2.0178043890910603e-07, "loss": 0.311, "step": 28605 }, { "epoch": 93.79016393442623, "grad_norm": 5.4386091232299805, "learning_rate": 2.0156825236271937e-07, "loss": 0.4059, "step": 28606 }, { "epoch": 93.79344262295082, "grad_norm": 4.758852958679199, "learning_rate": 2.0135617630383852e-07, "loss": 0.4367, "step": 28607 }, { "epoch": 93.79672131147541, "grad_norm": 5.586450576782227, "learning_rate": 2.011442107348538e-07, "loss": 0.3517, "step": 28608 }, { "epoch": 93.8, "grad_norm": 4.397833347320557, "learning_rate": 2.009323556581566e-07, "loss": 0.2028, "step": 28609 }, { "epoch": 93.80327868852459, "grad_norm": 4.641391277313232, "learning_rate": 2.0072061107613617e-07, "loss": 0.3512, "step": 28610 }, { "epoch": 93.80655737704917, "grad_norm": 4.437113285064697, "learning_rate": 2.0050897699117943e-07, "loss": 0.3993, "step": 28611 }, { "epoch": 93.80983606557378, "grad_norm": 5.038162708282471, "learning_rate": 2.002974534056723e-07, "loss": 0.1829, "step": 28612 }, { "epoch": 93.81311475409836, "grad_norm": 4.496637344360352, "learning_rate": 2.0008604032200174e-07, "loss": 0.2828, "step": 28613 }, { "epoch": 93.81639344262295, "grad_norm": 4.571314334869385, "learning_rate": 1.9987473774255028e-07, "loss": 0.2547, "step": 28614 }, { "epoch": 93.81967213114754, "grad_norm": 4.738517761230469, "learning_rate": 1.9966354566970048e-07, "loss": 0.368, "step": 28615 }, { "epoch": 93.82295081967213, "grad_norm": 5.133785247802734, "learning_rate": 1.9945246410583263e-07, "loss": 0.4004, "step": 28616 }, { "epoch": 93.82622950819672, "grad_norm": 5.608648777008057, "learning_rate": 1.992414930533293e-07, "loss": 0.2594, "step": 28617 }, { "epoch": 93.8295081967213, "grad_norm": 4.927258014678955, "learning_rate": 1.9903063251456856e-07, "loss": 0.2398, "step": 28618 }, { "epoch": 93.8327868852459, "grad_norm": 4.33429479598999, "learning_rate": 1.9881988249192852e-07, "loss": 0.4394, "step": 28619 }, { "epoch": 93.8360655737705, "grad_norm": 5.013369083404541, "learning_rate": 1.9860924298778394e-07, "loss": 0.1869, "step": 28620 }, { "epoch": 93.83934426229509, "grad_norm": 6.000375270843506, "learning_rate": 1.983987140045107e-07, "loss": 0.4147, "step": 28621 }, { "epoch": 93.84262295081967, "grad_norm": 5.283596992492676, "learning_rate": 1.981882955444836e-07, "loss": 0.3125, "step": 28622 }, { "epoch": 93.84590163934426, "grad_norm": 4.043070316314697, "learning_rate": 1.9797798761007514e-07, "loss": 0.5852, "step": 28623 }, { "epoch": 93.84918032786885, "grad_norm": 4.991717338562012, "learning_rate": 1.9776779020365677e-07, "loss": 0.4072, "step": 28624 }, { "epoch": 93.85245901639344, "grad_norm": 4.776530742645264, "learning_rate": 1.9755770332759662e-07, "loss": 0.3841, "step": 28625 }, { "epoch": 93.85573770491803, "grad_norm": 4.174049377441406, "learning_rate": 1.9734772698426717e-07, "loss": 0.2927, "step": 28626 }, { "epoch": 93.85901639344263, "grad_norm": 5.313066482543945, "learning_rate": 1.9713786117603327e-07, "loss": 0.2375, "step": 28627 }, { "epoch": 93.86229508196722, "grad_norm": 4.793234348297119, "learning_rate": 1.96928105905263e-07, "loss": 0.5249, "step": 28628 }, { "epoch": 93.8655737704918, "grad_norm": 5.283491611480713, "learning_rate": 1.9671846117432002e-07, "loss": 0.2842, "step": 28629 }, { "epoch": 93.8688524590164, "grad_norm": 5.978526592254639, "learning_rate": 1.9650892698557021e-07, "loss": 0.4368, "step": 28630 }, { "epoch": 93.87213114754098, "grad_norm": 4.761131286621094, "learning_rate": 1.9629950334137503e-07, "loss": 0.4243, "step": 28631 }, { "epoch": 93.87540983606557, "grad_norm": 6.233057498931885, "learning_rate": 1.9609019024409703e-07, "loss": 0.4677, "step": 28632 }, { "epoch": 93.87868852459016, "grad_norm": 3.9709219932556152, "learning_rate": 1.958809876960943e-07, "loss": 0.366, "step": 28633 }, { "epoch": 93.88196721311475, "grad_norm": 4.21681022644043, "learning_rate": 1.9567189569972722e-07, "loss": 0.4364, "step": 28634 }, { "epoch": 93.88524590163935, "grad_norm": 7.001120567321777, "learning_rate": 1.95462914257355e-07, "loss": 0.2252, "step": 28635 }, { "epoch": 93.88852459016394, "grad_norm": 5.093976020812988, "learning_rate": 1.9525404337133014e-07, "loss": 0.3988, "step": 28636 }, { "epoch": 93.89180327868853, "grad_norm": 5.027440547943115, "learning_rate": 1.9504528304401194e-07, "loss": 0.4091, "step": 28637 }, { "epoch": 93.89508196721312, "grad_norm": 27.33353042602539, "learning_rate": 1.9483663327775293e-07, "loss": 0.229, "step": 28638 }, { "epoch": 93.8983606557377, "grad_norm": 7.400225639343262, "learning_rate": 1.9462809407490456e-07, "loss": 0.2449, "step": 28639 }, { "epoch": 93.90163934426229, "grad_norm": 4.321882724761963, "learning_rate": 1.9441966543782055e-07, "loss": 0.51, "step": 28640 }, { "epoch": 93.90491803278688, "grad_norm": 9.123929977416992, "learning_rate": 1.9421134736885006e-07, "loss": 0.4774, "step": 28641 }, { "epoch": 93.90819672131147, "grad_norm": 4.626916408538818, "learning_rate": 1.9400313987034236e-07, "loss": 0.4438, "step": 28642 }, { "epoch": 93.91147540983607, "grad_norm": 5.211053848266602, "learning_rate": 1.9379504294464335e-07, "loss": 0.3923, "step": 28643 }, { "epoch": 93.91475409836066, "grad_norm": 9.907319068908691, "learning_rate": 1.9358705659410225e-07, "loss": 0.3627, "step": 28644 }, { "epoch": 93.91803278688525, "grad_norm": 4.832233428955078, "learning_rate": 1.9337918082106278e-07, "loss": 0.2494, "step": 28645 }, { "epoch": 93.92131147540984, "grad_norm": 6.17913293838501, "learning_rate": 1.931714156278708e-07, "loss": 0.4059, "step": 28646 }, { "epoch": 93.92459016393443, "grad_norm": 4.50043249130249, "learning_rate": 1.9296376101686552e-07, "loss": 0.5146, "step": 28647 }, { "epoch": 93.92786885245901, "grad_norm": 7.337027072906494, "learning_rate": 1.9275621699039182e-07, "loss": 0.3394, "step": 28648 }, { "epoch": 93.9311475409836, "grad_norm": 10.930248260498047, "learning_rate": 1.9254878355078888e-07, "loss": 0.5311, "step": 28649 }, { "epoch": 93.93442622950819, "grad_norm": 3.5417377948760986, "learning_rate": 1.9234146070039483e-07, "loss": 0.2331, "step": 28650 }, { "epoch": 93.9377049180328, "grad_norm": 6.59097146987915, "learning_rate": 1.921342484415478e-07, "loss": 0.5053, "step": 28651 }, { "epoch": 93.94098360655738, "grad_norm": 4.53013277053833, "learning_rate": 1.9192714677658598e-07, "loss": 0.4858, "step": 28652 }, { "epoch": 93.94426229508197, "grad_norm": 4.345995903015137, "learning_rate": 1.9172015570784297e-07, "loss": 0.324, "step": 28653 }, { "epoch": 93.94754098360656, "grad_norm": 5.563315391540527, "learning_rate": 1.9151327523765362e-07, "loss": 0.3647, "step": 28654 }, { "epoch": 93.95081967213115, "grad_norm": 4.067410945892334, "learning_rate": 1.913065053683494e-07, "loss": 0.3926, "step": 28655 }, { "epoch": 93.95409836065573, "grad_norm": 7.768564701080322, "learning_rate": 1.9109984610226396e-07, "loss": 0.5199, "step": 28656 }, { "epoch": 93.95737704918032, "grad_norm": 5.669182777404785, "learning_rate": 1.9089329744172658e-07, "loss": 0.3682, "step": 28657 }, { "epoch": 93.96065573770491, "grad_norm": 5.740295886993408, "learning_rate": 1.906868593890654e-07, "loss": 0.461, "step": 28658 }, { "epoch": 93.96393442622951, "grad_norm": 3.856438159942627, "learning_rate": 1.9048053194660965e-07, "loss": 0.1853, "step": 28659 }, { "epoch": 93.9672131147541, "grad_norm": 4.835602283477783, "learning_rate": 1.9027431511668414e-07, "loss": 0.4828, "step": 28660 }, { "epoch": 93.97049180327869, "grad_norm": 7.306564807891846, "learning_rate": 1.9006820890161593e-07, "loss": 0.5883, "step": 28661 }, { "epoch": 93.97377049180328, "grad_norm": 4.638000965118408, "learning_rate": 1.8986221330372867e-07, "loss": 0.2439, "step": 28662 }, { "epoch": 93.97704918032787, "grad_norm": 5.57415771484375, "learning_rate": 1.8965632832534497e-07, "loss": 0.5158, "step": 28663 }, { "epoch": 93.98032786885246, "grad_norm": 5.703019142150879, "learning_rate": 1.894505539687852e-07, "loss": 0.4865, "step": 28664 }, { "epoch": 93.98360655737704, "grad_norm": 4.876796722412109, "learning_rate": 1.8924489023637193e-07, "loss": 0.347, "step": 28665 }, { "epoch": 93.98688524590163, "grad_norm": 5.0132951736450195, "learning_rate": 1.8903933713042334e-07, "loss": 0.3628, "step": 28666 }, { "epoch": 93.99016393442623, "grad_norm": 4.829412937164307, "learning_rate": 1.8883389465325642e-07, "loss": 0.5278, "step": 28667 }, { "epoch": 93.99344262295082, "grad_norm": 5.0202131271362305, "learning_rate": 1.8862856280718821e-07, "loss": 0.4181, "step": 28668 }, { "epoch": 93.99672131147541, "grad_norm": 4.2362565994262695, "learning_rate": 1.884233415945347e-07, "loss": 0.4169, "step": 28669 }, { "epoch": 94.0, "grad_norm": 4.012740612030029, "learning_rate": 1.8821823101760949e-07, "loss": 0.3295, "step": 28670 }, { "epoch": 94.00327868852459, "grad_norm": 4.526752471923828, "learning_rate": 1.880132310787264e-07, "loss": 0.4328, "step": 28671 }, { "epoch": 94.00655737704918, "grad_norm": 3.5728940963745117, "learning_rate": 1.8780834178019459e-07, "loss": 0.2103, "step": 28672 }, { "epoch": 94.00983606557377, "grad_norm": 4.563145637512207, "learning_rate": 1.8760356312432558e-07, "loss": 0.2931, "step": 28673 }, { "epoch": 94.01311475409837, "grad_norm": 4.28514289855957, "learning_rate": 1.873988951134298e-07, "loss": 0.3236, "step": 28674 }, { "epoch": 94.01639344262296, "grad_norm": 5.522562026977539, "learning_rate": 1.8719433774981422e-07, "loss": 0.4704, "step": 28675 }, { "epoch": 94.01967213114754, "grad_norm": 4.984633922576904, "learning_rate": 1.869898910357848e-07, "loss": 0.2796, "step": 28676 }, { "epoch": 94.02295081967213, "grad_norm": 7.00129508972168, "learning_rate": 1.8678555497364636e-07, "loss": 0.447, "step": 28677 }, { "epoch": 94.02622950819672, "grad_norm": 4.186685085296631, "learning_rate": 1.8658132956570485e-07, "loss": 0.319, "step": 28678 }, { "epoch": 94.02950819672131, "grad_norm": 5.181464195251465, "learning_rate": 1.8637721481426284e-07, "loss": 0.3095, "step": 28679 }, { "epoch": 94.0327868852459, "grad_norm": 4.414435386657715, "learning_rate": 1.8617321072162075e-07, "loss": 0.3723, "step": 28680 }, { "epoch": 94.03606557377049, "grad_norm": 4.099949359893799, "learning_rate": 1.8596931729007895e-07, "loss": 0.2627, "step": 28681 }, { "epoch": 94.03934426229509, "grad_norm": 4.283799171447754, "learning_rate": 1.8576553452193779e-07, "loss": 0.3955, "step": 28682 }, { "epoch": 94.04262295081968, "grad_norm": 5.518932342529297, "learning_rate": 1.8556186241949437e-07, "loss": 0.623, "step": 28683 }, { "epoch": 94.04590163934427, "grad_norm": 5.939243793487549, "learning_rate": 1.853583009850457e-07, "loss": 0.4699, "step": 28684 }, { "epoch": 94.04918032786885, "grad_norm": 3.964571714401245, "learning_rate": 1.8515485022088664e-07, "loss": 0.3167, "step": 28685 }, { "epoch": 94.05245901639344, "grad_norm": 11.257046699523926, "learning_rate": 1.849515101293109e-07, "loss": 0.3331, "step": 28686 }, { "epoch": 94.05573770491803, "grad_norm": 9.551948547363281, "learning_rate": 1.847482807126122e-07, "loss": 0.4317, "step": 28687 }, { "epoch": 94.05901639344262, "grad_norm": 4.153321266174316, "learning_rate": 1.8454516197308314e-07, "loss": 0.2423, "step": 28688 }, { "epoch": 94.0622950819672, "grad_norm": 6.7654829025268555, "learning_rate": 1.843421539130108e-07, "loss": 0.3946, "step": 28689 }, { "epoch": 94.06557377049181, "grad_norm": 3.7936160564422607, "learning_rate": 1.8413925653468778e-07, "loss": 0.334, "step": 28690 }, { "epoch": 94.0688524590164, "grad_norm": 9.917725563049316, "learning_rate": 1.8393646984040115e-07, "loss": 0.4984, "step": 28691 }, { "epoch": 94.07213114754099, "grad_norm": 4.835265636444092, "learning_rate": 1.8373379383243572e-07, "loss": 0.2586, "step": 28692 }, { "epoch": 94.07540983606557, "grad_norm": 5.0277252197265625, "learning_rate": 1.835312285130786e-07, "loss": 0.1935, "step": 28693 }, { "epoch": 94.07868852459016, "grad_norm": 5.028171062469482, "learning_rate": 1.8332877388461345e-07, "loss": 0.2701, "step": 28694 }, { "epoch": 94.08196721311475, "grad_norm": 4.841299533843994, "learning_rate": 1.8312642994932294e-07, "loss": 0.2424, "step": 28695 }, { "epoch": 94.08524590163934, "grad_norm": 7.139977931976318, "learning_rate": 1.829241967094886e-07, "loss": 0.4771, "step": 28696 }, { "epoch": 94.08852459016393, "grad_norm": 6.8269734382629395, "learning_rate": 1.8272207416739186e-07, "loss": 0.2412, "step": 28697 }, { "epoch": 94.09180327868853, "grad_norm": 5.071835994720459, "learning_rate": 1.8252006232531207e-07, "loss": 0.3905, "step": 28698 }, { "epoch": 94.09508196721312, "grad_norm": 4.10150146484375, "learning_rate": 1.8231816118552405e-07, "loss": 0.4916, "step": 28699 }, { "epoch": 94.09836065573771, "grad_norm": 4.860902786254883, "learning_rate": 1.821163707503082e-07, "loss": 0.6159, "step": 28700 }, { "epoch": 94.1016393442623, "grad_norm": 3.794463634490967, "learning_rate": 1.8191469102193716e-07, "loss": 0.3185, "step": 28701 }, { "epoch": 94.10491803278688, "grad_norm": 6.086416244506836, "learning_rate": 1.8171312200268798e-07, "loss": 0.3503, "step": 28702 }, { "epoch": 94.10819672131147, "grad_norm": 8.569700241088867, "learning_rate": 1.8151166369482998e-07, "loss": 0.4452, "step": 28703 }, { "epoch": 94.11147540983606, "grad_norm": 4.123892307281494, "learning_rate": 1.8131031610063687e-07, "loss": 0.552, "step": 28704 }, { "epoch": 94.11475409836065, "grad_norm": 4.568142890930176, "learning_rate": 1.811090792223802e-07, "loss": 0.468, "step": 28705 }, { "epoch": 94.11803278688525, "grad_norm": 4.021976470947266, "learning_rate": 1.80907953062327e-07, "loss": 0.4038, "step": 28706 }, { "epoch": 94.12131147540984, "grad_norm": 3.575505018234253, "learning_rate": 1.8070693762274438e-07, "loss": 0.2842, "step": 28707 }, { "epoch": 94.12459016393443, "grad_norm": 4.675367832183838, "learning_rate": 1.8050603290590274e-07, "loss": 0.2828, "step": 28708 }, { "epoch": 94.12786885245902, "grad_norm": 5.392526626586914, "learning_rate": 1.8030523891406471e-07, "loss": 0.4034, "step": 28709 }, { "epoch": 94.1311475409836, "grad_norm": 4.444650173187256, "learning_rate": 1.8010455564949402e-07, "loss": 0.2754, "step": 28710 }, { "epoch": 94.1344262295082, "grad_norm": 4.756921768188477, "learning_rate": 1.7990398311445555e-07, "loss": 0.3439, "step": 28711 }, { "epoch": 94.13770491803278, "grad_norm": 3.426539182662964, "learning_rate": 1.7970352131120971e-07, "loss": 0.2448, "step": 28712 }, { "epoch": 94.14098360655737, "grad_norm": 5.2248616218566895, "learning_rate": 1.79503170242018e-07, "loss": 0.2364, "step": 28713 }, { "epoch": 94.14426229508197, "grad_norm": 5.427145957946777, "learning_rate": 1.7930292990913757e-07, "loss": 0.4809, "step": 28714 }, { "epoch": 94.14754098360656, "grad_norm": 3.8692524433135986, "learning_rate": 1.7910280031482873e-07, "loss": 0.1413, "step": 28715 }, { "epoch": 94.15081967213115, "grad_norm": 18.96099281311035, "learning_rate": 1.7890278146134533e-07, "loss": 0.4326, "step": 28716 }, { "epoch": 94.15409836065574, "grad_norm": 4.8550286293029785, "learning_rate": 1.787028733509455e-07, "loss": 0.3643, "step": 28717 }, { "epoch": 94.15737704918033, "grad_norm": 6.433923244476318, "learning_rate": 1.78503075985883e-07, "loss": 0.3926, "step": 28718 }, { "epoch": 94.16065573770491, "grad_norm": 6.142660140991211, "learning_rate": 1.783033893684094e-07, "loss": 0.2632, "step": 28719 }, { "epoch": 94.1639344262295, "grad_norm": 4.8848185539245605, "learning_rate": 1.7810381350077731e-07, "loss": 0.3793, "step": 28720 }, { "epoch": 94.1672131147541, "grad_norm": 6.005111217498779, "learning_rate": 1.7790434838523606e-07, "loss": 0.5748, "step": 28721 }, { "epoch": 94.1704918032787, "grad_norm": 4.285857200622559, "learning_rate": 1.7770499402403717e-07, "loss": 0.1733, "step": 28722 }, { "epoch": 94.17377049180328, "grad_norm": 4.644093990325928, "learning_rate": 1.7750575041942665e-07, "loss": 0.1453, "step": 28723 }, { "epoch": 94.17704918032787, "grad_norm": 6.013041019439697, "learning_rate": 1.7730661757365153e-07, "loss": 0.513, "step": 28724 }, { "epoch": 94.18032786885246, "grad_norm": 4.3684163093566895, "learning_rate": 1.7710759548895672e-07, "loss": 0.4463, "step": 28725 }, { "epoch": 94.18360655737705, "grad_norm": 4.155974388122559, "learning_rate": 1.7690868416758711e-07, "loss": 0.3917, "step": 28726 }, { "epoch": 94.18688524590164, "grad_norm": 4.642638683319092, "learning_rate": 1.7670988361178643e-07, "loss": 0.3028, "step": 28727 }, { "epoch": 94.19016393442622, "grad_norm": 4.15994930267334, "learning_rate": 1.7651119382379512e-07, "loss": 0.3092, "step": 28728 }, { "epoch": 94.19344262295083, "grad_norm": 4.787123203277588, "learning_rate": 1.763126148058536e-07, "loss": 0.1703, "step": 28729 }, { "epoch": 94.19672131147541, "grad_norm": 4.268680572509766, "learning_rate": 1.761141465602012e-07, "loss": 0.3372, "step": 28730 }, { "epoch": 94.2, "grad_norm": 5.198114395141602, "learning_rate": 1.7591578908907724e-07, "loss": 0.3506, "step": 28731 }, { "epoch": 94.20327868852459, "grad_norm": 4.390838146209717, "learning_rate": 1.757175423947166e-07, "loss": 0.1745, "step": 28732 }, { "epoch": 94.20655737704918, "grad_norm": 4.397006988525391, "learning_rate": 1.7551940647935417e-07, "loss": 0.2697, "step": 28733 }, { "epoch": 94.20983606557377, "grad_norm": 3.7754125595092773, "learning_rate": 1.7532138134522704e-07, "loss": 0.2262, "step": 28734 }, { "epoch": 94.21311475409836, "grad_norm": 4.682656764984131, "learning_rate": 1.7512346699456562e-07, "loss": 0.3914, "step": 28735 }, { "epoch": 94.21639344262294, "grad_norm": 3.7185580730438232, "learning_rate": 1.749256634296026e-07, "loss": 0.4129, "step": 28736 }, { "epoch": 94.21967213114755, "grad_norm": 5.57496976852417, "learning_rate": 1.7472797065256842e-07, "loss": 0.3489, "step": 28737 }, { "epoch": 94.22295081967214, "grad_norm": 4.606375694274902, "learning_rate": 1.7453038866569129e-07, "loss": 0.2274, "step": 28738 }, { "epoch": 94.22622950819672, "grad_norm": 4.998671054840088, "learning_rate": 1.743329174712005e-07, "loss": 0.35, "step": 28739 }, { "epoch": 94.22950819672131, "grad_norm": 4.062009811401367, "learning_rate": 1.7413555707132324e-07, "loss": 0.5508, "step": 28740 }, { "epoch": 94.2327868852459, "grad_norm": 3.9823195934295654, "learning_rate": 1.7393830746828212e-07, "loss": 0.1968, "step": 28741 }, { "epoch": 94.23606557377049, "grad_norm": 5.537408828735352, "learning_rate": 1.7374116866430424e-07, "loss": 0.4148, "step": 28742 }, { "epoch": 94.23934426229508, "grad_norm": 3.60711932182312, "learning_rate": 1.7354414066161118e-07, "loss": 0.6451, "step": 28743 }, { "epoch": 94.24262295081967, "grad_norm": 3.98667311668396, "learning_rate": 1.733472234624245e-07, "loss": 0.4218, "step": 28744 }, { "epoch": 94.24590163934427, "grad_norm": 5.446926593780518, "learning_rate": 1.7315041706896573e-07, "loss": 0.4157, "step": 28745 }, { "epoch": 94.24918032786886, "grad_norm": 6.244360446929932, "learning_rate": 1.7295372148345313e-07, "loss": 0.4548, "step": 28746 }, { "epoch": 94.25245901639344, "grad_norm": 4.852399826049805, "learning_rate": 1.727571367081049e-07, "loss": 0.4977, "step": 28747 }, { "epoch": 94.25573770491803, "grad_norm": 5.632646560668945, "learning_rate": 1.7256066274513705e-07, "loss": 0.2332, "step": 28748 }, { "epoch": 94.25901639344262, "grad_norm": 5.745721340179443, "learning_rate": 1.723642995967656e-07, "loss": 0.492, "step": 28749 }, { "epoch": 94.26229508196721, "grad_norm": 6.3905792236328125, "learning_rate": 1.7216804726520654e-07, "loss": 0.3493, "step": 28750 }, { "epoch": 94.2655737704918, "grad_norm": 4.536803245544434, "learning_rate": 1.719719057526692e-07, "loss": 0.3981, "step": 28751 }, { "epoch": 94.26885245901639, "grad_norm": 5.628533840179443, "learning_rate": 1.717758750613685e-07, "loss": 0.5832, "step": 28752 }, { "epoch": 94.27213114754099, "grad_norm": 4.6811017990112305, "learning_rate": 1.7157995519351267e-07, "loss": 0.3551, "step": 28753 }, { "epoch": 94.27540983606558, "grad_norm": 5.783850193023682, "learning_rate": 1.7138414615131327e-07, "loss": 0.401, "step": 28754 }, { "epoch": 94.27868852459017, "grad_norm": 4.248340129852295, "learning_rate": 1.711884479369752e-07, "loss": 0.3901, "step": 28755 }, { "epoch": 94.28196721311475, "grad_norm": 5.378754138946533, "learning_rate": 1.7099286055270781e-07, "loss": 0.2983, "step": 28756 }, { "epoch": 94.28524590163934, "grad_norm": 4.433952808380127, "learning_rate": 1.7079738400071488e-07, "loss": 0.2288, "step": 28757 }, { "epoch": 94.28852459016393, "grad_norm": 5.354222774505615, "learning_rate": 1.7060201828320244e-07, "loss": 0.3743, "step": 28758 }, { "epoch": 94.29180327868852, "grad_norm": 8.493205070495605, "learning_rate": 1.7040676340236983e-07, "loss": 0.2355, "step": 28759 }, { "epoch": 94.29508196721312, "grad_norm": 8.293891906738281, "learning_rate": 1.7021161936042306e-07, "loss": 0.3899, "step": 28760 }, { "epoch": 94.29836065573771, "grad_norm": 3.9009172916412354, "learning_rate": 1.700165861595615e-07, "loss": 0.3108, "step": 28761 }, { "epoch": 94.3016393442623, "grad_norm": 4.577801704406738, "learning_rate": 1.6982166380198227e-07, "loss": 0.3082, "step": 28762 }, { "epoch": 94.30491803278689, "grad_norm": 5.544471263885498, "learning_rate": 1.6962685228988472e-07, "loss": 0.4622, "step": 28763 }, { "epoch": 94.30819672131148, "grad_norm": 3.5808651447296143, "learning_rate": 1.694321516254649e-07, "loss": 0.382, "step": 28764 }, { "epoch": 94.31147540983606, "grad_norm": 6.162496089935303, "learning_rate": 1.6923756181091988e-07, "loss": 0.4311, "step": 28765 }, { "epoch": 94.31475409836065, "grad_norm": 3.6659181118011475, "learning_rate": 1.690430828484424e-07, "loss": 0.1439, "step": 28766 }, { "epoch": 94.31803278688524, "grad_norm": 4.479017734527588, "learning_rate": 1.6884871474022625e-07, "loss": 0.4286, "step": 28767 }, { "epoch": 94.32131147540984, "grad_norm": 6.502037048339844, "learning_rate": 1.6865445748846075e-07, "loss": 0.5208, "step": 28768 }, { "epoch": 94.32459016393443, "grad_norm": 4.262693405151367, "learning_rate": 1.6846031109533978e-07, "loss": 0.4958, "step": 28769 }, { "epoch": 94.32786885245902, "grad_norm": 9.633417129516602, "learning_rate": 1.6826627556305152e-07, "loss": 0.4219, "step": 28770 }, { "epoch": 94.33114754098361, "grad_norm": 4.529080867767334, "learning_rate": 1.6807235089378315e-07, "loss": 0.3947, "step": 28771 }, { "epoch": 94.3344262295082, "grad_norm": 9.976835250854492, "learning_rate": 1.6787853708972067e-07, "loss": 0.5665, "step": 28772 }, { "epoch": 94.33770491803278, "grad_norm": 4.879453659057617, "learning_rate": 1.6768483415305125e-07, "loss": 0.3969, "step": 28773 }, { "epoch": 94.34098360655737, "grad_norm": 6.313974857330322, "learning_rate": 1.6749124208595868e-07, "loss": 0.3221, "step": 28774 }, { "epoch": 94.34426229508196, "grad_norm": 4.810260772705078, "learning_rate": 1.6729776089062565e-07, "loss": 0.3442, "step": 28775 }, { "epoch": 94.34754098360656, "grad_norm": 8.742473602294922, "learning_rate": 1.6710439056923266e-07, "loss": 0.4091, "step": 28776 }, { "epoch": 94.35081967213115, "grad_norm": 4.018528461456299, "learning_rate": 1.669111311239624e-07, "loss": 0.2855, "step": 28777 }, { "epoch": 94.35409836065574, "grad_norm": 4.31567907333374, "learning_rate": 1.6671798255699202e-07, "loss": 0.1054, "step": 28778 }, { "epoch": 94.35737704918033, "grad_norm": 4.510361194610596, "learning_rate": 1.6652494487050198e-07, "loss": 0.3293, "step": 28779 }, { "epoch": 94.36065573770492, "grad_norm": 5.366199970245361, "learning_rate": 1.663320180666661e-07, "loss": 0.6312, "step": 28780 }, { "epoch": 94.3639344262295, "grad_norm": 4.182485103607178, "learning_rate": 1.6613920214766155e-07, "loss": 0.2279, "step": 28781 }, { "epoch": 94.3672131147541, "grad_norm": 3.2655093669891357, "learning_rate": 1.6594649711566214e-07, "loss": 0.1121, "step": 28782 }, { "epoch": 94.37049180327868, "grad_norm": 8.054597854614258, "learning_rate": 1.6575390297284056e-07, "loss": 0.4426, "step": 28783 }, { "epoch": 94.37377049180328, "grad_norm": 5.279581069946289, "learning_rate": 1.6556141972136952e-07, "loss": 0.4543, "step": 28784 }, { "epoch": 94.37704918032787, "grad_norm": 4.534053325653076, "learning_rate": 1.653690473634173e-07, "loss": 0.2699, "step": 28785 }, { "epoch": 94.38032786885246, "grad_norm": 5.091855525970459, "learning_rate": 1.651767859011566e-07, "loss": 0.4155, "step": 28786 }, { "epoch": 94.38360655737705, "grad_norm": 4.1223249435424805, "learning_rate": 1.6498463533675237e-07, "loss": 0.4308, "step": 28787 }, { "epoch": 94.38688524590164, "grad_norm": 4.678180694580078, "learning_rate": 1.647925956723717e-07, "loss": 0.4804, "step": 28788 }, { "epoch": 94.39016393442623, "grad_norm": 7.149263381958008, "learning_rate": 1.6460066691018183e-07, "loss": 0.3976, "step": 28789 }, { "epoch": 94.39344262295081, "grad_norm": 4.964545249938965, "learning_rate": 1.6440884905234323e-07, "loss": 0.2054, "step": 28790 }, { "epoch": 94.3967213114754, "grad_norm": 4.4780073165893555, "learning_rate": 1.6421714210102303e-07, "loss": 0.2826, "step": 28791 }, { "epoch": 94.4, "grad_norm": 5.245569229125977, "learning_rate": 1.6402554605838173e-07, "loss": 0.3449, "step": 28792 }, { "epoch": 94.4032786885246, "grad_norm": 4.908580780029297, "learning_rate": 1.6383406092657873e-07, "loss": 0.274, "step": 28793 }, { "epoch": 94.40655737704918, "grad_norm": 4.696010112762451, "learning_rate": 1.636426867077734e-07, "loss": 0.2775, "step": 28794 }, { "epoch": 94.40983606557377, "grad_norm": 3.922238826751709, "learning_rate": 1.6345142340412402e-07, "loss": 0.5136, "step": 28795 }, { "epoch": 94.41311475409836, "grad_norm": 4.607147693634033, "learning_rate": 1.6326027101778774e-07, "loss": 0.3844, "step": 28796 }, { "epoch": 94.41639344262295, "grad_norm": 4.713084697723389, "learning_rate": 1.630692295509184e-07, "loss": 0.3978, "step": 28797 }, { "epoch": 94.41967213114754, "grad_norm": 5.576659202575684, "learning_rate": 1.628782990056721e-07, "loss": 0.3621, "step": 28798 }, { "epoch": 94.42295081967212, "grad_norm": 4.882988452911377, "learning_rate": 1.626874793842015e-07, "loss": 0.4089, "step": 28799 }, { "epoch": 94.42622950819673, "grad_norm": 2.9641730785369873, "learning_rate": 1.624967706886571e-07, "loss": 0.2658, "step": 28800 }, { "epoch": 94.42950819672132, "grad_norm": 4.139882564544678, "learning_rate": 1.6230617292119056e-07, "loss": 0.5661, "step": 28801 }, { "epoch": 94.4327868852459, "grad_norm": 5.703760623931885, "learning_rate": 1.6211568608395012e-07, "loss": 0.3845, "step": 28802 }, { "epoch": 94.43606557377049, "grad_norm": 4.853427410125732, "learning_rate": 1.619253101790852e-07, "loss": 0.4935, "step": 28803 }, { "epoch": 94.43934426229508, "grad_norm": 6.288243770599365, "learning_rate": 1.617350452087396e-07, "loss": 0.4444, "step": 28804 }, { "epoch": 94.44262295081967, "grad_norm": 6.924084663391113, "learning_rate": 1.6154489117506166e-07, "loss": 0.4503, "step": 28805 }, { "epoch": 94.44590163934426, "grad_norm": 4.218266010284424, "learning_rate": 1.6135484808019518e-07, "loss": 0.5659, "step": 28806 }, { "epoch": 94.44918032786886, "grad_norm": 7.338266849517822, "learning_rate": 1.611649159262807e-07, "loss": 0.2562, "step": 28807 }, { "epoch": 94.45245901639345, "grad_norm": 3.5606331825256348, "learning_rate": 1.6097509471546313e-07, "loss": 0.3974, "step": 28808 }, { "epoch": 94.45573770491804, "grad_norm": 4.7322468757629395, "learning_rate": 1.6078538444988078e-07, "loss": 0.3606, "step": 28809 }, { "epoch": 94.45901639344262, "grad_norm": 4.191853046417236, "learning_rate": 1.605957851316742e-07, "loss": 0.3086, "step": 28810 }, { "epoch": 94.46229508196721, "grad_norm": 4.0034356117248535, "learning_rate": 1.6040629676297936e-07, "loss": 0.3791, "step": 28811 }, { "epoch": 94.4655737704918, "grad_norm": 5.727182865142822, "learning_rate": 1.6021691934593464e-07, "loss": 0.4963, "step": 28812 }, { "epoch": 94.46885245901639, "grad_norm": 8.979400634765625, "learning_rate": 1.6002765288267497e-07, "loss": 0.4169, "step": 28813 }, { "epoch": 94.47213114754098, "grad_norm": 4.9592366218566895, "learning_rate": 1.598384973753353e-07, "loss": 0.4318, "step": 28814 }, { "epoch": 94.47540983606558, "grad_norm": 6.2741618156433105, "learning_rate": 1.5964945282604726e-07, "loss": 0.381, "step": 28815 }, { "epoch": 94.47868852459017, "grad_norm": 3.961547613143921, "learning_rate": 1.594605192369425e-07, "loss": 0.2508, "step": 28816 }, { "epoch": 94.48196721311476, "grad_norm": 4.630209445953369, "learning_rate": 1.5927169661015262e-07, "loss": 0.339, "step": 28817 }, { "epoch": 94.48524590163935, "grad_norm": 4.491548538208008, "learning_rate": 1.5908298494780593e-07, "loss": 0.1887, "step": 28818 }, { "epoch": 94.48852459016393, "grad_norm": 5.301595687866211, "learning_rate": 1.5889438425203075e-07, "loss": 0.3546, "step": 28819 }, { "epoch": 94.49180327868852, "grad_norm": 4.3277201652526855, "learning_rate": 1.58705894524952e-07, "loss": 0.4453, "step": 28820 }, { "epoch": 94.49508196721311, "grad_norm": 5.23927116394043, "learning_rate": 1.58517515768698e-07, "loss": 0.4026, "step": 28821 }, { "epoch": 94.4983606557377, "grad_norm": 4.491041660308838, "learning_rate": 1.583292479853926e-07, "loss": 0.446, "step": 28822 }, { "epoch": 94.5016393442623, "grad_norm": 4.163539409637451, "learning_rate": 1.5814109117715636e-07, "loss": 0.5909, "step": 28823 }, { "epoch": 94.50491803278689, "grad_norm": 3.7093119621276855, "learning_rate": 1.57953045346112e-07, "loss": 0.5327, "step": 28824 }, { "epoch": 94.50819672131148, "grad_norm": 4.464219570159912, "learning_rate": 1.5776511049438114e-07, "loss": 0.3903, "step": 28825 }, { "epoch": 94.51147540983607, "grad_norm": 5.436840534210205, "learning_rate": 1.575772866240821e-07, "loss": 0.4661, "step": 28826 }, { "epoch": 94.51475409836065, "grad_norm": 6.814799785614014, "learning_rate": 1.573895737373321e-07, "loss": 0.2652, "step": 28827 }, { "epoch": 94.51803278688524, "grad_norm": 4.123260974884033, "learning_rate": 1.572019718362494e-07, "loss": 0.2578, "step": 28828 }, { "epoch": 94.52131147540983, "grad_norm": 4.656644821166992, "learning_rate": 1.570144809229468e-07, "loss": 0.3704, "step": 28829 }, { "epoch": 94.52459016393442, "grad_norm": 6.004491329193115, "learning_rate": 1.5682710099954035e-07, "loss": 0.4517, "step": 28830 }, { "epoch": 94.52786885245902, "grad_norm": 5.030575275421143, "learning_rate": 1.5663983206814394e-07, "loss": 0.4048, "step": 28831 }, { "epoch": 94.53114754098361, "grad_norm": 6.203094005584717, "learning_rate": 1.5645267413086695e-07, "loss": 0.3389, "step": 28832 }, { "epoch": 94.5344262295082, "grad_norm": 5.723662853240967, "learning_rate": 1.562656271898211e-07, "loss": 0.3805, "step": 28833 }, { "epoch": 94.53770491803279, "grad_norm": 4.115622520446777, "learning_rate": 1.560786912471146e-07, "loss": 0.3307, "step": 28834 }, { "epoch": 94.54098360655738, "grad_norm": 4.389348030090332, "learning_rate": 1.5589186630485697e-07, "loss": 0.4585, "step": 28835 }, { "epoch": 94.54426229508196, "grad_norm": 3.857574462890625, "learning_rate": 1.5570515236515315e-07, "loss": 0.2924, "step": 28836 }, { "epoch": 94.54754098360655, "grad_norm": 6.197287082672119, "learning_rate": 1.5551854943010923e-07, "loss": 0.283, "step": 28837 }, { "epoch": 94.55081967213114, "grad_norm": 5.409520626068115, "learning_rate": 1.5533205750183023e-07, "loss": 0.4196, "step": 28838 }, { "epoch": 94.55409836065574, "grad_norm": 11.960810661315918, "learning_rate": 1.5514567658241776e-07, "loss": 0.3502, "step": 28839 }, { "epoch": 94.55737704918033, "grad_norm": 4.378965377807617, "learning_rate": 1.5495940667397347e-07, "loss": 0.4038, "step": 28840 }, { "epoch": 94.56065573770492, "grad_norm": 5.210602760314941, "learning_rate": 1.5477324777859904e-07, "loss": 0.4334, "step": 28841 }, { "epoch": 94.56393442622951, "grad_norm": 4.463048934936523, "learning_rate": 1.5458719989839167e-07, "loss": 0.3508, "step": 28842 }, { "epoch": 94.5672131147541, "grad_norm": 7.000883102416992, "learning_rate": 1.5440126303545077e-07, "loss": 0.3245, "step": 28843 }, { "epoch": 94.57049180327868, "grad_norm": 4.484134674072266, "learning_rate": 1.5421543719187248e-07, "loss": 0.3774, "step": 28844 }, { "epoch": 94.57377049180327, "grad_norm": 4.937150478363037, "learning_rate": 1.5402972236975178e-07, "loss": 0.405, "step": 28845 }, { "epoch": 94.57704918032788, "grad_norm": 5.773629665374756, "learning_rate": 1.5384411857118252e-07, "loss": 0.4657, "step": 28846 }, { "epoch": 94.58032786885246, "grad_norm": 4.290585994720459, "learning_rate": 1.5365862579825973e-07, "loss": 0.1988, "step": 28847 }, { "epoch": 94.58360655737705, "grad_norm": 5.686036586761475, "learning_rate": 1.5347324405307283e-07, "loss": 0.3049, "step": 28848 }, { "epoch": 94.58688524590164, "grad_norm": 9.732444763183594, "learning_rate": 1.5328797333771352e-07, "loss": 0.3747, "step": 28849 }, { "epoch": 94.59016393442623, "grad_norm": 8.88387393951416, "learning_rate": 1.5310281365427003e-07, "loss": 0.5795, "step": 28850 }, { "epoch": 94.59344262295082, "grad_norm": 5.363029479980469, "learning_rate": 1.529177650048297e-07, "loss": 0.5095, "step": 28851 }, { "epoch": 94.5967213114754, "grad_norm": 4.191874027252197, "learning_rate": 1.5273282739148188e-07, "loss": 0.4685, "step": 28852 }, { "epoch": 94.6, "grad_norm": 6.226228713989258, "learning_rate": 1.5254800081630828e-07, "loss": 0.3533, "step": 28853 }, { "epoch": 94.6032786885246, "grad_norm": 3.868401288986206, "learning_rate": 1.5236328528139499e-07, "loss": 0.3574, "step": 28854 }, { "epoch": 94.60655737704919, "grad_norm": 5.067828178405762, "learning_rate": 1.521786807888248e-07, "loss": 0.4798, "step": 28855 }, { "epoch": 94.60983606557377, "grad_norm": 3.534492015838623, "learning_rate": 1.519941873406794e-07, "loss": 0.2812, "step": 28856 }, { "epoch": 94.61311475409836, "grad_norm": 4.203934669494629, "learning_rate": 1.518098049390393e-07, "loss": 0.1205, "step": 28857 }, { "epoch": 94.61639344262295, "grad_norm": 4.395448207855225, "learning_rate": 1.5162553358598286e-07, "loss": 0.2669, "step": 28858 }, { "epoch": 94.61967213114754, "grad_norm": 4.686858654022217, "learning_rate": 1.5144137328358733e-07, "loss": 0.3368, "step": 28859 }, { "epoch": 94.62295081967213, "grad_norm": 5.335777759552002, "learning_rate": 1.51257324033931e-07, "loss": 0.4076, "step": 28860 }, { "epoch": 94.62622950819672, "grad_norm": 6.865876197814941, "learning_rate": 1.5107338583908893e-07, "loss": 0.3728, "step": 28861 }, { "epoch": 94.62950819672132, "grad_norm": 5.4835686683654785, "learning_rate": 1.5088955870113386e-07, "loss": 0.278, "step": 28862 }, { "epoch": 94.6327868852459, "grad_norm": 4.716070175170898, "learning_rate": 1.5070584262213973e-07, "loss": 0.3334, "step": 28863 }, { "epoch": 94.6360655737705, "grad_norm": 4.61335563659668, "learning_rate": 1.5052223760417816e-07, "loss": 0.2519, "step": 28864 }, { "epoch": 94.63934426229508, "grad_norm": 6.487539291381836, "learning_rate": 1.5033874364931976e-07, "loss": 0.3938, "step": 28865 }, { "epoch": 94.64262295081967, "grad_norm": 5.0757293701171875, "learning_rate": 1.5015536075963288e-07, "loss": 0.4167, "step": 28866 }, { "epoch": 94.64590163934426, "grad_norm": 4.7335004806518555, "learning_rate": 1.4997208893718586e-07, "loss": 0.3813, "step": 28867 }, { "epoch": 94.64918032786885, "grad_norm": 5.017218589782715, "learning_rate": 1.4978892818404366e-07, "loss": 0.3405, "step": 28868 }, { "epoch": 94.65245901639344, "grad_norm": 4.968160152435303, "learning_rate": 1.496058785022736e-07, "loss": 0.3076, "step": 28869 }, { "epoch": 94.65573770491804, "grad_norm": 7.810227870941162, "learning_rate": 1.4942293989393953e-07, "loss": 0.2206, "step": 28870 }, { "epoch": 94.65901639344263, "grad_norm": 4.803661823272705, "learning_rate": 1.4924011236110424e-07, "loss": 0.372, "step": 28871 }, { "epoch": 94.66229508196722, "grad_norm": 4.172311305999756, "learning_rate": 1.4905739590582724e-07, "loss": 0.4506, "step": 28872 }, { "epoch": 94.6655737704918, "grad_norm": 8.091634750366211, "learning_rate": 1.4887479053017128e-07, "loss": 0.2802, "step": 28873 }, { "epoch": 94.66885245901639, "grad_norm": 17.64301109313965, "learning_rate": 1.4869229623619586e-07, "loss": 0.294, "step": 28874 }, { "epoch": 94.67213114754098, "grad_norm": 8.562621116638184, "learning_rate": 1.4850991302595597e-07, "loss": 0.2921, "step": 28875 }, { "epoch": 94.67540983606557, "grad_norm": 7.492832660675049, "learning_rate": 1.4832764090150997e-07, "loss": 0.2984, "step": 28876 }, { "epoch": 94.67868852459016, "grad_norm": 7.295932292938232, "learning_rate": 1.4814547986491402e-07, "loss": 0.371, "step": 28877 }, { "epoch": 94.68196721311476, "grad_norm": 5.401057720184326, "learning_rate": 1.4796342991822089e-07, "loss": 0.2673, "step": 28878 }, { "epoch": 94.68524590163935, "grad_norm": 4.232264041900635, "learning_rate": 1.4778149106348337e-07, "loss": 0.3906, "step": 28879 }, { "epoch": 94.68852459016394, "grad_norm": 5.276786804199219, "learning_rate": 1.475996633027532e-07, "loss": 0.4739, "step": 28880 }, { "epoch": 94.69180327868852, "grad_norm": 3.5429787635803223, "learning_rate": 1.4741794663807984e-07, "loss": 0.3291, "step": 28881 }, { "epoch": 94.69508196721311, "grad_norm": 4.736962795257568, "learning_rate": 1.4723634107151497e-07, "loss": 0.3571, "step": 28882 }, { "epoch": 94.6983606557377, "grad_norm": 5.296202182769775, "learning_rate": 1.470548466051036e-07, "loss": 0.5044, "step": 28883 }, { "epoch": 94.70163934426229, "grad_norm": 3.5506300926208496, "learning_rate": 1.4687346324089414e-07, "loss": 0.3932, "step": 28884 }, { "epoch": 94.70491803278688, "grad_norm": 4.477115154266357, "learning_rate": 1.4669219098093046e-07, "loss": 0.3182, "step": 28885 }, { "epoch": 94.70819672131148, "grad_norm": 4.856236457824707, "learning_rate": 1.465110298272565e-07, "loss": 0.3746, "step": 28886 }, { "epoch": 94.71147540983607, "grad_norm": 3.9928152561187744, "learning_rate": 1.463299797819173e-07, "loss": 0.3845, "step": 28887 }, { "epoch": 94.71475409836066, "grad_norm": 4.700979709625244, "learning_rate": 1.4614904084695235e-07, "loss": 0.4932, "step": 28888 }, { "epoch": 94.71803278688525, "grad_norm": 5.468621253967285, "learning_rate": 1.4596821302440112e-07, "loss": 0.2458, "step": 28889 }, { "epoch": 94.72131147540983, "grad_norm": 5.42241096496582, "learning_rate": 1.457874963163053e-07, "loss": 0.2299, "step": 28890 }, { "epoch": 94.72459016393442, "grad_norm": 3.724236249923706, "learning_rate": 1.4560689072470102e-07, "loss": 0.1453, "step": 28891 }, { "epoch": 94.72786885245901, "grad_norm": 8.635120391845703, "learning_rate": 1.4542639625162448e-07, "loss": 0.3524, "step": 28892 }, { "epoch": 94.73114754098361, "grad_norm": 4.330685615539551, "learning_rate": 1.452460128991129e-07, "loss": 0.3265, "step": 28893 }, { "epoch": 94.7344262295082, "grad_norm": 5.7560296058654785, "learning_rate": 1.4506574066919686e-07, "loss": 0.4155, "step": 28894 }, { "epoch": 94.73770491803279, "grad_norm": 4.306949138641357, "learning_rate": 1.4488557956391258e-07, "loss": 0.543, "step": 28895 }, { "epoch": 94.74098360655738, "grad_norm": 21.96320152282715, "learning_rate": 1.447055295852895e-07, "loss": 0.4237, "step": 28896 }, { "epoch": 94.74426229508197, "grad_norm": 4.8890700340271, "learning_rate": 1.4452559073535933e-07, "loss": 0.3035, "step": 28897 }, { "epoch": 94.74754098360656, "grad_norm": 4.172792434692383, "learning_rate": 1.4434576301614932e-07, "loss": 0.3746, "step": 28898 }, { "epoch": 94.75081967213114, "grad_norm": 4.9727463722229, "learning_rate": 1.4416604642968902e-07, "loss": 0.3083, "step": 28899 }, { "epoch": 94.75409836065573, "grad_norm": 4.506087303161621, "learning_rate": 1.4398644097800342e-07, "loss": 0.2872, "step": 28900 }, { "epoch": 94.75737704918033, "grad_norm": 5.5349650382995605, "learning_rate": 1.4380694666311867e-07, "loss": 0.4351, "step": 28901 }, { "epoch": 94.76065573770492, "grad_norm": 4.713353633880615, "learning_rate": 1.4362756348705765e-07, "loss": 0.4423, "step": 28902 }, { "epoch": 94.76393442622951, "grad_norm": 5.737358093261719, "learning_rate": 1.4344829145184425e-07, "loss": 0.3093, "step": 28903 }, { "epoch": 94.7672131147541, "grad_norm": 4.315842628479004, "learning_rate": 1.432691305595002e-07, "loss": 0.3032, "step": 28904 }, { "epoch": 94.77049180327869, "grad_norm": 6.0476484298706055, "learning_rate": 1.43090080812045e-07, "loss": 0.2975, "step": 28905 }, { "epoch": 94.77377049180328, "grad_norm": 4.045605659484863, "learning_rate": 1.4291114221149705e-07, "loss": 0.2931, "step": 28906 }, { "epoch": 94.77704918032786, "grad_norm": 4.971039772033691, "learning_rate": 1.427323147598758e-07, "loss": 0.4783, "step": 28907 }, { "epoch": 94.78032786885245, "grad_norm": 14.972286224365234, "learning_rate": 1.4255359845919635e-07, "loss": 0.3905, "step": 28908 }, { "epoch": 94.78360655737706, "grad_norm": 6.228298664093018, "learning_rate": 1.4237499331147376e-07, "loss": 0.4012, "step": 28909 }, { "epoch": 94.78688524590164, "grad_norm": 7.360781192779541, "learning_rate": 1.4219649931872303e-07, "loss": 0.3185, "step": 28910 }, { "epoch": 94.79016393442623, "grad_norm": 4.869348526000977, "learning_rate": 1.4201811648295594e-07, "loss": 0.4343, "step": 28911 }, { "epoch": 94.79344262295082, "grad_norm": 7.0850725173950195, "learning_rate": 1.4183984480618417e-07, "loss": 0.3626, "step": 28912 }, { "epoch": 94.79672131147541, "grad_norm": 4.618414878845215, "learning_rate": 1.4166168429041838e-07, "loss": 0.4984, "step": 28913 }, { "epoch": 94.8, "grad_norm": 4.750560283660889, "learning_rate": 1.4148363493766803e-07, "loss": 0.5302, "step": 28914 }, { "epoch": 94.80327868852459, "grad_norm": 5.443836688995361, "learning_rate": 1.413056967499382e-07, "loss": 0.3857, "step": 28915 }, { "epoch": 94.80655737704917, "grad_norm": 3.770763397216797, "learning_rate": 1.4112786972923842e-07, "loss": 0.3566, "step": 28916 }, { "epoch": 94.80983606557378, "grad_norm": 4.361375331878662, "learning_rate": 1.4095015387757261e-07, "loss": 0.3889, "step": 28917 }, { "epoch": 94.81311475409836, "grad_norm": 7.0847039222717285, "learning_rate": 1.407725491969447e-07, "loss": 0.4608, "step": 28918 }, { "epoch": 94.81639344262295, "grad_norm": 6.212926387786865, "learning_rate": 1.405950556893565e-07, "loss": 0.2505, "step": 28919 }, { "epoch": 94.81967213114754, "grad_norm": 4.832555294036865, "learning_rate": 1.404176733568108e-07, "loss": 0.3678, "step": 28920 }, { "epoch": 94.82295081967213, "grad_norm": 4.630005359649658, "learning_rate": 1.402404022013071e-07, "loss": 0.3663, "step": 28921 }, { "epoch": 94.82622950819672, "grad_norm": 5.836354732513428, "learning_rate": 1.400632422248438e-07, "loss": 0.3469, "step": 28922 }, { "epoch": 94.8295081967213, "grad_norm": 6.528267860412598, "learning_rate": 1.3988619342942045e-07, "loss": 0.3748, "step": 28923 }, { "epoch": 94.8327868852459, "grad_norm": 6.493064880371094, "learning_rate": 1.3970925581703098e-07, "loss": 0.2221, "step": 28924 }, { "epoch": 94.8360655737705, "grad_norm": 4.6994757652282715, "learning_rate": 1.3953242938967272e-07, "loss": 0.2957, "step": 28925 }, { "epoch": 94.83934426229509, "grad_norm": 4.341442108154297, "learning_rate": 1.3935571414933846e-07, "loss": 0.3286, "step": 28926 }, { "epoch": 94.84262295081967, "grad_norm": 6.910440921783447, "learning_rate": 1.391791100980211e-07, "loss": 0.4507, "step": 28927 }, { "epoch": 94.84590163934426, "grad_norm": 4.631430149078369, "learning_rate": 1.3900261723771125e-07, "loss": 0.4569, "step": 28928 }, { "epoch": 94.84918032786885, "grad_norm": 5.202208995819092, "learning_rate": 1.3882623557040065e-07, "loss": 0.2393, "step": 28929 }, { "epoch": 94.85245901639344, "grad_norm": 28.552082061767578, "learning_rate": 1.386499650980766e-07, "loss": 0.3286, "step": 28930 }, { "epoch": 94.85573770491803, "grad_norm": 4.482058048248291, "learning_rate": 1.3847380582272861e-07, "loss": 0.2506, "step": 28931 }, { "epoch": 94.85901639344263, "grad_norm": 4.557445049285889, "learning_rate": 1.382977577463407e-07, "loss": 0.2745, "step": 28932 }, { "epoch": 94.86229508196722, "grad_norm": 6.748831748962402, "learning_rate": 1.3812182087089898e-07, "loss": 0.4429, "step": 28933 }, { "epoch": 94.8655737704918, "grad_norm": 5.162299633026123, "learning_rate": 1.3794599519838858e-07, "loss": 0.4344, "step": 28934 }, { "epoch": 94.8688524590164, "grad_norm": 5.246621608734131, "learning_rate": 1.3777028073079012e-07, "loss": 0.3925, "step": 28935 }, { "epoch": 94.87213114754098, "grad_norm": 8.775559425354004, "learning_rate": 1.3759467747008648e-07, "loss": 0.4252, "step": 28936 }, { "epoch": 94.87540983606557, "grad_norm": 14.064275741577148, "learning_rate": 1.3741918541825606e-07, "loss": 0.3624, "step": 28937 }, { "epoch": 94.87868852459016, "grad_norm": 5.562567710876465, "learning_rate": 1.372438045772806e-07, "loss": 0.3973, "step": 28938 }, { "epoch": 94.88196721311475, "grad_norm": 5.403189182281494, "learning_rate": 1.370685349491352e-07, "loss": 0.3718, "step": 28939 }, { "epoch": 94.88524590163935, "grad_norm": 4.346111297607422, "learning_rate": 1.3689337653579714e-07, "loss": 0.3077, "step": 28940 }, { "epoch": 94.88852459016394, "grad_norm": 4.581988334655762, "learning_rate": 1.3671832933924045e-07, "loss": 0.3356, "step": 28941 }, { "epoch": 94.89180327868853, "grad_norm": 5.069429397583008, "learning_rate": 1.3654339336144019e-07, "loss": 0.2607, "step": 28942 }, { "epoch": 94.89508196721312, "grad_norm": 4.769134998321533, "learning_rate": 1.363685686043692e-07, "loss": 0.264, "step": 28943 }, { "epoch": 94.8983606557377, "grad_norm": 3.573698043823242, "learning_rate": 1.3619385506999815e-07, "loss": 0.3108, "step": 28944 }, { "epoch": 94.90163934426229, "grad_norm": 5.436110973358154, "learning_rate": 1.360192527602966e-07, "loss": 0.2623, "step": 28945 }, { "epoch": 94.90491803278688, "grad_norm": 8.81207275390625, "learning_rate": 1.3584476167723404e-07, "loss": 0.4117, "step": 28946 }, { "epoch": 94.90819672131147, "grad_norm": 4.284062385559082, "learning_rate": 1.3567038182277782e-07, "loss": 0.4105, "step": 28947 }, { "epoch": 94.91147540983607, "grad_norm": 6.471984386444092, "learning_rate": 1.3549611319889522e-07, "loss": 0.2788, "step": 28948 }, { "epoch": 94.91475409836066, "grad_norm": 5.018844127655029, "learning_rate": 1.3532195580754914e-07, "loss": 0.2743, "step": 28949 }, { "epoch": 94.91803278688525, "grad_norm": 11.959002494812012, "learning_rate": 1.351479096507047e-07, "loss": 0.2392, "step": 28950 }, { "epoch": 94.92131147540984, "grad_norm": 4.329834938049316, "learning_rate": 1.349739747303258e-07, "loss": 0.4803, "step": 28951 }, { "epoch": 94.92459016393443, "grad_norm": 4.849959850311279, "learning_rate": 1.3480015104837207e-07, "loss": 0.3865, "step": 28952 }, { "epoch": 94.92786885245901, "grad_norm": 4.4196648597717285, "learning_rate": 1.3462643860680414e-07, "loss": 0.1638, "step": 28953 }, { "epoch": 94.9311475409836, "grad_norm": 4.075146198272705, "learning_rate": 1.344528374075793e-07, "loss": 0.3437, "step": 28954 }, { "epoch": 94.93442622950819, "grad_norm": 5.263269424438477, "learning_rate": 1.3427934745265713e-07, "loss": 0.2646, "step": 28955 }, { "epoch": 94.9377049180328, "grad_norm": 10.404006004333496, "learning_rate": 1.3410596874399273e-07, "loss": 0.3735, "step": 28956 }, { "epoch": 94.94098360655738, "grad_norm": 10.179888725280762, "learning_rate": 1.339327012835423e-07, "loss": 0.3549, "step": 28957 }, { "epoch": 94.94426229508197, "grad_norm": 6.124549388885498, "learning_rate": 1.3375954507325762e-07, "loss": 0.3631, "step": 28958 }, { "epoch": 94.94754098360656, "grad_norm": 9.914691925048828, "learning_rate": 1.3358650011509267e-07, "loss": 0.5311, "step": 28959 }, { "epoch": 94.95081967213115, "grad_norm": 4.2936906814575195, "learning_rate": 1.3341356641099923e-07, "loss": 0.4484, "step": 28960 }, { "epoch": 94.95409836065573, "grad_norm": 8.04621410369873, "learning_rate": 1.3324074396292465e-07, "loss": 0.4132, "step": 28961 }, { "epoch": 94.95737704918032, "grad_norm": 6.0378289222717285, "learning_rate": 1.3306803277282176e-07, "loss": 0.1509, "step": 28962 }, { "epoch": 94.96065573770491, "grad_norm": 4.6940083503723145, "learning_rate": 1.3289543284263463e-07, "loss": 0.3613, "step": 28963 }, { "epoch": 94.96393442622951, "grad_norm": 8.281035423278809, "learning_rate": 1.3272294417431054e-07, "loss": 0.2797, "step": 28964 }, { "epoch": 94.9672131147541, "grad_norm": 7.112810134887695, "learning_rate": 1.325505667697957e-07, "loss": 0.4284, "step": 28965 }, { "epoch": 94.97049180327869, "grad_norm": 4.700309753417969, "learning_rate": 1.3237830063103197e-07, "loss": 0.216, "step": 28966 }, { "epoch": 94.97377049180328, "grad_norm": 4.810724258422852, "learning_rate": 1.3220614575996326e-07, "loss": 0.2722, "step": 28967 }, { "epoch": 94.97704918032787, "grad_norm": 4.768248558044434, "learning_rate": 1.3203410215852918e-07, "loss": 0.4105, "step": 28968 }, { "epoch": 94.98032786885246, "grad_norm": 6.800145626068115, "learning_rate": 1.318621698286715e-07, "loss": 0.4151, "step": 28969 }, { "epoch": 94.98360655737704, "grad_norm": 6.452073097229004, "learning_rate": 1.3169034877232867e-07, "loss": 0.185, "step": 28970 }, { "epoch": 94.98688524590163, "grad_norm": 9.055262565612793, "learning_rate": 1.315186389914369e-07, "loss": 0.5378, "step": 28971 }, { "epoch": 94.99016393442623, "grad_norm": 6.482059001922607, "learning_rate": 1.3134704048793246e-07, "loss": 0.3258, "step": 28972 }, { "epoch": 94.99344262295082, "grad_norm": 4.404629707336426, "learning_rate": 1.3117555326375264e-07, "loss": 0.5155, "step": 28973 }, { "epoch": 94.99672131147541, "grad_norm": 3.799287796020508, "learning_rate": 1.3100417732082816e-07, "loss": 0.1083, "step": 28974 }, { "epoch": 95.0, "grad_norm": 4.2154951095581055, "learning_rate": 1.30832912661093e-07, "loss": 0.3083, "step": 28975 }, { "epoch": 95.00327868852459, "grad_norm": 5.494478702545166, "learning_rate": 1.3066175928647785e-07, "loss": 0.4033, "step": 28976 }, { "epoch": 95.00655737704918, "grad_norm": 3.986898899078369, "learning_rate": 1.3049071719891339e-07, "loss": 0.1089, "step": 28977 }, { "epoch": 95.00983606557377, "grad_norm": 6.351156711578369, "learning_rate": 1.3031978640032806e-07, "loss": 0.3616, "step": 28978 }, { "epoch": 95.01311475409837, "grad_norm": 5.529893398284912, "learning_rate": 1.301489668926492e-07, "loss": 0.317, "step": 28979 }, { "epoch": 95.01639344262296, "grad_norm": 7.52565336227417, "learning_rate": 1.29978258677802e-07, "loss": 0.3266, "step": 28980 }, { "epoch": 95.01967213114754, "grad_norm": 4.681812763214111, "learning_rate": 1.2980766175771264e-07, "loss": 0.2899, "step": 28981 }, { "epoch": 95.02295081967213, "grad_norm": 4.874063491821289, "learning_rate": 1.2963717613430405e-07, "loss": 0.5124, "step": 28982 }, { "epoch": 95.02622950819672, "grad_norm": 9.682600975036621, "learning_rate": 1.2946680180949911e-07, "loss": 0.4823, "step": 28983 }, { "epoch": 95.02950819672131, "grad_norm": 4.165544033050537, "learning_rate": 1.2929653878521854e-07, "loss": 0.3673, "step": 28984 }, { "epoch": 95.0327868852459, "grad_norm": 5.7161993980407715, "learning_rate": 1.2912638706338188e-07, "loss": 0.298, "step": 28985 }, { "epoch": 95.03606557377049, "grad_norm": 3.5222489833831787, "learning_rate": 1.2895634664590984e-07, "loss": 0.172, "step": 28986 }, { "epoch": 95.03934426229509, "grad_norm": 4.956452369689941, "learning_rate": 1.2878641753471756e-07, "loss": 0.4066, "step": 28987 }, { "epoch": 95.04262295081968, "grad_norm": 4.000010013580322, "learning_rate": 1.2861659973172235e-07, "loss": 0.2971, "step": 28988 }, { "epoch": 95.04590163934427, "grad_norm": 4.424182415008545, "learning_rate": 1.2844689323883719e-07, "loss": 0.4825, "step": 28989 }, { "epoch": 95.04918032786885, "grad_norm": 4.091799736022949, "learning_rate": 1.282772980579783e-07, "loss": 0.3748, "step": 28990 }, { "epoch": 95.05245901639344, "grad_norm": 4.011523723602295, "learning_rate": 1.2810781419105745e-07, "loss": 0.289, "step": 28991 }, { "epoch": 95.05573770491803, "grad_norm": 5.316111087799072, "learning_rate": 1.2793844163998427e-07, "loss": 0.5816, "step": 28992 }, { "epoch": 95.05901639344262, "grad_norm": 4.692544460296631, "learning_rate": 1.2776918040666941e-07, "loss": 0.1868, "step": 28993 }, { "epoch": 95.0622950819672, "grad_norm": 5.246373653411865, "learning_rate": 1.2760003049302138e-07, "loss": 0.4639, "step": 28994 }, { "epoch": 95.06557377049181, "grad_norm": 5.04470157623291, "learning_rate": 1.2743099190094865e-07, "loss": 0.2076, "step": 28995 }, { "epoch": 95.0688524590164, "grad_norm": 7.504501819610596, "learning_rate": 1.2726206463235635e-07, "loss": 0.4313, "step": 28996 }, { "epoch": 95.07213114754099, "grad_norm": 4.340944290161133, "learning_rate": 1.270932486891485e-07, "loss": 0.4074, "step": 28997 }, { "epoch": 95.07540983606557, "grad_norm": 3.8312571048736572, "learning_rate": 1.269245440732303e-07, "loss": 0.363, "step": 28998 }, { "epoch": 95.07868852459016, "grad_norm": 9.001770973205566, "learning_rate": 1.267559507865024e-07, "loss": 0.3632, "step": 28999 }, { "epoch": 95.08196721311475, "grad_norm": 4.207843780517578, "learning_rate": 1.265874688308677e-07, "loss": 0.3426, "step": 29000 }, { "epoch": 95.08524590163934, "grad_norm": 4.718076705932617, "learning_rate": 1.2641909820822473e-07, "loss": 0.4503, "step": 29001 }, { "epoch": 95.08852459016393, "grad_norm": 4.1792073249816895, "learning_rate": 1.2625083892047195e-07, "loss": 0.1885, "step": 29002 }, { "epoch": 95.09180327868853, "grad_norm": 4.226088523864746, "learning_rate": 1.2608269096950787e-07, "loss": 0.37, "step": 29003 }, { "epoch": 95.09508196721312, "grad_norm": 3.8143084049224854, "learning_rate": 1.259146543572276e-07, "loss": 0.3134, "step": 29004 }, { "epoch": 95.09836065573771, "grad_norm": 4.5010085105896, "learning_rate": 1.257467290855263e-07, "loss": 0.2461, "step": 29005 }, { "epoch": 95.1016393442623, "grad_norm": 4.572776794433594, "learning_rate": 1.2557891515629695e-07, "loss": 0.4703, "step": 29006 }, { "epoch": 95.10491803278688, "grad_norm": 4.257676124572754, "learning_rate": 1.254112125714313e-07, "loss": 0.2527, "step": 29007 }, { "epoch": 95.10819672131147, "grad_norm": 4.074419021606445, "learning_rate": 1.252436213328223e-07, "loss": 0.4031, "step": 29008 }, { "epoch": 95.11147540983606, "grad_norm": 4.243396759033203, "learning_rate": 1.2507614144235847e-07, "loss": 0.2942, "step": 29009 }, { "epoch": 95.11475409836065, "grad_norm": 4.262994766235352, "learning_rate": 1.2490877290192827e-07, "loss": 0.4121, "step": 29010 }, { "epoch": 95.11803278688525, "grad_norm": 4.163778781890869, "learning_rate": 1.247415157134191e-07, "loss": 0.239, "step": 29011 }, { "epoch": 95.12131147540984, "grad_norm": 6.334698677062988, "learning_rate": 1.2457436987871717e-07, "loss": 0.3478, "step": 29012 }, { "epoch": 95.12459016393443, "grad_norm": 4.942314147949219, "learning_rate": 1.2440733539970662e-07, "loss": 0.4412, "step": 29013 }, { "epoch": 95.12786885245902, "grad_norm": 4.110652923583984, "learning_rate": 1.2424041227827144e-07, "loss": 0.1516, "step": 29014 }, { "epoch": 95.1311475409836, "grad_norm": 4.848623275756836, "learning_rate": 1.2407360051629457e-07, "loss": 0.4284, "step": 29015 }, { "epoch": 95.1344262295082, "grad_norm": 4.590473651885986, "learning_rate": 1.2390690011565566e-07, "loss": 0.3383, "step": 29016 }, { "epoch": 95.13770491803278, "grad_norm": 5.19282865524292, "learning_rate": 1.2374031107823536e-07, "loss": 0.4906, "step": 29017 }, { "epoch": 95.14098360655737, "grad_norm": 6.545367240905762, "learning_rate": 1.235738334059122e-07, "loss": 0.2983, "step": 29018 }, { "epoch": 95.14426229508197, "grad_norm": 5.854670524597168, "learning_rate": 1.2340746710056252e-07, "loss": 0.3015, "step": 29019 }, { "epoch": 95.14754098360656, "grad_norm": 5.324728488922119, "learning_rate": 1.2324121216406137e-07, "loss": 0.4923, "step": 29020 }, { "epoch": 95.15081967213115, "grad_norm": 5.998490333557129, "learning_rate": 1.2307506859828623e-07, "loss": 0.3985, "step": 29021 }, { "epoch": 95.15409836065574, "grad_norm": 4.328784942626953, "learning_rate": 1.2290903640510998e-07, "loss": 0.3664, "step": 29022 }, { "epoch": 95.15737704918033, "grad_norm": 11.931760787963867, "learning_rate": 1.2274311558640228e-07, "loss": 0.5454, "step": 29023 }, { "epoch": 95.16065573770491, "grad_norm": 4.079286098480225, "learning_rate": 1.2257730614403607e-07, "loss": 0.4152, "step": 29024 }, { "epoch": 95.1639344262295, "grad_norm": 3.997143268585205, "learning_rate": 1.2241160807988207e-07, "loss": 0.2837, "step": 29025 }, { "epoch": 95.1672131147541, "grad_norm": 9.359911918640137, "learning_rate": 1.2224602139580544e-07, "loss": 0.2585, "step": 29026 }, { "epoch": 95.1704918032787, "grad_norm": 5.0427937507629395, "learning_rate": 1.2208054609367693e-07, "loss": 0.4482, "step": 29027 }, { "epoch": 95.17377049180328, "grad_norm": 5.592493057250977, "learning_rate": 1.2191518217535947e-07, "loss": 0.3153, "step": 29028 }, { "epoch": 95.17704918032787, "grad_norm": 3.6365792751312256, "learning_rate": 1.2174992964271936e-07, "loss": 0.4362, "step": 29029 }, { "epoch": 95.18032786885246, "grad_norm": 3.880434036254883, "learning_rate": 1.2158478849761956e-07, "loss": 0.1689, "step": 29030 }, { "epoch": 95.18360655737705, "grad_norm": 5.185122966766357, "learning_rate": 1.214197587419219e-07, "loss": 0.2834, "step": 29031 }, { "epoch": 95.18688524590164, "grad_norm": 4.9849958419799805, "learning_rate": 1.2125484037748824e-07, "loss": 0.2852, "step": 29032 }, { "epoch": 95.19016393442622, "grad_norm": 4.696383953094482, "learning_rate": 1.210900334061771e-07, "loss": 0.407, "step": 29033 }, { "epoch": 95.19344262295083, "grad_norm": 4.587979316711426, "learning_rate": 1.2092533782984806e-07, "loss": 0.5187, "step": 29034 }, { "epoch": 95.19672131147541, "grad_norm": 11.745782852172852, "learning_rate": 1.2076075365035633e-07, "loss": 0.3508, "step": 29035 }, { "epoch": 95.2, "grad_norm": 3.926635503768921, "learning_rate": 1.2059628086956044e-07, "loss": 0.5257, "step": 29036 }, { "epoch": 95.20327868852459, "grad_norm": 5.23092794418335, "learning_rate": 1.2043191948931222e-07, "loss": 0.4682, "step": 29037 }, { "epoch": 95.20655737704918, "grad_norm": 4.9448981285095215, "learning_rate": 1.2026766951146684e-07, "loss": 0.4473, "step": 29038 }, { "epoch": 95.20983606557377, "grad_norm": 8.263334274291992, "learning_rate": 1.2010353093787508e-07, "loss": 0.2682, "step": 29039 }, { "epoch": 95.21311475409836, "grad_norm": 6.155246734619141, "learning_rate": 1.1993950377038988e-07, "loss": 0.3654, "step": 29040 }, { "epoch": 95.21639344262294, "grad_norm": 5.3775153160095215, "learning_rate": 1.1977558801085755e-07, "loss": 0.3125, "step": 29041 }, { "epoch": 95.21967213114755, "grad_norm": 4.111899375915527, "learning_rate": 1.1961178366112992e-07, "loss": 0.2698, "step": 29042 }, { "epoch": 95.22295081967214, "grad_norm": 6.083956241607666, "learning_rate": 1.1944809072305219e-07, "loss": 0.3491, "step": 29043 }, { "epoch": 95.22622950819672, "grad_norm": 3.981823444366455, "learning_rate": 1.192845091984707e-07, "loss": 0.4582, "step": 29044 }, { "epoch": 95.22950819672131, "grad_norm": 3.839629888534546, "learning_rate": 1.1912103908922945e-07, "loss": 0.4837, "step": 29045 }, { "epoch": 95.2327868852459, "grad_norm": 5.104165077209473, "learning_rate": 1.1895768039717149e-07, "loss": 0.5759, "step": 29046 }, { "epoch": 95.23606557377049, "grad_norm": 4.649000644683838, "learning_rate": 1.1879443312413974e-07, "loss": 0.483, "step": 29047 }, { "epoch": 95.23934426229508, "grad_norm": 5.5846757888793945, "learning_rate": 1.1863129727197498e-07, "loss": 0.3799, "step": 29048 }, { "epoch": 95.24262295081967, "grad_norm": 9.092968940734863, "learning_rate": 1.1846827284251571e-07, "loss": 0.35, "step": 29049 }, { "epoch": 95.24590163934427, "grad_norm": 4.827281951904297, "learning_rate": 1.183053598376005e-07, "loss": 0.3936, "step": 29050 }, { "epoch": 95.24918032786886, "grad_norm": 9.215324401855469, "learning_rate": 1.1814255825906785e-07, "loss": 0.2792, "step": 29051 }, { "epoch": 95.25245901639344, "grad_norm": 4.3252363204956055, "learning_rate": 1.1797986810875184e-07, "loss": 0.3433, "step": 29052 }, { "epoch": 95.25573770491803, "grad_norm": 3.8532023429870605, "learning_rate": 1.178172893884888e-07, "loss": 0.3694, "step": 29053 }, { "epoch": 95.25901639344262, "grad_norm": 4.804018020629883, "learning_rate": 1.1765482210010837e-07, "loss": 0.4783, "step": 29054 }, { "epoch": 95.26229508196721, "grad_norm": 4.255213737487793, "learning_rate": 1.1749246624544686e-07, "loss": 0.4514, "step": 29055 }, { "epoch": 95.2655737704918, "grad_norm": 4.975972652435303, "learning_rate": 1.1733022182633169e-07, "loss": 0.3695, "step": 29056 }, { "epoch": 95.26885245901639, "grad_norm": 3.893084764480591, "learning_rate": 1.1716808884459475e-07, "loss": 0.2341, "step": 29057 }, { "epoch": 95.27213114754099, "grad_norm": 5.606224536895752, "learning_rate": 1.1700606730206344e-07, "loss": 0.4382, "step": 29058 }, { "epoch": 95.27540983606558, "grad_norm": 4.421648979187012, "learning_rate": 1.1684415720056297e-07, "loss": 0.3658, "step": 29059 }, { "epoch": 95.27868852459017, "grad_norm": 3.9799020290374756, "learning_rate": 1.1668235854192189e-07, "loss": 0.2058, "step": 29060 }, { "epoch": 95.28196721311475, "grad_norm": 4.21933126449585, "learning_rate": 1.1652067132796208e-07, "loss": 0.4157, "step": 29061 }, { "epoch": 95.28524590163934, "grad_norm": 6.046544551849365, "learning_rate": 1.1635909556050873e-07, "loss": 0.3669, "step": 29062 }, { "epoch": 95.28852459016393, "grad_norm": 3.5678603649139404, "learning_rate": 1.1619763124138261e-07, "loss": 0.1408, "step": 29063 }, { "epoch": 95.29180327868852, "grad_norm": 4.979043006896973, "learning_rate": 1.160362783724056e-07, "loss": 0.4434, "step": 29064 }, { "epoch": 95.29508196721312, "grad_norm": 3.9000959396362305, "learning_rate": 1.1587503695539515e-07, "loss": 0.3362, "step": 29065 }, { "epoch": 95.29836065573771, "grad_norm": 5.762870788574219, "learning_rate": 1.157139069921709e-07, "loss": 0.3006, "step": 29066 }, { "epoch": 95.3016393442623, "grad_norm": 5.374533176422119, "learning_rate": 1.1555288848455026e-07, "loss": 0.2198, "step": 29067 }, { "epoch": 95.30491803278689, "grad_norm": 4.11700439453125, "learning_rate": 1.1539198143434738e-07, "loss": 0.4347, "step": 29068 }, { "epoch": 95.30819672131148, "grad_norm": 6.105214595794678, "learning_rate": 1.1523118584337745e-07, "loss": 0.3694, "step": 29069 }, { "epoch": 95.31147540983606, "grad_norm": 3.6953303813934326, "learning_rate": 1.1507050171345236e-07, "loss": 0.1775, "step": 29070 }, { "epoch": 95.31475409836065, "grad_norm": 5.104805946350098, "learning_rate": 1.1490992904638732e-07, "loss": 0.3935, "step": 29071 }, { "epoch": 95.31803278688524, "grad_norm": 5.090473651885986, "learning_rate": 1.1474946784398977e-07, "loss": 0.3614, "step": 29072 }, { "epoch": 95.32131147540984, "grad_norm": 5.1912078857421875, "learning_rate": 1.1458911810806939e-07, "loss": 0.2396, "step": 29073 }, { "epoch": 95.32459016393443, "grad_norm": 6.600139141082764, "learning_rate": 1.1442887984043472e-07, "loss": 0.4092, "step": 29074 }, { "epoch": 95.32786885245902, "grad_norm": 4.343796730041504, "learning_rate": 1.1426875304289431e-07, "loss": 0.2561, "step": 29075 }, { "epoch": 95.33114754098361, "grad_norm": 4.838343620300293, "learning_rate": 1.1410873771725117e-07, "loss": 0.5129, "step": 29076 }, { "epoch": 95.3344262295082, "grad_norm": 4.8704962730407715, "learning_rate": 1.1394883386531053e-07, "loss": 0.5523, "step": 29077 }, { "epoch": 95.33770491803278, "grad_norm": 6.228994846343994, "learning_rate": 1.1378904148887648e-07, "loss": 0.2365, "step": 29078 }, { "epoch": 95.34098360655737, "grad_norm": 5.817795753479004, "learning_rate": 1.1362936058975094e-07, "loss": 0.4698, "step": 29079 }, { "epoch": 95.34426229508196, "grad_norm": 4.737224578857422, "learning_rate": 1.1346979116973134e-07, "loss": 0.3619, "step": 29080 }, { "epoch": 95.34754098360656, "grad_norm": 8.497461318969727, "learning_rate": 1.1331033323062068e-07, "loss": 0.2711, "step": 29081 }, { "epoch": 95.35081967213115, "grad_norm": 4.029172420501709, "learning_rate": 1.1315098677421643e-07, "loss": 0.3631, "step": 29082 }, { "epoch": 95.35409836065574, "grad_norm": 4.797717094421387, "learning_rate": 1.129917518023127e-07, "loss": 0.2581, "step": 29083 }, { "epoch": 95.35737704918033, "grad_norm": 5.031069278717041, "learning_rate": 1.1283262831670804e-07, "loss": 0.2238, "step": 29084 }, { "epoch": 95.36065573770492, "grad_norm": 4.62436056137085, "learning_rate": 1.1267361631919549e-07, "loss": 0.5109, "step": 29085 }, { "epoch": 95.3639344262295, "grad_norm": 4.789943695068359, "learning_rate": 1.1251471581156803e-07, "loss": 0.4214, "step": 29086 }, { "epoch": 95.3672131147541, "grad_norm": 4.574680805206299, "learning_rate": 1.1235592679561757e-07, "loss": 0.4516, "step": 29087 }, { "epoch": 95.37049180327868, "grad_norm": 4.82239294052124, "learning_rate": 1.121972492731338e-07, "loss": 0.3498, "step": 29088 }, { "epoch": 95.37377049180328, "grad_norm": 5.15269136428833, "learning_rate": 1.120386832459075e-07, "loss": 0.3161, "step": 29089 }, { "epoch": 95.37704918032787, "grad_norm": 4.911247730255127, "learning_rate": 1.1188022871572612e-07, "loss": 0.3812, "step": 29090 }, { "epoch": 95.38032786885246, "grad_norm": 4.172164440155029, "learning_rate": 1.1172188568437603e-07, "loss": 0.2271, "step": 29091 }, { "epoch": 95.38360655737705, "grad_norm": 3.6777617931365967, "learning_rate": 1.1156365415364357e-07, "loss": 0.1903, "step": 29092 }, { "epoch": 95.38688524590164, "grad_norm": 4.685757637023926, "learning_rate": 1.1140553412531064e-07, "loss": 0.4012, "step": 29093 }, { "epoch": 95.39016393442623, "grad_norm": 4.616953372955322, "learning_rate": 1.1124752560116247e-07, "loss": 0.3908, "step": 29094 }, { "epoch": 95.39344262295081, "grad_norm": 6.0236711502075195, "learning_rate": 1.11089628582981e-07, "loss": 0.5586, "step": 29095 }, { "epoch": 95.3967213114754, "grad_norm": 5.699246406555176, "learning_rate": 1.109318430725459e-07, "loss": 0.5644, "step": 29096 }, { "epoch": 95.4, "grad_norm": 5.902668476104736, "learning_rate": 1.1077416907163573e-07, "loss": 0.289, "step": 29097 }, { "epoch": 95.4032786885246, "grad_norm": 14.625748634338379, "learning_rate": 1.1061660658202911e-07, "loss": 0.4496, "step": 29098 }, { "epoch": 95.40655737704918, "grad_norm": 4.902900218963623, "learning_rate": 1.1045915560550235e-07, "loss": 0.3384, "step": 29099 }, { "epoch": 95.40983606557377, "grad_norm": 5.548234462738037, "learning_rate": 1.1030181614383184e-07, "loss": 0.4644, "step": 29100 }, { "epoch": 95.41311475409836, "grad_norm": 4.773684024810791, "learning_rate": 1.101445881987906e-07, "loss": 0.5392, "step": 29101 }, { "epoch": 95.41639344262295, "grad_norm": 4.196001052856445, "learning_rate": 1.0998747177215163e-07, "loss": 0.4399, "step": 29102 }, { "epoch": 95.41967213114754, "grad_norm": 4.550593852996826, "learning_rate": 1.09830466865688e-07, "loss": 0.2681, "step": 29103 }, { "epoch": 95.42295081967212, "grad_norm": 6.116679668426514, "learning_rate": 1.0967357348116826e-07, "loss": 0.397, "step": 29104 }, { "epoch": 95.42622950819673, "grad_norm": 4.34246301651001, "learning_rate": 1.0951679162036322e-07, "loss": 0.3861, "step": 29105 }, { "epoch": 95.42950819672132, "grad_norm": 6.214404582977295, "learning_rate": 1.0936012128503815e-07, "loss": 0.4841, "step": 29106 }, { "epoch": 95.4327868852459, "grad_norm": 4.558784008026123, "learning_rate": 1.0920356247696273e-07, "loss": 0.4186, "step": 29107 }, { "epoch": 95.43606557377049, "grad_norm": 4.683135032653809, "learning_rate": 1.0904711519790113e-07, "loss": 0.5823, "step": 29108 }, { "epoch": 95.43934426229508, "grad_norm": 5.588489532470703, "learning_rate": 1.0889077944961635e-07, "loss": 0.366, "step": 29109 }, { "epoch": 95.44262295081967, "grad_norm": 7.507221698760986, "learning_rate": 1.0873455523387366e-07, "loss": 0.394, "step": 29110 }, { "epoch": 95.44590163934426, "grad_norm": 4.671269416809082, "learning_rate": 1.0857844255243167e-07, "loss": 0.2649, "step": 29111 }, { "epoch": 95.44918032786886, "grad_norm": 4.56680965423584, "learning_rate": 1.0842244140705338e-07, "loss": 0.4143, "step": 29112 }, { "epoch": 95.45245901639345, "grad_norm": 3.9836885929107666, "learning_rate": 1.082665517994963e-07, "loss": 0.4344, "step": 29113 }, { "epoch": 95.45573770491804, "grad_norm": 5.28015661239624, "learning_rate": 1.0811077373151791e-07, "loss": 0.4476, "step": 29114 }, { "epoch": 95.45901639344262, "grad_norm": 4.869386196136475, "learning_rate": 1.0795510720487568e-07, "loss": 0.2101, "step": 29115 }, { "epoch": 95.46229508196721, "grad_norm": 4.4697489738464355, "learning_rate": 1.0779955222132599e-07, "loss": 0.3669, "step": 29116 }, { "epoch": 95.4655737704918, "grad_norm": 4.82198429107666, "learning_rate": 1.0764410878262077e-07, "loss": 0.298, "step": 29117 }, { "epoch": 95.46885245901639, "grad_norm": 4.536969184875488, "learning_rate": 1.0748877689051418e-07, "loss": 0.4172, "step": 29118 }, { "epoch": 95.47213114754098, "grad_norm": 7.470262050628662, "learning_rate": 1.0733355654675703e-07, "loss": 0.2547, "step": 29119 }, { "epoch": 95.47540983606558, "grad_norm": 4.9584221839904785, "learning_rate": 1.0717844775309905e-07, "loss": 0.4717, "step": 29120 }, { "epoch": 95.47868852459017, "grad_norm": 3.9643282890319824, "learning_rate": 1.0702345051129104e-07, "loss": 0.2659, "step": 29121 }, { "epoch": 95.48196721311476, "grad_norm": 6.396407127380371, "learning_rate": 1.068685648230794e-07, "loss": 0.2655, "step": 29122 }, { "epoch": 95.48524590163935, "grad_norm": 4.43254280090332, "learning_rate": 1.0671379069021048e-07, "loss": 0.3028, "step": 29123 }, { "epoch": 95.48852459016393, "grad_norm": 5.468761920928955, "learning_rate": 1.0655912811443069e-07, "loss": 0.4142, "step": 29124 }, { "epoch": 95.49180327868852, "grad_norm": 3.794457197189331, "learning_rate": 1.0640457709748308e-07, "loss": 0.2362, "step": 29125 }, { "epoch": 95.49508196721311, "grad_norm": 6.571035385131836, "learning_rate": 1.062501376411107e-07, "loss": 0.4793, "step": 29126 }, { "epoch": 95.4983606557377, "grad_norm": 4.028399467468262, "learning_rate": 1.0609580974705547e-07, "loss": 0.2136, "step": 29127 }, { "epoch": 95.5016393442623, "grad_norm": 6.478455066680908, "learning_rate": 1.0594159341705601e-07, "loss": 0.4121, "step": 29128 }, { "epoch": 95.50491803278689, "grad_norm": 4.556858062744141, "learning_rate": 1.0578748865285315e-07, "loss": 0.2957, "step": 29129 }, { "epoch": 95.50819672131148, "grad_norm": 4.0197434425354, "learning_rate": 1.0563349545618329e-07, "loss": 0.4364, "step": 29130 }, { "epoch": 95.51147540983607, "grad_norm": 5.118119239807129, "learning_rate": 1.0547961382878391e-07, "loss": 0.3526, "step": 29131 }, { "epoch": 95.51475409836065, "grad_norm": 4.697446823120117, "learning_rate": 1.0532584377238808e-07, "loss": 0.4939, "step": 29132 }, { "epoch": 95.51803278688524, "grad_norm": 5.664710521697998, "learning_rate": 1.051721852887333e-07, "loss": 0.1573, "step": 29133 }, { "epoch": 95.52131147540983, "grad_norm": 5.393441200256348, "learning_rate": 1.0501863837954929e-07, "loss": 0.3418, "step": 29134 }, { "epoch": 95.52459016393442, "grad_norm": 5.816054821014404, "learning_rate": 1.04865203046568e-07, "loss": 0.2645, "step": 29135 }, { "epoch": 95.52786885245902, "grad_norm": 4.4057936668396, "learning_rate": 1.0471187929152027e-07, "loss": 0.5145, "step": 29136 }, { "epoch": 95.53114754098361, "grad_norm": 5.133961200714111, "learning_rate": 1.0455866711613472e-07, "loss": 0.2584, "step": 29137 }, { "epoch": 95.5344262295082, "grad_norm": 4.231428146362305, "learning_rate": 1.0440556652213885e-07, "loss": 0.6021, "step": 29138 }, { "epoch": 95.53770491803279, "grad_norm": 4.393795490264893, "learning_rate": 1.0425257751125906e-07, "loss": 0.3726, "step": 29139 }, { "epoch": 95.54098360655738, "grad_norm": 4.243871212005615, "learning_rate": 1.0409970008522063e-07, "loss": 0.3967, "step": 29140 }, { "epoch": 95.54426229508196, "grad_norm": 4.195668697357178, "learning_rate": 1.0394693424574554e-07, "loss": 0.1678, "step": 29141 }, { "epoch": 95.54754098360655, "grad_norm": 5.212817668914795, "learning_rate": 1.0379427999456015e-07, "loss": 0.1949, "step": 29142 }, { "epoch": 95.55081967213114, "grad_norm": 8.498499870300293, "learning_rate": 1.0364173733338312e-07, "loss": 0.5994, "step": 29143 }, { "epoch": 95.55409836065574, "grad_norm": 7.088330268859863, "learning_rate": 1.0348930626393527e-07, "loss": 0.2033, "step": 29144 }, { "epoch": 95.55737704918033, "grad_norm": 6.4899001121521, "learning_rate": 1.0333698678793413e-07, "loss": 0.3195, "step": 29145 }, { "epoch": 95.56065573770492, "grad_norm": 4.317991256713867, "learning_rate": 1.0318477890709944e-07, "loss": 0.5391, "step": 29146 }, { "epoch": 95.56393442622951, "grad_norm": 6.085321426391602, "learning_rate": 1.0303268262314647e-07, "loss": 0.5623, "step": 29147 }, { "epoch": 95.5672131147541, "grad_norm": 4.711657524108887, "learning_rate": 1.0288069793779053e-07, "loss": 0.4, "step": 29148 }, { "epoch": 95.57049180327868, "grad_norm": 5.19561243057251, "learning_rate": 1.0272882485274472e-07, "loss": 0.3493, "step": 29149 }, { "epoch": 95.57377049180327, "grad_norm": 6.186091899871826, "learning_rate": 1.0257706336972207e-07, "loss": 0.4576, "step": 29150 }, { "epoch": 95.57704918032788, "grad_norm": 7.714240550994873, "learning_rate": 1.0242541349043345e-07, "loss": 0.212, "step": 29151 }, { "epoch": 95.58032786885246, "grad_norm": 4.67858362197876, "learning_rate": 1.0227387521658972e-07, "loss": 0.2267, "step": 29152 }, { "epoch": 95.58360655737705, "grad_norm": 6.793328762054443, "learning_rate": 1.021224485498995e-07, "loss": 0.337, "step": 29153 }, { "epoch": 95.58688524590164, "grad_norm": 4.565058708190918, "learning_rate": 1.0197113349206922e-07, "loss": 0.1694, "step": 29154 }, { "epoch": 95.59016393442623, "grad_norm": 4.996511459350586, "learning_rate": 1.0181993004480528e-07, "loss": 0.5612, "step": 29155 }, { "epoch": 95.59344262295082, "grad_norm": 6.183237075805664, "learning_rate": 1.016688382098141e-07, "loss": 0.6187, "step": 29156 }, { "epoch": 95.5967213114754, "grad_norm": 5.2433061599731445, "learning_rate": 1.0151785798879877e-07, "loss": 0.4258, "step": 29157 }, { "epoch": 95.6, "grad_norm": 4.250023365020752, "learning_rate": 1.0136698938346012e-07, "loss": 0.2613, "step": 29158 }, { "epoch": 95.6032786885246, "grad_norm": 5.119043350219727, "learning_rate": 1.0121623239550126e-07, "loss": 0.3269, "step": 29159 }, { "epoch": 95.60655737704919, "grad_norm": 13.242341995239258, "learning_rate": 1.0106558702662195e-07, "loss": 0.3369, "step": 29160 }, { "epoch": 95.60983606557377, "grad_norm": 4.416547775268555, "learning_rate": 1.0091505327851969e-07, "loss": 0.48, "step": 29161 }, { "epoch": 95.61311475409836, "grad_norm": 5.271796703338623, "learning_rate": 1.0076463115289314e-07, "loss": 0.5056, "step": 29162 }, { "epoch": 95.61639344262295, "grad_norm": 3.710036277770996, "learning_rate": 1.0061432065143761e-07, "loss": 0.2717, "step": 29163 }, { "epoch": 95.61967213114754, "grad_norm": 3.950892210006714, "learning_rate": 1.0046412177584841e-07, "loss": 0.4008, "step": 29164 }, { "epoch": 95.62295081967213, "grad_norm": 4.07305908203125, "learning_rate": 1.0031403452781974e-07, "loss": 0.3771, "step": 29165 }, { "epoch": 95.62622950819672, "grad_norm": 5.2592620849609375, "learning_rate": 1.0016405890904358e-07, "loss": 0.3443, "step": 29166 }, { "epoch": 95.62950819672132, "grad_norm": 5.483820915222168, "learning_rate": 1.0001419492120967e-07, "loss": 0.19, "step": 29167 }, { "epoch": 95.6327868852459, "grad_norm": 18.37104606628418, "learning_rate": 9.986444256601002e-08, "loss": 0.3792, "step": 29168 }, { "epoch": 95.6360655737705, "grad_norm": 4.5306267738342285, "learning_rate": 9.971480184513216e-08, "loss": 0.2677, "step": 29169 }, { "epoch": 95.63934426229508, "grad_norm": 7.310753345489502, "learning_rate": 9.956527276026473e-08, "loss": 0.3568, "step": 29170 }, { "epoch": 95.64262295081967, "grad_norm": 5.202828407287598, "learning_rate": 9.941585531309084e-08, "loss": 0.3931, "step": 29171 }, { "epoch": 95.64590163934426, "grad_norm": 14.152257919311523, "learning_rate": 9.926654950529801e-08, "loss": 0.3758, "step": 29172 }, { "epoch": 95.64918032786885, "grad_norm": 5.994466304779053, "learning_rate": 9.911735533856937e-08, "loss": 0.3482, "step": 29173 }, { "epoch": 95.65245901639344, "grad_norm": 4.3209075927734375, "learning_rate": 9.896827281458687e-08, "loss": 0.2265, "step": 29174 }, { "epoch": 95.65573770491804, "grad_norm": 3.861253261566162, "learning_rate": 9.881930193503031e-08, "loss": 0.3597, "step": 29175 }, { "epoch": 95.65901639344263, "grad_norm": 4.835362434387207, "learning_rate": 9.867044270158167e-08, "loss": 0.3244, "step": 29176 }, { "epoch": 95.66229508196722, "grad_norm": 4.859875679016113, "learning_rate": 9.852169511591957e-08, "loss": 0.4421, "step": 29177 }, { "epoch": 95.6655737704918, "grad_norm": 4.107612133026123, "learning_rate": 9.83730591797183e-08, "loss": 0.4123, "step": 29178 }, { "epoch": 95.66885245901639, "grad_norm": 5.429717063903809, "learning_rate": 9.822453489465756e-08, "loss": 0.4831, "step": 29179 }, { "epoch": 95.67213114754098, "grad_norm": 7.704285621643066, "learning_rate": 9.807612226240937e-08, "loss": 0.5222, "step": 29180 }, { "epoch": 95.67540983606557, "grad_norm": 9.209016799926758, "learning_rate": 9.792782128464906e-08, "loss": 0.389, "step": 29181 }, { "epoch": 95.67868852459016, "grad_norm": 4.115417957305908, "learning_rate": 9.777963196304752e-08, "loss": 0.5912, "step": 29182 }, { "epoch": 95.68196721311476, "grad_norm": 5.055752277374268, "learning_rate": 9.763155429927673e-08, "loss": 0.3647, "step": 29183 }, { "epoch": 95.68524590163935, "grad_norm": 4.630505561828613, "learning_rate": 9.748358829500648e-08, "loss": 0.3622, "step": 29184 }, { "epoch": 95.68852459016394, "grad_norm": 4.286777496337891, "learning_rate": 9.733573395190432e-08, "loss": 0.4014, "step": 29185 }, { "epoch": 95.69180327868852, "grad_norm": 5.5250396728515625, "learning_rate": 9.71879912716378e-08, "loss": 0.3689, "step": 29186 }, { "epoch": 95.69508196721311, "grad_norm": 4.309356212615967, "learning_rate": 9.704036025587338e-08, "loss": 0.3409, "step": 29187 }, { "epoch": 95.6983606557377, "grad_norm": 5.640630722045898, "learning_rate": 9.689284090627526e-08, "loss": 0.2982, "step": 29188 }, { "epoch": 95.70163934426229, "grad_norm": 4.766960144042969, "learning_rate": 9.674543322450658e-08, "loss": 0.3054, "step": 29189 }, { "epoch": 95.70491803278688, "grad_norm": 3.8801841735839844, "learning_rate": 9.659813721223044e-08, "loss": 0.3677, "step": 29190 }, { "epoch": 95.70819672131148, "grad_norm": 5.034511089324951, "learning_rate": 9.645095287110773e-08, "loss": 0.4588, "step": 29191 }, { "epoch": 95.71147540983607, "grad_norm": 3.962794065475464, "learning_rate": 9.630388020279713e-08, "loss": 0.2009, "step": 29192 }, { "epoch": 95.71475409836066, "grad_norm": 23.853639602661133, "learning_rate": 9.615691920895731e-08, "loss": 0.3082, "step": 29193 }, { "epoch": 95.71803278688525, "grad_norm": 5.8966898918151855, "learning_rate": 9.601006989124584e-08, "loss": 0.4383, "step": 29194 }, { "epoch": 95.72131147540983, "grad_norm": 5.015532970428467, "learning_rate": 9.586333225131916e-08, "loss": 0.6215, "step": 29195 }, { "epoch": 95.72459016393442, "grad_norm": 4.914747714996338, "learning_rate": 9.57167062908304e-08, "loss": 0.2271, "step": 29196 }, { "epoch": 95.72786885245901, "grad_norm": 5.5333356857299805, "learning_rate": 9.557019201143269e-08, "loss": 0.3779, "step": 29197 }, { "epoch": 95.73114754098361, "grad_norm": 6.343701362609863, "learning_rate": 9.542378941478025e-08, "loss": 0.6838, "step": 29198 }, { "epoch": 95.7344262295082, "grad_norm": 5.422243118286133, "learning_rate": 9.527749850252288e-08, "loss": 0.2599, "step": 29199 }, { "epoch": 95.73770491803279, "grad_norm": 6.043550968170166, "learning_rate": 9.513131927630925e-08, "loss": 0.4816, "step": 29200 }, { "epoch": 95.74098360655738, "grad_norm": 4.560357093811035, "learning_rate": 9.498525173778916e-08, "loss": 0.2655, "step": 29201 }, { "epoch": 95.74426229508197, "grad_norm": 4.896012783050537, "learning_rate": 9.483929588860907e-08, "loss": 0.2717, "step": 29202 }, { "epoch": 95.74754098360656, "grad_norm": 6.054658889770508, "learning_rate": 9.469345173041433e-08, "loss": 0.4007, "step": 29203 }, { "epoch": 95.75081967213114, "grad_norm": 4.891916275024414, "learning_rate": 9.45477192648503e-08, "loss": 0.3368, "step": 29204 }, { "epoch": 95.75409836065573, "grad_norm": 4.112546443939209, "learning_rate": 9.440209849355896e-08, "loss": 0.4432, "step": 29205 }, { "epoch": 95.75737704918033, "grad_norm": 4.0334696769714355, "learning_rate": 9.42565894181835e-08, "loss": 0.2186, "step": 29206 }, { "epoch": 95.76065573770492, "grad_norm": 5.237345218658447, "learning_rate": 9.411119204036478e-08, "loss": 0.4022, "step": 29207 }, { "epoch": 95.76393442622951, "grad_norm": 3.749540328979492, "learning_rate": 9.396590636174153e-08, "loss": 0.3438, "step": 29208 }, { "epoch": 95.7672131147541, "grad_norm": 10.620003700256348, "learning_rate": 9.38207323839524e-08, "loss": 0.278, "step": 29209 }, { "epoch": 95.77049180327869, "grad_norm": 8.304533004760742, "learning_rate": 9.367567010863387e-08, "loss": 0.2833, "step": 29210 }, { "epoch": 95.77377049180328, "grad_norm": 4.045644760131836, "learning_rate": 9.353071953742354e-08, "loss": 0.4158, "step": 29211 }, { "epoch": 95.77704918032786, "grad_norm": 4.205834865570068, "learning_rate": 9.338588067195342e-08, "loss": 0.2574, "step": 29212 }, { "epoch": 95.78032786885245, "grad_norm": 5.483211040496826, "learning_rate": 9.324115351385887e-08, "loss": 0.2884, "step": 29213 }, { "epoch": 95.78360655737706, "grad_norm": 5.101911544799805, "learning_rate": 9.30965380647697e-08, "loss": 0.2611, "step": 29214 }, { "epoch": 95.78688524590164, "grad_norm": 4.50754451751709, "learning_rate": 9.295203432631794e-08, "loss": 0.4831, "step": 29215 }, { "epoch": 95.79016393442623, "grad_norm": 5.798499584197998, "learning_rate": 9.280764230013229e-08, "loss": 0.403, "step": 29216 }, { "epoch": 95.79344262295082, "grad_norm": 5.288582801818848, "learning_rate": 9.266336198784254e-08, "loss": 0.2858, "step": 29217 }, { "epoch": 95.79672131147541, "grad_norm": 3.8745858669281006, "learning_rate": 9.251919339107407e-08, "loss": 0.4505, "step": 29218 }, { "epoch": 95.8, "grad_norm": 4.873561382293701, "learning_rate": 9.237513651145224e-08, "loss": 0.2531, "step": 29219 }, { "epoch": 95.80327868852459, "grad_norm": 4.944587707519531, "learning_rate": 9.223119135060244e-08, "loss": 0.3207, "step": 29220 }, { "epoch": 95.80655737704917, "grad_norm": 5.52690315246582, "learning_rate": 9.208735791014666e-08, "loss": 0.3926, "step": 29221 }, { "epoch": 95.80983606557378, "grad_norm": 5.246231555938721, "learning_rate": 9.19436361917092e-08, "loss": 0.5094, "step": 29222 }, { "epoch": 95.81311475409836, "grad_norm": 6.404362201690674, "learning_rate": 9.180002619690765e-08, "loss": 0.5033, "step": 29223 }, { "epoch": 95.81639344262295, "grad_norm": 4.392811298370361, "learning_rate": 9.165652792736291e-08, "loss": 0.4903, "step": 29224 }, { "epoch": 95.81967213114754, "grad_norm": 5.054882526397705, "learning_rate": 9.151314138469369e-08, "loss": 0.3836, "step": 29225 }, { "epoch": 95.82295081967213, "grad_norm": 3.247485399246216, "learning_rate": 9.136986657051538e-08, "loss": 0.1809, "step": 29226 }, { "epoch": 95.82622950819672, "grad_norm": 4.3535847663879395, "learning_rate": 9.122670348644447e-08, "loss": 0.3349, "step": 29227 }, { "epoch": 95.8295081967213, "grad_norm": 4.096735954284668, "learning_rate": 9.108365213409521e-08, "loss": 0.2877, "step": 29228 }, { "epoch": 95.8327868852459, "grad_norm": 3.6782140731811523, "learning_rate": 9.094071251508074e-08, "loss": 0.5709, "step": 29229 }, { "epoch": 95.8360655737705, "grad_norm": 4.452746868133545, "learning_rate": 9.079788463101091e-08, "loss": 0.2477, "step": 29230 }, { "epoch": 95.83934426229509, "grad_norm": 5.151628017425537, "learning_rate": 9.065516848349997e-08, "loss": 0.4778, "step": 29231 }, { "epoch": 95.84262295081967, "grad_norm": 5.397034168243408, "learning_rate": 9.051256407415443e-08, "loss": 0.5277, "step": 29232 }, { "epoch": 95.84590163934426, "grad_norm": 5.539165496826172, "learning_rate": 9.037007140458299e-08, "loss": 0.4316, "step": 29233 }, { "epoch": 95.84918032786885, "grad_norm": 3.6969692707061768, "learning_rate": 9.022769047639102e-08, "loss": 0.2034, "step": 29234 }, { "epoch": 95.85245901639344, "grad_norm": 11.804876327514648, "learning_rate": 9.008542129118725e-08, "loss": 0.6578, "step": 29235 }, { "epoch": 95.85573770491803, "grad_norm": 4.863147735595703, "learning_rate": 8.994326385057373e-08, "loss": 0.348, "step": 29236 }, { "epoch": 95.85901639344263, "grad_norm": 4.605045795440674, "learning_rate": 8.980121815615362e-08, "loss": 0.3895, "step": 29237 }, { "epoch": 95.86229508196722, "grad_norm": 4.853789806365967, "learning_rate": 8.965928420952785e-08, "loss": 0.6659, "step": 29238 }, { "epoch": 95.8655737704918, "grad_norm": 9.822726249694824, "learning_rate": 8.951746201229961e-08, "loss": 0.2561, "step": 29239 }, { "epoch": 95.8688524590164, "grad_norm": 4.2181782722473145, "learning_rate": 8.937575156606537e-08, "loss": 0.4483, "step": 29240 }, { "epoch": 95.87213114754098, "grad_norm": 4.375208854675293, "learning_rate": 8.923415287242387e-08, "loss": 0.3624, "step": 29241 }, { "epoch": 95.87540983606557, "grad_norm": 6.863297462463379, "learning_rate": 8.909266593297162e-08, "loss": 0.2743, "step": 29242 }, { "epoch": 95.87868852459016, "grad_norm": 4.669652938842773, "learning_rate": 8.895129074930509e-08, "loss": 0.2754, "step": 29243 }, { "epoch": 95.88196721311475, "grad_norm": 11.100214958190918, "learning_rate": 8.881002732301746e-08, "loss": 0.2521, "step": 29244 }, { "epoch": 95.88524590163935, "grad_norm": 4.993167877197266, "learning_rate": 8.866887565570192e-08, "loss": 0.2915, "step": 29245 }, { "epoch": 95.88852459016394, "grad_norm": 4.547791004180908, "learning_rate": 8.852783574894941e-08, "loss": 0.2205, "step": 29246 }, { "epoch": 95.89180327868853, "grad_norm": 6.567039966583252, "learning_rate": 8.838690760435198e-08, "loss": 0.3394, "step": 29247 }, { "epoch": 95.89508196721312, "grad_norm": 4.83657693862915, "learning_rate": 8.824609122349726e-08, "loss": 0.468, "step": 29248 }, { "epoch": 95.8983606557377, "grad_norm": 7.588086128234863, "learning_rate": 8.810538660797175e-08, "loss": 0.273, "step": 29249 }, { "epoch": 95.90163934426229, "grad_norm": 6.737919330596924, "learning_rate": 8.79647937593664e-08, "loss": 0.2448, "step": 29250 }, { "epoch": 95.90491803278688, "grad_norm": 5.112740516662598, "learning_rate": 8.782431267926216e-08, "loss": 0.5148, "step": 29251 }, { "epoch": 95.90819672131147, "grad_norm": 4.41801643371582, "learning_rate": 8.768394336924558e-08, "loss": 0.1725, "step": 29252 }, { "epoch": 95.91147540983607, "grad_norm": 4.566664218902588, "learning_rate": 8.754368583089978e-08, "loss": 0.5564, "step": 29253 }, { "epoch": 95.91475409836066, "grad_norm": 4.840692043304443, "learning_rate": 8.740354006580353e-08, "loss": 0.3513, "step": 29254 }, { "epoch": 95.91803278688525, "grad_norm": 4.012762069702148, "learning_rate": 8.72635060755389e-08, "loss": 0.2566, "step": 29255 }, { "epoch": 95.92131147540984, "grad_norm": 4.220468997955322, "learning_rate": 8.712358386168573e-08, "loss": 0.3315, "step": 29256 }, { "epoch": 95.92459016393443, "grad_norm": 5.473220348358154, "learning_rate": 8.698377342582165e-08, "loss": 0.4652, "step": 29257 }, { "epoch": 95.92786885245901, "grad_norm": 4.9471540451049805, "learning_rate": 8.684407476952095e-08, "loss": 0.2337, "step": 29258 }, { "epoch": 95.9311475409836, "grad_norm": 3.2611634731292725, "learning_rate": 8.670448789436126e-08, "loss": 0.2751, "step": 29259 }, { "epoch": 95.93442622950819, "grad_norm": 3.3332724571228027, "learning_rate": 8.656501280191576e-08, "loss": 0.2338, "step": 29260 }, { "epoch": 95.9377049180328, "grad_norm": 4.87367582321167, "learning_rate": 8.642564949375654e-08, "loss": 0.288, "step": 29261 }, { "epoch": 95.94098360655738, "grad_norm": 6.787545204162598, "learning_rate": 8.628639797145566e-08, "loss": 0.4099, "step": 29262 }, { "epoch": 95.94426229508197, "grad_norm": 7.821634769439697, "learning_rate": 8.614725823658409e-08, "loss": 0.2845, "step": 29263 }, { "epoch": 95.94754098360656, "grad_norm": 4.666257381439209, "learning_rate": 8.600823029070949e-08, "loss": 0.3732, "step": 29264 }, { "epoch": 95.95081967213115, "grad_norm": 5.64095401763916, "learning_rate": 8.586931413540056e-08, "loss": 0.2168, "step": 29265 }, { "epoch": 95.95409836065573, "grad_norm": 5.746804237365723, "learning_rate": 8.573050977222275e-08, "loss": 0.4479, "step": 29266 }, { "epoch": 95.95737704918032, "grad_norm": 10.29234790802002, "learning_rate": 8.559181720274145e-08, "loss": 0.3594, "step": 29267 }, { "epoch": 95.96065573770491, "grad_norm": 4.570930004119873, "learning_rate": 8.5453236428521e-08, "loss": 0.4624, "step": 29268 }, { "epoch": 95.96393442622951, "grad_norm": 4.510122776031494, "learning_rate": 8.531476745112454e-08, "loss": 0.3493, "step": 29269 }, { "epoch": 95.9672131147541, "grad_norm": 5.424053192138672, "learning_rate": 8.517641027211198e-08, "loss": 0.3677, "step": 29270 }, { "epoch": 95.97049180327869, "grad_norm": 6.676285743713379, "learning_rate": 8.503816489304429e-08, "loss": 0.3116, "step": 29271 }, { "epoch": 95.97377049180328, "grad_norm": 4.954893589019775, "learning_rate": 8.490003131548019e-08, "loss": 0.434, "step": 29272 }, { "epoch": 95.97704918032787, "grad_norm": 5.375720977783203, "learning_rate": 8.476200954097846e-08, "loss": 0.2925, "step": 29273 }, { "epoch": 95.98032786885246, "grad_norm": 4.350802421569824, "learning_rate": 8.462409957109342e-08, "loss": 0.2675, "step": 29274 }, { "epoch": 95.98360655737704, "grad_norm": 4.4634294509887695, "learning_rate": 8.448630140738046e-08, "loss": 0.2955, "step": 29275 }, { "epoch": 95.98688524590163, "grad_norm": 4.662888050079346, "learning_rate": 8.434861505139502e-08, "loss": 0.3451, "step": 29276 }, { "epoch": 95.99016393442623, "grad_norm": 5.355000019073486, "learning_rate": 8.421104050468809e-08, "loss": 0.4044, "step": 29277 }, { "epoch": 95.99344262295082, "grad_norm": 3.8314623832702637, "learning_rate": 8.407357776881175e-08, "loss": 0.3687, "step": 29278 }, { "epoch": 95.99672131147541, "grad_norm": 11.327786445617676, "learning_rate": 8.393622684531588e-08, "loss": 0.2996, "step": 29279 }, { "epoch": 96.0, "grad_norm": 5.759328365325928, "learning_rate": 8.379898773574924e-08, "loss": 0.4609, "step": 29280 }, { "epoch": 96.00327868852459, "grad_norm": 5.216335773468018, "learning_rate": 8.366186044165948e-08, "loss": 0.2344, "step": 29281 }, { "epoch": 96.00655737704918, "grad_norm": 7.774703025817871, "learning_rate": 8.3524844964592e-08, "loss": 0.3807, "step": 29282 }, { "epoch": 96.00983606557377, "grad_norm": 6.679502964019775, "learning_rate": 8.338794130609229e-08, "loss": 0.2948, "step": 29283 }, { "epoch": 96.01311475409837, "grad_norm": 4.598140716552734, "learning_rate": 8.325114946770463e-08, "loss": 0.2746, "step": 29284 }, { "epoch": 96.01639344262296, "grad_norm": 5.770240783691406, "learning_rate": 8.311446945097112e-08, "loss": 0.4958, "step": 29285 }, { "epoch": 96.01967213114754, "grad_norm": 4.472044467926025, "learning_rate": 8.297790125743277e-08, "loss": 0.3253, "step": 29286 }, { "epoch": 96.02295081967213, "grad_norm": 7.326650619506836, "learning_rate": 8.284144488862944e-08, "loss": 0.3441, "step": 29287 }, { "epoch": 96.02622950819672, "grad_norm": 9.738687515258789, "learning_rate": 8.270510034610101e-08, "loss": 0.4543, "step": 29288 }, { "epoch": 96.02950819672131, "grad_norm": 4.31326150894165, "learning_rate": 8.256886763138295e-08, "loss": 0.2369, "step": 29289 }, { "epoch": 96.0327868852459, "grad_norm": 6.466090679168701, "learning_rate": 8.243274674601287e-08, "loss": 0.44, "step": 29290 }, { "epoch": 96.03606557377049, "grad_norm": 5.663755416870117, "learning_rate": 8.229673769152625e-08, "loss": 0.4162, "step": 29291 }, { "epoch": 96.03934426229509, "grad_norm": 6.9506001472473145, "learning_rate": 8.216084046945405e-08, "loss": 0.3192, "step": 29292 }, { "epoch": 96.04262295081968, "grad_norm": 9.8871431350708, "learning_rate": 8.202505508133063e-08, "loss": 0.3623, "step": 29293 }, { "epoch": 96.04590163934427, "grad_norm": 3.3175549507141113, "learning_rate": 8.188938152868809e-08, "loss": 0.1768, "step": 29294 }, { "epoch": 96.04918032786885, "grad_norm": 6.9024176597595215, "learning_rate": 8.175381981305409e-08, "loss": 0.3517, "step": 29295 }, { "epoch": 96.05245901639344, "grad_norm": 6.6645827293396, "learning_rate": 8.161836993595851e-08, "loss": 0.3348, "step": 29296 }, { "epoch": 96.05573770491803, "grad_norm": 4.781999588012695, "learning_rate": 8.148303189892793e-08, "loss": 0.4048, "step": 29297 }, { "epoch": 96.05901639344262, "grad_norm": 4.207897663116455, "learning_rate": 8.134780570348888e-08, "loss": 0.3689, "step": 29298 }, { "epoch": 96.0622950819672, "grad_norm": 4.651634216308594, "learning_rate": 8.121269135116683e-08, "loss": 0.3017, "step": 29299 }, { "epoch": 96.06557377049181, "grad_norm": 5.969091415405273, "learning_rate": 8.107768884348388e-08, "loss": 0.5243, "step": 29300 }, { "epoch": 96.0688524590164, "grad_norm": 3.8357906341552734, "learning_rate": 8.094279818196326e-08, "loss": 0.2675, "step": 29301 }, { "epoch": 96.07213114754099, "grad_norm": 4.603587627410889, "learning_rate": 8.080801936812599e-08, "loss": 0.1977, "step": 29302 }, { "epoch": 96.07540983606557, "grad_norm": 4.649311542510986, "learning_rate": 8.067335240349194e-08, "loss": 0.3014, "step": 29303 }, { "epoch": 96.07868852459016, "grad_norm": 5.5462446212768555, "learning_rate": 8.05387972895788e-08, "loss": 0.2926, "step": 29304 }, { "epoch": 96.08196721311475, "grad_norm": 5.2722320556640625, "learning_rate": 8.040435402790425e-08, "loss": 0.4705, "step": 29305 }, { "epoch": 96.08524590163934, "grad_norm": 4.343624591827393, "learning_rate": 8.027002261998484e-08, "loss": 0.3125, "step": 29306 }, { "epoch": 96.08852459016393, "grad_norm": 4.300462245941162, "learning_rate": 8.013580306733492e-08, "loss": 0.2993, "step": 29307 }, { "epoch": 96.09180327868853, "grad_norm": 6.945103168487549, "learning_rate": 8.000169537146774e-08, "loss": 0.6736, "step": 29308 }, { "epoch": 96.09508196721312, "grad_norm": 3.8868045806884766, "learning_rate": 7.986769953389539e-08, "loss": 0.3377, "step": 29309 }, { "epoch": 96.09836065573771, "grad_norm": 3.5235869884490967, "learning_rate": 7.973381555612891e-08, "loss": 0.2682, "step": 29310 }, { "epoch": 96.1016393442623, "grad_norm": 5.567051887512207, "learning_rate": 7.960004343967931e-08, "loss": 0.2436, "step": 29311 }, { "epoch": 96.10491803278688, "grad_norm": 3.9333670139312744, "learning_rate": 7.946638318605315e-08, "loss": 0.2506, "step": 29312 }, { "epoch": 96.10819672131147, "grad_norm": 17.924182891845703, "learning_rate": 7.933283479675813e-08, "loss": 0.3122, "step": 29313 }, { "epoch": 96.11147540983606, "grad_norm": 4.47637414932251, "learning_rate": 7.91993982732997e-08, "loss": 0.4567, "step": 29314 }, { "epoch": 96.11475409836065, "grad_norm": 5.847075462341309, "learning_rate": 7.906607361718443e-08, "loss": 0.4383, "step": 29315 }, { "epoch": 96.11803278688525, "grad_norm": 4.814831733703613, "learning_rate": 7.893286082991336e-08, "loss": 0.3082, "step": 29316 }, { "epoch": 96.12131147540984, "grad_norm": 8.213505744934082, "learning_rate": 7.879975991299082e-08, "loss": 0.3205, "step": 29317 }, { "epoch": 96.12459016393443, "grad_norm": 3.7116260528564453, "learning_rate": 7.866677086791563e-08, "loss": 0.3036, "step": 29318 }, { "epoch": 96.12786885245902, "grad_norm": 5.9426774978637695, "learning_rate": 7.853389369618768e-08, "loss": 0.2468, "step": 29319 }, { "epoch": 96.1311475409836, "grad_norm": 6.258223056793213, "learning_rate": 7.84011283993058e-08, "loss": 0.3098, "step": 29320 }, { "epoch": 96.1344262295082, "grad_norm": 19.686683654785156, "learning_rate": 7.826847497876766e-08, "loss": 0.2585, "step": 29321 }, { "epoch": 96.13770491803278, "grad_norm": 6.544134616851807, "learning_rate": 7.813593343606874e-08, "loss": 0.2713, "step": 29322 }, { "epoch": 96.14098360655737, "grad_norm": 3.8227760791778564, "learning_rate": 7.800350377270338e-08, "loss": 0.3597, "step": 29323 }, { "epoch": 96.14426229508197, "grad_norm": 5.825671195983887, "learning_rate": 7.787118599016375e-08, "loss": 0.4045, "step": 29324 }, { "epoch": 96.14754098360656, "grad_norm": 5.900690078735352, "learning_rate": 7.773898008994418e-08, "loss": 0.2219, "step": 29325 }, { "epoch": 96.15081967213115, "grad_norm": 5.334089756011963, "learning_rate": 7.760688607353351e-08, "loss": 0.258, "step": 29326 }, { "epoch": 96.15409836065574, "grad_norm": 5.945231914520264, "learning_rate": 7.747490394242163e-08, "loss": 0.5267, "step": 29327 }, { "epoch": 96.15737704918033, "grad_norm": 5.475971698760986, "learning_rate": 7.734303369809736e-08, "loss": 0.3668, "step": 29328 }, { "epoch": 96.16065573770491, "grad_norm": 5.081666946411133, "learning_rate": 7.72112753420473e-08, "loss": 0.293, "step": 29329 }, { "epoch": 96.1639344262295, "grad_norm": 5.134390830993652, "learning_rate": 7.707962887575804e-08, "loss": 0.2351, "step": 29330 }, { "epoch": 96.1672131147541, "grad_norm": 4.870210647583008, "learning_rate": 7.694809430071282e-08, "loss": 0.5832, "step": 29331 }, { "epoch": 96.1704918032787, "grad_norm": 4.957146644592285, "learning_rate": 7.681667161839378e-08, "loss": 0.2028, "step": 29332 }, { "epoch": 96.17377049180328, "grad_norm": 5.752942085266113, "learning_rate": 7.668536083028644e-08, "loss": 0.378, "step": 29333 }, { "epoch": 96.17704918032787, "grad_norm": 5.107311725616455, "learning_rate": 7.655416193786847e-08, "loss": 0.4282, "step": 29334 }, { "epoch": 96.18032786885246, "grad_norm": 4.425975322723389, "learning_rate": 7.642307494261981e-08, "loss": 0.3186, "step": 29335 }, { "epoch": 96.18360655737705, "grad_norm": 4.31925630569458, "learning_rate": 7.629209984601816e-08, "loss": 0.2956, "step": 29336 }, { "epoch": 96.18688524590164, "grad_norm": 4.524561405181885, "learning_rate": 7.616123664954233e-08, "loss": 0.3261, "step": 29337 }, { "epoch": 96.19016393442622, "grad_norm": 4.147854804992676, "learning_rate": 7.603048535466672e-08, "loss": 0.2356, "step": 29338 }, { "epoch": 96.19344262295083, "grad_norm": 5.793508529663086, "learning_rate": 7.589984596286459e-08, "loss": 0.4118, "step": 29339 }, { "epoch": 96.19672131147541, "grad_norm": 4.63946008682251, "learning_rate": 7.576931847561142e-08, "loss": 0.3221, "step": 29340 }, { "epoch": 96.2, "grad_norm": 7.385430335998535, "learning_rate": 7.563890289437825e-08, "loss": 0.2943, "step": 29341 }, { "epoch": 96.20327868852459, "grad_norm": 6.214110374450684, "learning_rate": 7.550859922063392e-08, "loss": 0.4973, "step": 29342 }, { "epoch": 96.20655737704918, "grad_norm": 5.367641448974609, "learning_rate": 7.537840745584945e-08, "loss": 0.2775, "step": 29343 }, { "epoch": 96.20983606557377, "grad_norm": 5.3088531494140625, "learning_rate": 7.524832760149258e-08, "loss": 0.2417, "step": 29344 }, { "epoch": 96.21311475409836, "grad_norm": 5.809307098388672, "learning_rate": 7.51183596590288e-08, "loss": 0.2184, "step": 29345 }, { "epoch": 96.21639344262294, "grad_norm": 4.410201549530029, "learning_rate": 7.498850362992694e-08, "loss": 0.4378, "step": 29346 }, { "epoch": 96.21967213114755, "grad_norm": 5.408497333526611, "learning_rate": 7.485875951564803e-08, "loss": 0.3978, "step": 29347 }, { "epoch": 96.22295081967214, "grad_norm": 4.473615646362305, "learning_rate": 7.472912731765647e-08, "loss": 0.4617, "step": 29348 }, { "epoch": 96.22622950819672, "grad_norm": 4.161830425262451, "learning_rate": 7.45996070374122e-08, "loss": 0.388, "step": 29349 }, { "epoch": 96.22950819672131, "grad_norm": 5.553586483001709, "learning_rate": 7.44701986763785e-08, "loss": 0.3692, "step": 29350 }, { "epoch": 96.2327868852459, "grad_norm": 5.2481465339660645, "learning_rate": 7.43409022360142e-08, "loss": 0.4151, "step": 29351 }, { "epoch": 96.23606557377049, "grad_norm": 4.768558979034424, "learning_rate": 7.42117177177748e-08, "loss": 0.423, "step": 29352 }, { "epoch": 96.23934426229508, "grad_norm": 14.889542579650879, "learning_rate": 7.408264512311914e-08, "loss": 0.6052, "step": 29353 }, { "epoch": 96.24262295081967, "grad_norm": 5.94811487197876, "learning_rate": 7.395368445350159e-08, "loss": 0.5203, "step": 29354 }, { "epoch": 96.24590163934427, "grad_norm": 4.798574924468994, "learning_rate": 7.382483571037768e-08, "loss": 0.3312, "step": 29355 }, { "epoch": 96.24918032786886, "grad_norm": 3.935377836227417, "learning_rate": 7.369609889519847e-08, "loss": 0.1195, "step": 29356 }, { "epoch": 96.25245901639344, "grad_norm": 4.480583190917969, "learning_rate": 7.356747400941722e-08, "loss": 0.2654, "step": 29357 }, { "epoch": 96.25573770491803, "grad_norm": 3.400784969329834, "learning_rate": 7.343896105448278e-08, "loss": 0.2519, "step": 29358 }, { "epoch": 96.25901639344262, "grad_norm": 4.831879615783691, "learning_rate": 7.331056003184511e-08, "loss": 0.2418, "step": 29359 }, { "epoch": 96.26229508196721, "grad_norm": 3.8481993675231934, "learning_rate": 7.318227094295305e-08, "loss": 0.2529, "step": 29360 }, { "epoch": 96.2655737704918, "grad_norm": 5.1584343910217285, "learning_rate": 7.30540937892521e-08, "loss": 0.4255, "step": 29361 }, { "epoch": 96.26885245901639, "grad_norm": 5.343426704406738, "learning_rate": 7.292602857218667e-08, "loss": 0.3759, "step": 29362 }, { "epoch": 96.27213114754099, "grad_norm": 6.290154457092285, "learning_rate": 7.279807529320226e-08, "loss": 0.3844, "step": 29363 }, { "epoch": 96.27540983606558, "grad_norm": 6.212299346923828, "learning_rate": 7.267023395374106e-08, "loss": 0.3371, "step": 29364 }, { "epoch": 96.27868852459017, "grad_norm": 3.5298595428466797, "learning_rate": 7.254250455524525e-08, "loss": 0.2635, "step": 29365 }, { "epoch": 96.28196721311475, "grad_norm": 4.861206531524658, "learning_rate": 7.241488709915478e-08, "loss": 0.3731, "step": 29366 }, { "epoch": 96.28524590163934, "grad_norm": 5.648654460906982, "learning_rate": 7.228738158690852e-08, "loss": 0.3565, "step": 29367 }, { "epoch": 96.28852459016393, "grad_norm": 9.017930030822754, "learning_rate": 7.215998801994417e-08, "loss": 0.5894, "step": 29368 }, { "epoch": 96.29180327868852, "grad_norm": 5.888311386108398, "learning_rate": 7.203270639969728e-08, "loss": 0.3662, "step": 29369 }, { "epoch": 96.29508196721312, "grad_norm": 7.317410945892334, "learning_rate": 7.190553672760558e-08, "loss": 0.4331, "step": 29370 }, { "epoch": 96.29836065573771, "grad_norm": 6.068874359130859, "learning_rate": 7.177847900510016e-08, "loss": 0.5635, "step": 29371 }, { "epoch": 96.3016393442623, "grad_norm": 6.224608898162842, "learning_rate": 7.165153323361652e-08, "loss": 0.309, "step": 29372 }, { "epoch": 96.30491803278689, "grad_norm": 5.526394844055176, "learning_rate": 7.152469941458462e-08, "loss": 0.4147, "step": 29373 }, { "epoch": 96.30819672131148, "grad_norm": 4.325096607208252, "learning_rate": 7.139797754943444e-08, "loss": 0.404, "step": 29374 }, { "epoch": 96.31147540983606, "grad_norm": 5.242035388946533, "learning_rate": 7.127136763959485e-08, "loss": 0.4515, "step": 29375 }, { "epoch": 96.31475409836065, "grad_norm": 8.575128555297852, "learning_rate": 7.11448696864936e-08, "loss": 0.2787, "step": 29376 }, { "epoch": 96.31803278688524, "grad_norm": 4.022461891174316, "learning_rate": 7.10184836915584e-08, "loss": 0.5663, "step": 29377 }, { "epoch": 96.32131147540984, "grad_norm": 6.127352714538574, "learning_rate": 7.089220965621368e-08, "loss": 0.1677, "step": 29378 }, { "epoch": 96.32459016393443, "grad_norm": 4.769562244415283, "learning_rate": 7.076604758188166e-08, "loss": 0.3855, "step": 29379 }, { "epoch": 96.32786885245902, "grad_norm": 3.692791223526001, "learning_rate": 7.063999746998673e-08, "loss": 0.411, "step": 29380 }, { "epoch": 96.33114754098361, "grad_norm": 4.9196085929870605, "learning_rate": 7.051405932194999e-08, "loss": 0.3351, "step": 29381 }, { "epoch": 96.3344262295082, "grad_norm": 5.161244869232178, "learning_rate": 7.038823313919141e-08, "loss": 0.5547, "step": 29382 }, { "epoch": 96.33770491803278, "grad_norm": 5.081036567687988, "learning_rate": 7.026251892312874e-08, "loss": 0.3308, "step": 29383 }, { "epoch": 96.34098360655737, "grad_norm": 5.384927272796631, "learning_rate": 7.013691667518086e-08, "loss": 0.3342, "step": 29384 }, { "epoch": 96.34426229508196, "grad_norm": 5.277284622192383, "learning_rate": 7.00114263967644e-08, "loss": 0.5764, "step": 29385 }, { "epoch": 96.34754098360656, "grad_norm": 5.0272111892700195, "learning_rate": 6.988604808929377e-08, "loss": 0.4536, "step": 29386 }, { "epoch": 96.35081967213115, "grad_norm": 6.613185882568359, "learning_rate": 6.976078175418233e-08, "loss": 0.4841, "step": 29387 }, { "epoch": 96.35409836065574, "grad_norm": 4.441059589385986, "learning_rate": 6.963562739284225e-08, "loss": 0.42, "step": 29388 }, { "epoch": 96.35737704918033, "grad_norm": 4.095851898193359, "learning_rate": 6.951058500668683e-08, "loss": 0.2701, "step": 29389 }, { "epoch": 96.36065573770492, "grad_norm": 4.836322784423828, "learning_rate": 6.938565459712387e-08, "loss": 0.2813, "step": 29390 }, { "epoch": 96.3639344262295, "grad_norm": 5.801835536956787, "learning_rate": 6.926083616556223e-08, "loss": 0.4004, "step": 29391 }, { "epoch": 96.3672131147541, "grad_norm": 4.482481956481934, "learning_rate": 6.913612971341077e-08, "loss": 0.3271, "step": 29392 }, { "epoch": 96.37049180327868, "grad_norm": 5.385968208312988, "learning_rate": 6.901153524207616e-08, "loss": 0.2899, "step": 29393 }, { "epoch": 96.37377049180328, "grad_norm": 6.692129611968994, "learning_rate": 6.88870527529606e-08, "loss": 0.3916, "step": 29394 }, { "epoch": 96.37704918032787, "grad_norm": 5.811457633972168, "learning_rate": 6.876268224746963e-08, "loss": 0.4178, "step": 29395 }, { "epoch": 96.38032786885246, "grad_norm": 4.912969589233398, "learning_rate": 6.863842372700546e-08, "loss": 0.3191, "step": 29396 }, { "epoch": 96.38360655737705, "grad_norm": 4.045721530914307, "learning_rate": 6.851427719296922e-08, "loss": 0.2741, "step": 29397 }, { "epoch": 96.38688524590164, "grad_norm": 3.5871477127075195, "learning_rate": 6.839024264675975e-08, "loss": 0.2201, "step": 29398 }, { "epoch": 96.39016393442623, "grad_norm": 4.7376933097839355, "learning_rate": 6.826632008977707e-08, "loss": 0.2907, "step": 29399 }, { "epoch": 96.39344262295081, "grad_norm": 8.446418762207031, "learning_rate": 6.814250952341894e-08, "loss": 0.4406, "step": 29400 }, { "epoch": 96.3967213114754, "grad_norm": 4.513125419616699, "learning_rate": 6.801881094907869e-08, "loss": 0.2788, "step": 29401 }, { "epoch": 96.4, "grad_norm": 7.795071125030518, "learning_rate": 6.78952243681541e-08, "loss": 0.2173, "step": 29402 }, { "epoch": 96.4032786885246, "grad_norm": 5.878581523895264, "learning_rate": 6.777174978203849e-08, "loss": 0.3843, "step": 29403 }, { "epoch": 96.40655737704918, "grad_norm": 4.064837455749512, "learning_rate": 6.764838719212297e-08, "loss": 0.3183, "step": 29404 }, { "epoch": 96.40983606557377, "grad_norm": 8.235560417175293, "learning_rate": 6.752513659979754e-08, "loss": 0.3151, "step": 29405 }, { "epoch": 96.41311475409836, "grad_norm": 6.200702667236328, "learning_rate": 6.740199800645442e-08, "loss": 0.4196, "step": 29406 }, { "epoch": 96.41639344262295, "grad_norm": 5.734390735626221, "learning_rate": 6.727897141348139e-08, "loss": 0.2717, "step": 29407 }, { "epoch": 96.41967213114754, "grad_norm": 7.496943473815918, "learning_rate": 6.715605682226511e-08, "loss": 0.4266, "step": 29408 }, { "epoch": 96.42295081967212, "grad_norm": 4.0509138107299805, "learning_rate": 6.703325423419227e-08, "loss": 0.3082, "step": 29409 }, { "epoch": 96.42622950819673, "grad_norm": 4.133906841278076, "learning_rate": 6.691056365064619e-08, "loss": 0.3016, "step": 29410 }, { "epoch": 96.42950819672132, "grad_norm": 3.6575169563293457, "learning_rate": 6.678798507301132e-08, "loss": 0.3535, "step": 29411 }, { "epoch": 96.4327868852459, "grad_norm": 6.599707126617432, "learning_rate": 6.6665518502671e-08, "loss": 0.1845, "step": 29412 }, { "epoch": 96.43606557377049, "grad_norm": 5.14120626449585, "learning_rate": 6.654316394100413e-08, "loss": 0.3045, "step": 29413 }, { "epoch": 96.43934426229508, "grad_norm": 3.9109997749328613, "learning_rate": 6.642092138939182e-08, "loss": 0.2601, "step": 29414 }, { "epoch": 96.44262295081967, "grad_norm": 3.469850540161133, "learning_rate": 6.629879084921187e-08, "loss": 0.3847, "step": 29415 }, { "epoch": 96.44590163934426, "grad_norm": 7.244429111480713, "learning_rate": 6.617677232184095e-08, "loss": 0.4228, "step": 29416 }, { "epoch": 96.44918032786886, "grad_norm": 4.015255928039551, "learning_rate": 6.605486580865683e-08, "loss": 0.2922, "step": 29417 }, { "epoch": 96.45245901639345, "grad_norm": 4.764573097229004, "learning_rate": 6.593307131103066e-08, "loss": 0.3452, "step": 29418 }, { "epoch": 96.45573770491804, "grad_norm": 5.395754337310791, "learning_rate": 6.58113888303391e-08, "loss": 0.2688, "step": 29419 }, { "epoch": 96.45901639344262, "grad_norm": 7.18345832824707, "learning_rate": 6.568981836795441e-08, "loss": 0.3389, "step": 29420 }, { "epoch": 96.46229508196721, "grad_norm": 4.382008075714111, "learning_rate": 6.556835992524436e-08, "loss": 0.4007, "step": 29421 }, { "epoch": 96.4655737704918, "grad_norm": 5.766583442687988, "learning_rate": 6.544701350358118e-08, "loss": 0.3039, "step": 29422 }, { "epoch": 96.46885245901639, "grad_norm": 5.0854387283325195, "learning_rate": 6.532577910433158e-08, "loss": 0.5152, "step": 29423 }, { "epoch": 96.47213114754098, "grad_norm": 6.037526607513428, "learning_rate": 6.520465672886333e-08, "loss": 0.478, "step": 29424 }, { "epoch": 96.47540983606558, "grad_norm": 7.400027751922607, "learning_rate": 6.508364637854314e-08, "loss": 0.3901, "step": 29425 }, { "epoch": 96.47868852459017, "grad_norm": 4.150338649749756, "learning_rate": 6.496274805473324e-08, "loss": 0.3635, "step": 29426 }, { "epoch": 96.48196721311476, "grad_norm": 5.566407680511475, "learning_rate": 6.48419617587992e-08, "loss": 0.404, "step": 29427 }, { "epoch": 96.48524590163935, "grad_norm": 4.813222885131836, "learning_rate": 6.472128749210105e-08, "loss": 0.4554, "step": 29428 }, { "epoch": 96.48852459016393, "grad_norm": 4.368781566619873, "learning_rate": 6.460072525600102e-08, "loss": 0.2114, "step": 29429 }, { "epoch": 96.49180327868852, "grad_norm": 10.555120468139648, "learning_rate": 6.448027505185917e-08, "loss": 0.4609, "step": 29430 }, { "epoch": 96.49508196721311, "grad_norm": 4.377173900604248, "learning_rate": 6.435993688103103e-08, "loss": 0.6441, "step": 29431 }, { "epoch": 96.4983606557377, "grad_norm": 7.427467346191406, "learning_rate": 6.423971074487556e-08, "loss": 0.3591, "step": 29432 }, { "epoch": 96.5016393442623, "grad_norm": 5.998897552490234, "learning_rate": 6.411959664474832e-08, "loss": 0.5815, "step": 29433 }, { "epoch": 96.50491803278689, "grad_norm": 5.484787940979004, "learning_rate": 6.399959458200266e-08, "loss": 0.4023, "step": 29434 }, { "epoch": 96.50819672131148, "grad_norm": 4.151427745819092, "learning_rate": 6.387970455799308e-08, "loss": 0.4146, "step": 29435 }, { "epoch": 96.51147540983607, "grad_norm": 4.353100299835205, "learning_rate": 6.37599265740696e-08, "loss": 0.6287, "step": 29436 }, { "epoch": 96.51475409836065, "grad_norm": 5.004003047943115, "learning_rate": 6.364026063158557e-08, "loss": 0.364, "step": 29437 }, { "epoch": 96.51803278688524, "grad_norm": 4.477813720703125, "learning_rate": 6.352070673188771e-08, "loss": 0.4122, "step": 29438 }, { "epoch": 96.52131147540983, "grad_norm": 6.701846599578857, "learning_rate": 6.340126487632602e-08, "loss": 0.2968, "step": 29439 }, { "epoch": 96.52459016393442, "grad_norm": 6.12866735458374, "learning_rate": 6.328193506624614e-08, "loss": 0.5668, "step": 29440 }, { "epoch": 96.52786885245902, "grad_norm": 4.8402276039123535, "learning_rate": 6.316271730299361e-08, "loss": 0.3814, "step": 29441 }, { "epoch": 96.53114754098361, "grad_norm": 15.537420272827148, "learning_rate": 6.304361158791405e-08, "loss": 0.3951, "step": 29442 }, { "epoch": 96.5344262295082, "grad_norm": 4.847540378570557, "learning_rate": 6.292461792234972e-08, "loss": 0.5159, "step": 29443 }, { "epoch": 96.53770491803279, "grad_norm": 6.594976425170898, "learning_rate": 6.280573630764064e-08, "loss": 0.2988, "step": 29444 }, { "epoch": 96.54098360655738, "grad_norm": 4.499279499053955, "learning_rate": 6.268696674513019e-08, "loss": 0.3703, "step": 29445 }, { "epoch": 96.54426229508196, "grad_norm": 5.626091003417969, "learning_rate": 6.256830923615732e-08, "loss": 0.4374, "step": 29446 }, { "epoch": 96.54754098360655, "grad_norm": 4.313998222351074, "learning_rate": 6.244976378205759e-08, "loss": 0.3839, "step": 29447 }, { "epoch": 96.55081967213114, "grad_norm": 5.533379554748535, "learning_rate": 6.233133038416994e-08, "loss": 0.4074, "step": 29448 }, { "epoch": 96.55409836065574, "grad_norm": 4.506345748901367, "learning_rate": 6.221300904382888e-08, "loss": 0.6101, "step": 29449 }, { "epoch": 96.55737704918033, "grad_norm": 5.798556804656982, "learning_rate": 6.209479976236887e-08, "loss": 0.5405, "step": 29450 }, { "epoch": 96.56065573770492, "grad_norm": 5.044969081878662, "learning_rate": 6.19767025411222e-08, "loss": 0.4059, "step": 29451 }, { "epoch": 96.56393442622951, "grad_norm": 4.706374168395996, "learning_rate": 6.185871738142224e-08, "loss": 0.5011, "step": 29452 }, { "epoch": 96.5672131147541, "grad_norm": 9.483527183532715, "learning_rate": 6.174084428459792e-08, "loss": 0.3575, "step": 29453 }, { "epoch": 96.57049180327868, "grad_norm": 4.981828212738037, "learning_rate": 6.162308325197819e-08, "loss": 0.4789, "step": 29454 }, { "epoch": 96.57377049180327, "grad_norm": 3.8160879611968994, "learning_rate": 6.150543428489308e-08, "loss": 0.4366, "step": 29455 }, { "epoch": 96.57704918032788, "grad_norm": 5.035435199737549, "learning_rate": 6.1387897384666e-08, "loss": 0.2826, "step": 29456 }, { "epoch": 96.58032786885246, "grad_norm": 4.769214153289795, "learning_rate": 6.127047255262475e-08, "loss": 0.3926, "step": 29457 }, { "epoch": 96.58360655737705, "grad_norm": 4.797830581665039, "learning_rate": 6.115315979009273e-08, "loss": 0.5713, "step": 29458 }, { "epoch": 96.58688524590164, "grad_norm": 6.9330153465271, "learning_rate": 6.103595909839222e-08, "loss": 0.2695, "step": 29459 }, { "epoch": 96.59016393442623, "grad_norm": 4.908828258514404, "learning_rate": 6.091887047884548e-08, "loss": 0.412, "step": 29460 }, { "epoch": 96.59344262295082, "grad_norm": 4.708399295806885, "learning_rate": 6.080189393277259e-08, "loss": 0.6469, "step": 29461 }, { "epoch": 96.5967213114754, "grad_norm": 4.56095552444458, "learning_rate": 6.068502946149135e-08, "loss": 0.2351, "step": 29462 }, { "epoch": 96.6, "grad_norm": 4.86544942855835, "learning_rate": 6.056827706632185e-08, "loss": 0.4841, "step": 29463 }, { "epoch": 96.6032786885246, "grad_norm": 5.028905391693115, "learning_rate": 6.045163674857968e-08, "loss": 0.4701, "step": 29464 }, { "epoch": 96.60655737704919, "grad_norm": 4.197055816650391, "learning_rate": 6.033510850957936e-08, "loss": 0.3258, "step": 29465 }, { "epoch": 96.60983606557377, "grad_norm": 4.138554096221924, "learning_rate": 6.021869235063538e-08, "loss": 0.2177, "step": 29466 }, { "epoch": 96.61311475409836, "grad_norm": 6.276188373565674, "learning_rate": 6.010238827306114e-08, "loss": 0.5555, "step": 29467 }, { "epoch": 96.61639344262295, "grad_norm": 4.581082820892334, "learning_rate": 5.998619627816671e-08, "loss": 0.3008, "step": 29468 }, { "epoch": 96.61967213114754, "grad_norm": 5.435534477233887, "learning_rate": 5.987011636726326e-08, "loss": 0.4302, "step": 29469 }, { "epoch": 96.62295081967213, "grad_norm": 5.706219673156738, "learning_rate": 5.975414854165862e-08, "loss": 0.3419, "step": 29470 }, { "epoch": 96.62622950819672, "grad_norm": 7.290224552154541, "learning_rate": 5.963829280266176e-08, "loss": 0.367, "step": 29471 }, { "epoch": 96.62950819672132, "grad_norm": 5.8586907386779785, "learning_rate": 5.952254915157829e-08, "loss": 0.5825, "step": 29472 }, { "epoch": 96.6327868852459, "grad_norm": 4.70114803314209, "learning_rate": 5.940691758971384e-08, "loss": 0.3266, "step": 29473 }, { "epoch": 96.6360655737705, "grad_norm": 4.503748893737793, "learning_rate": 5.9291398118371815e-08, "loss": 0.1884, "step": 29474 }, { "epoch": 96.63934426229508, "grad_norm": 5.879396915435791, "learning_rate": 5.9175990738854495e-08, "loss": 0.4485, "step": 29475 }, { "epoch": 96.64262295081967, "grad_norm": 8.91202449798584, "learning_rate": 5.906069545246529e-08, "loss": 0.3739, "step": 29476 }, { "epoch": 96.64590163934426, "grad_norm": 6.255204200744629, "learning_rate": 5.8945512260502045e-08, "loss": 0.4191, "step": 29477 }, { "epoch": 96.64918032786885, "grad_norm": 7.003516674041748, "learning_rate": 5.883044116426373e-08, "loss": 0.4766, "step": 29478 }, { "epoch": 96.65245901639344, "grad_norm": 4.3049540519714355, "learning_rate": 5.871548216504819e-08, "loss": 0.228, "step": 29479 }, { "epoch": 96.65573770491804, "grad_norm": 4.935245990753174, "learning_rate": 5.8600635264152164e-08, "loss": 0.163, "step": 29480 }, { "epoch": 96.65901639344263, "grad_norm": 4.147060871124268, "learning_rate": 5.848590046287128e-08, "loss": 0.3221, "step": 29481 }, { "epoch": 96.66229508196722, "grad_norm": 5.061028957366943, "learning_rate": 5.837127776249785e-08, "loss": 0.3793, "step": 29482 }, { "epoch": 96.6655737704918, "grad_norm": 4.845670223236084, "learning_rate": 5.825676716432527e-08, "loss": 0.3198, "step": 29483 }, { "epoch": 96.66885245901639, "grad_norm": 4.504861354827881, "learning_rate": 5.8142368669643625e-08, "loss": 0.5641, "step": 29484 }, { "epoch": 96.67213114754098, "grad_norm": 3.954864740371704, "learning_rate": 5.802808227974521e-08, "loss": 0.443, "step": 29485 }, { "epoch": 96.67540983606557, "grad_norm": 6.482718467712402, "learning_rate": 5.7913907995915675e-08, "loss": 0.258, "step": 29486 }, { "epoch": 96.67868852459016, "grad_norm": 14.298025131225586, "learning_rate": 5.7799845819445086e-08, "loss": 0.4303, "step": 29487 }, { "epoch": 96.68196721311476, "grad_norm": 4.576416492462158, "learning_rate": 5.768589575161798e-08, "loss": 0.2918, "step": 29488 }, { "epoch": 96.68524590163935, "grad_norm": 4.020603656768799, "learning_rate": 5.7572057793719995e-08, "loss": 0.3632, "step": 29489 }, { "epoch": 96.68852459016394, "grad_norm": 3.829806327819824, "learning_rate": 5.745833194703454e-08, "loss": 0.3579, "step": 29490 }, { "epoch": 96.69180327868852, "grad_norm": 4.072887420654297, "learning_rate": 5.734471821284393e-08, "loss": 0.1186, "step": 29491 }, { "epoch": 96.69508196721311, "grad_norm": 7.211709976196289, "learning_rate": 5.723121659242936e-08, "loss": 0.1331, "step": 29492 }, { "epoch": 96.6983606557377, "grad_norm": 3.936835765838623, "learning_rate": 5.711782708707092e-08, "loss": 0.459, "step": 29493 }, { "epoch": 96.70163934426229, "grad_norm": 5.763482093811035, "learning_rate": 5.7004549698046474e-08, "loss": 0.4458, "step": 29494 }, { "epoch": 96.70491803278688, "grad_norm": 5.457973957061768, "learning_rate": 5.6891384426635e-08, "loss": 0.3081, "step": 29495 }, { "epoch": 96.70819672131148, "grad_norm": 5.9818854331970215, "learning_rate": 5.6778331274109924e-08, "loss": 0.4599, "step": 29496 }, { "epoch": 96.71147540983607, "grad_norm": 5.453226089477539, "learning_rate": 5.666539024174911e-08, "loss": 0.3643, "step": 29497 }, { "epoch": 96.71475409836066, "grad_norm": 3.8736183643341064, "learning_rate": 5.6552561330823765e-08, "loss": 0.5403, "step": 29498 }, { "epoch": 96.71803278688525, "grad_norm": 5.0859761238098145, "learning_rate": 5.643984454260621e-08, "loss": 0.332, "step": 29499 }, { "epoch": 96.72131147540983, "grad_norm": 4.865176677703857, "learning_rate": 5.6327239878368745e-08, "loss": 0.1897, "step": 29500 }, { "epoch": 96.72459016393442, "grad_norm": 3.981116533279419, "learning_rate": 5.621474733938037e-08, "loss": 0.2995, "step": 29501 }, { "epoch": 96.72786885245901, "grad_norm": 4.922768592834473, "learning_rate": 5.6102366926910066e-08, "loss": 0.3083, "step": 29502 }, { "epoch": 96.73114754098361, "grad_norm": 3.6948187351226807, "learning_rate": 5.599009864222349e-08, "loss": 0.2579, "step": 29503 }, { "epoch": 96.7344262295082, "grad_norm": 4.315919876098633, "learning_rate": 5.587794248658851e-08, "loss": 0.4574, "step": 29504 }, { "epoch": 96.73770491803279, "grad_norm": 4.812675952911377, "learning_rate": 5.576589846126968e-08, "loss": 0.2705, "step": 29505 }, { "epoch": 96.74098360655738, "grad_norm": 4.809929847717285, "learning_rate": 5.5653966567528194e-08, "loss": 0.2651, "step": 29506 }, { "epoch": 96.74426229508197, "grad_norm": 4.844249725341797, "learning_rate": 5.554214680662973e-08, "loss": 0.4172, "step": 29507 }, { "epoch": 96.74754098360656, "grad_norm": 3.8432483673095703, "learning_rate": 5.5430439179832154e-08, "loss": 0.336, "step": 29508 }, { "epoch": 96.75081967213114, "grad_norm": 4.019717693328857, "learning_rate": 5.5318843688395575e-08, "loss": 0.3736, "step": 29509 }, { "epoch": 96.75409836065573, "grad_norm": 4.5810394287109375, "learning_rate": 5.52073603335801e-08, "loss": 0.5394, "step": 29510 }, { "epoch": 96.75737704918033, "grad_norm": 4.337751865386963, "learning_rate": 5.509598911664027e-08, "loss": 0.2669, "step": 29511 }, { "epoch": 96.76065573770492, "grad_norm": 4.059579849243164, "learning_rate": 5.498473003883398e-08, "loss": 0.3461, "step": 29512 }, { "epoch": 96.76393442622951, "grad_norm": 4.692518711090088, "learning_rate": 5.487358310141577e-08, "loss": 0.3825, "step": 29513 }, { "epoch": 96.7672131147541, "grad_norm": 5.216132164001465, "learning_rate": 5.476254830563688e-08, "loss": 0.5292, "step": 29514 }, { "epoch": 96.77049180327869, "grad_norm": 6.47929573059082, "learning_rate": 5.465162565275184e-08, "loss": 0.1263, "step": 29515 }, { "epoch": 96.77377049180328, "grad_norm": 5.475864887237549, "learning_rate": 5.4540815144009665e-08, "loss": 0.2649, "step": 29516 }, { "epoch": 96.77704918032786, "grad_norm": 5.798847675323486, "learning_rate": 5.4430116780661565e-08, "loss": 0.6296, "step": 29517 }, { "epoch": 96.78032786885245, "grad_norm": 5.984364032745361, "learning_rate": 5.431953056395323e-08, "loss": 0.2331, "step": 29518 }, { "epoch": 96.78360655737706, "grad_norm": 6.776766300201416, "learning_rate": 5.4209056495133636e-08, "loss": 0.5116, "step": 29519 }, { "epoch": 96.78688524590164, "grad_norm": 4.126212120056152, "learning_rate": 5.409869457544847e-08, "loss": 0.437, "step": 29520 }, { "epoch": 96.79016393442623, "grad_norm": 3.724787473678589, "learning_rate": 5.3988444806141184e-08, "loss": 0.53, "step": 29521 }, { "epoch": 96.79344262295082, "grad_norm": 4.344531536102295, "learning_rate": 5.387830718845521e-08, "loss": 0.3304, "step": 29522 }, { "epoch": 96.79672131147541, "grad_norm": 3.8084728717803955, "learning_rate": 5.376828172363291e-08, "loss": 0.1999, "step": 29523 }, { "epoch": 96.8, "grad_norm": 6.339277267456055, "learning_rate": 5.365836841291439e-08, "loss": 0.3552, "step": 29524 }, { "epoch": 96.80327868852459, "grad_norm": 5.304760932922363, "learning_rate": 5.3548567257540873e-08, "loss": 0.6939, "step": 29525 }, { "epoch": 96.80655737704917, "grad_norm": 4.135941028594971, "learning_rate": 5.343887825874694e-08, "loss": 0.308, "step": 29526 }, { "epoch": 96.80983606557378, "grad_norm": 4.646366119384766, "learning_rate": 5.3329301417772704e-08, "loss": 0.3586, "step": 29527 }, { "epoch": 96.81311475409836, "grad_norm": 4.700425624847412, "learning_rate": 5.3219836735852736e-08, "loss": 0.2575, "step": 29528 }, { "epoch": 96.81639344262295, "grad_norm": 7.119447231292725, "learning_rate": 5.3110484214220495e-08, "loss": 0.5061, "step": 29529 }, { "epoch": 96.81967213114754, "grad_norm": 5.820043563842773, "learning_rate": 5.300124385410943e-08, "loss": 0.364, "step": 29530 }, { "epoch": 96.82295081967213, "grad_norm": 6.0544233322143555, "learning_rate": 5.2892115656751894e-08, "loss": 0.562, "step": 29531 }, { "epoch": 96.82622950819672, "grad_norm": 5.905808925628662, "learning_rate": 5.278309962337913e-08, "loss": 0.3114, "step": 29532 }, { "epoch": 96.8295081967213, "grad_norm": 4.999993324279785, "learning_rate": 5.267419575521793e-08, "loss": 0.3481, "step": 29533 }, { "epoch": 96.8327868852459, "grad_norm": 4.938006401062012, "learning_rate": 5.2565404053499525e-08, "loss": 0.3827, "step": 29534 }, { "epoch": 96.8360655737705, "grad_norm": 4.706785678863525, "learning_rate": 5.245672451944739e-08, "loss": 0.3042, "step": 29535 }, { "epoch": 96.83934426229509, "grad_norm": 5.361917495727539, "learning_rate": 5.234815715428943e-08, "loss": 0.4037, "step": 29536 }, { "epoch": 96.84262295081967, "grad_norm": 3.805973768234253, "learning_rate": 5.223970195924799e-08, "loss": 0.4405, "step": 29537 }, { "epoch": 96.84590163934426, "grad_norm": 6.95817232131958, "learning_rate": 5.213135893554766e-08, "loss": 0.5672, "step": 29538 }, { "epoch": 96.84918032786885, "grad_norm": 8.670868873596191, "learning_rate": 5.202312808440968e-08, "loss": 0.4464, "step": 29539 }, { "epoch": 96.85245901639344, "grad_norm": 4.270807266235352, "learning_rate": 5.191500940705418e-08, "loss": 0.3278, "step": 29540 }, { "epoch": 96.85573770491803, "grad_norm": 6.551933765411377, "learning_rate": 5.18070029047002e-08, "loss": 0.3991, "step": 29541 }, { "epoch": 96.85901639344263, "grad_norm": 6.105630397796631, "learning_rate": 5.1699108578565636e-08, "loss": 0.2028, "step": 29542 }, { "epoch": 96.86229508196722, "grad_norm": 8.500046730041504, "learning_rate": 5.159132642986731e-08, "loss": 0.5432, "step": 29543 }, { "epoch": 96.8655737704918, "grad_norm": 8.285876274108887, "learning_rate": 5.1483656459819785e-08, "loss": 0.5954, "step": 29544 }, { "epoch": 96.8688524590164, "grad_norm": 4.102638244628906, "learning_rate": 5.137609866963877e-08, "loss": 0.1653, "step": 29545 }, { "epoch": 96.87213114754098, "grad_norm": 4.526051998138428, "learning_rate": 5.126865306053663e-08, "loss": 0.4357, "step": 29546 }, { "epoch": 96.87540983606557, "grad_norm": 4.853641510009766, "learning_rate": 5.116131963372462e-08, "loss": 0.2173, "step": 29547 }, { "epoch": 96.87868852459016, "grad_norm": 5.263369083404541, "learning_rate": 5.105409839041175e-08, "loss": 0.2923, "step": 29548 }, { "epoch": 96.88196721311475, "grad_norm": 3.9899353981018066, "learning_rate": 5.0946989331808196e-08, "loss": 0.2591, "step": 29549 }, { "epoch": 96.88524590163935, "grad_norm": 5.107419490814209, "learning_rate": 5.083999245912297e-08, "loss": 0.3918, "step": 29550 }, { "epoch": 96.88852459016394, "grad_norm": 6.603181838989258, "learning_rate": 5.073310777356066e-08, "loss": 0.4858, "step": 29551 }, { "epoch": 96.89180327868853, "grad_norm": 6.375115394592285, "learning_rate": 5.0626335276326986e-08, "loss": 0.3614, "step": 29552 }, { "epoch": 96.89508196721312, "grad_norm": 4.775113105773926, "learning_rate": 5.051967496862653e-08, "loss": 0.3155, "step": 29553 }, { "epoch": 96.8983606557377, "grad_norm": 4.1350626945495605, "learning_rate": 5.041312685166166e-08, "loss": 0.4496, "step": 29554 }, { "epoch": 96.90163934426229, "grad_norm": 5.214720726013184, "learning_rate": 5.030669092663365e-08, "loss": 0.2782, "step": 29555 }, { "epoch": 96.90491803278688, "grad_norm": 5.988236427307129, "learning_rate": 5.0200367194742636e-08, "loss": 0.3914, "step": 29556 }, { "epoch": 96.90819672131147, "grad_norm": 5.095818042755127, "learning_rate": 5.009415565718767e-08, "loss": 0.2006, "step": 29557 }, { "epoch": 96.91147540983607, "grad_norm": 5.244061470031738, "learning_rate": 4.9988056315166675e-08, "loss": 0.265, "step": 29558 }, { "epoch": 96.91475409836066, "grad_norm": 4.5557637214660645, "learning_rate": 4.988206916987537e-08, "loss": 0.2445, "step": 29559 }, { "epoch": 96.91803278688525, "grad_norm": 4.242135524749756, "learning_rate": 4.977619422250946e-08, "loss": 0.2606, "step": 29560 }, { "epoch": 96.92131147540984, "grad_norm": 3.8342831134796143, "learning_rate": 4.967043147426354e-08, "loss": 0.2896, "step": 29561 }, { "epoch": 96.92459016393443, "grad_norm": 4.237888813018799, "learning_rate": 4.956478092632777e-08, "loss": 0.1408, "step": 29562 }, { "epoch": 96.92786885245901, "grad_norm": 4.988141059875488, "learning_rate": 4.945924257989565e-08, "loss": 0.5708, "step": 29563 }, { "epoch": 96.9311475409836, "grad_norm": 3.5006041526794434, "learning_rate": 4.9353816436156224e-08, "loss": 0.1316, "step": 29564 }, { "epoch": 96.93442622950819, "grad_norm": 5.080824851989746, "learning_rate": 4.9248502496298534e-08, "loss": 0.2038, "step": 29565 }, { "epoch": 96.9377049180328, "grad_norm": 12.102027893066406, "learning_rate": 4.914330076151053e-08, "loss": 0.4622, "step": 29566 }, { "epoch": 96.94098360655738, "grad_norm": 5.350330829620361, "learning_rate": 4.903821123297792e-08, "loss": 0.3959, "step": 29567 }, { "epoch": 96.94426229508197, "grad_norm": 5.271989345550537, "learning_rate": 4.8933233911886426e-08, "loss": 0.3792, "step": 29568 }, { "epoch": 96.94754098360656, "grad_norm": 3.9100215435028076, "learning_rate": 4.8828368799418436e-08, "loss": 0.3676, "step": 29569 }, { "epoch": 96.95081967213115, "grad_norm": 5.643346309661865, "learning_rate": 4.872361589675745e-08, "loss": 0.3507, "step": 29570 }, { "epoch": 96.95409836065573, "grad_norm": 3.743549108505249, "learning_rate": 4.861897520508474e-08, "loss": 0.2336, "step": 29571 }, { "epoch": 96.95737704918032, "grad_norm": 4.206358432769775, "learning_rate": 4.8514446725580476e-08, "loss": 0.4837, "step": 29572 }, { "epoch": 96.96065573770491, "grad_norm": 5.091556072235107, "learning_rate": 4.8410030459421497e-08, "loss": 0.2317, "step": 29573 }, { "epoch": 96.96393442622951, "grad_norm": 4.906622409820557, "learning_rate": 4.8305726407786855e-08, "loss": 0.5201, "step": 29574 }, { "epoch": 96.9672131147541, "grad_norm": 4.269161701202393, "learning_rate": 4.820153457185228e-08, "loss": 0.3851, "step": 29575 }, { "epoch": 96.97049180327869, "grad_norm": 4.586702346801758, "learning_rate": 4.80974549527935e-08, "loss": 0.4307, "step": 29576 }, { "epoch": 96.97377049180328, "grad_norm": 4.474746227264404, "learning_rate": 4.79934875517829e-08, "loss": 0.6132, "step": 29577 }, { "epoch": 96.97704918032787, "grad_norm": 4.6173319816589355, "learning_rate": 4.788963236999289e-08, "loss": 0.2885, "step": 29578 }, { "epoch": 96.98032786885246, "grad_norm": 4.820502281188965, "learning_rate": 4.778588940859474e-08, "loss": 0.3381, "step": 29579 }, { "epoch": 96.98360655737704, "grad_norm": 5.676227569580078, "learning_rate": 4.7682258668758643e-08, "loss": 0.2804, "step": 29580 }, { "epoch": 96.98688524590163, "grad_norm": 5.000970363616943, "learning_rate": 4.757874015165365e-08, "loss": 0.24, "step": 29581 }, { "epoch": 96.99016393442623, "grad_norm": 5.59891414642334, "learning_rate": 4.7475333858445496e-08, "loss": 0.3922, "step": 29582 }, { "epoch": 96.99344262295082, "grad_norm": 4.823096752166748, "learning_rate": 4.7372039790299916e-08, "loss": 0.198, "step": 29583 }, { "epoch": 96.99672131147541, "grad_norm": 5.392541885375977, "learning_rate": 4.7268857948384875e-08, "loss": 0.288, "step": 29584 }, { "epoch": 97.0, "grad_norm": 3.753756523132324, "learning_rate": 4.716578833386054e-08, "loss": 0.204, "step": 29585 }, { "epoch": 97.00327868852459, "grad_norm": 4.5701212882995605, "learning_rate": 4.706283094789044e-08, "loss": 0.2451, "step": 29586 }, { "epoch": 97.00655737704918, "grad_norm": 5.043417930603027, "learning_rate": 4.6959985791634746e-08, "loss": 0.4818, "step": 29587 }, { "epoch": 97.00983606557377, "grad_norm": 4.579336643218994, "learning_rate": 4.6857252866254754e-08, "loss": 0.533, "step": 29588 }, { "epoch": 97.01311475409837, "grad_norm": 5.6517791748046875, "learning_rate": 4.675463217290732e-08, "loss": 0.3201, "step": 29589 }, { "epoch": 97.01639344262296, "grad_norm": 4.44159460067749, "learning_rate": 4.665212371275041e-08, "loss": 0.3078, "step": 29590 }, { "epoch": 97.01967213114754, "grad_norm": 5.800033092498779, "learning_rate": 4.654972748693976e-08, "loss": 0.5504, "step": 29591 }, { "epoch": 97.02295081967213, "grad_norm": 5.045541286468506, "learning_rate": 4.644744349662999e-08, "loss": 0.3284, "step": 29592 }, { "epoch": 97.02622950819672, "grad_norm": 4.585234642028809, "learning_rate": 4.634527174297465e-08, "loss": 0.6239, "step": 29593 }, { "epoch": 97.02950819672131, "grad_norm": 4.437098979949951, "learning_rate": 4.624321222712502e-08, "loss": 0.3769, "step": 29594 }, { "epoch": 97.0327868852459, "grad_norm": 4.123849868774414, "learning_rate": 4.614126495023241e-08, "loss": 0.3521, "step": 29595 }, { "epoch": 97.03606557377049, "grad_norm": 6.969988822937012, "learning_rate": 4.603942991344701e-08, "loss": 0.2404, "step": 29596 }, { "epoch": 97.03934426229509, "grad_norm": 4.468992233276367, "learning_rate": 4.5937707117915675e-08, "loss": 0.2433, "step": 29597 }, { "epoch": 97.04262295081968, "grad_norm": 5.930757999420166, "learning_rate": 4.583609656478749e-08, "loss": 0.4824, "step": 29598 }, { "epoch": 97.04590163934427, "grad_norm": 5.486489772796631, "learning_rate": 4.573459825520599e-08, "loss": 0.3373, "step": 29599 }, { "epoch": 97.04918032786885, "grad_norm": 3.533229351043701, "learning_rate": 4.5633212190318024e-08, "loss": 0.3801, "step": 29600 }, { "epoch": 97.05245901639344, "grad_norm": 7.451013088226318, "learning_rate": 4.553193837126379e-08, "loss": 0.2759, "step": 29601 }, { "epoch": 97.05573770491803, "grad_norm": 9.939506530761719, "learning_rate": 4.543077679918795e-08, "loss": 0.3722, "step": 29602 }, { "epoch": 97.05901639344262, "grad_norm": 4.732443332672119, "learning_rate": 4.532972747523068e-08, "loss": 0.7027, "step": 29603 }, { "epoch": 97.0622950819672, "grad_norm": 3.983228921890259, "learning_rate": 4.5228790400531077e-08, "loss": 0.5039, "step": 29604 }, { "epoch": 97.06557377049181, "grad_norm": 5.77244234085083, "learning_rate": 4.512796557622601e-08, "loss": 0.4427, "step": 29605 }, { "epoch": 97.0688524590164, "grad_norm": 5.325488567352295, "learning_rate": 4.5027253003454566e-08, "loss": 0.2188, "step": 29606 }, { "epoch": 97.07213114754099, "grad_norm": 5.864945411682129, "learning_rate": 4.492665268335139e-08, "loss": 0.4964, "step": 29607 }, { "epoch": 97.07540983606557, "grad_norm": 5.006041526794434, "learning_rate": 4.482616461705003e-08, "loss": 0.5658, "step": 29608 }, { "epoch": 97.07868852459016, "grad_norm": 4.17983341217041, "learning_rate": 4.4725788805685125e-08, "loss": 0.3106, "step": 29609 }, { "epoch": 97.08196721311475, "grad_norm": 5.066337585449219, "learning_rate": 4.462552525038799e-08, "loss": 0.231, "step": 29610 }, { "epoch": 97.08524590163934, "grad_norm": 4.487323760986328, "learning_rate": 4.452537395228884e-08, "loss": 0.3832, "step": 29611 }, { "epoch": 97.08852459016393, "grad_norm": 4.185271263122559, "learning_rate": 4.442533491251677e-08, "loss": 0.4682, "step": 29612 }, { "epoch": 97.09180327868853, "grad_norm": 7.100903511047363, "learning_rate": 4.432540813220088e-08, "loss": 0.4301, "step": 29613 }, { "epoch": 97.09508196721312, "grad_norm": 5.3754730224609375, "learning_rate": 4.422559361246692e-08, "loss": 0.4176, "step": 29614 }, { "epoch": 97.09836065573771, "grad_norm": 6.258880615234375, "learning_rate": 4.4125891354441786e-08, "loss": 0.3981, "step": 29615 }, { "epoch": 97.1016393442623, "grad_norm": 4.598592281341553, "learning_rate": 4.40263013592479e-08, "loss": 0.3285, "step": 29616 }, { "epoch": 97.10491803278688, "grad_norm": 4.13726282119751, "learning_rate": 4.392682362800882e-08, "loss": 0.3358, "step": 29617 }, { "epoch": 97.10819672131147, "grad_norm": 4.9268646240234375, "learning_rate": 4.382745816184697e-08, "loss": 0.166, "step": 29618 }, { "epoch": 97.11147540983606, "grad_norm": 5.431654453277588, "learning_rate": 4.372820496188257e-08, "loss": 0.5693, "step": 29619 }, { "epoch": 97.11475409836065, "grad_norm": 4.629000663757324, "learning_rate": 4.3629064029233615e-08, "loss": 0.4449, "step": 29620 }, { "epoch": 97.11803278688525, "grad_norm": 7.316647052764893, "learning_rate": 4.353003536502032e-08, "loss": 0.2315, "step": 29621 }, { "epoch": 97.12131147540984, "grad_norm": 5.1364006996154785, "learning_rate": 4.343111897035623e-08, "loss": 0.3599, "step": 29622 }, { "epoch": 97.12459016393443, "grad_norm": 4.86500358581543, "learning_rate": 4.333231484636047e-08, "loss": 0.4804, "step": 29623 }, { "epoch": 97.12786885245902, "grad_norm": 5.651515960693359, "learning_rate": 4.323362299414435e-08, "loss": 0.318, "step": 29624 }, { "epoch": 97.1311475409836, "grad_norm": 3.694161891937256, "learning_rate": 4.313504341482144e-08, "loss": 0.2398, "step": 29625 }, { "epoch": 97.1344262295082, "grad_norm": 5.498930931091309, "learning_rate": 4.303657610950418e-08, "loss": 0.2551, "step": 29626 }, { "epoch": 97.13770491803278, "grad_norm": 5.602179527282715, "learning_rate": 4.2938221079300566e-08, "loss": 0.3293, "step": 29627 }, { "epoch": 97.14098360655737, "grad_norm": 5.470434188842773, "learning_rate": 4.283997832532305e-08, "loss": 0.3462, "step": 29628 }, { "epoch": 97.14426229508197, "grad_norm": 4.339035987854004, "learning_rate": 4.27418478486763e-08, "loss": 0.2023, "step": 29629 }, { "epoch": 97.14754098360656, "grad_norm": 20.009349822998047, "learning_rate": 4.2643829650469426e-08, "loss": 0.2472, "step": 29630 }, { "epoch": 97.15081967213115, "grad_norm": 3.4046058654785156, "learning_rate": 4.254592373180488e-08, "loss": 0.1213, "step": 29631 }, { "epoch": 97.15409836065574, "grad_norm": 7.49746036529541, "learning_rate": 4.244813009378956e-08, "loss": 0.422, "step": 29632 }, { "epoch": 97.15737704918033, "grad_norm": 4.771040439605713, "learning_rate": 4.23504487375237e-08, "loss": 0.3913, "step": 29633 }, { "epoch": 97.16065573770491, "grad_norm": 4.552229404449463, "learning_rate": 4.225287966411085e-08, "loss": 0.4744, "step": 29634 }, { "epoch": 97.1639344262295, "grad_norm": 5.341343879699707, "learning_rate": 4.215542287465013e-08, "loss": 0.605, "step": 29635 }, { "epoch": 97.1672131147541, "grad_norm": 4.786452770233154, "learning_rate": 4.205807837023956e-08, "loss": 0.576, "step": 29636 }, { "epoch": 97.1704918032787, "grad_norm": 4.922469139099121, "learning_rate": 4.1960846151979374e-08, "loss": 0.1745, "step": 29637 }, { "epoch": 97.17377049180328, "grad_norm": 4.359311580657959, "learning_rate": 4.186372622096313e-08, "loss": 0.4389, "step": 29638 }, { "epoch": 97.17704918032787, "grad_norm": 4.758893966674805, "learning_rate": 4.176671857828773e-08, "loss": 0.2995, "step": 29639 }, { "epoch": 97.18032786885246, "grad_norm": 8.286011695861816, "learning_rate": 4.1669823225046756e-08, "loss": 0.4741, "step": 29640 }, { "epoch": 97.18360655737705, "grad_norm": 5.041018486022949, "learning_rate": 4.157304016233266e-08, "loss": 0.3068, "step": 29641 }, { "epoch": 97.18688524590164, "grad_norm": 5.233048439025879, "learning_rate": 4.1476369391236785e-08, "loss": 0.4325, "step": 29642 }, { "epoch": 97.19016393442622, "grad_norm": 5.406852722167969, "learning_rate": 4.1379810912848264e-08, "loss": 0.3627, "step": 29643 }, { "epoch": 97.19344262295083, "grad_norm": 8.418763160705566, "learning_rate": 4.128336472825734e-08, "loss": 0.4533, "step": 29644 }, { "epoch": 97.19672131147541, "grad_norm": 6.463350772857666, "learning_rate": 4.118703083855091e-08, "loss": 0.1953, "step": 29645 }, { "epoch": 97.2, "grad_norm": 5.458674430847168, "learning_rate": 4.109080924481479e-08, "loss": 0.2935, "step": 29646 }, { "epoch": 97.20327868852459, "grad_norm": 3.8663434982299805, "learning_rate": 4.0994699948135876e-08, "loss": 0.2998, "step": 29647 }, { "epoch": 97.20655737704918, "grad_norm": 4.263680934906006, "learning_rate": 4.0898702949594415e-08, "loss": 0.4124, "step": 29648 }, { "epoch": 97.20983606557377, "grad_norm": 4.944658279418945, "learning_rate": 4.080281825027621e-08, "loss": 0.591, "step": 29649 }, { "epoch": 97.21311475409836, "grad_norm": 4.0977678298950195, "learning_rate": 4.070704585126151e-08, "loss": 0.2894, "step": 29650 }, { "epoch": 97.21639344262294, "grad_norm": 5.0481791496276855, "learning_rate": 4.061138575362944e-08, "loss": 0.2625, "step": 29651 }, { "epoch": 97.21967213114755, "grad_norm": 4.100386619567871, "learning_rate": 4.051583795845915e-08, "loss": 0.4014, "step": 29652 }, { "epoch": 97.22295081967214, "grad_norm": 4.285146713256836, "learning_rate": 4.042040246682755e-08, "loss": 0.2512, "step": 29653 }, { "epoch": 97.22622950819672, "grad_norm": 20.143484115600586, "learning_rate": 4.032507927981266e-08, "loss": 0.4115, "step": 29654 }, { "epoch": 97.22950819672131, "grad_norm": 4.41467809677124, "learning_rate": 4.022986839848697e-08, "loss": 0.5268, "step": 29655 }, { "epoch": 97.2327868852459, "grad_norm": 5.880235195159912, "learning_rate": 4.013476982392517e-08, "loss": 0.2657, "step": 29656 }, { "epoch": 97.23606557377049, "grad_norm": 4.366344928741455, "learning_rate": 4.0039783557199727e-08, "loss": 0.394, "step": 29657 }, { "epoch": 97.23934426229508, "grad_norm": 5.753603935241699, "learning_rate": 3.994490959938091e-08, "loss": 0.4188, "step": 29658 }, { "epoch": 97.24262295081967, "grad_norm": 4.680558204650879, "learning_rate": 3.985014795154008e-08, "loss": 0.3997, "step": 29659 }, { "epoch": 97.24590163934427, "grad_norm": 3.2971041202545166, "learning_rate": 3.9755498614743036e-08, "loss": 0.4879, "step": 29660 }, { "epoch": 97.24918032786886, "grad_norm": 5.508087158203125, "learning_rate": 3.9660961590060056e-08, "loss": 0.5148, "step": 29661 }, { "epoch": 97.25245901639344, "grad_norm": 4.915985584259033, "learning_rate": 3.9566536878555825e-08, "loss": 0.3171, "step": 29662 }, { "epoch": 97.25573770491803, "grad_norm": 6.825986385345459, "learning_rate": 3.9472224481296174e-08, "loss": 0.5441, "step": 29663 }, { "epoch": 97.25901639344262, "grad_norm": 5.521847724914551, "learning_rate": 3.937802439934135e-08, "loss": 0.561, "step": 29664 }, { "epoch": 97.26229508196721, "grad_norm": 6.05337381362915, "learning_rate": 3.928393663375718e-08, "loss": 0.1897, "step": 29665 }, { "epoch": 97.2655737704918, "grad_norm": 4.8158063888549805, "learning_rate": 3.918996118560281e-08, "loss": 0.2124, "step": 29666 }, { "epoch": 97.26885245901639, "grad_norm": 5.338727951049805, "learning_rate": 3.9096098055938505e-08, "loss": 0.3586, "step": 29667 }, { "epoch": 97.27213114754099, "grad_norm": 5.23067569732666, "learning_rate": 3.9002347245822304e-08, "loss": 0.3816, "step": 29668 }, { "epoch": 97.27540983606558, "grad_norm": 6.458285808563232, "learning_rate": 3.890870875631225e-08, "loss": 0.3965, "step": 29669 }, { "epoch": 97.27868852459017, "grad_norm": 4.718104839324951, "learning_rate": 3.881518258846195e-08, "loss": 0.3955, "step": 29670 }, { "epoch": 97.28196721311475, "grad_norm": 5.1666789054870605, "learning_rate": 3.8721768743328334e-08, "loss": 0.4855, "step": 29671 }, { "epoch": 97.28524590163934, "grad_norm": 3.647930145263672, "learning_rate": 3.862846722196389e-08, "loss": 0.3643, "step": 29672 }, { "epoch": 97.28852459016393, "grad_norm": 4.389071941375732, "learning_rate": 3.8535278025421116e-08, "loss": 0.4992, "step": 29673 }, { "epoch": 97.29180327868852, "grad_norm": 6.969853401184082, "learning_rate": 3.844220115474917e-08, "loss": 0.1549, "step": 29674 }, { "epoch": 97.29508196721312, "grad_norm": 6.442582130432129, "learning_rate": 3.834923661099943e-08, "loss": 0.267, "step": 29675 }, { "epoch": 97.29836065573771, "grad_norm": 5.222379207611084, "learning_rate": 3.825638439521995e-08, "loss": 0.3603, "step": 29676 }, { "epoch": 97.3016393442623, "grad_norm": 4.349441051483154, "learning_rate": 3.816364450845766e-08, "loss": 0.3044, "step": 29677 }, { "epoch": 97.30491803278689, "grad_norm": 4.7754621505737305, "learning_rate": 3.80710169517573e-08, "loss": 0.2646, "step": 29678 }, { "epoch": 97.30819672131148, "grad_norm": 3.8512063026428223, "learning_rate": 3.797850172616358e-08, "loss": 0.463, "step": 29679 }, { "epoch": 97.31147540983606, "grad_norm": 3.9905762672424316, "learning_rate": 3.7886098832721205e-08, "loss": 0.4557, "step": 29680 }, { "epoch": 97.31475409836065, "grad_norm": 4.730813980102539, "learning_rate": 3.779380827247048e-08, "loss": 0.2436, "step": 29681 }, { "epoch": 97.31803278688524, "grad_norm": 4.625471591949463, "learning_rate": 3.770163004645277e-08, "loss": 0.2218, "step": 29682 }, { "epoch": 97.32131147540984, "grad_norm": 5.3992390632629395, "learning_rate": 3.7609564155707265e-08, "loss": 0.3721, "step": 29683 }, { "epoch": 97.32459016393443, "grad_norm": 4.4881768226623535, "learning_rate": 3.7517610601272016e-08, "loss": 0.349, "step": 29684 }, { "epoch": 97.32786885245902, "grad_norm": 5.088810920715332, "learning_rate": 3.742576938418507e-08, "loss": 0.4584, "step": 29685 }, { "epoch": 97.33114754098361, "grad_norm": 4.3327531814575195, "learning_rate": 3.733404050548006e-08, "loss": 0.5207, "step": 29686 }, { "epoch": 97.3344262295082, "grad_norm": 3.904446601867676, "learning_rate": 3.724242396619282e-08, "loss": 0.3374, "step": 29687 }, { "epoch": 97.33770491803278, "grad_norm": 6.3118062019348145, "learning_rate": 3.715091976735585e-08, "loss": 0.3027, "step": 29688 }, { "epoch": 97.34098360655737, "grad_norm": 4.077874183654785, "learning_rate": 3.7059527910000556e-08, "loss": 0.365, "step": 29689 }, { "epoch": 97.34426229508196, "grad_norm": 12.71583080291748, "learning_rate": 3.696824839515834e-08, "loss": 0.2365, "step": 29690 }, { "epoch": 97.34754098360656, "grad_norm": 4.823029518127441, "learning_rate": 3.6877081223858357e-08, "loss": 0.5547, "step": 29691 }, { "epoch": 97.35081967213115, "grad_norm": 5.029294967651367, "learning_rate": 3.6786026397127583e-08, "loss": 0.2463, "step": 29692 }, { "epoch": 97.35409836065574, "grad_norm": 5.031012535095215, "learning_rate": 3.669508391599408e-08, "loss": 0.4341, "step": 29693 }, { "epoch": 97.35737704918033, "grad_norm": 5.813541889190674, "learning_rate": 3.660425378148258e-08, "loss": 0.2948, "step": 29694 }, { "epoch": 97.36065573770492, "grad_norm": 4.385628700256348, "learning_rate": 3.651353599461782e-08, "loss": 0.1289, "step": 29695 }, { "epoch": 97.3639344262295, "grad_norm": 9.388670921325684, "learning_rate": 3.642293055642232e-08, "loss": 0.4133, "step": 29696 }, { "epoch": 97.3672131147541, "grad_norm": 6.402997970581055, "learning_rate": 3.633243746791748e-08, "loss": 0.4229, "step": 29697 }, { "epoch": 97.37049180327868, "grad_norm": 317.5063171386719, "learning_rate": 3.62420567301236e-08, "loss": 0.4911, "step": 29698 }, { "epoch": 97.37377049180328, "grad_norm": 4.1606125831604, "learning_rate": 3.615178834406097e-08, "loss": 0.3695, "step": 29699 }, { "epoch": 97.37704918032787, "grad_norm": 4.143206596374512, "learning_rate": 3.6061632310746554e-08, "loss": 0.5199, "step": 29700 }, { "epoch": 97.38032786885246, "grad_norm": 10.106112480163574, "learning_rate": 3.597158863119732e-08, "loss": 0.5327, "step": 29701 }, { "epoch": 97.38360655737705, "grad_norm": 3.3437774181365967, "learning_rate": 3.588165730642801e-08, "loss": 0.2116, "step": 29702 }, { "epoch": 97.38688524590164, "grad_norm": 5.002917766571045, "learning_rate": 3.579183833745337e-08, "loss": 0.2869, "step": 29703 }, { "epoch": 97.39016393442623, "grad_norm": 5.4011101722717285, "learning_rate": 3.5702131725285914e-08, "loss": 0.3458, "step": 29704 }, { "epoch": 97.39344262295081, "grad_norm": 5.0910444259643555, "learning_rate": 3.561253747093707e-08, "loss": 0.4908, "step": 29705 }, { "epoch": 97.3967213114754, "grad_norm": 4.958132743835449, "learning_rate": 3.552305557541713e-08, "loss": 0.4087, "step": 29706 }, { "epoch": 97.4, "grad_norm": 5.341403007507324, "learning_rate": 3.543368603973529e-08, "loss": 0.3286, "step": 29707 }, { "epoch": 97.4032786885246, "grad_norm": 6.256948471069336, "learning_rate": 3.534442886489964e-08, "loss": 0.2517, "step": 29708 }, { "epoch": 97.40655737704918, "grad_norm": 5.0348100662231445, "learning_rate": 3.525528405191492e-08, "loss": 0.334, "step": 29709 }, { "epoch": 97.40983606557377, "grad_norm": 6.352701187133789, "learning_rate": 3.516625160178921e-08, "loss": 0.4382, "step": 29710 }, { "epoch": 97.41311475409836, "grad_norm": 4.3607048988342285, "learning_rate": 3.507733151552395e-08, "loss": 0.2771, "step": 29711 }, { "epoch": 97.41639344262295, "grad_norm": 4.1902642250061035, "learning_rate": 3.498852379412276e-08, "loss": 0.1754, "step": 29712 }, { "epoch": 97.41967213114754, "grad_norm": 6.265616416931152, "learning_rate": 3.489982843858708e-08, "loss": 0.3931, "step": 29713 }, { "epoch": 97.42295081967212, "grad_norm": 6.686957836151123, "learning_rate": 3.481124544991721e-08, "loss": 0.5692, "step": 29714 }, { "epoch": 97.42622950819673, "grad_norm": 22.27952766418457, "learning_rate": 3.472277482911124e-08, "loss": 0.5173, "step": 29715 }, { "epoch": 97.42950819672132, "grad_norm": 3.6363983154296875, "learning_rate": 3.463441657716726e-08, "loss": 0.3282, "step": 29716 }, { "epoch": 97.4327868852459, "grad_norm": 8.115986824035645, "learning_rate": 3.454617069508226e-08, "loss": 0.3381, "step": 29717 }, { "epoch": 97.43606557377049, "grad_norm": 5.386783599853516, "learning_rate": 3.445803718384988e-08, "loss": 0.2831, "step": 29718 }, { "epoch": 97.43934426229508, "grad_norm": 5.477687358856201, "learning_rate": 3.437001604446488e-08, "loss": 0.4522, "step": 29719 }, { "epoch": 97.44262295081967, "grad_norm": 6.556703567504883, "learning_rate": 3.428210727791981e-08, "loss": 0.3686, "step": 29720 }, { "epoch": 97.44590163934426, "grad_norm": 6.0145416259765625, "learning_rate": 3.419431088520608e-08, "loss": 0.2607, "step": 29721 }, { "epoch": 97.44918032786886, "grad_norm": 3.800302743911743, "learning_rate": 3.4106626867312917e-08, "loss": 0.1708, "step": 29722 }, { "epoch": 97.45245901639345, "grad_norm": 4.329404354095459, "learning_rate": 3.4019055225229524e-08, "loss": 0.2692, "step": 29723 }, { "epoch": 97.45573770491804, "grad_norm": 6.583609580993652, "learning_rate": 3.3931595959942885e-08, "loss": 0.2395, "step": 29724 }, { "epoch": 97.45901639344262, "grad_norm": 5.513216495513916, "learning_rate": 3.3844249072439997e-08, "loss": 0.4324, "step": 29725 }, { "epoch": 97.46229508196721, "grad_norm": 4.303858280181885, "learning_rate": 3.3757014563705615e-08, "loss": 0.4611, "step": 29726 }, { "epoch": 97.4655737704918, "grad_norm": 4.640436172485352, "learning_rate": 3.36698924347234e-08, "loss": 0.5138, "step": 29727 }, { "epoch": 97.46885245901639, "grad_norm": 4.11752986907959, "learning_rate": 3.358288268647481e-08, "loss": 0.2713, "step": 29728 }, { "epoch": 97.47213114754098, "grad_norm": 5.48238468170166, "learning_rate": 3.349598531994236e-08, "loss": 0.4538, "step": 29729 }, { "epoch": 97.47540983606558, "grad_norm": 5.541481971740723, "learning_rate": 3.340920033610418e-08, "loss": 0.2947, "step": 29730 }, { "epoch": 97.47868852459017, "grad_norm": 7.862037181854248, "learning_rate": 3.332252773594058e-08, "loss": 0.4438, "step": 29731 }, { "epoch": 97.48196721311476, "grad_norm": 5.986063480377197, "learning_rate": 3.323596752042857e-08, "loss": 0.4919, "step": 29732 }, { "epoch": 97.48524590163935, "grad_norm": 4.9997711181640625, "learning_rate": 3.314951969054403e-08, "loss": 0.3935, "step": 29733 }, { "epoch": 97.48852459016393, "grad_norm": 4.249994277954102, "learning_rate": 3.3063184247260626e-08, "loss": 0.4347, "step": 29734 }, { "epoch": 97.49180327868852, "grad_norm": 4.379458427429199, "learning_rate": 3.2976961191553135e-08, "loss": 0.1871, "step": 29735 }, { "epoch": 97.49508196721311, "grad_norm": 4.447058200836182, "learning_rate": 3.289085052439411e-08, "loss": 0.4247, "step": 29736 }, { "epoch": 97.4983606557377, "grad_norm": 4.487000465393066, "learning_rate": 3.2804852246753893e-08, "loss": 0.4826, "step": 29737 }, { "epoch": 97.5016393442623, "grad_norm": 5.528129577636719, "learning_rate": 3.27189663596017e-08, "loss": 0.2335, "step": 29738 }, { "epoch": 97.50491803278689, "grad_norm": 4.778582572937012, "learning_rate": 3.263319286390676e-08, "loss": 0.4259, "step": 29739 }, { "epoch": 97.50819672131148, "grad_norm": 5.934096336364746, "learning_rate": 3.254753176063608e-08, "loss": 0.2487, "step": 29740 }, { "epoch": 97.51147540983607, "grad_norm": 5.789900779724121, "learning_rate": 3.246198305075554e-08, "loss": 0.4555, "step": 29741 }, { "epoch": 97.51475409836065, "grad_norm": 4.547562122344971, "learning_rate": 3.237654673522994e-08, "loss": 0.4081, "step": 29742 }, { "epoch": 97.51803278688524, "grad_norm": 4.706971645355225, "learning_rate": 3.229122281502184e-08, "loss": 0.3829, "step": 29743 }, { "epoch": 97.52131147540983, "grad_norm": 4.069639682769775, "learning_rate": 3.220601129109491e-08, "loss": 0.1903, "step": 29744 }, { "epoch": 97.52459016393442, "grad_norm": 4.463735580444336, "learning_rate": 3.212091216440838e-08, "loss": 0.2381, "step": 29745 }, { "epoch": 97.52786885245902, "grad_norm": 4.322964191436768, "learning_rate": 3.20359254359226e-08, "loss": 0.501, "step": 29746 }, { "epoch": 97.53114754098361, "grad_norm": 5.257562160491943, "learning_rate": 3.19510511065968e-08, "loss": 0.3644, "step": 29747 }, { "epoch": 97.5344262295082, "grad_norm": 4.707973480224609, "learning_rate": 3.186628917738577e-08, "loss": 0.4813, "step": 29748 }, { "epoch": 97.53770491803279, "grad_norm": 6.811776638031006, "learning_rate": 3.178163964924763e-08, "loss": 0.2721, "step": 29749 }, { "epoch": 97.54098360655738, "grad_norm": 4.358686923980713, "learning_rate": 3.169710252313496e-08, "loss": 0.5044, "step": 29750 }, { "epoch": 97.54426229508196, "grad_norm": 4.486551761627197, "learning_rate": 3.161267780000255e-08, "loss": 0.2822, "step": 29751 }, { "epoch": 97.54754098360655, "grad_norm": 7.510985851287842, "learning_rate": 3.152836548080185e-08, "loss": 0.313, "step": 29752 }, { "epoch": 97.55081967213114, "grad_norm": 5.109756946563721, "learning_rate": 3.144416556648211e-08, "loss": 0.598, "step": 29753 }, { "epoch": 97.55409836065574, "grad_norm": 5.324732303619385, "learning_rate": 3.1360078057995905e-08, "loss": 0.2038, "step": 29754 }, { "epoch": 97.55737704918033, "grad_norm": 4.884749412536621, "learning_rate": 3.1276102956289134e-08, "loss": 0.3347, "step": 29755 }, { "epoch": 97.56065573770492, "grad_norm": 4.15848445892334, "learning_rate": 3.119224026230883e-08, "loss": 0.1274, "step": 29756 }, { "epoch": 97.56393442622951, "grad_norm": 5.94729471206665, "learning_rate": 3.1108489977000885e-08, "loss": 0.2397, "step": 29757 }, { "epoch": 97.5672131147541, "grad_norm": 5.501312732696533, "learning_rate": 3.102485210130901e-08, "loss": 0.3736, "step": 29758 }, { "epoch": 97.57049180327868, "grad_norm": 5.609208106994629, "learning_rate": 3.0941326636177995e-08, "loss": 0.3272, "step": 29759 }, { "epoch": 97.57377049180327, "grad_norm": 7.281116485595703, "learning_rate": 3.0857913582549304e-08, "loss": 0.4718, "step": 29760 }, { "epoch": 97.57704918032788, "grad_norm": 5.041379451751709, "learning_rate": 3.0774612941362194e-08, "loss": 0.3526, "step": 29761 }, { "epoch": 97.58032786885246, "grad_norm": 5.505638599395752, "learning_rate": 3.0691424713557015e-08, "loss": 0.3301, "step": 29762 }, { "epoch": 97.58360655737705, "grad_norm": 4.3297319412231445, "learning_rate": 3.0608348900070806e-08, "loss": 0.1674, "step": 29763 }, { "epoch": 97.58688524590164, "grad_norm": 6.099319934844971, "learning_rate": 3.05253855018417e-08, "loss": 0.1913, "step": 29764 }, { "epoch": 97.59016393442623, "grad_norm": 4.4157562255859375, "learning_rate": 3.04425345198045e-08, "loss": 0.2745, "step": 29765 }, { "epoch": 97.59344262295082, "grad_norm": 5.269050121307373, "learning_rate": 3.035979595489291e-08, "loss": 0.2581, "step": 29766 }, { "epoch": 97.5967213114754, "grad_norm": 4.538007736206055, "learning_rate": 3.027716980804174e-08, "loss": 0.3627, "step": 29767 }, { "epoch": 97.6, "grad_norm": 5.090112209320068, "learning_rate": 3.019465608018024e-08, "loss": 0.3181, "step": 29768 }, { "epoch": 97.6032786885246, "grad_norm": 5.189466953277588, "learning_rate": 3.011225477223989e-08, "loss": 0.3031, "step": 29769 }, { "epoch": 97.60655737704919, "grad_norm": 5.60345983505249, "learning_rate": 3.0029965885151055e-08, "loss": 0.3735, "step": 29770 }, { "epoch": 97.60983606557377, "grad_norm": 4.804449081420898, "learning_rate": 2.994778941983967e-08, "loss": 0.1547, "step": 29771 }, { "epoch": 97.61311475409836, "grad_norm": 4.7259297370910645, "learning_rate": 2.986572537723276e-08, "loss": 0.3104, "step": 29772 }, { "epoch": 97.61639344262295, "grad_norm": 4.530945301055908, "learning_rate": 2.978377375825736e-08, "loss": 0.2831, "step": 29773 }, { "epoch": 97.61967213114754, "grad_norm": 5.484859943389893, "learning_rate": 2.9701934563834968e-08, "loss": 0.2712, "step": 29774 }, { "epoch": 97.62295081967213, "grad_norm": 3.567641019821167, "learning_rate": 2.9620207794890386e-08, "loss": 0.2588, "step": 29775 }, { "epoch": 97.62622950819672, "grad_norm": 4.036341190338135, "learning_rate": 2.953859345234511e-08, "loss": 0.6037, "step": 29776 }, { "epoch": 97.62950819672132, "grad_norm": 5.079986572265625, "learning_rate": 2.9457091537118398e-08, "loss": 0.1525, "step": 29777 }, { "epoch": 97.6327868852459, "grad_norm": 6.119863986968994, "learning_rate": 2.9375702050129516e-08, "loss": 0.3581, "step": 29778 }, { "epoch": 97.6360655737705, "grad_norm": 5.061798572540283, "learning_rate": 2.9294424992296623e-08, "loss": 0.4886, "step": 29779 }, { "epoch": 97.63934426229508, "grad_norm": 3.807675838470459, "learning_rate": 2.9213260364536754e-08, "loss": 0.2001, "step": 29780 }, { "epoch": 97.64262295081967, "grad_norm": 10.955251693725586, "learning_rate": 2.9132208167763633e-08, "loss": 0.5494, "step": 29781 }, { "epoch": 97.64590163934426, "grad_norm": 6.480525493621826, "learning_rate": 2.9051268402892074e-08, "loss": 0.3684, "step": 29782 }, { "epoch": 97.64918032786885, "grad_norm": 7.31526517868042, "learning_rate": 2.8970441070834688e-08, "loss": 0.2338, "step": 29783 }, { "epoch": 97.65245901639344, "grad_norm": 4.915900230407715, "learning_rate": 2.8889726172502963e-08, "loss": 0.3664, "step": 29784 }, { "epoch": 97.65573770491804, "grad_norm": 6.394381523132324, "learning_rate": 2.8809123708806176e-08, "loss": 0.3772, "step": 29785 }, { "epoch": 97.65901639344263, "grad_norm": 4.285159587860107, "learning_rate": 2.8728633680654707e-08, "loss": 0.3765, "step": 29786 }, { "epoch": 97.66229508196722, "grad_norm": 3.6874961853027344, "learning_rate": 2.8648256088955607e-08, "loss": 0.2688, "step": 29787 }, { "epoch": 97.6655737704918, "grad_norm": 6.316566467285156, "learning_rate": 2.856799093461482e-08, "loss": 0.5043, "step": 29788 }, { "epoch": 97.66885245901639, "grad_norm": 5.018540382385254, "learning_rate": 2.848783821853718e-08, "loss": 0.2296, "step": 29789 }, { "epoch": 97.67213114754098, "grad_norm": 7.894869327545166, "learning_rate": 2.8407797941627512e-08, "loss": 0.6062, "step": 29790 }, { "epoch": 97.67540983606557, "grad_norm": 3.6583986282348633, "learning_rate": 2.8327870104787325e-08, "loss": 0.2695, "step": 29791 }, { "epoch": 97.67868852459016, "grad_norm": 5.109076499938965, "learning_rate": 2.8248054708919226e-08, "loss": 0.3292, "step": 29792 }, { "epoch": 97.68196721311476, "grad_norm": 5.115363121032715, "learning_rate": 2.8168351754921387e-08, "loss": 0.4897, "step": 29793 }, { "epoch": 97.68524590163935, "grad_norm": 4.887291431427002, "learning_rate": 2.8088761243694195e-08, "loss": 0.2772, "step": 29794 }, { "epoch": 97.68852459016394, "grad_norm": 5.907601833343506, "learning_rate": 2.8009283176133606e-08, "loss": 0.5235, "step": 29795 }, { "epoch": 97.69180327868852, "grad_norm": 4.464132308959961, "learning_rate": 2.7929917553136677e-08, "loss": 0.4639, "step": 29796 }, { "epoch": 97.69508196721311, "grad_norm": 4.345644474029541, "learning_rate": 2.7850664375599358e-08, "loss": 0.2542, "step": 29797 }, { "epoch": 97.6983606557377, "grad_norm": 3.9914262294769287, "learning_rate": 2.7771523644413156e-08, "loss": 0.3976, "step": 29798 }, { "epoch": 97.70163934426229, "grad_norm": 5.033172607421875, "learning_rate": 2.7692495360471804e-08, "loss": 0.4113, "step": 29799 }, { "epoch": 97.70491803278688, "grad_norm": 15.184601783752441, "learning_rate": 2.76135795246657e-08, "loss": 0.1811, "step": 29800 }, { "epoch": 97.70819672131148, "grad_norm": 3.670621633529663, "learning_rate": 2.7534776137886356e-08, "loss": 0.3748, "step": 29801 }, { "epoch": 97.71147540983607, "grad_norm": 5.0425238609313965, "learning_rate": 2.7456085201020832e-08, "loss": 0.3416, "step": 29802 }, { "epoch": 97.71475409836066, "grad_norm": 4.869358539581299, "learning_rate": 2.7377506714956205e-08, "loss": 0.273, "step": 29803 }, { "epoch": 97.71803278688525, "grad_norm": 27.765947341918945, "learning_rate": 2.7299040680579536e-08, "loss": 0.4196, "step": 29804 }, { "epoch": 97.72131147540983, "grad_norm": 4.438800811767578, "learning_rate": 2.722068709877457e-08, "loss": 0.3355, "step": 29805 }, { "epoch": 97.72459016393442, "grad_norm": 9.195182800292969, "learning_rate": 2.7142445970426145e-08, "loss": 0.2941, "step": 29806 }, { "epoch": 97.72786885245901, "grad_norm": 4.527896881103516, "learning_rate": 2.7064317296415787e-08, "loss": 0.2287, "step": 29807 }, { "epoch": 97.73114754098361, "grad_norm": 4.663104057312012, "learning_rate": 2.69863010776239e-08, "loss": 0.5258, "step": 29808 }, { "epoch": 97.7344262295082, "grad_norm": 5.0474467277526855, "learning_rate": 2.690839731493089e-08, "loss": 0.4009, "step": 29809 }, { "epoch": 97.73770491803279, "grad_norm": 6.194153785705566, "learning_rate": 2.6830606009216053e-08, "loss": 0.1925, "step": 29810 }, { "epoch": 97.74098360655738, "grad_norm": 4.666561603546143, "learning_rate": 2.6752927161355357e-08, "loss": 0.3631, "step": 29811 }, { "epoch": 97.74426229508197, "grad_norm": 6.371237277984619, "learning_rate": 2.667536077222477e-08, "loss": 0.3412, "step": 29812 }, { "epoch": 97.74754098360656, "grad_norm": 4.534093856811523, "learning_rate": 2.659790684269803e-08, "loss": 0.2964, "step": 29813 }, { "epoch": 97.75081967213114, "grad_norm": 4.291804313659668, "learning_rate": 2.6520565373651108e-08, "loss": 0.2327, "step": 29814 }, { "epoch": 97.75409836065573, "grad_norm": 6.370297431945801, "learning_rate": 2.644333636595442e-08, "loss": 0.4263, "step": 29815 }, { "epoch": 97.75737704918033, "grad_norm": 5.89537239074707, "learning_rate": 2.6366219820478378e-08, "loss": 0.4434, "step": 29816 }, { "epoch": 97.76065573770492, "grad_norm": 4.9834184646606445, "learning_rate": 2.62892157380934e-08, "loss": 0.252, "step": 29817 }, { "epoch": 97.76393442622951, "grad_norm": 4.311262607574463, "learning_rate": 2.6212324119667677e-08, "loss": 0.2082, "step": 29818 }, { "epoch": 97.7672131147541, "grad_norm": 4.569196701049805, "learning_rate": 2.6135544966068294e-08, "loss": 0.4123, "step": 29819 }, { "epoch": 97.77049180327869, "grad_norm": 4.6444783210754395, "learning_rate": 2.6058878278161225e-08, "loss": 0.296, "step": 29820 }, { "epoch": 97.77377049180328, "grad_norm": 4.2471513748168945, "learning_rate": 2.5982324056810227e-08, "loss": 0.5964, "step": 29821 }, { "epoch": 97.77704918032786, "grad_norm": 5.3205132484436035, "learning_rate": 2.5905882302877938e-08, "loss": 0.1859, "step": 29822 }, { "epoch": 97.78032786885245, "grad_norm": 7.737308979034424, "learning_rate": 2.5829553017228114e-08, "loss": 0.3957, "step": 29823 }, { "epoch": 97.78360655737706, "grad_norm": 3.7945711612701416, "learning_rate": 2.575333620072118e-08, "loss": 0.2648, "step": 29824 }, { "epoch": 97.78688524590164, "grad_norm": 3.990251302719116, "learning_rate": 2.5677231854215333e-08, "loss": 0.28, "step": 29825 }, { "epoch": 97.79016393442623, "grad_norm": 4.367239475250244, "learning_rate": 2.560123997856989e-08, "loss": 0.5194, "step": 29826 }, { "epoch": 97.79344262295082, "grad_norm": 4.782304286956787, "learning_rate": 2.5525360574640834e-08, "loss": 0.5032, "step": 29827 }, { "epoch": 97.79672131147541, "grad_norm": 5.994488716125488, "learning_rate": 2.5449593643284144e-08, "loss": 0.3368, "step": 29828 }, { "epoch": 97.8, "grad_norm": 5.370280742645264, "learning_rate": 2.537393918535358e-08, "loss": 0.5974, "step": 29829 }, { "epoch": 97.80327868852459, "grad_norm": 11.814517974853516, "learning_rate": 2.5298397201704015e-08, "loss": 0.2161, "step": 29830 }, { "epoch": 97.80655737704917, "grad_norm": 4.178102493286133, "learning_rate": 2.5222967693185886e-08, "loss": 0.5171, "step": 29831 }, { "epoch": 97.80983606557378, "grad_norm": 4.9309563636779785, "learning_rate": 2.5147650660649613e-08, "loss": 0.4333, "step": 29832 }, { "epoch": 97.81311475409836, "grad_norm": 4.1109232902526855, "learning_rate": 2.5072446104944524e-08, "loss": 0.2573, "step": 29833 }, { "epoch": 97.81639344262295, "grad_norm": 5.890707969665527, "learning_rate": 2.499735402691994e-08, "loss": 0.3246, "step": 29834 }, { "epoch": 97.81967213114754, "grad_norm": 4.146755695343018, "learning_rate": 2.4922374427420736e-08, "loss": 0.2933, "step": 29835 }, { "epoch": 97.82295081967213, "grad_norm": 6.628939628601074, "learning_rate": 2.4847507307294018e-08, "loss": 0.3317, "step": 29836 }, { "epoch": 97.82622950819672, "grad_norm": 4.350808143615723, "learning_rate": 2.4772752667382437e-08, "loss": 0.2365, "step": 29837 }, { "epoch": 97.8295081967213, "grad_norm": 4.653900146484375, "learning_rate": 2.4698110508529772e-08, "loss": 0.3448, "step": 29838 }, { "epoch": 97.8327868852459, "grad_norm": 4.573848724365234, "learning_rate": 2.4623580831577565e-08, "loss": 0.6007, "step": 29839 }, { "epoch": 97.8360655737705, "grad_norm": 4.733711242675781, "learning_rate": 2.4549163637367368e-08, "loss": 0.3237, "step": 29840 }, { "epoch": 97.83934426229509, "grad_norm": 4.569544315338135, "learning_rate": 2.447485892673629e-08, "loss": 0.3481, "step": 29841 }, { "epoch": 97.84262295081967, "grad_norm": 4.32388973236084, "learning_rate": 2.4400666700523657e-08, "loss": 0.6998, "step": 29842 }, { "epoch": 97.84590163934426, "grad_norm": 4.453965187072754, "learning_rate": 2.432658695956436e-08, "loss": 0.2527, "step": 29843 }, { "epoch": 97.84918032786885, "grad_norm": 5.395515441894531, "learning_rate": 2.4252619704695502e-08, "loss": 0.2621, "step": 29844 }, { "epoch": 97.85245901639344, "grad_norm": 3.390885353088379, "learning_rate": 2.4178764936750864e-08, "loss": 0.2892, "step": 29845 }, { "epoch": 97.85573770491803, "grad_norm": 4.441596984863281, "learning_rate": 2.4105022656563114e-08, "loss": 0.6692, "step": 29846 }, { "epoch": 97.85901639344263, "grad_norm": 6.6303391456604, "learning_rate": 2.40313928649627e-08, "loss": 0.3136, "step": 29847 }, { "epoch": 97.86229508196722, "grad_norm": 4.902442932128906, "learning_rate": 2.3957875562781176e-08, "loss": 0.2616, "step": 29848 }, { "epoch": 97.8655737704918, "grad_norm": 4.344911575317383, "learning_rate": 2.3884470750847878e-08, "loss": 0.3652, "step": 29849 }, { "epoch": 97.8688524590164, "grad_norm": 6.957458972930908, "learning_rate": 2.3811178429988812e-08, "loss": 0.2608, "step": 29850 }, { "epoch": 97.87213114754098, "grad_norm": 4.697012901306152, "learning_rate": 2.3737998601031097e-08, "loss": 0.2893, "step": 29851 }, { "epoch": 97.87540983606557, "grad_norm": 4.78832483291626, "learning_rate": 2.366493126480074e-08, "loss": 0.4999, "step": 29852 }, { "epoch": 97.87868852459016, "grad_norm": 4.434173583984375, "learning_rate": 2.3591976422121522e-08, "loss": 0.4318, "step": 29853 }, { "epoch": 97.88196721311475, "grad_norm": 7.769186973571777, "learning_rate": 2.3519134073815007e-08, "loss": 0.3208, "step": 29854 }, { "epoch": 97.88524590163935, "grad_norm": 5.128420829772949, "learning_rate": 2.344640422070277e-08, "loss": 0.3631, "step": 29855 }, { "epoch": 97.88852459016394, "grad_norm": 4.418094158172607, "learning_rate": 2.3373786863605252e-08, "loss": 0.2051, "step": 29856 }, { "epoch": 97.89180327868853, "grad_norm": 4.093031406402588, "learning_rate": 2.3301282003341808e-08, "loss": 0.3422, "step": 29857 }, { "epoch": 97.89508196721312, "grad_norm": 4.4903340339660645, "learning_rate": 2.3228889640730668e-08, "loss": 0.4012, "step": 29858 }, { "epoch": 97.8983606557377, "grad_norm": 3.7263858318328857, "learning_rate": 2.3156609776585625e-08, "loss": 0.284, "step": 29859 }, { "epoch": 97.90163934426229, "grad_norm": 4.798644542694092, "learning_rate": 2.3084442411723805e-08, "loss": 0.4753, "step": 29860 }, { "epoch": 97.90491803278688, "grad_norm": 5.076610565185547, "learning_rate": 2.3012387546957893e-08, "loss": 0.3214, "step": 29861 }, { "epoch": 97.90819672131147, "grad_norm": 5.63288688659668, "learning_rate": 2.294044518310057e-08, "loss": 0.3709, "step": 29862 }, { "epoch": 97.91147540983607, "grad_norm": 5.906692981719971, "learning_rate": 2.2868615320963406e-08, "loss": 0.1967, "step": 29863 }, { "epoch": 97.91475409836066, "grad_norm": 5.535549163818359, "learning_rate": 2.2796897961356868e-08, "loss": 0.1813, "step": 29864 }, { "epoch": 97.91803278688525, "grad_norm": 4.422118663787842, "learning_rate": 2.2725293105088086e-08, "loss": 0.4479, "step": 29865 }, { "epoch": 97.92131147540984, "grad_norm": 4.857255458831787, "learning_rate": 2.2653800752966416e-08, "loss": 0.4261, "step": 29866 }, { "epoch": 97.92459016393443, "grad_norm": 6.833133697509766, "learning_rate": 2.2582420905796766e-08, "loss": 0.444, "step": 29867 }, { "epoch": 97.92786885245901, "grad_norm": 4.653447151184082, "learning_rate": 2.2511153564384046e-08, "loss": 0.4804, "step": 29868 }, { "epoch": 97.9311475409836, "grad_norm": 4.924747467041016, "learning_rate": 2.2439998729530952e-08, "loss": 0.3638, "step": 29869 }, { "epoch": 97.93442622950819, "grad_norm": 5.500908851623535, "learning_rate": 2.2368956402042398e-08, "loss": 0.512, "step": 29870 }, { "epoch": 97.9377049180328, "grad_norm": 4.086597919464111, "learning_rate": 2.2298026582717736e-08, "loss": 0.4321, "step": 29871 }, { "epoch": 97.94098360655738, "grad_norm": 3.8914663791656494, "learning_rate": 2.2227209272356332e-08, "loss": 0.2114, "step": 29872 }, { "epoch": 97.94426229508197, "grad_norm": 7.735720634460449, "learning_rate": 2.2156504471757546e-08, "loss": 0.3795, "step": 29873 }, { "epoch": 97.94754098360656, "grad_norm": 5.40487003326416, "learning_rate": 2.2085912181719628e-08, "loss": 0.4964, "step": 29874 }, { "epoch": 97.95081967213115, "grad_norm": 4.443500518798828, "learning_rate": 2.2015432403036386e-08, "loss": 0.2219, "step": 29875 }, { "epoch": 97.95409836065573, "grad_norm": 7.881903171539307, "learning_rate": 2.1945065136503853e-08, "loss": 0.3089, "step": 29876 }, { "epoch": 97.95737704918032, "grad_norm": 4.008758544921875, "learning_rate": 2.1874810382914725e-08, "loss": 0.3913, "step": 29877 }, { "epoch": 97.96065573770491, "grad_norm": 3.2882354259490967, "learning_rate": 2.1804668143062812e-08, "loss": 0.2368, "step": 29878 }, { "epoch": 97.96393442622951, "grad_norm": 4.653011798858643, "learning_rate": 2.1734638417737485e-08, "loss": 0.3108, "step": 29879 }, { "epoch": 97.9672131147541, "grad_norm": 5.052850723266602, "learning_rate": 2.166472120772922e-08, "loss": 0.1527, "step": 29880 }, { "epoch": 97.97049180327869, "grad_norm": 4.007880210876465, "learning_rate": 2.159491651382628e-08, "loss": 0.2156, "step": 29881 }, { "epoch": 97.97377049180328, "grad_norm": 3.8849639892578125, "learning_rate": 2.15252243368147e-08, "loss": 0.468, "step": 29882 }, { "epoch": 97.97704918032787, "grad_norm": 6.248305320739746, "learning_rate": 2.1455644677481624e-08, "loss": 0.3401, "step": 29883 }, { "epoch": 97.98032786885246, "grad_norm": 3.6550538539886475, "learning_rate": 2.1386177536611986e-08, "loss": 0.3242, "step": 29884 }, { "epoch": 97.98360655737704, "grad_norm": 5.119797229766846, "learning_rate": 2.1316822914987378e-08, "loss": 0.2531, "step": 29885 }, { "epoch": 97.98688524590163, "grad_norm": 10.278678894042969, "learning_rate": 2.1247580813391622e-08, "loss": 0.5462, "step": 29886 }, { "epoch": 97.99016393442623, "grad_norm": 4.139339923858643, "learning_rate": 2.1178451232604092e-08, "loss": 0.4065, "step": 29887 }, { "epoch": 97.99344262295082, "grad_norm": 5.341605186462402, "learning_rate": 2.1109434173404165e-08, "loss": 0.3807, "step": 29888 }, { "epoch": 97.99672131147541, "grad_norm": 5.209543228149414, "learning_rate": 2.1040529636572327e-08, "loss": 0.5955, "step": 29889 }, { "epoch": 98.0, "grad_norm": 4.37660026550293, "learning_rate": 2.0971737622883515e-08, "loss": 0.2626, "step": 29890 }, { "epoch": 98.00327868852459, "grad_norm": 4.940629005432129, "learning_rate": 2.0903058133113773e-08, "loss": 0.2772, "step": 29891 }, { "epoch": 98.00655737704918, "grad_norm": 4.357664108276367, "learning_rate": 2.083449116803804e-08, "loss": 0.2142, "step": 29892 }, { "epoch": 98.00983606557377, "grad_norm": 4.269907474517822, "learning_rate": 2.076603672842903e-08, "loss": 0.5438, "step": 29893 }, { "epoch": 98.01311475409837, "grad_norm": 4.248345851898193, "learning_rate": 2.0697694815058343e-08, "loss": 0.4945, "step": 29894 }, { "epoch": 98.01639344262296, "grad_norm": 5.399383068084717, "learning_rate": 2.0629465428697594e-08, "loss": 0.4447, "step": 29895 }, { "epoch": 98.01967213114754, "grad_norm": 5.740922451019287, "learning_rate": 2.0561348570115046e-08, "loss": 0.29, "step": 29896 }, { "epoch": 98.02295081967213, "grad_norm": 6.223212718963623, "learning_rate": 2.0493344240078983e-08, "loss": 0.619, "step": 29897 }, { "epoch": 98.02622950819672, "grad_norm": 3.9711713790893555, "learning_rate": 2.0425452439357675e-08, "loss": 0.3043, "step": 29898 }, { "epoch": 98.02950819672131, "grad_norm": 8.427295684814453, "learning_rate": 2.0357673168714952e-08, "loss": 0.3349, "step": 29899 }, { "epoch": 98.0327868852459, "grad_norm": 4.258643627166748, "learning_rate": 2.0290006428914655e-08, "loss": 0.1831, "step": 29900 }, { "epoch": 98.03606557377049, "grad_norm": 4.114288806915283, "learning_rate": 2.0222452220722832e-08, "loss": 0.4604, "step": 29901 }, { "epoch": 98.03934426229509, "grad_norm": 5.90788459777832, "learning_rate": 2.0155010544897768e-08, "loss": 0.4488, "step": 29902 }, { "epoch": 98.04262295081968, "grad_norm": 4.185389041900635, "learning_rate": 2.0087681402202185e-08, "loss": 0.2926, "step": 29903 }, { "epoch": 98.04590163934427, "grad_norm": 4.697877883911133, "learning_rate": 2.0020464793394366e-08, "loss": 0.2735, "step": 29904 }, { "epoch": 98.04918032786885, "grad_norm": 4.619381904602051, "learning_rate": 1.9953360719231484e-08, "loss": 0.431, "step": 29905 }, { "epoch": 98.05245901639344, "grad_norm": 7.199804306030273, "learning_rate": 1.988636918047182e-08, "loss": 0.6757, "step": 29906 }, { "epoch": 98.05573770491803, "grad_norm": 4.808994770050049, "learning_rate": 1.9819490177870326e-08, "loss": 0.3795, "step": 29907 }, { "epoch": 98.05901639344262, "grad_norm": 7.231961250305176, "learning_rate": 1.9752723712180845e-08, "loss": 0.2808, "step": 29908 }, { "epoch": 98.0622950819672, "grad_norm": 5.122055530548096, "learning_rate": 1.9686069784156104e-08, "loss": 0.4975, "step": 29909 }, { "epoch": 98.06557377049181, "grad_norm": 3.8824844360351562, "learning_rate": 1.9619528394547727e-08, "loss": 0.3141, "step": 29910 }, { "epoch": 98.0688524590164, "grad_norm": 5.205458164215088, "learning_rate": 1.9553099544106223e-08, "loss": 0.4043, "step": 29911 }, { "epoch": 98.07213114754099, "grad_norm": 4.475615978240967, "learning_rate": 1.9486783233580997e-08, "loss": 0.3099, "step": 29912 }, { "epoch": 98.07540983606557, "grad_norm": 5.408452987670898, "learning_rate": 1.9420579463718114e-08, "loss": 0.3013, "step": 29913 }, { "epoch": 98.07868852459016, "grad_norm": 4.74491024017334, "learning_rate": 1.9354488235266977e-08, "loss": 0.3206, "step": 29914 }, { "epoch": 98.08196721311475, "grad_norm": 3.7293031215667725, "learning_rate": 1.9288509548970325e-08, "loss": 0.4407, "step": 29915 }, { "epoch": 98.08524590163934, "grad_norm": 3.66409969329834, "learning_rate": 1.9222643405573114e-08, "loss": 0.1838, "step": 29916 }, { "epoch": 98.08852459016393, "grad_norm": 5.444117546081543, "learning_rate": 1.915688980581809e-08, "loss": 0.3896, "step": 29917 }, { "epoch": 98.09180327868853, "grad_norm": 3.9736545085906982, "learning_rate": 1.9091248750446877e-08, "loss": 0.3654, "step": 29918 }, { "epoch": 98.09508196721312, "grad_norm": 4.030632972717285, "learning_rate": 1.9025720240199996e-08, "loss": 0.1566, "step": 29919 }, { "epoch": 98.09836065573771, "grad_norm": 5.33686637878418, "learning_rate": 1.8960304275814634e-08, "loss": 0.4129, "step": 29920 }, { "epoch": 98.1016393442623, "grad_norm": 4.795388221740723, "learning_rate": 1.88950008580302e-08, "loss": 0.2757, "step": 29921 }, { "epoch": 98.10491803278688, "grad_norm": 5.089822769165039, "learning_rate": 1.882980998758166e-08, "loss": 0.5013, "step": 29922 }, { "epoch": 98.10819672131147, "grad_norm": 6.6053242683410645, "learning_rate": 1.8764731665205093e-08, "loss": 0.3438, "step": 29923 }, { "epoch": 98.11147540983606, "grad_norm": 5.01204776763916, "learning_rate": 1.8699765891634357e-08, "loss": 0.2709, "step": 29924 }, { "epoch": 98.11475409836065, "grad_norm": 6.519995212554932, "learning_rate": 1.863491266760109e-08, "loss": 0.2976, "step": 29925 }, { "epoch": 98.11803278688525, "grad_norm": 3.700561761856079, "learning_rate": 1.857017199383804e-08, "loss": 0.3881, "step": 29926 }, { "epoch": 98.12131147540984, "grad_norm": 4.441977024078369, "learning_rate": 1.8505543871073506e-08, "loss": 0.5746, "step": 29927 }, { "epoch": 98.12459016393443, "grad_norm": 3.935352325439453, "learning_rate": 1.844102830003802e-08, "loss": 0.3743, "step": 29928 }, { "epoch": 98.12786885245902, "grad_norm": 4.406871318817139, "learning_rate": 1.8376625281457672e-08, "loss": 0.4187, "step": 29929 }, { "epoch": 98.1311475409836, "grad_norm": 4.097374439239502, "learning_rate": 1.831233481605854e-08, "loss": 0.2196, "step": 29930 }, { "epoch": 98.1344262295082, "grad_norm": 5.284012317657471, "learning_rate": 1.8248156904567825e-08, "loss": 0.4104, "step": 29931 }, { "epoch": 98.13770491803278, "grad_norm": 5.559414863586426, "learning_rate": 1.818409154770606e-08, "loss": 0.4307, "step": 29932 }, { "epoch": 98.14098360655737, "grad_norm": 7.4929890632629395, "learning_rate": 1.8120138746198225e-08, "loss": 0.3069, "step": 29933 }, { "epoch": 98.14426229508197, "grad_norm": 4.687736511230469, "learning_rate": 1.8056298500763736e-08, "loss": 0.3664, "step": 29934 }, { "epoch": 98.14754098360656, "grad_norm": 4.6533074378967285, "learning_rate": 1.7992570812123132e-08, "loss": 0.2845, "step": 29935 }, { "epoch": 98.15081967213115, "grad_norm": 5.0154852867126465, "learning_rate": 1.792895568099473e-08, "loss": 0.2553, "step": 29936 }, { "epoch": 98.15409836065574, "grad_norm": 4.397959232330322, "learning_rate": 1.7865453108096843e-08, "loss": 0.2831, "step": 29937 }, { "epoch": 98.15737704918033, "grad_norm": 3.3410356044769287, "learning_rate": 1.780206309414445e-08, "loss": 0.3865, "step": 29938 }, { "epoch": 98.16065573770491, "grad_norm": 4.172728538513184, "learning_rate": 1.773878563985143e-08, "loss": 0.2249, "step": 29939 }, { "epoch": 98.1639344262295, "grad_norm": 4.116467475891113, "learning_rate": 1.7675620745933874e-08, "loss": 0.3704, "step": 29940 }, { "epoch": 98.1672131147541, "grad_norm": 3.912745714187622, "learning_rate": 1.7612568413103436e-08, "loss": 0.3749, "step": 29941 }, { "epoch": 98.1704918032787, "grad_norm": 5.002999782562256, "learning_rate": 1.7549628642069548e-08, "loss": 0.5553, "step": 29942 }, { "epoch": 98.17377049180328, "grad_norm": 4.6580376625061035, "learning_rate": 1.7486801433541644e-08, "loss": 0.3101, "step": 29943 }, { "epoch": 98.17704918032787, "grad_norm": 4.097663402557373, "learning_rate": 1.7424086788230264e-08, "loss": 0.5507, "step": 29944 }, { "epoch": 98.18032786885246, "grad_norm": 4.148587703704834, "learning_rate": 1.7361484706842623e-08, "loss": 0.4533, "step": 29945 }, { "epoch": 98.18360655737705, "grad_norm": 3.3773391246795654, "learning_rate": 1.72989951900826e-08, "loss": 0.4196, "step": 29946 }, { "epoch": 98.18688524590164, "grad_norm": 5.419236183166504, "learning_rate": 1.723661823865519e-08, "loss": 0.4255, "step": 29947 }, { "epoch": 98.19016393442622, "grad_norm": 4.081448554992676, "learning_rate": 1.7174353853265378e-08, "loss": 0.2869, "step": 29948 }, { "epoch": 98.19344262295083, "grad_norm": 3.591881036758423, "learning_rate": 1.7112202034613723e-08, "loss": 0.2073, "step": 29949 }, { "epoch": 98.19672131147541, "grad_norm": 4.468438625335693, "learning_rate": 1.705016278340188e-08, "loss": 0.196, "step": 29950 }, { "epoch": 98.2, "grad_norm": 4.499288082122803, "learning_rate": 1.698823610032929e-08, "loss": 0.328, "step": 29951 }, { "epoch": 98.20327868852459, "grad_norm": 4.030954360961914, "learning_rate": 1.692642198609318e-08, "loss": 0.2767, "step": 29952 }, { "epoch": 98.20655737704918, "grad_norm": 3.925067186355591, "learning_rate": 1.686472044139187e-08, "loss": 0.233, "step": 29953 }, { "epoch": 98.20983606557377, "grad_norm": 3.8175435066223145, "learning_rate": 1.6803131466921473e-08, "loss": 0.2629, "step": 29954 }, { "epoch": 98.21311475409836, "grad_norm": 4.76411771774292, "learning_rate": 1.6741655063374775e-08, "loss": 0.3501, "step": 29955 }, { "epoch": 98.21639344262294, "grad_norm": 4.770264148712158, "learning_rate": 1.6680291231445656e-08, "loss": 0.3936, "step": 29956 }, { "epoch": 98.21967213114755, "grad_norm": 4.837494373321533, "learning_rate": 1.66190399718269e-08, "loss": 0.5073, "step": 29957 }, { "epoch": 98.22295081967214, "grad_norm": 3.568171739578247, "learning_rate": 1.6557901285209066e-08, "loss": 0.3248, "step": 29958 }, { "epoch": 98.22622950819672, "grad_norm": 7.593884468078613, "learning_rate": 1.649687517228049e-08, "loss": 0.2843, "step": 29959 }, { "epoch": 98.22950819672131, "grad_norm": 5.0754876136779785, "learning_rate": 1.6435961633729514e-08, "loss": 0.2832, "step": 29960 }, { "epoch": 98.2327868852459, "grad_norm": 3.799891710281372, "learning_rate": 1.6375160670244473e-08, "loss": 0.2266, "step": 29961 }, { "epoch": 98.23606557377049, "grad_norm": 4.146523952484131, "learning_rate": 1.6314472282509262e-08, "loss": 0.4571, "step": 29962 }, { "epoch": 98.23934426229508, "grad_norm": 5.820745944976807, "learning_rate": 1.6253896471207785e-08, "loss": 0.5529, "step": 29963 }, { "epoch": 98.24262295081967, "grad_norm": 4.273187637329102, "learning_rate": 1.6193433237026157e-08, "loss": 0.4869, "step": 29964 }, { "epoch": 98.24590163934427, "grad_norm": 3.488859176635742, "learning_rate": 1.6133082580642722e-08, "loss": 0.3523, "step": 29965 }, { "epoch": 98.24918032786886, "grad_norm": 6.698504447937012, "learning_rate": 1.607284450273916e-08, "loss": 0.311, "step": 29966 }, { "epoch": 98.25245901639344, "grad_norm": 3.9799163341522217, "learning_rate": 1.6012719003996036e-08, "loss": 0.5038, "step": 29967 }, { "epoch": 98.25573770491803, "grad_norm": 5.506752014160156, "learning_rate": 1.5952706085089475e-08, "loss": 0.2713, "step": 29968 }, { "epoch": 98.25901639344262, "grad_norm": 4.412115573883057, "learning_rate": 1.5892805746696716e-08, "loss": 0.4866, "step": 29969 }, { "epoch": 98.26229508196721, "grad_norm": 5.007838249206543, "learning_rate": 1.5833017989493882e-08, "loss": 0.2817, "step": 29970 }, { "epoch": 98.2655737704918, "grad_norm": 4.09788703918457, "learning_rate": 1.577334281415488e-08, "loss": 0.1389, "step": 29971 }, { "epoch": 98.26885245901639, "grad_norm": 7.036113262176514, "learning_rate": 1.5713780221352503e-08, "loss": 0.2683, "step": 29972 }, { "epoch": 98.27213114754099, "grad_norm": 4.531503677368164, "learning_rate": 1.565433021175844e-08, "loss": 0.3513, "step": 29973 }, { "epoch": 98.27540983606558, "grad_norm": 5.634396076202393, "learning_rate": 1.559499278604215e-08, "loss": 0.4226, "step": 29974 }, { "epoch": 98.27868852459017, "grad_norm": 4.890310287475586, "learning_rate": 1.5535767944874215e-08, "loss": 0.3555, "step": 29975 }, { "epoch": 98.28196721311475, "grad_norm": 5.7317070960998535, "learning_rate": 1.5476655688921872e-08, "loss": 0.1926, "step": 29976 }, { "epoch": 98.28524590163934, "grad_norm": 5.604348659515381, "learning_rate": 1.5417656018851257e-08, "loss": 0.1781, "step": 29977 }, { "epoch": 98.28852459016393, "grad_norm": 8.374754905700684, "learning_rate": 1.5358768935327395e-08, "loss": 0.3413, "step": 29978 }, { "epoch": 98.29180327868852, "grad_norm": 5.158191680908203, "learning_rate": 1.529999443901531e-08, "loss": 0.3685, "step": 29979 }, { "epoch": 98.29508196721312, "grad_norm": 5.010882377624512, "learning_rate": 1.524133253057669e-08, "loss": 0.5505, "step": 29980 }, { "epoch": 98.29836065573771, "grad_norm": 4.305232048034668, "learning_rate": 1.5182783210674347e-08, "loss": 0.3226, "step": 29981 }, { "epoch": 98.3016393442623, "grad_norm": 3.9805805683135986, "learning_rate": 1.5124346479967744e-08, "loss": 0.2351, "step": 29982 }, { "epoch": 98.30491803278689, "grad_norm": 7.721524715423584, "learning_rate": 1.506602233911525e-08, "loss": 0.4894, "step": 29983 }, { "epoch": 98.30819672131148, "grad_norm": 3.3920493125915527, "learning_rate": 1.5007810788775222e-08, "loss": 0.1842, "step": 29984 }, { "epoch": 98.31147540983606, "grad_norm": 5.019822120666504, "learning_rate": 1.4949711829603807e-08, "loss": 0.3876, "step": 29985 }, { "epoch": 98.31475409836065, "grad_norm": 4.95302152633667, "learning_rate": 1.4891725462257145e-08, "loss": 0.6198, "step": 29986 }, { "epoch": 98.31803278688524, "grad_norm": 4.541217803955078, "learning_rate": 1.4833851687386935e-08, "loss": 0.3585, "step": 29987 }, { "epoch": 98.32131147540984, "grad_norm": 5.042107582092285, "learning_rate": 1.4776090505648211e-08, "loss": 0.2586, "step": 29988 }, { "epoch": 98.32459016393443, "grad_norm": 6.495304107666016, "learning_rate": 1.4718441917690452e-08, "loss": 0.6083, "step": 29989 }, { "epoch": 98.32786885245902, "grad_norm": 3.7784879207611084, "learning_rate": 1.4660905924164248e-08, "loss": 0.4191, "step": 29990 }, { "epoch": 98.33114754098361, "grad_norm": 4.540966987609863, "learning_rate": 1.4603482525717972e-08, "loss": 0.3451, "step": 29991 }, { "epoch": 98.3344262295082, "grad_norm": 4.650204181671143, "learning_rate": 1.45461717230011e-08, "loss": 0.4899, "step": 29992 }, { "epoch": 98.33770491803278, "grad_norm": 5.483153820037842, "learning_rate": 1.4488973516657568e-08, "loss": 0.2563, "step": 29993 }, { "epoch": 98.34098360655737, "grad_norm": 4.905778408050537, "learning_rate": 1.4431887907332409e-08, "loss": 0.5383, "step": 29994 }, { "epoch": 98.34426229508196, "grad_norm": 6.126343727111816, "learning_rate": 1.4374914895671776e-08, "loss": 0.2448, "step": 29995 }, { "epoch": 98.34754098360656, "grad_norm": 3.693634271621704, "learning_rate": 1.4318054482315158e-08, "loss": 0.3586, "step": 29996 }, { "epoch": 98.35081967213115, "grad_norm": 4.809548854827881, "learning_rate": 1.4261306667905372e-08, "loss": 0.4331, "step": 29997 }, { "epoch": 98.35409836065574, "grad_norm": 4.638577461242676, "learning_rate": 1.4204671453081909e-08, "loss": 0.4446, "step": 29998 }, { "epoch": 98.35737704918033, "grad_norm": 3.997763156890869, "learning_rate": 1.4148148838483145e-08, "loss": 0.17, "step": 29999 }, { "epoch": 98.36065573770492, "grad_norm": 4.05896520614624, "learning_rate": 1.4091738824747458e-08, "loss": 0.4576, "step": 30000 }, { "epoch": 98.3639344262295, "grad_norm": 5.188887119293213, "learning_rate": 1.4035441412509899e-08, "loss": 0.4251, "step": 30001 }, { "epoch": 98.3672131147541, "grad_norm": 5.888725757598877, "learning_rate": 1.397925660240551e-08, "loss": 0.3437, "step": 30002 }, { "epoch": 98.37049180327868, "grad_norm": 4.951490879058838, "learning_rate": 1.3923184395067124e-08, "loss": 0.3009, "step": 30003 }, { "epoch": 98.37377049180328, "grad_norm": 5.081794261932373, "learning_rate": 1.3867224791128675e-08, "loss": 0.6857, "step": 30004 }, { "epoch": 98.37704918032787, "grad_norm": 4.8509745597839355, "learning_rate": 1.381137779121966e-08, "loss": 0.3142, "step": 30005 }, { "epoch": 98.38032786885246, "grad_norm": 4.368444919586182, "learning_rate": 1.3755643395970685e-08, "loss": 0.2611, "step": 30006 }, { "epoch": 98.38360655737705, "grad_norm": 8.813375473022461, "learning_rate": 1.370002160601014e-08, "loss": 0.4107, "step": 30007 }, { "epoch": 98.38688524590164, "grad_norm": 5.003798961639404, "learning_rate": 1.3644512421964184e-08, "loss": 0.4811, "step": 30008 }, { "epoch": 98.39016393442623, "grad_norm": 7.417763710021973, "learning_rate": 1.3589115844460098e-08, "loss": 0.308, "step": 30009 }, { "epoch": 98.39344262295081, "grad_norm": 4.131083965301514, "learning_rate": 1.3533831874121828e-08, "loss": 0.4645, "step": 30010 }, { "epoch": 98.3967213114754, "grad_norm": 5.536590099334717, "learning_rate": 1.3478660511573316e-08, "loss": 0.2905, "step": 30011 }, { "epoch": 98.4, "grad_norm": 8.636581420898438, "learning_rate": 1.3423601757436289e-08, "loss": 0.4622, "step": 30012 }, { "epoch": 98.4032786885246, "grad_norm": 3.9473698139190674, "learning_rate": 1.336865561233136e-08, "loss": 0.4572, "step": 30013 }, { "epoch": 98.40655737704918, "grad_norm": 6.045205116271973, "learning_rate": 1.3313822076878036e-08, "loss": 0.5538, "step": 30014 }, { "epoch": 98.40983606557377, "grad_norm": 6.632084369659424, "learning_rate": 1.325910115169471e-08, "loss": 0.4338, "step": 30015 }, { "epoch": 98.41311475409836, "grad_norm": 4.50796365737915, "learning_rate": 1.3204492837399774e-08, "loss": 0.5229, "step": 30016 }, { "epoch": 98.41639344262295, "grad_norm": 5.891873836517334, "learning_rate": 1.3149997134607185e-08, "loss": 0.2839, "step": 30017 }, { "epoch": 98.41967213114754, "grad_norm": 3.716468572616577, "learning_rate": 1.3095614043932004e-08, "loss": 0.5029, "step": 30018 }, { "epoch": 98.42295081967212, "grad_norm": 5.265543460845947, "learning_rate": 1.3041343565987074e-08, "loss": 0.582, "step": 30019 }, { "epoch": 98.42622950819673, "grad_norm": 3.305100917816162, "learning_rate": 1.2987185701385242e-08, "loss": 0.424, "step": 30020 }, { "epoch": 98.42950819672132, "grad_norm": 3.7805371284484863, "learning_rate": 1.2933140450737125e-08, "loss": 0.5284, "step": 30021 }, { "epoch": 98.4327868852459, "grad_norm": 5.882532596588135, "learning_rate": 1.287920781465224e-08, "loss": 0.3759, "step": 30022 }, { "epoch": 98.43606557377049, "grad_norm": 5.8005690574646, "learning_rate": 1.2825387793736766e-08, "loss": 0.1667, "step": 30023 }, { "epoch": 98.43934426229508, "grad_norm": 6.468861103057861, "learning_rate": 1.2771680388600216e-08, "loss": 0.3502, "step": 30024 }, { "epoch": 98.44262295081967, "grad_norm": 3.8814265727996826, "learning_rate": 1.2718085599847662e-08, "loss": 0.1477, "step": 30025 }, { "epoch": 98.44590163934426, "grad_norm": 4.968142509460449, "learning_rate": 1.2664603428080847e-08, "loss": 0.4564, "step": 30026 }, { "epoch": 98.44918032786886, "grad_norm": 6.205862998962402, "learning_rate": 1.2611233873907059e-08, "loss": 0.2912, "step": 30027 }, { "epoch": 98.45245901639345, "grad_norm": 4.285089015960693, "learning_rate": 1.2557976937924709e-08, "loss": 0.2651, "step": 30028 }, { "epoch": 98.45573770491804, "grad_norm": 4.60920524597168, "learning_rate": 1.2504832620735542e-08, "loss": 0.2598, "step": 30029 }, { "epoch": 98.45901639344262, "grad_norm": 5.115783214569092, "learning_rate": 1.2451800922939072e-08, "loss": 0.4192, "step": 30030 }, { "epoch": 98.46229508196721, "grad_norm": 4.188279151916504, "learning_rate": 1.2398881845132605e-08, "loss": 0.3988, "step": 30031 }, { "epoch": 98.4655737704918, "grad_norm": 4.7411651611328125, "learning_rate": 1.2346075387913436e-08, "loss": 0.3169, "step": 30032 }, { "epoch": 98.46885245901639, "grad_norm": 5.949612617492676, "learning_rate": 1.2293381551876649e-08, "loss": 0.4394, "step": 30033 }, { "epoch": 98.47213114754098, "grad_norm": 3.670905351638794, "learning_rate": 1.224080033761732e-08, "loss": 0.2593, "step": 30034 }, { "epoch": 98.47540983606558, "grad_norm": 4.24107551574707, "learning_rate": 1.21883317457272e-08, "loss": 0.1151, "step": 30035 }, { "epoch": 98.47868852459017, "grad_norm": 4.243551731109619, "learning_rate": 1.2135975776798036e-08, "loss": 0.3928, "step": 30036 }, { "epoch": 98.48196721311476, "grad_norm": 6.261160373687744, "learning_rate": 1.208373243142047e-08, "loss": 0.615, "step": 30037 }, { "epoch": 98.48524590163935, "grad_norm": 4.160933494567871, "learning_rate": 1.2031601710184026e-08, "loss": 0.4245, "step": 30038 }, { "epoch": 98.48852459016393, "grad_norm": 6.1088972091674805, "learning_rate": 1.1979583613676015e-08, "loss": 0.2599, "step": 30039 }, { "epoch": 98.49180327868852, "grad_norm": 4.807248115539551, "learning_rate": 1.1927678142483746e-08, "loss": 0.4797, "step": 30040 }, { "epoch": 98.49508196721311, "grad_norm": 4.9626054763793945, "learning_rate": 1.1875885297191192e-08, "loss": 0.1998, "step": 30041 }, { "epoch": 98.4983606557377, "grad_norm": 5.979335784912109, "learning_rate": 1.1824205078383444e-08, "loss": 0.2882, "step": 30042 }, { "epoch": 98.5016393442623, "grad_norm": 4.5896759033203125, "learning_rate": 1.1772637486642258e-08, "loss": 0.3434, "step": 30043 }, { "epoch": 98.50491803278689, "grad_norm": 3.872969627380371, "learning_rate": 1.172118252254939e-08, "loss": 0.1813, "step": 30044 }, { "epoch": 98.50819672131148, "grad_norm": 4.740760803222656, "learning_rate": 1.1669840186686599e-08, "loss": 0.4397, "step": 30045 }, { "epoch": 98.51147540983607, "grad_norm": 12.581608772277832, "learning_rate": 1.1618610479631198e-08, "loss": 0.3356, "step": 30046 }, { "epoch": 98.51475409836065, "grad_norm": 5.942957401275635, "learning_rate": 1.1567493401961616e-08, "loss": 0.3721, "step": 30047 }, { "epoch": 98.51803278688524, "grad_norm": 4.012843132019043, "learning_rate": 1.1516488954252947e-08, "loss": 0.6592, "step": 30048 }, { "epoch": 98.52131147540983, "grad_norm": 6.264941692352295, "learning_rate": 1.1465597137082507e-08, "loss": 0.2974, "step": 30049 }, { "epoch": 98.52459016393442, "grad_norm": 4.0737223625183105, "learning_rate": 1.1414817951022062e-08, "loss": 0.2859, "step": 30050 }, { "epoch": 98.52786885245902, "grad_norm": 8.431551933288574, "learning_rate": 1.1364151396645596e-08, "loss": 0.3526, "step": 30051 }, { "epoch": 98.53114754098361, "grad_norm": 5.154022216796875, "learning_rate": 1.1313597474523764e-08, "loss": 0.246, "step": 30052 }, { "epoch": 98.5344262295082, "grad_norm": 5.559288501739502, "learning_rate": 1.1263156185226109e-08, "loss": 0.3244, "step": 30053 }, { "epoch": 98.53770491803279, "grad_norm": 4.830925464630127, "learning_rate": 1.1212827529322178e-08, "loss": 0.2491, "step": 30054 }, { "epoch": 98.54098360655738, "grad_norm": 4.7539448738098145, "learning_rate": 1.1162611507380406e-08, "loss": 0.4708, "step": 30055 }, { "epoch": 98.54426229508196, "grad_norm": 9.093680381774902, "learning_rate": 1.1112508119964782e-08, "loss": 0.4514, "step": 30056 }, { "epoch": 98.54754098360655, "grad_norm": 4.466962814331055, "learning_rate": 1.1062517367642633e-08, "loss": 0.347, "step": 30057 }, { "epoch": 98.55081967213114, "grad_norm": 4.487472057342529, "learning_rate": 1.1012639250975731e-08, "loss": 0.2582, "step": 30058 }, { "epoch": 98.55409836065574, "grad_norm": 3.8683369159698486, "learning_rate": 1.096287377052696e-08, "loss": 0.3339, "step": 30059 }, { "epoch": 98.55737704918033, "grad_norm": 4.006760120391846, "learning_rate": 1.0913220926858092e-08, "loss": 0.2399, "step": 30060 }, { "epoch": 98.56065573770492, "grad_norm": 4.46733283996582, "learning_rate": 1.086368072052868e-08, "loss": 0.4477, "step": 30061 }, { "epoch": 98.56393442622951, "grad_norm": 4.497267246246338, "learning_rate": 1.0814253152098275e-08, "loss": 0.5843, "step": 30062 }, { "epoch": 98.5672131147541, "grad_norm": 5.36850118637085, "learning_rate": 1.076493822212199e-08, "loss": 0.2943, "step": 30063 }, { "epoch": 98.57049180327868, "grad_norm": 11.625216484069824, "learning_rate": 1.0715735931158266e-08, "loss": 0.3998, "step": 30064 }, { "epoch": 98.57377049180327, "grad_norm": 6.450196266174316, "learning_rate": 1.0666646279759996e-08, "loss": 0.5256, "step": 30065 }, { "epoch": 98.57704918032788, "grad_norm": 5.64532470703125, "learning_rate": 1.061766926848229e-08, "loss": 0.3421, "step": 30066 }, { "epoch": 98.58032786885246, "grad_norm": 5.244251728057861, "learning_rate": 1.0568804897875818e-08, "loss": 0.5666, "step": 30067 }, { "epoch": 98.58360655737705, "grad_norm": 3.8776259422302246, "learning_rate": 1.0520053168493471e-08, "loss": 0.3831, "step": 30068 }, { "epoch": 98.58688524590164, "grad_norm": 4.141992568969727, "learning_rate": 1.0471414080883702e-08, "loss": 0.2446, "step": 30069 }, { "epoch": 98.59016393442623, "grad_norm": 5.276762962341309, "learning_rate": 1.0422887635594959e-08, "loss": 0.6259, "step": 30070 }, { "epoch": 98.59344262295082, "grad_norm": 5.286577224731445, "learning_rate": 1.0374473833174581e-08, "loss": 0.3033, "step": 30071 }, { "epoch": 98.5967213114754, "grad_norm": 3.72823429107666, "learning_rate": 1.03261726741688e-08, "loss": 0.2308, "step": 30072 }, { "epoch": 98.6, "grad_norm": 4.298457622528076, "learning_rate": 1.0277984159122734e-08, "loss": 0.3979, "step": 30073 }, { "epoch": 98.6032786885246, "grad_norm": 4.203033924102783, "learning_rate": 1.0229908288578171e-08, "loss": 0.4612, "step": 30074 }, { "epoch": 98.60655737704919, "grad_norm": 6.946444511413574, "learning_rate": 1.0181945063079125e-08, "loss": 0.3325, "step": 30075 }, { "epoch": 98.60983606557377, "grad_norm": 4.302467346191406, "learning_rate": 1.0134094483164048e-08, "loss": 0.3478, "step": 30076 }, { "epoch": 98.61311475409836, "grad_norm": 4.276561260223389, "learning_rate": 1.0086356549374731e-08, "loss": 0.2104, "step": 30077 }, { "epoch": 98.61639344262295, "grad_norm": 5.389086723327637, "learning_rate": 1.0038731262248524e-08, "loss": 0.5676, "step": 30078 }, { "epoch": 98.61967213114754, "grad_norm": 10.404082298278809, "learning_rate": 9.991218622322774e-09, "loss": 0.5979, "step": 30079 }, { "epoch": 98.62295081967213, "grad_norm": 4.828352451324463, "learning_rate": 9.943818630133716e-09, "loss": 0.5126, "step": 30080 }, { "epoch": 98.62622950819672, "grad_norm": 4.607102870941162, "learning_rate": 9.896531286214261e-09, "loss": 0.377, "step": 30081 }, { "epoch": 98.62950819672132, "grad_norm": 4.804977893829346, "learning_rate": 9.849356591098424e-09, "loss": 0.4358, "step": 30082 }, { "epoch": 98.6327868852459, "grad_norm": 4.957468509674072, "learning_rate": 9.802294545318003e-09, "loss": 0.4077, "step": 30083 }, { "epoch": 98.6360655737705, "grad_norm": 5.445928573608398, "learning_rate": 9.755345149404794e-09, "loss": 0.6236, "step": 30084 }, { "epoch": 98.63934426229508, "grad_norm": 4.521383762359619, "learning_rate": 9.708508403887262e-09, "loss": 0.3093, "step": 30085 }, { "epoch": 98.64262295081967, "grad_norm": 8.650774002075195, "learning_rate": 9.661784309292765e-09, "loss": 0.4782, "step": 30086 }, { "epoch": 98.64590163934426, "grad_norm": 5.180764198303223, "learning_rate": 9.615172866149768e-09, "loss": 0.2782, "step": 30087 }, { "epoch": 98.64918032786885, "grad_norm": 3.7257513999938965, "learning_rate": 9.568674074982298e-09, "loss": 0.3759, "step": 30088 }, { "epoch": 98.65245901639344, "grad_norm": 5.540517807006836, "learning_rate": 9.522287936316599e-09, "loss": 0.5048, "step": 30089 }, { "epoch": 98.65573770491804, "grad_norm": 5.049629211425781, "learning_rate": 9.476014450673365e-09, "loss": 0.2238, "step": 30090 }, { "epoch": 98.65901639344263, "grad_norm": 5.739275932312012, "learning_rate": 9.429853618576622e-09, "loss": 0.3236, "step": 30091 }, { "epoch": 98.66229508196722, "grad_norm": 4.716647148132324, "learning_rate": 9.383805440545957e-09, "loss": 0.289, "step": 30092 }, { "epoch": 98.6655737704918, "grad_norm": 4.89293909072876, "learning_rate": 9.33786991709984e-09, "loss": 0.4258, "step": 30093 }, { "epoch": 98.66885245901639, "grad_norm": 6.133468151092529, "learning_rate": 9.292047048756747e-09, "loss": 0.3835, "step": 30094 }, { "epoch": 98.67213114754098, "grad_norm": 5.137686729431152, "learning_rate": 9.246336836034043e-09, "loss": 0.4801, "step": 30095 }, { "epoch": 98.67540983606557, "grad_norm": 4.161472797393799, "learning_rate": 9.200739279446868e-09, "loss": 0.3758, "step": 30096 }, { "epoch": 98.67868852459016, "grad_norm": 5.429937362670898, "learning_rate": 9.155254379508149e-09, "loss": 0.4924, "step": 30097 }, { "epoch": 98.68196721311476, "grad_norm": 5.437263488769531, "learning_rate": 9.109882136733029e-09, "loss": 0.4306, "step": 30098 }, { "epoch": 98.68524590163935, "grad_norm": 6.455376625061035, "learning_rate": 9.064622551631098e-09, "loss": 0.2939, "step": 30099 }, { "epoch": 98.68852459016394, "grad_norm": 4.954666614532471, "learning_rate": 9.019475624714169e-09, "loss": 0.4021, "step": 30100 }, { "epoch": 98.69180327868852, "grad_norm": 4.660338878631592, "learning_rate": 8.974441356489616e-09, "loss": 0.3775, "step": 30101 }, { "epoch": 98.69508196721311, "grad_norm": 4.271225452423096, "learning_rate": 8.92951974746703e-09, "loss": 0.143, "step": 30102 }, { "epoch": 98.6983606557377, "grad_norm": 5.118976593017578, "learning_rate": 8.884710798152674e-09, "loss": 0.3215, "step": 30103 }, { "epoch": 98.70163934426229, "grad_norm": 5.2368550300598145, "learning_rate": 8.840014509050588e-09, "loss": 0.1338, "step": 30104 }, { "epoch": 98.70491803278688, "grad_norm": 4.606515407562256, "learning_rate": 8.795430880665922e-09, "loss": 0.4627, "step": 30105 }, { "epoch": 98.70819672131148, "grad_norm": 4.6523895263671875, "learning_rate": 8.750959913500501e-09, "loss": 0.4427, "step": 30106 }, { "epoch": 98.71147540983607, "grad_norm": 5.87211275100708, "learning_rate": 8.706601608057252e-09, "loss": 0.177, "step": 30107 }, { "epoch": 98.71475409836066, "grad_norm": 6.903456687927246, "learning_rate": 8.662355964834667e-09, "loss": 0.5232, "step": 30108 }, { "epoch": 98.71803278688525, "grad_norm": 4.834142208099365, "learning_rate": 8.618222984332347e-09, "loss": 0.4824, "step": 30109 }, { "epoch": 98.72131147540983, "grad_norm": 13.481209754943848, "learning_rate": 8.574202667048782e-09, "loss": 0.5442, "step": 30110 }, { "epoch": 98.72459016393442, "grad_norm": 4.520722389221191, "learning_rate": 8.530295013479129e-09, "loss": 0.3395, "step": 30111 }, { "epoch": 98.72786885245901, "grad_norm": 5.12448787689209, "learning_rate": 8.48650002411855e-09, "loss": 0.2446, "step": 30112 }, { "epoch": 98.73114754098361, "grad_norm": 4.275254726409912, "learning_rate": 8.442817699462202e-09, "loss": 0.2311, "step": 30113 }, { "epoch": 98.7344262295082, "grad_norm": 4.375673294067383, "learning_rate": 8.399248040000808e-09, "loss": 0.1917, "step": 30114 }, { "epoch": 98.73770491803279, "grad_norm": 4.011582374572754, "learning_rate": 8.355791046226191e-09, "loss": 0.3499, "step": 30115 }, { "epoch": 98.74098360655738, "grad_norm": 7.474967002868652, "learning_rate": 8.312446718630186e-09, "loss": 0.4637, "step": 30116 }, { "epoch": 98.74426229508197, "grad_norm": 5.284393310546875, "learning_rate": 8.269215057699066e-09, "loss": 0.3006, "step": 30117 }, { "epoch": 98.74754098360656, "grad_norm": 4.624584197998047, "learning_rate": 8.22609606392133e-09, "loss": 0.6256, "step": 30118 }, { "epoch": 98.75081967213114, "grad_norm": 3.826265811920166, "learning_rate": 8.183089737783256e-09, "loss": 0.3364, "step": 30119 }, { "epoch": 98.75409836065573, "grad_norm": 4.212737083435059, "learning_rate": 8.140196079770013e-09, "loss": 0.1334, "step": 30120 }, { "epoch": 98.75737704918033, "grad_norm": 4.842738151550293, "learning_rate": 8.097415090364547e-09, "loss": 0.3363, "step": 30121 }, { "epoch": 98.76065573770492, "grad_norm": 4.34567403793335, "learning_rate": 8.054746770049804e-09, "loss": 0.3562, "step": 30122 }, { "epoch": 98.76393442622951, "grad_norm": 4.646992206573486, "learning_rate": 8.012191119307622e-09, "loss": 0.3531, "step": 30123 }, { "epoch": 98.7672131147541, "grad_norm": 4.251141548156738, "learning_rate": 7.969748138616507e-09, "loss": 0.2683, "step": 30124 }, { "epoch": 98.77049180327869, "grad_norm": 4.33439302444458, "learning_rate": 7.927417828454965e-09, "loss": 0.2516, "step": 30125 }, { "epoch": 98.77377049180328, "grad_norm": 3.8226871490478516, "learning_rate": 7.8852001893015e-09, "loss": 0.4779, "step": 30126 }, { "epoch": 98.77704918032786, "grad_norm": 3.6773433685302734, "learning_rate": 7.843095221631291e-09, "loss": 0.2932, "step": 30127 }, { "epoch": 98.78032786885245, "grad_norm": 4.533839702606201, "learning_rate": 7.801102925920622e-09, "loss": 0.2897, "step": 30128 }, { "epoch": 98.78360655737706, "grad_norm": 5.557707786560059, "learning_rate": 7.759223302640228e-09, "loss": 0.3372, "step": 30129 }, { "epoch": 98.78688524590164, "grad_norm": 4.730175018310547, "learning_rate": 7.717456352264175e-09, "loss": 0.4411, "step": 30130 }, { "epoch": 98.79016393442623, "grad_norm": 4.71404504776001, "learning_rate": 7.675802075264305e-09, "loss": 0.4554, "step": 30131 }, { "epoch": 98.79344262295082, "grad_norm": 8.283388137817383, "learning_rate": 7.634260472108023e-09, "loss": 0.2153, "step": 30132 }, { "epoch": 98.79672131147541, "grad_norm": 7.124741077423096, "learning_rate": 7.592831543266066e-09, "loss": 0.3736, "step": 30133 }, { "epoch": 98.8, "grad_norm": 6.878481864929199, "learning_rate": 7.551515289203615e-09, "loss": 0.2563, "step": 30134 }, { "epoch": 98.80327868852459, "grad_norm": 6.020030975341797, "learning_rate": 7.510311710386964e-09, "loss": 0.4094, "step": 30135 }, { "epoch": 98.80655737704917, "grad_norm": 6.372445106506348, "learning_rate": 7.469220807281297e-09, "loss": 0.3924, "step": 30136 }, { "epoch": 98.80983606557378, "grad_norm": 5.078782558441162, "learning_rate": 7.428242580350686e-09, "loss": 0.4071, "step": 30137 }, { "epoch": 98.81311475409836, "grad_norm": 4.432847499847412, "learning_rate": 7.387377030055875e-09, "loss": 0.3815, "step": 30138 }, { "epoch": 98.81639344262295, "grad_norm": 4.902745246887207, "learning_rate": 7.3466241568576065e-09, "loss": 0.3096, "step": 30139 }, { "epoch": 98.81967213114754, "grad_norm": 5.361023902893066, "learning_rate": 7.305983961216623e-09, "loss": 0.3508, "step": 30140 }, { "epoch": 98.82295081967213, "grad_norm": 4.763502597808838, "learning_rate": 7.265456443590335e-09, "loss": 0.2956, "step": 30141 }, { "epoch": 98.82622950819672, "grad_norm": 4.403288841247559, "learning_rate": 7.225041604435046e-09, "loss": 0.2886, "step": 30142 }, { "epoch": 98.8295081967213, "grad_norm": 4.866436004638672, "learning_rate": 7.1847394442081665e-09, "loss": 0.2533, "step": 30143 }, { "epoch": 98.8327868852459, "grad_norm": 5.449641704559326, "learning_rate": 7.144549963362668e-09, "loss": 0.2385, "step": 30144 }, { "epoch": 98.8360655737705, "grad_norm": 6.449153423309326, "learning_rate": 7.104473162352632e-09, "loss": 0.4169, "step": 30145 }, { "epoch": 98.83934426229509, "grad_norm": 4.663259029388428, "learning_rate": 7.064509041629919e-09, "loss": 0.3539, "step": 30146 }, { "epoch": 98.84262295081967, "grad_norm": 4.412985324859619, "learning_rate": 7.02465760164417e-09, "loss": 0.4623, "step": 30147 }, { "epoch": 98.84590163934426, "grad_norm": 7.344740390777588, "learning_rate": 6.984918842846133e-09, "loss": 0.1567, "step": 30148 }, { "epoch": 98.84918032786885, "grad_norm": 6.002574920654297, "learning_rate": 6.945292765683231e-09, "loss": 0.408, "step": 30149 }, { "epoch": 98.85245901639344, "grad_norm": 6.547835350036621, "learning_rate": 6.905779370601773e-09, "loss": 0.37, "step": 30150 }, { "epoch": 98.85573770491803, "grad_norm": 4.115286350250244, "learning_rate": 6.866378658049178e-09, "loss": 0.2428, "step": 30151 }, { "epoch": 98.85901639344263, "grad_norm": 4.870417594909668, "learning_rate": 6.8270906284673145e-09, "loss": 0.3879, "step": 30152 }, { "epoch": 98.86229508196722, "grad_norm": 6.1422438621521, "learning_rate": 6.7879152823002726e-09, "loss": 0.6287, "step": 30153 }, { "epoch": 98.8655737704918, "grad_norm": 5.127813816070557, "learning_rate": 6.74885261998992e-09, "loss": 0.4134, "step": 30154 }, { "epoch": 98.8688524590164, "grad_norm": 5.416986465454102, "learning_rate": 6.709902641977018e-09, "loss": 0.453, "step": 30155 }, { "epoch": 98.87213114754098, "grad_norm": 7.331116199493408, "learning_rate": 6.6710653487001005e-09, "loss": 0.362, "step": 30156 }, { "epoch": 98.87540983606557, "grad_norm": 5.407893657684326, "learning_rate": 6.632340740597709e-09, "loss": 0.4172, "step": 30157 }, { "epoch": 98.87868852459016, "grad_norm": 5.095007419586182, "learning_rate": 6.5937288181061595e-09, "loss": 0.1935, "step": 30158 }, { "epoch": 98.88196721311475, "grad_norm": 4.470791339874268, "learning_rate": 6.555229581660661e-09, "loss": 0.2614, "step": 30159 }, { "epoch": 98.88524590163935, "grad_norm": 5.316206932067871, "learning_rate": 6.516843031695308e-09, "loss": 0.3028, "step": 30160 }, { "epoch": 98.88852459016394, "grad_norm": 5.160395622253418, "learning_rate": 6.47856916864309e-09, "loss": 0.3553, "step": 30161 }, { "epoch": 98.89180327868853, "grad_norm": 6.875415802001953, "learning_rate": 6.440407992935882e-09, "loss": 0.3783, "step": 30162 }, { "epoch": 98.89508196721312, "grad_norm": 7.35301399230957, "learning_rate": 6.4023595050044514e-09, "loss": 0.5591, "step": 30163 }, { "epoch": 98.8983606557377, "grad_norm": 4.907260894775391, "learning_rate": 6.3644237052762346e-09, "loss": 0.4351, "step": 30164 }, { "epoch": 98.90163934426229, "grad_norm": 3.5864226818084717, "learning_rate": 6.326600594179777e-09, "loss": 0.3014, "step": 30165 }, { "epoch": 98.90491803278688, "grad_norm": 4.619290828704834, "learning_rate": 6.288890172142515e-09, "loss": 0.2607, "step": 30166 }, { "epoch": 98.90819672131147, "grad_norm": 4.300400733947754, "learning_rate": 6.251292439588552e-09, "loss": 0.2031, "step": 30167 }, { "epoch": 98.91147540983607, "grad_norm": 11.385040283203125, "learning_rate": 6.213807396941995e-09, "loss": 0.4366, "step": 30168 }, { "epoch": 98.91475409836066, "grad_norm": 13.869503021240234, "learning_rate": 6.176435044625839e-09, "loss": 0.452, "step": 30169 }, { "epoch": 98.91803278688525, "grad_norm": 3.954251289367676, "learning_rate": 6.139175383060858e-09, "loss": 0.2197, "step": 30170 }, { "epoch": 98.92131147540984, "grad_norm": 4.156396865844727, "learning_rate": 6.102028412667827e-09, "loss": 0.3864, "step": 30171 }, { "epoch": 98.92459016393443, "grad_norm": 5.457239151000977, "learning_rate": 6.064994133866409e-09, "loss": 0.2377, "step": 30172 }, { "epoch": 98.92786885245901, "grad_norm": 4.414928436279297, "learning_rate": 6.028072547071828e-09, "loss": 0.1891, "step": 30173 }, { "epoch": 98.9311475409836, "grad_norm": 5.045699119567871, "learning_rate": 5.991263652703749e-09, "loss": 0.6498, "step": 30174 }, { "epoch": 98.93442622950819, "grad_norm": 7.933241367340088, "learning_rate": 5.954567451174065e-09, "loss": 0.175, "step": 30175 }, { "epoch": 98.9377049180328, "grad_norm": 7.736171722412109, "learning_rate": 5.917983942897998e-09, "loss": 0.2661, "step": 30176 }, { "epoch": 98.94098360655738, "grad_norm": 4.00980281829834, "learning_rate": 5.881513128287442e-09, "loss": 0.3385, "step": 30177 }, { "epoch": 98.94426229508197, "grad_norm": 3.706878423690796, "learning_rate": 5.845155007754288e-09, "loss": 0.4246, "step": 30178 }, { "epoch": 98.94754098360656, "grad_norm": 4.838839530944824, "learning_rate": 5.808909581709321e-09, "loss": 0.413, "step": 30179 }, { "epoch": 98.95081967213115, "grad_norm": 5.014048099517822, "learning_rate": 5.772776850558881e-09, "loss": 0.4685, "step": 30180 }, { "epoch": 98.95409836065573, "grad_norm": 4.8485918045043945, "learning_rate": 5.73675681471264e-09, "loss": 0.3196, "step": 30181 }, { "epoch": 98.95737704918032, "grad_norm": 4.363319396972656, "learning_rate": 5.700849474575831e-09, "loss": 0.2042, "step": 30182 }, { "epoch": 98.96065573770491, "grad_norm": 4.15648889541626, "learning_rate": 5.665054830553684e-09, "loss": 0.2742, "step": 30183 }, { "epoch": 98.96393442622951, "grad_norm": 4.3182373046875, "learning_rate": 5.6293728830492115e-09, "loss": 0.3248, "step": 30184 }, { "epoch": 98.9672131147541, "grad_norm": 5.8603949546813965, "learning_rate": 5.593803632464312e-09, "loss": 0.2736, "step": 30185 }, { "epoch": 98.97049180327869, "grad_norm": 5.549642086029053, "learning_rate": 5.5583470792019975e-09, "loss": 0.3077, "step": 30186 }, { "epoch": 98.97377049180328, "grad_norm": 4.639422416687012, "learning_rate": 5.523003223659729e-09, "loss": 0.196, "step": 30187 }, { "epoch": 98.97704918032787, "grad_norm": 5.620364189147949, "learning_rate": 5.487772066238295e-09, "loss": 0.2342, "step": 30188 }, { "epoch": 98.98032786885246, "grad_norm": 9.191078186035156, "learning_rate": 5.452653607334046e-09, "loss": 0.4416, "step": 30189 }, { "epoch": 98.98360655737704, "grad_norm": 4.9377641677856445, "learning_rate": 5.417647847342222e-09, "loss": 0.4177, "step": 30190 }, { "epoch": 98.98688524590163, "grad_norm": 3.811338186264038, "learning_rate": 5.382754786658062e-09, "loss": 0.3093, "step": 30191 }, { "epoch": 98.99016393442623, "grad_norm": 5.776835918426514, "learning_rate": 5.347974425675695e-09, "loss": 0.1927, "step": 30192 }, { "epoch": 98.99344262295082, "grad_norm": 4.411137580871582, "learning_rate": 5.313306764787029e-09, "loss": 0.3174, "step": 30193 }, { "epoch": 98.99672131147541, "grad_norm": 7.423463344573975, "learning_rate": 5.278751804381754e-09, "loss": 0.3435, "step": 30194 }, { "epoch": 99.0, "grad_norm": 4.43885612487793, "learning_rate": 5.2443095448506674e-09, "loss": 0.3642, "step": 30195 }, { "epoch": 99.00327868852459, "grad_norm": 4.661487579345703, "learning_rate": 5.209979986582347e-09, "loss": 0.2943, "step": 30196 }, { "epoch": 99.00655737704918, "grad_norm": 4.693761348724365, "learning_rate": 5.175763129963152e-09, "loss": 0.3344, "step": 30197 }, { "epoch": 99.00983606557377, "grad_norm": 4.5453314781188965, "learning_rate": 5.1416589753794376e-09, "loss": 0.3635, "step": 30198 }, { "epoch": 99.01311475409837, "grad_norm": 9.182716369628906, "learning_rate": 5.1076675232153426e-09, "loss": 0.4082, "step": 30199 }, { "epoch": 99.01639344262296, "grad_norm": 5.256496906280518, "learning_rate": 5.073788773855004e-09, "loss": 0.198, "step": 30200 }, { "epoch": 99.01967213114754, "grad_norm": 5.127089023590088, "learning_rate": 5.040022727679228e-09, "loss": 0.4759, "step": 30201 }, { "epoch": 99.02295081967213, "grad_norm": 10.232494354248047, "learning_rate": 5.0063693850699315e-09, "loss": 0.5653, "step": 30202 }, { "epoch": 99.02622950819672, "grad_norm": 4.771909236907959, "learning_rate": 4.9728287464057e-09, "loss": 0.452, "step": 30203 }, { "epoch": 99.02950819672131, "grad_norm": 4.020485877990723, "learning_rate": 4.93940081206401e-09, "loss": 0.4165, "step": 30204 }, { "epoch": 99.0327868852459, "grad_norm": 11.554315567016602, "learning_rate": 4.906085582424558e-09, "loss": 0.5797, "step": 30205 }, { "epoch": 99.03606557377049, "grad_norm": 4.728775978088379, "learning_rate": 4.872883057860378e-09, "loss": 0.3017, "step": 30206 }, { "epoch": 99.03934426229509, "grad_norm": 5.262741565704346, "learning_rate": 4.8397932387467265e-09, "loss": 0.371, "step": 30207 }, { "epoch": 99.04262295081968, "grad_norm": 5.840124607086182, "learning_rate": 4.806816125456637e-09, "loss": 0.2576, "step": 30208 }, { "epoch": 99.04590163934427, "grad_norm": 4.354731559753418, "learning_rate": 4.773951718362035e-09, "loss": 0.416, "step": 30209 }, { "epoch": 99.04918032786885, "grad_norm": 4.6860175132751465, "learning_rate": 4.741200017833736e-09, "loss": 0.3952, "step": 30210 }, { "epoch": 99.05245901639344, "grad_norm": 4.394062519073486, "learning_rate": 4.708561024241443e-09, "loss": 0.2677, "step": 30211 }, { "epoch": 99.05573770491803, "grad_norm": 5.086997032165527, "learning_rate": 4.676034737951529e-09, "loss": 0.4668, "step": 30212 }, { "epoch": 99.05901639344262, "grad_norm": 5.487668514251709, "learning_rate": 4.64362115933259e-09, "loss": 0.3081, "step": 30213 }, { "epoch": 99.0622950819672, "grad_norm": 7.520092964172363, "learning_rate": 4.611320288749887e-09, "loss": 0.3428, "step": 30214 }, { "epoch": 99.06557377049181, "grad_norm": 5.865393161773682, "learning_rate": 4.579132126566465e-09, "loss": 0.4494, "step": 30215 }, { "epoch": 99.0688524590164, "grad_norm": 5.556144714355469, "learning_rate": 4.547056673145367e-09, "loss": 0.3936, "step": 30216 }, { "epoch": 99.07213114754099, "grad_norm": 4.8424391746521, "learning_rate": 4.515093928849634e-09, "loss": 0.3364, "step": 30217 }, { "epoch": 99.07540983606557, "grad_norm": 7.702737808227539, "learning_rate": 4.48324389403898e-09, "loss": 0.2281, "step": 30218 }, { "epoch": 99.07868852459016, "grad_norm": 5.488790035247803, "learning_rate": 4.451506569073116e-09, "loss": 0.4345, "step": 30219 }, { "epoch": 99.08196721311475, "grad_norm": 4.811376571655273, "learning_rate": 4.4198819543084244e-09, "loss": 0.343, "step": 30220 }, { "epoch": 99.08524590163934, "grad_norm": 4.310902118682861, "learning_rate": 4.388370050102397e-09, "loss": 0.4028, "step": 30221 }, { "epoch": 99.08852459016393, "grad_norm": 3.882418394088745, "learning_rate": 4.356970856810305e-09, "loss": 0.199, "step": 30222 }, { "epoch": 99.09180327868853, "grad_norm": 4.886033535003662, "learning_rate": 4.3256843747863095e-09, "loss": 0.2765, "step": 30223 }, { "epoch": 99.09508196721312, "grad_norm": 4.4194793701171875, "learning_rate": 4.294510604382352e-09, "loss": 0.2723, "step": 30224 }, { "epoch": 99.09836065573771, "grad_norm": 5.928380966186523, "learning_rate": 4.263449545951481e-09, "loss": 0.473, "step": 30225 }, { "epoch": 99.1016393442623, "grad_norm": 4.340155124664307, "learning_rate": 4.232501199843419e-09, "loss": 0.3057, "step": 30226 }, { "epoch": 99.10491803278688, "grad_norm": 4.652383804321289, "learning_rate": 4.201665566406776e-09, "loss": 0.5222, "step": 30227 }, { "epoch": 99.10819672131147, "grad_norm": 10.692310333251953, "learning_rate": 4.170942645989051e-09, "loss": 0.4184, "step": 30228 }, { "epoch": 99.11147540983606, "grad_norm": 3.925262451171875, "learning_rate": 4.140332438937744e-09, "loss": 0.2299, "step": 30229 }, { "epoch": 99.11475409836065, "grad_norm": 4.372013092041016, "learning_rate": 4.109834945595914e-09, "loss": 0.1662, "step": 30230 }, { "epoch": 99.11803278688525, "grad_norm": 5.1208624839782715, "learning_rate": 4.07945016630995e-09, "loss": 0.2353, "step": 30231 }, { "epoch": 99.12131147540984, "grad_norm": 5.75847053527832, "learning_rate": 4.049178101421802e-09, "loss": 0.2752, "step": 30232 }, { "epoch": 99.12459016393443, "grad_norm": 4.275235652923584, "learning_rate": 4.0190187512711976e-09, "loss": 0.4218, "step": 30233 }, { "epoch": 99.12786885245902, "grad_norm": 8.05243968963623, "learning_rate": 3.9889721161989745e-09, "loss": 0.381, "step": 30234 }, { "epoch": 99.1311475409836, "grad_norm": 4.657601356506348, "learning_rate": 3.959038196545972e-09, "loss": 0.1588, "step": 30235 }, { "epoch": 99.1344262295082, "grad_norm": 4.517657279968262, "learning_rate": 3.929216992647477e-09, "loss": 0.3276, "step": 30236 }, { "epoch": 99.13770491803278, "grad_norm": 5.347707748413086, "learning_rate": 3.899508504839888e-09, "loss": 0.3695, "step": 30237 }, { "epoch": 99.14098360655737, "grad_norm": 7.113286972045898, "learning_rate": 3.869912733458492e-09, "loss": 0.2232, "step": 30238 }, { "epoch": 99.14426229508197, "grad_norm": 11.497496604919434, "learning_rate": 3.8404296788374654e-09, "loss": 0.5066, "step": 30239 }, { "epoch": 99.14754098360656, "grad_norm": 3.8792550563812256, "learning_rate": 3.8110593413098755e-09, "loss": 0.4211, "step": 30240 }, { "epoch": 99.15081967213115, "grad_norm": 4.510950565338135, "learning_rate": 3.781801721204348e-09, "loss": 0.4845, "step": 30241 }, { "epoch": 99.15409836065574, "grad_norm": 4.949260711669922, "learning_rate": 3.75265681885395e-09, "loss": 0.3997, "step": 30242 }, { "epoch": 99.15737704918033, "grad_norm": 10.621406555175781, "learning_rate": 3.723624634585088e-09, "loss": 0.4484, "step": 30243 }, { "epoch": 99.16065573770491, "grad_norm": 4.784666538238525, "learning_rate": 3.694705168726387e-09, "loss": 0.3167, "step": 30244 }, { "epoch": 99.1639344262295, "grad_norm": 3.7754430770874023, "learning_rate": 3.6658984216031425e-09, "loss": 0.2452, "step": 30245 }, { "epoch": 99.1672131147541, "grad_norm": 4.876158237457275, "learning_rate": 3.63720439354065e-09, "loss": 0.2422, "step": 30246 }, { "epoch": 99.1704918032787, "grad_norm": 3.337104320526123, "learning_rate": 3.608623084861984e-09, "loss": 0.3313, "step": 30247 }, { "epoch": 99.17377049180328, "grad_norm": 5.078723907470703, "learning_rate": 3.5801544958891097e-09, "loss": 0.2054, "step": 30248 }, { "epoch": 99.17704918032787, "grad_norm": 4.1255998611450195, "learning_rate": 3.551798626945102e-09, "loss": 0.4046, "step": 30249 }, { "epoch": 99.18032786885246, "grad_norm": 7.636796474456787, "learning_rate": 3.5235554783474845e-09, "loss": 0.1743, "step": 30250 }, { "epoch": 99.18360655737705, "grad_norm": 4.5431227684021, "learning_rate": 3.4954250504148913e-09, "loss": 0.318, "step": 30251 }, { "epoch": 99.18688524590164, "grad_norm": 5.631622791290283, "learning_rate": 3.467407343465956e-09, "loss": 0.3964, "step": 30252 }, { "epoch": 99.19016393442622, "grad_norm": 4.735173225402832, "learning_rate": 3.4395023578159823e-09, "loss": 0.3614, "step": 30253 }, { "epoch": 99.19344262295083, "grad_norm": 6.336160182952881, "learning_rate": 3.4117100937791634e-09, "loss": 0.4009, "step": 30254 }, { "epoch": 99.19672131147541, "grad_norm": 4.180320739746094, "learning_rate": 3.3840305516696927e-09, "loss": 0.2405, "step": 30255 }, { "epoch": 99.2, "grad_norm": 4.3010711669921875, "learning_rate": 3.3564637317984318e-09, "loss": 0.4281, "step": 30256 }, { "epoch": 99.20327868852459, "grad_norm": 4.739950180053711, "learning_rate": 3.3290096344773538e-09, "loss": 0.5298, "step": 30257 }, { "epoch": 99.20655737704918, "grad_norm": 4.209957599639893, "learning_rate": 3.3016682600151005e-09, "loss": 0.2515, "step": 30258 }, { "epoch": 99.20983606557377, "grad_norm": 4.872042655944824, "learning_rate": 3.2744396087203146e-09, "loss": 0.4706, "step": 30259 }, { "epoch": 99.21311475409836, "grad_norm": 5.2357916831970215, "learning_rate": 3.247323680900527e-09, "loss": 0.3085, "step": 30260 }, { "epoch": 99.21639344262294, "grad_norm": 4.307200908660889, "learning_rate": 3.2203204768610497e-09, "loss": 0.4664, "step": 30261 }, { "epoch": 99.21967213114755, "grad_norm": 4.992637634277344, "learning_rate": 3.1934299969071934e-09, "loss": 0.423, "step": 30262 }, { "epoch": 99.22295081967214, "grad_norm": 5.766491889953613, "learning_rate": 3.1666522413409397e-09, "loss": 0.3877, "step": 30263 }, { "epoch": 99.22622950819672, "grad_norm": 5.816926956176758, "learning_rate": 3.1399872104653782e-09, "loss": 0.4073, "step": 30264 }, { "epoch": 99.22950819672131, "grad_norm": 17.210105895996094, "learning_rate": 3.1134349045802703e-09, "loss": 0.3356, "step": 30265 }, { "epoch": 99.2327868852459, "grad_norm": 4.630694389343262, "learning_rate": 3.0869953239853757e-09, "loss": 0.289, "step": 30266 }, { "epoch": 99.23606557377049, "grad_norm": 5.338674545288086, "learning_rate": 3.060668468978234e-09, "loss": 0.3941, "step": 30267 }, { "epoch": 99.23934426229508, "grad_norm": 4.202892780303955, "learning_rate": 3.0344543398563852e-09, "loss": 0.4761, "step": 30268 }, { "epoch": 99.24262295081967, "grad_norm": 6.259316921234131, "learning_rate": 3.0083529369151487e-09, "loss": 0.5938, "step": 30269 }, { "epoch": 99.24590163934427, "grad_norm": 4.04469633102417, "learning_rate": 2.9823642604498435e-09, "loss": 0.5299, "step": 30270 }, { "epoch": 99.24918032786886, "grad_norm": 3.7978675365448, "learning_rate": 2.956488310752459e-09, "loss": 0.3757, "step": 30271 }, { "epoch": 99.25245901639344, "grad_norm": 4.2817583084106445, "learning_rate": 2.930725088114983e-09, "loss": 0.408, "step": 30272 }, { "epoch": 99.25573770491803, "grad_norm": 3.811488628387451, "learning_rate": 2.905074592827184e-09, "loss": 0.2438, "step": 30273 }, { "epoch": 99.25901639344262, "grad_norm": 5.452425003051758, "learning_rate": 2.8795368251799403e-09, "loss": 0.3655, "step": 30274 }, { "epoch": 99.26229508196721, "grad_norm": 4.232594013214111, "learning_rate": 2.85411178545969e-09, "loss": 0.3104, "step": 30275 }, { "epoch": 99.2655737704918, "grad_norm": 4.304996013641357, "learning_rate": 2.8287994739539803e-09, "loss": 0.3135, "step": 30276 }, { "epoch": 99.26885245901639, "grad_norm": 6.830652713775635, "learning_rate": 2.8035998909481387e-09, "loss": 0.3966, "step": 30277 }, { "epoch": 99.27213114754099, "grad_norm": 5.1461992263793945, "learning_rate": 2.7785130367263823e-09, "loss": 0.1688, "step": 30278 }, { "epoch": 99.27540983606558, "grad_norm": 5.367216110229492, "learning_rate": 2.753538911570708e-09, "loss": 0.1913, "step": 30279 }, { "epoch": 99.27868852459017, "grad_norm": 4.604512691497803, "learning_rate": 2.728677515764222e-09, "loss": 0.3122, "step": 30280 }, { "epoch": 99.28196721311475, "grad_norm": 15.097439765930176, "learning_rate": 2.703928849585591e-09, "loss": 0.4596, "step": 30281 }, { "epoch": 99.28524590163934, "grad_norm": 5.914208889007568, "learning_rate": 2.6792929133157006e-09, "loss": 0.2882, "step": 30282 }, { "epoch": 99.28852459016393, "grad_norm": 5.164333820343018, "learning_rate": 2.6547697072309977e-09, "loss": 0.3863, "step": 30283 }, { "epoch": 99.29180327868852, "grad_norm": 4.328794479370117, "learning_rate": 2.6303592316079263e-09, "loss": 0.395, "step": 30284 }, { "epoch": 99.29508196721312, "grad_norm": 5.311270713806152, "learning_rate": 2.6060614867229327e-09, "loss": 0.4199, "step": 30285 }, { "epoch": 99.29836065573771, "grad_norm": 6.042073726654053, "learning_rate": 2.5818764728480217e-09, "loss": 0.2667, "step": 30286 }, { "epoch": 99.3016393442623, "grad_norm": 11.247847557067871, "learning_rate": 2.557804190258528e-09, "loss": 0.3767, "step": 30287 }, { "epoch": 99.30491803278689, "grad_norm": 3.8346152305603027, "learning_rate": 2.5338446392242365e-09, "loss": 0.5399, "step": 30288 }, { "epoch": 99.30819672131148, "grad_norm": 5.818617820739746, "learning_rate": 2.509997820014931e-09, "loss": 0.2444, "step": 30289 }, { "epoch": 99.31147540983606, "grad_norm": 4.686370372772217, "learning_rate": 2.486263732900396e-09, "loss": 0.2794, "step": 30290 }, { "epoch": 99.31475409836065, "grad_norm": 9.546941757202148, "learning_rate": 2.462642378149305e-09, "loss": 0.4622, "step": 30291 }, { "epoch": 99.31803278688524, "grad_norm": 5.587864875793457, "learning_rate": 2.4391337560247807e-09, "loss": 0.6572, "step": 30292 }, { "epoch": 99.32131147540984, "grad_norm": 5.024856090545654, "learning_rate": 2.4157378667954978e-09, "loss": 0.3255, "step": 30293 }, { "epoch": 99.32459016393443, "grad_norm": 5.463027000427246, "learning_rate": 2.3924547107223583e-09, "loss": 0.3245, "step": 30294 }, { "epoch": 99.32786885245902, "grad_norm": 5.7883806228637695, "learning_rate": 2.3692842880707056e-09, "loss": 0.5411, "step": 30295 }, { "epoch": 99.33114754098361, "grad_norm": 3.9404520988464355, "learning_rate": 2.3462265990992216e-09, "loss": 0.3653, "step": 30296 }, { "epoch": 99.3344262295082, "grad_norm": 5.141037464141846, "learning_rate": 2.323281644068809e-09, "loss": 0.3989, "step": 30297 }, { "epoch": 99.33770491803278, "grad_norm": 5.380748271942139, "learning_rate": 2.3004494232392593e-09, "loss": 0.4414, "step": 30298 }, { "epoch": 99.34098360655737, "grad_norm": 4.399687767028809, "learning_rate": 2.2777299368659246e-09, "loss": 0.329, "step": 30299 }, { "epoch": 99.34426229508196, "grad_norm": 4.696415901184082, "learning_rate": 2.2551231852074862e-09, "loss": 0.4183, "step": 30300 }, { "epoch": 99.34754098360656, "grad_norm": 3.719386100769043, "learning_rate": 2.2326291685170755e-09, "loss": 0.3922, "step": 30301 }, { "epoch": 99.35081967213115, "grad_norm": 4.583269119262695, "learning_rate": 2.210247887048933e-09, "loss": 0.4901, "step": 30302 }, { "epoch": 99.35409836065574, "grad_norm": 7.179398536682129, "learning_rate": 2.1879793410550797e-09, "loss": 0.4268, "step": 30303 }, { "epoch": 99.35737704918033, "grad_norm": 4.19881534576416, "learning_rate": 2.1658235307875364e-09, "loss": 0.4374, "step": 30304 }, { "epoch": 99.36065573770492, "grad_norm": 5.175714015960693, "learning_rate": 2.1437804564949928e-09, "loss": 0.4875, "step": 30305 }, { "epoch": 99.3639344262295, "grad_norm": 5.576696395874023, "learning_rate": 2.1218501184261385e-09, "loss": 0.3904, "step": 30306 }, { "epoch": 99.3672131147541, "grad_norm": 5.1581645011901855, "learning_rate": 2.100032516828554e-09, "loss": 0.252, "step": 30307 }, { "epoch": 99.37049180327868, "grad_norm": 7.912304878234863, "learning_rate": 2.0783276519487084e-09, "loss": 0.4306, "step": 30308 }, { "epoch": 99.37377049180328, "grad_norm": 5.109128952026367, "learning_rate": 2.0567355240308507e-09, "loss": 0.3484, "step": 30309 }, { "epoch": 99.37704918032787, "grad_norm": 4.627634048461914, "learning_rate": 2.03525613331923e-09, "loss": 0.4307, "step": 30310 }, { "epoch": 99.38032786885246, "grad_norm": 7.084238052368164, "learning_rate": 2.013889480054765e-09, "loss": 0.182, "step": 30311 }, { "epoch": 99.38360655737705, "grad_norm": 8.792927742004395, "learning_rate": 1.9926355644783735e-09, "loss": 0.4384, "step": 30312 }, { "epoch": 99.38688524590164, "grad_norm": 4.549108982086182, "learning_rate": 1.9714943868309744e-09, "loss": 0.3927, "step": 30313 }, { "epoch": 99.39016393442623, "grad_norm": 4.3267035484313965, "learning_rate": 1.950465947350155e-09, "loss": 0.2019, "step": 30314 }, { "epoch": 99.39344262295081, "grad_norm": 5.123697280883789, "learning_rate": 1.9295502462735037e-09, "loss": 0.1553, "step": 30315 }, { "epoch": 99.3967213114754, "grad_norm": 4.269351959228516, "learning_rate": 1.9087472838363875e-09, "loss": 0.3666, "step": 30316 }, { "epoch": 99.4, "grad_norm": 4.40482759475708, "learning_rate": 1.888057060274173e-09, "loss": 0.1887, "step": 30317 }, { "epoch": 99.4032786885246, "grad_norm": 3.5623912811279297, "learning_rate": 1.8674795758188978e-09, "loss": 0.5152, "step": 30318 }, { "epoch": 99.40655737704918, "grad_norm": 4.705840110778809, "learning_rate": 1.8470148307025981e-09, "loss": 0.2718, "step": 30319 }, { "epoch": 99.40983606557377, "grad_norm": 4.613284587860107, "learning_rate": 1.8266628251584206e-09, "loss": 0.3265, "step": 30320 }, { "epoch": 99.41311475409836, "grad_norm": 5.057956218719482, "learning_rate": 1.806423559412851e-09, "loss": 0.4447, "step": 30321 }, { "epoch": 99.41639344262295, "grad_norm": 4.544755458831787, "learning_rate": 1.7862970336957054e-09, "loss": 0.4875, "step": 30322 }, { "epoch": 99.41967213114754, "grad_norm": 4.405251979827881, "learning_rate": 1.7662832482334692e-09, "loss": 0.2978, "step": 30323 }, { "epoch": 99.42295081967212, "grad_norm": 3.843515634536743, "learning_rate": 1.7463822032515177e-09, "loss": 0.3302, "step": 30324 }, { "epoch": 99.42622950819673, "grad_norm": 9.761263847351074, "learning_rate": 1.7265938989752261e-09, "loss": 0.2356, "step": 30325 }, { "epoch": 99.42950819672132, "grad_norm": 6.128499984741211, "learning_rate": 1.7069183356266394e-09, "loss": 0.3518, "step": 30326 }, { "epoch": 99.4327868852459, "grad_norm": 4.815245628356934, "learning_rate": 1.6873555134289121e-09, "loss": 0.4002, "step": 30327 }, { "epoch": 99.43606557377049, "grad_norm": 3.9203054904937744, "learning_rate": 1.6679054326018685e-09, "loss": 0.2159, "step": 30328 }, { "epoch": 99.43934426229508, "grad_norm": 6.273867607116699, "learning_rate": 1.6485680933642223e-09, "loss": 0.5235, "step": 30329 }, { "epoch": 99.44262295081967, "grad_norm": 5.786279201507568, "learning_rate": 1.6293434959346877e-09, "loss": 0.3881, "step": 30330 }, { "epoch": 99.44590163934426, "grad_norm": 7.557889938354492, "learning_rate": 1.6102316405308682e-09, "loss": 0.2774, "step": 30331 }, { "epoch": 99.44918032786886, "grad_norm": 4.860970497131348, "learning_rate": 1.591232527367037e-09, "loss": 0.3358, "step": 30332 }, { "epoch": 99.45245901639345, "grad_norm": 5.532203197479248, "learning_rate": 1.572346156657467e-09, "loss": 0.3533, "step": 30333 }, { "epoch": 99.45573770491804, "grad_norm": 5.360836505889893, "learning_rate": 1.5535725286153213e-09, "loss": 0.3838, "step": 30334 }, { "epoch": 99.45901639344262, "grad_norm": 5.141409873962402, "learning_rate": 1.5349116434526524e-09, "loss": 0.4274, "step": 30335 }, { "epoch": 99.46229508196721, "grad_norm": 5.655120849609375, "learning_rate": 1.5163635013804023e-09, "loss": 0.4459, "step": 30336 }, { "epoch": 99.4655737704918, "grad_norm": 6.351102828979492, "learning_rate": 1.497928102606183e-09, "loss": 0.2508, "step": 30337 }, { "epoch": 99.46885245901639, "grad_norm": 4.929065704345703, "learning_rate": 1.4796054473387166e-09, "loss": 0.392, "step": 30338 }, { "epoch": 99.47213114754098, "grad_norm": 4.658386707305908, "learning_rate": 1.4613955357845045e-09, "loss": 0.3148, "step": 30339 }, { "epoch": 99.47540983606558, "grad_norm": 6.325270175933838, "learning_rate": 1.4432983681489377e-09, "loss": 0.2916, "step": 30340 }, { "epoch": 99.47868852459017, "grad_norm": 5.871452808380127, "learning_rate": 1.4253139446362974e-09, "loss": 0.4084, "step": 30341 }, { "epoch": 99.48196721311476, "grad_norm": 3.7590811252593994, "learning_rate": 1.407442265448644e-09, "loss": 0.2551, "step": 30342 }, { "epoch": 99.48524590163935, "grad_norm": 4.674335479736328, "learning_rate": 1.3896833307880387e-09, "loss": 0.3009, "step": 30343 }, { "epoch": 99.48852459016393, "grad_norm": 8.674245834350586, "learning_rate": 1.3720371408554311e-09, "loss": 0.4633, "step": 30344 }, { "epoch": 99.49180327868852, "grad_norm": 4.419760704040527, "learning_rate": 1.3545036958484415e-09, "loss": 0.5824, "step": 30345 }, { "epoch": 99.49508196721311, "grad_norm": 4.70665168762207, "learning_rate": 1.3370829959657994e-09, "loss": 0.5698, "step": 30346 }, { "epoch": 99.4983606557377, "grad_norm": 6.729851245880127, "learning_rate": 1.3197750414029043e-09, "loss": 0.5113, "step": 30347 }, { "epoch": 99.5016393442623, "grad_norm": 4.159594535827637, "learning_rate": 1.3025798323562655e-09, "loss": 0.2322, "step": 30348 }, { "epoch": 99.50491803278689, "grad_norm": 4.840936183929443, "learning_rate": 1.285497369019062e-09, "loss": 0.3061, "step": 30349 }, { "epoch": 99.50819672131148, "grad_norm": 5.558220863342285, "learning_rate": 1.2685276515844724e-09, "loss": 0.4019, "step": 30350 }, { "epoch": 99.51147540983607, "grad_norm": 3.656200408935547, "learning_rate": 1.2516706802423451e-09, "loss": 0.1732, "step": 30351 }, { "epoch": 99.51475409836065, "grad_norm": 4.85000467300415, "learning_rate": 1.2349264551836383e-09, "loss": 0.3143, "step": 30352 }, { "epoch": 99.51803278688524, "grad_norm": 4.865344047546387, "learning_rate": 1.2182949765970898e-09, "loss": 0.5436, "step": 30353 }, { "epoch": 99.52131147540983, "grad_norm": 4.780860900878906, "learning_rate": 1.2017762446714375e-09, "loss": 0.344, "step": 30354 }, { "epoch": 99.52459016393442, "grad_norm": 10.730151176452637, "learning_rate": 1.1853702595909788e-09, "loss": 0.5116, "step": 30355 }, { "epoch": 99.52786885245902, "grad_norm": 4.449030876159668, "learning_rate": 1.169077021542231e-09, "loss": 0.172, "step": 30356 }, { "epoch": 99.53114754098361, "grad_norm": 4.677169322967529, "learning_rate": 1.1528965307083806e-09, "loss": 0.4856, "step": 30357 }, { "epoch": 99.5344262295082, "grad_norm": 5.3006272315979, "learning_rate": 1.1368287872715045e-09, "loss": 0.2985, "step": 30358 }, { "epoch": 99.53770491803279, "grad_norm": 4.529174327850342, "learning_rate": 1.1208737914125689e-09, "loss": 0.3723, "step": 30359 }, { "epoch": 99.54098360655738, "grad_norm": 5.700822353363037, "learning_rate": 1.1050315433125402e-09, "loss": 0.4099, "step": 30360 }, { "epoch": 99.54426229508196, "grad_norm": 4.188980579376221, "learning_rate": 1.0893020431501645e-09, "loss": 0.4752, "step": 30361 }, { "epoch": 99.54754098360655, "grad_norm": 10.953875541687012, "learning_rate": 1.0736852911008566e-09, "loss": 0.1946, "step": 30362 }, { "epoch": 99.55081967213114, "grad_norm": 4.794671058654785, "learning_rate": 1.0581812873422525e-09, "loss": 0.1996, "step": 30363 }, { "epoch": 99.55409836065574, "grad_norm": 5.075148105621338, "learning_rate": 1.0427900320497674e-09, "loss": 0.2424, "step": 30364 }, { "epoch": 99.55737704918033, "grad_norm": 7.311694145202637, "learning_rate": 1.027511525395486e-09, "loss": 0.3701, "step": 30365 }, { "epoch": 99.56065573770492, "grad_norm": 5.021277904510498, "learning_rate": 1.0123457675526026e-09, "loss": 0.2715, "step": 30366 }, { "epoch": 99.56393442622951, "grad_norm": 4.821173667907715, "learning_rate": 9.972927586920923e-10, "loss": 0.3085, "step": 30367 }, { "epoch": 99.5672131147541, "grad_norm": 26.68900489807129, "learning_rate": 9.823524989838185e-10, "loss": 0.3963, "step": 30368 }, { "epoch": 99.57049180327868, "grad_norm": 6.1883769035339355, "learning_rate": 9.67524988594315e-10, "loss": 0.3998, "step": 30369 }, { "epoch": 99.57377049180327, "grad_norm": 5.868671417236328, "learning_rate": 9.528102276934459e-10, "loss": 0.2973, "step": 30370 }, { "epoch": 99.57704918032788, "grad_norm": 4.912691593170166, "learning_rate": 9.382082164466345e-10, "loss": 0.3576, "step": 30371 }, { "epoch": 99.58032786885246, "grad_norm": 3.43326997756958, "learning_rate": 9.237189550170833e-10, "loss": 0.4328, "step": 30372 }, { "epoch": 99.58360655737705, "grad_norm": 6.31809663772583, "learning_rate": 9.093424435691056e-10, "loss": 0.3374, "step": 30373 }, { "epoch": 99.58688524590164, "grad_norm": 11.410090446472168, "learning_rate": 8.950786822647939e-10, "loss": 0.3239, "step": 30374 }, { "epoch": 99.59016393442623, "grad_norm": 4.619557857513428, "learning_rate": 8.809276712651305e-10, "loss": 0.4674, "step": 30375 }, { "epoch": 99.59344262295082, "grad_norm": 5.082202911376953, "learning_rate": 8.668894107288772e-10, "loss": 0.1836, "step": 30376 }, { "epoch": 99.5967213114754, "grad_norm": 4.7483391761779785, "learning_rate": 8.529639008159063e-10, "loss": 0.2704, "step": 30377 }, { "epoch": 99.6, "grad_norm": 4.54461145401001, "learning_rate": 8.391511416816489e-10, "loss": 0.3476, "step": 30378 }, { "epoch": 99.6032786885246, "grad_norm": 5.197936534881592, "learning_rate": 8.254511334826465e-10, "loss": 0.3699, "step": 30379 }, { "epoch": 99.60655737704919, "grad_norm": 5.3378987312316895, "learning_rate": 8.1186387637322e-10, "loss": 0.3092, "step": 30380 }, { "epoch": 99.60983606557377, "grad_norm": 5.155365467071533, "learning_rate": 7.983893705065804e-10, "loss": 0.2472, "step": 30381 }, { "epoch": 99.61311475409836, "grad_norm": 5.628369331359863, "learning_rate": 7.850276160337178e-10, "loss": 0.3187, "step": 30382 }, { "epoch": 99.61639344262295, "grad_norm": 5.995254039764404, "learning_rate": 7.717786131078431e-10, "loss": 0.3053, "step": 30383 }, { "epoch": 99.61967213114754, "grad_norm": 4.109888076782227, "learning_rate": 7.586423618755056e-10, "loss": 0.3025, "step": 30384 }, { "epoch": 99.62295081967213, "grad_norm": 9.633115768432617, "learning_rate": 7.456188624865856e-10, "loss": 0.4095, "step": 30385 }, { "epoch": 99.62622950819672, "grad_norm": 4.621076583862305, "learning_rate": 7.327081150876325e-10, "loss": 0.4083, "step": 30386 }, { "epoch": 99.62950819672132, "grad_norm": 4.856508255004883, "learning_rate": 7.199101198240854e-10, "loss": 0.262, "step": 30387 }, { "epoch": 99.6327868852459, "grad_norm": 4.418596267700195, "learning_rate": 7.072248768402734e-10, "loss": 0.3784, "step": 30388 }, { "epoch": 99.6360655737705, "grad_norm": 5.390641689300537, "learning_rate": 6.94652386278305e-10, "loss": 0.2654, "step": 30389 }, { "epoch": 99.63934426229508, "grad_norm": 6.597855091094971, "learning_rate": 6.82192648281399e-10, "loss": 0.2528, "step": 30390 }, { "epoch": 99.64262295081967, "grad_norm": 4.399654865264893, "learning_rate": 6.698456629894435e-10, "loss": 0.3856, "step": 30391 }, { "epoch": 99.64590163934426, "grad_norm": 4.635314464569092, "learning_rate": 6.576114305412162e-10, "loss": 0.3978, "step": 30392 }, { "epoch": 99.64918032786885, "grad_norm": 4.438194274902344, "learning_rate": 6.454899510754953e-10, "loss": 0.2564, "step": 30393 }, { "epoch": 99.65245901639344, "grad_norm": 5.707380771636963, "learning_rate": 6.33481224728838e-10, "loss": 0.3691, "step": 30394 }, { "epoch": 99.65573770491804, "grad_norm": 5.259962558746338, "learning_rate": 6.215852516366916e-10, "loss": 0.5076, "step": 30395 }, { "epoch": 99.65901639344263, "grad_norm": 5.2733635902404785, "learning_rate": 6.098020319322828e-10, "loss": 0.4325, "step": 30396 }, { "epoch": 99.66229508196722, "grad_norm": 4.45228910446167, "learning_rate": 5.981315657488385e-10, "loss": 0.4777, "step": 30397 }, { "epoch": 99.6655737704918, "grad_norm": 5.058133125305176, "learning_rate": 5.865738532195852e-10, "loss": 0.4775, "step": 30398 }, { "epoch": 99.66885245901639, "grad_norm": 6.319345951080322, "learning_rate": 5.751288944721989e-10, "loss": 0.3863, "step": 30399 }, { "epoch": 99.67213114754098, "grad_norm": 5.0909528732299805, "learning_rate": 5.637966896376857e-10, "loss": 0.348, "step": 30400 }, { "epoch": 99.67540983606557, "grad_norm": 5.029351234436035, "learning_rate": 5.525772388426109e-10, "loss": 0.3043, "step": 30401 }, { "epoch": 99.67868852459016, "grad_norm": 5.262859344482422, "learning_rate": 5.414705422146505e-10, "loss": 0.2209, "step": 30402 }, { "epoch": 99.68196721311476, "grad_norm": 4.056534290313721, "learning_rate": 5.304765998781491e-10, "loss": 0.3104, "step": 30403 }, { "epoch": 99.68524590163935, "grad_norm": 5.800341606140137, "learning_rate": 5.195954119563418e-10, "loss": 0.5678, "step": 30404 }, { "epoch": 99.68852459016394, "grad_norm": 4.930461406707764, "learning_rate": 5.088269785746835e-10, "loss": 0.2416, "step": 30405 }, { "epoch": 99.69180327868852, "grad_norm": 4.032099723815918, "learning_rate": 4.981712998519683e-10, "loss": 0.2628, "step": 30406 }, { "epoch": 99.69508196721311, "grad_norm": 4.758469104766846, "learning_rate": 4.876283759092105e-10, "loss": 0.4184, "step": 30407 }, { "epoch": 99.6983606557377, "grad_norm": 5.369178771972656, "learning_rate": 4.771982068652037e-10, "loss": 0.3877, "step": 30408 }, { "epoch": 99.70163934426229, "grad_norm": 6.015305519104004, "learning_rate": 4.668807928387419e-10, "loss": 0.4775, "step": 30409 }, { "epoch": 99.70491803278688, "grad_norm": 7.953272342681885, "learning_rate": 4.566761339441783e-10, "loss": 0.6667, "step": 30410 }, { "epoch": 99.70819672131148, "grad_norm": 4.213710308074951, "learning_rate": 4.4658423029808606e-10, "loss": 0.2325, "step": 30411 }, { "epoch": 99.71147540983607, "grad_norm": 4.04282283782959, "learning_rate": 4.3660508201259775e-10, "loss": 0.3491, "step": 30412 }, { "epoch": 99.71475409836066, "grad_norm": 4.346501350402832, "learning_rate": 4.267386892020664e-10, "loss": 0.2801, "step": 30413 }, { "epoch": 99.71803278688525, "grad_norm": 5.621890544891357, "learning_rate": 4.1698505197751427e-10, "loss": 0.6197, "step": 30414 }, { "epoch": 99.72131147540983, "grad_norm": 5.448023319244385, "learning_rate": 4.0734417044774323e-10, "loss": 0.2566, "step": 30415 }, { "epoch": 99.72459016393442, "grad_norm": 4.41229248046875, "learning_rate": 3.978160447215551e-10, "loss": 0.3329, "step": 30416 }, { "epoch": 99.72786885245901, "grad_norm": 6.137504577636719, "learning_rate": 3.884006749077518e-10, "loss": 0.563, "step": 30417 }, { "epoch": 99.73114754098361, "grad_norm": 7.3781585693359375, "learning_rate": 3.7909806111180447e-10, "loss": 0.5133, "step": 30418 }, { "epoch": 99.7344262295082, "grad_norm": 5.7466721534729, "learning_rate": 3.699082034380741e-10, "loss": 0.5584, "step": 30419 }, { "epoch": 99.73770491803279, "grad_norm": 4.525243759155273, "learning_rate": 3.6083110199092165e-10, "loss": 0.3151, "step": 30420 }, { "epoch": 99.74098360655738, "grad_norm": 5.081255912780762, "learning_rate": 3.5186675687248763e-10, "loss": 0.3696, "step": 30421 }, { "epoch": 99.74426229508197, "grad_norm": 5.1321187019348145, "learning_rate": 3.430151681838023e-10, "loss": 0.3365, "step": 30422 }, { "epoch": 99.74754098360656, "grad_norm": 5.265096187591553, "learning_rate": 3.342763360247858e-10, "loss": 0.2912, "step": 30423 }, { "epoch": 99.75081967213114, "grad_norm": 4.8729681968688965, "learning_rate": 3.25650260494248e-10, "loss": 0.557, "step": 30424 }, { "epoch": 99.75409836065573, "grad_norm": 4.4374237060546875, "learning_rate": 3.1713694168766793e-10, "loss": 0.3384, "step": 30425 }, { "epoch": 99.75737704918033, "grad_norm": 4.080207347869873, "learning_rate": 3.087363797038556e-10, "loss": 0.2297, "step": 30426 }, { "epoch": 99.76065573770492, "grad_norm": 3.8816864490509033, "learning_rate": 3.0044857463495945e-10, "loss": 0.5313, "step": 30427 }, { "epoch": 99.76393442622951, "grad_norm": 5.246135711669922, "learning_rate": 2.922735265764587e-10, "loss": 0.5858, "step": 30428 }, { "epoch": 99.7672131147541, "grad_norm": 4.508635997772217, "learning_rate": 2.8421123561939157e-10, "loss": 0.3706, "step": 30429 }, { "epoch": 99.77049180327869, "grad_norm": 8.16975212097168, "learning_rate": 2.762617018547964e-10, "loss": 0.3475, "step": 30430 }, { "epoch": 99.77377049180328, "grad_norm": 5.417956352233887, "learning_rate": 2.6842492537260124e-10, "loss": 0.2974, "step": 30431 }, { "epoch": 99.77704918032786, "grad_norm": 5.176100730895996, "learning_rate": 2.607009062605137e-10, "loss": 0.4297, "step": 30432 }, { "epoch": 99.78032786885245, "grad_norm": 5.19556999206543, "learning_rate": 2.530896446062414e-10, "loss": 0.4373, "step": 30433 }, { "epoch": 99.78360655737706, "grad_norm": 4.4105401039123535, "learning_rate": 2.4559114049638177e-10, "loss": 0.4724, "step": 30434 }, { "epoch": 99.78688524590164, "grad_norm": 4.608063220977783, "learning_rate": 2.382053940142015e-10, "loss": 0.2766, "step": 30435 }, { "epoch": 99.79016393442623, "grad_norm": 3.784881830215454, "learning_rate": 2.3093240524296733e-10, "loss": 0.389, "step": 30436 }, { "epoch": 99.79344262295082, "grad_norm": 4.203151702880859, "learning_rate": 2.23772174265946e-10, "loss": 0.2315, "step": 30437 }, { "epoch": 99.79672131147541, "grad_norm": 4.119734764099121, "learning_rate": 2.167247011619633e-10, "loss": 0.3857, "step": 30438 }, { "epoch": 99.8, "grad_norm": 7.174624443054199, "learning_rate": 2.0978998601206558e-10, "loss": 0.4209, "step": 30439 }, { "epoch": 99.80327868852459, "grad_norm": 5.248386859893799, "learning_rate": 2.029680288939684e-10, "loss": 0.3916, "step": 30440 }, { "epoch": 99.80655737704917, "grad_norm": 3.4879913330078125, "learning_rate": 1.9625882988538737e-10, "loss": 0.2684, "step": 30441 }, { "epoch": 99.80983606557378, "grad_norm": 4.648614883422852, "learning_rate": 1.8966238905959722e-10, "loss": 0.4258, "step": 30442 }, { "epoch": 99.81311475409836, "grad_norm": 4.076149940490723, "learning_rate": 1.8317870649431357e-10, "loss": 0.3261, "step": 30443 }, { "epoch": 99.81639344262295, "grad_norm": 4.057044982910156, "learning_rate": 1.7680778225948046e-10, "loss": 0.2886, "step": 30444 }, { "epoch": 99.81967213114754, "grad_norm": 4.391853332519531, "learning_rate": 1.7054961642948286e-10, "loss": 0.4459, "step": 30445 }, { "epoch": 99.82295081967213, "grad_norm": 4.553491592407227, "learning_rate": 1.6440420907204434e-10, "loss": 0.3235, "step": 30446 }, { "epoch": 99.82622950819672, "grad_norm": 4.8834004402160645, "learning_rate": 1.5837156025932943e-10, "loss": 0.3554, "step": 30447 }, { "epoch": 99.8295081967213, "grad_norm": 5.940531253814697, "learning_rate": 1.524516700590617e-10, "loss": 0.3499, "step": 30448 }, { "epoch": 99.8327868852459, "grad_norm": 4.371584892272949, "learning_rate": 1.466445385356341e-10, "loss": 0.3943, "step": 30449 }, { "epoch": 99.8360655737705, "grad_norm": 5.8654866218566895, "learning_rate": 1.4095016575677022e-10, "loss": 0.1982, "step": 30450 }, { "epoch": 99.83934426229509, "grad_norm": 5.564246654510498, "learning_rate": 1.3536855178575281e-10, "loss": 0.2973, "step": 30451 }, { "epoch": 99.84262295081967, "grad_norm": 4.508939743041992, "learning_rate": 1.2989969668586455e-10, "loss": 0.43, "step": 30452 }, { "epoch": 99.84590163934426, "grad_norm": 5.427256107330322, "learning_rate": 1.2454360051816773e-10, "loss": 0.3731, "step": 30453 }, { "epoch": 99.84918032786885, "grad_norm": 3.816924810409546, "learning_rate": 1.1930026334372458e-10, "loss": 0.2173, "step": 30454 }, { "epoch": 99.85245901639344, "grad_norm": 5.176791667938232, "learning_rate": 1.1416968522137695e-10, "loss": 0.3022, "step": 30455 }, { "epoch": 99.85573770491803, "grad_norm": 5.534242153167725, "learning_rate": 1.0915186620996665e-10, "loss": 0.3522, "step": 30456 }, { "epoch": 99.85901639344263, "grad_norm": 4.759234428405762, "learning_rate": 1.0424680636389462e-10, "loss": 0.3519, "step": 30457 }, { "epoch": 99.86229508196722, "grad_norm": 5.912521839141846, "learning_rate": 9.945450574089244e-11, "loss": 0.3598, "step": 30458 }, { "epoch": 99.8655737704918, "grad_norm": 3.517411470413208, "learning_rate": 9.47749643931406e-11, "loss": 0.2414, "step": 30459 }, { "epoch": 99.8688524590164, "grad_norm": 4.975577354431152, "learning_rate": 9.020818237392982e-11, "loss": 0.3432, "step": 30460 }, { "epoch": 99.87213114754098, "grad_norm": 6.130183696746826, "learning_rate": 8.575415973433032e-11, "loss": 0.445, "step": 30461 }, { "epoch": 99.87540983606557, "grad_norm": 4.851229667663574, "learning_rate": 8.141289652652262e-11, "loss": 0.3937, "step": 30462 }, { "epoch": 99.87868852459016, "grad_norm": 4.044906139373779, "learning_rate": 7.718439279713608e-11, "loss": 0.3445, "step": 30463 }, { "epoch": 99.88196721311475, "grad_norm": 5.099445343017578, "learning_rate": 7.30686485950205e-11, "loss": 0.4137, "step": 30464 }, { "epoch": 99.88524590163935, "grad_norm": 4.478635787963867, "learning_rate": 6.906566396680525e-11, "loss": 0.5901, "step": 30465 }, { "epoch": 99.88852459016394, "grad_norm": 8.867680549621582, "learning_rate": 6.517543895689926e-11, "loss": 0.4073, "step": 30466 }, { "epoch": 99.89180327868853, "grad_norm": 5.185690879821777, "learning_rate": 6.139797360971145e-11, "loss": 0.2558, "step": 30467 }, { "epoch": 99.89508196721312, "grad_norm": 4.357379913330078, "learning_rate": 5.7733267967430284e-11, "loss": 0.3313, "step": 30468 }, { "epoch": 99.8983606557377, "grad_norm": 4.2878618240356445, "learning_rate": 5.4181322071134025e-11, "loss": 0.3219, "step": 30469 }, { "epoch": 99.90163934426229, "grad_norm": 8.212530136108398, "learning_rate": 5.0742135961900917e-11, "loss": 0.3825, "step": 30470 }, { "epoch": 99.90491803278688, "grad_norm": 4.024535179138184, "learning_rate": 4.741570967747855e-11, "loss": 0.421, "step": 30471 }, { "epoch": 99.90819672131147, "grad_norm": 6.584774971008301, "learning_rate": 4.42020432556145e-11, "loss": 0.5586, "step": 30472 }, { "epoch": 99.91147540983607, "grad_norm": 5.688474655151367, "learning_rate": 4.110113673294613e-11, "loss": 0.3254, "step": 30473 }, { "epoch": 99.91475409836066, "grad_norm": 4.694416046142578, "learning_rate": 3.8112990145000585e-11, "loss": 0.354, "step": 30474 }, { "epoch": 99.91803278688525, "grad_norm": 10.44174861907959, "learning_rate": 3.523760352397432e-11, "loss": 0.3164, "step": 30475 }, { "epoch": 99.92131147540984, "grad_norm": 5.13270378112793, "learning_rate": 3.247497690317403e-11, "loss": 0.2345, "step": 30476 }, { "epoch": 99.92459016393443, "grad_norm": 4.125114917755127, "learning_rate": 2.982511031257573e-11, "loss": 0.3786, "step": 30477 }, { "epoch": 99.92786885245901, "grad_norm": 5.955507755279541, "learning_rate": 2.7288003784375904e-11, "loss": 0.3544, "step": 30478 }, { "epoch": 99.9311475409836, "grad_norm": 4.363475799560547, "learning_rate": 2.4863657345219893e-11, "loss": 0.2417, "step": 30479 }, { "epoch": 99.93442622950819, "grad_norm": 5.530818462371826, "learning_rate": 2.2552071022863274e-11, "loss": 0.5639, "step": 30480 }, { "epoch": 99.9377049180328, "grad_norm": 8.235801696777344, "learning_rate": 2.0353244843951404e-11, "loss": 0.2545, "step": 30481 }, { "epoch": 99.94098360655738, "grad_norm": 5.365503787994385, "learning_rate": 1.8267178832909182e-11, "loss": 0.4134, "step": 30482 }, { "epoch": 99.94426229508197, "grad_norm": 5.17361307144165, "learning_rate": 1.6293873014161522e-11, "loss": 0.1968, "step": 30483 }, { "epoch": 99.94754098360656, "grad_norm": 4.019871234893799, "learning_rate": 1.4433327407692433e-11, "loss": 0.4707, "step": 30484 }, { "epoch": 99.95081967213115, "grad_norm": 4.303971290588379, "learning_rate": 1.2685542036816601e-11, "loss": 0.3449, "step": 30485 }, { "epoch": 99.95409836065573, "grad_norm": 8.095227241516113, "learning_rate": 1.1050516919297593e-11, "loss": 0.4223, "step": 30486 }, { "epoch": 99.95737704918032, "grad_norm": 5.438266754150391, "learning_rate": 9.528252075119426e-12, "loss": 0.4207, "step": 30487 }, { "epoch": 99.96065573770491, "grad_norm": 6.032220840454102, "learning_rate": 8.118747520935445e-12, "loss": 0.397, "step": 30488 }, { "epoch": 99.96393442622951, "grad_norm": 4.464608669281006, "learning_rate": 6.822003273398991e-12, "loss": 0.2326, "step": 30489 }, { "epoch": 99.9672131147541, "grad_norm": 3.3787243366241455, "learning_rate": 5.638019344722523e-12, "loss": 0.1862, "step": 30490 }, { "epoch": 99.97049180327869, "grad_norm": 3.8172264099121094, "learning_rate": 4.566795751559383e-12, "loss": 0.295, "step": 30491 }, { "epoch": 99.97377049180328, "grad_norm": 4.4929656982421875, "learning_rate": 3.608332502791356e-12, "loss": 0.2998, "step": 30492 }, { "epoch": 99.97704918032787, "grad_norm": 3.845477342605591, "learning_rate": 2.762629611741119e-12, "loss": 0.1784, "step": 30493 }, { "epoch": 99.98032786885246, "grad_norm": 4.251657962799072, "learning_rate": 2.029687086180232e-12, "loss": 0.2071, "step": 30494 }, { "epoch": 99.98360655737704, "grad_norm": 5.128242492675781, "learning_rate": 1.4095049361007028e-12, "loss": 0.4086, "step": 30495 }, { "epoch": 99.98688524590163, "grad_norm": 5.368431568145752, "learning_rate": 9.020831659434237e-13, "loss": 0.397, "step": 30496 }, { "epoch": 99.99016393442623, "grad_norm": 3.138451337814331, "learning_rate": 5.074217845901785e-13, "loss": 0.2625, "step": 30497 }, { "epoch": 99.99344262295082, "grad_norm": 4.038363456726074, "learning_rate": 2.2552079426141349e-13, "loss": 0.2383, "step": 30498 }, { "epoch": 99.99672131147541, "grad_norm": 9.755109786987305, "learning_rate": 5.6380198287797616e-14, "loss": 0.2376, "step": 30499 }, { "epoch": 100.0, "grad_norm": 4.956100940704346, "learning_rate": 0.0, "loss": 0.3293, "step": 30500 }, { "epoch": 100.0, "step": 30500, "total_flos": 1.895439597568e+16, "train_loss": 0.9038017748613827, "train_runtime": 9273.4878, "train_samples_per_second": 104.966, "train_steps_per_second": 3.289 } ], "logging_steps": 1.0, "max_steps": 30500, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.895439597568e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }