Qwarkstar-4B-Instruct-Preview / trainer_state.json

qingy2024

Upload checkpoint 3300

21f5eae verified 20 days ago

184 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9786476868327402,
	"eval_steps": 500,
	"global_step": 3300,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0008896797153024911,
	"grad_norm": 1.0390625,
	"learning_rate": 6e-05,
	"loss": 1.0103,
	"step": 3
	},
	{
	"epoch": 0.0017793594306049821,
	"grad_norm": 0.85546875,
	"learning_rate": 0.00012,
	"loss": 0.9297,
	"step": 6
	},
	{
	"epoch": 0.0026690391459074734,
	"grad_norm": 1.125,
	"learning_rate": 0.00018,
	"loss": 0.9383,
	"step": 9
	},
	{
	"epoch": 0.0035587188612099642,
	"grad_norm": 0.8046875,
	"learning_rate": 0.00019999982536383071,
	"loss": 0.9338,
	"step": 12
	},
	{
	"epoch": 0.004448398576512456,
	"grad_norm": 0.515625,
	"learning_rate": 0.00019999890852560968,
	"loss": 0.8937,
	"step": 15
	},
	{
	"epoch": 0.005338078291814947,
	"grad_norm": 0.474609375,
	"learning_rate": 0.00019999720583349016,
	"loss": 0.8876,
	"step": 18
	},
	{
	"epoch": 0.006227758007117438,
	"grad_norm": 0.400390625,
	"learning_rate": 0.000199994717300853,
	"loss": 0.8804,
	"step": 21
	},
	{
	"epoch": 0.0071174377224199285,
	"grad_norm": 0.41796875,
	"learning_rate": 0.00019999144294725462,
	"loss": 0.8889,
	"step": 24
	},
	{
	"epoch": 0.00800711743772242,
	"grad_norm": 0.39453125,
	"learning_rate": 0.0001999873827984269,
	"loss": 0.8664,
	"step": 27
	},
	{
	"epoch": 0.008896797153024912,
	"grad_norm": 0.416015625,
	"learning_rate": 0.00019998253688627705,
	"loss": 0.876,
	"step": 30
	},
	{
	"epoch": 0.009786476868327402,
	"grad_norm": 0.39453125,
	"learning_rate": 0.00019997690524888734,
	"loss": 0.8375,
	"step": 33
	},
	{
	"epoch": 0.010676156583629894,
	"grad_norm": 0.39453125,
	"learning_rate": 0.0001999704879305146,
	"loss": 0.8698,
	"step": 36
	},
	{
	"epoch": 0.011565836298932384,
	"grad_norm": 0.369140625,
	"learning_rate": 0.0001999632849815902,
	"loss": 0.8346,
	"step": 39
	},
	{
	"epoch": 0.012455516014234875,
	"grad_norm": 0.37109375,
	"learning_rate": 0.00019995529645871934,
	"loss": 0.8573,
	"step": 42
	},
	{
	"epoch": 0.013345195729537367,
	"grad_norm": 0.42578125,
	"learning_rate": 0.0001999465224246809,
	"loss": 0.8411,
	"step": 45
	},
	{
	"epoch": 0.014234875444839857,
	"grad_norm": 0.408203125,
	"learning_rate": 0.00019993696294842668,
	"loss": 0.8484,
	"step": 48
	},
	{
	"epoch": 0.015124555160142349,
	"grad_norm": 0.41015625,
	"learning_rate": 0.000199926618105081,
	"loss": 0.8172,
	"step": 51
	},
	{
	"epoch": 0.01601423487544484,
	"grad_norm": 0.384765625,
	"learning_rate": 0.00019991548797594015,
	"loss": 0.8481,
	"step": 54
	},
	{
	"epoch": 0.016903914590747332,
	"grad_norm": 0.421875,
	"learning_rate": 0.0001999035726484716,
	"loss": 0.8458,
	"step": 57
	},
	{
	"epoch": 0.017793594306049824,
	"grad_norm": 0.390625,
	"learning_rate": 0.00019989087221631343,
	"loss": 0.8321,
	"step": 60
	},
	{
	"epoch": 0.018683274021352312,
	"grad_norm": 0.4140625,
	"learning_rate": 0.00019987738677927365,
	"loss": 0.8511,
	"step": 63
	},
	{
	"epoch": 0.019572953736654804,
	"grad_norm": 0.375,
	"learning_rate": 0.00019986311644332915,
	"loss": 0.8324,
	"step": 66
	},
	{
	"epoch": 0.020462633451957295,
	"grad_norm": 0.341796875,
	"learning_rate": 0.00019984806132062517,
	"loss": 0.8368,
	"step": 69
	},
	{
	"epoch": 0.021352313167259787,
	"grad_norm": 0.498046875,
	"learning_rate": 0.00019983222152947428,
	"loss": 0.8062,
	"step": 72
	},
	{
	"epoch": 0.02224199288256228,
	"grad_norm": 0.484375,
	"learning_rate": 0.0001998155971943555,
	"loss": 0.8329,
	"step": 75
	},
	{
	"epoch": 0.023131672597864767,
	"grad_norm": 0.44921875,
	"learning_rate": 0.00019979818844591317,
	"loss": 0.8214,
	"step": 78
	},
	{
	"epoch": 0.02402135231316726,
	"grad_norm": 0.4765625,
	"learning_rate": 0.00019977999542095617,
	"loss": 0.8077,
	"step": 81
	},
	{
	"epoch": 0.02491103202846975,
	"grad_norm": 0.38671875,
	"learning_rate": 0.0001997610182624566,
	"loss": 0.8004,
	"step": 84
	},
	{
	"epoch": 0.025800711743772242,
	"grad_norm": 0.447265625,
	"learning_rate": 0.0001997412571195489,
	"loss": 0.8649,
	"step": 87
	},
	{
	"epoch": 0.026690391459074734,
	"grad_norm": 0.37109375,
	"learning_rate": 0.0001997207121475284,
	"loss": 0.82,
	"step": 90
	},
	{
	"epoch": 0.027580071174377226,
	"grad_norm": 0.40234375,
	"learning_rate": 0.00019969938350785035,
	"loss": 0.8353,
	"step": 93
	},
	{
	"epoch": 0.028469750889679714,
	"grad_norm": 0.41015625,
	"learning_rate": 0.00019967727136812856,
	"loss": 0.8006,
	"step": 96
	},
	{
	"epoch": 0.029359430604982206,
	"grad_norm": 0.376953125,
	"learning_rate": 0.000199654375902134,
	"loss": 0.8127,
	"step": 99
	},
	{
	"epoch": 0.030249110320284697,
	"grad_norm": 0.37890625,
	"learning_rate": 0.00019963069728979357,
	"loss": 0.8029,
	"step": 102
	},
	{
	"epoch": 0.03113879003558719,
	"grad_norm": 0.400390625,
	"learning_rate": 0.00019960623571718862,
	"loss": 0.8162,
	"step": 105
	},
	{
	"epoch": 0.03202846975088968,
	"grad_norm": 0.36328125,
	"learning_rate": 0.0001995809913765534,
	"loss": 0.7928,
	"step": 108
	},
	{
	"epoch": 0.03291814946619217,
	"grad_norm": 0.39453125,
	"learning_rate": 0.00019955496446627375,
	"loss": 0.8011,
	"step": 111
	},
	{
	"epoch": 0.033807829181494664,
	"grad_norm": 0.36328125,
	"learning_rate": 0.0001995281551908854,
	"loss": 0.7874,
	"step": 114
	},
	{
	"epoch": 0.03469750889679715,
	"grad_norm": 0.37890625,
	"learning_rate": 0.00019950056376107238,
	"loss": 0.827,
	"step": 117
	},
	{
	"epoch": 0.03558718861209965,
	"grad_norm": 0.375,
	"learning_rate": 0.00019947219039366537,
	"loss": 0.7829,
	"step": 120
	},
	{
	"epoch": 0.036476868327402136,
	"grad_norm": 0.36328125,
	"learning_rate": 0.00019944303531164005,
	"loss": 0.8184,
	"step": 123
	},
	{
	"epoch": 0.037366548042704624,
	"grad_norm": 0.388671875,
	"learning_rate": 0.00019941309874411524,
	"loss": 0.8005,
	"step": 126
	},
	{
	"epoch": 0.03825622775800712,
	"grad_norm": 0.357421875,
	"learning_rate": 0.0001993823809263512,
	"loss": 0.8199,
	"step": 129
	},
	{
	"epoch": 0.03914590747330961,
	"grad_norm": 0.35546875,
	"learning_rate": 0.00019935088209974773,
	"loss": 0.8066,
	"step": 132
	},
	{
	"epoch": 0.0400355871886121,
	"grad_norm": 0.34375,
	"learning_rate": 0.0001993186025118423,
	"loss": 0.7823,
	"step": 135
	},
	{
	"epoch": 0.04092526690391459,
	"grad_norm": 0.345703125,
	"learning_rate": 0.00019928554241630802,
	"loss": 0.7799,
	"step": 138
	},
	{
	"epoch": 0.04181494661921708,
	"grad_norm": 0.337890625,
	"learning_rate": 0.0001992517020729519,
	"loss": 0.7996,
	"step": 141
	},
	{
	"epoch": 0.042704626334519574,
	"grad_norm": 0.37890625,
	"learning_rate": 0.00019921708174771237,
	"loss": 0.8068,
	"step": 144
	},
	{
	"epoch": 0.04359430604982206,
	"grad_norm": 0.369140625,
	"learning_rate": 0.00019918168171265764,
	"loss": 0.8197,
	"step": 147
	},
	{
	"epoch": 0.04448398576512456,
	"grad_norm": 0.390625,
	"learning_rate": 0.0001991455022459833,
	"loss": 0.8255,
	"step": 150
	},
	{
	"epoch": 0.045373665480427046,
	"grad_norm": 0.388671875,
	"learning_rate": 0.00019910854363201018,
	"loss": 0.823,
	"step": 153
	},
	{
	"epoch": 0.046263345195729534,
	"grad_norm": 0.396484375,
	"learning_rate": 0.00019907080616118222,
	"loss": 0.7878,
	"step": 156
	},
	{
	"epoch": 0.04715302491103203,
	"grad_norm": 0.40625,
	"learning_rate": 0.00019903229013006394,
	"loss": 0.7548,
	"step": 159
	},
	{
	"epoch": 0.04804270462633452,
	"grad_norm": 0.33984375,
	"learning_rate": 0.00019899299584133845,
	"loss": 0.7965,
	"step": 162
	},
	{
	"epoch": 0.04893238434163701,
	"grad_norm": 0.400390625,
	"learning_rate": 0.0001989529236038048,
	"loss": 0.7848,
	"step": 165
	},
	{
	"epoch": 0.0498220640569395,
	"grad_norm": 0.353515625,
	"learning_rate": 0.0001989120737323757,
	"loss": 0.7979,
	"step": 168
	},
	{
	"epoch": 0.050711743772241996,
	"grad_norm": 0.369140625,
	"learning_rate": 0.00019887044654807488,
	"loss": 0.747,
	"step": 171
	},
	{
	"epoch": 0.051601423487544484,
	"grad_norm": 0.380859375,
	"learning_rate": 0.00019882804237803488,
	"loss": 0.811,
	"step": 174
	},
	{
	"epoch": 0.05249110320284697,
	"grad_norm": 0.33984375,
	"learning_rate": 0.00019878486155549405,
	"loss": 0.7585,
	"step": 177
	},
	{
	"epoch": 0.05338078291814947,
	"grad_norm": 0.373046875,
	"learning_rate": 0.0001987409044197943,
	"loss": 0.7964,
	"step": 180
	},
	{
	"epoch": 0.054270462633451956,
	"grad_norm": 0.365234375,
	"learning_rate": 0.0001986961713163783,
	"loss": 0.7923,
	"step": 183
	},
	{
	"epoch": 0.05516014234875445,
	"grad_norm": 0.349609375,
	"learning_rate": 0.0001986506625967867,
	"loss": 0.7821,
	"step": 186
	},
	{
	"epoch": 0.05604982206405694,
	"grad_norm": 0.349609375,
	"learning_rate": 0.00019860437861865546,
	"loss": 0.75,
	"step": 189
	},
	{
	"epoch": 0.05693950177935943,
	"grad_norm": 0.373046875,
	"learning_rate": 0.00019855731974571298,
	"loss": 0.7976,
	"step": 192
	},
	{
	"epoch": 0.05782918149466192,
	"grad_norm": 0.361328125,
	"learning_rate": 0.0001985094863477773,
	"loss": 0.7894,
	"step": 195
	},
	{
	"epoch": 0.05871886120996441,
	"grad_norm": 0.38671875,
	"learning_rate": 0.00019846087880075314,
	"loss": 0.7952,
	"step": 198
	},
	{
	"epoch": 0.059608540925266906,
	"grad_norm": 0.376953125,
	"learning_rate": 0.00019841149748662894,
	"loss": 0.8058,
	"step": 201
	},
	{
	"epoch": 0.060498220640569395,
	"grad_norm": 0.36328125,
	"learning_rate": 0.000198361342793474,
	"loss": 0.7727,
	"step": 204
	},
	{
	"epoch": 0.06138790035587188,
	"grad_norm": 0.349609375,
	"learning_rate": 0.00019831041511543515,
	"loss": 0.7792,
	"step": 207
	},
	{
	"epoch": 0.06227758007117438,
	"grad_norm": 0.390625,
	"learning_rate": 0.00019825871485273396,
	"loss": 0.8123,
	"step": 210
	},
	{
	"epoch": 0.06316725978647687,
	"grad_norm": 0.400390625,
	"learning_rate": 0.00019820624241166334,
	"loss": 0.7966,
	"step": 213
	},
	{
	"epoch": 0.06405693950177936,
	"grad_norm": 0.369140625,
	"learning_rate": 0.00019815299820458458,
	"loss": 0.7883,
	"step": 216
	},
	{
	"epoch": 0.06494661921708185,
	"grad_norm": 0.353515625,
	"learning_rate": 0.00019809898264992385,
	"loss": 0.7707,
	"step": 219
	},
	{
	"epoch": 0.06583629893238434,
	"grad_norm": 0.353515625,
	"learning_rate": 0.0001980441961721692,
	"loss": 0.788,
	"step": 222
	},
	{
	"epoch": 0.06672597864768683,
	"grad_norm": 0.333984375,
	"learning_rate": 0.00019798863920186696,
	"loss": 0.7759,
	"step": 225
	},
	{
	"epoch": 0.06761565836298933,
	"grad_norm": 0.3359375,
	"learning_rate": 0.0001979323121756185,
	"loss": 0.7782,
	"step": 228
	},
	{
	"epoch": 0.06850533807829182,
	"grad_norm": 0.3671875,
	"learning_rate": 0.0001978752155360768,
	"loss": 0.7417,
	"step": 231
	},
	{
	"epoch": 0.0693950177935943,
	"grad_norm": 0.35546875,
	"learning_rate": 0.00019781734973194293,
	"loss": 0.7678,
	"step": 234
	},
	{
	"epoch": 0.07028469750889679,
	"grad_norm": 0.376953125,
	"learning_rate": 0.00019775871521796252,
	"loss": 0.7867,
	"step": 237
	},
	{
	"epoch": 0.0711743772241993,
	"grad_norm": 0.3984375,
	"learning_rate": 0.00019769931245492222,
	"loss": 0.807,
	"step": 240
	},
	{
	"epoch": 0.07206405693950178,
	"grad_norm": 0.39453125,
	"learning_rate": 0.00019763914190964609,
	"loss": 0.7683,
	"step": 243
	},
	{
	"epoch": 0.07295373665480427,
	"grad_norm": 0.333984375,
	"learning_rate": 0.0001975782040549918,
	"loss": 0.7412,
	"step": 246
	},
	{
	"epoch": 0.07384341637010676,
	"grad_norm": 0.33203125,
	"learning_rate": 0.0001975164993698471,
	"loss": 0.7793,
	"step": 249
	},
	{
	"epoch": 0.07473309608540925,
	"grad_norm": 0.37890625,
	"learning_rate": 0.00019745402833912598,
	"loss": 0.8096,
	"step": 252
	},
	{
	"epoch": 0.07562277580071175,
	"grad_norm": 0.494140625,
	"learning_rate": 0.00019739079145376484,
	"loss": 0.771,
	"step": 255
	},
	{
	"epoch": 0.07651245551601424,
	"grad_norm": 0.47265625,
	"learning_rate": 0.0001973267892107186,
	"loss": 0.776,
	"step": 258
	},
	{
	"epoch": 0.07740213523131673,
	"grad_norm": 0.76171875,
	"learning_rate": 0.00019726202211295686,
	"loss": 0.7794,
	"step": 261
	},
	{
	"epoch": 0.07829181494661921,
	"grad_norm": 0.353515625,
	"learning_rate": 0.00019719649066945996,
	"loss": 0.7751,
	"step": 264
	},
	{
	"epoch": 0.0791814946619217,
	"grad_norm": 0.384765625,
	"learning_rate": 0.000197130195395215,
	"loss": 0.7957,
	"step": 267
	},
	{
	"epoch": 0.0800711743772242,
	"grad_norm": 0.361328125,
	"learning_rate": 0.0001970631368112115,
	"loss": 0.7747,
	"step": 270
	},
	{
	"epoch": 0.0809608540925267,
	"grad_norm": 0.3828125,
	"learning_rate": 0.00019699531544443784,
	"loss": 0.7677,
	"step": 273
	},
	{
	"epoch": 0.08185053380782918,
	"grad_norm": 0.36328125,
	"learning_rate": 0.00019692673182787666,
	"loss": 0.7749,
	"step": 276
	},
	{
	"epoch": 0.08274021352313167,
	"grad_norm": 0.43359375,
	"learning_rate": 0.00019685738650050086,
	"loss": 0.7795,
	"step": 279
	},
	{
	"epoch": 0.08362989323843416,
	"grad_norm": 0.38671875,
	"learning_rate": 0.00019678728000726935,
	"loss": 0.7748,
	"step": 282
	},
	{
	"epoch": 0.08451957295373666,
	"grad_norm": 0.546875,
	"learning_rate": 0.0001967164128991227,
	"loss": 0.7842,
	"step": 285
	},
	{
	"epoch": 0.08540925266903915,
	"grad_norm": 0.39453125,
	"learning_rate": 0.000196644785732979,
	"loss": 0.7675,
	"step": 288
	},
	{
	"epoch": 0.08629893238434164,
	"grad_norm": 0.37890625,
	"learning_rate": 0.00019657239907172925,
	"loss": 0.7593,
	"step": 291
	},
	{
	"epoch": 0.08718861209964412,
	"grad_norm": 0.369140625,
	"learning_rate": 0.00019649925348423305,
	"loss": 0.7741,
	"step": 294
	},
	{
	"epoch": 0.08807829181494661,
	"grad_norm": 0.3515625,
	"learning_rate": 0.0001964253495453141,
	"loss": 0.7842,
	"step": 297
	},
	{
	"epoch": 0.08896797153024912,
	"grad_norm": 0.40625,
	"learning_rate": 0.00019635068783575578,
	"loss": 0.7615,
	"step": 300
	},
	{
	"epoch": 0.0898576512455516,
	"grad_norm": 0.357421875,
	"learning_rate": 0.0001962752689422964,
	"loss": 0.7853,
	"step": 303
	},
	{
	"epoch": 0.09074733096085409,
	"grad_norm": 0.326171875,
	"learning_rate": 0.00019619909345762476,
	"loss": 0.7412,
	"step": 306
	},
	{
	"epoch": 0.09163701067615658,
	"grad_norm": 0.361328125,
	"learning_rate": 0.00019612216198037542,
	"loss": 0.7758,
	"step": 309
	},
	{
	"epoch": 0.09252669039145907,
	"grad_norm": 0.359375,
	"learning_rate": 0.00019604447511512396,
	"loss": 0.7994,
	"step": 312
	},
	{
	"epoch": 0.09341637010676157,
	"grad_norm": 0.357421875,
	"learning_rate": 0.00019596603347238234,
	"loss": 0.746,
	"step": 315
	},
	{
	"epoch": 0.09430604982206406,
	"grad_norm": 0.341796875,
	"learning_rate": 0.00019588683766859398,
	"loss": 0.7644,
	"step": 318
	},
	{
	"epoch": 0.09519572953736655,
	"grad_norm": 0.3515625,
	"learning_rate": 0.000195806888326129,
	"loss": 0.7574,
	"step": 321
	},
	{
	"epoch": 0.09608540925266904,
	"grad_norm": 0.365234375,
	"learning_rate": 0.00019572618607327925,
	"loss": 0.7797,
	"step": 324
	},
	{
	"epoch": 0.09697508896797152,
	"grad_norm": 0.34375,
	"learning_rate": 0.00019564473154425348,
	"loss": 0.7519,
	"step": 327
	},
	{
	"epoch": 0.09786476868327403,
	"grad_norm": 26.375,
	"learning_rate": 0.00019556252537917225,
	"loss": 0.8056,
	"step": 330
	},
	{
	"epoch": 0.09875444839857651,
	"grad_norm": 0.46484375,
	"learning_rate": 0.000195479568224063,
	"loss": 0.7589,
	"step": 333
	},
	{
	"epoch": 0.099644128113879,
	"grad_norm": 0.375,
	"learning_rate": 0.00019539586073085482,
	"loss": 0.7666,
	"step": 336
	},
	{
	"epoch": 0.10053380782918149,
	"grad_norm": 0.36328125,
	"learning_rate": 0.00019531140355737354,
	"loss": 0.7903,
	"step": 339
	},
	{
	"epoch": 0.10142348754448399,
	"grad_norm": 0.388671875,
	"learning_rate": 0.00019522619736733637,
	"loss": 0.7793,
	"step": 342
	},
	{
	"epoch": 0.10231316725978648,
	"grad_norm": 0.34765625,
	"learning_rate": 0.00019514024283034682,
	"loss": 0.7505,
	"step": 345
	},
	{
	"epoch": 0.10320284697508897,
	"grad_norm": 0.388671875,
	"learning_rate": 0.00019505354062188931,
	"loss": 0.7642,
	"step": 348
	},
	{
	"epoch": 0.10409252669039146,
	"grad_norm": 0.35546875,
	"learning_rate": 0.00019496609142332397,
	"loss": 0.7675,
	"step": 351
	},
	{
	"epoch": 0.10498220640569395,
	"grad_norm": 0.361328125,
	"learning_rate": 0.00019487789592188124,
	"loss": 0.7635,
	"step": 354
	},
	{
	"epoch": 0.10587188612099645,
	"grad_norm": 0.365234375,
	"learning_rate": 0.00019478895481065645,
	"loss": 0.7739,
	"step": 357
	},
	{
	"epoch": 0.10676156583629894,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00019469926878860444,
	"loss": 0.7302,
	"step": 360
	},
	{
	"epoch": 0.10765124555160142,
	"grad_norm": 0.359375,
	"learning_rate": 0.000194608838560534,
	"loss": 0.7563,
	"step": 363
	},
	{
	"epoch": 0.10854092526690391,
	"grad_norm": 0.341796875,
	"learning_rate": 0.00019451766483710237,
	"loss": 0.7473,
	"step": 366
	},
	{
	"epoch": 0.1094306049822064,
	"grad_norm": 0.373046875,
	"learning_rate": 0.00019442574833480962,
	"loss": 0.7352,
	"step": 369
	},
	{
	"epoch": 0.1103202846975089,
	"grad_norm": 0.330078125,
	"learning_rate": 0.00019433308977599305,
	"loss": 0.7561,
	"step": 372
	},
	{
	"epoch": 0.11120996441281139,
	"grad_norm": 0.3359375,
	"learning_rate": 0.0001942396898888215,
	"loss": 0.7633,
	"step": 375
	},
	{
	"epoch": 0.11209964412811388,
	"grad_norm": 0.33984375,
	"learning_rate": 0.00019414554940728963,
	"loss": 0.7221,
	"step": 378
	},
	{
	"epoch": 0.11298932384341637,
	"grad_norm": 0.32421875,
	"learning_rate": 0.0001940506690712122,
	"loss": 0.7552,
	"step": 381
	},
	{
	"epoch": 0.11387900355871886,
	"grad_norm": 0.35546875,
	"learning_rate": 0.0001939550496262181,
	"loss": 0.7563,
	"step": 384
	},
	{
	"epoch": 0.11476868327402136,
	"grad_norm": 0.3125,
	"learning_rate": 0.00019385869182374474,
	"loss": 0.7643,
	"step": 387
	},
	{
	"epoch": 0.11565836298932385,
	"grad_norm": 0.31640625,
	"learning_rate": 0.0001937615964210319,
	"loss": 0.7562,
	"step": 390
	},
	{
	"epoch": 0.11654804270462633,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00019366376418111588,
	"loss": 0.7425,
	"step": 393
	},
	{
	"epoch": 0.11743772241992882,
	"grad_norm": 0.357421875,
	"learning_rate": 0.0001935651958728236,
	"loss": 0.7514,
	"step": 396
	},
	{
	"epoch": 0.11832740213523131,
	"grad_norm": 0.328125,
	"learning_rate": 0.0001934658922707664,
	"loss": 0.7466,
	"step": 399
	},
	{
	"epoch": 0.11921708185053381,
	"grad_norm": 0.341796875,
	"learning_rate": 0.00019336585415533398,
	"loss": 0.7557,
	"step": 402
	},
	{
	"epoch": 0.1201067615658363,
	"grad_norm": 0.318359375,
	"learning_rate": 0.00019326508231268838,
	"loss": 0.7685,
	"step": 405
	},
	{
	"epoch": 0.12099644128113879,
	"grad_norm": 0.4140625,
	"learning_rate": 0.00019316357753475772,
	"loss": 0.7632,
	"step": 408
	},
	{
	"epoch": 0.12188612099644128,
	"grad_norm": 0.328125,
	"learning_rate": 0.00019306134061922994,
	"loss": 0.7484,
	"step": 411
	},
	{
	"epoch": 0.12277580071174377,
	"grad_norm": 0.40625,
	"learning_rate": 0.0001929583723695466,
	"loss": 0.7623,
	"step": 414
	},
	{
	"epoch": 0.12366548042704627,
	"grad_norm": 0.33984375,
	"learning_rate": 0.00019285467359489653,
	"loss": 0.7662,
	"step": 417
	},
	{
	"epoch": 0.12455516014234876,
	"grad_norm": 0.341796875,
	"learning_rate": 0.0001927502451102095,
	"loss": 0.7346,
	"step": 420
	},
	{
	"epoch": 0.12544483985765126,
	"grad_norm": 0.341796875,
	"learning_rate": 0.00019264508773614972,
	"loss": 0.7275,
	"step": 423
	},
	{
	"epoch": 0.12633451957295375,
	"grad_norm": 0.34375,
	"learning_rate": 0.00019253920229910965,
	"loss": 0.7476,
	"step": 426
	},
	{
	"epoch": 0.12722419928825623,
	"grad_norm": 0.359375,
	"learning_rate": 0.00019243258963120313,
	"loss": 0.7471,
	"step": 429
	},
	{
	"epoch": 0.12811387900355872,
	"grad_norm": 0.37890625,
	"learning_rate": 0.00019232525057025915,
	"loss": 0.7575,
	"step": 432
	},
	{
	"epoch": 0.1290035587188612,
	"grad_norm": 0.36328125,
	"learning_rate": 0.00019221718595981507,
	"loss": 0.7385,
	"step": 435
	},
	{
	"epoch": 0.1298932384341637,
	"grad_norm": 0.3125,
	"learning_rate": 0.00019210839664911012,
	"loss": 0.7562,
	"step": 438
	},
	{
	"epoch": 0.1307829181494662,
	"grad_norm": 0.34375,
	"learning_rate": 0.00019199888349307872,
	"loss": 0.7409,
	"step": 441
	},
	{
	"epoch": 0.13167259786476868,
	"grad_norm": 0.341796875,
	"learning_rate": 0.0001918886473523436,
	"loss": 0.7473,
	"step": 444
	},
	{
	"epoch": 0.13256227758007116,
	"grad_norm": 0.3359375,
	"learning_rate": 0.00019177768909320927,
	"loss": 0.7526,
	"step": 447
	},
	{
	"epoch": 0.13345195729537365,
	"grad_norm": 0.361328125,
	"learning_rate": 0.00019166600958765506,
	"loss": 0.7723,
	"step": 450
	},
	{
	"epoch": 0.13434163701067617,
	"grad_norm": 0.318359375,
	"learning_rate": 0.00019155360971332826,
	"loss": 0.7301,
	"step": 453
	},
	{
	"epoch": 0.13523131672597866,
	"grad_norm": 0.3828125,
	"learning_rate": 0.0001914404903535373,
	"loss": 0.7629,
	"step": 456
	},
	{
	"epoch": 0.13612099644128114,
	"grad_norm": 0.35546875,
	"learning_rate": 0.00019132665239724486,
	"loss": 0.7431,
	"step": 459
	},
	{
	"epoch": 0.13701067615658363,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00019121209673906065,
	"loss": 0.742,
	"step": 462
	},
	{
	"epoch": 0.13790035587188612,
	"grad_norm": 0.33984375,
	"learning_rate": 0.0001910968242792346,
	"loss": 0.7517,
	"step": 465
	},
	{
	"epoch": 0.1387900355871886,
	"grad_norm": 0.359375,
	"learning_rate": 0.00019098083592364974,
	"loss": 0.7371,
	"step": 468
	},
	{
	"epoch": 0.1396797153024911,
	"grad_norm": 0.35546875,
	"learning_rate": 0.00019086413258381506,
	"loss": 0.7498,
	"step": 471
	},
	{
	"epoch": 0.14056939501779359,
	"grad_norm": 0.3359375,
	"learning_rate": 0.00019074671517685827,
	"loss": 0.7289,
	"step": 474
	},
	{
	"epoch": 0.14145907473309607,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00019062858462551874,
	"loss": 0.7158,
	"step": 477
	},
	{
	"epoch": 0.1423487544483986,
	"grad_norm": 0.357421875,
	"learning_rate": 0.0001905097418581401,
	"loss": 0.729,
	"step": 480
	},
	{
	"epoch": 0.14323843416370108,
	"grad_norm": 0.328125,
	"learning_rate": 0.00019039018780866312,
	"loss": 0.7763,
	"step": 483
	},
	{
	"epoch": 0.14412811387900357,
	"grad_norm": 0.34375,
	"learning_rate": 0.00019026992341661817,
	"loss": 0.7581,
	"step": 486
	},
	{
	"epoch": 0.14501779359430605,
	"grad_norm": 0.337890625,
	"learning_rate": 0.00019014894962711805,
	"loss": 0.7664,
	"step": 489
	},
	{
	"epoch": 0.14590747330960854,
	"grad_norm": 1.1328125,
	"learning_rate": 0.00019002726739085027,
	"loss": 0.7466,
	"step": 492
	},
	{
	"epoch": 0.14679715302491103,
	"grad_norm": 0.33984375,
	"learning_rate": 0.00018990487766406994,
	"loss": 0.7463,
	"step": 495
	},
	{
	"epoch": 0.14768683274021352,
	"grad_norm": 0.408203125,
	"learning_rate": 0.000189781781408592,
	"loss": 0.7512,
	"step": 498
	},
	{
	"epoch": 0.148576512455516,
	"grad_norm": 0.3359375,
	"learning_rate": 0.0001896579795917837,
	"loss": 0.7208,
	"step": 501
	},
	{
	"epoch": 0.1494661921708185,
	"grad_norm": 0.333984375,
	"learning_rate": 0.00018953347318655705,
	"loss": 0.7514,
	"step": 504
	},
	{
	"epoch": 0.15035587188612098,
	"grad_norm": 0.35546875,
	"learning_rate": 0.0001894082631713612,
	"loss": 0.7525,
	"step": 507
	},
	{
	"epoch": 0.1512455516014235,
	"grad_norm": 0.328125,
	"learning_rate": 0.00018928235053017472,
	"loss": 0.7335,
	"step": 510
	},
	{
	"epoch": 0.152135231316726,
	"grad_norm": 0.3203125,
	"learning_rate": 0.0001891557362524977,
	"loss": 0.7331,
	"step": 513
	},
	{
	"epoch": 0.15302491103202848,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00018902842133334432,
	"loss": 0.7437,
	"step": 516
	},
	{
	"epoch": 0.15391459074733096,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00018890040677323474,
	"loss": 0.7029,
	"step": 519
	},
	{
	"epoch": 0.15480427046263345,
	"grad_norm": 0.34765625,
	"learning_rate": 0.0001887716935781873,
	"loss": 0.7364,
	"step": 522
	},
	{
	"epoch": 0.15569395017793594,
	"grad_norm": 0.330078125,
	"learning_rate": 0.00018864228275971076,
	"loss": 0.7084,
	"step": 525
	},
	{
	"epoch": 0.15658362989323843,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00018851217533479616,
	"loss": 0.7301,
	"step": 528
	},
	{
	"epoch": 0.15747330960854092,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00018838137232590895,
	"loss": 0.7352,
	"step": 531
	},
	{
	"epoch": 0.1583629893238434,
	"grad_norm": 0.3125,
	"learning_rate": 0.00018824987476098092,
	"loss": 0.7245,
	"step": 534
	},
	{
	"epoch": 0.1592526690391459,
	"grad_norm": 0.328125,
	"learning_rate": 0.00018811768367340198,
	"loss": 0.7509,
	"step": 537
	},
	{
	"epoch": 0.1601423487544484,
	"grad_norm": 0.310546875,
	"learning_rate": 0.0001879848001020124,
	"loss": 0.732,
	"step": 540
	},
	{
	"epoch": 0.1610320284697509,
	"grad_norm": 0.333984375,
	"learning_rate": 0.00018785122509109426,
	"loss": 0.7194,
	"step": 543
	},
	{
	"epoch": 0.1619217081850534,
	"grad_norm": 0.359375,
	"learning_rate": 0.00018771695969036344,
	"loss": 0.7389,
	"step": 546
	},
	{
	"epoch": 0.16281138790035588,
	"grad_norm": 0.328125,
	"learning_rate": 0.00018758200495496132,
	"loss": 0.7155,
	"step": 549
	},
	{
	"epoch": 0.16370106761565836,
	"grad_norm": 0.30078125,
	"learning_rate": 0.0001874463619454466,
	"loss": 0.7561,
	"step": 552
	},
	{
	"epoch": 0.16459074733096085,
	"grad_norm": 0.306640625,
	"learning_rate": 0.0001873100317277867,
	"loss": 0.7208,
	"step": 555
	},
	{
	"epoch": 0.16548042704626334,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00018717301537334973,
	"loss": 0.682,
	"step": 558
	},
	{
	"epoch": 0.16637010676156583,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00018703531395889575,
	"loss": 0.7293,
	"step": 561
	},
	{
	"epoch": 0.16725978647686832,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00018689692856656853,
	"loss": 0.7452,
	"step": 564
	},
	{
	"epoch": 0.16814946619217083,
	"grad_norm": 0.328125,
	"learning_rate": 0.00018675786028388692,
	"loss": 0.73,
	"step": 567
	},
	{
	"epoch": 0.16903914590747332,
	"grad_norm": 0.306640625,
	"learning_rate": 0.0001866181102037364,
	"loss": 0.728,
	"step": 570
	},
	{
	"epoch": 0.1699288256227758,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00018647767942436038,
	"loss": 0.7141,
	"step": 573
	},
	{
	"epoch": 0.1708185053380783,
	"grad_norm": 0.296875,
	"learning_rate": 0.0001863365690493517,
	"loss": 0.728,
	"step": 576
	},
	{
	"epoch": 0.17170818505338079,
	"grad_norm": 0.318359375,
	"learning_rate": 0.00018619478018764378,
	"loss": 0.7351,
	"step": 579
	},
	{
	"epoch": 0.17259786476868327,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00018605231395350214,
	"loss": 0.7291,
	"step": 582
	},
	{
	"epoch": 0.17348754448398576,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00018590917146651544,
	"loss": 0.7626,
	"step": 585
	},
	{
	"epoch": 0.17437722419928825,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00018576535385158674,
	"loss": 0.7216,
	"step": 588
	},
	{
	"epoch": 0.17526690391459074,
	"grad_norm": 0.326171875,
	"learning_rate": 0.00018562086223892474,
	"loss": 0.7437,
	"step": 591
	},
	{
	"epoch": 0.17615658362989323,
	"grad_norm": 0.349609375,
	"learning_rate": 0.0001854756977640348,
	"loss": 0.7354,
	"step": 594
	},
	{
	"epoch": 0.17704626334519574,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00018532986156771008,
	"loss": 0.7039,
	"step": 597
	},
	{
	"epoch": 0.17793594306049823,
	"grad_norm": 0.3515625,
	"learning_rate": 0.00018518335479602248,
	"loss": 0.7506,
	"step": 600
	},
	{
	"epoch": 0.17882562277580072,
	"grad_norm": 0.328125,
	"learning_rate": 0.00018503617860031376,
	"loss": 0.748,
	"step": 603
	},
	{
	"epoch": 0.1797153024911032,
	"grad_norm": 0.322265625,
	"learning_rate": 0.00018488833413718645,
	"loss": 0.7402,
	"step": 606
	},
	{
	"epoch": 0.1806049822064057,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00018473982256849466,
	"loss": 0.7068,
	"step": 609
	},
	{
	"epoch": 0.18149466192170818,
	"grad_norm": 0.3203125,
	"learning_rate": 0.0001845906450613351,
	"loss": 0.7265,
	"step": 612
	},
	{
	"epoch": 0.18238434163701067,
	"grad_norm": 0.333984375,
	"learning_rate": 0.0001844408027880378,
	"loss": 0.7285,
	"step": 615
	},
	{
	"epoch": 0.18327402135231316,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00018429029692615701,
	"loss": 0.7226,
	"step": 618
	},
	{
	"epoch": 0.18416370106761565,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00018413912865846178,
	"loss": 0.7204,
	"step": 621
	},
	{
	"epoch": 0.18505338078291814,
	"grad_norm": 0.3359375,
	"learning_rate": 0.00018398729917292684,
	"loss": 0.724,
	"step": 624
	},
	{
	"epoch": 0.18594306049822065,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00018383480966272306,
	"loss": 0.7032,
	"step": 627
	},
	{
	"epoch": 0.18683274021352314,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00018368166132620836,
	"loss": 0.7126,
	"step": 630
	},
	{
	"epoch": 0.18772241992882563,
	"grad_norm": 0.328125,
	"learning_rate": 0.00018352785536691804,
	"loss": 0.7188,
	"step": 633
	},
	{
	"epoch": 0.18861209964412812,
	"grad_norm": 0.427734375,
	"learning_rate": 0.00018337339299355542,
	"loss": 0.7207,
	"step": 636
	},
	{
	"epoch": 0.1895017793594306,
	"grad_norm": 0.318359375,
	"learning_rate": 0.00018321827541998228,
	"loss": 0.7095,
	"step": 639
	},
	{
	"epoch": 0.1903914590747331,
	"grad_norm": 0.283203125,
	"learning_rate": 0.0001830625038652095,
	"loss": 0.733,
	"step": 642
	},
	{
	"epoch": 0.19128113879003558,
	"grad_norm": 0.29296875,
	"learning_rate": 0.0001829060795533872,
	"loss": 0.7192,
	"step": 645
	},
	{
	"epoch": 0.19217081850533807,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00018274900371379542,
	"loss": 0.7178,
	"step": 648
	},
	{
	"epoch": 0.19306049822064056,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00018259127758083417,
	"loss": 0.6983,
	"step": 651
	},
	{
	"epoch": 0.19395017793594305,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00018243290239401404,
	"loss": 0.7008,
	"step": 654
	},
	{
	"epoch": 0.19483985765124556,
	"grad_norm": 0.291015625,
	"learning_rate": 0.0001822738793979461,
	"loss": 0.6886,
	"step": 657
	},
	{
	"epoch": 0.19572953736654805,
	"grad_norm": 0.287109375,
	"learning_rate": 0.0001821142098423325,
	"loss": 0.6909,
	"step": 660
	},
	{
	"epoch": 0.19661921708185054,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00018195389498195627,
	"loss": 0.7114,
	"step": 663
	},
	{
	"epoch": 0.19750889679715303,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00018179293607667178,
	"loss": 0.7185,
	"step": 666
	},
	{
	"epoch": 0.19839857651245552,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00018163133439139467,
	"loss": 0.7284,
	"step": 669
	},
	{
	"epoch": 0.199288256227758,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00018146909119609196,
	"loss": 0.702,
	"step": 672
	},
	{
	"epoch": 0.2001779359430605,
	"grad_norm": 0.314453125,
	"learning_rate": 0.00018130620776577198,
	"loss": 0.7232,
	"step": 675
	},
	{
	"epoch": 0.20106761565836298,
	"grad_norm": 0.322265625,
	"learning_rate": 0.00018114268538047456,
	"loss": 0.7201,
	"step": 678
	},
	{
	"epoch": 0.20195729537366547,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00018097852532526074,
	"loss": 0.704,
	"step": 681
	},
	{
	"epoch": 0.20284697508896798,
	"grad_norm": 0.3203125,
	"learning_rate": 0.0001808137288902028,
	"loss": 0.7134,
	"step": 684
	},
	{
	"epoch": 0.20373665480427047,
	"grad_norm": 0.3203125,
	"learning_rate": 0.0001806482973703741,
	"loss": 0.7004,
	"step": 687
	},
	{
	"epoch": 0.20462633451957296,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00018048223206583878,
	"loss": 0.7124,
	"step": 690
	},
	{
	"epoch": 0.20551601423487545,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00018031553428164186,
	"loss": 0.7271,
	"step": 693
	},
	{
	"epoch": 0.20640569395017794,
	"grad_norm": 0.30859375,
	"learning_rate": 0.0001801482053277987,
	"loss": 0.6942,
	"step": 696
	},
	{
	"epoch": 0.20729537366548043,
	"grad_norm": 0.322265625,
	"learning_rate": 0.00017998024651928464,
	"loss": 0.7274,
	"step": 699
	},
	{
	"epoch": 0.20818505338078291,
	"grad_norm": 0.328125,
	"learning_rate": 0.000179811659176025,
	"loss": 0.7046,
	"step": 702
	},
	{
	"epoch": 0.2090747330960854,
	"grad_norm": 0.296875,
	"learning_rate": 0.00017964244462288448,
	"loss": 0.6981,
	"step": 705
	},
	{
	"epoch": 0.2099644128113879,
	"grad_norm": 0.30859375,
	"learning_rate": 0.0001794726041896567,
	"loss": 0.7034,
	"step": 708
	},
	{
	"epoch": 0.21085409252669038,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00017930213921105392,
	"loss": 0.7157,
	"step": 711
	},
	{
	"epoch": 0.2117437722419929,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00017913105102669642,
	"loss": 0.7015,
	"step": 714
	},
	{
	"epoch": 0.21263345195729538,
	"grad_norm": 0.333984375,
	"learning_rate": 0.00017895934098110207,
	"loss": 0.714,
	"step": 717
	},
	{
	"epoch": 0.21352313167259787,
	"grad_norm": 0.31640625,
	"learning_rate": 0.0001787870104236757,
	"loss": 0.7185,
	"step": 720
	},
	{
	"epoch": 0.21441281138790036,
	"grad_norm": 0.328125,
	"learning_rate": 0.00017861406070869844,
	"loss": 0.6969,
	"step": 723
	},
	{
	"epoch": 0.21530249110320285,
	"grad_norm": 0.296875,
	"learning_rate": 0.00017844049319531725,
	"loss": 0.7188,
	"step": 726
	},
	{
	"epoch": 0.21619217081850534,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00017826630924753408,
	"loss": 0.7109,
	"step": 729
	},
	{
	"epoch": 0.21708185053380782,
	"grad_norm": 0.32421875,
	"learning_rate": 0.00017809151023419516,
	"loss": 0.715,
	"step": 732
	},
	{
	"epoch": 0.2179715302491103,
	"grad_norm": 0.341796875,
	"learning_rate": 0.0001779160975289804,
	"loss": 0.7103,
	"step": 735
	},
	{
	"epoch": 0.2188612099644128,
	"grad_norm": 0.326171875,
	"learning_rate": 0.00017774007251039245,
	"loss": 0.7083,
	"step": 738
	},
	{
	"epoch": 0.2197508896797153,
	"grad_norm": 0.32421875,
	"learning_rate": 0.00017756343656174584,
	"loss": 0.733,
	"step": 741
	},
	{
	"epoch": 0.2206405693950178,
	"grad_norm": 0.3125,
	"learning_rate": 0.00017738619107115618,
	"loss": 0.7088,
	"step": 744
	},
	{
	"epoch": 0.2215302491103203,
	"grad_norm": 0.322265625,
	"learning_rate": 0.00017720833743152935,
	"loss": 0.715,
	"step": 747
	},
	{
	"epoch": 0.22241992882562278,
	"grad_norm": 0.3125,
	"learning_rate": 0.0001770298770405503,
	"loss": 0.707,
	"step": 750
	},
	{
	"epoch": 0.22330960854092527,
	"grad_norm": 0.330078125,
	"learning_rate": 0.0001768508113006723,
	"loss": 0.6825,
	"step": 753
	},
	{
	"epoch": 0.22419928825622776,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00017667114161910586,
	"loss": 0.72,
	"step": 756
	},
	{
	"epoch": 0.22508896797153025,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00017649086940780748,
	"loss": 0.7141,
	"step": 759
	},
	{
	"epoch": 0.22597864768683273,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00017630999608346886,
	"loss": 0.6972,
	"step": 762
	},
	{
	"epoch": 0.22686832740213522,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00017612852306750566,
	"loss": 0.6851,
	"step": 765
	},
	{
	"epoch": 0.2277580071174377,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00017594645178604611,
	"loss": 0.7086,
	"step": 768
	},
	{
	"epoch": 0.22864768683274023,
	"grad_norm": 0.302734375,
	"learning_rate": 0.0001757637836699202,
	"loss": 0.6971,
	"step": 771
	},
	{
	"epoch": 0.22953736654804271,
	"grad_norm": 0.30078125,
	"learning_rate": 0.000175580520154648,
	"loss": 0.7095,
	"step": 774
	},
	{
	"epoch": 0.2304270462633452,
	"grad_norm": 0.28515625,
	"learning_rate": 0.0001753966626804288,
	"loss": 0.6622,
	"step": 777
	},
	{
	"epoch": 0.2313167259786477,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00017521221269212943,
	"loss": 0.7095,
	"step": 780
	},
	{
	"epoch": 0.23220640569395018,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00017502717163927315,
	"loss": 0.7202,
	"step": 783
	},
	{
	"epoch": 0.23309608540925267,
	"grad_norm": 0.345703125,
	"learning_rate": 0.0001748415409760282,
	"loss": 0.7216,
	"step": 786
	},
	{
	"epoch": 0.23398576512455516,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00017465532216119625,
	"loss": 0.6692,
	"step": 789
	},
	{
	"epoch": 0.23487544483985764,
	"grad_norm": 0.3359375,
	"learning_rate": 0.00017446851665820116,
	"loss": 0.6887,
	"step": 792
	},
	{
	"epoch": 0.23576512455516013,
	"grad_norm": 0.330078125,
	"learning_rate": 0.00017428112593507723,
	"loss": 0.7244,
	"step": 795
	},
	{
	"epoch": 0.23665480427046262,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00017409315146445784,
	"loss": 0.7044,
	"step": 798
	},
	{
	"epoch": 0.23754448398576514,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00017390459472356383,
	"loss": 0.7155,
	"step": 801
	},
	{
	"epoch": 0.23843416370106763,
	"grad_norm": 0.314453125,
	"learning_rate": 0.00017371545719419186,
	"loss": 0.698,
	"step": 804
	},
	{
	"epoch": 0.2393238434163701,
	"grad_norm": 0.34375,
	"learning_rate": 0.00017352574036270282,
	"loss": 0.6877,
	"step": 807
	},
	{
	"epoch": 0.2402135231316726,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00017333544572001007,
	"loss": 0.7019,
	"step": 810
	},
	{
	"epoch": 0.2411032028469751,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00017314457476156782,
	"loss": 0.711,
	"step": 813
	},
	{
	"epoch": 0.24199288256227758,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00017295312898735934,
	"loss": 0.7147,
	"step": 816
	},
	{
	"epoch": 0.24288256227758007,
	"grad_norm": 0.380859375,
	"learning_rate": 0.00017276110990188507,
	"loss": 0.6981,
	"step": 819
	},
	{
	"epoch": 0.24377224199288255,
	"grad_norm": 0.287109375,
	"learning_rate": 0.000172568519014151,
	"loss": 0.6907,
	"step": 822
	},
	{
	"epoch": 0.24466192170818504,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00017237535783765662,
	"loss": 0.7123,
	"step": 825
	},
	{
	"epoch": 0.24555160142348753,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00017218162789038312,
	"loss": 0.7002,
	"step": 828
	},
	{
	"epoch": 0.24644128113879005,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00017198733069478153,
	"loss": 0.6812,
	"step": 831
	},
	{
	"epoch": 0.24733096085409254,
	"grad_norm": 0.330078125,
	"learning_rate": 0.00017179246777776052,
	"loss": 0.697,
	"step": 834
	},
	{
	"epoch": 0.24822064056939502,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00017159704067067468,
	"loss": 0.6925,
	"step": 837
	},
	{
	"epoch": 0.2491103202846975,
	"grad_norm": 0.291015625,
	"learning_rate": 0.0001714010509093123,
	"loss": 0.7084,
	"step": 840
	},
	{
	"epoch": 0.25,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00017120450003388338,
	"loss": 0.6717,
	"step": 843
	},
	{
	"epoch": 0.2508896797153025,
	"grad_norm": 0.298828125,
	"learning_rate": 0.0001710073895890075,
	"loss": 0.7053,
	"step": 846
	},
	{
	"epoch": 0.251779359430605,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00017080972112370167,
	"loss": 0.6479,
	"step": 849
	},
	{
	"epoch": 0.2526690391459075,
	"grad_norm": 0.365234375,
	"learning_rate": 0.00017061149619136817,
	"loss": 0.6837,
	"step": 852
	},
	{
	"epoch": 0.25355871886120995,
	"grad_norm": 0.341796875,
	"learning_rate": 0.0001704127163497824,
	"loss": 0.7102,
	"step": 855
	},
	{
	"epoch": 0.25444839857651247,
	"grad_norm": 0.3203125,
	"learning_rate": 0.0001702133831610805,
	"loss": 0.7029,
	"step": 858
	},
	{
	"epoch": 0.25533807829181493,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00017001349819174727,
	"loss": 0.6871,
	"step": 861
	},
	{
	"epoch": 0.25622775800711745,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00016981306301260357,
	"loss": 0.6933,
	"step": 864
	},
	{
	"epoch": 0.2571174377224199,
	"grad_norm": 0.3046875,
	"learning_rate": 0.0001696120791987944,
	"loss": 0.7173,
	"step": 867
	},
	{
	"epoch": 0.2580071174377224,
	"grad_norm": 0.29296875,
	"learning_rate": 0.000169410548329776,
	"loss": 0.6973,
	"step": 870
	},
	{
	"epoch": 0.25889679715302494,
	"grad_norm": 0.298828125,
	"learning_rate": 0.0001692084719893039,
	"loss": 0.6909,
	"step": 873
	},
	{
	"epoch": 0.2597864768683274,
	"grad_norm": 0.28125,
	"learning_rate": 0.0001690058517654203,
	"loss": 0.7051,
	"step": 876
	},
	{
	"epoch": 0.2606761565836299,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00016880268925044143,
	"loss": 0.6671,
	"step": 879
	},
	{
	"epoch": 0.2615658362989324,
	"grad_norm": 0.328125,
	"learning_rate": 0.0001685989860409453,
	"loss": 0.6904,
	"step": 882
	},
	{
	"epoch": 0.2624555160142349,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00016839474373775892,
	"loss": 0.7076,
	"step": 885
	},
	{
	"epoch": 0.26334519572953735,
	"grad_norm": 0.314453125,
	"learning_rate": 0.00016818996394594603,
	"loss": 0.7155,
	"step": 888
	},
	{
	"epoch": 0.26423487544483987,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00016798464827479404,
	"loss": 0.6727,
	"step": 891
	},
	{
	"epoch": 0.26512455516014233,
	"grad_norm": 0.28515625,
	"learning_rate": 0.0001677787983378019,
	"loss": 0.7061,
	"step": 894
	},
	{
	"epoch": 0.26601423487544484,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00016757241575266694,
	"loss": 0.6945,
	"step": 897
	},
	{
	"epoch": 0.2669039145907473,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00016736550214127246,
	"loss": 0.6812,
	"step": 900
	},
	{
	"epoch": 0.2677935943060498,
	"grad_norm": 0.326171875,
	"learning_rate": 0.0001671580591296749,
	"loss": 0.7009,
	"step": 903
	},
	{
	"epoch": 0.26868327402135234,
	"grad_norm": 0.34375,
	"learning_rate": 0.00016695008834809107,
	"loss": 0.6928,
	"step": 906
	},
	{
	"epoch": 0.2695729537366548,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00016674159143088526,
	"loss": 0.7009,
	"step": 909
	},
	{
	"epoch": 0.2704626334519573,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00016653257001655652,
	"loss": 0.6824,
	"step": 912
	},
	{
	"epoch": 0.2713523131672598,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00016632302574772577,
	"loss": 0.7052,
	"step": 915
	},
	{
	"epoch": 0.2722419928825623,
	"grad_norm": 0.2890625,
	"learning_rate": 0.0001661129602711227,
	"loss": 0.6904,
	"step": 918
	},
	{
	"epoch": 0.27313167259786475,
	"grad_norm": 0.333984375,
	"learning_rate": 0.0001659023752375731,
	"loss": 0.6945,
	"step": 921
	},
	{
	"epoch": 0.27402135231316727,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00016569127230198577,
	"loss": 0.6761,
	"step": 924
	},
	{
	"epoch": 0.2749110320284697,
	"grad_norm": 0.27734375,
	"learning_rate": 0.0001654796531233394,
	"loss": 0.7051,
	"step": 927
	},
	{
	"epoch": 0.27580071174377224,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00016526751936466974,
	"loss": 0.6919,
	"step": 930
	},
	{
	"epoch": 0.27669039145907476,
	"grad_norm": 0.279296875,
	"learning_rate": 0.0001650548726930564,
	"loss": 0.7075,
	"step": 933
	},
	{
	"epoch": 0.2775800711743772,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00016484171477960976,
	"loss": 0.6621,
	"step": 936
	},
	{
	"epoch": 0.27846975088967973,
	"grad_norm": 0.314453125,
	"learning_rate": 0.0001646280472994579,
	"loss": 0.6956,
	"step": 939
	},
	{
	"epoch": 0.2793594306049822,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00016441387193173336,
	"loss": 0.6791,
	"step": 942
	},
	{
	"epoch": 0.2802491103202847,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00016419919035956,
	"loss": 0.6891,
	"step": 945
	},
	{
	"epoch": 0.28113879003558717,
	"grad_norm": 0.314453125,
	"learning_rate": 0.0001639840042700397,
	"loss": 0.6948,
	"step": 948
	},
	{
	"epoch": 0.2820284697508897,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00016376831535423923,
	"loss": 0.6941,
	"step": 951
	},
	{
	"epoch": 0.28291814946619215,
	"grad_norm": 0.32421875,
	"learning_rate": 0.00016355212530717682,
	"loss": 0.6737,
	"step": 954
	},
	{
	"epoch": 0.28380782918149466,
	"grad_norm": 0.328125,
	"learning_rate": 0.00016333543582780898,
	"loss": 0.6966,
	"step": 957
	},
	{
	"epoch": 0.2846975088967972,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00016311824861901694,
	"loss": 0.6938,
	"step": 960
	},
	{
	"epoch": 0.28558718861209964,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00016290056538759352,
	"loss": 0.6963,
	"step": 963
	},
	{
	"epoch": 0.28647686832740216,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00016268238784422954,
	"loss": 0.6931,
	"step": 966
	},
	{
	"epoch": 0.2873665480427046,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00016246371770350045,
	"loss": 0.6698,
	"step": 969
	},
	{
	"epoch": 0.28825622775800713,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00016224455668385282,
	"loss": 0.6909,
	"step": 972
	},
	{
	"epoch": 0.2891459074733096,
	"grad_norm": 0.30859375,
	"learning_rate": 0.0001620249065075909,
	"loss": 0.7073,
	"step": 975
	},
	{
	"epoch": 0.2900355871886121,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00016180476890086297,
	"loss": 0.6631,
	"step": 978
	},
	{
	"epoch": 0.29092526690391457,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00016158414559364789,
	"loss": 0.6814,
	"step": 981
	},
	{
	"epoch": 0.2918149466192171,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00016136303831974146,
	"loss": 0.6889,
	"step": 984
	},
	{
	"epoch": 0.29270462633451955,
	"grad_norm": 0.298828125,
	"learning_rate": 0.0001611414488167427,
	"loss": 0.6689,
	"step": 987
	},
	{
	"epoch": 0.29359430604982206,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00016091937882604048,
	"loss": 0.6845,
	"step": 990
	},
	{
	"epoch": 0.2944839857651246,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00016069683009279942,
	"loss": 0.6819,
	"step": 993
	},
	{
	"epoch": 0.29537366548042704,
	"grad_norm": 0.302734375,
	"learning_rate": 0.0001604738043659466,
	"loss": 0.6779,
	"step": 996
	},
	{
	"epoch": 0.29626334519572955,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00016025030339815745,
	"loss": 0.6603,
	"step": 999
	},
	{
	"epoch": 0.297153024911032,
	"grad_norm": 0.41796875,
	"learning_rate": 0.00016002632894584226,
	"loss": 0.6968,
	"step": 1002
	},
	{
	"epoch": 0.29804270462633453,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00015980188276913215,
	"loss": 0.6903,
	"step": 1005
	},
	{
	"epoch": 0.298932384341637,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00015957696663186546,
	"loss": 0.6829,
	"step": 1008
	},
	{
	"epoch": 0.2998220640569395,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00015935158230157367,
	"loss": 0.67,
	"step": 1011
	},
	{
	"epoch": 0.30071174377224197,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00015912573154946768,
	"loss": 0.7109,
	"step": 1014
	},
	{
	"epoch": 0.3016014234875445,
	"grad_norm": 0.28515625,
	"learning_rate": 0.0001588994161504238,
	"loss": 0.6877,
	"step": 1017
	},
	{
	"epoch": 0.302491103202847,
	"grad_norm": 0.33203125,
	"learning_rate": 0.00015867263788296984,
	"loss": 0.6896,
	"step": 1020
	},
	{
	"epoch": 0.30338078291814946,
	"grad_norm": 0.31640625,
	"learning_rate": 0.00015844539852927109,
	"loss": 0.7024,
	"step": 1023
	},
	{
	"epoch": 0.304270462633452,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00015821769987511635,
	"loss": 0.6913,
	"step": 1026
	},
	{
	"epoch": 0.30516014234875444,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00015798954370990393,
	"loss": 0.6893,
	"step": 1029
	},
	{
	"epoch": 0.30604982206405695,
	"grad_norm": 0.296875,
	"learning_rate": 0.0001577609318266275,
	"loss": 0.6613,
	"step": 1032
	},
	{
	"epoch": 0.3069395017793594,
	"grad_norm": 0.296875,
	"learning_rate": 0.00015753186602186209,
	"loss": 0.6827,
	"step": 1035
	},
	{
	"epoch": 0.30782918149466193,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00015730234809574985,
	"loss": 0.6789,
	"step": 1038
	},
	{
	"epoch": 0.3087188612099644,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00015707237985198612,
	"loss": 0.6798,
	"step": 1041
	},
	{
	"epoch": 0.3096085409252669,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00015684196309780494,
	"loss": 0.6736,
	"step": 1044
	},
	{
	"epoch": 0.3104982206405694,
	"grad_norm": 0.3125,
	"learning_rate": 0.0001566110996439652,
	"loss": 0.6915,
	"step": 1047
	},
	{
	"epoch": 0.3113879003558719,
	"grad_norm": 0.30078125,
	"learning_rate": 0.0001563797913047361,
	"loss": 0.6674,
	"step": 1050
	},
	{
	"epoch": 0.3122775800711744,
	"grad_norm": 0.314453125,
	"learning_rate": 0.00015614803989788314,
	"loss": 0.6783,
	"step": 1053
	},
	{
	"epoch": 0.31316725978647686,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00015591584724465363,
	"loss": 0.6735,
	"step": 1056
	},
	{
	"epoch": 0.3140569395017794,
	"grad_norm": 0.3203125,
	"learning_rate": 0.00015568321516976248,
	"loss": 0.6884,
	"step": 1059
	},
	{
	"epoch": 0.31494661921708184,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00015545014550137786,
	"loss": 0.6765,
	"step": 1062
	},
	{
	"epoch": 0.31583629893238435,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00015521664007110691,
	"loss": 0.6778,
	"step": 1065
	},
	{
	"epoch": 0.3167259786476868,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00015498270071398116,
	"loss": 0.6766,
	"step": 1068
	},
	{
	"epoch": 0.31761565836298933,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00015474832926844223,
	"loss": 0.6813,
	"step": 1071
	},
	{
	"epoch": 0.3185053380782918,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00015451352757632733,
	"loss": 0.6756,
	"step": 1074
	},
	{
	"epoch": 0.3193950177935943,
	"grad_norm": 0.294921875,
	"learning_rate": 0.0001542782974828549,
	"loss": 0.6401,
	"step": 1077
	},
	{
	"epoch": 0.3202846975088968,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00015404264083660992,
	"loss": 0.6928,
	"step": 1080
	},
	{
	"epoch": 0.3211743772241993,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00015380655948952961,
	"loss": 0.6618,
	"step": 1083
	},
	{
	"epoch": 0.3220640569395018,
	"grad_norm": 0.296875,
	"learning_rate": 0.00015357005529688866,
	"loss": 0.7026,
	"step": 1086
	},
	{
	"epoch": 0.32295373665480426,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00015333313011728478,
	"loss": 0.6659,
	"step": 1089
	},
	{
	"epoch": 0.3238434163701068,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00015309578581262402,
	"loss": 0.6674,
	"step": 1092
	},
	{
	"epoch": 0.32473309608540923,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00015285802424810626,
	"loss": 0.6893,
	"step": 1095
	},
	{
	"epoch": 0.32562277580071175,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00015261984729221038,
	"loss": 0.6829,
	"step": 1098
	},
	{
	"epoch": 0.3265124555160142,
	"grad_norm": 0.3125,
	"learning_rate": 0.00015238125681667973,
	"loss": 0.6579,
	"step": 1101
	},
	{
	"epoch": 0.3274021352313167,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00015214225469650726,
	"loss": 0.6546,
	"step": 1104
	},
	{
	"epoch": 0.32829181494661924,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00015190284280992107,
	"loss": 0.6665,
	"step": 1107
	},
	{
	"epoch": 0.3291814946619217,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00015166302303836927,
	"loss": 0.6877,
	"step": 1110
	},
	{
	"epoch": 0.3300711743772242,
	"grad_norm": 0.296875,
	"learning_rate": 0.00015142279726650543,
	"loss": 0.6823,
	"step": 1113
	},
	{
	"epoch": 0.3309608540925267,
	"grad_norm": 0.306640625,
	"learning_rate": 0.0001511821673821738,
	"loss": 0.6788,
	"step": 1116
	},
	{
	"epoch": 0.3318505338078292,
	"grad_norm": 0.296875,
	"learning_rate": 0.0001509411352763943,
	"loss": 0.6804,
	"step": 1119
	},
	{
	"epoch": 0.33274021352313166,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00015069970284334785,
	"loss": 0.6902,
	"step": 1122
	},
	{
	"epoch": 0.33362989323843417,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00015045787198036132,
	"loss": 0.6713,
	"step": 1125
	},
	{
	"epoch": 0.33451957295373663,
	"grad_norm": 0.2890625,
	"learning_rate": 0.0001502156445878927,
	"loss": 0.6787,
	"step": 1128
	},
	{
	"epoch": 0.33540925266903915,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00014997302256951624,
	"loss": 0.6657,
	"step": 1131
	},
	{
	"epoch": 0.33629893238434166,
	"grad_norm": 0.296875,
	"learning_rate": 0.00014973000783190726,
	"loss": 0.657,
	"step": 1134
	},
	{
	"epoch": 0.3371886120996441,
	"grad_norm": 0.296875,
	"learning_rate": 0.00014948660228482745,
	"loss": 0.6838,
	"step": 1137
	},
	{
	"epoch": 0.33807829181494664,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00014924280784110963,
	"loss": 0.6536,
	"step": 1140
	},
	{
	"epoch": 0.3389679715302491,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00014899862641664288,
	"loss": 0.6654,
	"step": 1143
	},
	{
	"epoch": 0.3398576512455516,
	"grad_norm": 0.30078125,
	"learning_rate": 0.0001487540599303574,
	"loss": 0.6894,
	"step": 1146
	},
	{
	"epoch": 0.3407473309608541,
	"grad_norm": 0.296875,
	"learning_rate": 0.0001485091103042094,
	"loss": 0.6429,
	"step": 1149
	},
	{
	"epoch": 0.3416370106761566,
	"grad_norm": 0.294921875,
	"learning_rate": 0.0001482637794631661,
	"loss": 0.6806,
	"step": 1152
	},
	{
	"epoch": 0.34252669039145905,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00014801806933519048,
	"loss": 0.67,
	"step": 1155
	},
	{
	"epoch": 0.34341637010676157,
	"grad_norm": 0.294921875,
	"learning_rate": 0.0001477719818512263,
	"loss": 0.6683,
	"step": 1158
	},
	{
	"epoch": 0.34430604982206403,
	"grad_norm": 0.326171875,
	"learning_rate": 0.00014752551894518272,
	"loss": 0.6468,
	"step": 1161
	},
	{
	"epoch": 0.34519572953736655,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00014727868255391924,
	"loss": 0.6664,
	"step": 1164
	},
	{
	"epoch": 0.34608540925266906,
	"grad_norm": 0.314453125,
	"learning_rate": 0.00014703147461723041,
	"loss": 0.6905,
	"step": 1167
	},
	{
	"epoch": 0.3469750889679715,
	"grad_norm": 0.296875,
	"learning_rate": 0.00014678389707783071,
	"loss": 0.6777,
	"step": 1170
	},
	{
	"epoch": 0.34786476868327404,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00014653595188133904,
	"loss": 0.656,
	"step": 1173
	},
	{
	"epoch": 0.3487544483985765,
	"grad_norm": 0.294921875,
	"learning_rate": 0.0001462876409762637,
	"loss": 0.6772,
	"step": 1176
	},
	{
	"epoch": 0.349644128113879,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00014603896631398692,
	"loss": 0.667,
	"step": 1179
	},
	{
	"epoch": 0.3505338078291815,
	"grad_norm": 0.296875,
	"learning_rate": 0.00014578992984874955,
	"loss": 0.6635,
	"step": 1182
	},
	{
	"epoch": 0.351423487544484,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00014554053353763575,
	"loss": 0.6726,
	"step": 1185
	},
	{
	"epoch": 0.35231316725978645,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00014529077934055752,
	"loss": 0.67,
	"step": 1188
	},
	{
	"epoch": 0.35320284697508897,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00014504066922023934,
	"loss": 0.6716,
	"step": 1191
	},
	{
	"epoch": 0.3540925266903915,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00014479020514220284,
	"loss": 0.6786,
	"step": 1194
	},
	{
	"epoch": 0.35498220640569395,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00014453938907475124,
	"loss": 0.6636,
	"step": 1197
	},
	{
	"epoch": 0.35587188612099646,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00014428822298895387,
	"loss": 0.6316,
	"step": 1200
	},
	{
	"epoch": 0.3567615658362989,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00014403670885863073,
	"loss": 0.6849,
	"step": 1203
	},
	{
	"epoch": 0.35765124555160144,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00014378484866033704,
	"loss": 0.6701,
	"step": 1206
	},
	{
	"epoch": 0.3585409252669039,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00014353264437334758,
	"loss": 0.629,
	"step": 1209
	},
	{
	"epoch": 0.3594306049822064,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00014328009797964113,
	"loss": 0.6799,
	"step": 1212
	},
	{
	"epoch": 0.3603202846975089,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00014302721146388514,
	"loss": 0.6566,
	"step": 1215
	},
	{
	"epoch": 0.3612099644128114,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00014277398681341983,
	"loss": 0.6607,
	"step": 1218
	},
	{
	"epoch": 0.3620996441281139,
	"grad_norm": 0.28125,
	"learning_rate": 0.0001425204260182426,
	"loss": 0.6603,
	"step": 1221
	},
	{
	"epoch": 0.36298932384341637,
	"grad_norm": 0.28125,
	"learning_rate": 0.00014226653107099273,
	"loss": 0.6862,
	"step": 1224
	},
	{
	"epoch": 0.3638790035587189,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00014201230396693526,
	"loss": 0.6771,
	"step": 1227
	},
	{
	"epoch": 0.36476868327402134,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00014175774670394562,
	"loss": 0.6682,
	"step": 1230
	},
	{
	"epoch": 0.36565836298932386,
	"grad_norm": 0.291015625,
	"learning_rate": 0.0001415028612824938,
	"loss": 0.664,
	"step": 1233
	},
	{
	"epoch": 0.3665480427046263,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00014124764970562873,
	"loss": 0.6675,
	"step": 1236
	},
	{
	"epoch": 0.36743772241992884,
	"grad_norm": 0.2890625,
	"learning_rate": 0.0001409921139789624,
	"loss": 0.6531,
	"step": 1239
	},
	{
	"epoch": 0.3683274021352313,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00014073625611065423,
	"loss": 0.6503,
	"step": 1242
	},
	{
	"epoch": 0.3692170818505338,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00014048007811139513,
	"loss": 0.6663,
	"step": 1245
	},
	{
	"epoch": 0.3701067615658363,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00014022358199439192,
	"loss": 0.6692,
	"step": 1248
	},
	{
	"epoch": 0.3709964412811388,
	"grad_norm": 0.283203125,
	"learning_rate": 0.0001399667697753513,
	"loss": 0.636,
	"step": 1251
	},
	{
	"epoch": 0.3718861209964413,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00013970964347246418,
	"loss": 0.6724,
	"step": 1254
	},
	{
	"epoch": 0.37277580071174377,
	"grad_norm": 0.2890625,
	"learning_rate": 0.0001394522051063897,
	"loss": 0.6496,
	"step": 1257
	},
	{
	"epoch": 0.3736654804270463,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00013919445670023932,
	"loss": 0.681,
	"step": 1260
	},
	{
	"epoch": 0.37455516014234874,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00013893640027956106,
	"loss": 0.665,
	"step": 1263
	},
	{
	"epoch": 0.37544483985765126,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00013867803787232348,
	"loss": 0.6562,
	"step": 1266
	},
	{
	"epoch": 0.3763345195729537,
	"grad_norm": 0.287109375,
	"learning_rate": 0.0001384193715088999,
	"loss": 0.676,
	"step": 1269
	},
	{
	"epoch": 0.37722419928825623,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00013816040322205207,
	"loss": 0.6744,
	"step": 1272
	},
	{
	"epoch": 0.3781138790035587,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00013790113504691463,
	"loss": 0.661,
	"step": 1275
	},
	{
	"epoch": 0.3790035587188612,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00013764156902097891,
	"loss": 0.6443,
	"step": 1278
	},
	{
	"epoch": 0.3798932384341637,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00013738170718407687,
	"loss": 0.6613,
	"step": 1281
	},
	{
	"epoch": 0.3807829181494662,
	"grad_norm": 0.279296875,
	"learning_rate": 0.0001371215515783652,
	"loss": 0.6629,
	"step": 1284
	},
	{
	"epoch": 0.3816725978647687,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00013686110424830923,
	"loss": 0.6735,
	"step": 1287
	},
	{
	"epoch": 0.38256227758007116,
	"grad_norm": 0.294921875,
	"learning_rate": 0.00013660036724066668,
	"loss": 0.6597,
	"step": 1290
	},
	{
	"epoch": 0.3834519572953737,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00013633934260447192,
	"loss": 0.6712,
	"step": 1293
	},
	{
	"epoch": 0.38434163701067614,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00013607803239101964,
	"loss": 0.6494,
	"step": 1296
	},
	{
	"epoch": 0.38523131672597866,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00013581643865384873,
	"loss": 0.6474,
	"step": 1299
	},
	{
	"epoch": 0.3861209964412811,
	"grad_norm": 0.26953125,
	"learning_rate": 0.0001355545634487262,
	"loss": 0.6386,
	"step": 1302
	},
	{
	"epoch": 0.38701067615658363,
	"grad_norm": 0.27734375,
	"learning_rate": 0.000135292408833631,
	"loss": 0.6541,
	"step": 1305
	},
	{
	"epoch": 0.3879003558718861,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00013502997686873797,
	"loss": 0.6444,
	"step": 1308
	},
	{
	"epoch": 0.3887900355871886,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00013476726961640133,
	"loss": 0.6295,
	"step": 1311
	},
	{
	"epoch": 0.3896797153024911,
	"grad_norm": 0.30859375,
	"learning_rate": 0.0001345042891411389,
	"loss": 0.6551,
	"step": 1314
	},
	{
	"epoch": 0.3905693950177936,
	"grad_norm": 0.30078125,
	"learning_rate": 0.0001342410375096155,
	"loss": 0.6757,
	"step": 1317
	},
	{
	"epoch": 0.3914590747330961,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00013397751679062692,
	"loss": 0.6516,
	"step": 1320
	},
	{
	"epoch": 0.39234875444839856,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00013371372905508362,
	"loss": 0.6425,
	"step": 1323
	},
	{
	"epoch": 0.3932384341637011,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00013344967637599444,
	"loss": 0.642,
	"step": 1326
	},
	{
	"epoch": 0.39412811387900354,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00013318536082845026,
	"loss": 0.6309,
	"step": 1329
	},
	{
	"epoch": 0.39501779359430605,
	"grad_norm": 0.283203125,
	"learning_rate": 0.0001329207844896078,
	"loss": 0.6477,
	"step": 1332
	},
	{
	"epoch": 0.3959074733096085,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00013265594943867327,
	"loss": 0.6476,
	"step": 1335
	},
	{
	"epoch": 0.39679715302491103,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00013239085775688592,
	"loss": 0.646,
	"step": 1338
	},
	{
	"epoch": 0.39768683274021355,
	"grad_norm": 0.30078125,
	"learning_rate": 0.00013212551152750178,
	"loss": 0.6614,
	"step": 1341
	},
	{
	"epoch": 0.398576512455516,
	"grad_norm": 0.28125,
	"learning_rate": 0.00013185991283577738,
	"loss": 0.6477,
	"step": 1344
	},
	{
	"epoch": 0.3994661921708185,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00013159406376895313,
	"loss": 0.6417,
	"step": 1347
	},
	{
	"epoch": 0.400355871886121,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00013132796641623703,
	"loss": 0.6358,
	"step": 1350
	},
	{
	"epoch": 0.4012455516014235,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00013106162286878842,
	"loss": 0.652,
	"step": 1353
	},
	{
	"epoch": 0.40213523131672596,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00013079503521970127,
	"loss": 0.6511,
	"step": 1356
	},
	{
	"epoch": 0.4030249110320285,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00013052820556398785,
	"loss": 0.6546,
	"step": 1359
	},
	{
	"epoch": 0.40391459074733094,
	"grad_norm": 0.296875,
	"learning_rate": 0.0001302611359985623,
	"loss": 0.6564,
	"step": 1362
	},
	{
	"epoch": 0.40480427046263345,
	"grad_norm": 0.26953125,
	"learning_rate": 0.00012999382862222415,
	"loss": 0.6609,
	"step": 1365
	},
	{
	"epoch": 0.40569395017793597,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00012972628553564177,
	"loss": 0.6508,
	"step": 1368
	},
	{
	"epoch": 0.40658362989323843,
	"grad_norm": 0.275390625,
	"learning_rate": 0.0001294585088413358,
	"loss": 0.652,
	"step": 1371
	},
	{
	"epoch": 0.40747330960854095,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00012919050064366295,
	"loss": 0.6492,
	"step": 1374
	},
	{
	"epoch": 0.4083629893238434,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00012892226304879893,
	"loss": 0.6136,
	"step": 1377
	},
	{
	"epoch": 0.4092526690391459,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00012865379816472242,
	"loss": 0.6543,
	"step": 1380
	},
	{
	"epoch": 0.4101423487544484,
	"grad_norm": 0.26953125,
	"learning_rate": 0.0001283851081011982,
	"loss": 0.6568,
	"step": 1383
	},
	{
	"epoch": 0.4110320284697509,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00012811619496976066,
	"loss": 0.6457,
	"step": 1386
	},
	{
	"epoch": 0.41192170818505336,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00012784706088369714,
	"loss": 0.6653,
	"step": 1389
	},
	{
	"epoch": 0.4128113879003559,
	"grad_norm": 0.275390625,
	"learning_rate": 0.0001275777079580315,
	"loss": 0.6545,
	"step": 1392
	},
	{
	"epoch": 0.41370106761565834,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00012730813830950732,
	"loss": 0.6465,
	"step": 1395
	},
	{
	"epoch": 0.41459074733096085,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00012703835405657122,
	"loss": 0.6304,
	"step": 1398
	},
	{
	"epoch": 0.41548042704626337,
	"grad_norm": 0.33203125,
	"learning_rate": 0.0001267683573193565,
	"loss": 0.6628,
	"step": 1401
	},
	{
	"epoch": 0.41637010676156583,
	"grad_norm": 0.283203125,
	"learning_rate": 0.0001264981502196662,
	"loss": 0.6328,
	"step": 1404
	},
	{
	"epoch": 0.41725978647686834,
	"grad_norm": 0.298828125,
	"learning_rate": 0.00012622773488095643,
	"loss": 0.6621,
	"step": 1407
	},
	{
	"epoch": 0.4181494661921708,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00012595711342831996,
	"loss": 0.6672,
	"step": 1410
	},
	{
	"epoch": 0.4190391459074733,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00012568628798846924,
	"loss": 0.6496,
	"step": 1413
	},
	{
	"epoch": 0.4199288256227758,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00012541526068971973,
	"loss": 0.6318,
	"step": 1416
	},
	{
	"epoch": 0.4208185053380783,
	"grad_norm": 0.283203125,
	"learning_rate": 0.0001251440336619733,
	"loss": 0.6328,
	"step": 1419
	},
	{
	"epoch": 0.42170818505338076,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00012487260903670135,
	"loss": 0.6476,
	"step": 1422
	},
	{
	"epoch": 0.4225978647686833,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00012460098894692822,
	"loss": 0.6404,
	"step": 1425
	},
	{
	"epoch": 0.4234875444839858,
	"grad_norm": 0.279296875,
	"learning_rate": 0.0001243291755272142,
	"loss": 0.6592,
	"step": 1428
	},
	{
	"epoch": 0.42437722419928825,
	"grad_norm": 0.2734375,
	"learning_rate": 0.000124057170913639,
	"loss": 0.6489,
	"step": 1431
	},
	{
	"epoch": 0.42526690391459077,
	"grad_norm": 0.306640625,
	"learning_rate": 0.00012378497724378483,
	"loss": 0.6669,
	"step": 1434
	},
	{
	"epoch": 0.4261565836298932,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00012351259665671958,
	"loss": 0.6477,
	"step": 1437
	},
	{
	"epoch": 0.42704626334519574,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00012324003129298005,
	"loss": 0.6202,
	"step": 1440
	},
	{
	"epoch": 0.4279359430604982,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00012296728329455524,
	"loss": 0.6655,
	"step": 1443
	},
	{
	"epoch": 0.4288256227758007,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00012269435480486923,
	"loss": 0.6526,
	"step": 1446
	},
	{
	"epoch": 0.4297153024911032,
	"grad_norm": 0.2890625,
	"learning_rate": 0.0001224212479687646,
	"loss": 0.6619,
	"step": 1449
	},
	{
	"epoch": 0.4306049822064057,
	"grad_norm": 0.318359375,
	"learning_rate": 0.00012214796493248563,
	"loss": 0.6718,
	"step": 1452
	},
	{
	"epoch": 0.4314946619217082,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00012187450784366101,
	"loss": 0.6414,
	"step": 1455
	},
	{
	"epoch": 0.43238434163701067,
	"grad_norm": 0.447265625,
	"learning_rate": 0.00012160087885128745,
	"loss": 0.6567,
	"step": 1458
	},
	{
	"epoch": 0.4332740213523132,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00012132708010571252,
	"loss": 0.6345,
	"step": 1461
	},
	{
	"epoch": 0.43416370106761565,
	"grad_norm": 0.302734375,
	"learning_rate": 0.00012105311375861785,
	"loss": 0.6611,
	"step": 1464
	},
	{
	"epoch": 0.43505338078291816,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00012077898196300208,
	"loss": 0.652,
	"step": 1467
	},
	{
	"epoch": 0.4359430604982206,
	"grad_norm": 0.28125,
	"learning_rate": 0.00012050468687316419,
	"loss": 0.6373,
	"step": 1470
	},
	{
	"epoch": 0.43683274021352314,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00012023023064468637,
	"loss": 0.6323,
	"step": 1473
	},
	{
	"epoch": 0.4377224199288256,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00011995561543441709,
	"loss": 0.6321,
	"step": 1476
	},
	{
	"epoch": 0.4386120996441281,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011968084340045425,
	"loss": 0.6388,
	"step": 1479
	},
	{
	"epoch": 0.4395017793594306,
	"grad_norm": 0.275390625,
	"learning_rate": 0.0001194059167021282,
	"loss": 0.653,
	"step": 1482
	},
	{
	"epoch": 0.4403914590747331,
	"grad_norm": 0.3046875,
	"learning_rate": 0.00011913083749998464,
	"loss": 0.6604,
	"step": 1485
	},
	{
	"epoch": 0.4412811387900356,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00011885560795576783,
	"loss": 0.662,
	"step": 1488
	},
	{
	"epoch": 0.44217081850533807,
	"grad_norm": 0.3046875,
	"learning_rate": 0.0001185802302324035,
	"loss": 0.6724,
	"step": 1491
	},
	{
	"epoch": 0.4430604982206406,
	"grad_norm": 0.310546875,
	"learning_rate": 0.00011830470649398182,
	"loss": 0.6296,
	"step": 1494
	},
	{
	"epoch": 0.44395017793594305,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011802903890574046,
	"loss": 0.6475,
	"step": 1497
	},
	{
	"epoch": 0.44483985765124556,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011775322963404756,
	"loss": 0.6665,
	"step": 1500
	},
	{
	"epoch": 0.445729537366548,
	"grad_norm": 0.279296875,
	"learning_rate": 0.0001174772808463847,
	"loss": 0.6361,
	"step": 1503
	},
	{
	"epoch": 0.44661921708185054,
	"grad_norm": 0.259765625,
	"learning_rate": 0.00011720119471132984,
	"loss": 0.6302,
	"step": 1506
	},
	{
	"epoch": 0.447508896797153,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00011692497339854031,
	"loss": 0.6552,
	"step": 1509
	},
	{
	"epoch": 0.4483985765124555,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00011664861907873584,
	"loss": 0.6436,
	"step": 1512
	},
	{
	"epoch": 0.44928825622775803,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011637213392368118,
	"loss": 0.6381,
	"step": 1515
	},
	{
	"epoch": 0.4501779359430605,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00011609552010616954,
	"loss": 0.659,
	"step": 1518
	},
	{
	"epoch": 0.451067615658363,
	"grad_norm": 0.259765625,
	"learning_rate": 0.00011581877980000508,
	"loss": 0.6412,
	"step": 1521
	},
	{
	"epoch": 0.45195729537366547,
	"grad_norm": 0.26953125,
	"learning_rate": 0.00011554191517998598,
	"loss": 0.637,
	"step": 1524
	},
	{
	"epoch": 0.452846975088968,
	"grad_norm": 0.376953125,
	"learning_rate": 0.00011526492842188745,
	"loss": 0.64,
	"step": 1527
	},
	{
	"epoch": 0.45373665480427045,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011498782170244449,
	"loss": 0.6525,
	"step": 1530
	},
	{
	"epoch": 0.45462633451957296,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00011471059719933479,
	"loss": 0.6248,
	"step": 1533
	},
	{
	"epoch": 0.4555160142348754,
	"grad_norm": 0.375,
	"learning_rate": 0.00011443325709116171,
	"loss": 0.6334,
	"step": 1536
	},
	{
	"epoch": 0.45640569395017794,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00011415580355743707,
	"loss": 0.6381,
	"step": 1539
	},
	{
	"epoch": 0.45729537366548045,
	"grad_norm": 0.26953125,
	"learning_rate": 0.00011387823877856411,
	"loss": 0.6367,
	"step": 1542
	},
	{
	"epoch": 0.4581850533807829,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00011360056493582028,
	"loss": 0.6443,
	"step": 1545
	},
	{
	"epoch": 0.45907473309608543,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011332278421134005,
	"loss": 0.6342,
	"step": 1548
	},
	{
	"epoch": 0.4599644128113879,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011304489878809803,
	"loss": 0.6385,
	"step": 1551
	},
	{
	"epoch": 0.4608540925266904,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00011276691084989134,
	"loss": 0.6102,
	"step": 1554
	},
	{
	"epoch": 0.46174377224199287,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00011248882258132299,
	"loss": 0.6285,
	"step": 1557
	},
	{
	"epoch": 0.4626334519572954,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00011221063616778425,
	"loss": 0.6504,
	"step": 1560
	},
	{
	"epoch": 0.46352313167259784,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00011193235379543778,
	"loss": 0.647,
	"step": 1563
	},
	{
	"epoch": 0.46441281138790036,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00011165397765120033,
	"loss": 0.645,
	"step": 1566
	},
	{
	"epoch": 0.4653024911032028,
	"grad_norm": 0.30859375,
	"learning_rate": 0.00011137550992272561,
	"loss": 0.6311,
	"step": 1569
	},
	{
	"epoch": 0.46619217081850534,
	"grad_norm": 0.28125,
	"learning_rate": 0.0001110969527983869,
	"loss": 0.6348,
	"step": 1572
	},
	{
	"epoch": 0.46708185053380785,
	"grad_norm": 0.28515625,
	"learning_rate": 0.00011081830846726021,
	"loss": 0.6244,
	"step": 1575
	},
	{
	"epoch": 0.4679715302491103,
	"grad_norm": 0.291015625,
	"learning_rate": 0.00011053957911910671,
	"loss": 0.6336,
	"step": 1578
	},
	{
	"epoch": 0.46886120996441283,
	"grad_norm": 0.291015625,
	"learning_rate": 0.0001102607669443558,
	"loss": 0.6211,
	"step": 1581
	},
	{
	"epoch": 0.4697508896797153,
	"grad_norm": 0.283203125,
	"learning_rate": 0.00010998187413408774,
	"loss": 0.6337,
	"step": 1584
	},
	{
	"epoch": 0.4706405693950178,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00010970290288001644,
	"loss": 0.6451,
	"step": 1587
	},
	{
	"epoch": 0.47153024911032027,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00010942385537447236,
	"loss": 0.6466,
	"step": 1590
	},
	{
	"epoch": 0.4724199288256228,
	"grad_norm": 0.28125,
	"learning_rate": 0.00010914473381038508,
	"loss": 0.6324,
	"step": 1593
	},
	{
	"epoch": 0.47330960854092524,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00010886554038126625,
	"loss": 0.615,
	"step": 1596
	},
	{
	"epoch": 0.47419928825622776,
	"grad_norm": 0.26171875,
	"learning_rate": 0.00010858627728119226,
	"loss": 0.6385,
	"step": 1599
	},
	{
	"epoch": 0.4750889679715303,
	"grad_norm": 0.265625,
	"learning_rate": 0.00010830694670478705,
	"loss": 0.6253,
	"step": 1602
	},
	{
	"epoch": 0.47597864768683273,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00010802755084720479,
	"loss": 0.6188,
	"step": 1605
	},
	{
	"epoch": 0.47686832740213525,
	"grad_norm": 0.2890625,
	"learning_rate": 0.0001077480919041127,
	"loss": 0.6179,
	"step": 1608
	},
	{
	"epoch": 0.4777580071174377,
	"grad_norm": 0.271484375,
	"learning_rate": 0.00010746857207167372,
	"loss": 0.6427,
	"step": 1611
	},
	{
	"epoch": 0.4786476868327402,
	"grad_norm": 0.271484375,
	"learning_rate": 0.00010718899354652931,
	"loss": 0.6096,
	"step": 1614
	},
	{
	"epoch": 0.4795373665480427,
	"grad_norm": 0.48046875,
	"learning_rate": 0.00010690935852578225,
	"loss": 0.648,
	"step": 1617
	},
	{
	"epoch": 0.4804270462633452,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00010662966920697919,
	"loss": 0.619,
	"step": 1620
	},
	{
	"epoch": 0.48131672597864766,
	"grad_norm": 0.271484375,
	"learning_rate": 0.00010634992778809357,
	"loss": 0.6368,
	"step": 1623
	},
	{
	"epoch": 0.4822064056939502,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00010607013646750818,
	"loss": 0.623,
	"step": 1626
	},
	{
	"epoch": 0.4830960854092527,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00010579029744399809,
	"loss": 0.6212,
	"step": 1629
	},
	{
	"epoch": 0.48398576512455516,
	"grad_norm": 0.2734375,
	"learning_rate": 0.00010551041291671311,
	"loss": 0.6343,
	"step": 1632
	},
	{
	"epoch": 0.48487544483985767,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00010523048508516075,
	"loss": 0.634,
	"step": 1635
	},
	{
	"epoch": 0.48576512455516013,
	"grad_norm": 0.275390625,
	"learning_rate": 0.00010495051614918881,
	"loss": 0.5907,
	"step": 1638
	},
	{
	"epoch": 0.48665480427046265,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00010467050830896808,
	"loss": 0.6122,
	"step": 1641
	},
	{
	"epoch": 0.4875444839857651,
	"grad_norm": 0.2734375,
	"learning_rate": 0.0001043904637649751,
	"loss": 0.6175,
	"step": 1644
	},
	{
	"epoch": 0.4884341637010676,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00010411038471797488,
	"loss": 0.6488,
	"step": 1647
	},
	{
	"epoch": 0.4893238434163701,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00010383027336900355,
	"loss": 0.6245,
	"step": 1650
	},
	{
	"epoch": 0.4902135231316726,
	"grad_norm": 0.265625,
	"learning_rate": 0.00010355013191935108,
	"loss": 0.6203,
	"step": 1653
	},
	{
	"epoch": 0.49110320284697506,
	"grad_norm": 0.265625,
	"learning_rate": 0.000103269962570544,
	"loss": 0.6193,
	"step": 1656
	},
	{
	"epoch": 0.4919928825622776,
	"grad_norm": 0.26953125,
	"learning_rate": 0.00010298976752432812,
	"loss": 0.6403,
	"step": 1659
	},
	{
	"epoch": 0.4928825622775801,
	"grad_norm": 0.267578125,
	"learning_rate": 0.00010270954898265112,
	"loss": 0.6292,
	"step": 1662
	},
	{
	"epoch": 0.49377224199288255,
	"grad_norm": 0.2890625,
	"learning_rate": 0.00010242930914764541,
	"loss": 0.6404,
	"step": 1665
	},
	{
	"epoch": 0.49466192170818507,
	"grad_norm": 0.28125,
	"learning_rate": 0.0001021490502216107,
	"loss": 0.6359,
	"step": 1668
	},
	{
	"epoch": 0.49555160142348753,
	"grad_norm": 0.27734375,
	"learning_rate": 0.0001018687744069967,
	"loss": 0.6442,
	"step": 1671
	},
	{
	"epoch": 0.49644128113879005,
	"grad_norm": 0.287109375,
	"learning_rate": 0.00010158848390638587,
	"loss": 0.6515,
	"step": 1674
	},
	{
	"epoch": 0.4973309608540925,
	"grad_norm": 0.27734375,
	"learning_rate": 0.00010130818092247607,
	"loss": 0.6115,
	"step": 1677
	},
	{
	"epoch": 0.498220640569395,
	"grad_norm": 0.283203125,
	"learning_rate": 0.0001010278676580633,
	"loss": 0.6434,
	"step": 1680
	},
	{
	"epoch": 0.4991103202846975,
	"grad_norm": 0.271484375,
	"learning_rate": 0.00010074754631602428,
	"loss": 0.6234,
	"step": 1683
	},
	{
	"epoch": 0.5,
	"grad_norm": 0.29296875,
	"learning_rate": 0.00010046721909929928,
	"loss": 0.6571,
	"step": 1686
	},
	{
	"epoch": 0.5008896797153025,
	"grad_norm": 0.279296875,
	"learning_rate": 0.00010018688821087474,
	"loss": 0.6454,
	"step": 1689
	},
	{
	"epoch": 0.501779359430605,
	"grad_norm": 0.275390625,
	"learning_rate": 9.990655585376584e-05,
	"loss": 0.5973,
	"step": 1692
	},
	{
	"epoch": 0.5026690391459074,
	"grad_norm": 0.271484375,
	"learning_rate": 9.962622423099942e-05,
	"loss": 0.6094,
	"step": 1695
	},
	{
	"epoch": 0.50355871886121,
	"grad_norm": 0.28125,
	"learning_rate": 9.934589554559653e-05,
	"loss": 0.642,
	"step": 1698
	},
	{
	"epoch": 0.5044483985765125,
	"grad_norm": 0.2578125,
	"learning_rate": 9.906557200055508e-05,
	"loss": 0.6283,
	"step": 1701
	},
	{
	"epoch": 0.505338078291815,
	"grad_norm": 0.27734375,
	"learning_rate": 9.878525579883265e-05,
	"loss": 0.6277,
	"step": 1704
	},
	{
	"epoch": 0.5062277580071174,
	"grad_norm": 0.279296875,
	"learning_rate": 9.850494914332908e-05,
	"loss": 0.627,
	"step": 1707
	},
	{
	"epoch": 0.5071174377224199,
	"grad_norm": 0.2890625,
	"learning_rate": 9.822465423686917e-05,
	"loss": 0.6435,
	"step": 1710
	},
	{
	"epoch": 0.5080071174377224,
	"grad_norm": 0.291015625,
	"learning_rate": 9.794437328218546e-05,
	"loss": 0.6537,
	"step": 1713
	},
	{
	"epoch": 0.5088967971530249,
	"grad_norm": 0.287109375,
	"learning_rate": 9.766410848190077e-05,
	"loss": 0.6366,
	"step": 1716
	},
	{
	"epoch": 0.5097864768683275,
	"grad_norm": 0.265625,
	"learning_rate": 9.738386203851101e-05,
	"loss": 0.625,
	"step": 1719
	},
	{
	"epoch": 0.5106761565836299,
	"grad_norm": 0.263671875,
	"learning_rate": 9.710363615436776e-05,
	"loss": 0.6372,
	"step": 1722
	},
	{
	"epoch": 0.5115658362989324,
	"grad_norm": 0.294921875,
	"learning_rate": 9.682343303166117e-05,
	"loss": 0.6188,
	"step": 1725
	},
	{
	"epoch": 0.5124555160142349,
	"grad_norm": 0.271484375,
	"learning_rate": 9.654325487240243e-05,
	"loss": 0.6223,
	"step": 1728
	},
	{
	"epoch": 0.5133451957295374,
	"grad_norm": 0.263671875,
	"learning_rate": 9.626310387840648e-05,
	"loss": 0.6096,
	"step": 1731
	},
	{
	"epoch": 0.5142348754448398,
	"grad_norm": 0.2578125,
	"learning_rate": 9.598298225127498e-05,
	"loss": 0.615,
	"step": 1734
	},
	{
	"epoch": 0.5151245551601423,
	"grad_norm": 0.2734375,
	"learning_rate": 9.570289219237858e-05,
	"loss": 0.6377,
	"step": 1737
	},
	{
	"epoch": 0.5160142348754448,
	"grad_norm": 0.26953125,
	"learning_rate": 9.542283590284002e-05,
	"loss": 0.6191,
	"step": 1740
	},
	{
	"epoch": 0.5169039145907474,
	"grad_norm": 0.2734375,
	"learning_rate": 9.514281558351653e-05,
	"loss": 0.625,
	"step": 1743
	},
	{
	"epoch": 0.5177935943060499,
	"grad_norm": 0.283203125,
	"learning_rate": 9.486283343498277e-05,
	"loss": 0.6202,
	"step": 1746
	},
	{
	"epoch": 0.5186832740213523,
	"grad_norm": 0.2734375,
	"learning_rate": 9.458289165751339e-05,
	"loss": 0.613,
	"step": 1749
	},
	{
	"epoch": 0.5195729537366548,
	"grad_norm": 0.28125,
	"learning_rate": 9.430299245106573e-05,
	"loss": 0.6306,
	"step": 1752
	},
	{
	"epoch": 0.5204626334519573,
	"grad_norm": 0.255859375,
	"learning_rate": 9.402313801526267e-05,
	"loss": 0.6162,
	"step": 1755
	},
	{
	"epoch": 0.5213523131672598,
	"grad_norm": 0.279296875,
	"learning_rate": 9.37433305493752e-05,
	"loss": 0.6472,
	"step": 1758
	},
	{
	"epoch": 0.5222419928825622,
	"grad_norm": 0.265625,
	"learning_rate": 9.346357225230519e-05,
	"loss": 0.6347,
	"step": 1761
	},
	{
	"epoch": 0.5231316725978647,
	"grad_norm": 0.271484375,
	"learning_rate": 9.318386532256807e-05,
	"loss": 0.6155,
	"step": 1764
	},
	{
	"epoch": 0.5240213523131673,
	"grad_norm": 0.265625,
	"learning_rate": 9.290421195827572e-05,
	"loss": 0.6112,
	"step": 1767
	},
	{
	"epoch": 0.5249110320284698,
	"grad_norm": 0.275390625,
	"learning_rate": 9.262461435711898e-05,
	"loss": 0.6252,
	"step": 1770
	},
	{
	"epoch": 0.5258007117437722,
	"grad_norm": 0.283203125,
	"learning_rate": 9.234507471635043e-05,
	"loss": 0.6241,
	"step": 1773
	},
	{
	"epoch": 0.5266903914590747,
	"grad_norm": 0.291015625,
	"learning_rate": 9.206559523276731e-05,
	"loss": 0.6346,
	"step": 1776
	},
	{
	"epoch": 0.5275800711743772,
	"grad_norm": 0.28125,
	"learning_rate": 9.178617810269388e-05,
	"loss": 0.6489,
	"step": 1779
	},
	{
	"epoch": 0.5284697508896797,
	"grad_norm": 0.263671875,
	"learning_rate": 9.150682552196462e-05,
	"loss": 0.6247,
	"step": 1782
	},
	{
	"epoch": 0.5293594306049823,
	"grad_norm": 0.259765625,
	"learning_rate": 9.12275396859066e-05,
	"loss": 0.5991,
	"step": 1785
	},
	{
	"epoch": 0.5302491103202847,
	"grad_norm": 0.26953125,
	"learning_rate": 9.094832278932238e-05,
	"loss": 0.6174,
	"step": 1788
	},
	{
	"epoch": 0.5311387900355872,
	"grad_norm": 0.27734375,
	"learning_rate": 9.066917702647284e-05,
	"loss": 0.6204,
	"step": 1791
	},
	{
	"epoch": 0.5320284697508897,
	"grad_norm": 0.26953125,
	"learning_rate": 9.039010459105974e-05,
	"loss": 0.584,
	"step": 1794
	},
	{
	"epoch": 0.5329181494661922,
	"grad_norm": 0.27734375,
	"learning_rate": 9.011110767620865e-05,
	"loss": 0.6179,
	"step": 1797
	},
	{
	"epoch": 0.5338078291814946,
	"grad_norm": 0.267578125,
	"learning_rate": 8.983218847445157e-05,
	"loss": 0.5993,
	"step": 1800
	},
	{
	"epoch": 0.5346975088967971,
	"grad_norm": 0.271484375,
	"learning_rate": 8.955334917770993e-05,
	"loss": 0.6324,
	"step": 1803
	},
	{
	"epoch": 0.5355871886120996,
	"grad_norm": 0.265625,
	"learning_rate": 8.927459197727712e-05,
	"loss": 0.6048,
	"step": 1806
	},
	{
	"epoch": 0.5364768683274022,
	"grad_norm": 0.279296875,
	"learning_rate": 8.899591906380131e-05,
	"loss": 0.6138,
	"step": 1809
	},
	{
	"epoch": 0.5373665480427047,
	"grad_norm": 0.271484375,
	"learning_rate": 8.871733262726846e-05,
	"loss": 0.626,
	"step": 1812
	},
	{
	"epoch": 0.5382562277580071,
	"grad_norm": 0.275390625,
	"learning_rate": 8.843883485698474e-05,
	"loss": 0.6169,
	"step": 1815
	},
	{
	"epoch": 0.5391459074733096,
	"grad_norm": 0.265625,
	"learning_rate": 8.81604279415597e-05,
	"loss": 0.6043,
	"step": 1818
	},
	{
	"epoch": 0.5400355871886121,
	"grad_norm": 0.2734375,
	"learning_rate": 8.788211406888872e-05,
	"loss": 0.6216,
	"step": 1821
	},
	{
	"epoch": 0.5409252669039146,
	"grad_norm": 0.283203125,
	"learning_rate": 8.76038954261362e-05,
	"loss": 0.6365,
	"step": 1824
	},
	{
	"epoch": 0.541814946619217,
	"grad_norm": 0.271484375,
	"learning_rate": 8.732577419971801e-05,
	"loss": 0.6226,
	"step": 1827
	},
	{
	"epoch": 0.5427046263345195,
	"grad_norm": 0.265625,
	"learning_rate": 8.704775257528448e-05,
	"loss": 0.6179,
	"step": 1830
	},
	{
	"epoch": 0.5435943060498221,
	"grad_norm": 0.27734375,
	"learning_rate": 8.676983273770327e-05,
	"loss": 0.6191,
	"step": 1833
	},
	{
	"epoch": 0.5444839857651246,
	"grad_norm": 0.2890625,
	"learning_rate": 8.649201687104209e-05,
	"loss": 0.6243,
	"step": 1836
	},
	{
	"epoch": 0.5453736654804271,
	"grad_norm": 0.271484375,
	"learning_rate": 8.621430715855155e-05,
	"loss": 0.628,
	"step": 1839
	},
	{
	"epoch": 0.5462633451957295,
	"grad_norm": 0.263671875,
	"learning_rate": 8.593670578264814e-05,
	"loss": 0.6108,
	"step": 1842
	},
	{
	"epoch": 0.547153024911032,
	"grad_norm": 0.259765625,
	"learning_rate": 8.565921492489686e-05,
	"loss": 0.609,
	"step": 1845
	},
	{
	"epoch": 0.5480427046263345,
	"grad_norm": 0.26953125,
	"learning_rate": 8.538183676599426e-05,
	"loss": 0.6135,
	"step": 1848
	},
	{
	"epoch": 0.548932384341637,
	"grad_norm": 0.26953125,
	"learning_rate": 8.510457348575115e-05,
	"loss": 0.623,
	"step": 1851
	},
	{
	"epoch": 0.5498220640569395,
	"grad_norm": 0.28515625,
	"learning_rate": 8.482742726307569e-05,
	"loss": 0.6345,
	"step": 1854
	},
	{
	"epoch": 0.550711743772242,
	"grad_norm": 0.271484375,
	"learning_rate": 8.4550400275956e-05,
	"loss": 0.6167,
	"step": 1857
	},
	{
	"epoch": 0.5516014234875445,
	"grad_norm": 0.287109375,
	"learning_rate": 8.427349470144319e-05,
	"loss": 0.6199,
	"step": 1860
	},
	{
	"epoch": 0.552491103202847,
	"grad_norm": 0.267578125,
	"learning_rate": 8.399671271563438e-05,
	"loss": 0.589,
	"step": 1863
	},
	{
	"epoch": 0.5533807829181495,
	"grad_norm": 0.2734375,
	"learning_rate": 8.372005649365519e-05,
	"loss": 0.6183,
	"step": 1866
	},
	{
	"epoch": 0.5542704626334519,
	"grad_norm": 0.2734375,
	"learning_rate": 8.344352820964317e-05,
	"loss": 0.6555,
	"step": 1869
	},
	{
	"epoch": 0.5551601423487544,
	"grad_norm": 0.26953125,
	"learning_rate": 8.316713003673028e-05,
	"loss": 0.6172,
	"step": 1872
	},
	{
	"epoch": 0.556049822064057,
	"grad_norm": 0.26953125,
	"learning_rate": 8.289086414702609e-05,
	"loss": 0.6328,
	"step": 1875
	},
	{
	"epoch": 0.5569395017793595,
	"grad_norm": 0.27734375,
	"learning_rate": 8.261473271160046e-05,
	"loss": 0.6314,
	"step": 1878
	},
	{
	"epoch": 0.5578291814946619,
	"grad_norm": 0.26953125,
	"learning_rate": 8.233873790046684e-05,
	"loss": 0.6392,
	"step": 1881
	},
	{
	"epoch": 0.5587188612099644,
	"grad_norm": 0.2734375,
	"learning_rate": 8.206288188256486e-05,
	"loss": 0.6297,
	"step": 1884
	},
	{
	"epoch": 0.5596085409252669,
	"grad_norm": 0.26953125,
	"learning_rate": 8.178716682574339e-05,
	"loss": 0.5915,
	"step": 1887
	},
	{
	"epoch": 0.5604982206405694,
	"grad_norm": 0.279296875,
	"learning_rate": 8.15115948967437e-05,
	"loss": 0.625,
	"step": 1890
	},
	{
	"epoch": 0.5613879003558719,
	"grad_norm": 0.279296875,
	"learning_rate": 8.12361682611821e-05,
	"loss": 0.6307,
	"step": 1893
	},
	{
	"epoch": 0.5622775800711743,
	"grad_norm": 0.26953125,
	"learning_rate": 8.096088908353315e-05,
	"loss": 0.6018,
	"step": 1896
	},
	{
	"epoch": 0.5631672597864769,
	"grad_norm": 0.287109375,
	"learning_rate": 8.068575952711272e-05,
	"loss": 0.6045,
	"step": 1899
	},
	{
	"epoch": 0.5640569395017794,
	"grad_norm": 0.271484375,
	"learning_rate": 8.041078175406064e-05,
	"loss": 0.5921,
	"step": 1902
	},
	{
	"epoch": 0.5649466192170819,
	"grad_norm": 0.271484375,
	"learning_rate": 8.013595792532412e-05,
	"loss": 0.6339,
	"step": 1905
	},
	{
	"epoch": 0.5658362989323843,
	"grad_norm": 0.2734375,
	"learning_rate": 7.986129020064044e-05,
	"loss": 0.6147,
	"step": 1908
	},
	{
	"epoch": 0.5667259786476868,
	"grad_norm": 0.259765625,
	"learning_rate": 7.958678073852025e-05,
	"loss": 0.5981,
	"step": 1911
	},
	{
	"epoch": 0.5676156583629893,
	"grad_norm": 0.275390625,
	"learning_rate": 7.931243169623037e-05,
	"loss": 0.6201,
	"step": 1914
	},
	{
	"epoch": 0.5685053380782918,
	"grad_norm": 0.263671875,
	"learning_rate": 7.903824522977695e-05,
	"loss": 0.5997,
	"step": 1917
	},
	{
	"epoch": 0.5693950177935944,
	"grad_norm": 0.28125,
	"learning_rate": 7.876422349388862e-05,
	"loss": 0.6216,
	"step": 1920
	},
	{
	"epoch": 0.5702846975088968,
	"grad_norm": 0.27734375,
	"learning_rate": 7.849036864199931e-05,
	"loss": 0.6365,
	"step": 1923
	},
	{
	"epoch": 0.5711743772241993,
	"grad_norm": 0.291015625,
	"learning_rate": 7.821668282623158e-05,
	"loss": 0.5978,
	"step": 1926
	},
	{
	"epoch": 0.5720640569395018,
	"grad_norm": 0.26953125,
	"learning_rate": 7.79431681973795e-05,
	"loss": 0.605,
	"step": 1929
	},
	{
	"epoch": 0.5729537366548043,
	"grad_norm": 0.26953125,
	"learning_rate": 7.766982690489199e-05,
	"loss": 0.6106,
	"step": 1932
	},
	{
	"epoch": 0.5738434163701067,
	"grad_norm": 0.283203125,
	"learning_rate": 7.739666109685563e-05,
	"loss": 0.6312,
	"step": 1935
	},
	{
	"epoch": 0.5747330960854092,
	"grad_norm": 0.265625,
	"learning_rate": 7.7123672919978e-05,
	"loss": 0.6069,
	"step": 1938
	},
	{
	"epoch": 0.5756227758007118,
	"grad_norm": 0.259765625,
	"learning_rate": 7.685086451957084e-05,
	"loss": 0.6266,
	"step": 1941
	},
	{
	"epoch": 0.5765124555160143,
	"grad_norm": 0.275390625,
	"learning_rate": 7.657823803953288e-05,
	"loss": 0.5883,
	"step": 1944
	},
	{
	"epoch": 0.5774021352313167,
	"grad_norm": 0.26171875,
	"learning_rate": 7.63057956223334e-05,
	"loss": 0.6277,
	"step": 1947
	},
	{
	"epoch": 0.5782918149466192,
	"grad_norm": 0.271484375,
	"learning_rate": 7.60335394089951e-05,
	"loss": 0.6281,
	"step": 1950
	},
	{
	"epoch": 0.5791814946619217,
	"grad_norm": 0.26171875,
	"learning_rate": 7.576147153907742e-05,
	"loss": 0.6225,
	"step": 1953
	},
	{
	"epoch": 0.5800711743772242,
	"grad_norm": 0.259765625,
	"learning_rate": 7.54895941506596e-05,
	"loss": 0.6119,
	"step": 1956
	},
	{
	"epoch": 0.5809608540925267,
	"grad_norm": 0.2890625,
	"learning_rate": 7.521790938032408e-05,
	"loss": 0.6066,
	"step": 1959
	},
	{
	"epoch": 0.5818505338078291,
	"grad_norm": 0.267578125,
	"learning_rate": 7.494641936313953e-05,
	"loss": 0.5946,
	"step": 1962
	},
	{
	"epoch": 0.5827402135231317,
	"grad_norm": 0.26953125,
	"learning_rate": 7.467512623264403e-05,
	"loss": 0.6134,
	"step": 1965
	},
	{
	"epoch": 0.5836298932384342,
	"grad_norm": 0.267578125,
	"learning_rate": 7.440403212082862e-05,
	"loss": 0.6004,
	"step": 1968
	},
	{
	"epoch": 0.5845195729537367,
	"grad_norm": 0.283203125,
	"learning_rate": 7.41331391581201e-05,
	"loss": 0.613,
	"step": 1971
	},
	{
	"epoch": 0.5854092526690391,
	"grad_norm": 0.2734375,
	"learning_rate": 7.386244947336462e-05,
	"loss": 0.6162,
	"step": 1974
	},
	{
	"epoch": 0.5862989323843416,
	"grad_norm": 0.287109375,
	"learning_rate": 7.359196519381092e-05,
	"loss": 0.6304,
	"step": 1977
	},
	{
	"epoch": 0.5871886120996441,
	"grad_norm": 0.275390625,
	"learning_rate": 7.33216884450934e-05,
	"loss": 0.6367,
	"step": 1980
	},
	{
	"epoch": 0.5880782918149466,
	"grad_norm": 0.26171875,
	"learning_rate": 7.305162135121561e-05,
	"loss": 0.5892,
	"step": 1983
	},
	{
	"epoch": 0.5889679715302492,
	"grad_norm": 0.255859375,
	"learning_rate": 7.278176603453347e-05,
	"loss": 0.6313,
	"step": 1986
	},
	{
	"epoch": 0.5898576512455516,
	"grad_norm": 0.263671875,
	"learning_rate": 7.251212461573873e-05,
	"loss": 0.6132,
	"step": 1989
	},
	{
	"epoch": 0.5907473309608541,
	"grad_norm": 0.267578125,
	"learning_rate": 7.224269921384206e-05,
	"loss": 0.6081,
	"step": 1992
	},
	{
	"epoch": 0.5916370106761566,
	"grad_norm": 0.259765625,
	"learning_rate": 7.197349194615656e-05,
	"loss": 0.5778,
	"step": 1995
	},
	{
	"epoch": 0.5925266903914591,
	"grad_norm": 0.271484375,
	"learning_rate": 7.170450492828125e-05,
	"loss": 0.6191,
	"step": 1998
	},
	{
	"epoch": 0.5934163701067615,
	"grad_norm": 0.27734375,
	"learning_rate": 7.143574027408408e-05,
	"loss": 0.6019,
	"step": 2001
	},
	{
	"epoch": 0.594306049822064,
	"grad_norm": 0.28125,
	"learning_rate": 7.116720009568564e-05,
	"loss": 0.6255,
	"step": 2004
	},
	{
	"epoch": 0.5951957295373665,
	"grad_norm": 0.265625,
	"learning_rate": 7.08988865034424e-05,
	"loss": 0.5998,
	"step": 2007
	},
	{
	"epoch": 0.5960854092526691,
	"grad_norm": 0.259765625,
	"learning_rate": 7.063080160593025e-05,
	"loss": 0.6059,
	"step": 2010
	},
	{
	"epoch": 0.5969750889679716,
	"grad_norm": 0.263671875,
	"learning_rate": 7.036294750992775e-05,
	"loss": 0.6082,
	"step": 2013
	},
	{
	"epoch": 0.597864768683274,
	"grad_norm": 0.263671875,
	"learning_rate": 7.009532632039975e-05,
	"loss": 0.6038,
	"step": 2016
	},
	{
	"epoch": 0.5987544483985765,
	"grad_norm": 0.271484375,
	"learning_rate": 6.982794014048077e-05,
	"loss": 0.6042,
	"step": 2019
	},
	{
	"epoch": 0.599644128113879,
	"grad_norm": 0.2734375,
	"learning_rate": 6.956079107145845e-05,
	"loss": 0.6194,
	"step": 2022
	},
	{
	"epoch": 0.6005338078291815,
	"grad_norm": 0.265625,
	"learning_rate": 6.92938812127571e-05,
	"loss": 0.6125,
	"step": 2025
	},
	{
	"epoch": 0.6014234875444839,
	"grad_norm": 0.265625,
	"learning_rate": 6.902721266192111e-05,
	"loss": 0.6206,
	"step": 2028
	},
	{
	"epoch": 0.6023131672597865,
	"grad_norm": 0.267578125,
	"learning_rate": 6.876078751459856e-05,
	"loss": 0.5901,
	"step": 2031
	},
	{
	"epoch": 0.603202846975089,
	"grad_norm": 0.27734375,
	"learning_rate": 6.849460786452475e-05,
	"loss": 0.6007,
	"step": 2034
	},
	{
	"epoch": 0.6040925266903915,
	"grad_norm": 0.283203125,
	"learning_rate": 6.822867580350563e-05,
	"loss": 0.6419,
	"step": 2037
	},
	{
	"epoch": 0.604982206405694,
	"grad_norm": 0.275390625,
	"learning_rate": 6.79629934214015e-05,
	"loss": 0.6086,
	"step": 2040
	},
	{
	"epoch": 0.6058718861209964,
	"grad_norm": 0.271484375,
	"learning_rate": 6.769756280611046e-05,
	"loss": 0.6324,
	"step": 2043
	},
	{
	"epoch": 0.6067615658362989,
	"grad_norm": 0.271484375,
	"learning_rate": 6.743238604355219e-05,
	"loss": 0.5897,
	"step": 2046
	},
	{
	"epoch": 0.6076512455516014,
	"grad_norm": 0.259765625,
	"learning_rate": 6.716746521765131e-05,
	"loss": 0.598,
	"step": 2049
	},
	{
	"epoch": 0.608540925266904,
	"grad_norm": 0.259765625,
	"learning_rate": 6.690280241032116e-05,
	"loss": 0.6165,
	"step": 2052
	},
	{
	"epoch": 0.6094306049822064,
	"grad_norm": 0.265625,
	"learning_rate": 6.663839970144751e-05,
	"loss": 0.607,
	"step": 2055
	},
	{
	"epoch": 0.6103202846975089,
	"grad_norm": 0.27734375,
	"learning_rate": 6.637425916887198e-05,
	"loss": 0.6024,
	"step": 2058
	},
	{
	"epoch": 0.6112099644128114,
	"grad_norm": 0.26953125,
	"learning_rate": 6.611038288837593e-05,
	"loss": 0.6074,
	"step": 2061
	},
	{
	"epoch": 0.6120996441281139,
	"grad_norm": 0.263671875,
	"learning_rate": 6.584677293366396e-05,
	"loss": 0.6156,
	"step": 2064
	},
	{
	"epoch": 0.6129893238434164,
	"grad_norm": 0.287109375,
	"learning_rate": 6.558343137634788e-05,
	"loss": 0.6304,
	"step": 2067
	},
	{
	"epoch": 0.6138790035587188,
	"grad_norm": 0.263671875,
	"learning_rate": 6.532036028593011e-05,
	"loss": 0.5998,
	"step": 2070
	},
	{
	"epoch": 0.6147686832740213,
	"grad_norm": 0.263671875,
	"learning_rate": 6.505756172978765e-05,
	"loss": 0.6092,
	"step": 2073
	},
	{
	"epoch": 0.6156583629893239,
	"grad_norm": 0.265625,
	"learning_rate": 6.479503777315577e-05,
	"loss": 0.6055,
	"step": 2076
	},
	{
	"epoch": 0.6165480427046264,
	"grad_norm": 0.2734375,
	"learning_rate": 6.453279047911169e-05,
	"loss": 0.6009,
	"step": 2079
	},
	{
	"epoch": 0.6174377224199288,
	"grad_norm": 0.267578125,
	"learning_rate": 6.427082190855854e-05,
	"loss": 0.6202,
	"step": 2082
	},
	{
	"epoch": 0.6183274021352313,
	"grad_norm": 0.275390625,
	"learning_rate": 6.400913412020895e-05,
	"loss": 0.6211,
	"step": 2085
	},
	{
	"epoch": 0.6192170818505338,
	"grad_norm": 0.267578125,
	"learning_rate": 6.374772917056908e-05,
	"loss": 0.6151,
	"step": 2088
	},
	{
	"epoch": 0.6201067615658363,
	"grad_norm": 0.255859375,
	"learning_rate": 6.34866091139224e-05,
	"loss": 0.6111,
	"step": 2091
	},
	{
	"epoch": 0.6209964412811388,
	"grad_norm": 0.25,
	"learning_rate": 6.322577600231332e-05,
	"loss": 0.6109,
	"step": 2094
	},
	{
	"epoch": 0.6218861209964412,
	"grad_norm": 0.267578125,
	"learning_rate": 6.296523188553153e-05,
	"loss": 0.5829,
	"step": 2097
	},
	{
	"epoch": 0.6227758007117438,
	"grad_norm": 0.259765625,
	"learning_rate": 6.270497881109541e-05,
	"loss": 0.6002,
	"step": 2100
	},
	{
	"epoch": 0.6236654804270463,
	"grad_norm": 0.265625,
	"learning_rate": 6.244501882423621e-05,
	"loss": 0.6095,
	"step": 2103
	},
	{
	"epoch": 0.6245551601423488,
	"grad_norm": 0.26171875,
	"learning_rate": 6.21853539678819e-05,
	"loss": 0.6049,
	"step": 2106
	},
	{
	"epoch": 0.6254448398576512,
	"grad_norm": 0.25,
	"learning_rate": 6.192598628264121e-05,
	"loss": 0.604,
	"step": 2109
	},
	{
	"epoch": 0.6263345195729537,
	"grad_norm": 0.2578125,
	"learning_rate": 6.166691780678743e-05,
	"loss": 0.6092,
	"step": 2112
	},
	{
	"epoch": 0.6272241992882562,
	"grad_norm": 0.267578125,
	"learning_rate": 6.140815057624248e-05,
	"loss": 0.6099,
	"step": 2115
	},
	{
	"epoch": 0.6281138790035588,
	"grad_norm": 0.259765625,
	"learning_rate": 6.114968662456093e-05,
	"loss": 0.5978,
	"step": 2118
	},
	{
	"epoch": 0.6290035587188612,
	"grad_norm": 0.271484375,
	"learning_rate": 6.089152798291398e-05,
	"loss": 0.5994,
	"step": 2121
	},
	{
	"epoch": 0.6298932384341637,
	"grad_norm": 0.2734375,
	"learning_rate": 6.063367668007356e-05,
	"loss": 0.5946,
	"step": 2124
	},
	{
	"epoch": 0.6307829181494662,
	"grad_norm": 0.265625,
	"learning_rate": 6.0376134742396276e-05,
	"loss": 0.6131,
	"step": 2127
	},
	{
	"epoch": 0.6316725978647687,
	"grad_norm": 0.2734375,
	"learning_rate": 6.011890419380756e-05,
	"loss": 0.6259,
	"step": 2130
	},
	{
	"epoch": 0.6325622775800712,
	"grad_norm": 0.28125,
	"learning_rate": 5.986198705578583e-05,
	"loss": 0.6079,
	"step": 2133
	},
	{
	"epoch": 0.6334519572953736,
	"grad_norm": 0.248046875,
	"learning_rate": 5.960538534734641e-05,
	"loss": 0.5792,
	"step": 2136
	},
	{
	"epoch": 0.6343416370106761,
	"grad_norm": 0.255859375,
	"learning_rate": 5.934910108502587e-05,
	"loss": 0.5913,
	"step": 2139
	},
	{
	"epoch": 0.6352313167259787,
	"grad_norm": 0.271484375,
	"learning_rate": 5.909313628286601e-05,
	"loss": 0.6215,
	"step": 2142
	},
	{
	"epoch": 0.6361209964412812,
	"grad_norm": 0.265625,
	"learning_rate": 5.8837492952398234e-05,
	"loss": 0.5994,
	"step": 2145
	},
	{
	"epoch": 0.6370106761565836,
	"grad_norm": 0.275390625,
	"learning_rate": 5.8582173102627524e-05,
	"loss": 0.5984,
	"step": 2148
	},
	{
	"epoch": 0.6379003558718861,
	"grad_norm": 0.259765625,
	"learning_rate": 5.8327178740016744e-05,
	"loss": 0.612,
	"step": 2151
	},
	{
	"epoch": 0.6387900355871886,
	"grad_norm": 0.2578125,
	"learning_rate": 5.8072511868470945e-05,
	"loss": 0.596,
	"step": 2154
	},
	{
	"epoch": 0.6396797153024911,
	"grad_norm": 0.2578125,
	"learning_rate": 5.781817448932145e-05,
	"loss": 0.6327,
	"step": 2157
	},
	{
	"epoch": 0.6405693950177936,
	"grad_norm": 0.265625,
	"learning_rate": 5.756416860131036e-05,
	"loss": 0.6162,
	"step": 2160
	},
	{
	"epoch": 0.641459074733096,
	"grad_norm": 0.26953125,
	"learning_rate": 5.731049620057457e-05,
	"loss": 0.6125,
	"step": 2163
	},
	{
	"epoch": 0.6423487544483986,
	"grad_norm": 0.2578125,
	"learning_rate": 5.705715928063031e-05,
	"loss": 0.6053,
	"step": 2166
	},
	{
	"epoch": 0.6432384341637011,
	"grad_norm": 0.25390625,
	"learning_rate": 5.6804159832357426e-05,
	"loss": 0.5802,
	"step": 2169
	},
	{
	"epoch": 0.6441281138790036,
	"grad_norm": 0.26953125,
	"learning_rate": 5.655149984398359e-05,
	"loss": 0.6088,
	"step": 2172
	},
	{
	"epoch": 0.645017793594306,
	"grad_norm": 0.28125,
	"learning_rate": 5.629918130106886e-05,
	"loss": 0.6359,
	"step": 2175
	},
	{
	"epoch": 0.6459074733096085,
	"grad_norm": 0.267578125,
	"learning_rate": 5.6047206186489934e-05,
	"loss": 0.5961,
	"step": 2178
	},
	{
	"epoch": 0.646797153024911,
	"grad_norm": 0.265625,
	"learning_rate": 5.5795576480424774e-05,
	"loss": 0.5898,
	"step": 2181
	},
	{
	"epoch": 0.6476868327402135,
	"grad_norm": 0.27734375,
	"learning_rate": 5.554429416033673e-05,
	"loss": 0.6315,
	"step": 2184
	},
	{
	"epoch": 0.6485765124555161,
	"grad_norm": 0.259765625,
	"learning_rate": 5.5293361200959314e-05,
	"loss": 0.6186,
	"step": 2187
	},
	{
	"epoch": 0.6494661921708185,
	"grad_norm": 0.259765625,
	"learning_rate": 5.504277957428052e-05,
	"loss": 0.6337,
	"step": 2190
	},
	{
	"epoch": 0.650355871886121,
	"grad_norm": 0.267578125,
	"learning_rate": 5.4792551249527314e-05,
	"loss": 0.6123,
	"step": 2193
	},
	{
	"epoch": 0.6512455516014235,
	"grad_norm": 0.267578125,
	"learning_rate": 5.454267819315015e-05,
	"loss": 0.6191,
	"step": 2196
	},
	{
	"epoch": 0.652135231316726,
	"grad_norm": 0.275390625,
	"learning_rate": 5.429316236880764e-05,
	"loss": 0.6093,
	"step": 2199
	},
	{
	"epoch": 0.6530249110320284,
	"grad_norm": 0.2734375,
	"learning_rate": 5.4044005737351044e-05,
	"loss": 0.6005,
	"step": 2202
	},
	{
	"epoch": 0.6539145907473309,
	"grad_norm": 0.2734375,
	"learning_rate": 5.379521025680878e-05,
	"loss": 0.6154,
	"step": 2205
	},
	{
	"epoch": 0.6548042704626335,
	"grad_norm": 0.265625,
	"learning_rate": 5.3546777882371254e-05,
	"loss": 0.6227,
	"step": 2208
	},
	{
	"epoch": 0.655693950177936,
	"grad_norm": 0.263671875,
	"learning_rate": 5.329871056637524e-05,
	"loss": 0.6065,
	"step": 2211
	},
	{
	"epoch": 0.6565836298932385,
	"grad_norm": 0.263671875,
	"learning_rate": 5.305101025828863e-05,
	"loss": 0.6088,
	"step": 2214
	},
	{
	"epoch": 0.6574733096085409,
	"grad_norm": 0.275390625,
	"learning_rate": 5.280367890469529e-05,
	"loss": 0.5995,
	"step": 2217
	},
	{
	"epoch": 0.6583629893238434,
	"grad_norm": 0.271484375,
	"learning_rate": 5.255671844927944e-05,
	"loss": 0.6172,
	"step": 2220
	},
	{
	"epoch": 0.6592526690391459,
	"grad_norm": 0.275390625,
	"learning_rate": 5.231013083281067e-05,
	"loss": 0.603,
	"step": 2223
	},
	{
	"epoch": 0.6601423487544484,
	"grad_norm": 0.265625,
	"learning_rate": 5.2063917993128554e-05,
	"loss": 0.6181,
	"step": 2226
	},
	{
	"epoch": 0.6610320284697508,
	"grad_norm": 0.271484375,
	"learning_rate": 5.1818081865127386e-05,
	"loss": 0.6015,
	"step": 2229
	},
	{
	"epoch": 0.6619217081850534,
	"grad_norm": 0.279296875,
	"learning_rate": 5.157262438074104e-05,
	"loss": 0.6155,
	"step": 2232
	},
	{
	"epoch": 0.6628113879003559,
	"grad_norm": 0.259765625,
	"learning_rate": 5.132754746892776e-05,
	"loss": 0.5928,
	"step": 2235
	},
	{
	"epoch": 0.6637010676156584,
	"grad_norm": 0.25390625,
	"learning_rate": 5.1082853055655076e-05,
	"loss": 0.6114,
	"step": 2238
	},
	{
	"epoch": 0.6645907473309609,
	"grad_norm": 0.275390625,
	"learning_rate": 5.0838543063884515e-05,
	"loss": 0.6169,
	"step": 2241
	},
	{
	"epoch": 0.6654804270462633,
	"grad_norm": 0.267578125,
	"learning_rate": 5.059461941355666e-05,
	"loss": 0.5962,
	"step": 2244
	},
	{
	"epoch": 0.6663701067615658,
	"grad_norm": 0.28125,
	"learning_rate": 5.035108402157598e-05,
	"loss": 0.6391,
	"step": 2247
	},
	{
	"epoch": 0.6672597864768683,
	"grad_norm": 0.275390625,
	"learning_rate": 5.0107938801795695e-05,
	"loss": 0.6067,
	"step": 2250
	},
	{
	"epoch": 0.6681494661921709,
	"grad_norm": 0.263671875,
	"learning_rate": 4.986518566500287e-05,
	"loss": 0.6251,
	"step": 2253
	},
	{
	"epoch": 0.6690391459074733,
	"grad_norm": 0.26953125,
	"learning_rate": 4.962282651890325e-05,
	"loss": 0.5799,
	"step": 2256
	},
	{
	"epoch": 0.6699288256227758,
	"grad_norm": 0.259765625,
	"learning_rate": 4.938086326810651e-05,
	"loss": 0.6,
	"step": 2259
	},
	{
	"epoch": 0.6708185053380783,
	"grad_norm": 0.2578125,
	"learning_rate": 4.913929781411098e-05,
	"loss": 0.5815,
	"step": 2262
	},
	{
	"epoch": 0.6717081850533808,
	"grad_norm": 0.263671875,
	"learning_rate": 4.889813205528895e-05,
	"loss": 0.5998,
	"step": 2265
	},
	{
	"epoch": 0.6725978647686833,
	"grad_norm": 0.259765625,
	"learning_rate": 4.865736788687164e-05,
	"loss": 0.6064,
	"step": 2268
	},
	{
	"epoch": 0.6734875444839857,
	"grad_norm": 0.25390625,
	"learning_rate": 4.8417007200934294e-05,
	"loss": 0.5976,
	"step": 2271
	},
	{
	"epoch": 0.6743772241992882,
	"grad_norm": 0.26171875,
	"learning_rate": 4.8177051886381344e-05,
	"loss": 0.5906,
	"step": 2274
	},
	{
	"epoch": 0.6752669039145908,
	"grad_norm": 0.27734375,
	"learning_rate": 4.793750382893151e-05,
	"loss": 0.6096,
	"step": 2277
	},
	{
	"epoch": 0.6761565836298933,
	"grad_norm": 0.263671875,
	"learning_rate": 4.769836491110314e-05,
	"loss": 0.6067,
	"step": 2280
	},
	{
	"epoch": 0.6770462633451957,
	"grad_norm": 0.271484375,
	"learning_rate": 4.74596370121993e-05,
	"loss": 0.6047,
	"step": 2283
	},
	{
	"epoch": 0.6779359430604982,
	"grad_norm": 0.259765625,
	"learning_rate": 4.7221322008292915e-05,
	"loss": 0.5876,
	"step": 2286
	},
	{
	"epoch": 0.6788256227758007,
	"grad_norm": 0.259765625,
	"learning_rate": 4.698342177221219e-05,
	"loss": 0.6129,
	"step": 2289
	},
	{
	"epoch": 0.6797153024911032,
	"grad_norm": 0.26171875,
	"learning_rate": 4.674593817352575e-05,
	"loss": 0.6128,
	"step": 2292
	},
	{
	"epoch": 0.6806049822064056,
	"grad_norm": 0.26953125,
	"learning_rate": 4.650887307852818e-05,
	"loss": 0.6152,
	"step": 2295
	},
	{
	"epoch": 0.6814946619217082,
	"grad_norm": 0.26171875,
	"learning_rate": 4.627222835022502e-05,
	"loss": 0.6167,
	"step": 2298
	},
	{
	"epoch": 0.6823843416370107,
	"grad_norm": 0.2578125,
	"learning_rate": 4.603600584831844e-05,
	"loss": 0.6155,
	"step": 2301
	},
	{
	"epoch": 0.6832740213523132,
	"grad_norm": 0.265625,
	"learning_rate": 4.580020742919246e-05,
	"loss": 0.6212,
	"step": 2304
	},
	{
	"epoch": 0.6841637010676157,
	"grad_norm": 0.2578125,
	"learning_rate": 4.556483494589836e-05,
	"loss": 0.6115,
	"step": 2307
	},
	{
	"epoch": 0.6850533807829181,
	"grad_norm": 0.267578125,
	"learning_rate": 4.532989024814015e-05,
	"loss": 0.5939,
	"step": 2310
	},
	{
	"epoch": 0.6859430604982206,
	"grad_norm": 0.26171875,
	"learning_rate": 4.5095375182260016e-05,
	"loss": 0.5834,
	"step": 2313
	},
	{
	"epoch": 0.6868327402135231,
	"grad_norm": 0.26171875,
	"learning_rate": 4.486129159122393e-05,
	"loss": 0.5859,
	"step": 2316
	},
	{
	"epoch": 0.6877224199288257,
	"grad_norm": 0.26953125,
	"learning_rate": 4.462764131460694e-05,
	"loss": 0.6076,
	"step": 2319
	},
	{
	"epoch": 0.6886120996441281,
	"grad_norm": 0.263671875,
	"learning_rate": 4.439442618857891e-05,
	"loss": 0.5954,
	"step": 2322
	},
	{
	"epoch": 0.6895017793594306,
	"grad_norm": 0.26171875,
	"learning_rate": 4.416164804589005e-05,
	"loss": 0.6072,
	"step": 2325
	},
	{
	"epoch": 0.6903914590747331,
	"grad_norm": 0.265625,
	"learning_rate": 4.39293087158564e-05,
	"loss": 0.6083,
	"step": 2328
	},
	{
	"epoch": 0.6912811387900356,
	"grad_norm": 0.25390625,
	"learning_rate": 4.369741002434556e-05,
	"loss": 0.5948,
	"step": 2331
	},
	{
	"epoch": 0.6921708185053381,
	"grad_norm": 0.251953125,
	"learning_rate": 4.346595379376232e-05,
	"loss": 0.6148,
	"step": 2334
	},
	{
	"epoch": 0.6930604982206405,
	"grad_norm": 0.267578125,
	"learning_rate": 4.323494184303435e-05,
	"loss": 0.6134,
	"step": 2337
	},
	{
	"epoch": 0.693950177935943,
	"grad_norm": 0.24609375,
	"learning_rate": 4.3004375987597946e-05,
	"loss": 0.5801,
	"step": 2340
	},
	{
	"epoch": 0.6948398576512456,
	"grad_norm": 0.263671875,
	"learning_rate": 4.277425803938356e-05,
	"loss": 0.615,
	"step": 2343
	},
	{
	"epoch": 0.6957295373665481,
	"grad_norm": 0.271484375,
	"learning_rate": 4.254458980680188e-05,
	"loss": 0.6239,
	"step": 2346
	},
	{
	"epoch": 0.6966192170818505,
	"grad_norm": 0.279296875,
	"learning_rate": 4.2315373094729316e-05,
	"loss": 0.5997,
	"step": 2349
	},
	{
	"epoch": 0.697508896797153,
	"grad_norm": 0.267578125,
	"learning_rate": 4.2086609704494015e-05,
	"loss": 0.5897,
	"step": 2352
	},
	{
	"epoch": 0.6983985765124555,
	"grad_norm": 0.26171875,
	"learning_rate": 4.1858301433861566e-05,
	"loss": 0.5926,
	"step": 2355
	},
	{
	"epoch": 0.699288256227758,
	"grad_norm": 0.263671875,
	"learning_rate": 4.163045007702104e-05,
	"loss": 0.5991,
	"step": 2358
	},
	{
	"epoch": 0.7001779359430605,
	"grad_norm": 0.263671875,
	"learning_rate": 4.14030574245708e-05,
	"loss": 0.6193,
	"step": 2361
	},
	{
	"epoch": 0.701067615658363,
	"grad_norm": 0.2734375,
	"learning_rate": 4.117612526350428e-05,
	"loss": 0.6146,
	"step": 2364
	},
	{
	"epoch": 0.7019572953736655,
	"grad_norm": 0.26171875,
	"learning_rate": 4.09496553771963e-05,
	"loss": 0.6032,
	"step": 2367
	},
	{
	"epoch": 0.702846975088968,
	"grad_norm": 0.263671875,
	"learning_rate": 4.0723649545388575e-05,
	"loss": 0.5999,
	"step": 2370
	},
	{
	"epoch": 0.7037366548042705,
	"grad_norm": 0.267578125,
	"learning_rate": 4.0498109544176245e-05,
	"loss": 0.5979,
	"step": 2373
	},
	{
	"epoch": 0.7046263345195729,
	"grad_norm": 0.251953125,
	"learning_rate": 4.0273037145993454e-05,
	"loss": 0.6016,
	"step": 2376
	},
	{
	"epoch": 0.7055160142348754,
	"grad_norm": 0.26171875,
	"learning_rate": 4.0048434119599765e-05,
	"loss": 0.6017,
	"step": 2379
	},
	{
	"epoch": 0.7064056939501779,
	"grad_norm": 0.267578125,
	"learning_rate": 3.982430223006613e-05,
	"loss": 0.5984,
	"step": 2382
	},
	{
	"epoch": 0.7072953736654805,
	"grad_norm": 0.287109375,
	"learning_rate": 3.960064323876093e-05,
	"loss": 0.5982,
	"step": 2385
	},
	{
	"epoch": 0.708185053380783,
	"grad_norm": 0.2578125,
	"learning_rate": 3.937745890333623e-05,
	"loss": 0.5984,
	"step": 2388
	},
	{
	"epoch": 0.7090747330960854,
	"grad_norm": 0.267578125,
	"learning_rate": 3.915475097771396e-05,
	"loss": 0.6207,
	"step": 2391
	},
	{
	"epoch": 0.7099644128113879,
	"grad_norm": 0.26953125,
	"learning_rate": 3.8932521212072206e-05,
	"loss": 0.6029,
	"step": 2394
	},
	{
	"epoch": 0.7108540925266904,
	"grad_norm": 0.255859375,
	"learning_rate": 3.871077135283123e-05,
	"loss": 0.5887,
	"step": 2397
	},
	{
	"epoch": 0.7117437722419929,
	"grad_norm": 0.267578125,
	"learning_rate": 3.8489503142640016e-05,
	"loss": 0.6058,
	"step": 2400
	},
	{
	"epoch": 0.7126334519572953,
	"grad_norm": 0.26171875,
	"learning_rate": 3.826871832036242e-05,
	"loss": 0.606,
	"step": 2403
	},
	{
	"epoch": 0.7135231316725978,
	"grad_norm": 0.2578125,
	"learning_rate": 3.804841862106347e-05,
	"loss": 0.5846,
	"step": 2406
	},
	{
	"epoch": 0.7144128113879004,
	"grad_norm": 0.287109375,
	"learning_rate": 3.782860577599585e-05,
	"loss": 0.6075,
	"step": 2409
	},
	{
	"epoch": 0.7153024911032029,
	"grad_norm": 0.26953125,
	"learning_rate": 3.7609281512586203e-05,
	"loss": 0.6038,
	"step": 2412
	},
	{
	"epoch": 0.7161921708185054,
	"grad_norm": 0.267578125,
	"learning_rate": 3.739044755442162e-05,
	"loss": 0.6201,
	"step": 2415
	},
	{
	"epoch": 0.7170818505338078,
	"grad_norm": 0.259765625,
	"learning_rate": 3.717210562123613e-05,
	"loss": 0.6062,
	"step": 2418
	},
	{
	"epoch": 0.7179715302491103,
	"grad_norm": 0.2734375,
	"learning_rate": 3.695425742889698e-05,
	"loss": 0.6108,
	"step": 2421
	},
	{
	"epoch": 0.7188612099644128,
	"grad_norm": 0.26171875,
	"learning_rate": 3.6736904689391417e-05,
	"loss": 0.6307,
	"step": 2424
	},
	{
	"epoch": 0.7197508896797153,
	"grad_norm": 0.271484375,
	"learning_rate": 3.6520049110813035e-05,
	"loss": 0.6057,
	"step": 2427
	},
	{
	"epoch": 0.7206405693950177,
	"grad_norm": 0.2734375,
	"learning_rate": 3.6303692397348455e-05,
	"loss": 0.6262,
	"step": 2430
	},
	{
	"epoch": 0.7215302491103203,
	"grad_norm": 0.2578125,
	"learning_rate": 3.6087836249263875e-05,
	"loss": 0.599,
	"step": 2433
	},
	{
	"epoch": 0.7224199288256228,
	"grad_norm": 0.255859375,
	"learning_rate": 3.58724823628918e-05,
	"loss": 0.5879,
	"step": 2436
	},
	{
	"epoch": 0.7233096085409253,
	"grad_norm": 0.25390625,
	"learning_rate": 3.5657632430617635e-05,
	"loss": 0.6095,
	"step": 2439
	},
	{
	"epoch": 0.7241992882562278,
	"grad_norm": 0.267578125,
	"learning_rate": 3.5443288140866316e-05,
	"loss": 0.593,
	"step": 2442
	},
	{
	"epoch": 0.7250889679715302,
	"grad_norm": 0.2578125,
	"learning_rate": 3.522945117808929e-05,
	"loss": 0.5932,
	"step": 2445
	},
	{
	"epoch": 0.7259786476868327,
	"grad_norm": 0.263671875,
	"learning_rate": 3.501612322275086e-05,
	"loss": 0.6149,
	"step": 2448
	},
	{
	"epoch": 0.7268683274021353,
	"grad_norm": 0.263671875,
	"learning_rate": 3.48033059513155e-05,
	"loss": 0.591,
	"step": 2451
	},
	{
	"epoch": 0.7277580071174378,
	"grad_norm": 0.259765625,
	"learning_rate": 3.45910010362342e-05,
	"loss": 0.5986,
	"step": 2454
	},
	{
	"epoch": 0.7286476868327402,
	"grad_norm": 0.265625,
	"learning_rate": 3.437921014593167e-05,
	"loss": 0.5983,
	"step": 2457
	},
	{
	"epoch": 0.7295373665480427,
	"grad_norm": 0.2734375,
	"learning_rate": 3.416793494479308e-05,
	"loss": 0.6305,
	"step": 2460
	},
	{
	"epoch": 0.7304270462633452,
	"grad_norm": 0.2578125,
	"learning_rate": 3.3957177093150915e-05,
	"loss": 0.6212,
	"step": 2463
	},
	{
	"epoch": 0.7313167259786477,
	"grad_norm": 0.275390625,
	"learning_rate": 3.374693824727204e-05,
	"loss": 0.6132,
	"step": 2466
	},
	{
	"epoch": 0.7322064056939501,
	"grad_norm": 0.251953125,
	"learning_rate": 3.353722005934463e-05,
	"loss": 0.5886,
	"step": 2469
	},
	{
	"epoch": 0.7330960854092526,
	"grad_norm": 0.296875,
	"learning_rate": 3.332802417746527e-05,
	"loss": 0.6087,
	"step": 2472
	},
	{
	"epoch": 0.7339857651245552,
	"grad_norm": 0.267578125,
	"learning_rate": 3.311935224562591e-05,
	"loss": 0.6045,
	"step": 2475
	},
	{
	"epoch": 0.7348754448398577,
	"grad_norm": 0.259765625,
	"learning_rate": 3.291120590370091e-05,
	"loss": 0.5976,
	"step": 2478
	},
	{
	"epoch": 0.7357651245551602,
	"grad_norm": 0.267578125,
	"learning_rate": 3.270358678743434e-05,
	"loss": 0.6191,
	"step": 2481
	},
	{
	"epoch": 0.7366548042704626,
	"grad_norm": 0.25390625,
	"learning_rate": 3.249649652842687e-05,
	"loss": 0.5864,
	"step": 2484
	},
	{
	"epoch": 0.7375444839857651,
	"grad_norm": 0.265625,
	"learning_rate": 3.228993675412315e-05,
	"loss": 0.6063,
	"step": 2487
	},
	{
	"epoch": 0.7384341637010676,
	"grad_norm": 0.259765625,
	"learning_rate": 3.20839090877989e-05,
	"loss": 0.5998,
	"step": 2490
	},
	{
	"epoch": 0.7393238434163701,
	"grad_norm": 0.248046875,
	"learning_rate": 3.187841514854829e-05,
	"loss": 0.5807,
	"step": 2493
	},
	{
	"epoch": 0.7402135231316725,
	"grad_norm": 0.263671875,
	"learning_rate": 3.1673456551271086e-05,
	"loss": 0.5983,
	"step": 2496
	},
	{
	"epoch": 0.7411032028469751,
	"grad_norm": 0.259765625,
	"learning_rate": 3.1469034906659946e-05,
	"loss": 0.6053,
	"step": 2499
	},
	{
	"epoch": 0.7419928825622776,
	"grad_norm": 0.267578125,
	"learning_rate": 3.126515182118793e-05,
	"loss": 0.5994,
	"step": 2502
	},
	{
	"epoch": 0.7428825622775801,
	"grad_norm": 0.263671875,
	"learning_rate": 3.106180889709567e-05,
	"loss": 0.5969,
	"step": 2505
	},
	{
	"epoch": 0.7437722419928826,
	"grad_norm": 0.259765625,
	"learning_rate": 3.0859007732378896e-05,
	"loss": 0.5936,
	"step": 2508
	},
	{
	"epoch": 0.744661921708185,
	"grad_norm": 0.251953125,
	"learning_rate": 3.065674992077584e-05,
	"loss": 0.5717,
	"step": 2511
	},
	{
	"epoch": 0.7455516014234875,
	"grad_norm": 0.2578125,
	"learning_rate": 3.0455037051754777e-05,
	"loss": 0.6061,
	"step": 2514
	},
	{
	"epoch": 0.74644128113879,
	"grad_norm": 0.251953125,
	"learning_rate": 3.0253870710501475e-05,
	"loss": 0.5914,
	"step": 2517
	},
	{
	"epoch": 0.7473309608540926,
	"grad_norm": 0.251953125,
	"learning_rate": 3.005325247790668e-05,
	"loss": 0.6067,
	"step": 2520
	},
	{
	"epoch": 0.748220640569395,
	"grad_norm": 0.271484375,
	"learning_rate": 2.9853183930553853e-05,
	"loss": 0.5909,
	"step": 2523
	},
	{
	"epoch": 0.7491103202846975,
	"grad_norm": 0.25,
	"learning_rate": 2.965366664070661e-05,
	"loss": 0.5847,
	"step": 2526
	},
	{
	"epoch": 0.75,
	"grad_norm": 0.26953125,
	"learning_rate": 2.9454702176296423e-05,
	"loss": 0.5907,
	"step": 2529
	},
	{
	"epoch": 0.7508896797153025,
	"grad_norm": 0.26953125,
	"learning_rate": 2.925629210091043e-05,
	"loss": 0.606,
	"step": 2532
	},
	{
	"epoch": 0.751779359430605,
	"grad_norm": 0.267578125,
	"learning_rate": 2.9058437973778896e-05,
	"loss": 0.6055,
	"step": 2535
	},
	{
	"epoch": 0.7526690391459074,
	"grad_norm": 0.2578125,
	"learning_rate": 2.886114134976322e-05,
	"loss": 0.5993,
	"step": 2538
	},
	{
	"epoch": 0.75355871886121,
	"grad_norm": 0.271484375,
	"learning_rate": 2.866440377934352e-05,
	"loss": 0.6098,
	"step": 2541
	},
	{
	"epoch": 0.7544483985765125,
	"grad_norm": 0.26171875,
	"learning_rate": 2.8468226808606522e-05,
	"loss": 0.584,
	"step": 2544
	},
	{
	"epoch": 0.755338078291815,
	"grad_norm": 0.255859375,
	"learning_rate": 2.827261197923341e-05,
	"loss": 0.5949,
	"step": 2547
	},
	{
	"epoch": 0.7562277580071174,
	"grad_norm": 0.271484375,
	"learning_rate": 2.8077560828487748e-05,
	"loss": 0.5698,
	"step": 2550
	},
	{
	"epoch": 0.7571174377224199,
	"grad_norm": 0.26171875,
	"learning_rate": 2.7883074889203363e-05,
	"loss": 0.612,
	"step": 2553
	},
	{
	"epoch": 0.7580071174377224,
	"grad_norm": 0.26953125,
	"learning_rate": 2.7689155689772217e-05,
	"loss": 0.5951,
	"step": 2556
	},
	{
	"epoch": 0.7588967971530249,
	"grad_norm": 0.259765625,
	"learning_rate": 2.7495804754132602e-05,
	"loss": 0.5841,
	"step": 2559
	},
	{
	"epoch": 0.7597864768683275,
	"grad_norm": 0.26953125,
	"learning_rate": 2.7303023601756928e-05,
	"loss": 0.5978,
	"step": 2562
	},
	{
	"epoch": 0.7606761565836299,
	"grad_norm": 0.251953125,
	"learning_rate": 2.711081374763993e-05,
	"loss": 0.5994,
	"step": 2565
	},
	{
	"epoch": 0.7615658362989324,
	"grad_norm": 0.263671875,
	"learning_rate": 2.6919176702286698e-05,
	"loss": 0.6014,
	"step": 2568
	},
	{
	"epoch": 0.7624555160142349,
	"grad_norm": 0.271484375,
	"learning_rate": 2.6728113971700908e-05,
	"loss": 0.5958,
	"step": 2571
	},
	{
	"epoch": 0.7633451957295374,
	"grad_norm": 0.28125,
	"learning_rate": 2.653762705737287e-05,
	"loss": 0.6242,
	"step": 2574
	},
	{
	"epoch": 0.7642348754448398,
	"grad_norm": 0.26171875,
	"learning_rate": 2.634771745626772e-05,
	"loss": 0.616,
	"step": 2577
	},
	{
	"epoch": 0.7651245551601423,
	"grad_norm": 0.25390625,
	"learning_rate": 2.6158386660813806e-05,
	"loss": 0.5959,
	"step": 2580
	},
	{
	"epoch": 0.7660142348754448,
	"grad_norm": 0.26953125,
	"learning_rate": 2.5969636158890775e-05,
	"loss": 0.5971,
	"step": 2583
	},
	{
	"epoch": 0.7669039145907474,
	"grad_norm": 0.26171875,
	"learning_rate": 2.5781467433817973e-05,
	"loss": 0.593,
	"step": 2586
	},
	{
	"epoch": 0.7677935943060499,
	"grad_norm": 0.2578125,
	"learning_rate": 2.5593881964342857e-05,
	"loss": 0.5841,
	"step": 2589
	},
	{
	"epoch": 0.7686832740213523,
	"grad_norm": 0.251953125,
	"learning_rate": 2.5406881224629174e-05,
	"loss": 0.6111,
	"step": 2592
	},
	{
	"epoch": 0.7695729537366548,
	"grad_norm": 0.263671875,
	"learning_rate": 2.5220466684245646e-05,
	"loss": 0.5758,
	"step": 2595
	},
	{
	"epoch": 0.7704626334519573,
	"grad_norm": 0.263671875,
	"learning_rate": 2.5034639808154114e-05,
	"loss": 0.6276,
	"step": 2598
	},
	{
	"epoch": 0.7713523131672598,
	"grad_norm": 0.2578125,
	"learning_rate": 2.4849402056698334e-05,
	"loss": 0.6062,
	"step": 2601
	},
	{
	"epoch": 0.7722419928825622,
	"grad_norm": 0.263671875,
	"learning_rate": 2.4664754885592268e-05,
	"loss": 0.5881,
	"step": 2604
	},
	{
	"epoch": 0.7731316725978647,
	"grad_norm": 0.2578125,
	"learning_rate": 2.4480699745908707e-05,
	"loss": 0.6124,
	"step": 2607
	},
	{
	"epoch": 0.7740213523131673,
	"grad_norm": 0.259765625,
	"learning_rate": 2.4297238084067985e-05,
	"loss": 0.5779,
	"step": 2610
	},
	{
	"epoch": 0.7749110320284698,
	"grad_norm": 0.263671875,
	"learning_rate": 2.4114371341826415e-05,
	"loss": 0.6019,
	"step": 2613
	},
	{
	"epoch": 0.7758007117437722,
	"grad_norm": 0.259765625,
	"learning_rate": 2.3932100956265148e-05,
	"loss": 0.6087,
	"step": 2616
	},
	{
	"epoch": 0.7766903914590747,
	"grad_norm": 0.265625,
	"learning_rate": 2.375042835977872e-05,
	"loss": 0.5983,
	"step": 2619
	},
	{
	"epoch": 0.7775800711743772,
	"grad_norm": 0.283203125,
	"learning_rate": 2.3569354980063906e-05,
	"loss": 0.6024,
	"step": 2622
	},
	{
	"epoch": 0.7784697508896797,
	"grad_norm": 0.25,
	"learning_rate": 2.3388882240108423e-05,
	"loss": 0.6039,
	"step": 2625
	},
	{
	"epoch": 0.7793594306049823,
	"grad_norm": 0.26953125,
	"learning_rate": 2.3209011558179826e-05,
	"loss": 0.5958,
	"step": 2628
	},
	{
	"epoch": 0.7802491103202847,
	"grad_norm": 0.26171875,
	"learning_rate": 2.3029744347814365e-05,
	"loss": 0.5979,
	"step": 2631
	},
	{
	"epoch": 0.7811387900355872,
	"grad_norm": 0.25390625,
	"learning_rate": 2.2851082017805703e-05,
	"loss": 0.5918,
	"step": 2634
	},
	{
	"epoch": 0.7820284697508897,
	"grad_norm": 0.26171875,
	"learning_rate": 2.2673025972194106e-05,
	"loss": 0.5906,
	"step": 2637
	},
	{
	"epoch": 0.7829181494661922,
	"grad_norm": 0.255859375,
	"learning_rate": 2.2495577610255203e-05,
	"loss": 0.5857,
	"step": 2640
	},
	{
	"epoch": 0.7838078291814946,
	"grad_norm": 0.26171875,
	"learning_rate": 2.2318738326489074e-05,
	"loss": 0.602,
	"step": 2643
	},
	{
	"epoch": 0.7846975088967971,
	"grad_norm": 0.26171875,
	"learning_rate": 2.2142509510609277e-05,
	"loss": 0.5846,
	"step": 2646
	},
	{
	"epoch": 0.7855871886120996,
	"grad_norm": 0.265625,
	"learning_rate": 2.196689254753196e-05,
	"loss": 0.5983,
	"step": 2649
	},
	{
	"epoch": 0.7864768683274022,
	"grad_norm": 0.263671875,
	"learning_rate": 2.179188881736498e-05,
	"loss": 0.5753,
	"step": 2652
	},
	{
	"epoch": 0.7873665480427047,
	"grad_norm": 0.2578125,
	"learning_rate": 2.1617499695396924e-05,
	"loss": 0.605,
	"step": 2655
	},
	{
	"epoch": 0.7882562277580071,
	"grad_norm": 0.265625,
	"learning_rate": 2.1443726552086528e-05,
	"loss": 0.5982,
	"step": 2658
	},
	{
	"epoch": 0.7891459074733096,
	"grad_norm": 0.25390625,
	"learning_rate": 2.1270570753051668e-05,
	"loss": 0.5972,
	"step": 2661
	},
	{
	"epoch": 0.7900355871886121,
	"grad_norm": 0.25390625,
	"learning_rate": 2.109803365905879e-05,
	"loss": 0.5869,
	"step": 2664
	},
	{
	"epoch": 0.7909252669039146,
	"grad_norm": 0.275390625,
	"learning_rate": 2.0926116626012205e-05,
	"loss": 0.5984,
	"step": 2667
	},
	{
	"epoch": 0.791814946619217,
	"grad_norm": 0.259765625,
	"learning_rate": 2.0754821004943336e-05,
	"loss": 0.6054,
	"step": 2670
	},
	{
	"epoch": 0.7927046263345195,
	"grad_norm": 0.255859375,
	"learning_rate": 2.0584148142000225e-05,
	"loss": 0.5809,
	"step": 2673
	},
	{
	"epoch": 0.7935943060498221,
	"grad_norm": 0.267578125,
	"learning_rate": 2.0414099378436813e-05,
	"loss": 0.6019,
	"step": 2676
	},
	{
	"epoch": 0.7944839857651246,
	"grad_norm": 0.259765625,
	"learning_rate": 2.0244676050602572e-05,
	"loss": 0.5837,
	"step": 2679
	},
	{
	"epoch": 0.7953736654804271,
	"grad_norm": 0.267578125,
	"learning_rate": 2.0075879489931847e-05,
	"loss": 0.6226,
	"step": 2682
	},
	{
	"epoch": 0.7962633451957295,
	"grad_norm": 0.271484375,
	"learning_rate": 1.990771102293344e-05,
	"loss": 0.6255,
	"step": 2685
	},
	{
	"epoch": 0.797153024911032,
	"grad_norm": 0.25390625,
	"learning_rate": 1.9740171971180278e-05,
	"loss": 0.5958,
	"step": 2688
	},
	{
	"epoch": 0.7980427046263345,
	"grad_norm": 0.25,
	"learning_rate": 1.9573263651298836e-05,
	"loss": 0.5902,
	"step": 2691
	},
	{
	"epoch": 0.798932384341637,
	"grad_norm": 0.25,
	"learning_rate": 1.940698737495904e-05,
	"loss": 0.6078,
	"step": 2694
	},
	{
	"epoch": 0.7998220640569395,
	"grad_norm": 0.259765625,
	"learning_rate": 1.9241344448863696e-05,
	"loss": 0.6009,
	"step": 2697
	},
	{
	"epoch": 0.800711743772242,
	"grad_norm": 0.26953125,
	"learning_rate": 1.9076336174738473e-05,
	"loss": 0.5657,
	"step": 2700
	},
	{
	"epoch": 0.8016014234875445,
	"grad_norm": 0.251953125,
	"learning_rate": 1.891196384932139e-05,
	"loss": 0.5824,
	"step": 2703
	},
	{
	"epoch": 0.802491103202847,
	"grad_norm": 0.2578125,
	"learning_rate": 1.8748228764352914e-05,
	"loss": 0.5945,
	"step": 2706
	},
	{
	"epoch": 0.8033807829181495,
	"grad_norm": 0.25390625,
	"learning_rate": 1.858513220656567e-05,
	"loss": 0.5806,
	"step": 2709
	},
	{
	"epoch": 0.8042704626334519,
	"grad_norm": 0.255859375,
	"learning_rate": 1.8422675457674254e-05,
	"loss": 0.5963,
	"step": 2712
	},
	{
	"epoch": 0.8051601423487544,
	"grad_norm": 0.251953125,
	"learning_rate": 1.8260859794365338e-05,
	"loss": 0.5849,
	"step": 2715
	},
	{
	"epoch": 0.806049822064057,
	"grad_norm": 0.275390625,
	"learning_rate": 1.809968648828748e-05,
	"loss": 0.6066,
	"step": 2718
	},
	{
	"epoch": 0.8069395017793595,
	"grad_norm": 0.25,
	"learning_rate": 1.7939156806041203e-05,
	"loss": 0.5969,
	"step": 2721
	},
	{
	"epoch": 0.8078291814946619,
	"grad_norm": 0.259765625,
	"learning_rate": 1.777927200916907e-05,
	"loss": 0.5832,
	"step": 2724
	},
	{
	"epoch": 0.8087188612099644,
	"grad_norm": 0.259765625,
	"learning_rate": 1.762003335414566e-05,
	"loss": 0.5965,
	"step": 2727
	},
	{
	"epoch": 0.8096085409252669,
	"grad_norm": 0.25390625,
	"learning_rate": 1.7461442092367862e-05,
	"loss": 0.5792,
	"step": 2730
	},
	{
	"epoch": 0.8104982206405694,
	"grad_norm": 0.267578125,
	"learning_rate": 1.7303499470144846e-05,
	"loss": 0.5943,
	"step": 2733
	},
	{
	"epoch": 0.8113879003558719,
	"grad_norm": 0.255859375,
	"learning_rate": 1.7146206728688463e-05,
	"loss": 0.6036,
	"step": 2736
	},
	{
	"epoch": 0.8122775800711743,
	"grad_norm": 0.25390625,
	"learning_rate": 1.6989565104103312e-05,
	"loss": 0.5755,
	"step": 2739
	},
	{
	"epoch": 0.8131672597864769,
	"grad_norm": 0.265625,
	"learning_rate": 1.6833575827377134e-05,
	"loss": 0.6149,
	"step": 2742
	},
	{
	"epoch": 0.8140569395017794,
	"grad_norm": 0.26171875,
	"learning_rate": 1.6678240124371157e-05,
	"loss": 0.5944,
	"step": 2745
	},
	{
	"epoch": 0.8149466192170819,
	"grad_norm": 0.263671875,
	"learning_rate": 1.6523559215810337e-05,
	"loss": 0.5925,
	"step": 2748
	},
	{
	"epoch": 0.8158362989323843,
	"grad_norm": 0.26171875,
	"learning_rate": 1.636953431727395e-05,
	"loss": 0.5936,
	"step": 2751
	},
	{
	"epoch": 0.8167259786476868,
	"grad_norm": 0.259765625,
	"learning_rate": 1.6216166639185803e-05,
	"loss": 0.5973,
	"step": 2754
	},
	{
	"epoch": 0.8176156583629893,
	"grad_norm": 0.263671875,
	"learning_rate": 1.6063457386805004e-05,
	"loss": 0.6125,
	"step": 2757
	},
	{
	"epoch": 0.8185053380782918,
	"grad_norm": 0.259765625,
	"learning_rate": 1.5911407760216235e-05,
	"loss": 0.6036,
	"step": 2760
	},
	{
	"epoch": 0.8193950177935944,
	"grad_norm": 0.25,
	"learning_rate": 1.576001895432042e-05,
	"loss": 0.597,
	"step": 2763
	},
	{
	"epoch": 0.8202846975088968,
	"grad_norm": 0.251953125,
	"learning_rate": 1.5609292158825438e-05,
	"loss": 0.5872,
	"step": 2766
	},
	{
	"epoch": 0.8211743772241993,
	"grad_norm": 0.2578125,
	"learning_rate": 1.545922855823656e-05,
	"loss": 0.6095,
	"step": 2769
	},
	{
	"epoch": 0.8220640569395018,
	"grad_norm": 0.255859375,
	"learning_rate": 1.530982933184737e-05,
	"loss": 0.603,
	"step": 2772
	},
	{
	"epoch": 0.8229537366548043,
	"grad_norm": 0.263671875,
	"learning_rate": 1.5161095653730273e-05,
	"loss": 0.5938,
	"step": 2775
	},
	{
	"epoch": 0.8238434163701067,
	"grad_norm": 0.26953125,
	"learning_rate": 1.5013028692727481e-05,
	"loss": 0.6032,
	"step": 2778
	},
	{
	"epoch": 0.8247330960854092,
	"grad_norm": 0.2578125,
	"learning_rate": 1.4865629612441656e-05,
	"loss": 0.5877,
	"step": 2781
	},
	{
	"epoch": 0.8256227758007118,
	"grad_norm": 0.25390625,
	"learning_rate": 1.471889957122684e-05,
	"loss": 0.6057,
	"step": 2784
	},
	{
	"epoch": 0.8265124555160143,
	"grad_norm": 0.259765625,
	"learning_rate": 1.457283972217941e-05,
	"loss": 0.6011,
	"step": 2787
	},
	{
	"epoch": 0.8274021352313167,
	"grad_norm": 0.263671875,
	"learning_rate": 1.4427451213128873e-05,
	"loss": 0.6009,
	"step": 2790
	},
	{
	"epoch": 0.8282918149466192,
	"grad_norm": 0.27734375,
	"learning_rate": 1.4282735186629014e-05,
	"loss": 0.6263,
	"step": 2793
	},
	{
	"epoch": 0.8291814946619217,
	"grad_norm": 0.2578125,
	"learning_rate": 1.4138692779948748e-05,
	"loss": 0.5978,
	"step": 2796
	},
	{
	"epoch": 0.8300711743772242,
	"grad_norm": 0.24609375,
	"learning_rate": 1.3995325125063274e-05,
	"loss": 0.5968,
	"step": 2799
	},
	{
	"epoch": 0.8309608540925267,
	"grad_norm": 0.26171875,
	"learning_rate": 1.3852633348645262e-05,
	"loss": 0.6074,
	"step": 2802
	},
	{
	"epoch": 0.8318505338078291,
	"grad_norm": 0.25390625,
	"learning_rate": 1.3710618572055767e-05,
	"loss": 0.608,
	"step": 2805
	},
	{
	"epoch": 0.8327402135231317,
	"grad_norm": 0.263671875,
	"learning_rate": 1.3569281911335684e-05,
	"loss": 0.5896,
	"step": 2808
	},
	{
	"epoch": 0.8336298932384342,
	"grad_norm": 0.265625,
	"learning_rate": 1.3428624477196761e-05,
	"loss": 0.6042,
	"step": 2811
	},
	{
	"epoch": 0.8345195729537367,
	"grad_norm": 0.255859375,
	"learning_rate": 1.328864737501302e-05,
	"loss": 0.6092,
	"step": 2814
	},
	{
	"epoch": 0.8354092526690391,
	"grad_norm": 0.25,
	"learning_rate": 1.3149351704811962e-05,
	"loss": 0.6081,
	"step": 2817
	},
	{
	"epoch": 0.8362989323843416,
	"grad_norm": 0.2734375,
	"learning_rate": 1.3010738561265979e-05,
	"loss": 0.5918,
	"step": 2820
	},
	{
	"epoch": 0.8371886120996441,
	"grad_norm": 0.259765625,
	"learning_rate": 1.2872809033683798e-05,
	"loss": 0.595,
	"step": 2823
	},
	{
	"epoch": 0.8380782918149466,
	"grad_norm": 0.267578125,
	"learning_rate": 1.2735564206001749e-05,
	"loss": 0.5856,
	"step": 2826
	},
	{
	"epoch": 0.8389679715302492,
	"grad_norm": 0.248046875,
	"learning_rate": 1.2599005156775512e-05,
	"loss": 0.5715,
	"step": 2829
	},
	{
	"epoch": 0.8398576512455516,
	"grad_norm": 0.255859375,
	"learning_rate": 1.2463132959171341e-05,
	"loss": 0.607,
	"step": 2832
	},
	{
	"epoch": 0.8407473309608541,
	"grad_norm": 0.259765625,
	"learning_rate": 1.2327948680957924e-05,
	"loss": 0.5798,
	"step": 2835
	},
	{
	"epoch": 0.8416370106761566,
	"grad_norm": 0.251953125,
	"learning_rate": 1.2193453384497722e-05,
	"loss": 0.6194,
	"step": 2838
	},
	{
	"epoch": 0.8425266903914591,
	"grad_norm": 0.263671875,
	"learning_rate": 1.205964812673881e-05,
	"loss": 0.597,
	"step": 2841
	},
	{
	"epoch": 0.8434163701067615,
	"grad_norm": 0.27734375,
	"learning_rate": 1.192653395920652e-05,
	"loss": 0.6119,
	"step": 2844
	},
	{
	"epoch": 0.844306049822064,
	"grad_norm": 0.259765625,
	"learning_rate": 1.179411192799511e-05,
	"loss": 0.5891,
	"step": 2847
	},
	{
	"epoch": 0.8451957295373665,
	"grad_norm": 0.251953125,
	"learning_rate": 1.1662383073759685e-05,
	"loss": 0.6034,
	"step": 2850
	},
	{
	"epoch": 0.8460854092526691,
	"grad_norm": 0.25390625,
	"learning_rate": 1.1531348431707823e-05,
	"loss": 0.5686,
	"step": 2853
	},
	{
	"epoch": 0.8469750889679716,
	"grad_norm": 0.263671875,
	"learning_rate": 1.1401009031591658e-05,
	"loss": 0.5882,
	"step": 2856
	},
	{
	"epoch": 0.847864768683274,
	"grad_norm": 0.255859375,
	"learning_rate": 1.1271365897699615e-05,
	"loss": 0.5835,
	"step": 2859
	},
	{
	"epoch": 0.8487544483985765,
	"grad_norm": 0.26171875,
	"learning_rate": 1.114242004884839e-05,
	"loss": 0.6015,
	"step": 2862
	},
	{
	"epoch": 0.849644128113879,
	"grad_norm": 0.265625,
	"learning_rate": 1.1014172498375086e-05,
	"loss": 0.6059,
	"step": 2865
	},
	{
	"epoch": 0.8505338078291815,
	"grad_norm": 0.25390625,
	"learning_rate": 1.088662425412903e-05,
	"loss": 0.5979,
	"step": 2868
	},
	{
	"epoch": 0.8514234875444839,
	"grad_norm": 0.26171875,
	"learning_rate": 1.0759776318464043e-05,
	"loss": 0.6005,
	"step": 2871
	},
	{
	"epoch": 0.8523131672597865,
	"grad_norm": 0.271484375,
	"learning_rate": 1.0633629688230452e-05,
	"loss": 0.595,
	"step": 2874
	},
	{
	"epoch": 0.853202846975089,
	"grad_norm": 0.2578125,
	"learning_rate": 1.0508185354767264e-05,
	"loss": 0.5866,
	"step": 2877
	},
	{
	"epoch": 0.8540925266903915,
	"grad_norm": 0.251953125,
	"learning_rate": 1.0383444303894452e-05,
	"loss": 0.6049,
	"step": 2880
	},
	{
	"epoch": 0.854982206405694,
	"grad_norm": 0.271484375,
	"learning_rate": 1.0259407515905094e-05,
	"loss": 0.6029,
	"step": 2883
	},
	{
	"epoch": 0.8558718861209964,
	"grad_norm": 0.255859375,
	"learning_rate": 1.0136075965557811e-05,
	"loss": 0.6028,
	"step": 2886
	},
	{
	"epoch": 0.8567615658362989,
	"grad_norm": 0.2490234375,
	"learning_rate": 1.0013450622068921e-05,
	"loss": 0.6034,
	"step": 2889
	},
	{
	"epoch": 0.8576512455516014,
	"grad_norm": 0.24609375,
	"learning_rate": 9.891532449105045e-06,
	"loss": 0.6008,
	"step": 2892
	},
	{
	"epoch": 0.858540925266904,
	"grad_norm": 0.251953125,
	"learning_rate": 9.770322404775323e-06,
	"loss": 0.5886,
	"step": 2895
	},
	{
	"epoch": 0.8594306049822064,
	"grad_norm": 0.2578125,
	"learning_rate": 9.649821441623986e-06,
	"loss": 0.5991,
	"step": 2898
	},
	{
	"epoch": 0.8603202846975089,
	"grad_norm": 0.263671875,
	"learning_rate": 9.530030506622934e-06,
	"loss": 0.6189,
	"step": 2901
	},
	{
	"epoch": 0.8612099644128114,
	"grad_norm": 0.259765625,
	"learning_rate": 9.410950541164143e-06,
	"loss": 0.6034,
	"step": 2904
	},
	{
	"epoch": 0.8620996441281139,
	"grad_norm": 0.2490234375,
	"learning_rate": 9.292582481052403e-06,
	"loss": 0.5953,
	"step": 2907
	},
	{
	"epoch": 0.8629893238434164,
	"grad_norm": 0.26171875,
	"learning_rate": 9.174927256497844e-06,
	"loss": 0.5974,
	"step": 2910
	},
	{
	"epoch": 0.8638790035587188,
	"grad_norm": 0.255859375,
	"learning_rate": 9.05798579210878e-06,
	"loss": 0.6074,
	"step": 2913
	},
	{
	"epoch": 0.8647686832740213,
	"grad_norm": 0.2578125,
	"learning_rate": 8.941759006884265e-06,
	"loss": 0.5955,
	"step": 2916
	},
	{
	"epoch": 0.8656583629893239,
	"grad_norm": 0.271484375,
	"learning_rate": 8.826247814206967e-06,
	"loss": 0.6182,
	"step": 2919
	},
	{
	"epoch": 0.8665480427046264,
	"grad_norm": 0.275390625,
	"learning_rate": 8.711453121836066e-06,
	"loss": 0.5978,
	"step": 2922
	},
	{
	"epoch": 0.8674377224199288,
	"grad_norm": 0.25,
	"learning_rate": 8.597375831899913e-06,
	"loss": 0.5886,
	"step": 2925
	},
	{
	"epoch": 0.8683274021352313,
	"grad_norm": 0.259765625,
	"learning_rate": 8.484016840889176e-06,
	"loss": 0.6046,
	"step": 2928
	},
	{
	"epoch": 0.8692170818505338,
	"grad_norm": 0.2578125,
	"learning_rate": 8.371377039649586e-06,
	"loss": 0.6104,
	"step": 2931
	},
	{
	"epoch": 0.8701067615658363,
	"grad_norm": 0.26953125,
	"learning_rate": 8.259457313375096e-06,
	"loss": 0.6054,
	"step": 2934
	},
	{
	"epoch": 0.8709964412811388,
	"grad_norm": 0.2734375,
	"learning_rate": 8.14825854160085e-06,
	"loss": 0.5899,
	"step": 2937
	},
	{
	"epoch": 0.8718861209964412,
	"grad_norm": 0.2578125,
	"learning_rate": 8.037781598196225e-06,
	"loss": 0.5991,
	"step": 2940
	},
	{
	"epoch": 0.8727758007117438,
	"grad_norm": 0.259765625,
	"learning_rate": 7.928027351358114e-06,
	"loss": 0.5856,
	"step": 2943
	},
	{
	"epoch": 0.8736654804270463,
	"grad_norm": 0.263671875,
	"learning_rate": 7.818996663603917e-06,
	"loss": 0.6008,
	"step": 2946
	},
	{
	"epoch": 0.8745551601423488,
	"grad_norm": 0.251953125,
	"learning_rate": 7.71069039176493e-06,
	"loss": 0.5707,
	"step": 2949
	},
	{
	"epoch": 0.8754448398576512,
	"grad_norm": 0.271484375,
	"learning_rate": 7.603109386979501e-06,
	"loss": 0.588,
	"step": 2952
	},
	{
	"epoch": 0.8763345195729537,
	"grad_norm": 0.306640625,
	"learning_rate": 7.496254494686339e-06,
	"loss": 0.6,
	"step": 2955
	},
	{
	"epoch": 0.8772241992882562,
	"grad_norm": 0.2470703125,
	"learning_rate": 7.390126554617982e-06,
	"loss": 0.5762,
	"step": 2958
	},
	{
	"epoch": 0.8781138790035588,
	"grad_norm": 0.265625,
	"learning_rate": 7.284726400794073e-06,
	"loss": 0.5991,
	"step": 2961
	},
	{
	"epoch": 0.8790035587188612,
	"grad_norm": 0.271484375,
	"learning_rate": 7.180054861514885e-06,
	"loss": 0.5988,
	"step": 2964
	},
	{
	"epoch": 0.8798932384341637,
	"grad_norm": 0.271484375,
	"learning_rate": 7.076112759354736e-06,
	"loss": 0.5755,
	"step": 2967
	},
	{
	"epoch": 0.8807829181494662,
	"grad_norm": 0.2734375,
	"learning_rate": 6.972900911155655e-06,
	"loss": 0.609,
	"step": 2970
	},
	{
	"epoch": 0.8816725978647687,
	"grad_norm": 0.267578125,
	"learning_rate": 6.8704201280207935e-06,
	"loss": 0.5757,
	"step": 2973
	},
	{
	"epoch": 0.8825622775800712,
	"grad_norm": 0.259765625,
	"learning_rate": 6.7686712153081645e-06,
	"loss": 0.6276,
	"step": 2976
	},
	{
	"epoch": 0.8834519572953736,
	"grad_norm": 0.271484375,
	"learning_rate": 6.667654972624315e-06,
	"loss": 0.6059,
	"step": 2979
	},
	{
	"epoch": 0.8843416370106761,
	"grad_norm": 0.25390625,
	"learning_rate": 6.567372193817966e-06,
	"loss": 0.5917,
	"step": 2982
	},
	{
	"epoch": 0.8852313167259787,
	"grad_norm": 0.259765625,
	"learning_rate": 6.467823666973871e-06,
	"loss": 0.5702,
	"step": 2985
	},
	{
	"epoch": 0.8861209964412812,
	"grad_norm": 0.267578125,
	"learning_rate": 6.369010174406531e-06,
	"loss": 0.607,
	"step": 2988
	},
	{
	"epoch": 0.8870106761565836,
	"grad_norm": 0.255859375,
	"learning_rate": 6.270932492654125e-06,
	"loss": 0.5965,
	"step": 2991
	},
	{
	"epoch": 0.8879003558718861,
	"grad_norm": 0.2734375,
	"learning_rate": 6.173591392472333e-06,
	"loss": 0.587,
	"step": 2994
	},
	{
	"epoch": 0.8887900355871886,
	"grad_norm": 0.2578125,
	"learning_rate": 6.076987638828335e-06,
	"loss": 0.6043,
	"step": 2997
	},
	{
	"epoch": 0.8896797153024911,
	"grad_norm": 0.259765625,
	"learning_rate": 5.981121990894789e-06,
	"loss": 0.5967,
	"step": 3000
	},
	{
	"epoch": 0.8905693950177936,
	"grad_norm": 0.25390625,
	"learning_rate": 5.885995202043848e-06,
	"loss": 0.584,
	"step": 3003
	},
	{
	"epoch": 0.891459074733096,
	"grad_norm": 0.251953125,
	"learning_rate": 5.791608019841244e-06,
	"loss": 0.5836,
	"step": 3006
	},
	{
	"epoch": 0.8923487544483986,
	"grad_norm": 0.267578125,
	"learning_rate": 5.697961186040435e-06,
	"loss": 0.6029,
	"step": 3009
	},
	{
	"epoch": 0.8932384341637011,
	"grad_norm": 0.255859375,
	"learning_rate": 5.605055436576745e-06,
	"loss": 0.588,
	"step": 3012
	},
	{
	"epoch": 0.8941281138790036,
	"grad_norm": 0.26171875,
	"learning_rate": 5.51289150156159e-06,
	"loss": 0.596,
	"step": 3015
	},
	{
	"epoch": 0.895017793594306,
	"grad_norm": 0.2578125,
	"learning_rate": 5.421470105276749e-06,
	"loss": 0.5913,
	"step": 3018
	},
	{
	"epoch": 0.8959074733096085,
	"grad_norm": 0.2578125,
	"learning_rate": 5.33079196616868e-06,
	"loss": 0.5799,
	"step": 3021
	},
	{
	"epoch": 0.896797153024911,
	"grad_norm": 0.25390625,
	"learning_rate": 5.240857796842846e-06,
	"loss": 0.6145,
	"step": 3024
	},
	{
	"epoch": 0.8976868327402135,
	"grad_norm": 0.25390625,
	"learning_rate": 5.151668304058132e-06,
	"loss": 0.6146,
	"step": 3027
	},
	{
	"epoch": 0.8985765124555161,
	"grad_norm": 0.26171875,
	"learning_rate": 5.0632241887213275e-06,
	"loss": 0.6113,
	"step": 3030
	},
	{
	"epoch": 0.8994661921708185,
	"grad_norm": 0.267578125,
	"learning_rate": 4.975526145881515e-06,
	"loss": 0.5878,
	"step": 3033
	},
	{
	"epoch": 0.900355871886121,
	"grad_norm": 0.26171875,
	"learning_rate": 4.888574864724715e-06,
	"loss": 0.6173,
	"step": 3036
	},
	{
	"epoch": 0.9012455516014235,
	"grad_norm": 0.2490234375,
	"learning_rate": 4.8023710285683975e-06,
	"loss": 0.5758,
	"step": 3039
	},
	{
	"epoch": 0.902135231316726,
	"grad_norm": 0.259765625,
	"learning_rate": 4.716915314856196e-06,
	"loss": 0.5931,
	"step": 3042
	},
	{
	"epoch": 0.9030249110320284,
	"grad_norm": 0.251953125,
	"learning_rate": 4.6322083951524705e-06,
	"loss": 0.5857,
	"step": 3045
	},
	{
	"epoch": 0.9039145907473309,
	"grad_norm": 0.25390625,
	"learning_rate": 4.548250935137144e-06,
	"loss": 0.5957,
	"step": 3048
	},
	{
	"epoch": 0.9048042704626335,
	"grad_norm": 0.26171875,
	"learning_rate": 4.465043594600382e-06,
	"loss": 0.6066,
	"step": 3051
	},
	{
	"epoch": 0.905693950177936,
	"grad_norm": 0.25390625,
	"learning_rate": 4.382587027437435e-06,
	"loss": 0.5993,
	"step": 3054
	},
	{
	"epoch": 0.9065836298932385,
	"grad_norm": 0.255859375,
	"learning_rate": 4.300881881643537e-06,
	"loss": 0.5769,
	"step": 3057
	},
	{
	"epoch": 0.9074733096085409,
	"grad_norm": 0.255859375,
	"learning_rate": 4.219928799308759e-06,
	"loss": 0.5997,
	"step": 3060
	},
	{
	"epoch": 0.9083629893238434,
	"grad_norm": 0.251953125,
	"learning_rate": 4.139728416613031e-06,
	"loss": 0.598,
	"step": 3063
	},
	{
	"epoch": 0.9092526690391459,
	"grad_norm": 0.255859375,
	"learning_rate": 4.0602813638210165e-06,
	"loss": 0.5925,
	"step": 3066
	},
	{
	"epoch": 0.9101423487544484,
	"grad_norm": 0.275390625,
	"learning_rate": 3.981588265277337e-06,
	"loss": 0.6136,
	"step": 3069
	},
	{
	"epoch": 0.9110320284697508,
	"grad_norm": 0.27734375,
	"learning_rate": 3.903649739401494e-06,
	"loss": 0.5873,
	"step": 3072
	},
	{
	"epoch": 0.9119217081850534,
	"grad_norm": 0.26953125,
	"learning_rate": 3.826466398683126e-06,
	"loss": 0.6177,
	"step": 3075
	},
	{
	"epoch": 0.9128113879003559,
	"grad_norm": 0.271484375,
	"learning_rate": 3.750038849677162e-06,
	"loss": 0.5871,
	"step": 3078
	},
	{
	"epoch": 0.9137010676156584,
	"grad_norm": 0.255859375,
	"learning_rate": 3.6743676929989924e-06,
	"loss": 0.5864,
	"step": 3081
	},
	{
	"epoch": 0.9145907473309609,
	"grad_norm": 0.2578125,
	"learning_rate": 3.5994535233198846e-06,
	"loss": 0.5827,
	"step": 3084
	},
	{
	"epoch": 0.9154804270462633,
	"grad_norm": 0.255859375,
	"learning_rate": 3.525296929362165e-06,
	"loss": 0.6044,
	"step": 3087
	},
	{
	"epoch": 0.9163701067615658,
	"grad_norm": 0.263671875,
	"learning_rate": 3.45189849389469e-06,
	"loss": 0.6116,
	"step": 3090
	},
	{
	"epoch": 0.9172597864768683,
	"grad_norm": 0.2578125,
	"learning_rate": 3.3792587937282128e-06,
	"loss": 0.5705,
	"step": 3093
	},
	{
	"epoch": 0.9181494661921709,
	"grad_norm": 0.26171875,
	"learning_rate": 3.30737839971087e-06,
	"loss": 0.6156,
	"step": 3096
	},
	{
	"epoch": 0.9190391459074733,
	"grad_norm": 0.2451171875,
	"learning_rate": 3.236257876723725e-06,
	"loss": 0.5991,
	"step": 3099
	},
	{
	"epoch": 0.9199288256227758,
	"grad_norm": 0.259765625,
	"learning_rate": 3.165897783676275e-06,
	"loss": 0.5901,
	"step": 3102
	},
	{
	"epoch": 0.9208185053380783,
	"grad_norm": 0.27734375,
	"learning_rate": 3.0962986735020738e-06,
	"loss": 0.6183,
	"step": 3105
	},
	{
	"epoch": 0.9217081850533808,
	"grad_norm": 0.255859375,
	"learning_rate": 3.027461093154449e-06,
	"loss": 0.5892,
	"step": 3108
	},
	{
	"epoch": 0.9225978647686833,
	"grad_norm": 0.2578125,
	"learning_rate": 2.959385583602081e-06,
	"loss": 0.6269,
	"step": 3111
	},
	{
	"epoch": 0.9234875444839857,
	"grad_norm": 0.26171875,
	"learning_rate": 2.8920726798248643e-06,
	"loss": 0.5946,
	"step": 3114
	},
	{
	"epoch": 0.9243772241992882,
	"grad_norm": 0.259765625,
	"learning_rate": 2.8255229108096527e-06,
	"loss": 0.6192,
	"step": 3117
	},
	{
	"epoch": 0.9252669039145908,
	"grad_norm": 0.255859375,
	"learning_rate": 2.7597367995461086e-06,
	"loss": 0.6153,
	"step": 3120
	},
	{
	"epoch": 0.9261565836298933,
	"grad_norm": 0.265625,
	"learning_rate": 2.694714863022585e-06,
	"loss": 0.5831,
	"step": 3123
	},
	{
	"epoch": 0.9270462633451957,
	"grad_norm": 0.267578125,
	"learning_rate": 2.6304576122221035e-06,
	"loss": 0.5898,
	"step": 3126
	},
	{
	"epoch": 0.9279359430604982,
	"grad_norm": 0.2490234375,
	"learning_rate": 2.566965552118272e-06,
	"loss": 0.6098,
	"step": 3129
	},
	{
	"epoch": 0.9288256227758007,
	"grad_norm": 0.251953125,
	"learning_rate": 2.504239181671353e-06,
	"loss": 0.5932,
	"step": 3132
	},
	{
	"epoch": 0.9297153024911032,
	"grad_norm": 0.259765625,
	"learning_rate": 2.4422789938243763e-06,
	"loss": 0.5877,
	"step": 3135
	},
	{
	"epoch": 0.9306049822064056,
	"grad_norm": 0.279296875,
	"learning_rate": 2.381085475499201e-06,
	"loss": 0.5755,
	"step": 3138
	},
	{
	"epoch": 0.9314946619217082,
	"grad_norm": 0.263671875,
	"learning_rate": 2.3206591075927376e-06,
	"loss": 0.5875,
	"step": 3141
	},
	{
	"epoch": 0.9323843416370107,
	"grad_norm": 0.265625,
	"learning_rate": 2.2610003649731092e-06,
	"loss": 0.6113,
	"step": 3144
	},
	{
	"epoch": 0.9332740213523132,
	"grad_norm": 0.2578125,
	"learning_rate": 2.2021097164760085e-06,
	"loss": 0.6035,
	"step": 3147
	},
	{
	"epoch": 0.9341637010676157,
	"grad_norm": 0.26171875,
	"learning_rate": 2.143987624900945e-06,
	"loss": 0.5813,
	"step": 3150
	},
	{
	"epoch": 0.9350533807829181,
	"grad_norm": 0.259765625,
	"learning_rate": 2.0866345470076044e-06,
	"loss": 0.589,
	"step": 3153
	},
	{
	"epoch": 0.9359430604982206,
	"grad_norm": 0.279296875,
	"learning_rate": 2.0300509335123283e-06,
	"loss": 0.5971,
	"step": 3156
	},
	{
	"epoch": 0.9368327402135231,
	"grad_norm": 0.255859375,
	"learning_rate": 1.974237229084497e-06,
	"loss": 0.5808,
	"step": 3159
	},
	{
	"epoch": 0.9377224199288257,
	"grad_norm": 0.263671875,
	"learning_rate": 1.9191938723430615e-06,
	"loss": 0.6167,
	"step": 3162
	},
	{
	"epoch": 0.9386120996441281,
	"grad_norm": 0.2578125,
	"learning_rate": 1.8649212958531282e-06,
	"loss": 0.6088,
	"step": 3165
	},
	{
	"epoch": 0.9395017793594306,
	"grad_norm": 0.259765625,
	"learning_rate": 1.8114199261224928e-06,
	"loss": 0.5884,
	"step": 3168
	},
	{
	"epoch": 0.9403914590747331,
	"grad_norm": 0.265625,
	"learning_rate": 1.7586901835983437e-06,
	"loss": 0.6122,
	"step": 3171
	},
	{
	"epoch": 0.9412811387900356,
	"grad_norm": 0.259765625,
	"learning_rate": 1.7067324826639419e-06,
	"loss": 0.6036,
	"step": 3174
	},
	{
	"epoch": 0.9421708185053381,
	"grad_norm": 0.2578125,
	"learning_rate": 1.655547231635368e-06,
	"loss": 0.598,
	"step": 3177
	},
	{
	"epoch": 0.9430604982206405,
	"grad_norm": 0.26171875,
	"learning_rate": 1.6051348327583037e-06,
	"loss": 0.6078,
	"step": 3180
	},
	{
	"epoch": 0.943950177935943,
	"grad_norm": 0.251953125,
	"learning_rate": 1.5554956822048661e-06,
	"loss": 0.5955,
	"step": 3183
	},
	{
	"epoch": 0.9448398576512456,
	"grad_norm": 0.271484375,
	"learning_rate": 1.5066301700705331e-06,
	"loss": 0.589,
	"step": 3186
	},
	{
	"epoch": 0.9457295373665481,
	"grad_norm": 0.263671875,
	"learning_rate": 1.4585386803710021e-06,
	"loss": 0.6035,
	"step": 3189
	},
	{
	"epoch": 0.9466192170818505,
	"grad_norm": 0.265625,
	"learning_rate": 1.411221591039269e-06,
	"loss": 0.6396,
	"step": 3192
	},
	{
	"epoch": 0.947508896797153,
	"grad_norm": 0.263671875,
	"learning_rate": 1.3646792739225533e-06,
	"loss": 0.577,
	"step": 3195
	},
	{
	"epoch": 0.9483985765124555,
	"grad_norm": 0.2578125,
	"learning_rate": 1.3189120947794897e-06,
	"loss": 0.5983,
	"step": 3198
	},
	{
	"epoch": 0.949288256227758,
	"grad_norm": 0.263671875,
	"learning_rate": 1.273920413277152e-06,
	"loss": 0.6093,
	"step": 3201
	},
	{
	"epoch": 0.9501779359430605,
	"grad_norm": 0.263671875,
	"learning_rate": 1.2297045829882892e-06,
	"loss": 0.5966,
	"step": 3204
	},
	{
	"epoch": 0.951067615658363,
	"grad_norm": 0.265625,
	"learning_rate": 1.186264951388516e-06,
	"loss": 0.6228,
	"step": 3207
	},
	{
	"epoch": 0.9519572953736655,
	"grad_norm": 0.26953125,
	"learning_rate": 1.1436018598535937e-06,
	"loss": 0.6083,
	"step": 3210
	},
	{
	"epoch": 0.952846975088968,
	"grad_norm": 0.25390625,
	"learning_rate": 1.1017156436567532e-06,
	"loss": 0.5806,
	"step": 3213
	},
	{
	"epoch": 0.9537366548042705,
	"grad_norm": 0.267578125,
	"learning_rate": 1.0606066319660435e-06,
	"loss": 0.579,
	"step": 3216
	},
	{
	"epoch": 0.9546263345195729,
	"grad_norm": 0.263671875,
	"learning_rate": 1.020275147841765e-06,
	"loss": 0.6053,
	"step": 3219
	},
	{
	"epoch": 0.9555160142348754,
	"grad_norm": 0.26171875,
	"learning_rate": 9.807215082339394e-07,
	"loss": 0.595,
	"step": 3222
	},
	{
	"epoch": 0.9564056939501779,
	"grad_norm": 0.25,
	"learning_rate": 9.41946023979745e-07,
	"loss": 0.5857,
	"step": 3225
	},
	{
	"epoch": 0.9572953736654805,
	"grad_norm": 0.265625,
	"learning_rate": 9.039489998011852e-07,
	"loss": 0.6189,
	"step": 3228
	},
	{
	"epoch": 0.958185053380783,
	"grad_norm": 0.26171875,
	"learning_rate": 8.66730734302601e-07,
	"loss": 0.5837,
	"step": 3231
	},
	{
	"epoch": 0.9590747330960854,
	"grad_norm": 0.25,
	"learning_rate": 8.302915199683737e-07,
	"loss": 0.5827,
	"step": 3234
	},
	{
	"epoch": 0.9599644128113879,
	"grad_norm": 0.259765625,
	"learning_rate": 7.94631643160626e-07,
	"loss": 0.6043,
	"step": 3237
	},
	{
	"epoch": 0.9608540925266904,
	"grad_norm": 0.26171875,
	"learning_rate": 7.597513841169468e-07,
	"loss": 0.5621,
	"step": 3240
	},
	{
	"epoch": 0.9617437722419929,
	"grad_norm": 0.26953125,
	"learning_rate": 7.256510169482034e-07,
	"loss": 0.5886,
	"step": 3243
	},
	{
	"epoch": 0.9626334519572953,
	"grad_norm": 0.279296875,
	"learning_rate": 6.923308096363879e-07,
	"loss": 0.6205,
	"step": 3246
	},
	{
	"epoch": 0.9635231316725978,
	"grad_norm": 0.267578125,
	"learning_rate": 6.597910240324967e-07,
	"loss": 0.6038,
	"step": 3249
	},
	{
	"epoch": 0.9644128113879004,
	"grad_norm": 0.26171875,
	"learning_rate": 6.280319158544989e-07,
	"loss": 0.6301,
	"step": 3252
	},
	{
	"epoch": 0.9653024911032029,
	"grad_norm": 0.265625,
	"learning_rate": 5.970537346853156e-07,
	"loss": 0.6007,
	"step": 3255
	},
	{
	"epoch": 0.9661921708185054,
	"grad_norm": 0.2470703125,
	"learning_rate": 5.668567239708323e-07,
	"loss": 0.5789,
	"step": 3258
	},
	{
	"epoch": 0.9670818505338078,
	"grad_norm": 0.265625,
	"learning_rate": 5.374411210180341e-07,
	"loss": 0.5964,
	"step": 3261
	},
	{
	"epoch": 0.9679715302491103,
	"grad_norm": 0.25390625,
	"learning_rate": 5.088071569931185e-07,
	"loss": 0.5953,
	"step": 3264
	},
	{
	"epoch": 0.9688612099644128,
	"grad_norm": 0.2431640625,
	"learning_rate": 4.809550569196519e-07,
	"loss": 0.5877,
	"step": 3267
	},
	{
	"epoch": 0.9697508896797153,
	"grad_norm": 0.263671875,
	"learning_rate": 4.5388503967683793e-07,
	"loss": 0.5923,
	"step": 3270
	},
	{
	"epoch": 0.9706405693950177,
	"grad_norm": 0.255859375,
	"learning_rate": 4.275973179977855e-07,
	"loss": 0.5958,
	"step": 3273
	},
	{
	"epoch": 0.9715302491103203,
	"grad_norm": 0.271484375,
	"learning_rate": 4.0209209846783224e-07,
	"loss": 0.5977,
	"step": 3276
	},
	{
	"epoch": 0.9724199288256228,
	"grad_norm": 0.265625,
	"learning_rate": 3.773695815229239e-07,
	"loss": 0.592,
	"step": 3279
	},
	{
	"epoch": 0.9733096085409253,
	"grad_norm": 0.248046875,
	"learning_rate": 3.534299614480596e-07,
	"loss": 0.5702,
	"step": 3282
	},
	{
	"epoch": 0.9741992882562278,
	"grad_norm": 0.255859375,
	"learning_rate": 3.3027342637572676e-07,
	"loss": 0.5893,
	"step": 3285
	},
	{
	"epoch": 0.9750889679715302,
	"grad_norm": 0.251953125,
	"learning_rate": 3.079001582844354e-07,
	"loss": 0.6177,
	"step": 3288
	},
	{
	"epoch": 0.9759786476868327,
	"grad_norm": 0.341796875,
	"learning_rate": 2.8631033299730825e-07,
	"loss": 0.6178,
	"step": 3291
	},
	{
	"epoch": 0.9768683274021353,
	"grad_norm": 0.255859375,
	"learning_rate": 2.655041201806707e-07,
	"loss": 0.5924,
	"step": 3294
	},
	{
	"epoch": 0.9777580071174378,
	"grad_norm": 0.259765625,
	"learning_rate": 2.454816833427631e-07,
	"loss": 0.6021,
	"step": 3297
	},
	{
	"epoch": 0.9786476868327402,
	"grad_norm": 0.2578125,
	"learning_rate": 2.2624317983239718e-07,
	"loss": 0.6131,
	"step": 3300
	}
	],
	"logging_steps": 3,
	"max_steps": 3372,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 50,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 2.1603485806523056e+19,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}