{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999447544334568, "eval_steps": 500, "global_step": 9050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001104911330865698, "grad_norm": 0.90625, "learning_rate": 2.2099447513812156e-07, "loss": 2.3586, "step": 1 }, { "epoch": 0.000552455665432849, "grad_norm": 0.8125, "learning_rate": 1.1049723756906078e-06, "loss": 2.7101, "step": 5 }, { "epoch": 0.001104911330865698, "grad_norm": 0.96484375, "learning_rate": 2.2099447513812157e-06, "loss": 2.4766, "step": 10 }, { "epoch": 0.001657366996298547, "grad_norm": 0.8828125, "learning_rate": 3.314917127071823e-06, "loss": 2.4607, "step": 15 }, { "epoch": 0.002209822661731396, "grad_norm": 0.90234375, "learning_rate": 4.419889502762431e-06, "loss": 2.3726, "step": 20 }, { "epoch": 0.002762278327164245, "grad_norm": 1.046875, "learning_rate": 5.524861878453038e-06, "loss": 2.4172, "step": 25 }, { "epoch": 0.003314733992597094, "grad_norm": 0.95703125, "learning_rate": 6.629834254143646e-06, "loss": 2.4128, "step": 30 }, { "epoch": 0.003867189658029943, "grad_norm": 0.96875, "learning_rate": 7.734806629834254e-06, "loss": 2.3467, "step": 35 }, { "epoch": 0.004419645323462792, "grad_norm": 0.8046875, "learning_rate": 8.839779005524863e-06, "loss": 2.5135, "step": 40 }, { "epoch": 0.004972100988895641, "grad_norm": 0.86328125, "learning_rate": 9.94475138121547e-06, "loss": 2.3268, "step": 45 }, { "epoch": 0.00552455665432849, "grad_norm": 0.734375, "learning_rate": 1.1049723756906077e-05, "loss": 2.3631, "step": 50 }, { "epoch": 0.0060770123197613395, "grad_norm": 0.60546875, "learning_rate": 1.2154696132596685e-05, "loss": 2.2232, "step": 55 }, { "epoch": 0.006629467985194188, "grad_norm": 1.65625, "learning_rate": 1.3259668508287292e-05, "loss": 2.3471, "step": 60 }, { "epoch": 0.007181923650627037, "grad_norm": 0.61328125, "learning_rate": 1.4364640883977901e-05, "loss": 2.1807, "step": 65 }, { "epoch": 0.007734379316059886, "grad_norm": 0.50390625, "learning_rate": 1.5469613259668508e-05, "loss": 2.2929, "step": 70 }, { "epoch": 0.008286834981492736, "grad_norm": 0.515625, "learning_rate": 1.6574585635359117e-05, "loss": 2.315, "step": 75 }, { "epoch": 0.008839290646925584, "grad_norm": 2.0, "learning_rate": 1.7679558011049725e-05, "loss": 2.3873, "step": 80 }, { "epoch": 0.009391746312358434, "grad_norm": 0.419921875, "learning_rate": 1.878453038674033e-05, "loss": 2.0662, "step": 85 }, { "epoch": 0.009944201977791282, "grad_norm": 0.5234375, "learning_rate": 1.988950276243094e-05, "loss": 2.0609, "step": 90 }, { "epoch": 0.010496657643224131, "grad_norm": 0.40234375, "learning_rate": 2.0994475138121548e-05, "loss": 2.007, "step": 95 }, { "epoch": 0.01104911330865698, "grad_norm": 0.349609375, "learning_rate": 2.2099447513812153e-05, "loss": 1.9603, "step": 100 }, { "epoch": 0.01160156897408983, "grad_norm": 0.37890625, "learning_rate": 2.3204419889502762e-05, "loss": 1.9959, "step": 105 }, { "epoch": 0.012154024639522679, "grad_norm": 0.34375, "learning_rate": 2.430939226519337e-05, "loss": 1.9086, "step": 110 }, { "epoch": 0.012706480304955527, "grad_norm": 0.3125, "learning_rate": 2.541436464088398e-05, "loss": 1.9003, "step": 115 }, { "epoch": 0.013258935970388377, "grad_norm": 0.361328125, "learning_rate": 2.6519337016574585e-05, "loss": 1.9075, "step": 120 }, { "epoch": 0.013811391635821225, "grad_norm": 0.30859375, "learning_rate": 2.7624309392265197e-05, "loss": 1.7995, "step": 125 }, { "epoch": 0.014363847301254075, "grad_norm": 0.41796875, "learning_rate": 2.8729281767955802e-05, "loss": 1.854, "step": 130 }, { "epoch": 0.014916302966686923, "grad_norm": 0.38671875, "learning_rate": 2.983425414364641e-05, "loss": 1.9294, "step": 135 }, { "epoch": 0.015468758632119772, "grad_norm": 0.341796875, "learning_rate": 3.0939226519337016e-05, "loss": 1.8513, "step": 140 }, { "epoch": 0.01602121429755262, "grad_norm": 0.404296875, "learning_rate": 3.2044198895027625e-05, "loss": 1.8049, "step": 145 }, { "epoch": 0.016573669962985472, "grad_norm": 0.35546875, "learning_rate": 3.3149171270718233e-05, "loss": 1.806, "step": 150 }, { "epoch": 0.01712612562841832, "grad_norm": 0.380859375, "learning_rate": 3.425414364640884e-05, "loss": 1.9506, "step": 155 }, { "epoch": 0.017678581293851168, "grad_norm": 0.302734375, "learning_rate": 3.535911602209945e-05, "loss": 1.8163, "step": 160 }, { "epoch": 0.018231036959284016, "grad_norm": 0.341796875, "learning_rate": 3.646408839779006e-05, "loss": 1.7206, "step": 165 }, { "epoch": 0.018783492624716867, "grad_norm": 0.3984375, "learning_rate": 3.756906077348066e-05, "loss": 1.6925, "step": 170 }, { "epoch": 0.019335948290149715, "grad_norm": 0.3984375, "learning_rate": 3.867403314917128e-05, "loss": 1.7628, "step": 175 }, { "epoch": 0.019888403955582563, "grad_norm": 0.404296875, "learning_rate": 3.977900552486188e-05, "loss": 1.7019, "step": 180 }, { "epoch": 0.020440859621015415, "grad_norm": 0.376953125, "learning_rate": 4.088397790055249e-05, "loss": 1.7424, "step": 185 }, { "epoch": 0.020993315286448263, "grad_norm": 0.369140625, "learning_rate": 4.1988950276243096e-05, "loss": 1.8125, "step": 190 }, { "epoch": 0.02154577095188111, "grad_norm": 0.37890625, "learning_rate": 4.3093922651933705e-05, "loss": 1.6872, "step": 195 }, { "epoch": 0.02209822661731396, "grad_norm": 0.392578125, "learning_rate": 4.419889502762431e-05, "loss": 1.6941, "step": 200 }, { "epoch": 0.02265068228274681, "grad_norm": 0.37890625, "learning_rate": 4.530386740331492e-05, "loss": 1.7496, "step": 205 }, { "epoch": 0.02320313794817966, "grad_norm": 1.2734375, "learning_rate": 4.6408839779005524e-05, "loss": 1.8093, "step": 210 }, { "epoch": 0.023755593613612506, "grad_norm": 0.3515625, "learning_rate": 4.751381215469613e-05, "loss": 1.7676, "step": 215 }, { "epoch": 0.024308049279045358, "grad_norm": 0.353515625, "learning_rate": 4.861878453038674e-05, "loss": 1.7021, "step": 220 }, { "epoch": 0.024860504944478206, "grad_norm": 0.3984375, "learning_rate": 4.972375690607735e-05, "loss": 1.8137, "step": 225 }, { "epoch": 0.025412960609911054, "grad_norm": 0.31640625, "learning_rate": 5.082872928176796e-05, "loss": 1.6551, "step": 230 }, { "epoch": 0.025965416275343902, "grad_norm": 0.365234375, "learning_rate": 5.193370165745857e-05, "loss": 1.7552, "step": 235 }, { "epoch": 0.026517871940776754, "grad_norm": 0.431640625, "learning_rate": 5.303867403314917e-05, "loss": 1.6806, "step": 240 }, { "epoch": 0.0270703276062096, "grad_norm": 0.828125, "learning_rate": 5.414364640883978e-05, "loss": 1.8021, "step": 245 }, { "epoch": 0.02762278327164245, "grad_norm": 0.921875, "learning_rate": 5.5248618784530394e-05, "loss": 1.6734, "step": 250 }, { "epoch": 0.0281752389370753, "grad_norm": 0.314453125, "learning_rate": 5.6353591160220996e-05, "loss": 1.6604, "step": 255 }, { "epoch": 0.02872769460250815, "grad_norm": 0.40625, "learning_rate": 5.7458563535911604e-05, "loss": 1.6265, "step": 260 }, { "epoch": 0.029280150267940997, "grad_norm": 0.408203125, "learning_rate": 5.8563535911602206e-05, "loss": 1.6527, "step": 265 }, { "epoch": 0.029832605933373845, "grad_norm": 0.31640625, "learning_rate": 5.966850828729282e-05, "loss": 1.6239, "step": 270 }, { "epoch": 0.030385061598806697, "grad_norm": 0.482421875, "learning_rate": 6.077348066298343e-05, "loss": 1.6633, "step": 275 }, { "epoch": 0.030937517264239545, "grad_norm": 0.38671875, "learning_rate": 6.187845303867403e-05, "loss": 1.7084, "step": 280 }, { "epoch": 0.03148997292967239, "grad_norm": 0.4140625, "learning_rate": 6.298342541436464e-05, "loss": 1.6181, "step": 285 }, { "epoch": 0.03204242859510524, "grad_norm": 0.392578125, "learning_rate": 6.408839779005525e-05, "loss": 1.6637, "step": 290 }, { "epoch": 0.03259488426053809, "grad_norm": 0.3515625, "learning_rate": 6.519337016574586e-05, "loss": 1.6835, "step": 295 }, { "epoch": 0.033147339925970944, "grad_norm": 0.40234375, "learning_rate": 6.629834254143647e-05, "loss": 1.6695, "step": 300 }, { "epoch": 0.03369979559140379, "grad_norm": 0.451171875, "learning_rate": 6.740331491712708e-05, "loss": 1.6593, "step": 305 }, { "epoch": 0.03425225125683664, "grad_norm": 0.33203125, "learning_rate": 6.850828729281768e-05, "loss": 1.728, "step": 310 }, { "epoch": 0.03480470692226949, "grad_norm": 0.8125, "learning_rate": 6.961325966850829e-05, "loss": 1.716, "step": 315 }, { "epoch": 0.035357162587702336, "grad_norm": 0.380859375, "learning_rate": 7.07182320441989e-05, "loss": 1.6942, "step": 320 }, { "epoch": 0.035909618253135184, "grad_norm": 0.40234375, "learning_rate": 7.182320441988951e-05, "loss": 1.6477, "step": 325 }, { "epoch": 0.03646207391856803, "grad_norm": 0.369140625, "learning_rate": 7.292817679558012e-05, "loss": 1.7323, "step": 330 }, { "epoch": 0.03701452958400089, "grad_norm": 0.421875, "learning_rate": 7.403314917127073e-05, "loss": 1.6528, "step": 335 }, { "epoch": 0.037566985249433735, "grad_norm": 0.44140625, "learning_rate": 7.513812154696132e-05, "loss": 1.7076, "step": 340 }, { "epoch": 0.03811944091486658, "grad_norm": 1.3125, "learning_rate": 7.624309392265195e-05, "loss": 1.763, "step": 345 }, { "epoch": 0.03867189658029943, "grad_norm": 0.333984375, "learning_rate": 7.734806629834255e-05, "loss": 1.6633, "step": 350 }, { "epoch": 0.03922435224573228, "grad_norm": 0.50390625, "learning_rate": 7.845303867403315e-05, "loss": 1.676, "step": 355 }, { "epoch": 0.03977680791116513, "grad_norm": 0.359375, "learning_rate": 7.955801104972376e-05, "loss": 1.5961, "step": 360 }, { "epoch": 0.040329263576597975, "grad_norm": 0.392578125, "learning_rate": 8.066298342541438e-05, "loss": 1.6465, "step": 365 }, { "epoch": 0.04088171924203083, "grad_norm": 0.34765625, "learning_rate": 8.176795580110498e-05, "loss": 1.6811, "step": 370 }, { "epoch": 0.04143417490746368, "grad_norm": 0.345703125, "learning_rate": 8.287292817679558e-05, "loss": 1.6558, "step": 375 }, { "epoch": 0.041986630572896526, "grad_norm": 0.369140625, "learning_rate": 8.397790055248619e-05, "loss": 1.6384, "step": 380 }, { "epoch": 0.042539086238329374, "grad_norm": 0.341796875, "learning_rate": 8.50828729281768e-05, "loss": 1.7084, "step": 385 }, { "epoch": 0.04309154190376222, "grad_norm": 0.4375, "learning_rate": 8.618784530386741e-05, "loss": 1.673, "step": 390 }, { "epoch": 0.04364399756919507, "grad_norm": 0.330078125, "learning_rate": 8.729281767955802e-05, "loss": 1.6415, "step": 395 }, { "epoch": 0.04419645323462792, "grad_norm": 0.3515625, "learning_rate": 8.839779005524861e-05, "loss": 1.5831, "step": 400 }, { "epoch": 0.04474890890006077, "grad_norm": 0.3515625, "learning_rate": 8.950276243093924e-05, "loss": 1.6339, "step": 405 }, { "epoch": 0.04530136456549362, "grad_norm": 0.3828125, "learning_rate": 9.060773480662984e-05, "loss": 1.5738, "step": 410 }, { "epoch": 0.04585382023092647, "grad_norm": 0.40625, "learning_rate": 9.171270718232044e-05, "loss": 1.6621, "step": 415 }, { "epoch": 0.04640627589635932, "grad_norm": 0.345703125, "learning_rate": 9.281767955801105e-05, "loss": 1.614, "step": 420 }, { "epoch": 0.046958731561792165, "grad_norm": 0.396484375, "learning_rate": 9.392265193370167e-05, "loss": 1.6316, "step": 425 }, { "epoch": 0.04751118722722501, "grad_norm": 0.41796875, "learning_rate": 9.502762430939227e-05, "loss": 1.6197, "step": 430 }, { "epoch": 0.04806364289265786, "grad_norm": 0.376953125, "learning_rate": 9.613259668508287e-05, "loss": 1.6116, "step": 435 }, { "epoch": 0.048616098558090716, "grad_norm": 0.3515625, "learning_rate": 9.723756906077348e-05, "loss": 1.5811, "step": 440 }, { "epoch": 0.049168554223523564, "grad_norm": 0.380859375, "learning_rate": 9.834254143646409e-05, "loss": 1.6332, "step": 445 }, { "epoch": 0.04972100988895641, "grad_norm": 0.36328125, "learning_rate": 9.94475138121547e-05, "loss": 1.6479, "step": 450 }, { "epoch": 0.05027346555438926, "grad_norm": 0.3828125, "learning_rate": 0.00010055248618784532, "loss": 1.6241, "step": 455 }, { "epoch": 0.05082592121982211, "grad_norm": 0.3984375, "learning_rate": 0.00010165745856353592, "loss": 1.5343, "step": 460 }, { "epoch": 0.051378376885254956, "grad_norm": 0.40625, "learning_rate": 0.00010276243093922653, "loss": 1.6228, "step": 465 }, { "epoch": 0.051930832550687804, "grad_norm": 0.384765625, "learning_rate": 0.00010386740331491714, "loss": 1.6593, "step": 470 }, { "epoch": 0.05248328821612066, "grad_norm": 0.412109375, "learning_rate": 0.00010497237569060774, "loss": 1.6194, "step": 475 }, { "epoch": 0.05303574388155351, "grad_norm": 0.419921875, "learning_rate": 0.00010607734806629834, "loss": 1.623, "step": 480 }, { "epoch": 0.053588199546986355, "grad_norm": 0.404296875, "learning_rate": 0.00010718232044198895, "loss": 1.6077, "step": 485 }, { "epoch": 0.0541406552124192, "grad_norm": 0.400390625, "learning_rate": 0.00010828729281767956, "loss": 1.5668, "step": 490 }, { "epoch": 0.05469311087785205, "grad_norm": 0.376953125, "learning_rate": 0.00010939226519337018, "loss": 1.5495, "step": 495 }, { "epoch": 0.0552455665432849, "grad_norm": 0.361328125, "learning_rate": 0.00011049723756906079, "loss": 1.6508, "step": 500 }, { "epoch": 0.05579802220871775, "grad_norm": 0.400390625, "learning_rate": 0.0001116022099447514, "loss": 1.5204, "step": 505 }, { "epoch": 0.0563504778741506, "grad_norm": 0.36328125, "learning_rate": 0.00011270718232044199, "loss": 1.6537, "step": 510 }, { "epoch": 0.05690293353958345, "grad_norm": 0.419921875, "learning_rate": 0.0001138121546961326, "loss": 1.6174, "step": 515 }, { "epoch": 0.0574553892050163, "grad_norm": 0.408203125, "learning_rate": 0.00011491712707182321, "loss": 1.6325, "step": 520 }, { "epoch": 0.058007844870449146, "grad_norm": 0.388671875, "learning_rate": 0.0001160220994475138, "loss": 1.6093, "step": 525 }, { "epoch": 0.058560300535881994, "grad_norm": 0.375, "learning_rate": 0.00011712707182320441, "loss": 1.6768, "step": 530 }, { "epoch": 0.05911275620131484, "grad_norm": 0.373046875, "learning_rate": 0.00011823204419889505, "loss": 1.6416, "step": 535 }, { "epoch": 0.05966521186674769, "grad_norm": 0.361328125, "learning_rate": 0.00011933701657458564, "loss": 1.5412, "step": 540 }, { "epoch": 0.060217667532180545, "grad_norm": 0.34765625, "learning_rate": 0.00012044198895027625, "loss": 1.556, "step": 545 }, { "epoch": 0.06077012319761339, "grad_norm": 0.361328125, "learning_rate": 0.00012154696132596686, "loss": 1.5613, "step": 550 }, { "epoch": 0.06132257886304624, "grad_norm": 0.365234375, "learning_rate": 0.00012265193370165746, "loss": 1.6679, "step": 555 }, { "epoch": 0.06187503452847909, "grad_norm": 1.3515625, "learning_rate": 0.00012375690607734806, "loss": 1.6211, "step": 560 }, { "epoch": 0.06242749019391194, "grad_norm": 0.55078125, "learning_rate": 0.00012486187845303867, "loss": 1.5611, "step": 565 }, { "epoch": 0.06297994585934479, "grad_norm": 0.435546875, "learning_rate": 0.00012596685082872928, "loss": 1.6134, "step": 570 }, { "epoch": 0.06353240152477764, "grad_norm": 0.40234375, "learning_rate": 0.00012707182320441992, "loss": 1.5494, "step": 575 }, { "epoch": 0.06408485719021048, "grad_norm": 0.384765625, "learning_rate": 0.0001281767955801105, "loss": 1.569, "step": 580 }, { "epoch": 0.06463731285564334, "grad_norm": 0.361328125, "learning_rate": 0.0001292817679558011, "loss": 1.6021, "step": 585 }, { "epoch": 0.06518976852107618, "grad_norm": 0.33984375, "learning_rate": 0.00013038674033149172, "loss": 1.6384, "step": 590 }, { "epoch": 0.06574222418650903, "grad_norm": 0.32421875, "learning_rate": 0.00013149171270718233, "loss": 1.6082, "step": 595 }, { "epoch": 0.06629467985194189, "grad_norm": 0.37109375, "learning_rate": 0.00013259668508287293, "loss": 1.617, "step": 600 }, { "epoch": 0.06684713551737473, "grad_norm": 0.349609375, "learning_rate": 0.00013370165745856354, "loss": 1.6381, "step": 605 }, { "epoch": 0.06739959118280758, "grad_norm": 0.33984375, "learning_rate": 0.00013480662983425415, "loss": 1.6354, "step": 610 }, { "epoch": 0.06795204684824042, "grad_norm": 0.326171875, "learning_rate": 0.00013591160220994476, "loss": 1.6156, "step": 615 }, { "epoch": 0.06850450251367328, "grad_norm": 0.318359375, "learning_rate": 0.00013701657458563537, "loss": 1.6065, "step": 620 }, { "epoch": 0.06905695817910612, "grad_norm": 0.3828125, "learning_rate": 0.00013812154696132598, "loss": 1.6413, "step": 625 }, { "epoch": 0.06960941384453898, "grad_norm": 0.345703125, "learning_rate": 0.00013922651933701659, "loss": 1.5627, "step": 630 }, { "epoch": 0.07016186950997183, "grad_norm": 0.412109375, "learning_rate": 0.0001403314917127072, "loss": 1.5644, "step": 635 }, { "epoch": 0.07071432517540467, "grad_norm": 0.455078125, "learning_rate": 0.0001414364640883978, "loss": 1.5796, "step": 640 }, { "epoch": 0.07126678084083753, "grad_norm": 0.40625, "learning_rate": 0.00014254143646408839, "loss": 1.572, "step": 645 }, { "epoch": 0.07181923650627037, "grad_norm": 0.390625, "learning_rate": 0.00014364640883977902, "loss": 1.6295, "step": 650 }, { "epoch": 0.07237169217170322, "grad_norm": 0.4765625, "learning_rate": 0.00014475138121546963, "loss": 1.6568, "step": 655 }, { "epoch": 0.07292414783713606, "grad_norm": 0.392578125, "learning_rate": 0.00014585635359116024, "loss": 1.6504, "step": 660 }, { "epoch": 0.07347660350256892, "grad_norm": 0.37109375, "learning_rate": 0.00014696132596685085, "loss": 1.5834, "step": 665 }, { "epoch": 0.07402905916800177, "grad_norm": 0.34375, "learning_rate": 0.00014806629834254146, "loss": 1.6404, "step": 670 }, { "epoch": 0.07458151483343461, "grad_norm": 0.38671875, "learning_rate": 0.00014917127071823204, "loss": 1.5972, "step": 675 }, { "epoch": 0.07513397049886747, "grad_norm": 0.33984375, "learning_rate": 0.00015027624309392265, "loss": 1.6089, "step": 680 }, { "epoch": 0.07568642616430031, "grad_norm": 0.33984375, "learning_rate": 0.00015138121546961325, "loss": 1.5924, "step": 685 }, { "epoch": 0.07623888182973317, "grad_norm": 0.34375, "learning_rate": 0.0001524861878453039, "loss": 1.612, "step": 690 }, { "epoch": 0.076791337495166, "grad_norm": 0.3359375, "learning_rate": 0.0001535911602209945, "loss": 1.5955, "step": 695 }, { "epoch": 0.07734379316059886, "grad_norm": 0.3125, "learning_rate": 0.0001546961325966851, "loss": 1.5907, "step": 700 }, { "epoch": 0.07789624882603172, "grad_norm": 0.375, "learning_rate": 0.0001558011049723757, "loss": 1.6093, "step": 705 }, { "epoch": 0.07844870449146456, "grad_norm": 0.330078125, "learning_rate": 0.0001569060773480663, "loss": 1.437, "step": 710 }, { "epoch": 0.07900116015689741, "grad_norm": 0.384765625, "learning_rate": 0.0001580110497237569, "loss": 1.4556, "step": 715 }, { "epoch": 0.07955361582233025, "grad_norm": 0.359375, "learning_rate": 0.00015911602209944752, "loss": 1.5582, "step": 720 }, { "epoch": 0.08010607148776311, "grad_norm": 0.345703125, "learning_rate": 0.00016022099447513812, "loss": 1.566, "step": 725 }, { "epoch": 0.08065852715319595, "grad_norm": 0.333984375, "learning_rate": 0.00016132596685082876, "loss": 1.6621, "step": 730 }, { "epoch": 0.0812109828186288, "grad_norm": 0.3203125, "learning_rate": 0.00016243093922651934, "loss": 1.4944, "step": 735 }, { "epoch": 0.08176343848406166, "grad_norm": 0.353515625, "learning_rate": 0.00016353591160220995, "loss": 1.5275, "step": 740 }, { "epoch": 0.0823158941494945, "grad_norm": 0.326171875, "learning_rate": 0.00016464088397790056, "loss": 1.5893, "step": 745 }, { "epoch": 0.08286834981492736, "grad_norm": 0.34375, "learning_rate": 0.00016574585635359117, "loss": 1.5861, "step": 750 }, { "epoch": 0.0834208054803602, "grad_norm": 0.36328125, "learning_rate": 0.00016685082872928178, "loss": 1.5378, "step": 755 }, { "epoch": 0.08397326114579305, "grad_norm": 0.33203125, "learning_rate": 0.00016795580110497238, "loss": 1.6126, "step": 760 }, { "epoch": 0.08452571681122589, "grad_norm": 0.3203125, "learning_rate": 0.000169060773480663, "loss": 1.5768, "step": 765 }, { "epoch": 0.08507817247665875, "grad_norm": 0.32421875, "learning_rate": 0.0001701657458563536, "loss": 1.6201, "step": 770 }, { "epoch": 0.0856306281420916, "grad_norm": 0.326171875, "learning_rate": 0.0001712707182320442, "loss": 1.535, "step": 775 }, { "epoch": 0.08618308380752444, "grad_norm": 0.302734375, "learning_rate": 0.00017237569060773482, "loss": 1.5564, "step": 780 }, { "epoch": 0.0867355394729573, "grad_norm": 0.318359375, "learning_rate": 0.00017348066298342543, "loss": 1.5377, "step": 785 }, { "epoch": 0.08728799513839014, "grad_norm": 0.306640625, "learning_rate": 0.00017458563535911604, "loss": 1.5815, "step": 790 }, { "epoch": 0.087840450803823, "grad_norm": 0.31640625, "learning_rate": 0.00017569060773480665, "loss": 1.5875, "step": 795 }, { "epoch": 0.08839290646925584, "grad_norm": 0.33203125, "learning_rate": 0.00017679558011049723, "loss": 1.5496, "step": 800 }, { "epoch": 0.08894536213468869, "grad_norm": 0.31640625, "learning_rate": 0.00017790055248618784, "loss": 1.6073, "step": 805 }, { "epoch": 0.08949781780012155, "grad_norm": 0.416015625, "learning_rate": 0.00017900552486187847, "loss": 1.5634, "step": 810 }, { "epoch": 0.09005027346555439, "grad_norm": 0.326171875, "learning_rate": 0.00018011049723756908, "loss": 1.6645, "step": 815 }, { "epoch": 0.09060272913098724, "grad_norm": 0.306640625, "learning_rate": 0.0001812154696132597, "loss": 1.6745, "step": 820 }, { "epoch": 0.09115518479642008, "grad_norm": 0.31640625, "learning_rate": 0.0001823204419889503, "loss": 1.6031, "step": 825 }, { "epoch": 0.09170764046185294, "grad_norm": 0.333984375, "learning_rate": 0.00018342541436464088, "loss": 1.5564, "step": 830 }, { "epoch": 0.09226009612728578, "grad_norm": 0.3125, "learning_rate": 0.0001845303867403315, "loss": 1.557, "step": 835 }, { "epoch": 0.09281255179271863, "grad_norm": 0.34765625, "learning_rate": 0.0001856353591160221, "loss": 1.6214, "step": 840 }, { "epoch": 0.09336500745815149, "grad_norm": 0.337890625, "learning_rate": 0.0001867403314917127, "loss": 1.5873, "step": 845 }, { "epoch": 0.09391746312358433, "grad_norm": 0.33984375, "learning_rate": 0.00018784530386740334, "loss": 1.5694, "step": 850 }, { "epoch": 0.09446991878901718, "grad_norm": 0.302734375, "learning_rate": 0.00018895027624309395, "loss": 1.5454, "step": 855 }, { "epoch": 0.09502237445445003, "grad_norm": 0.35546875, "learning_rate": 0.00019005524861878453, "loss": 1.5422, "step": 860 }, { "epoch": 0.09557483011988288, "grad_norm": 0.337890625, "learning_rate": 0.00019116022099447514, "loss": 1.547, "step": 865 }, { "epoch": 0.09612728578531572, "grad_norm": 0.322265625, "learning_rate": 0.00019226519337016575, "loss": 1.6308, "step": 870 }, { "epoch": 0.09667974145074858, "grad_norm": 0.3203125, "learning_rate": 0.00019337016574585636, "loss": 1.5603, "step": 875 }, { "epoch": 0.09723219711618143, "grad_norm": 0.32421875, "learning_rate": 0.00019447513812154697, "loss": 1.5505, "step": 880 }, { "epoch": 0.09778465278161427, "grad_norm": 0.43359375, "learning_rate": 0.00019558011049723757, "loss": 1.5212, "step": 885 }, { "epoch": 0.09833710844704713, "grad_norm": 0.353515625, "learning_rate": 0.00019668508287292818, "loss": 1.6186, "step": 890 }, { "epoch": 0.09888956411247997, "grad_norm": 0.326171875, "learning_rate": 0.0001977900552486188, "loss": 1.6193, "step": 895 }, { "epoch": 0.09944201977791282, "grad_norm": 0.310546875, "learning_rate": 0.0001988950276243094, "loss": 1.5252, "step": 900 }, { "epoch": 0.09999447544334567, "grad_norm": 0.296875, "learning_rate": 0.0002, "loss": 1.5828, "step": 905 }, { "epoch": 0.10054693110877852, "grad_norm": 0.328125, "learning_rate": 0.00019999981403661345, "loss": 1.5575, "step": 910 }, { "epoch": 0.10109938677421137, "grad_norm": 0.310546875, "learning_rate": 0.00019999925614714537, "loss": 1.5476, "step": 915 }, { "epoch": 0.10165184243964422, "grad_norm": 0.296875, "learning_rate": 0.00019999832633367076, "loss": 1.4411, "step": 920 }, { "epoch": 0.10220429810507707, "grad_norm": 0.37109375, "learning_rate": 0.0001999970245996478, "loss": 1.6125, "step": 925 }, { "epoch": 0.10275675377050991, "grad_norm": 0.396484375, "learning_rate": 0.00019999535094991798, "loss": 1.5408, "step": 930 }, { "epoch": 0.10330920943594277, "grad_norm": 0.302734375, "learning_rate": 0.00019999330539070617, "loss": 1.6132, "step": 935 }, { "epoch": 0.10386166510137561, "grad_norm": 0.3203125, "learning_rate": 0.00019999088792962017, "loss": 1.4461, "step": 940 }, { "epoch": 0.10441412076680846, "grad_norm": 0.3125, "learning_rate": 0.00019998809857565131, "loss": 1.5561, "step": 945 }, { "epoch": 0.10496657643224132, "grad_norm": 0.3515625, "learning_rate": 0.00019998493733917384, "loss": 1.549, "step": 950 }, { "epoch": 0.10551903209767416, "grad_norm": 0.298828125, "learning_rate": 0.00019998140423194534, "loss": 1.5282, "step": 955 }, { "epoch": 0.10607148776310701, "grad_norm": 0.30859375, "learning_rate": 0.0001999774992671063, "loss": 1.603, "step": 960 }, { "epoch": 0.10662394342853986, "grad_norm": 0.345703125, "learning_rate": 0.0001999732224591804, "loss": 1.5311, "step": 965 }, { "epoch": 0.10717639909397271, "grad_norm": 0.32421875, "learning_rate": 0.0001999685738240742, "loss": 1.5819, "step": 970 }, { "epoch": 0.10772885475940555, "grad_norm": 0.3046875, "learning_rate": 0.00019996355337907718, "loss": 1.556, "step": 975 }, { "epoch": 0.1082813104248384, "grad_norm": 0.29296875, "learning_rate": 0.00019995816114286178, "loss": 1.6214, "step": 980 }, { "epoch": 0.10883376609027126, "grad_norm": 0.365234375, "learning_rate": 0.00019995239713548318, "loss": 1.5817, "step": 985 }, { "epoch": 0.1093862217557041, "grad_norm": 0.306640625, "learning_rate": 0.00019994626137837917, "loss": 1.5485, "step": 990 }, { "epoch": 0.10993867742113696, "grad_norm": 0.34375, "learning_rate": 0.00019993975389437038, "loss": 1.4899, "step": 995 }, { "epoch": 0.1104911330865698, "grad_norm": 0.271484375, "learning_rate": 0.00019993287470765984, "loss": 1.5237, "step": 1000 }, { "epoch": 0.11104358875200265, "grad_norm": 0.28515625, "learning_rate": 0.00019992562384383309, "loss": 1.5832, "step": 1005 }, { "epoch": 0.1115960444174355, "grad_norm": 0.34765625, "learning_rate": 0.00019991800132985804, "loss": 1.5726, "step": 1010 }, { "epoch": 0.11214850008286835, "grad_norm": 0.318359375, "learning_rate": 0.00019991000719408485, "loss": 1.4869, "step": 1015 }, { "epoch": 0.1127009557483012, "grad_norm": 0.29296875, "learning_rate": 0.00019990164146624584, "loss": 1.5738, "step": 1020 }, { "epoch": 0.11325341141373405, "grad_norm": 0.3671875, "learning_rate": 0.00019989290417745542, "loss": 1.4831, "step": 1025 }, { "epoch": 0.1138058670791669, "grad_norm": 0.333984375, "learning_rate": 0.00019988379536020986, "loss": 1.6201, "step": 1030 }, { "epoch": 0.11435832274459974, "grad_norm": 0.294921875, "learning_rate": 0.00019987431504838737, "loss": 1.5374, "step": 1035 }, { "epoch": 0.1149107784100326, "grad_norm": 0.31640625, "learning_rate": 0.0001998644632772477, "loss": 1.5273, "step": 1040 }, { "epoch": 0.11546323407546544, "grad_norm": 0.296875, "learning_rate": 0.00019985424008343226, "loss": 1.5514, "step": 1045 }, { "epoch": 0.11601568974089829, "grad_norm": 0.306640625, "learning_rate": 0.00019984364550496383, "loss": 1.5302, "step": 1050 }, { "epoch": 0.11656814540633115, "grad_norm": 0.28515625, "learning_rate": 0.00019983267958124644, "loss": 1.5647, "step": 1055 }, { "epoch": 0.11712060107176399, "grad_norm": 0.27734375, "learning_rate": 0.0001998213423530654, "loss": 1.5973, "step": 1060 }, { "epoch": 0.11767305673719684, "grad_norm": 0.294921875, "learning_rate": 0.00019980963386258683, "loss": 1.6156, "step": 1065 }, { "epoch": 0.11822551240262968, "grad_norm": 0.294921875, "learning_rate": 0.0001997975541533577, "loss": 1.6118, "step": 1070 }, { "epoch": 0.11877796806806254, "grad_norm": 0.314453125, "learning_rate": 0.00019978510327030579, "loss": 1.5749, "step": 1075 }, { "epoch": 0.11933042373349538, "grad_norm": 0.291015625, "learning_rate": 0.00019977228125973916, "loss": 1.5673, "step": 1080 }, { "epoch": 0.11988287939892824, "grad_norm": 0.314453125, "learning_rate": 0.0001997590881693464, "loss": 1.5335, "step": 1085 }, { "epoch": 0.12043533506436109, "grad_norm": 0.322265625, "learning_rate": 0.00019974552404819607, "loss": 1.5394, "step": 1090 }, { "epoch": 0.12098779072979393, "grad_norm": 0.337890625, "learning_rate": 0.0001997315889467368, "loss": 1.4369, "step": 1095 }, { "epoch": 0.12154024639522679, "grad_norm": 0.302734375, "learning_rate": 0.0001997172829167969, "loss": 1.5878, "step": 1100 }, { "epoch": 0.12209270206065963, "grad_norm": 0.28515625, "learning_rate": 0.00019970260601158443, "loss": 1.565, "step": 1105 }, { "epoch": 0.12264515772609248, "grad_norm": 0.33203125, "learning_rate": 0.00019968755828568668, "loss": 1.5666, "step": 1110 }, { "epoch": 0.12319761339152532, "grad_norm": 0.314453125, "learning_rate": 0.00019967213979507018, "loss": 1.487, "step": 1115 }, { "epoch": 0.12375006905695818, "grad_norm": 0.314453125, "learning_rate": 0.0001996563505970804, "loss": 1.503, "step": 1120 }, { "epoch": 0.12430252472239103, "grad_norm": 0.302734375, "learning_rate": 0.00019964019075044163, "loss": 1.4641, "step": 1125 }, { "epoch": 0.12485498038782387, "grad_norm": 0.298828125, "learning_rate": 0.00019962366031525664, "loss": 1.5303, "step": 1130 }, { "epoch": 0.12540743605325672, "grad_norm": 0.287109375, "learning_rate": 0.0001996067593530065, "loss": 1.5302, "step": 1135 }, { "epoch": 0.12595989171868957, "grad_norm": 0.3125, "learning_rate": 0.00019958948792655055, "loss": 1.5768, "step": 1140 }, { "epoch": 0.12651234738412243, "grad_norm": 0.283203125, "learning_rate": 0.0001995718461001257, "loss": 1.5311, "step": 1145 }, { "epoch": 0.12706480304955528, "grad_norm": 0.328125, "learning_rate": 0.00019955383393934674, "loss": 1.4975, "step": 1150 }, { "epoch": 0.12761725871498814, "grad_norm": 0.3046875, "learning_rate": 0.00019953545151120565, "loss": 1.5817, "step": 1155 }, { "epoch": 0.12816971438042096, "grad_norm": 0.32421875, "learning_rate": 0.0001995166988840716, "loss": 1.5245, "step": 1160 }, { "epoch": 0.12872217004585382, "grad_norm": 0.30078125, "learning_rate": 0.00019949757612769067, "loss": 1.5251, "step": 1165 }, { "epoch": 0.12927462571128667, "grad_norm": 0.28515625, "learning_rate": 0.0001994780833131855, "loss": 1.6192, "step": 1170 }, { "epoch": 0.12982708137671953, "grad_norm": 0.365234375, "learning_rate": 0.00019945822051305507, "loss": 1.5495, "step": 1175 }, { "epoch": 0.13037953704215235, "grad_norm": 0.298828125, "learning_rate": 0.00019943798780117447, "loss": 1.5847, "step": 1180 }, { "epoch": 0.1309319927075852, "grad_norm": 0.3125, "learning_rate": 0.00019941738525279453, "loss": 1.4855, "step": 1185 }, { "epoch": 0.13148444837301806, "grad_norm": 0.310546875, "learning_rate": 0.00019939641294454172, "loss": 1.5691, "step": 1190 }, { "epoch": 0.13203690403845092, "grad_norm": 0.30078125, "learning_rate": 0.0001993750709544176, "loss": 1.5432, "step": 1195 }, { "epoch": 0.13258935970388377, "grad_norm": 0.322265625, "learning_rate": 0.00019935335936179874, "loss": 1.5993, "step": 1200 }, { "epoch": 0.1331418153693166, "grad_norm": 0.3046875, "learning_rate": 0.00019933127824743645, "loss": 1.506, "step": 1205 }, { "epoch": 0.13369427103474946, "grad_norm": 0.298828125, "learning_rate": 0.00019930882769345624, "loss": 1.4988, "step": 1210 }, { "epoch": 0.1342467267001823, "grad_norm": 0.376953125, "learning_rate": 0.00019928600778335774, "loss": 1.5912, "step": 1215 }, { "epoch": 0.13479918236561517, "grad_norm": 0.2890625, "learning_rate": 0.0001992628186020143, "loss": 1.5898, "step": 1220 }, { "epoch": 0.13535163803104802, "grad_norm": 0.30078125, "learning_rate": 0.0001992392602356727, "loss": 1.579, "step": 1225 }, { "epoch": 0.13590409369648085, "grad_norm": 0.29296875, "learning_rate": 0.00019921533277195283, "loss": 1.5507, "step": 1230 }, { "epoch": 0.1364565493619137, "grad_norm": 0.302734375, "learning_rate": 0.00019919103629984728, "loss": 1.5658, "step": 1235 }, { "epoch": 0.13700900502734656, "grad_norm": 0.287109375, "learning_rate": 0.0001991663709097212, "loss": 1.5174, "step": 1240 }, { "epoch": 0.1375614606927794, "grad_norm": 0.30078125, "learning_rate": 0.00019914133669331175, "loss": 1.5341, "step": 1245 }, { "epoch": 0.13811391635821224, "grad_norm": 0.310546875, "learning_rate": 0.00019911593374372788, "loss": 1.5029, "step": 1250 }, { "epoch": 0.1386663720236451, "grad_norm": 0.3125, "learning_rate": 0.00019909016215544998, "loss": 1.5949, "step": 1255 }, { "epoch": 0.13921882768907795, "grad_norm": 0.298828125, "learning_rate": 0.00019906402202432945, "loss": 1.519, "step": 1260 }, { "epoch": 0.1397712833545108, "grad_norm": 0.35546875, "learning_rate": 0.00019903751344758848, "loss": 1.5305, "step": 1265 }, { "epoch": 0.14032373901994366, "grad_norm": 0.33203125, "learning_rate": 0.00019901063652381953, "loss": 1.5157, "step": 1270 }, { "epoch": 0.1408761946853765, "grad_norm": 0.67578125, "learning_rate": 0.00019898339135298508, "loss": 1.4779, "step": 1275 }, { "epoch": 0.14142865035080934, "grad_norm": 0.298828125, "learning_rate": 0.00019895577803641726, "loss": 1.5472, "step": 1280 }, { "epoch": 0.1419811060162422, "grad_norm": 0.328125, "learning_rate": 0.00019892779667681732, "loss": 1.5501, "step": 1285 }, { "epoch": 0.14253356168167505, "grad_norm": 0.3046875, "learning_rate": 0.00019889944737825545, "loss": 1.5952, "step": 1290 }, { "epoch": 0.1430860173471079, "grad_norm": 0.33984375, "learning_rate": 0.0001988707302461703, "loss": 1.5092, "step": 1295 }, { "epoch": 0.14363847301254073, "grad_norm": 0.314453125, "learning_rate": 0.00019884164538736858, "loss": 1.5222, "step": 1300 }, { "epoch": 0.1441909286779736, "grad_norm": 0.3125, "learning_rate": 0.00019881219291002463, "loss": 1.5447, "step": 1305 }, { "epoch": 0.14474338434340644, "grad_norm": 0.29296875, "learning_rate": 0.00019878237292368013, "loss": 1.592, "step": 1310 }, { "epoch": 0.1452958400088393, "grad_norm": 0.298828125, "learning_rate": 0.00019875218553924357, "loss": 1.5346, "step": 1315 }, { "epoch": 0.14584829567427213, "grad_norm": 0.341796875, "learning_rate": 0.00019872163086898993, "loss": 1.5665, "step": 1320 }, { "epoch": 0.14640075133970498, "grad_norm": 0.298828125, "learning_rate": 0.00019869070902656018, "loss": 1.5225, "step": 1325 }, { "epoch": 0.14695320700513784, "grad_norm": 0.328125, "learning_rate": 0.00019865942012696098, "loss": 1.5376, "step": 1330 }, { "epoch": 0.1475056626705707, "grad_norm": 0.298828125, "learning_rate": 0.0001986277642865641, "loss": 1.5207, "step": 1335 }, { "epoch": 0.14805811833600355, "grad_norm": 0.337890625, "learning_rate": 0.00019859574162310608, "loss": 1.4715, "step": 1340 }, { "epoch": 0.14861057400143637, "grad_norm": 0.318359375, "learning_rate": 0.0001985633522556878, "loss": 1.5546, "step": 1345 }, { "epoch": 0.14916302966686923, "grad_norm": 0.294921875, "learning_rate": 0.00019853059630477396, "loss": 1.4695, "step": 1350 }, { "epoch": 0.14971548533230208, "grad_norm": 0.310546875, "learning_rate": 0.00019849747389219272, "loss": 1.529, "step": 1355 }, { "epoch": 0.15026794099773494, "grad_norm": 0.30078125, "learning_rate": 0.0001984639851411352, "loss": 1.5036, "step": 1360 }, { "epoch": 0.1508203966631678, "grad_norm": 0.29296875, "learning_rate": 0.00019843013017615505, "loss": 1.5284, "step": 1365 }, { "epoch": 0.15137285232860062, "grad_norm": 0.318359375, "learning_rate": 0.00019839590912316791, "loss": 1.5161, "step": 1370 }, { "epoch": 0.15192530799403348, "grad_norm": 0.3125, "learning_rate": 0.0001983613221094511, "loss": 1.5213, "step": 1375 }, { "epoch": 0.15247776365946633, "grad_norm": 0.27734375, "learning_rate": 0.0001983263692636429, "loss": 1.5539, "step": 1380 }, { "epoch": 0.15303021932489919, "grad_norm": 0.3046875, "learning_rate": 0.0001982910507157424, "loss": 1.5405, "step": 1385 }, { "epoch": 0.153582674990332, "grad_norm": 0.3125, "learning_rate": 0.00019825536659710867, "loss": 1.4907, "step": 1390 }, { "epoch": 0.15413513065576487, "grad_norm": 0.3359375, "learning_rate": 0.00019821931704046047, "loss": 1.4981, "step": 1395 }, { "epoch": 0.15468758632119772, "grad_norm": 0.310546875, "learning_rate": 0.00019818290217987587, "loss": 1.4737, "step": 1400 }, { "epoch": 0.15524004198663058, "grad_norm": 0.337890625, "learning_rate": 0.0001981461221507914, "loss": 1.5069, "step": 1405 }, { "epoch": 0.15579249765206343, "grad_norm": 0.3203125, "learning_rate": 0.0001981089770900018, "loss": 1.5213, "step": 1410 }, { "epoch": 0.15634495331749626, "grad_norm": 0.306640625, "learning_rate": 0.00019807146713565955, "loss": 1.5582, "step": 1415 }, { "epoch": 0.15689740898292912, "grad_norm": 0.283203125, "learning_rate": 0.00019803359242727425, "loss": 1.5523, "step": 1420 }, { "epoch": 0.15744986464836197, "grad_norm": 0.31640625, "learning_rate": 0.00019799535310571203, "loss": 1.5255, "step": 1425 }, { "epoch": 0.15800232031379483, "grad_norm": 0.296875, "learning_rate": 0.00019795674931319515, "loss": 1.5588, "step": 1430 }, { "epoch": 0.15855477597922768, "grad_norm": 0.296875, "learning_rate": 0.0001979177811933015, "loss": 1.4525, "step": 1435 }, { "epoch": 0.1591072316446605, "grad_norm": 0.294921875, "learning_rate": 0.0001978784488909639, "loss": 1.4614, "step": 1440 }, { "epoch": 0.15965968731009336, "grad_norm": 0.314453125, "learning_rate": 0.00019783875255246973, "loss": 1.5611, "step": 1445 }, { "epoch": 0.16021214297552622, "grad_norm": 0.287109375, "learning_rate": 0.00019779869232546034, "loss": 1.5101, "step": 1450 }, { "epoch": 0.16076459864095907, "grad_norm": 0.294921875, "learning_rate": 0.0001977582683589304, "loss": 1.5875, "step": 1455 }, { "epoch": 0.1613170543063919, "grad_norm": 0.306640625, "learning_rate": 0.00019771748080322745, "loss": 1.4513, "step": 1460 }, { "epoch": 0.16186950997182475, "grad_norm": 0.279296875, "learning_rate": 0.00019767632981005138, "loss": 1.5401, "step": 1465 }, { "epoch": 0.1624219656372576, "grad_norm": 0.275390625, "learning_rate": 0.0001976348155324537, "loss": 1.5393, "step": 1470 }, { "epoch": 0.16297442130269046, "grad_norm": 0.28515625, "learning_rate": 0.00019759293812483713, "loss": 1.6111, "step": 1475 }, { "epoch": 0.16352687696812332, "grad_norm": 0.314453125, "learning_rate": 0.000197550697742955, "loss": 1.5438, "step": 1480 }, { "epoch": 0.16407933263355615, "grad_norm": 0.28125, "learning_rate": 0.0001975080945439106, "loss": 1.5031, "step": 1485 }, { "epoch": 0.164631788298989, "grad_norm": 0.283203125, "learning_rate": 0.00019746512868615656, "loss": 1.474, "step": 1490 }, { "epoch": 0.16518424396442186, "grad_norm": 0.28515625, "learning_rate": 0.0001974218003294945, "loss": 1.5033, "step": 1495 }, { "epoch": 0.1657366996298547, "grad_norm": 0.3046875, "learning_rate": 0.0001973781096350741, "loss": 1.4658, "step": 1500 }, { "epoch": 0.16628915529528757, "grad_norm": 0.283203125, "learning_rate": 0.0001973340567653928, "loss": 1.4781, "step": 1505 }, { "epoch": 0.1668416109607204, "grad_norm": 0.3203125, "learning_rate": 0.00019728964188429503, "loss": 1.503, "step": 1510 }, { "epoch": 0.16739406662615325, "grad_norm": 0.28125, "learning_rate": 0.00019724486515697155, "loss": 1.5261, "step": 1515 }, { "epoch": 0.1679465222915861, "grad_norm": 0.30078125, "learning_rate": 0.00019719972674995905, "loss": 1.5426, "step": 1520 }, { "epoch": 0.16849897795701896, "grad_norm": 0.306640625, "learning_rate": 0.00019715422683113938, "loss": 1.5656, "step": 1525 }, { "epoch": 0.16905143362245179, "grad_norm": 0.287109375, "learning_rate": 0.00019710836556973885, "loss": 1.4961, "step": 1530 }, { "epoch": 0.16960388928788464, "grad_norm": 0.318359375, "learning_rate": 0.00019706214313632784, "loss": 1.5364, "step": 1535 }, { "epoch": 0.1701563449533175, "grad_norm": 0.29296875, "learning_rate": 0.00019701555970281988, "loss": 1.4616, "step": 1540 }, { "epoch": 0.17070880061875035, "grad_norm": 0.302734375, "learning_rate": 0.0001969686154424713, "loss": 1.4938, "step": 1545 }, { "epoch": 0.1712612562841832, "grad_norm": 0.283203125, "learning_rate": 0.00019692131052988034, "loss": 1.4881, "step": 1550 }, { "epoch": 0.17181371194961603, "grad_norm": 0.3046875, "learning_rate": 0.00019687364514098664, "loss": 1.534, "step": 1555 }, { "epoch": 0.1723661676150489, "grad_norm": 0.296875, "learning_rate": 0.00019682561945307052, "loss": 1.4373, "step": 1560 }, { "epoch": 0.17291862328048174, "grad_norm": 0.298828125, "learning_rate": 0.00019677723364475237, "loss": 1.5397, "step": 1565 }, { "epoch": 0.1734710789459146, "grad_norm": 0.28515625, "learning_rate": 0.00019672848789599204, "loss": 1.4886, "step": 1570 }, { "epoch": 0.17402353461134745, "grad_norm": 0.3046875, "learning_rate": 0.00019667938238808797, "loss": 1.5597, "step": 1575 }, { "epoch": 0.17457599027678028, "grad_norm": 0.30078125, "learning_rate": 0.00019662991730367663, "loss": 1.5465, "step": 1580 }, { "epoch": 0.17512844594221313, "grad_norm": 0.28125, "learning_rate": 0.00019658009282673202, "loss": 1.5507, "step": 1585 }, { "epoch": 0.175680901607646, "grad_norm": 0.326171875, "learning_rate": 0.00019652990914256467, "loss": 1.5349, "step": 1590 }, { "epoch": 0.17623335727307884, "grad_norm": 0.3046875, "learning_rate": 0.00019647936643782109, "loss": 1.5655, "step": 1595 }, { "epoch": 0.17678581293851167, "grad_norm": 0.296875, "learning_rate": 0.0001964284649004832, "loss": 1.5414, "step": 1600 }, { "epoch": 0.17733826860394453, "grad_norm": 0.296875, "learning_rate": 0.00019637720471986735, "loss": 1.4833, "step": 1605 }, { "epoch": 0.17789072426937738, "grad_norm": 0.302734375, "learning_rate": 0.00019632558608662402, "loss": 1.4558, "step": 1610 }, { "epoch": 0.17844317993481024, "grad_norm": 0.29296875, "learning_rate": 0.0001962736091927366, "loss": 1.4225, "step": 1615 }, { "epoch": 0.1789956356002431, "grad_norm": 0.333984375, "learning_rate": 0.00019622127423152112, "loss": 1.4695, "step": 1620 }, { "epoch": 0.17954809126567592, "grad_norm": 0.27734375, "learning_rate": 0.00019616858139762534, "loss": 1.5194, "step": 1625 }, { "epoch": 0.18010054693110877, "grad_norm": 0.310546875, "learning_rate": 0.00019611553088702798, "loss": 1.4821, "step": 1630 }, { "epoch": 0.18065300259654163, "grad_norm": 0.296875, "learning_rate": 0.0001960621228970381, "loss": 1.498, "step": 1635 }, { "epoch": 0.18120545826197448, "grad_norm": 0.298828125, "learning_rate": 0.0001960083576262943, "loss": 1.5545, "step": 1640 }, { "epoch": 0.18175791392740734, "grad_norm": 0.2890625, "learning_rate": 0.00019595423527476405, "loss": 1.5343, "step": 1645 }, { "epoch": 0.18231036959284017, "grad_norm": 0.326171875, "learning_rate": 0.00019589975604374286, "loss": 1.5667, "step": 1650 }, { "epoch": 0.18286282525827302, "grad_norm": 0.27734375, "learning_rate": 0.00019584492013585355, "loss": 1.4717, "step": 1655 }, { "epoch": 0.18341528092370588, "grad_norm": 0.326171875, "learning_rate": 0.00019578972775504555, "loss": 1.4784, "step": 1660 }, { "epoch": 0.18396773658913873, "grad_norm": 0.28515625, "learning_rate": 0.00019573417910659412, "loss": 1.4912, "step": 1665 }, { "epoch": 0.18452019225457156, "grad_norm": 0.322265625, "learning_rate": 0.00019567827439709954, "loss": 1.4916, "step": 1670 }, { "epoch": 0.1850726479200044, "grad_norm": 0.296875, "learning_rate": 0.00019562201383448638, "loss": 1.4388, "step": 1675 }, { "epoch": 0.18562510358543727, "grad_norm": 0.318359375, "learning_rate": 0.00019556539762800276, "loss": 1.5014, "step": 1680 }, { "epoch": 0.18617755925087012, "grad_norm": 0.310546875, "learning_rate": 0.00019550842598821952, "loss": 1.5172, "step": 1685 }, { "epoch": 0.18673001491630298, "grad_norm": 0.3046875, "learning_rate": 0.0001954510991270294, "loss": 1.498, "step": 1690 }, { "epoch": 0.1872824705817358, "grad_norm": 0.306640625, "learning_rate": 0.00019539341725764638, "loss": 1.5276, "step": 1695 }, { "epoch": 0.18783492624716866, "grad_norm": 0.287109375, "learning_rate": 0.00019533538059460475, "loss": 1.5612, "step": 1700 }, { "epoch": 0.18838738191260151, "grad_norm": 0.30078125, "learning_rate": 0.0001952769893537584, "loss": 1.5206, "step": 1705 }, { "epoch": 0.18893983757803437, "grad_norm": 0.30859375, "learning_rate": 0.00019521824375228004, "loss": 1.4559, "step": 1710 }, { "epoch": 0.18949229324346722, "grad_norm": 0.29296875, "learning_rate": 0.0001951591440086602, "loss": 1.485, "step": 1715 }, { "epoch": 0.19004474890890005, "grad_norm": 0.294921875, "learning_rate": 0.00019509969034270673, "loss": 1.4358, "step": 1720 }, { "epoch": 0.1905972045743329, "grad_norm": 0.3125, "learning_rate": 0.0001950398829755437, "loss": 1.5716, "step": 1725 }, { "epoch": 0.19114966023976576, "grad_norm": 0.34765625, "learning_rate": 0.0001949797221296107, "loss": 1.5009, "step": 1730 }, { "epoch": 0.19170211590519862, "grad_norm": 0.318359375, "learning_rate": 0.00019491920802866205, "loss": 1.5663, "step": 1735 }, { "epoch": 0.19225457157063144, "grad_norm": 0.28125, "learning_rate": 0.00019485834089776586, "loss": 1.5536, "step": 1740 }, { "epoch": 0.1928070272360643, "grad_norm": 0.29296875, "learning_rate": 0.00019479712096330336, "loss": 1.4775, "step": 1745 }, { "epoch": 0.19335948290149715, "grad_norm": 0.2890625, "learning_rate": 0.0001947355484529678, "loss": 1.4417, "step": 1750 }, { "epoch": 0.19391193856693, "grad_norm": 0.28125, "learning_rate": 0.00019467362359576386, "loss": 1.4184, "step": 1755 }, { "epoch": 0.19446439423236286, "grad_norm": 0.283203125, "learning_rate": 0.00019461134662200668, "loss": 1.4776, "step": 1760 }, { "epoch": 0.1950168498977957, "grad_norm": 0.310546875, "learning_rate": 0.00019454871776332095, "loss": 1.4961, "step": 1765 }, { "epoch": 0.19556930556322855, "grad_norm": 0.30078125, "learning_rate": 0.00019448573725264022, "loss": 1.4678, "step": 1770 }, { "epoch": 0.1961217612286614, "grad_norm": 0.287109375, "learning_rate": 0.00019442240532420584, "loss": 1.5752, "step": 1775 }, { "epoch": 0.19667421689409426, "grad_norm": 0.306640625, "learning_rate": 0.0001943587222135662, "loss": 1.4159, "step": 1780 }, { "epoch": 0.1972266725595271, "grad_norm": 0.296875, "learning_rate": 0.00019429468815757587, "loss": 1.4551, "step": 1785 }, { "epoch": 0.19777912822495994, "grad_norm": 0.29296875, "learning_rate": 0.00019423030339439464, "loss": 1.4391, "step": 1790 }, { "epoch": 0.1983315838903928, "grad_norm": 0.3046875, "learning_rate": 0.00019416556816348663, "loss": 1.525, "step": 1795 }, { "epoch": 0.19888403955582565, "grad_norm": 0.296875, "learning_rate": 0.00019410048270561956, "loss": 1.5037, "step": 1800 }, { "epoch": 0.1994364952212585, "grad_norm": 0.296875, "learning_rate": 0.0001940350472628637, "loss": 1.4907, "step": 1805 }, { "epoch": 0.19998895088669133, "grad_norm": 0.291015625, "learning_rate": 0.00019396926207859084, "loss": 1.5334, "step": 1810 }, { "epoch": 0.20054140655212419, "grad_norm": 0.259765625, "learning_rate": 0.00019390312739747385, "loss": 1.4685, "step": 1815 }, { "epoch": 0.20109386221755704, "grad_norm": 0.29296875, "learning_rate": 0.0001938366434654852, "loss": 1.3745, "step": 1820 }, { "epoch": 0.2016463178829899, "grad_norm": 0.294921875, "learning_rate": 0.00019376981052989653, "loss": 1.4493, "step": 1825 }, { "epoch": 0.20219877354842275, "grad_norm": 0.302734375, "learning_rate": 0.00019370262883927733, "loss": 1.4757, "step": 1830 }, { "epoch": 0.20275122921385558, "grad_norm": 0.3203125, "learning_rate": 0.00019363509864349436, "loss": 1.4893, "step": 1835 }, { "epoch": 0.20330368487928843, "grad_norm": 0.294921875, "learning_rate": 0.0001935672201937105, "loss": 1.4814, "step": 1840 }, { "epoch": 0.2038561405447213, "grad_norm": 0.283203125, "learning_rate": 0.00019349899374238383, "loss": 1.4163, "step": 1845 }, { "epoch": 0.20440859621015414, "grad_norm": 0.30859375, "learning_rate": 0.0001934304195432668, "loss": 1.4938, "step": 1850 }, { "epoch": 0.204961051875587, "grad_norm": 0.2890625, "learning_rate": 0.00019336149785140525, "loss": 1.5428, "step": 1855 }, { "epoch": 0.20551350754101982, "grad_norm": 0.28125, "learning_rate": 0.00019329222892313736, "loss": 1.4776, "step": 1860 }, { "epoch": 0.20606596320645268, "grad_norm": 0.30859375, "learning_rate": 0.00019322261301609286, "loss": 1.4864, "step": 1865 }, { "epoch": 0.20661841887188553, "grad_norm": 0.3125, "learning_rate": 0.00019315265038919192, "loss": 1.4902, "step": 1870 }, { "epoch": 0.2071708745373184, "grad_norm": 0.4296875, "learning_rate": 0.00019308234130264431, "loss": 1.48, "step": 1875 }, { "epoch": 0.20772333020275122, "grad_norm": 0.3125, "learning_rate": 0.0001930116860179483, "loss": 1.4944, "step": 1880 }, { "epoch": 0.20827578586818407, "grad_norm": 0.302734375, "learning_rate": 0.00019294068479788984, "loss": 1.5251, "step": 1885 }, { "epoch": 0.20882824153361693, "grad_norm": 0.291015625, "learning_rate": 0.00019286933790654148, "loss": 1.5332, "step": 1890 }, { "epoch": 0.20938069719904978, "grad_norm": 0.28125, "learning_rate": 0.00019279764560926142, "loss": 1.5146, "step": 1895 }, { "epoch": 0.20993315286448264, "grad_norm": 0.302734375, "learning_rate": 0.00019272560817269247, "loss": 1.5907, "step": 1900 }, { "epoch": 0.21048560852991546, "grad_norm": 0.294921875, "learning_rate": 0.00019265322586476118, "loss": 1.6496, "step": 1905 }, { "epoch": 0.21103806419534832, "grad_norm": 0.298828125, "learning_rate": 0.0001925804989546767, "loss": 1.5383, "step": 1910 }, { "epoch": 0.21159051986078117, "grad_norm": 0.302734375, "learning_rate": 0.0001925074277129299, "loss": 1.4571, "step": 1915 }, { "epoch": 0.21214297552621403, "grad_norm": 0.279296875, "learning_rate": 0.0001924340124112923, "loss": 1.5052, "step": 1920 }, { "epoch": 0.21269543119164688, "grad_norm": 0.30078125, "learning_rate": 0.00019236025332281507, "loss": 1.522, "step": 1925 }, { "epoch": 0.2132478868570797, "grad_norm": 0.294921875, "learning_rate": 0.00019228615072182796, "loss": 1.5114, "step": 1930 }, { "epoch": 0.21380034252251257, "grad_norm": 0.28515625, "learning_rate": 0.00019221170488393843, "loss": 1.4876, "step": 1935 }, { "epoch": 0.21435279818794542, "grad_norm": 0.30078125, "learning_rate": 0.00019213691608603047, "loss": 1.512, "step": 1940 }, { "epoch": 0.21490525385337828, "grad_norm": 0.28125, "learning_rate": 0.0001920617846062636, "loss": 1.4158, "step": 1945 }, { "epoch": 0.2154577095188111, "grad_norm": 0.302734375, "learning_rate": 0.000191986310724072, "loss": 1.4746, "step": 1950 }, { "epoch": 0.21601016518424396, "grad_norm": 0.296875, "learning_rate": 0.00019191049472016313, "loss": 1.4644, "step": 1955 }, { "epoch": 0.2165626208496768, "grad_norm": 0.357421875, "learning_rate": 0.0001918343368765171, "loss": 1.4734, "step": 1960 }, { "epoch": 0.21711507651510967, "grad_norm": 0.28515625, "learning_rate": 0.0001917578374763853, "loss": 1.45, "step": 1965 }, { "epoch": 0.21766753218054252, "grad_norm": 0.30078125, "learning_rate": 0.00019168099680428943, "loss": 1.4963, "step": 1970 }, { "epoch": 0.21821998784597535, "grad_norm": 0.296875, "learning_rate": 0.0001916038151460206, "loss": 1.4858, "step": 1975 }, { "epoch": 0.2187724435114082, "grad_norm": 0.3671875, "learning_rate": 0.000191526292788638, "loss": 1.4716, "step": 1980 }, { "epoch": 0.21932489917684106, "grad_norm": 0.3203125, "learning_rate": 0.00019144843002046806, "loss": 1.4051, "step": 1985 }, { "epoch": 0.21987735484227391, "grad_norm": 0.314453125, "learning_rate": 0.00019137022713110324, "loss": 1.4374, "step": 1990 }, { "epoch": 0.22042981050770677, "grad_norm": 0.298828125, "learning_rate": 0.00019129168441140104, "loss": 1.4783, "step": 1995 }, { "epoch": 0.2209822661731396, "grad_norm": 0.31640625, "learning_rate": 0.00019121280215348286, "loss": 1.428, "step": 2000 }, { "epoch": 0.22153472183857245, "grad_norm": 0.33203125, "learning_rate": 0.00019113358065073297, "loss": 1.4127, "step": 2005 }, { "epoch": 0.2220871775040053, "grad_norm": 0.28125, "learning_rate": 0.00019105402019779728, "loss": 1.5459, "step": 2010 }, { "epoch": 0.22263963316943816, "grad_norm": 0.330078125, "learning_rate": 0.00019097412109058247, "loss": 1.4805, "step": 2015 }, { "epoch": 0.223192088834871, "grad_norm": 0.29296875, "learning_rate": 0.00019089388362625466, "loss": 1.4213, "step": 2020 }, { "epoch": 0.22374454450030384, "grad_norm": 0.302734375, "learning_rate": 0.00019081330810323852, "loss": 1.4803, "step": 2025 }, { "epoch": 0.2242970001657367, "grad_norm": 0.287109375, "learning_rate": 0.000190732394821216, "loss": 1.4711, "step": 2030 }, { "epoch": 0.22484945583116955, "grad_norm": 0.302734375, "learning_rate": 0.00019065114408112517, "loss": 1.5147, "step": 2035 }, { "epoch": 0.2254019114966024, "grad_norm": 0.408203125, "learning_rate": 0.00019056955618515934, "loss": 1.4599, "step": 2040 }, { "epoch": 0.22595436716203524, "grad_norm": 0.3046875, "learning_rate": 0.00019048763143676578, "loss": 1.4952, "step": 2045 }, { "epoch": 0.2265068228274681, "grad_norm": 0.283203125, "learning_rate": 0.0001904053701406445, "loss": 1.4651, "step": 2050 }, { "epoch": 0.22705927849290095, "grad_norm": 0.306640625, "learning_rate": 0.0001903227726027473, "loss": 1.4525, "step": 2055 }, { "epoch": 0.2276117341583338, "grad_norm": 0.29296875, "learning_rate": 0.00019023983913027655, "loss": 1.4922, "step": 2060 }, { "epoch": 0.22816418982376666, "grad_norm": 0.283203125, "learning_rate": 0.00019015657003168405, "loss": 1.5772, "step": 2065 }, { "epoch": 0.22871664548919948, "grad_norm": 0.306640625, "learning_rate": 0.00019007296561666985, "loss": 1.4583, "step": 2070 }, { "epoch": 0.22926910115463234, "grad_norm": 0.271484375, "learning_rate": 0.00018998902619618116, "loss": 1.5524, "step": 2075 }, { "epoch": 0.2298215568200652, "grad_norm": 0.291015625, "learning_rate": 0.00018990475208241115, "loss": 1.4469, "step": 2080 }, { "epoch": 0.23037401248549805, "grad_norm": 0.296875, "learning_rate": 0.0001898201435887978, "loss": 1.528, "step": 2085 }, { "epoch": 0.23092646815093087, "grad_norm": 0.27734375, "learning_rate": 0.00018973520103002277, "loss": 1.4708, "step": 2090 }, { "epoch": 0.23147892381636373, "grad_norm": 0.27734375, "learning_rate": 0.0001896499247220102, "loss": 1.4552, "step": 2095 }, { "epoch": 0.23203137948179658, "grad_norm": 0.287109375, "learning_rate": 0.00018956431498192547, "loss": 1.5011, "step": 2100 }, { "epoch": 0.23258383514722944, "grad_norm": 0.318359375, "learning_rate": 0.00018947837212817415, "loss": 1.5578, "step": 2105 }, { "epoch": 0.2331362908126623, "grad_norm": 0.279296875, "learning_rate": 0.00018939209648040071, "loss": 1.5539, "step": 2110 }, { "epoch": 0.23368874647809512, "grad_norm": 0.294921875, "learning_rate": 0.00018930548835948736, "loss": 1.4746, "step": 2115 }, { "epoch": 0.23424120214352798, "grad_norm": 0.283203125, "learning_rate": 0.00018921854808755294, "loss": 1.4893, "step": 2120 }, { "epoch": 0.23479365780896083, "grad_norm": 0.28125, "learning_rate": 0.00018913127598795156, "loss": 1.4536, "step": 2125 }, { "epoch": 0.2353461134743937, "grad_norm": 0.298828125, "learning_rate": 0.00018904367238527155, "loss": 1.5643, "step": 2130 }, { "epoch": 0.23589856913982654, "grad_norm": 0.302734375, "learning_rate": 0.00018895573760533413, "loss": 1.502, "step": 2135 }, { "epoch": 0.23645102480525937, "grad_norm": 0.30859375, "learning_rate": 0.00018886747197519233, "loss": 1.5057, "step": 2140 }, { "epoch": 0.23700348047069222, "grad_norm": 0.310546875, "learning_rate": 0.0001887788758231296, "loss": 1.4701, "step": 2145 }, { "epoch": 0.23755593613612508, "grad_norm": 0.296875, "learning_rate": 0.00018868994947865883, "loss": 1.5663, "step": 2150 }, { "epoch": 0.23810839180155793, "grad_norm": 0.298828125, "learning_rate": 0.00018860069327252086, "loss": 1.4895, "step": 2155 }, { "epoch": 0.23866084746699076, "grad_norm": 0.294921875, "learning_rate": 0.0001885111075366834, "loss": 1.4719, "step": 2160 }, { "epoch": 0.23921330313242362, "grad_norm": 0.294921875, "learning_rate": 0.00018842119260433982, "loss": 1.495, "step": 2165 }, { "epoch": 0.23976575879785647, "grad_norm": 0.291015625, "learning_rate": 0.0001883309488099078, "loss": 1.4955, "step": 2170 }, { "epoch": 0.24031821446328933, "grad_norm": 0.306640625, "learning_rate": 0.00018824037648902819, "loss": 1.4881, "step": 2175 }, { "epoch": 0.24087067012872218, "grad_norm": 0.287109375, "learning_rate": 0.00018814947597856367, "loss": 1.4388, "step": 2180 }, { "epoch": 0.241423125794155, "grad_norm": 0.291015625, "learning_rate": 0.00018805824761659764, "loss": 1.5011, "step": 2185 }, { "epoch": 0.24197558145958786, "grad_norm": 0.2890625, "learning_rate": 0.00018796669174243273, "loss": 1.4647, "step": 2190 }, { "epoch": 0.24252803712502072, "grad_norm": 0.3203125, "learning_rate": 0.0001878748086965898, "loss": 1.4424, "step": 2195 }, { "epoch": 0.24308049279045357, "grad_norm": 0.30078125, "learning_rate": 0.0001877825988208065, "loss": 1.4438, "step": 2200 }, { "epoch": 0.24363294845588643, "grad_norm": 0.3125, "learning_rate": 0.00018769006245803596, "loss": 1.4229, "step": 2205 }, { "epoch": 0.24418540412131925, "grad_norm": 0.298828125, "learning_rate": 0.0001875971999524458, "loss": 1.4438, "step": 2210 }, { "epoch": 0.2447378597867521, "grad_norm": 0.298828125, "learning_rate": 0.0001875040116494165, "loss": 1.3864, "step": 2215 }, { "epoch": 0.24529031545218496, "grad_norm": 0.3046875, "learning_rate": 0.00018741049789554028, "loss": 1.5423, "step": 2220 }, { "epoch": 0.24584277111761782, "grad_norm": 0.30859375, "learning_rate": 0.00018731665903861985, "loss": 1.4257, "step": 2225 }, { "epoch": 0.24639522678305065, "grad_norm": 0.30859375, "learning_rate": 0.00018722249542766703, "loss": 1.4072, "step": 2230 }, { "epoch": 0.2469476824484835, "grad_norm": 0.28125, "learning_rate": 0.00018712800741290154, "loss": 1.4309, "step": 2235 }, { "epoch": 0.24750013811391636, "grad_norm": 0.283203125, "learning_rate": 0.00018703319534574956, "loss": 1.373, "step": 2240 }, { "epoch": 0.2480525937793492, "grad_norm": 0.37890625, "learning_rate": 0.00018693805957884258, "loss": 1.4164, "step": 2245 }, { "epoch": 0.24860504944478207, "grad_norm": 0.291015625, "learning_rate": 0.00018684260046601594, "loss": 1.5035, "step": 2250 }, { "epoch": 0.2491575051102149, "grad_norm": 0.294921875, "learning_rate": 0.0001867468183623077, "loss": 1.4215, "step": 2255 }, { "epoch": 0.24970996077564775, "grad_norm": 0.306640625, "learning_rate": 0.0001866507136239571, "loss": 1.452, "step": 2260 }, { "epoch": 0.2502624164410806, "grad_norm": 0.28515625, "learning_rate": 0.00018655428660840345, "loss": 1.4321, "step": 2265 }, { "epoch": 0.25081487210651343, "grad_norm": 0.404296875, "learning_rate": 0.00018645753767428458, "loss": 1.4725, "step": 2270 }, { "epoch": 0.2513673277719463, "grad_norm": 0.326171875, "learning_rate": 0.0001863604671814357, "loss": 1.4643, "step": 2275 }, { "epoch": 0.25191978343737914, "grad_norm": 0.294921875, "learning_rate": 0.00018626307549088792, "loss": 1.4399, "step": 2280 }, { "epoch": 0.252472239102812, "grad_norm": 0.306640625, "learning_rate": 0.0001861653629648671, "loss": 1.5461, "step": 2285 }, { "epoch": 0.25302469476824485, "grad_norm": 0.29296875, "learning_rate": 0.00018606732996679224, "loss": 1.4933, "step": 2290 }, { "epoch": 0.2535771504336777, "grad_norm": 0.283203125, "learning_rate": 0.00018596897686127428, "loss": 1.4928, "step": 2295 }, { "epoch": 0.25412960609911056, "grad_norm": 0.283203125, "learning_rate": 0.0001858703040141148, "loss": 1.4852, "step": 2300 }, { "epoch": 0.2546820617645434, "grad_norm": 0.291015625, "learning_rate": 0.00018577131179230448, "loss": 1.4447, "step": 2305 }, { "epoch": 0.25523451742997627, "grad_norm": 0.302734375, "learning_rate": 0.00018567200056402195, "loss": 1.5006, "step": 2310 }, { "epoch": 0.2557869730954091, "grad_norm": 0.306640625, "learning_rate": 0.00018557237069863222, "loss": 1.4365, "step": 2315 }, { "epoch": 0.2563394287608419, "grad_norm": 0.27734375, "learning_rate": 0.00018547242256668548, "loss": 1.5176, "step": 2320 }, { "epoch": 0.2568918844262748, "grad_norm": 0.33203125, "learning_rate": 0.00018537215653991552, "loss": 1.5278, "step": 2325 }, { "epoch": 0.25744434009170764, "grad_norm": 0.2890625, "learning_rate": 0.0001852715729912386, "loss": 1.5096, "step": 2330 }, { "epoch": 0.25799679575714046, "grad_norm": 0.298828125, "learning_rate": 0.00018517067229475184, "loss": 1.5419, "step": 2335 }, { "epoch": 0.25854925142257335, "grad_norm": 0.3046875, "learning_rate": 0.00018506945482573195, "loss": 1.4599, "step": 2340 }, { "epoch": 0.2591017070880062, "grad_norm": 0.271484375, "learning_rate": 0.0001849679209606338, "loss": 1.4883, "step": 2345 }, { "epoch": 0.25965416275343906, "grad_norm": 0.298828125, "learning_rate": 0.000184866071077089, "loss": 1.5316, "step": 2350 }, { "epoch": 0.2602066184188719, "grad_norm": 0.33984375, "learning_rate": 0.00018476390555390457, "loss": 1.4475, "step": 2355 }, { "epoch": 0.2607590740843047, "grad_norm": 0.310546875, "learning_rate": 0.0001846614247710614, "loss": 1.5135, "step": 2360 }, { "epoch": 0.2613115297497376, "grad_norm": 0.294921875, "learning_rate": 0.000184558629109713, "loss": 1.5215, "step": 2365 }, { "epoch": 0.2618639854151704, "grad_norm": 0.2890625, "learning_rate": 0.00018445551895218394, "loss": 1.4583, "step": 2370 }, { "epoch": 0.2624164410806033, "grad_norm": 0.291015625, "learning_rate": 0.00018435209468196847, "loss": 1.5316, "step": 2375 }, { "epoch": 0.26296889674603613, "grad_norm": 0.2734375, "learning_rate": 0.00018424835668372919, "loss": 1.5076, "step": 2380 }, { "epoch": 0.26352135241146896, "grad_norm": 0.302734375, "learning_rate": 0.0001841443053432955, "loss": 1.4466, "step": 2385 }, { "epoch": 0.26407380807690184, "grad_norm": 0.30078125, "learning_rate": 0.00018403994104766212, "loss": 1.5106, "step": 2390 }, { "epoch": 0.26462626374233467, "grad_norm": 0.3359375, "learning_rate": 0.00018393526418498786, "loss": 1.4642, "step": 2395 }, { "epoch": 0.26517871940776755, "grad_norm": 0.337890625, "learning_rate": 0.00018383027514459402, "loss": 1.4459, "step": 2400 }, { "epoch": 0.2657311750732004, "grad_norm": 0.318359375, "learning_rate": 0.00018372497431696288, "loss": 1.4776, "step": 2405 }, { "epoch": 0.2662836307386332, "grad_norm": 0.29296875, "learning_rate": 0.00018361936209373644, "loss": 1.3993, "step": 2410 }, { "epoch": 0.2668360864040661, "grad_norm": 0.283203125, "learning_rate": 0.00018351343886771488, "loss": 1.5118, "step": 2415 }, { "epoch": 0.2673885420694989, "grad_norm": 0.3125, "learning_rate": 0.00018340720503285497, "loss": 1.4976, "step": 2420 }, { "epoch": 0.2679409977349318, "grad_norm": 0.296875, "learning_rate": 0.00018330066098426882, "loss": 1.5046, "step": 2425 }, { "epoch": 0.2684934534003646, "grad_norm": 0.298828125, "learning_rate": 0.00018319380711822225, "loss": 1.4667, "step": 2430 }, { "epoch": 0.26904590906579745, "grad_norm": 0.30859375, "learning_rate": 0.00018308664383213344, "loss": 1.5915, "step": 2435 }, { "epoch": 0.26959836473123033, "grad_norm": 0.298828125, "learning_rate": 0.00018297917152457126, "loss": 1.5324, "step": 2440 }, { "epoch": 0.27015082039666316, "grad_norm": 0.283203125, "learning_rate": 0.00018287139059525412, "loss": 1.4487, "step": 2445 }, { "epoch": 0.27070327606209604, "grad_norm": 0.283203125, "learning_rate": 0.00018276330144504803, "loss": 1.4692, "step": 2450 }, { "epoch": 0.27125573172752887, "grad_norm": 0.28125, "learning_rate": 0.0001826549044759655, "loss": 1.4531, "step": 2455 }, { "epoch": 0.2718081873929617, "grad_norm": 0.306640625, "learning_rate": 0.00018254620009116397, "loss": 1.4298, "step": 2460 }, { "epoch": 0.2723606430583946, "grad_norm": 0.2890625, "learning_rate": 0.00018243718869494408, "loss": 1.5047, "step": 2465 }, { "epoch": 0.2729130987238274, "grad_norm": 0.5546875, "learning_rate": 0.0001823278706927484, "loss": 1.4877, "step": 2470 }, { "epoch": 0.27346555438926023, "grad_norm": 0.384765625, "learning_rate": 0.00018221824649115984, "loss": 1.5091, "step": 2475 }, { "epoch": 0.2740180100546931, "grad_norm": 0.279296875, "learning_rate": 0.00018210831649790018, "loss": 1.4246, "step": 2480 }, { "epoch": 0.27457046572012594, "grad_norm": 0.29296875, "learning_rate": 0.00018199808112182847, "loss": 1.4186, "step": 2485 }, { "epoch": 0.2751229213855588, "grad_norm": 0.302734375, "learning_rate": 0.00018188754077293963, "loss": 1.415, "step": 2490 }, { "epoch": 0.27567537705099165, "grad_norm": 0.279296875, "learning_rate": 0.00018177669586236277, "loss": 1.4204, "step": 2495 }, { "epoch": 0.2762278327164245, "grad_norm": 0.298828125, "learning_rate": 0.0001816655468023598, "loss": 1.4628, "step": 2500 }, { "epoch": 0.27678028838185736, "grad_norm": 0.291015625, "learning_rate": 0.00018155409400632386, "loss": 1.4822, "step": 2505 }, { "epoch": 0.2773327440472902, "grad_norm": 0.302734375, "learning_rate": 0.0001814423378887777, "loss": 1.4151, "step": 2510 }, { "epoch": 0.2778851997127231, "grad_norm": 0.298828125, "learning_rate": 0.00018133027886537225, "loss": 1.414, "step": 2515 }, { "epoch": 0.2784376553781559, "grad_norm": 0.28125, "learning_rate": 0.00018121791735288504, "loss": 1.4126, "step": 2520 }, { "epoch": 0.27899011104358873, "grad_norm": 0.291015625, "learning_rate": 0.00018110525376921862, "loss": 1.4385, "step": 2525 }, { "epoch": 0.2795425667090216, "grad_norm": 0.29296875, "learning_rate": 0.00018099228853339901, "loss": 1.4447, "step": 2530 }, { "epoch": 0.28009502237445444, "grad_norm": 0.283203125, "learning_rate": 0.00018087902206557411, "loss": 1.5647, "step": 2535 }, { "epoch": 0.2806474780398873, "grad_norm": 0.3046875, "learning_rate": 0.00018076545478701235, "loss": 1.4131, "step": 2540 }, { "epoch": 0.28119993370532015, "grad_norm": 0.302734375, "learning_rate": 0.00018065158712010076, "loss": 1.4021, "step": 2545 }, { "epoch": 0.281752389370753, "grad_norm": 0.283203125, "learning_rate": 0.0001805374194883437, "loss": 1.5636, "step": 2550 }, { "epoch": 0.28230484503618586, "grad_norm": 0.306640625, "learning_rate": 0.00018042295231636115, "loss": 1.559, "step": 2555 }, { "epoch": 0.2828573007016187, "grad_norm": 0.298828125, "learning_rate": 0.0001803081860298872, "loss": 1.49, "step": 2560 }, { "epoch": 0.28340975636705157, "grad_norm": 0.279296875, "learning_rate": 0.0001801931210557684, "loss": 1.4524, "step": 2565 }, { "epoch": 0.2839622120324844, "grad_norm": 0.291015625, "learning_rate": 0.00018007775782196214, "loss": 1.4295, "step": 2570 }, { "epoch": 0.2845146676979172, "grad_norm": 0.30859375, "learning_rate": 0.00017996209675753523, "loss": 1.4928, "step": 2575 }, { "epoch": 0.2850671233633501, "grad_norm": 0.31640625, "learning_rate": 0.0001798461382926621, "loss": 1.427, "step": 2580 }, { "epoch": 0.28561957902878293, "grad_norm": 0.28515625, "learning_rate": 0.00017972988285862337, "loss": 1.5194, "step": 2585 }, { "epoch": 0.2861720346942158, "grad_norm": 0.28515625, "learning_rate": 0.00017961333088780404, "loss": 1.4244, "step": 2590 }, { "epoch": 0.28672449035964864, "grad_norm": 0.3125, "learning_rate": 0.00017949648281369217, "loss": 1.5016, "step": 2595 }, { "epoch": 0.28727694602508147, "grad_norm": 0.287109375, "learning_rate": 0.00017937933907087703, "loss": 1.3756, "step": 2600 }, { "epoch": 0.28782940169051435, "grad_norm": 0.30859375, "learning_rate": 0.00017926190009504752, "loss": 1.4507, "step": 2605 }, { "epoch": 0.2883818573559472, "grad_norm": 0.287109375, "learning_rate": 0.00017914416632299065, "loss": 1.4099, "step": 2610 }, { "epoch": 0.28893431302138, "grad_norm": 0.306640625, "learning_rate": 0.00017902613819258985, "loss": 1.4358, "step": 2615 }, { "epoch": 0.2894867686868129, "grad_norm": 0.310546875, "learning_rate": 0.0001789078161428233, "loss": 1.3748, "step": 2620 }, { "epoch": 0.2900392243522457, "grad_norm": 0.2890625, "learning_rate": 0.00017878920061376247, "loss": 1.3931, "step": 2625 }, { "epoch": 0.2905916800176786, "grad_norm": 0.314453125, "learning_rate": 0.0001786702920465702, "loss": 1.4582, "step": 2630 }, { "epoch": 0.2911441356831114, "grad_norm": 0.30078125, "learning_rate": 0.00017855109088349926, "loss": 1.4293, "step": 2635 }, { "epoch": 0.29169659134854425, "grad_norm": 0.314453125, "learning_rate": 0.00017843159756789076, "loss": 1.4838, "step": 2640 }, { "epoch": 0.29224904701397714, "grad_norm": 0.2890625, "learning_rate": 0.00017831181254417228, "loss": 1.484, "step": 2645 }, { "epoch": 0.29280150267940996, "grad_norm": 0.294921875, "learning_rate": 0.00017819173625785643, "loss": 1.4863, "step": 2650 }, { "epoch": 0.29335395834484285, "grad_norm": 0.287109375, "learning_rate": 0.00017807136915553903, "loss": 1.5217, "step": 2655 }, { "epoch": 0.2939064140102757, "grad_norm": 0.306640625, "learning_rate": 0.0001779507116848976, "loss": 1.4755, "step": 2660 }, { "epoch": 0.2944588696757085, "grad_norm": 0.2890625, "learning_rate": 0.00017782976429468956, "loss": 1.4407, "step": 2665 }, { "epoch": 0.2950113253411414, "grad_norm": 0.314453125, "learning_rate": 0.00017770852743475066, "loss": 1.4386, "step": 2670 }, { "epoch": 0.2955637810065742, "grad_norm": 0.28125, "learning_rate": 0.00017758700155599317, "loss": 1.4737, "step": 2675 }, { "epoch": 0.2961162366720071, "grad_norm": 0.283203125, "learning_rate": 0.00017746518711040442, "loss": 1.4389, "step": 2680 }, { "epoch": 0.2966686923374399, "grad_norm": 0.3046875, "learning_rate": 0.00017734308455104496, "loss": 1.392, "step": 2685 }, { "epoch": 0.29722114800287275, "grad_norm": 0.283203125, "learning_rate": 0.00017722069433204687, "loss": 1.494, "step": 2690 }, { "epoch": 0.29777360366830563, "grad_norm": 0.28125, "learning_rate": 0.00017709801690861214, "loss": 1.518, "step": 2695 }, { "epoch": 0.29832605933373846, "grad_norm": 0.27734375, "learning_rate": 0.00017697505273701097, "loss": 1.4549, "step": 2700 }, { "epoch": 0.29887851499917134, "grad_norm": 0.30859375, "learning_rate": 0.00017685180227458003, "loss": 1.5111, "step": 2705 }, { "epoch": 0.29943097066460417, "grad_norm": 0.353515625, "learning_rate": 0.0001767282659797208, "loss": 1.3792, "step": 2710 }, { "epoch": 0.299983426330037, "grad_norm": 0.298828125, "learning_rate": 0.0001766044443118978, "loss": 1.4517, "step": 2715 }, { "epoch": 0.3005358819954699, "grad_norm": 0.2890625, "learning_rate": 0.000176480337731637, "loss": 1.3493, "step": 2720 }, { "epoch": 0.3010883376609027, "grad_norm": 0.271484375, "learning_rate": 0.000176355946700524, "loss": 1.4132, "step": 2725 }, { "epoch": 0.3016407933263356, "grad_norm": 0.302734375, "learning_rate": 0.00017623127168120233, "loss": 1.4985, "step": 2730 }, { "epoch": 0.3021932489917684, "grad_norm": 0.318359375, "learning_rate": 0.00017610631313737173, "loss": 1.4538, "step": 2735 }, { "epoch": 0.30274570465720124, "grad_norm": 0.310546875, "learning_rate": 0.00017598107153378657, "loss": 1.5276, "step": 2740 }, { "epoch": 0.3032981603226341, "grad_norm": 0.298828125, "learning_rate": 0.00017585554733625384, "loss": 1.4989, "step": 2745 }, { "epoch": 0.30385061598806695, "grad_norm": 0.287109375, "learning_rate": 0.00017572974101163165, "loss": 1.4242, "step": 2750 }, { "epoch": 0.3044030716534998, "grad_norm": 0.31640625, "learning_rate": 0.00017560365302782738, "loss": 1.4799, "step": 2755 }, { "epoch": 0.30495552731893266, "grad_norm": 0.314453125, "learning_rate": 0.00017547728385379605, "loss": 1.4403, "step": 2760 }, { "epoch": 0.3055079829843655, "grad_norm": 0.318359375, "learning_rate": 0.0001753506339595384, "loss": 1.4773, "step": 2765 }, { "epoch": 0.30606043864979837, "grad_norm": 0.287109375, "learning_rate": 0.00017522370381609935, "loss": 1.4368, "step": 2770 }, { "epoch": 0.3066128943152312, "grad_norm": 0.291015625, "learning_rate": 0.00017509649389556607, "loss": 1.4827, "step": 2775 }, { "epoch": 0.307165349980664, "grad_norm": 0.3203125, "learning_rate": 0.00017496900467106627, "loss": 1.443, "step": 2780 }, { "epoch": 0.3077178056460969, "grad_norm": 0.306640625, "learning_rate": 0.00017484123661676656, "loss": 1.4532, "step": 2785 }, { "epoch": 0.30827026131152974, "grad_norm": 0.283203125, "learning_rate": 0.0001747131902078705, "loss": 1.4416, "step": 2790 }, { "epoch": 0.3088227169769626, "grad_norm": 0.5, "learning_rate": 0.00017458486592061704, "loss": 1.4443, "step": 2795 }, { "epoch": 0.30937517264239545, "grad_norm": 0.310546875, "learning_rate": 0.00017445626423227844, "loss": 1.4009, "step": 2800 }, { "epoch": 0.3099276283078283, "grad_norm": 0.298828125, "learning_rate": 0.0001743273856211589, "loss": 1.4307, "step": 2805 }, { "epoch": 0.31048008397326116, "grad_norm": 0.306640625, "learning_rate": 0.00017419823056659243, "loss": 1.5067, "step": 2810 }, { "epoch": 0.311032539638694, "grad_norm": 0.28515625, "learning_rate": 0.00017406879954894134, "loss": 1.4288, "step": 2815 }, { "epoch": 0.31158499530412687, "grad_norm": 0.302734375, "learning_rate": 0.00017393909304959414, "loss": 1.467, "step": 2820 }, { "epoch": 0.3121374509695597, "grad_norm": 0.306640625, "learning_rate": 0.00017380911155096408, "loss": 1.4855, "step": 2825 }, { "epoch": 0.3126899066349925, "grad_norm": 0.310546875, "learning_rate": 0.00017367885553648717, "loss": 1.4932, "step": 2830 }, { "epoch": 0.3132423623004254, "grad_norm": 0.279296875, "learning_rate": 0.00017354832549062034, "loss": 1.4093, "step": 2835 }, { "epoch": 0.31379481796585823, "grad_norm": 0.310546875, "learning_rate": 0.00017341752189883983, "loss": 1.465, "step": 2840 }, { "epoch": 0.3143472736312911, "grad_norm": 0.2734375, "learning_rate": 0.0001732864452476392, "loss": 1.45, "step": 2845 }, { "epoch": 0.31489972929672394, "grad_norm": 0.3125, "learning_rate": 0.0001731550960245276, "loss": 1.4682, "step": 2850 }, { "epoch": 0.31545218496215677, "grad_norm": 0.2890625, "learning_rate": 0.00017302347471802798, "loss": 1.4693, "step": 2855 }, { "epoch": 0.31600464062758965, "grad_norm": 0.29296875, "learning_rate": 0.00017289158181767517, "loss": 1.4372, "step": 2860 }, { "epoch": 0.3165570962930225, "grad_norm": 0.29296875, "learning_rate": 0.00017275941781401427, "loss": 1.4883, "step": 2865 }, { "epoch": 0.31710955195845536, "grad_norm": 0.326171875, "learning_rate": 0.00017262698319859846, "loss": 1.6259, "step": 2870 }, { "epoch": 0.3176620076238882, "grad_norm": 0.2890625, "learning_rate": 0.00017249427846398766, "loss": 1.4421, "step": 2875 }, { "epoch": 0.318214463289321, "grad_norm": 0.29296875, "learning_rate": 0.00017236130410374625, "loss": 1.4753, "step": 2880 }, { "epoch": 0.3187669189547539, "grad_norm": 0.345703125, "learning_rate": 0.0001722280606124415, "loss": 1.4435, "step": 2885 }, { "epoch": 0.3193193746201867, "grad_norm": 0.322265625, "learning_rate": 0.00017209454848564156, "loss": 1.3777, "step": 2890 }, { "epoch": 0.31987183028561955, "grad_norm": 0.287109375, "learning_rate": 0.00017196076821991384, "loss": 1.4947, "step": 2895 }, { "epoch": 0.32042428595105243, "grad_norm": 0.296875, "learning_rate": 0.00017182672031282296, "loss": 1.4574, "step": 2900 }, { "epoch": 0.32097674161648526, "grad_norm": 0.30078125, "learning_rate": 0.00017169240526292896, "loss": 1.5842, "step": 2905 }, { "epoch": 0.32152919728191814, "grad_norm": 0.294921875, "learning_rate": 0.0001715578235697855, "loss": 1.4548, "step": 2910 }, { "epoch": 0.32208165294735097, "grad_norm": 0.3046875, "learning_rate": 0.0001714229757339379, "loss": 1.4684, "step": 2915 }, { "epoch": 0.3226341086127838, "grad_norm": 0.296875, "learning_rate": 0.00017128786225692136, "loss": 1.4647, "step": 2920 }, { "epoch": 0.3231865642782167, "grad_norm": 0.30859375, "learning_rate": 0.00017115248364125906, "loss": 1.4044, "step": 2925 }, { "epoch": 0.3237390199436495, "grad_norm": 0.287109375, "learning_rate": 0.00017101684039046036, "loss": 1.5083, "step": 2930 }, { "epoch": 0.3242914756090824, "grad_norm": 0.32421875, "learning_rate": 0.00017088093300901883, "loss": 1.5365, "step": 2935 }, { "epoch": 0.3248439312745152, "grad_norm": 0.302734375, "learning_rate": 0.00017074476200241035, "loss": 1.445, "step": 2940 }, { "epoch": 0.32539638693994805, "grad_norm": 0.349609375, "learning_rate": 0.0001706083278770914, "loss": 1.4662, "step": 2945 }, { "epoch": 0.32594884260538093, "grad_norm": 0.294921875, "learning_rate": 0.00017047163114049702, "loss": 1.4684, "step": 2950 }, { "epoch": 0.32650129827081376, "grad_norm": 0.310546875, "learning_rate": 0.00017033467230103894, "loss": 1.4445, "step": 2955 }, { "epoch": 0.32705375393624664, "grad_norm": 0.3046875, "learning_rate": 0.00017019745186810378, "loss": 1.4906, "step": 2960 }, { "epoch": 0.32760620960167947, "grad_norm": 0.30859375, "learning_rate": 0.0001700599703520511, "loss": 1.5048, "step": 2965 }, { "epoch": 0.3281586652671123, "grad_norm": 0.310546875, "learning_rate": 0.00016992222826421133, "loss": 1.4527, "step": 2970 }, { "epoch": 0.3287111209325452, "grad_norm": 0.322265625, "learning_rate": 0.0001697842261168843, "loss": 1.4886, "step": 2975 }, { "epoch": 0.329263576597978, "grad_norm": 0.287109375, "learning_rate": 0.00016964596442333696, "loss": 1.3755, "step": 2980 }, { "epoch": 0.3298160322634109, "grad_norm": 0.322265625, "learning_rate": 0.00016950744369780148, "loss": 1.4592, "step": 2985 }, { "epoch": 0.3303684879288437, "grad_norm": 0.298828125, "learning_rate": 0.00016936866445547353, "loss": 1.431, "step": 2990 }, { "epoch": 0.33092094359427654, "grad_norm": 0.3203125, "learning_rate": 0.00016922962721251038, "loss": 1.4769, "step": 2995 }, { "epoch": 0.3314733992597094, "grad_norm": 0.322265625, "learning_rate": 0.0001690903324860286, "loss": 1.3937, "step": 3000 }, { "epoch": 0.33202585492514225, "grad_norm": 0.287109375, "learning_rate": 0.0001689507807941027, "loss": 1.5291, "step": 3005 }, { "epoch": 0.33257831059057513, "grad_norm": 0.408203125, "learning_rate": 0.0001688109726557627, "loss": 1.4393, "step": 3010 }, { "epoch": 0.33313076625600796, "grad_norm": 0.28515625, "learning_rate": 0.00016867090859099256, "loss": 1.4412, "step": 3015 }, { "epoch": 0.3336832219214408, "grad_norm": 0.30859375, "learning_rate": 0.00016853058912072802, "loss": 1.4353, "step": 3020 }, { "epoch": 0.33423567758687367, "grad_norm": 0.32421875, "learning_rate": 0.0001683900147668547, "loss": 1.4603, "step": 3025 }, { "epoch": 0.3347881332523065, "grad_norm": 0.310546875, "learning_rate": 0.0001682491860522063, "loss": 1.4362, "step": 3030 }, { "epoch": 0.3353405889177393, "grad_norm": 0.31640625, "learning_rate": 0.0001681081035005626, "loss": 1.4929, "step": 3035 }, { "epoch": 0.3358930445831722, "grad_norm": 0.296875, "learning_rate": 0.00016796676763664725, "loss": 1.5166, "step": 3040 }, { "epoch": 0.33644550024860503, "grad_norm": 0.283203125, "learning_rate": 0.00016782517898612619, "loss": 1.397, "step": 3045 }, { "epoch": 0.3369979559140379, "grad_norm": 0.30859375, "learning_rate": 0.00016768333807560558, "loss": 1.3502, "step": 3050 }, { "epoch": 0.33755041157947074, "grad_norm": 0.306640625, "learning_rate": 0.00016754124543262973, "loss": 1.4702, "step": 3055 }, { "epoch": 0.33810286724490357, "grad_norm": 0.28515625, "learning_rate": 0.00016739890158567916, "loss": 1.3889, "step": 3060 }, { "epoch": 0.33865532291033645, "grad_norm": 0.30078125, "learning_rate": 0.0001672563070641688, "loss": 1.4814, "step": 3065 }, { "epoch": 0.3392077785757693, "grad_norm": 0.28125, "learning_rate": 0.00016711346239844588, "loss": 1.4631, "step": 3070 }, { "epoch": 0.33976023424120216, "grad_norm": 0.28515625, "learning_rate": 0.00016697036811978786, "loss": 1.446, "step": 3075 }, { "epoch": 0.340312689906635, "grad_norm": 0.287109375, "learning_rate": 0.00016682702476040077, "loss": 1.5016, "step": 3080 }, { "epoch": 0.3408651455720678, "grad_norm": 0.291015625, "learning_rate": 0.00016668343285341686, "loss": 1.4782, "step": 3085 }, { "epoch": 0.3414176012375007, "grad_norm": 0.32421875, "learning_rate": 0.00016653959293289297, "loss": 1.4882, "step": 3090 }, { "epoch": 0.34197005690293353, "grad_norm": 0.29296875, "learning_rate": 0.00016639550553380818, "loss": 1.4984, "step": 3095 }, { "epoch": 0.3425225125683664, "grad_norm": 0.296875, "learning_rate": 0.00016625117119206214, "loss": 1.4493, "step": 3100 }, { "epoch": 0.34307496823379924, "grad_norm": 0.298828125, "learning_rate": 0.00016610659044447298, "loss": 1.4004, "step": 3105 }, { "epoch": 0.34362742389923207, "grad_norm": 0.30078125, "learning_rate": 0.00016596176382877506, "loss": 1.4726, "step": 3110 }, { "epoch": 0.34417987956466495, "grad_norm": 0.29296875, "learning_rate": 0.00016581669188361748, "loss": 1.4436, "step": 3115 }, { "epoch": 0.3447323352300978, "grad_norm": 0.29296875, "learning_rate": 0.00016567137514856154, "loss": 1.5283, "step": 3120 }, { "epoch": 0.34528479089553066, "grad_norm": 0.294921875, "learning_rate": 0.00016552581416407917, "loss": 1.5343, "step": 3125 }, { "epoch": 0.3458372465609635, "grad_norm": 0.27734375, "learning_rate": 0.00016538000947155062, "loss": 1.3747, "step": 3130 }, { "epoch": 0.3463897022263963, "grad_norm": 0.287109375, "learning_rate": 0.0001652339616132625, "loss": 1.407, "step": 3135 }, { "epoch": 0.3469421578918292, "grad_norm": 0.30078125, "learning_rate": 0.00016508767113240598, "loss": 1.4736, "step": 3140 }, { "epoch": 0.347494613557262, "grad_norm": 0.296875, "learning_rate": 0.00016494113857307453, "loss": 1.3872, "step": 3145 }, { "epoch": 0.3480470692226949, "grad_norm": 0.310546875, "learning_rate": 0.00016479436448026195, "loss": 1.4449, "step": 3150 }, { "epoch": 0.34859952488812773, "grad_norm": 0.30859375, "learning_rate": 0.00016464734939986036, "loss": 1.4158, "step": 3155 }, { "epoch": 0.34915198055356056, "grad_norm": 0.310546875, "learning_rate": 0.00016450009387865822, "loss": 1.5011, "step": 3160 }, { "epoch": 0.34970443621899344, "grad_norm": 0.279296875, "learning_rate": 0.00016435259846433824, "loss": 1.4216, "step": 3165 }, { "epoch": 0.35025689188442627, "grad_norm": 0.30078125, "learning_rate": 0.00016420486370547537, "loss": 1.4454, "step": 3170 }, { "epoch": 0.3508093475498591, "grad_norm": 0.294921875, "learning_rate": 0.00016405689015153472, "loss": 1.412, "step": 3175 }, { "epoch": 0.351361803215292, "grad_norm": 0.32421875, "learning_rate": 0.00016390867835286953, "loss": 1.4926, "step": 3180 }, { "epoch": 0.3519142588807248, "grad_norm": 0.3046875, "learning_rate": 0.0001637602288607192, "loss": 1.4457, "step": 3185 }, { "epoch": 0.3524667145461577, "grad_norm": 0.3125, "learning_rate": 0.00016361154222720715, "loss": 1.4387, "step": 3190 }, { "epoch": 0.3530191702115905, "grad_norm": 0.3125, "learning_rate": 0.00016346261900533867, "loss": 1.5094, "step": 3195 }, { "epoch": 0.35357162587702334, "grad_norm": 0.302734375, "learning_rate": 0.00016331345974899923, "loss": 1.4318, "step": 3200 }, { "epoch": 0.3541240815424562, "grad_norm": 0.279296875, "learning_rate": 0.00016316406501295198, "loss": 1.3934, "step": 3205 }, { "epoch": 0.35467653720788905, "grad_norm": 0.29296875, "learning_rate": 0.0001630144353528359, "loss": 1.4147, "step": 3210 }, { "epoch": 0.35522899287332194, "grad_norm": 0.31640625, "learning_rate": 0.00016286457132516383, "loss": 1.4941, "step": 3215 }, { "epoch": 0.35578144853875476, "grad_norm": 0.296875, "learning_rate": 0.0001627144734873202, "loss": 1.4218, "step": 3220 }, { "epoch": 0.3563339042041876, "grad_norm": 0.283203125, "learning_rate": 0.00016256414239755902, "loss": 1.424, "step": 3225 }, { "epoch": 0.3568863598696205, "grad_norm": 0.3125, "learning_rate": 0.00016241357861500184, "loss": 1.4298, "step": 3230 }, { "epoch": 0.3574388155350533, "grad_norm": 0.3046875, "learning_rate": 0.00016226278269963578, "loss": 1.4652, "step": 3235 }, { "epoch": 0.3579912712004862, "grad_norm": 0.306640625, "learning_rate": 0.00016211175521231108, "loss": 1.4164, "step": 3240 }, { "epoch": 0.358543726865919, "grad_norm": 0.3359375, "learning_rate": 0.00016196049671473954, "loss": 1.4244, "step": 3245 }, { "epoch": 0.35909618253135184, "grad_norm": 0.296875, "learning_rate": 0.0001618090077694919, "loss": 1.4373, "step": 3250 }, { "epoch": 0.3596486381967847, "grad_norm": 0.306640625, "learning_rate": 0.00016165728893999617, "loss": 1.4531, "step": 3255 }, { "epoch": 0.36020109386221755, "grad_norm": 0.357421875, "learning_rate": 0.00016150534079053527, "loss": 1.4346, "step": 3260 }, { "epoch": 0.36075354952765043, "grad_norm": 0.30859375, "learning_rate": 0.00016135316388624505, "loss": 1.4656, "step": 3265 }, { "epoch": 0.36130600519308326, "grad_norm": 0.294921875, "learning_rate": 0.0001612007587931122, "loss": 1.4618, "step": 3270 }, { "epoch": 0.3618584608585161, "grad_norm": 0.296875, "learning_rate": 0.00016104812607797202, "loss": 1.4244, "step": 3275 }, { "epoch": 0.36241091652394897, "grad_norm": 0.29296875, "learning_rate": 0.00016089526630850643, "loss": 1.4007, "step": 3280 }, { "epoch": 0.3629633721893818, "grad_norm": 0.322265625, "learning_rate": 0.0001607421800532419, "loss": 1.4165, "step": 3285 }, { "epoch": 0.3635158278548147, "grad_norm": 0.318359375, "learning_rate": 0.00016058886788154712, "loss": 1.4063, "step": 3290 }, { "epoch": 0.3640682835202475, "grad_norm": 0.291015625, "learning_rate": 0.00016043533036363115, "loss": 1.3675, "step": 3295 }, { "epoch": 0.36462073918568033, "grad_norm": 0.302734375, "learning_rate": 0.00016028156807054112, "loss": 1.5229, "step": 3300 }, { "epoch": 0.3651731948511132, "grad_norm": 0.306640625, "learning_rate": 0.0001601275815741602, "loss": 1.4296, "step": 3305 }, { "epoch": 0.36572565051654604, "grad_norm": 0.294921875, "learning_rate": 0.00015997337144720532, "loss": 1.4719, "step": 3310 }, { "epoch": 0.36627810618197887, "grad_norm": 0.291015625, "learning_rate": 0.00015981893826322527, "loss": 1.3636, "step": 3315 }, { "epoch": 0.36683056184741175, "grad_norm": 0.302734375, "learning_rate": 0.00015966428259659845, "loss": 1.5047, "step": 3320 }, { "epoch": 0.3673830175128446, "grad_norm": 0.310546875, "learning_rate": 0.00015950940502253063, "loss": 1.4119, "step": 3325 }, { "epoch": 0.36793547317827746, "grad_norm": 0.3125, "learning_rate": 0.00015935430611705296, "loss": 1.4094, "step": 3330 }, { "epoch": 0.3684879288437103, "grad_norm": 0.333984375, "learning_rate": 0.0001591989864570199, "loss": 1.4622, "step": 3335 }, { "epoch": 0.3690403845091431, "grad_norm": 0.291015625, "learning_rate": 0.00015904344662010672, "loss": 1.4536, "step": 3340 }, { "epoch": 0.369592840174576, "grad_norm": 0.296875, "learning_rate": 0.00015888768718480778, "loss": 1.4756, "step": 3345 }, { "epoch": 0.3701452958400088, "grad_norm": 0.2890625, "learning_rate": 0.00015873170873043413, "loss": 1.438, "step": 3350 }, { "epoch": 0.3706977515054417, "grad_norm": 0.302734375, "learning_rate": 0.00015857551183711137, "loss": 1.4668, "step": 3355 }, { "epoch": 0.37125020717087454, "grad_norm": 0.298828125, "learning_rate": 0.0001584190970857776, "loss": 1.4761, "step": 3360 }, { "epoch": 0.37180266283630736, "grad_norm": 0.318359375, "learning_rate": 0.00015826246505818112, "loss": 1.5129, "step": 3365 }, { "epoch": 0.37235511850174025, "grad_norm": 0.291015625, "learning_rate": 0.00015810561633687842, "loss": 1.4366, "step": 3370 }, { "epoch": 0.3729075741671731, "grad_norm": 0.310546875, "learning_rate": 0.00015794855150523182, "loss": 1.4382, "step": 3375 }, { "epoch": 0.37346002983260596, "grad_norm": 0.3125, "learning_rate": 0.00015779127114740757, "loss": 1.4559, "step": 3380 }, { "epoch": 0.3740124854980388, "grad_norm": 0.28125, "learning_rate": 0.00015763377584837335, "loss": 1.4635, "step": 3385 }, { "epoch": 0.3745649411634716, "grad_norm": 0.298828125, "learning_rate": 0.0001574760661938964, "loss": 1.3995, "step": 3390 }, { "epoch": 0.3751173968289045, "grad_norm": 0.322265625, "learning_rate": 0.00015731814277054112, "loss": 1.4577, "step": 3395 }, { "epoch": 0.3756698524943373, "grad_norm": 0.2890625, "learning_rate": 0.00015716000616566698, "loss": 1.5044, "step": 3400 }, { "epoch": 0.3762223081597702, "grad_norm": 0.34375, "learning_rate": 0.0001570016569674264, "loss": 1.4689, "step": 3405 }, { "epoch": 0.37677476382520303, "grad_norm": 0.296875, "learning_rate": 0.00015684309576476246, "loss": 1.4682, "step": 3410 }, { "epoch": 0.37732721949063586, "grad_norm": 0.29296875, "learning_rate": 0.00015668432314740663, "loss": 1.3978, "step": 3415 }, { "epoch": 0.37787967515606874, "grad_norm": 0.302734375, "learning_rate": 0.00015652533970587687, "loss": 1.5125, "step": 3420 }, { "epoch": 0.37843213082150157, "grad_norm": 0.30859375, "learning_rate": 0.00015636614603147512, "loss": 1.4144, "step": 3425 }, { "epoch": 0.37898458648693445, "grad_norm": 0.298828125, "learning_rate": 0.0001562067427162853, "loss": 1.4377, "step": 3430 }, { "epoch": 0.3795370421523673, "grad_norm": 0.2734375, "learning_rate": 0.00015604713035317097, "loss": 1.3831, "step": 3435 }, { "epoch": 0.3800894978178001, "grad_norm": 0.302734375, "learning_rate": 0.00015588730953577335, "loss": 1.4989, "step": 3440 }, { "epoch": 0.380641953483233, "grad_norm": 0.3046875, "learning_rate": 0.0001557272808585087, "loss": 1.5037, "step": 3445 }, { "epoch": 0.3811944091486658, "grad_norm": 0.310546875, "learning_rate": 0.00015556704491656665, "loss": 1.4662, "step": 3450 }, { "epoch": 0.38174686481409864, "grad_norm": 0.298828125, "learning_rate": 0.00015540660230590748, "loss": 1.3792, "step": 3455 }, { "epoch": 0.3822993204795315, "grad_norm": 0.30078125, "learning_rate": 0.00015524595362326025, "loss": 1.4334, "step": 3460 }, { "epoch": 0.38285177614496435, "grad_norm": 0.310546875, "learning_rate": 0.00015508509946612044, "loss": 1.4367, "step": 3465 }, { "epoch": 0.38340423181039723, "grad_norm": 0.294921875, "learning_rate": 0.0001549240404327477, "loss": 1.3873, "step": 3470 }, { "epoch": 0.38395668747583006, "grad_norm": 0.30078125, "learning_rate": 0.00015476277712216365, "loss": 1.5405, "step": 3475 }, { "epoch": 0.3845091431412629, "grad_norm": 0.314453125, "learning_rate": 0.00015460131013414979, "loss": 1.4719, "step": 3480 }, { "epoch": 0.38506159880669577, "grad_norm": 0.3203125, "learning_rate": 0.00015443964006924509, "loss": 1.4193, "step": 3485 }, { "epoch": 0.3856140544721286, "grad_norm": 0.294921875, "learning_rate": 0.00015427776752874371, "loss": 1.4507, "step": 3490 }, { "epoch": 0.3861665101375615, "grad_norm": 0.29296875, "learning_rate": 0.00015411569311469308, "loss": 1.4655, "step": 3495 }, { "epoch": 0.3867189658029943, "grad_norm": 0.302734375, "learning_rate": 0.00015395341742989124, "loss": 1.4137, "step": 3500 }, { "epoch": 0.38727142146842713, "grad_norm": 0.302734375, "learning_rate": 0.00015379094107788497, "loss": 1.4708, "step": 3505 }, { "epoch": 0.38782387713386, "grad_norm": 0.2890625, "learning_rate": 0.00015362826466296732, "loss": 1.385, "step": 3510 }, { "epoch": 0.38837633279929284, "grad_norm": 0.2890625, "learning_rate": 0.0001534653887901754, "loss": 1.4926, "step": 3515 }, { "epoch": 0.3889287884647257, "grad_norm": 0.287109375, "learning_rate": 0.0001533023140652882, "loss": 1.5047, "step": 3520 }, { "epoch": 0.38948124413015855, "grad_norm": 0.3125, "learning_rate": 0.00015313904109482432, "loss": 1.4278, "step": 3525 }, { "epoch": 0.3900336997955914, "grad_norm": 0.30859375, "learning_rate": 0.0001529755704860396, "loss": 1.4142, "step": 3530 }, { "epoch": 0.39058615546102426, "grad_norm": 0.294921875, "learning_rate": 0.000152811902846925, "loss": 1.4648, "step": 3535 }, { "epoch": 0.3911386111264571, "grad_norm": 0.3046875, "learning_rate": 0.0001526480387862043, "loss": 1.4416, "step": 3540 }, { "epoch": 0.39169106679189, "grad_norm": 0.318359375, "learning_rate": 0.00015248397891333185, "loss": 1.5161, "step": 3545 }, { "epoch": 0.3922435224573228, "grad_norm": 0.30859375, "learning_rate": 0.00015231972383849017, "loss": 1.4496, "step": 3550 }, { "epoch": 0.39279597812275563, "grad_norm": 0.310546875, "learning_rate": 0.00015215527417258794, "loss": 1.4295, "step": 3555 }, { "epoch": 0.3933484337881885, "grad_norm": 0.298828125, "learning_rate": 0.00015199063052725745, "loss": 1.4012, "step": 3560 }, { "epoch": 0.39390088945362134, "grad_norm": 0.28515625, "learning_rate": 0.00015182579351485248, "loss": 1.4115, "step": 3565 }, { "epoch": 0.3944533451190542, "grad_norm": 0.296875, "learning_rate": 0.00015166076374844605, "loss": 1.3849, "step": 3570 }, { "epoch": 0.39500580078448705, "grad_norm": 0.30078125, "learning_rate": 0.00015149554184182802, "loss": 1.5061, "step": 3575 }, { "epoch": 0.3955582564499199, "grad_norm": 0.3046875, "learning_rate": 0.00015133012840950292, "loss": 1.41, "step": 3580 }, { "epoch": 0.39611071211535276, "grad_norm": 0.283203125, "learning_rate": 0.00015116452406668758, "loss": 1.432, "step": 3585 }, { "epoch": 0.3966631677807856, "grad_norm": 0.294921875, "learning_rate": 0.00015099872942930887, "loss": 1.4382, "step": 3590 }, { "epoch": 0.3972156234462184, "grad_norm": 0.3125, "learning_rate": 0.00015083274511400142, "loss": 1.3529, "step": 3595 }, { "epoch": 0.3977680791116513, "grad_norm": 0.294921875, "learning_rate": 0.0001506665717381054, "loss": 1.4524, "step": 3600 }, { "epoch": 0.3983205347770841, "grad_norm": 0.328125, "learning_rate": 0.00015050020991966406, "loss": 1.4009, "step": 3605 }, { "epoch": 0.398872990442517, "grad_norm": 0.3203125, "learning_rate": 0.00015033366027742155, "loss": 1.414, "step": 3610 }, { "epoch": 0.39942544610794983, "grad_norm": 0.30078125, "learning_rate": 0.00015016692343082052, "loss": 1.4155, "step": 3615 }, { "epoch": 0.39997790177338266, "grad_norm": 0.3125, "learning_rate": 0.00015000000000000001, "loss": 1.3841, "step": 3620 }, { "epoch": 0.40053035743881554, "grad_norm": 0.31640625, "learning_rate": 0.00014983289060579294, "loss": 1.5023, "step": 3625 }, { "epoch": 0.40108281310424837, "grad_norm": 0.296875, "learning_rate": 0.00014966559586972387, "loss": 1.4502, "step": 3630 }, { "epoch": 0.40163526876968125, "grad_norm": 0.294921875, "learning_rate": 0.0001494981164140067, "loss": 1.5344, "step": 3635 }, { "epoch": 0.4021877244351141, "grad_norm": 0.32421875, "learning_rate": 0.0001493304528615424, "loss": 1.4407, "step": 3640 }, { "epoch": 0.4027401801005469, "grad_norm": 0.2890625, "learning_rate": 0.00014916260583591658, "loss": 1.4001, "step": 3645 }, { "epoch": 0.4032926357659798, "grad_norm": 0.28515625, "learning_rate": 0.00014899457596139729, "loss": 1.4573, "step": 3650 }, { "epoch": 0.4038450914314126, "grad_norm": 0.29296875, "learning_rate": 0.0001488263638629326, "loss": 1.4588, "step": 3655 }, { "epoch": 0.4043975470968455, "grad_norm": 0.318359375, "learning_rate": 0.00014865797016614838, "loss": 1.4679, "step": 3660 }, { "epoch": 0.4049500027622783, "grad_norm": 0.32421875, "learning_rate": 0.0001484893954973458, "loss": 1.4899, "step": 3665 }, { "epoch": 0.40550245842771115, "grad_norm": 0.310546875, "learning_rate": 0.00014832064048349926, "loss": 1.4963, "step": 3670 }, { "epoch": 0.40605491409314404, "grad_norm": 0.310546875, "learning_rate": 0.00014815170575225382, "loss": 1.4085, "step": 3675 }, { "epoch": 0.40660736975857686, "grad_norm": 0.310546875, "learning_rate": 0.00014798259193192297, "loss": 1.4317, "step": 3680 }, { "epoch": 0.40715982542400975, "grad_norm": 0.30078125, "learning_rate": 0.00014781329965148624, "loss": 1.4226, "step": 3685 }, { "epoch": 0.4077122810894426, "grad_norm": 0.322265625, "learning_rate": 0.000147643829540587, "loss": 1.4239, "step": 3690 }, { "epoch": 0.4082647367548754, "grad_norm": 0.310546875, "learning_rate": 0.00014747418222952995, "loss": 1.3731, "step": 3695 }, { "epoch": 0.4088171924203083, "grad_norm": 0.306640625, "learning_rate": 0.00014730435834927884, "loss": 1.3506, "step": 3700 }, { "epoch": 0.4093696480857411, "grad_norm": 0.30859375, "learning_rate": 0.0001471343585314542, "loss": 1.4202, "step": 3705 }, { "epoch": 0.409922103751174, "grad_norm": 0.306640625, "learning_rate": 0.0001469641834083308, "loss": 1.416, "step": 3710 }, { "epoch": 0.4104745594166068, "grad_norm": 0.298828125, "learning_rate": 0.00014679383361283554, "loss": 1.4201, "step": 3715 }, { "epoch": 0.41102701508203965, "grad_norm": 0.30078125, "learning_rate": 0.00014662330977854488, "loss": 1.481, "step": 3720 }, { "epoch": 0.41157947074747253, "grad_norm": 0.2890625, "learning_rate": 0.00014645261253968262, "loss": 1.4405, "step": 3725 }, { "epoch": 0.41213192641290536, "grad_norm": 0.30859375, "learning_rate": 0.00014628174253111752, "loss": 1.3565, "step": 3730 }, { "epoch": 0.41268438207833824, "grad_norm": 0.298828125, "learning_rate": 0.00014611070038836083, "loss": 1.4352, "step": 3735 }, { "epoch": 0.41323683774377107, "grad_norm": 0.29296875, "learning_rate": 0.00014593948674756417, "loss": 1.3886, "step": 3740 }, { "epoch": 0.4137892934092039, "grad_norm": 0.30078125, "learning_rate": 0.00014576810224551683, "loss": 1.4271, "step": 3745 }, { "epoch": 0.4143417490746368, "grad_norm": 0.322265625, "learning_rate": 0.00014559654751964364, "loss": 1.4662, "step": 3750 }, { "epoch": 0.4148942047400696, "grad_norm": 0.296875, "learning_rate": 0.00014542482320800264, "loss": 1.5126, "step": 3755 }, { "epoch": 0.41544666040550243, "grad_norm": 0.294921875, "learning_rate": 0.00014525292994928247, "loss": 1.4977, "step": 3760 }, { "epoch": 0.4159991160709353, "grad_norm": 0.298828125, "learning_rate": 0.00014508086838280017, "loss": 1.5123, "step": 3765 }, { "epoch": 0.41655157173636814, "grad_norm": 0.294921875, "learning_rate": 0.0001449086391484988, "loss": 1.4831, "step": 3770 }, { "epoch": 0.417104027401801, "grad_norm": 0.30859375, "learning_rate": 0.00014473624288694498, "loss": 1.4885, "step": 3775 }, { "epoch": 0.41765648306723385, "grad_norm": 0.318359375, "learning_rate": 0.00014456368023932657, "loss": 1.4083, "step": 3780 }, { "epoch": 0.4182089387326667, "grad_norm": 0.31640625, "learning_rate": 0.00014439095184745024, "loss": 1.4011, "step": 3785 }, { "epoch": 0.41876139439809956, "grad_norm": 0.294921875, "learning_rate": 0.00014421805835373915, "loss": 1.4728, "step": 3790 }, { "epoch": 0.4193138500635324, "grad_norm": 0.314453125, "learning_rate": 0.0001440450004012305, "loss": 1.5319, "step": 3795 }, { "epoch": 0.4198663057289653, "grad_norm": 0.30078125, "learning_rate": 0.00014387177863357307, "loss": 1.4318, "step": 3800 }, { "epoch": 0.4204187613943981, "grad_norm": 0.302734375, "learning_rate": 0.00014369839369502506, "loss": 1.4462, "step": 3805 }, { "epoch": 0.4209712170598309, "grad_norm": 0.31640625, "learning_rate": 0.00014352484623045148, "loss": 1.471, "step": 3810 }, { "epoch": 0.4215236727252638, "grad_norm": 0.306640625, "learning_rate": 0.00014335113688532182, "loss": 1.4585, "step": 3815 }, { "epoch": 0.42207612839069664, "grad_norm": 0.2734375, "learning_rate": 0.0001431772663057076, "loss": 1.4262, "step": 3820 }, { "epoch": 0.4226285840561295, "grad_norm": 0.318359375, "learning_rate": 0.00014300323513828008, "loss": 1.5063, "step": 3825 }, { "epoch": 0.42318103972156235, "grad_norm": 0.28515625, "learning_rate": 0.00014282904403030772, "loss": 1.4091, "step": 3830 }, { "epoch": 0.4237334953869952, "grad_norm": 0.275390625, "learning_rate": 0.000142654693629654, "loss": 1.4438, "step": 3835 }, { "epoch": 0.42428595105242806, "grad_norm": 0.3203125, "learning_rate": 0.00014248018458477463, "loss": 1.4291, "step": 3840 }, { "epoch": 0.4248384067178609, "grad_norm": 0.30078125, "learning_rate": 0.00014230551754471554, "loss": 1.4479, "step": 3845 }, { "epoch": 0.42539086238329377, "grad_norm": 0.3046875, "learning_rate": 0.00014213069315911013, "loss": 1.4685, "step": 3850 }, { "epoch": 0.4259433180487266, "grad_norm": 0.314453125, "learning_rate": 0.0001419557120781772, "loss": 1.4265, "step": 3855 }, { "epoch": 0.4264957737141594, "grad_norm": 0.296875, "learning_rate": 0.00014178057495271815, "loss": 1.4129, "step": 3860 }, { "epoch": 0.4270482293795923, "grad_norm": 0.298828125, "learning_rate": 0.00014160528243411494, "loss": 1.4851, "step": 3865 }, { "epoch": 0.42760068504502513, "grad_norm": 0.298828125, "learning_rate": 0.00014142983517432723, "loss": 1.3819, "step": 3870 }, { "epoch": 0.428153140710458, "grad_norm": 0.306640625, "learning_rate": 0.00014125423382589048, "loss": 1.4883, "step": 3875 }, { "epoch": 0.42870559637589084, "grad_norm": 0.30859375, "learning_rate": 0.0001410784790419131, "loss": 1.3911, "step": 3880 }, { "epoch": 0.42925805204132367, "grad_norm": 0.287109375, "learning_rate": 0.00014090257147607413, "loss": 1.4121, "step": 3885 }, { "epoch": 0.42981050770675655, "grad_norm": 0.27734375, "learning_rate": 0.00014072651178262096, "loss": 1.4759, "step": 3890 }, { "epoch": 0.4303629633721894, "grad_norm": 0.314453125, "learning_rate": 0.00014055030061636668, "loss": 1.42, "step": 3895 }, { "epoch": 0.4309154190376222, "grad_norm": 0.328125, "learning_rate": 0.00014037393863268783, "loss": 1.4047, "step": 3900 }, { "epoch": 0.4314678747030551, "grad_norm": 0.30859375, "learning_rate": 0.00014019742648752184, "loss": 1.4241, "step": 3905 }, { "epoch": 0.4320203303684879, "grad_norm": 0.330078125, "learning_rate": 0.0001400207648373646, "loss": 1.4569, "step": 3910 }, { "epoch": 0.4325727860339208, "grad_norm": 0.31640625, "learning_rate": 0.00013984395433926816, "loss": 1.4373, "step": 3915 }, { "epoch": 0.4331252416993536, "grad_norm": 0.30859375, "learning_rate": 0.00013966699565083802, "loss": 1.4522, "step": 3920 }, { "epoch": 0.43367769736478645, "grad_norm": 0.294921875, "learning_rate": 0.00013948988943023096, "loss": 1.461, "step": 3925 }, { "epoch": 0.43423015303021933, "grad_norm": 0.30078125, "learning_rate": 0.00013931263633615241, "loss": 1.528, "step": 3930 }, { "epoch": 0.43478260869565216, "grad_norm": 0.287109375, "learning_rate": 0.0001391352370278541, "loss": 1.4782, "step": 3935 }, { "epoch": 0.43533506436108504, "grad_norm": 0.30859375, "learning_rate": 0.00013895769216513157, "loss": 1.4816, "step": 3940 }, { "epoch": 0.43588752002651787, "grad_norm": 0.30078125, "learning_rate": 0.00013878000240832167, "loss": 1.3961, "step": 3945 }, { "epoch": 0.4364399756919507, "grad_norm": 0.291015625, "learning_rate": 0.00013860216841830018, "loss": 1.4405, "step": 3950 }, { "epoch": 0.4369924313573836, "grad_norm": 0.296875, "learning_rate": 0.00013842419085647933, "loss": 1.4432, "step": 3955 }, { "epoch": 0.4375448870228164, "grad_norm": 0.32421875, "learning_rate": 0.00013824607038480532, "loss": 1.411, "step": 3960 }, { "epoch": 0.4380973426882493, "grad_norm": 0.287109375, "learning_rate": 0.00013806780766575588, "loss": 1.349, "step": 3965 }, { "epoch": 0.4386497983536821, "grad_norm": 0.31640625, "learning_rate": 0.0001378894033623378, "loss": 1.5009, "step": 3970 }, { "epoch": 0.43920225401911495, "grad_norm": 0.32421875, "learning_rate": 0.00013771085813808442, "loss": 1.4634, "step": 3975 }, { "epoch": 0.43975470968454783, "grad_norm": 0.310546875, "learning_rate": 0.00013753217265705323, "loss": 1.4722, "step": 3980 }, { "epoch": 0.44030716534998066, "grad_norm": 0.28515625, "learning_rate": 0.0001373533475838234, "loss": 1.3979, "step": 3985 }, { "epoch": 0.44085962101541354, "grad_norm": 0.28515625, "learning_rate": 0.0001371743835834932, "loss": 1.4224, "step": 3990 }, { "epoch": 0.44141207668084637, "grad_norm": 0.29296875, "learning_rate": 0.00013699528132167776, "loss": 1.4472, "step": 3995 }, { "epoch": 0.4419645323462792, "grad_norm": 0.3125, "learning_rate": 0.00013681604146450625, "loss": 1.3897, "step": 4000 }, { "epoch": 0.4425169880117121, "grad_norm": 0.29296875, "learning_rate": 0.00013663666467861972, "loss": 1.4628, "step": 4005 }, { "epoch": 0.4430694436771449, "grad_norm": 0.294921875, "learning_rate": 0.00013645715163116846, "loss": 1.4286, "step": 4010 }, { "epoch": 0.4436218993425778, "grad_norm": 0.322265625, "learning_rate": 0.0001362775029898096, "loss": 1.4614, "step": 4015 }, { "epoch": 0.4441743550080106, "grad_norm": 0.318359375, "learning_rate": 0.00013609771942270444, "loss": 1.4062, "step": 4020 }, { "epoch": 0.44472681067344344, "grad_norm": 0.296875, "learning_rate": 0.0001359178015985163, "loss": 1.4872, "step": 4025 }, { "epoch": 0.4452792663388763, "grad_norm": 0.302734375, "learning_rate": 0.00013573775018640766, "loss": 1.4161, "step": 4030 }, { "epoch": 0.44583172200430915, "grad_norm": 0.302734375, "learning_rate": 0.00013555756585603793, "loss": 1.4594, "step": 4035 }, { "epoch": 0.446384177669742, "grad_norm": 0.310546875, "learning_rate": 0.00013537724927756094, "loss": 1.3884, "step": 4040 }, { "epoch": 0.44693663333517486, "grad_norm": 0.3046875, "learning_rate": 0.0001351968011216223, "loss": 1.4477, "step": 4045 }, { "epoch": 0.4474890890006077, "grad_norm": 0.3125, "learning_rate": 0.00013501622205935697, "loss": 1.5247, "step": 4050 }, { "epoch": 0.44804154466604057, "grad_norm": 0.306640625, "learning_rate": 0.0001348355127623869, "loss": 1.4113, "step": 4055 }, { "epoch": 0.4485940003314734, "grad_norm": 0.3203125, "learning_rate": 0.00013465467390281826, "loss": 1.4087, "step": 4060 }, { "epoch": 0.4491464559969062, "grad_norm": 0.34375, "learning_rate": 0.00013447370615323923, "loss": 1.3985, "step": 4065 }, { "epoch": 0.4496989116623391, "grad_norm": 0.3125, "learning_rate": 0.00013429261018671734, "loss": 1.4328, "step": 4070 }, { "epoch": 0.45025136732777193, "grad_norm": 0.3046875, "learning_rate": 0.00013411138667679696, "loss": 1.316, "step": 4075 }, { "epoch": 0.4508038229932048, "grad_norm": 0.314453125, "learning_rate": 0.00013393003629749684, "loss": 1.4402, "step": 4080 }, { "epoch": 0.45135627865863764, "grad_norm": 0.298828125, "learning_rate": 0.00013374855972330757, "loss": 1.4145, "step": 4085 }, { "epoch": 0.45190873432407047, "grad_norm": 0.29296875, "learning_rate": 0.00013356695762918914, "loss": 1.4794, "step": 4090 }, { "epoch": 0.45246118998950335, "grad_norm": 0.33203125, "learning_rate": 0.0001333852306905684, "loss": 1.4601, "step": 4095 }, { "epoch": 0.4530136456549362, "grad_norm": 0.30859375, "learning_rate": 0.0001332033795833364, "loss": 1.4962, "step": 4100 }, { "epoch": 0.45356610132036906, "grad_norm": 0.298828125, "learning_rate": 0.00013302140498384617, "loss": 1.4911, "step": 4105 }, { "epoch": 0.4541185569858019, "grad_norm": 0.287109375, "learning_rate": 0.0001328393075689099, "loss": 1.3954, "step": 4110 }, { "epoch": 0.4546710126512347, "grad_norm": 0.33203125, "learning_rate": 0.0001326570880157967, "loss": 1.3622, "step": 4115 }, { "epoch": 0.4552234683166676, "grad_norm": 0.302734375, "learning_rate": 0.0001324747470022298, "loss": 1.4581, "step": 4120 }, { "epoch": 0.45577592398210043, "grad_norm": 0.296875, "learning_rate": 0.00013229228520638436, "loss": 1.4947, "step": 4125 }, { "epoch": 0.4563283796475333, "grad_norm": 0.296875, "learning_rate": 0.00013210970330688454, "loss": 1.5485, "step": 4130 }, { "epoch": 0.45688083531296614, "grad_norm": 0.30078125, "learning_rate": 0.0001319270019828013, "loss": 1.4623, "step": 4135 }, { "epoch": 0.45743329097839897, "grad_norm": 0.318359375, "learning_rate": 0.00013174418191364988, "loss": 1.3469, "step": 4140 }, { "epoch": 0.45798574664383185, "grad_norm": 0.298828125, "learning_rate": 0.00013156124377938699, "loss": 1.4317, "step": 4145 }, { "epoch": 0.4585382023092647, "grad_norm": 0.361328125, "learning_rate": 0.00013137818826040854, "loss": 1.4984, "step": 4150 }, { "epoch": 0.45909065797469756, "grad_norm": 0.322265625, "learning_rate": 0.00013119501603754704, "loss": 1.4446, "step": 4155 }, { "epoch": 0.4596431136401304, "grad_norm": 0.3046875, "learning_rate": 0.000131011727792069, "loss": 1.3731, "step": 4160 }, { "epoch": 0.4601955693055632, "grad_norm": 0.294921875, "learning_rate": 0.00013082832420567247, "loss": 1.4508, "step": 4165 }, { "epoch": 0.4607480249709961, "grad_norm": 0.3046875, "learning_rate": 0.00013064480596048454, "loss": 1.4218, "step": 4170 }, { "epoch": 0.4613004806364289, "grad_norm": 0.30859375, "learning_rate": 0.00013046117373905866, "loss": 1.4415, "step": 4175 }, { "epoch": 0.46185293630186175, "grad_norm": 0.302734375, "learning_rate": 0.0001302774282243722, "loss": 1.3982, "step": 4180 }, { "epoch": 0.46240539196729463, "grad_norm": 0.3046875, "learning_rate": 0.00013009357009982397, "loss": 1.4166, "step": 4185 }, { "epoch": 0.46295784763272746, "grad_norm": 0.349609375, "learning_rate": 0.00012990960004923154, "loss": 1.4337, "step": 4190 }, { "epoch": 0.46351030329816034, "grad_norm": 0.29296875, "learning_rate": 0.0001297255187568288, "loss": 1.4031, "step": 4195 }, { "epoch": 0.46406275896359317, "grad_norm": 0.30078125, "learning_rate": 0.00012954132690726336, "loss": 1.4677, "step": 4200 }, { "epoch": 0.464615214629026, "grad_norm": 0.306640625, "learning_rate": 0.00012935702518559398, "loss": 1.4278, "step": 4205 }, { "epoch": 0.4651676702944589, "grad_norm": 0.30859375, "learning_rate": 0.00012917261427728815, "loss": 1.4421, "step": 4210 }, { "epoch": 0.4657201259598917, "grad_norm": 0.298828125, "learning_rate": 0.0001289880948682194, "loss": 1.4351, "step": 4215 }, { "epoch": 0.4662725816253246, "grad_norm": 0.30078125, "learning_rate": 0.0001288034676446648, "loss": 1.3918, "step": 4220 }, { "epoch": 0.4668250372907574, "grad_norm": 0.294921875, "learning_rate": 0.00012861873329330248, "loss": 1.3621, "step": 4225 }, { "epoch": 0.46737749295619024, "grad_norm": 0.3203125, "learning_rate": 0.00012843389250120885, "loss": 1.4724, "step": 4230 }, { "epoch": 0.4679299486216231, "grad_norm": 0.291015625, "learning_rate": 0.00012824894595585637, "loss": 1.434, "step": 4235 }, { "epoch": 0.46848240428705595, "grad_norm": 0.306640625, "learning_rate": 0.00012806389434511076, "loss": 1.4302, "step": 4240 }, { "epoch": 0.46903485995248884, "grad_norm": 0.298828125, "learning_rate": 0.0001278787383572285, "loss": 1.3645, "step": 4245 }, { "epoch": 0.46958731561792166, "grad_norm": 0.322265625, "learning_rate": 0.00012769347868085427, "loss": 1.3792, "step": 4250 }, { "epoch": 0.4701397712833545, "grad_norm": 0.30078125, "learning_rate": 0.00012750811600501842, "loss": 1.4845, "step": 4255 }, { "epoch": 0.4706922269487874, "grad_norm": 0.306640625, "learning_rate": 0.00012732265101913435, "loss": 1.3922, "step": 4260 }, { "epoch": 0.4712446826142202, "grad_norm": 0.294921875, "learning_rate": 0.000127137084412996, "loss": 1.3738, "step": 4265 }, { "epoch": 0.4717971382796531, "grad_norm": 0.291015625, "learning_rate": 0.00012695141687677527, "loss": 1.3914, "step": 4270 }, { "epoch": 0.4723495939450859, "grad_norm": 0.294921875, "learning_rate": 0.00012676564910101947, "loss": 1.4687, "step": 4275 }, { "epoch": 0.47290204961051874, "grad_norm": 0.29296875, "learning_rate": 0.0001265797817766486, "loss": 1.4647, "step": 4280 }, { "epoch": 0.4734545052759516, "grad_norm": 0.310546875, "learning_rate": 0.0001263938155949531, "loss": 1.3946, "step": 4285 }, { "epoch": 0.47400696094138445, "grad_norm": 0.29296875, "learning_rate": 0.00012620775124759092, "loss": 1.4388, "step": 4290 }, { "epoch": 0.47455941660681733, "grad_norm": 0.326171875, "learning_rate": 0.0001260215894265852, "loss": 1.4762, "step": 4295 }, { "epoch": 0.47511187227225016, "grad_norm": 0.3046875, "learning_rate": 0.0001258353308243217, "loss": 1.4246, "step": 4300 }, { "epoch": 0.475664327937683, "grad_norm": 0.3046875, "learning_rate": 0.00012564897613354586, "loss": 1.4346, "step": 4305 }, { "epoch": 0.47621678360311587, "grad_norm": 0.3046875, "learning_rate": 0.00012546252604736074, "loss": 1.3979, "step": 4310 }, { "epoch": 0.4767692392685487, "grad_norm": 0.26953125, "learning_rate": 0.00012527598125922413, "loss": 1.4107, "step": 4315 }, { "epoch": 0.4773216949339815, "grad_norm": 0.3359375, "learning_rate": 0.00012508934246294604, "loss": 1.4976, "step": 4320 }, { "epoch": 0.4778741505994144, "grad_norm": 0.294921875, "learning_rate": 0.00012490261035268612, "loss": 1.4439, "step": 4325 }, { "epoch": 0.47842660626484723, "grad_norm": 0.298828125, "learning_rate": 0.00012471578562295115, "loss": 1.4115, "step": 4330 }, { "epoch": 0.4789790619302801, "grad_norm": 0.30078125, "learning_rate": 0.0001245288689685922, "loss": 1.4565, "step": 4335 }, { "epoch": 0.47953151759571294, "grad_norm": 0.298828125, "learning_rate": 0.0001243418610848024, "loss": 1.3786, "step": 4340 }, { "epoch": 0.48008397326114577, "grad_norm": 0.2890625, "learning_rate": 0.00012415476266711413, "loss": 1.5102, "step": 4345 }, { "epoch": 0.48063642892657865, "grad_norm": 0.328125, "learning_rate": 0.00012396757441139654, "loss": 1.3849, "step": 4350 }, { "epoch": 0.4811888845920115, "grad_norm": 0.306640625, "learning_rate": 0.00012378029701385287, "loss": 1.4281, "step": 4355 }, { "epoch": 0.48174134025744436, "grad_norm": 0.298828125, "learning_rate": 0.00012359293117101782, "loss": 1.3882, "step": 4360 }, { "epoch": 0.4822937959228772, "grad_norm": 0.302734375, "learning_rate": 0.0001234054775797552, "loss": 1.4363, "step": 4365 }, { "epoch": 0.48284625158831, "grad_norm": 0.322265625, "learning_rate": 0.00012321793693725509, "loss": 1.4339, "step": 4370 }, { "epoch": 0.4833987072537429, "grad_norm": 0.306640625, "learning_rate": 0.00012303030994103133, "loss": 1.4822, "step": 4375 }, { "epoch": 0.4839511629191757, "grad_norm": 0.291015625, "learning_rate": 0.00012284259728891897, "loss": 1.445, "step": 4380 }, { "epoch": 0.4845036185846086, "grad_norm": 0.333984375, "learning_rate": 0.00012265479967907159, "loss": 1.4321, "step": 4385 }, { "epoch": 0.48505607425004144, "grad_norm": 0.294921875, "learning_rate": 0.00012246691780995881, "loss": 1.3703, "step": 4390 }, { "epoch": 0.48560852991547426, "grad_norm": 0.287109375, "learning_rate": 0.0001222789523803636, "loss": 1.3892, "step": 4395 }, { "epoch": 0.48616098558090715, "grad_norm": 0.291015625, "learning_rate": 0.00012209090408937971, "loss": 1.3678, "step": 4400 }, { "epoch": 0.48671344124634, "grad_norm": 0.306640625, "learning_rate": 0.00012190277363640907, "loss": 1.4581, "step": 4405 }, { "epoch": 0.48726589691177286, "grad_norm": 0.341796875, "learning_rate": 0.00012171456172115923, "loss": 1.493, "step": 4410 }, { "epoch": 0.4878183525772057, "grad_norm": 0.306640625, "learning_rate": 0.00012152626904364067, "loss": 1.3839, "step": 4415 }, { "epoch": 0.4883708082426385, "grad_norm": 0.310546875, "learning_rate": 0.00012133789630416425, "loss": 1.4604, "step": 4420 }, { "epoch": 0.4889232639080714, "grad_norm": 0.28515625, "learning_rate": 0.00012114944420333869, "loss": 1.4421, "step": 4425 }, { "epoch": 0.4894757195735042, "grad_norm": 0.291015625, "learning_rate": 0.00012096091344206777, "loss": 1.4186, "step": 4430 }, { "epoch": 0.4900281752389371, "grad_norm": 0.291015625, "learning_rate": 0.00012077230472154786, "loss": 1.5027, "step": 4435 }, { "epoch": 0.49058063090436993, "grad_norm": 0.306640625, "learning_rate": 0.00012058361874326526, "loss": 1.4147, "step": 4440 }, { "epoch": 0.49113308656980276, "grad_norm": 0.31640625, "learning_rate": 0.00012039485620899369, "loss": 1.3777, "step": 4445 }, { "epoch": 0.49168554223523564, "grad_norm": 0.2890625, "learning_rate": 0.00012020601782079155, "loss": 1.3413, "step": 4450 }, { "epoch": 0.49223799790066847, "grad_norm": 0.3125, "learning_rate": 0.00012001710428099935, "loss": 1.4145, "step": 4455 }, { "epoch": 0.4927904535661013, "grad_norm": 0.322265625, "learning_rate": 0.00011982811629223709, "loss": 1.4776, "step": 4460 }, { "epoch": 0.4933429092315342, "grad_norm": 0.298828125, "learning_rate": 0.00011963905455740177, "loss": 1.435, "step": 4465 }, { "epoch": 0.493895364896967, "grad_norm": 0.296875, "learning_rate": 0.00011944991977966452, "loss": 1.4146, "step": 4470 }, { "epoch": 0.4944478205623999, "grad_norm": 0.28125, "learning_rate": 0.00011926071266246826, "loss": 1.3926, "step": 4475 }, { "epoch": 0.4950002762278327, "grad_norm": 0.306640625, "learning_rate": 0.00011907143390952493, "loss": 1.3694, "step": 4480 }, { "epoch": 0.49555273189326554, "grad_norm": 0.30859375, "learning_rate": 0.00011888208422481287, "loss": 1.475, "step": 4485 }, { "epoch": 0.4961051875586984, "grad_norm": 0.306640625, "learning_rate": 0.00011869266431257422, "loss": 1.4105, "step": 4490 }, { "epoch": 0.49665764322413125, "grad_norm": 0.326171875, "learning_rate": 0.00011850317487731239, "loss": 1.3722, "step": 4495 }, { "epoch": 0.49721009888956413, "grad_norm": 0.298828125, "learning_rate": 0.00011831361662378932, "loss": 1.4877, "step": 4500 }, { "epoch": 0.49776255455499696, "grad_norm": 0.32421875, "learning_rate": 0.0001181239902570229, "loss": 1.4594, "step": 4505 }, { "epoch": 0.4983150102204298, "grad_norm": 0.310546875, "learning_rate": 0.00011793429648228436, "loss": 1.4159, "step": 4510 }, { "epoch": 0.49886746588586267, "grad_norm": 0.306640625, "learning_rate": 0.00011774453600509559, "loss": 1.4306, "step": 4515 }, { "epoch": 0.4994199215512955, "grad_norm": 0.302734375, "learning_rate": 0.00011755470953122667, "loss": 1.3958, "step": 4520 }, { "epoch": 0.4999723772167284, "grad_norm": 0.318359375, "learning_rate": 0.00011736481776669306, "loss": 1.5186, "step": 4525 }, { "epoch": 0.5005248328821612, "grad_norm": 0.31640625, "learning_rate": 0.00011717486141775305, "loss": 1.4557, "step": 4530 }, { "epoch": 0.501077288547594, "grad_norm": 0.3359375, "learning_rate": 0.00011698484119090518, "loss": 1.3748, "step": 4535 }, { "epoch": 0.5016297442130269, "grad_norm": 0.296875, "learning_rate": 0.00011679475779288555, "loss": 1.4228, "step": 4540 }, { "epoch": 0.5021821998784598, "grad_norm": 0.322265625, "learning_rate": 0.00011660461193066521, "loss": 1.3967, "step": 4545 }, { "epoch": 0.5027346555438926, "grad_norm": 0.302734375, "learning_rate": 0.0001164144043114475, "loss": 1.474, "step": 4550 }, { "epoch": 0.5032871112093255, "grad_norm": 0.294921875, "learning_rate": 0.00011622413564266555, "loss": 1.4455, "step": 4555 }, { "epoch": 0.5038395668747583, "grad_norm": 0.31640625, "learning_rate": 0.0001160338066319794, "loss": 1.3944, "step": 4560 }, { "epoch": 0.5043920225401911, "grad_norm": 0.32421875, "learning_rate": 0.00011584341798727365, "loss": 1.5009, "step": 4565 }, { "epoch": 0.504944478205624, "grad_norm": 0.306640625, "learning_rate": 0.0001156529704166546, "loss": 1.4863, "step": 4570 }, { "epoch": 0.5054969338710569, "grad_norm": 0.298828125, "learning_rate": 0.00011546246462844779, "loss": 1.3807, "step": 4575 }, { "epoch": 0.5060493895364897, "grad_norm": 0.296875, "learning_rate": 0.00011527190133119526, "loss": 1.3866, "step": 4580 }, { "epoch": 0.5066018452019225, "grad_norm": 0.30859375, "learning_rate": 0.0001150812812336529, "loss": 1.4143, "step": 4585 }, { "epoch": 0.5071543008673554, "grad_norm": 0.3203125, "learning_rate": 0.00011489060504478788, "loss": 1.4057, "step": 4590 }, { "epoch": 0.5077067565327883, "grad_norm": 0.30859375, "learning_rate": 0.00011469987347377602, "loss": 1.4907, "step": 4595 }, { "epoch": 0.5082592121982211, "grad_norm": 0.3125, "learning_rate": 0.00011450908722999909, "loss": 1.3286, "step": 4600 }, { "epoch": 0.508811667863654, "grad_norm": 0.3046875, "learning_rate": 0.00011431824702304221, "loss": 1.4404, "step": 4605 }, { "epoch": 0.5093641235290868, "grad_norm": 0.3046875, "learning_rate": 0.00011412735356269124, "loss": 1.4489, "step": 4610 }, { "epoch": 0.5099165791945196, "grad_norm": 0.326171875, "learning_rate": 0.00011393640755893002, "loss": 1.4586, "step": 4615 }, { "epoch": 0.5104690348599525, "grad_norm": 0.30859375, "learning_rate": 0.00011374540972193786, "loss": 1.4208, "step": 4620 }, { "epoch": 0.5110214905253854, "grad_norm": 0.294921875, "learning_rate": 0.00011355436076208687, "loss": 1.4104, "step": 4625 }, { "epoch": 0.5115739461908182, "grad_norm": 0.29296875, "learning_rate": 0.00011336326138993927, "loss": 1.4139, "step": 4630 }, { "epoch": 0.512126401856251, "grad_norm": 0.302734375, "learning_rate": 0.00011317211231624483, "loss": 1.3634, "step": 4635 }, { "epoch": 0.5126788575216839, "grad_norm": 0.29296875, "learning_rate": 0.00011298091425193806, "loss": 1.3531, "step": 4640 }, { "epoch": 0.5132313131871168, "grad_norm": 0.302734375, "learning_rate": 0.00011278966790813582, "loss": 1.3968, "step": 4645 }, { "epoch": 0.5137837688525496, "grad_norm": 0.306640625, "learning_rate": 0.0001125983739961344, "loss": 1.4707, "step": 4650 }, { "epoch": 0.5143362245179824, "grad_norm": 0.302734375, "learning_rate": 0.00011240703322740711, "loss": 1.5112, "step": 4655 }, { "epoch": 0.5148886801834153, "grad_norm": 0.294921875, "learning_rate": 0.00011221564631360152, "loss": 1.4681, "step": 4660 }, { "epoch": 0.5154411358488481, "grad_norm": 0.298828125, "learning_rate": 0.00011202421396653677, "loss": 1.4125, "step": 4665 }, { "epoch": 0.5159935915142809, "grad_norm": 0.302734375, "learning_rate": 0.000111832736898201, "loss": 1.3707, "step": 4670 }, { "epoch": 0.5165460471797139, "grad_norm": 0.314453125, "learning_rate": 0.00011164121582074873, "loss": 1.5334, "step": 4675 }, { "epoch": 0.5170985028451467, "grad_norm": 0.302734375, "learning_rate": 0.00011144965144649809, "loss": 1.4451, "step": 4680 }, { "epoch": 0.5176509585105795, "grad_norm": 0.326171875, "learning_rate": 0.00011125804448792831, "loss": 1.4046, "step": 4685 }, { "epoch": 0.5182034141760123, "grad_norm": 0.302734375, "learning_rate": 0.00011106639565767692, "loss": 1.4415, "step": 4690 }, { "epoch": 0.5187558698414452, "grad_norm": 0.3046875, "learning_rate": 0.00011087470566853726, "loss": 1.3762, "step": 4695 }, { "epoch": 0.5193083255068781, "grad_norm": 0.306640625, "learning_rate": 0.00011068297523345573, "loss": 1.4809, "step": 4700 }, { "epoch": 0.5198607811723109, "grad_norm": 0.30078125, "learning_rate": 0.00011049120506552913, "loss": 1.4173, "step": 4705 }, { "epoch": 0.5204132368377438, "grad_norm": 0.29296875, "learning_rate": 0.00011029939587800206, "loss": 1.383, "step": 4710 }, { "epoch": 0.5209656925031766, "grad_norm": 0.3046875, "learning_rate": 0.00011010754838426428, "loss": 1.5444, "step": 4715 }, { "epoch": 0.5215181481686094, "grad_norm": 0.296875, "learning_rate": 0.0001099156632978479, "loss": 1.4696, "step": 4720 }, { "epoch": 0.5220706038340424, "grad_norm": 0.326171875, "learning_rate": 0.00010972374133242502, "loss": 1.3972, "step": 4725 }, { "epoch": 0.5226230594994752, "grad_norm": 0.318359375, "learning_rate": 0.00010953178320180475, "loss": 1.3613, "step": 4730 }, { "epoch": 0.523175515164908, "grad_norm": 0.302734375, "learning_rate": 0.00010933978961993083, "loss": 1.4784, "step": 4735 }, { "epoch": 0.5237279708303408, "grad_norm": 0.298828125, "learning_rate": 0.00010914776130087873, "loss": 1.3857, "step": 4740 }, { "epoch": 0.5242804264957737, "grad_norm": 0.310546875, "learning_rate": 0.0001089556989588532, "loss": 1.4099, "step": 4745 }, { "epoch": 0.5248328821612066, "grad_norm": 0.294921875, "learning_rate": 0.00010876360330818553, "loss": 1.427, "step": 4750 }, { "epoch": 0.5253853378266394, "grad_norm": 0.330078125, "learning_rate": 0.00010857147506333088, "loss": 1.3537, "step": 4755 }, { "epoch": 0.5259377934920723, "grad_norm": 0.333984375, "learning_rate": 0.00010837931493886562, "loss": 1.4496, "step": 4760 }, { "epoch": 0.5264902491575051, "grad_norm": 0.31640625, "learning_rate": 0.0001081871236494847, "loss": 1.4078, "step": 4765 }, { "epoch": 0.5270427048229379, "grad_norm": 0.29296875, "learning_rate": 0.00010799490190999892, "loss": 1.4451, "step": 4770 }, { "epoch": 0.5275951604883709, "grad_norm": 0.310546875, "learning_rate": 0.0001078026504353325, "loss": 1.3917, "step": 4775 }, { "epoch": 0.5281476161538037, "grad_norm": 0.318359375, "learning_rate": 0.00010761036994052008, "loss": 1.4646, "step": 4780 }, { "epoch": 0.5287000718192365, "grad_norm": 0.302734375, "learning_rate": 0.00010741806114070434, "loss": 1.4463, "step": 4785 }, { "epoch": 0.5292525274846693, "grad_norm": 0.33203125, "learning_rate": 0.00010722572475113315, "loss": 1.4154, "step": 4790 }, { "epoch": 0.5298049831501022, "grad_norm": 0.298828125, "learning_rate": 0.00010703336148715705, "loss": 1.4923, "step": 4795 }, { "epoch": 0.5303574388155351, "grad_norm": 0.318359375, "learning_rate": 0.00010684097206422654, "loss": 1.4458, "step": 4800 }, { "epoch": 0.5309098944809679, "grad_norm": 0.302734375, "learning_rate": 0.00010664855719788935, "loss": 1.4214, "step": 4805 }, { "epoch": 0.5314623501464008, "grad_norm": 0.30859375, "learning_rate": 0.00010645611760378795, "loss": 1.461, "step": 4810 }, { "epoch": 0.5320148058118336, "grad_norm": 0.3203125, "learning_rate": 0.00010626365399765667, "loss": 1.4765, "step": 4815 }, { "epoch": 0.5325672614772664, "grad_norm": 0.296875, "learning_rate": 0.00010607116709531918, "loss": 1.4213, "step": 4820 }, { "epoch": 0.5331197171426993, "grad_norm": 0.291015625, "learning_rate": 0.00010587865761268583, "loss": 1.4838, "step": 4825 }, { "epoch": 0.5336721728081322, "grad_norm": 0.31640625, "learning_rate": 0.00010568612626575092, "loss": 1.441, "step": 4830 }, { "epoch": 0.534224628473565, "grad_norm": 0.32421875, "learning_rate": 0.00010549357377059006, "loss": 1.4197, "step": 4835 }, { "epoch": 0.5347770841389978, "grad_norm": 0.3203125, "learning_rate": 0.00010530100084335758, "loss": 1.3674, "step": 4840 }, { "epoch": 0.5353295398044307, "grad_norm": 0.318359375, "learning_rate": 0.0001051084082002837, "loss": 1.5552, "step": 4845 }, { "epoch": 0.5358819954698636, "grad_norm": 0.318359375, "learning_rate": 0.00010491579655767203, "loss": 1.4615, "step": 4850 }, { "epoch": 0.5364344511352964, "grad_norm": 0.314453125, "learning_rate": 0.00010472316663189683, "loss": 1.4436, "step": 4855 }, { "epoch": 0.5369869068007292, "grad_norm": 0.32421875, "learning_rate": 0.00010453051913940042, "loss": 1.363, "step": 4860 }, { "epoch": 0.5375393624661621, "grad_norm": 0.310546875, "learning_rate": 0.00010433785479669038, "loss": 1.4616, "step": 4865 }, { "epoch": 0.5380918181315949, "grad_norm": 0.302734375, "learning_rate": 0.00010414517432033695, "loss": 1.4034, "step": 4870 }, { "epoch": 0.5386442737970278, "grad_norm": 0.32421875, "learning_rate": 0.0001039524784269704, "loss": 1.3833, "step": 4875 }, { "epoch": 0.5391967294624607, "grad_norm": 0.326171875, "learning_rate": 0.00010375976783327841, "loss": 1.4014, "step": 4880 }, { "epoch": 0.5397491851278935, "grad_norm": 0.3046875, "learning_rate": 0.00010356704325600324, "loss": 1.4487, "step": 4885 }, { "epoch": 0.5403016407933263, "grad_norm": 0.306640625, "learning_rate": 0.00010337430541193918, "loss": 1.429, "step": 4890 }, { "epoch": 0.5408540964587591, "grad_norm": 0.296875, "learning_rate": 0.00010318155501792988, "loss": 1.3984, "step": 4895 }, { "epoch": 0.5414065521241921, "grad_norm": 0.5546875, "learning_rate": 0.00010298879279086568, "loss": 1.4471, "step": 4900 }, { "epoch": 0.5419590077896249, "grad_norm": 0.30078125, "learning_rate": 0.00010279601944768089, "loss": 1.393, "step": 4905 }, { "epoch": 0.5425114634550577, "grad_norm": 0.30859375, "learning_rate": 0.0001026032357053512, "loss": 1.4618, "step": 4910 }, { "epoch": 0.5430639191204906, "grad_norm": 0.322265625, "learning_rate": 0.00010241044228089096, "loss": 1.3546, "step": 4915 }, { "epoch": 0.5436163747859234, "grad_norm": 0.322265625, "learning_rate": 0.00010221763989135052, "loss": 1.4693, "step": 4920 }, { "epoch": 0.5441688304513563, "grad_norm": 0.3046875, "learning_rate": 0.00010202482925381358, "loss": 1.3441, "step": 4925 }, { "epoch": 0.5447212861167892, "grad_norm": 0.3046875, "learning_rate": 0.00010183201108539453, "loss": 1.4544, "step": 4930 }, { "epoch": 0.545273741782222, "grad_norm": 0.3125, "learning_rate": 0.00010163918610323579, "loss": 1.4556, "step": 4935 }, { "epoch": 0.5458261974476548, "grad_norm": 0.3125, "learning_rate": 0.00010144635502450508, "loss": 1.4041, "step": 4940 }, { "epoch": 0.5463786531130876, "grad_norm": 0.3125, "learning_rate": 0.00010125351856639278, "loss": 1.4069, "step": 4945 }, { "epoch": 0.5469311087785205, "grad_norm": 0.3359375, "learning_rate": 0.00010106067744610933, "loss": 1.5252, "step": 4950 }, { "epoch": 0.5474835644439534, "grad_norm": 0.283203125, "learning_rate": 0.00010086783238088244, "loss": 1.4417, "step": 4955 }, { "epoch": 0.5480360201093862, "grad_norm": 0.322265625, "learning_rate": 0.00010067498408795462, "loss": 1.4612, "step": 4960 }, { "epoch": 0.5485884757748191, "grad_norm": 0.314453125, "learning_rate": 0.00010048213328458028, "loss": 1.4275, "step": 4965 }, { "epoch": 0.5491409314402519, "grad_norm": 0.310546875, "learning_rate": 0.00010028928068802315, "loss": 1.3232, "step": 4970 }, { "epoch": 0.5496933871056847, "grad_norm": 0.3046875, "learning_rate": 0.00010009642701555368, "loss": 1.4001, "step": 4975 }, { "epoch": 0.5502458427711177, "grad_norm": 0.296875, "learning_rate": 9.990357298444632e-05, "loss": 1.3206, "step": 4980 }, { "epoch": 0.5507982984365505, "grad_norm": 0.31640625, "learning_rate": 9.971071931197685e-05, "loss": 1.4543, "step": 4985 }, { "epoch": 0.5513507541019833, "grad_norm": 0.31640625, "learning_rate": 9.951786671541973e-05, "loss": 1.3529, "step": 4990 }, { "epoch": 0.5519032097674161, "grad_norm": 0.33203125, "learning_rate": 9.932501591204536e-05, "loss": 1.4447, "step": 4995 }, { "epoch": 0.552455665432849, "grad_norm": 0.3125, "learning_rate": 9.913216761911755e-05, "loss": 1.4555, "step": 5000 }, { "epoch": 0.5530081210982819, "grad_norm": 0.294921875, "learning_rate": 9.893932255389068e-05, "loss": 1.4076, "step": 5005 }, { "epoch": 0.5535605767637147, "grad_norm": 0.32421875, "learning_rate": 9.874648143360723e-05, "loss": 1.4206, "step": 5010 }, { "epoch": 0.5541130324291476, "grad_norm": 0.314453125, "learning_rate": 9.855364497549496e-05, "loss": 1.4416, "step": 5015 }, { "epoch": 0.5546654880945804, "grad_norm": 0.30078125, "learning_rate": 9.836081389676422e-05, "loss": 1.4954, "step": 5020 }, { "epoch": 0.5552179437600132, "grad_norm": 0.3125, "learning_rate": 9.816798891460546e-05, "loss": 1.4446, "step": 5025 }, { "epoch": 0.5557703994254461, "grad_norm": 0.296875, "learning_rate": 9.797517074618642e-05, "loss": 1.3804, "step": 5030 }, { "epoch": 0.556322855090879, "grad_norm": 0.3046875, "learning_rate": 9.778236010864949e-05, "loss": 1.3723, "step": 5035 }, { "epoch": 0.5568753107563118, "grad_norm": 0.32421875, "learning_rate": 9.758955771910906e-05, "loss": 1.3652, "step": 5040 }, { "epoch": 0.5574277664217446, "grad_norm": 0.31640625, "learning_rate": 9.739676429464881e-05, "loss": 1.388, "step": 5045 }, { "epoch": 0.5579802220871775, "grad_norm": 0.318359375, "learning_rate": 9.720398055231911e-05, "loss": 1.4303, "step": 5050 }, { "epoch": 0.5585326777526104, "grad_norm": 0.330078125, "learning_rate": 9.701120720913433e-05, "loss": 1.4268, "step": 5055 }, { "epoch": 0.5590851334180432, "grad_norm": 0.30859375, "learning_rate": 9.68184449820701e-05, "loss": 1.3738, "step": 5060 }, { "epoch": 0.559637589083476, "grad_norm": 0.328125, "learning_rate": 9.662569458806085e-05, "loss": 1.3449, "step": 5065 }, { "epoch": 0.5601900447489089, "grad_norm": 0.314453125, "learning_rate": 9.64329567439968e-05, "loss": 1.4515, "step": 5070 }, { "epoch": 0.5607425004143417, "grad_norm": 0.306640625, "learning_rate": 9.624023216672161e-05, "loss": 1.3739, "step": 5075 }, { "epoch": 0.5612949560797746, "grad_norm": 0.30859375, "learning_rate": 9.604752157302961e-05, "loss": 1.4705, "step": 5080 }, { "epoch": 0.5618474117452075, "grad_norm": 0.328125, "learning_rate": 9.585482567966309e-05, "loss": 1.447, "step": 5085 }, { "epoch": 0.5623998674106403, "grad_norm": 0.310546875, "learning_rate": 9.566214520330966e-05, "loss": 1.4011, "step": 5090 }, { "epoch": 0.5629523230760731, "grad_norm": 0.314453125, "learning_rate": 9.54694808605996e-05, "loss": 1.3941, "step": 5095 }, { "epoch": 0.563504778741506, "grad_norm": 0.3203125, "learning_rate": 9.527683336810318e-05, "loss": 1.3446, "step": 5100 }, { "epoch": 0.5640572344069389, "grad_norm": 0.31640625, "learning_rate": 9.5084203442328e-05, "loss": 1.4566, "step": 5105 }, { "epoch": 0.5646096900723717, "grad_norm": 0.296875, "learning_rate": 9.489159179971632e-05, "loss": 1.3497, "step": 5110 }, { "epoch": 0.5651621457378045, "grad_norm": 0.302734375, "learning_rate": 9.469899915664244e-05, "loss": 1.4315, "step": 5115 }, { "epoch": 0.5657146014032374, "grad_norm": 0.302734375, "learning_rate": 9.450642622940995e-05, "loss": 1.4209, "step": 5120 }, { "epoch": 0.5662670570686702, "grad_norm": 0.314453125, "learning_rate": 9.43138737342491e-05, "loss": 1.3847, "step": 5125 }, { "epoch": 0.5668195127341031, "grad_norm": 0.345703125, "learning_rate": 9.412134238731418e-05, "loss": 1.557, "step": 5130 }, { "epoch": 0.567371968399536, "grad_norm": 0.3046875, "learning_rate": 9.392883290468083e-05, "loss": 1.4852, "step": 5135 }, { "epoch": 0.5679244240649688, "grad_norm": 0.326171875, "learning_rate": 9.373634600234334e-05, "loss": 1.4824, "step": 5140 }, { "epoch": 0.5684768797304016, "grad_norm": 0.326171875, "learning_rate": 9.354388239621208e-05, "loss": 1.4543, "step": 5145 }, { "epoch": 0.5690293353958344, "grad_norm": 0.31640625, "learning_rate": 9.335144280211066e-05, "loss": 1.43, "step": 5150 }, { "epoch": 0.5695817910612674, "grad_norm": 0.310546875, "learning_rate": 9.31590279357735e-05, "loss": 1.4072, "step": 5155 }, { "epoch": 0.5701342467267002, "grad_norm": 0.330078125, "learning_rate": 9.296663851284297e-05, "loss": 1.4969, "step": 5160 }, { "epoch": 0.570686702392133, "grad_norm": 0.310546875, "learning_rate": 9.277427524886689e-05, "loss": 1.4579, "step": 5165 }, { "epoch": 0.5712391580575659, "grad_norm": 0.3046875, "learning_rate": 9.258193885929569e-05, "loss": 1.4423, "step": 5170 }, { "epoch": 0.5717916137229987, "grad_norm": 0.330078125, "learning_rate": 9.238963005947993e-05, "loss": 1.4415, "step": 5175 }, { "epoch": 0.5723440693884316, "grad_norm": 0.30078125, "learning_rate": 9.219734956466752e-05, "loss": 1.406, "step": 5180 }, { "epoch": 0.5728965250538645, "grad_norm": 0.298828125, "learning_rate": 9.200509809000108e-05, "loss": 1.451, "step": 5185 }, { "epoch": 0.5734489807192973, "grad_norm": 0.291015625, "learning_rate": 9.181287635051534e-05, "loss": 1.4085, "step": 5190 }, { "epoch": 0.5740014363847301, "grad_norm": 0.298828125, "learning_rate": 9.16206850611344e-05, "loss": 1.5098, "step": 5195 }, { "epoch": 0.5745538920501629, "grad_norm": 0.310546875, "learning_rate": 9.142852493666914e-05, "loss": 1.4526, "step": 5200 }, { "epoch": 0.5751063477155959, "grad_norm": 0.31640625, "learning_rate": 9.123639669181448e-05, "loss": 1.3801, "step": 5205 }, { "epoch": 0.5756588033810287, "grad_norm": 0.31640625, "learning_rate": 9.104430104114681e-05, "loss": 1.3975, "step": 5210 }, { "epoch": 0.5762112590464615, "grad_norm": 0.30859375, "learning_rate": 9.085223869912129e-05, "loss": 1.3678, "step": 5215 }, { "epoch": 0.5767637147118944, "grad_norm": 0.3359375, "learning_rate": 9.066021038006919e-05, "loss": 1.496, "step": 5220 }, { "epoch": 0.5773161703773272, "grad_norm": 0.330078125, "learning_rate": 9.046821679819527e-05, "loss": 1.435, "step": 5225 }, { "epoch": 0.57786862604276, "grad_norm": 0.310546875, "learning_rate": 9.0276258667575e-05, "loss": 1.3839, "step": 5230 }, { "epoch": 0.578421081708193, "grad_norm": 0.302734375, "learning_rate": 9.008433670215212e-05, "loss": 1.3672, "step": 5235 }, { "epoch": 0.5789735373736258, "grad_norm": 0.3359375, "learning_rate": 8.989245161573576e-05, "loss": 1.4355, "step": 5240 }, { "epoch": 0.5795259930390586, "grad_norm": 0.296875, "learning_rate": 8.970060412199795e-05, "loss": 1.3957, "step": 5245 }, { "epoch": 0.5800784487044914, "grad_norm": 0.32421875, "learning_rate": 8.950879493447091e-05, "loss": 1.4862, "step": 5250 }, { "epoch": 0.5806309043699243, "grad_norm": 0.306640625, "learning_rate": 8.931702476654431e-05, "loss": 1.4036, "step": 5255 }, { "epoch": 0.5811833600353572, "grad_norm": 0.310546875, "learning_rate": 8.912529433146278e-05, "loss": 1.3881, "step": 5260 }, { "epoch": 0.58173581570079, "grad_norm": 0.337890625, "learning_rate": 8.893360434232312e-05, "loss": 1.4404, "step": 5265 }, { "epoch": 0.5822882713662229, "grad_norm": 0.318359375, "learning_rate": 8.874195551207174e-05, "loss": 1.407, "step": 5270 }, { "epoch": 0.5828407270316557, "grad_norm": 0.30078125, "learning_rate": 8.855034855350194e-05, "loss": 1.451, "step": 5275 }, { "epoch": 0.5833931826970885, "grad_norm": 0.306640625, "learning_rate": 8.835878417925132e-05, "loss": 1.3569, "step": 5280 }, { "epoch": 0.5839456383625214, "grad_norm": 0.3203125, "learning_rate": 8.816726310179904e-05, "loss": 1.4479, "step": 5285 }, { "epoch": 0.5844980940279543, "grad_norm": 0.3125, "learning_rate": 8.797578603346328e-05, "loss": 1.3634, "step": 5290 }, { "epoch": 0.5850505496933871, "grad_norm": 0.3203125, "learning_rate": 8.778435368639851e-05, "loss": 1.3534, "step": 5295 }, { "epoch": 0.5856030053588199, "grad_norm": 0.333984375, "learning_rate": 8.759296677259291e-05, "loss": 1.481, "step": 5300 }, { "epoch": 0.5861554610242528, "grad_norm": 0.322265625, "learning_rate": 8.740162600386565e-05, "loss": 1.4225, "step": 5305 }, { "epoch": 0.5867079166896857, "grad_norm": 0.333984375, "learning_rate": 8.721033209186425e-05, "loss": 1.4631, "step": 5310 }, { "epoch": 0.5872603723551185, "grad_norm": 0.310546875, "learning_rate": 8.701908574806197e-05, "loss": 1.4146, "step": 5315 }, { "epoch": 0.5878128280205513, "grad_norm": 0.326171875, "learning_rate": 8.682788768375521e-05, "loss": 1.4269, "step": 5320 }, { "epoch": 0.5883652836859842, "grad_norm": 0.302734375, "learning_rate": 8.663673861006074e-05, "loss": 1.3666, "step": 5325 }, { "epoch": 0.588917739351417, "grad_norm": 0.3359375, "learning_rate": 8.644563923791318e-05, "loss": 1.3195, "step": 5330 }, { "epoch": 0.5894701950168499, "grad_norm": 0.31640625, "learning_rate": 8.625459027806214e-05, "loss": 1.3606, "step": 5335 }, { "epoch": 0.5900226506822828, "grad_norm": 0.30859375, "learning_rate": 8.606359244106999e-05, "loss": 1.4826, "step": 5340 }, { "epoch": 0.5905751063477156, "grad_norm": 0.30078125, "learning_rate": 8.587264643730877e-05, "loss": 1.4822, "step": 5345 }, { "epoch": 0.5911275620131484, "grad_norm": 0.326171875, "learning_rate": 8.568175297695777e-05, "loss": 1.3564, "step": 5350 }, { "epoch": 0.5916800176785812, "grad_norm": 0.326171875, "learning_rate": 8.549091277000091e-05, "loss": 1.4051, "step": 5355 }, { "epoch": 0.5922324733440142, "grad_norm": 0.306640625, "learning_rate": 8.530012652622397e-05, "loss": 1.4311, "step": 5360 }, { "epoch": 0.592784929009447, "grad_norm": 0.318359375, "learning_rate": 8.510939495521213e-05, "loss": 1.387, "step": 5365 }, { "epoch": 0.5933373846748798, "grad_norm": 0.310546875, "learning_rate": 8.491871876634712e-05, "loss": 1.4441, "step": 5370 }, { "epoch": 0.5938898403403127, "grad_norm": 0.306640625, "learning_rate": 8.472809866880475e-05, "loss": 1.4135, "step": 5375 }, { "epoch": 0.5944422960057455, "grad_norm": 0.322265625, "learning_rate": 8.45375353715522e-05, "loss": 1.3886, "step": 5380 }, { "epoch": 0.5949947516711784, "grad_norm": 0.341796875, "learning_rate": 8.434702958334539e-05, "loss": 1.4528, "step": 5385 }, { "epoch": 0.5955472073366113, "grad_norm": 0.302734375, "learning_rate": 8.415658201272636e-05, "loss": 1.3982, "step": 5390 }, { "epoch": 0.5960996630020441, "grad_norm": 0.326171875, "learning_rate": 8.39661933680206e-05, "loss": 1.4269, "step": 5395 }, { "epoch": 0.5966521186674769, "grad_norm": 0.32421875, "learning_rate": 8.377586435733446e-05, "loss": 1.3757, "step": 5400 }, { "epoch": 0.5972045743329097, "grad_norm": 0.318359375, "learning_rate": 8.358559568855249e-05, "loss": 1.4688, "step": 5405 }, { "epoch": 0.5977570299983427, "grad_norm": 0.318359375, "learning_rate": 8.33953880693348e-05, "loss": 1.4721, "step": 5410 }, { "epoch": 0.5983094856637755, "grad_norm": 0.3046875, "learning_rate": 8.320524220711446e-05, "loss": 1.4231, "step": 5415 }, { "epoch": 0.5988619413292083, "grad_norm": 0.337890625, "learning_rate": 8.301515880909481e-05, "loss": 1.4276, "step": 5420 }, { "epoch": 0.5994143969946412, "grad_norm": 0.28515625, "learning_rate": 8.282513858224698e-05, "loss": 1.3423, "step": 5425 }, { "epoch": 0.599966852660074, "grad_norm": 0.318359375, "learning_rate": 8.263518223330697e-05, "loss": 1.4341, "step": 5430 }, { "epoch": 0.6005193083255069, "grad_norm": 0.32421875, "learning_rate": 8.244529046877336e-05, "loss": 1.417, "step": 5435 }, { "epoch": 0.6010717639909398, "grad_norm": 0.38671875, "learning_rate": 8.225546399490442e-05, "loss": 1.4885, "step": 5440 }, { "epoch": 0.6016242196563726, "grad_norm": 0.326171875, "learning_rate": 8.206570351771568e-05, "loss": 1.4098, "step": 5445 }, { "epoch": 0.6021766753218054, "grad_norm": 0.328125, "learning_rate": 8.187600974297714e-05, "loss": 1.3801, "step": 5450 }, { "epoch": 0.6027291309872382, "grad_norm": 0.3125, "learning_rate": 8.16863833762107e-05, "loss": 1.5099, "step": 5455 }, { "epoch": 0.6032815866526712, "grad_norm": 0.296875, "learning_rate": 8.149682512268763e-05, "loss": 1.3502, "step": 5460 }, { "epoch": 0.603834042318104, "grad_norm": 0.302734375, "learning_rate": 8.130733568742579e-05, "loss": 1.4121, "step": 5465 }, { "epoch": 0.6043864979835368, "grad_norm": 0.353515625, "learning_rate": 8.111791577518716e-05, "loss": 1.4172, "step": 5470 }, { "epoch": 0.6049389536489697, "grad_norm": 0.322265625, "learning_rate": 8.092856609047508e-05, "loss": 1.3658, "step": 5475 }, { "epoch": 0.6054914093144025, "grad_norm": 0.310546875, "learning_rate": 8.073928733753175e-05, "loss": 1.4617, "step": 5480 }, { "epoch": 0.6060438649798354, "grad_norm": 0.30859375, "learning_rate": 8.055008022033551e-05, "loss": 1.4063, "step": 5485 }, { "epoch": 0.6065963206452682, "grad_norm": 0.3046875, "learning_rate": 8.036094544259827e-05, "loss": 1.4611, "step": 5490 }, { "epoch": 0.6071487763107011, "grad_norm": 0.337890625, "learning_rate": 8.017188370776292e-05, "loss": 1.4173, "step": 5495 }, { "epoch": 0.6077012319761339, "grad_norm": 0.314453125, "learning_rate": 7.998289571900067e-05, "loss": 1.3557, "step": 5500 }, { "epoch": 0.6082536876415667, "grad_norm": 0.3046875, "learning_rate": 7.979398217920849e-05, "loss": 1.4499, "step": 5505 }, { "epoch": 0.6088061433069996, "grad_norm": 0.306640625, "learning_rate": 7.960514379100632e-05, "loss": 1.4443, "step": 5510 }, { "epoch": 0.6093585989724325, "grad_norm": 0.30078125, "learning_rate": 7.941638125673475e-05, "loss": 1.3823, "step": 5515 }, { "epoch": 0.6099110546378653, "grad_norm": 0.30078125, "learning_rate": 7.922769527845217e-05, "loss": 1.5232, "step": 5520 }, { "epoch": 0.6104635103032982, "grad_norm": 0.2890625, "learning_rate": 7.903908655793224e-05, "loss": 1.3992, "step": 5525 }, { "epoch": 0.611015965968731, "grad_norm": 0.3125, "learning_rate": 7.885055579666133e-05, "loss": 1.3597, "step": 5530 }, { "epoch": 0.6115684216341638, "grad_norm": 0.31640625, "learning_rate": 7.866210369583576e-05, "loss": 1.4361, "step": 5535 }, { "epoch": 0.6121208772995967, "grad_norm": 0.322265625, "learning_rate": 7.847373095635937e-05, "loss": 1.3935, "step": 5540 }, { "epoch": 0.6126733329650296, "grad_norm": 0.31640625, "learning_rate": 7.82854382788408e-05, "loss": 1.4327, "step": 5545 }, { "epoch": 0.6132257886304624, "grad_norm": 0.3203125, "learning_rate": 7.809722636359095e-05, "loss": 1.4703, "step": 5550 }, { "epoch": 0.6137782442958952, "grad_norm": 0.29296875, "learning_rate": 7.790909591062032e-05, "loss": 1.4328, "step": 5555 }, { "epoch": 0.614330699961328, "grad_norm": 0.310546875, "learning_rate": 7.772104761963645e-05, "loss": 1.4323, "step": 5560 }, { "epoch": 0.614883155626761, "grad_norm": 0.296875, "learning_rate": 7.753308219004122e-05, "loss": 1.4705, "step": 5565 }, { "epoch": 0.6154356112921938, "grad_norm": 0.30859375, "learning_rate": 7.734520032092845e-05, "loss": 1.5011, "step": 5570 }, { "epoch": 0.6159880669576266, "grad_norm": 0.30859375, "learning_rate": 7.715740271108107e-05, "loss": 1.4137, "step": 5575 }, { "epoch": 0.6165405226230595, "grad_norm": 0.30859375, "learning_rate": 7.69696900589687e-05, "loss": 1.4004, "step": 5580 }, { "epoch": 0.6170929782884923, "grad_norm": 0.296875, "learning_rate": 7.678206306274495e-05, "loss": 1.533, "step": 5585 }, { "epoch": 0.6176454339539252, "grad_norm": 0.30859375, "learning_rate": 7.659452242024482e-05, "loss": 1.3519, "step": 5590 }, { "epoch": 0.6181978896193581, "grad_norm": 0.345703125, "learning_rate": 7.64070688289822e-05, "loss": 1.3747, "step": 5595 }, { "epoch": 0.6187503452847909, "grad_norm": 0.3046875, "learning_rate": 7.621970298614717e-05, "loss": 1.5023, "step": 5600 }, { "epoch": 0.6193028009502237, "grad_norm": 0.298828125, "learning_rate": 7.603242558860347e-05, "loss": 1.4198, "step": 5605 }, { "epoch": 0.6198552566156565, "grad_norm": 0.306640625, "learning_rate": 7.584523733288589e-05, "loss": 1.4645, "step": 5610 }, { "epoch": 0.6204077122810895, "grad_norm": 0.330078125, "learning_rate": 7.565813891519765e-05, "loss": 1.4773, "step": 5615 }, { "epoch": 0.6209601679465223, "grad_norm": 0.322265625, "learning_rate": 7.547113103140786e-05, "loss": 1.4707, "step": 5620 }, { "epoch": 0.6215126236119551, "grad_norm": 0.3125, "learning_rate": 7.528421437704892e-05, "loss": 1.4016, "step": 5625 }, { "epoch": 0.622065079277388, "grad_norm": 0.318359375, "learning_rate": 7.509738964731389e-05, "loss": 1.3629, "step": 5630 }, { "epoch": 0.6226175349428208, "grad_norm": 0.30859375, "learning_rate": 7.491065753705399e-05, "loss": 1.4072, "step": 5635 }, { "epoch": 0.6231699906082537, "grad_norm": 0.30859375, "learning_rate": 7.472401874077592e-05, "loss": 1.4738, "step": 5640 }, { "epoch": 0.6237224462736866, "grad_norm": 0.30859375, "learning_rate": 7.453747395263931e-05, "loss": 1.3884, "step": 5645 }, { "epoch": 0.6242749019391194, "grad_norm": 0.302734375, "learning_rate": 7.43510238664542e-05, "loss": 1.387, "step": 5650 }, { "epoch": 0.6248273576045522, "grad_norm": 0.318359375, "learning_rate": 7.416466917567837e-05, "loss": 1.456, "step": 5655 }, { "epoch": 0.625379813269985, "grad_norm": 0.330078125, "learning_rate": 7.397841057341479e-05, "loss": 1.3837, "step": 5660 }, { "epoch": 0.625932268935418, "grad_norm": 0.318359375, "learning_rate": 7.379224875240911e-05, "loss": 1.4575, "step": 5665 }, { "epoch": 0.6264847246008508, "grad_norm": 0.326171875, "learning_rate": 7.360618440504694e-05, "loss": 1.4044, "step": 5670 }, { "epoch": 0.6270371802662836, "grad_norm": 0.3125, "learning_rate": 7.342021822335143e-05, "loss": 1.3745, "step": 5675 }, { "epoch": 0.6275896359317165, "grad_norm": 0.30859375, "learning_rate": 7.323435089898059e-05, "loss": 1.5103, "step": 5680 }, { "epoch": 0.6281420915971493, "grad_norm": 0.294921875, "learning_rate": 7.304858312322475e-05, "loss": 1.3561, "step": 5685 }, { "epoch": 0.6286945472625822, "grad_norm": 0.314453125, "learning_rate": 7.2862915587004e-05, "loss": 1.3271, "step": 5690 }, { "epoch": 0.629247002928015, "grad_norm": 0.3046875, "learning_rate": 7.267734898086564e-05, "loss": 1.4493, "step": 5695 }, { "epoch": 0.6297994585934479, "grad_norm": 0.30078125, "learning_rate": 7.249188399498158e-05, "loss": 1.4204, "step": 5700 }, { "epoch": 0.6303519142588807, "grad_norm": 0.333984375, "learning_rate": 7.230652131914574e-05, "loss": 1.3962, "step": 5705 }, { "epoch": 0.6309043699243135, "grad_norm": 0.306640625, "learning_rate": 7.21212616427715e-05, "loss": 1.4979, "step": 5710 }, { "epoch": 0.6314568255897465, "grad_norm": 0.31640625, "learning_rate": 7.193610565488924e-05, "loss": 1.454, "step": 5715 }, { "epoch": 0.6320092812551793, "grad_norm": 0.30859375, "learning_rate": 7.175105404414362e-05, "loss": 1.4328, "step": 5720 }, { "epoch": 0.6325617369206121, "grad_norm": 0.33203125, "learning_rate": 7.156610749879116e-05, "loss": 1.4053, "step": 5725 }, { "epoch": 0.633114192586045, "grad_norm": 0.306640625, "learning_rate": 7.138126670669755e-05, "loss": 1.3886, "step": 5730 }, { "epoch": 0.6336666482514778, "grad_norm": 0.3203125, "learning_rate": 7.11965323553352e-05, "loss": 1.4102, "step": 5735 }, { "epoch": 0.6342191039169107, "grad_norm": 0.3203125, "learning_rate": 7.10119051317806e-05, "loss": 1.3462, "step": 5740 }, { "epoch": 0.6347715595823435, "grad_norm": 0.322265625, "learning_rate": 7.082738572271185e-05, "loss": 1.3464, "step": 5745 }, { "epoch": 0.6353240152477764, "grad_norm": 0.31640625, "learning_rate": 7.0642974814406e-05, "loss": 1.4082, "step": 5750 }, { "epoch": 0.6358764709132092, "grad_norm": 0.330078125, "learning_rate": 7.045867309273664e-05, "loss": 1.4451, "step": 5755 }, { "epoch": 0.636428926578642, "grad_norm": 0.3046875, "learning_rate": 7.027448124317119e-05, "loss": 1.4267, "step": 5760 }, { "epoch": 0.636981382244075, "grad_norm": 0.302734375, "learning_rate": 7.009039995076844e-05, "loss": 1.4514, "step": 5765 }, { "epoch": 0.6375338379095078, "grad_norm": 0.29296875, "learning_rate": 6.990642990017602e-05, "loss": 1.4112, "step": 5770 }, { "epoch": 0.6380862935749406, "grad_norm": 0.298828125, "learning_rate": 6.97225717756278e-05, "loss": 1.4462, "step": 5775 }, { "epoch": 0.6386387492403734, "grad_norm": 0.296875, "learning_rate": 6.953882626094136e-05, "loss": 1.414, "step": 5780 }, { "epoch": 0.6391912049058063, "grad_norm": 0.3125, "learning_rate": 6.93551940395155e-05, "loss": 1.4571, "step": 5785 }, { "epoch": 0.6397436605712391, "grad_norm": 0.32421875, "learning_rate": 6.917167579432753e-05, "loss": 1.3975, "step": 5790 }, { "epoch": 0.640296116236672, "grad_norm": 0.310546875, "learning_rate": 6.898827220793103e-05, "loss": 1.4832, "step": 5795 }, { "epoch": 0.6408485719021049, "grad_norm": 0.294921875, "learning_rate": 6.880498396245298e-05, "loss": 1.4024, "step": 5800 }, { "epoch": 0.6414010275675377, "grad_norm": 0.33203125, "learning_rate": 6.862181173959146e-05, "loss": 1.4506, "step": 5805 }, { "epoch": 0.6419534832329705, "grad_norm": 0.302734375, "learning_rate": 6.843875622061304e-05, "loss": 1.4239, "step": 5810 }, { "epoch": 0.6425059388984034, "grad_norm": 0.32421875, "learning_rate": 6.825581808635016e-05, "loss": 1.4, "step": 5815 }, { "epoch": 0.6430583945638363, "grad_norm": 0.314453125, "learning_rate": 6.80729980171987e-05, "loss": 1.3876, "step": 5820 }, { "epoch": 0.6436108502292691, "grad_norm": 0.330078125, "learning_rate": 6.789029669311551e-05, "loss": 1.4729, "step": 5825 }, { "epoch": 0.6441633058947019, "grad_norm": 0.318359375, "learning_rate": 6.770771479361568e-05, "loss": 1.4531, "step": 5830 }, { "epoch": 0.6447157615601348, "grad_norm": 0.33203125, "learning_rate": 6.752525299777021e-05, "loss": 1.3799, "step": 5835 }, { "epoch": 0.6452682172255676, "grad_norm": 0.341796875, "learning_rate": 6.734291198420333e-05, "loss": 1.4754, "step": 5840 }, { "epoch": 0.6458206728910005, "grad_norm": 0.298828125, "learning_rate": 6.716069243109011e-05, "loss": 1.3178, "step": 5845 }, { "epoch": 0.6463731285564334, "grad_norm": 0.314453125, "learning_rate": 6.697859501615387e-05, "loss": 1.4845, "step": 5850 }, { "epoch": 0.6469255842218662, "grad_norm": 0.349609375, "learning_rate": 6.679662041666362e-05, "loss": 1.373, "step": 5855 }, { "epoch": 0.647478039887299, "grad_norm": 0.32421875, "learning_rate": 6.661476930943163e-05, "loss": 1.4152, "step": 5860 }, { "epoch": 0.6480304955527318, "grad_norm": 0.30078125, "learning_rate": 6.643304237081087e-05, "loss": 1.4324, "step": 5865 }, { "epoch": 0.6485829512181648, "grad_norm": 0.361328125, "learning_rate": 6.625144027669245e-05, "loss": 1.396, "step": 5870 }, { "epoch": 0.6491354068835976, "grad_norm": 0.322265625, "learning_rate": 6.60699637025032e-05, "loss": 1.4296, "step": 5875 }, { "epoch": 0.6496878625490304, "grad_norm": 0.3125, "learning_rate": 6.588861332320306e-05, "loss": 1.4973, "step": 5880 }, { "epoch": 0.6502403182144633, "grad_norm": 0.296875, "learning_rate": 6.570738981328266e-05, "loss": 1.375, "step": 5885 }, { "epoch": 0.6507927738798961, "grad_norm": 0.3046875, "learning_rate": 6.552629384676079e-05, "loss": 1.3495, "step": 5890 }, { "epoch": 0.651345229545329, "grad_norm": 0.30078125, "learning_rate": 6.534532609718177e-05, "loss": 1.463, "step": 5895 }, { "epoch": 0.6518976852107619, "grad_norm": 0.310546875, "learning_rate": 6.516448723761315e-05, "loss": 1.423, "step": 5900 }, { "epoch": 0.6524501408761947, "grad_norm": 0.318359375, "learning_rate": 6.498377794064303e-05, "loss": 1.4837, "step": 5905 }, { "epoch": 0.6530025965416275, "grad_norm": 0.326171875, "learning_rate": 6.480319887837771e-05, "loss": 1.3753, "step": 5910 }, { "epoch": 0.6535550522070603, "grad_norm": 0.345703125, "learning_rate": 6.462275072243908e-05, "loss": 1.4911, "step": 5915 }, { "epoch": 0.6541075078724933, "grad_norm": 0.32421875, "learning_rate": 6.444243414396208e-05, "loss": 1.3941, "step": 5920 }, { "epoch": 0.6546599635379261, "grad_norm": 0.314453125, "learning_rate": 6.426224981359238e-05, "loss": 1.3927, "step": 5925 }, { "epoch": 0.6552124192033589, "grad_norm": 0.310546875, "learning_rate": 6.408219840148375e-05, "loss": 1.4448, "step": 5930 }, { "epoch": 0.6557648748687918, "grad_norm": 0.70703125, "learning_rate": 6.390228057729557e-05, "loss": 1.3537, "step": 5935 }, { "epoch": 0.6563173305342246, "grad_norm": 0.30859375, "learning_rate": 6.372249701019045e-05, "loss": 1.3776, "step": 5940 }, { "epoch": 0.6568697861996575, "grad_norm": 0.330078125, "learning_rate": 6.354284836883156e-05, "loss": 1.3591, "step": 5945 }, { "epoch": 0.6574222418650904, "grad_norm": 0.333984375, "learning_rate": 6.336333532138032e-05, "loss": 1.3008, "step": 5950 }, { "epoch": 0.6579746975305232, "grad_norm": 0.326171875, "learning_rate": 6.31839585354938e-05, "loss": 1.4673, "step": 5955 }, { "epoch": 0.658527153195956, "grad_norm": 0.310546875, "learning_rate": 6.300471867832229e-05, "loss": 1.4225, "step": 5960 }, { "epoch": 0.6590796088613888, "grad_norm": 0.32421875, "learning_rate": 6.282561641650682e-05, "loss": 1.4618, "step": 5965 }, { "epoch": 0.6596320645268218, "grad_norm": 0.302734375, "learning_rate": 6.264665241617666e-05, "loss": 1.3691, "step": 5970 }, { "epoch": 0.6601845201922546, "grad_norm": 0.31640625, "learning_rate": 6.246782734294683e-05, "loss": 1.4719, "step": 5975 }, { "epoch": 0.6607369758576874, "grad_norm": 0.296875, "learning_rate": 6.228914186191563e-05, "loss": 1.3776, "step": 5980 }, { "epoch": 0.6612894315231203, "grad_norm": 0.3359375, "learning_rate": 6.211059663766224e-05, "loss": 1.4838, "step": 5985 }, { "epoch": 0.6618418871885531, "grad_norm": 0.314453125, "learning_rate": 6.193219233424414e-05, "loss": 1.4367, "step": 5990 }, { "epoch": 0.662394342853986, "grad_norm": 0.32421875, "learning_rate": 6.175392961519471e-05, "loss": 1.4332, "step": 5995 }, { "epoch": 0.6629467985194188, "grad_norm": 0.318359375, "learning_rate": 6.15758091435207e-05, "loss": 1.4095, "step": 6000 }, { "epoch": 0.6634992541848517, "grad_norm": 0.3046875, "learning_rate": 6.139783158169984e-05, "loss": 1.4669, "step": 6005 }, { "epoch": 0.6640517098502845, "grad_norm": 0.322265625, "learning_rate": 6.121999759167837e-05, "loss": 1.4407, "step": 6010 }, { "epoch": 0.6646041655157173, "grad_norm": 0.314453125, "learning_rate": 6.104230783486847e-05, "loss": 1.3339, "step": 6015 }, { "epoch": 0.6651566211811503, "grad_norm": 0.314453125, "learning_rate": 6.086476297214594e-05, "loss": 1.4346, "step": 6020 }, { "epoch": 0.6657090768465831, "grad_norm": 0.3125, "learning_rate": 6.068736366384764e-05, "loss": 1.4282, "step": 6025 }, { "epoch": 0.6662615325120159, "grad_norm": 0.302734375, "learning_rate": 6.0510110569769095e-05, "loss": 1.3328, "step": 6030 }, { "epoch": 0.6668139881774487, "grad_norm": 0.326171875, "learning_rate": 6.033300434916203e-05, "loss": 1.4929, "step": 6035 }, { "epoch": 0.6673664438428816, "grad_norm": 0.3125, "learning_rate": 6.0156045660731873e-05, "loss": 1.4232, "step": 6040 }, { "epoch": 0.6679188995083145, "grad_norm": 0.302734375, "learning_rate": 5.9979235162635394e-05, "loss": 1.3371, "step": 6045 }, { "epoch": 0.6684713551737473, "grad_norm": 0.337890625, "learning_rate": 5.980257351247818e-05, "loss": 1.3739, "step": 6050 }, { "epoch": 0.6690238108391802, "grad_norm": 0.345703125, "learning_rate": 5.9626061367312166e-05, "loss": 1.4272, "step": 6055 }, { "epoch": 0.669576266504613, "grad_norm": 0.326171875, "learning_rate": 5.9449699383633316e-05, "loss": 1.3722, "step": 6060 }, { "epoch": 0.6701287221700458, "grad_norm": 0.3125, "learning_rate": 5.927348821737906e-05, "loss": 1.4717, "step": 6065 }, { "epoch": 0.6706811778354786, "grad_norm": 0.310546875, "learning_rate": 5.909742852392587e-05, "loss": 1.4052, "step": 6070 }, { "epoch": 0.6712336335009116, "grad_norm": 0.310546875, "learning_rate": 5.8921520958086905e-05, "loss": 1.426, "step": 6075 }, { "epoch": 0.6717860891663444, "grad_norm": 0.3203125, "learning_rate": 5.8745766174109495e-05, "loss": 1.4957, "step": 6080 }, { "epoch": 0.6723385448317772, "grad_norm": 0.302734375, "learning_rate": 5.857016482567275e-05, "loss": 1.4444, "step": 6085 }, { "epoch": 0.6728910004972101, "grad_norm": 0.32421875, "learning_rate": 5.8394717565885106e-05, "loss": 1.5115, "step": 6090 }, { "epoch": 0.6734434561626429, "grad_norm": 0.310546875, "learning_rate": 5.821942504728183e-05, "loss": 1.388, "step": 6095 }, { "epoch": 0.6739959118280758, "grad_norm": 0.33203125, "learning_rate": 5.804428792182279e-05, "loss": 1.4386, "step": 6100 }, { "epoch": 0.6745483674935087, "grad_norm": 0.318359375, "learning_rate": 5.786930684088988e-05, "loss": 1.495, "step": 6105 }, { "epoch": 0.6751008231589415, "grad_norm": 0.322265625, "learning_rate": 5.7694482455284504e-05, "loss": 1.3579, "step": 6110 }, { "epoch": 0.6756532788243743, "grad_norm": 0.310546875, "learning_rate": 5.751981541522539e-05, "loss": 1.327, "step": 6115 }, { "epoch": 0.6762057344898071, "grad_norm": 0.32421875, "learning_rate": 5.734530637034603e-05, "loss": 1.4853, "step": 6120 }, { "epoch": 0.6767581901552401, "grad_norm": 0.3125, "learning_rate": 5.7170955969692265e-05, "loss": 1.3874, "step": 6125 }, { "epoch": 0.6773106458206729, "grad_norm": 0.31640625, "learning_rate": 5.699676486171994e-05, "loss": 1.3563, "step": 6130 }, { "epoch": 0.6778631014861057, "grad_norm": 0.3125, "learning_rate": 5.6822733694292427e-05, "loss": 1.4127, "step": 6135 }, { "epoch": 0.6784155571515386, "grad_norm": 0.3125, "learning_rate": 5.664886311467821e-05, "loss": 1.354, "step": 6140 }, { "epoch": 0.6789680128169714, "grad_norm": 0.330078125, "learning_rate": 5.647515376954852e-05, "loss": 1.3495, "step": 6145 }, { "epoch": 0.6795204684824043, "grad_norm": 0.322265625, "learning_rate": 5.630160630497493e-05, "loss": 1.3592, "step": 6150 }, { "epoch": 0.6800729241478372, "grad_norm": 0.3515625, "learning_rate": 5.612822136642697e-05, "loss": 1.4271, "step": 6155 }, { "epoch": 0.68062537981327, "grad_norm": 0.3125, "learning_rate": 5.5954999598769575e-05, "loss": 1.3796, "step": 6160 }, { "epoch": 0.6811778354787028, "grad_norm": 0.33984375, "learning_rate": 5.578194164626089e-05, "loss": 1.4226, "step": 6165 }, { "epoch": 0.6817302911441356, "grad_norm": 0.314453125, "learning_rate": 5.5609048152549794e-05, "loss": 1.4148, "step": 6170 }, { "epoch": 0.6822827468095686, "grad_norm": 0.3125, "learning_rate": 5.543631976067345e-05, "loss": 1.4115, "step": 6175 }, { "epoch": 0.6828352024750014, "grad_norm": 0.30859375, "learning_rate": 5.526375711305504e-05, "loss": 1.4234, "step": 6180 }, { "epoch": 0.6833876581404342, "grad_norm": 0.31640625, "learning_rate": 5.509136085150122e-05, "loss": 1.4154, "step": 6185 }, { "epoch": 0.6839401138058671, "grad_norm": 0.310546875, "learning_rate": 5.491913161719984e-05, "loss": 1.4132, "step": 6190 }, { "epoch": 0.6844925694712999, "grad_norm": 0.310546875, "learning_rate": 5.4747070050717556e-05, "loss": 1.4359, "step": 6195 }, { "epoch": 0.6850450251367328, "grad_norm": 0.3203125, "learning_rate": 5.457517679199736e-05, "loss": 1.4502, "step": 6200 }, { "epoch": 0.6855974808021656, "grad_norm": 0.3046875, "learning_rate": 5.4403452480356346e-05, "loss": 1.4295, "step": 6205 }, { "epoch": 0.6861499364675985, "grad_norm": 0.30859375, "learning_rate": 5.423189775448323e-05, "loss": 1.4613, "step": 6210 }, { "epoch": 0.6867023921330313, "grad_norm": 0.3125, "learning_rate": 5.406051325243586e-05, "loss": 1.3301, "step": 6215 }, { "epoch": 0.6872548477984641, "grad_norm": 0.330078125, "learning_rate": 5.3889299611639174e-05, "loss": 1.429, "step": 6220 }, { "epoch": 0.6878073034638971, "grad_norm": 0.3203125, "learning_rate": 5.371825746888251e-05, "loss": 1.4605, "step": 6225 }, { "epoch": 0.6883597591293299, "grad_norm": 0.322265625, "learning_rate": 5.35473874603174e-05, "loss": 1.4406, "step": 6230 }, { "epoch": 0.6889122147947627, "grad_norm": 0.310546875, "learning_rate": 5.337669022145515e-05, "loss": 1.4548, "step": 6235 }, { "epoch": 0.6894646704601956, "grad_norm": 0.318359375, "learning_rate": 5.320616638716448e-05, "loss": 1.4498, "step": 6240 }, { "epoch": 0.6900171261256284, "grad_norm": 0.35546875, "learning_rate": 5.3035816591669205e-05, "loss": 1.4835, "step": 6245 }, { "epoch": 0.6905695817910613, "grad_norm": 0.306640625, "learning_rate": 5.286564146854581e-05, "loss": 1.4066, "step": 6250 }, { "epoch": 0.6911220374564941, "grad_norm": 0.3203125, "learning_rate": 5.269564165072115e-05, "loss": 1.393, "step": 6255 }, { "epoch": 0.691674493121927, "grad_norm": 0.318359375, "learning_rate": 5.2525817770470084e-05, "loss": 1.3736, "step": 6260 }, { "epoch": 0.6922269487873598, "grad_norm": 0.296875, "learning_rate": 5.2356170459413035e-05, "loss": 1.3745, "step": 6265 }, { "epoch": 0.6927794044527926, "grad_norm": 0.33203125, "learning_rate": 5.2186700348513786e-05, "loss": 1.471, "step": 6270 }, { "epoch": 0.6933318601182256, "grad_norm": 0.31640625, "learning_rate": 5.2017408068077064e-05, "loss": 1.3714, "step": 6275 }, { "epoch": 0.6938843157836584, "grad_norm": 0.310546875, "learning_rate": 5.18482942477462e-05, "loss": 1.3272, "step": 6280 }, { "epoch": 0.6944367714490912, "grad_norm": 0.33203125, "learning_rate": 5.1679359516500735e-05, "loss": 1.445, "step": 6285 }, { "epoch": 0.694989227114524, "grad_norm": 0.3203125, "learning_rate": 5.151060450265419e-05, "loss": 1.5031, "step": 6290 }, { "epoch": 0.6955416827799569, "grad_norm": 0.30078125, "learning_rate": 5.1342029833851634e-05, "loss": 1.3876, "step": 6295 }, { "epoch": 0.6960941384453898, "grad_norm": 0.306640625, "learning_rate": 5.1173636137067406e-05, "loss": 1.3588, "step": 6300 }, { "epoch": 0.6966465941108226, "grad_norm": 0.318359375, "learning_rate": 5.1005424038602724e-05, "loss": 1.365, "step": 6305 }, { "epoch": 0.6971990497762555, "grad_norm": 0.326171875, "learning_rate": 5.083739416408343e-05, "loss": 1.3757, "step": 6310 }, { "epoch": 0.6977515054416883, "grad_norm": 0.318359375, "learning_rate": 5.066954713845766e-05, "loss": 1.4058, "step": 6315 }, { "epoch": 0.6983039611071211, "grad_norm": 0.322265625, "learning_rate": 5.050188358599335e-05, "loss": 1.4267, "step": 6320 }, { "epoch": 0.6988564167725541, "grad_norm": 0.314453125, "learning_rate": 5.033440413027619e-05, "loss": 1.3602, "step": 6325 }, { "epoch": 0.6994088724379869, "grad_norm": 0.298828125, "learning_rate": 5.01671093942071e-05, "loss": 1.3058, "step": 6330 }, { "epoch": 0.6999613281034197, "grad_norm": 0.30859375, "learning_rate": 5.000000000000002e-05, "loss": 1.4389, "step": 6335 }, { "epoch": 0.7005137837688525, "grad_norm": 0.328125, "learning_rate": 4.9833076569179506e-05, "loss": 1.421, "step": 6340 }, { "epoch": 0.7010662394342854, "grad_norm": 0.3046875, "learning_rate": 4.9666339722578494e-05, "loss": 1.4014, "step": 6345 }, { "epoch": 0.7016186950997182, "grad_norm": 0.298828125, "learning_rate": 4.949979008033596e-05, "loss": 1.419, "step": 6350 }, { "epoch": 0.7021711507651511, "grad_norm": 0.298828125, "learning_rate": 4.93334282618946e-05, "loss": 1.4411, "step": 6355 }, { "epoch": 0.702723606430584, "grad_norm": 0.30859375, "learning_rate": 4.9167254885998584e-05, "loss": 1.3659, "step": 6360 }, { "epoch": 0.7032760620960168, "grad_norm": 0.32421875, "learning_rate": 4.900127057069116e-05, "loss": 1.4577, "step": 6365 }, { "epoch": 0.7038285177614496, "grad_norm": 0.341796875, "learning_rate": 4.883547593331248e-05, "loss": 1.3618, "step": 6370 }, { "epoch": 0.7043809734268824, "grad_norm": 0.314453125, "learning_rate": 4.866987159049713e-05, "loss": 1.4154, "step": 6375 }, { "epoch": 0.7049334290923154, "grad_norm": 0.3046875, "learning_rate": 4.850445815817202e-05, "loss": 1.3765, "step": 6380 }, { "epoch": 0.7054858847577482, "grad_norm": 0.33203125, "learning_rate": 4.833923625155399e-05, "loss": 1.4659, "step": 6385 }, { "epoch": 0.706038340423181, "grad_norm": 0.310546875, "learning_rate": 4.817420648514755e-05, "loss": 1.377, "step": 6390 }, { "epoch": 0.7065907960886139, "grad_norm": 0.3046875, "learning_rate": 4.800936947274255e-05, "loss": 1.4308, "step": 6395 }, { "epoch": 0.7071432517540467, "grad_norm": 0.3125, "learning_rate": 4.7844725827412054e-05, "loss": 1.4635, "step": 6400 }, { "epoch": 0.7076957074194796, "grad_norm": 0.359375, "learning_rate": 4.7680276161509795e-05, "loss": 1.3211, "step": 6405 }, { "epoch": 0.7082481630849125, "grad_norm": 0.322265625, "learning_rate": 4.751602108666818e-05, "loss": 1.4339, "step": 6410 }, { "epoch": 0.7088006187503453, "grad_norm": 0.3125, "learning_rate": 4.735196121379571e-05, "loss": 1.3621, "step": 6415 }, { "epoch": 0.7093530744157781, "grad_norm": 0.30078125, "learning_rate": 4.7188097153075017e-05, "loss": 1.3405, "step": 6420 }, { "epoch": 0.7099055300812109, "grad_norm": 0.30859375, "learning_rate": 4.7024429513960425e-05, "loss": 1.4359, "step": 6425 }, { "epoch": 0.7104579857466439, "grad_norm": 0.314453125, "learning_rate": 4.686095890517569e-05, "loss": 1.4501, "step": 6430 }, { "epoch": 0.7110104414120767, "grad_norm": 0.287109375, "learning_rate": 4.6697685934711785e-05, "loss": 1.2913, "step": 6435 }, { "epoch": 0.7115628970775095, "grad_norm": 0.310546875, "learning_rate": 4.65346112098246e-05, "loss": 1.4188, "step": 6440 }, { "epoch": 0.7121153527429424, "grad_norm": 0.388671875, "learning_rate": 4.637173533703267e-05, "loss": 1.398, "step": 6445 }, { "epoch": 0.7126678084083752, "grad_norm": 0.310546875, "learning_rate": 4.6209058922115015e-05, "loss": 1.4681, "step": 6450 }, { "epoch": 0.7132202640738081, "grad_norm": 0.333984375, "learning_rate": 4.6046582570108744e-05, "loss": 1.4483, "step": 6455 }, { "epoch": 0.713772719739241, "grad_norm": 0.31640625, "learning_rate": 4.588430688530696e-05, "loss": 1.3695, "step": 6460 }, { "epoch": 0.7143251754046738, "grad_norm": 0.3203125, "learning_rate": 4.5722232471256296e-05, "loss": 1.4027, "step": 6465 }, { "epoch": 0.7148776310701066, "grad_norm": 0.318359375, "learning_rate": 4.556035993075495e-05, "loss": 1.4575, "step": 6470 }, { "epoch": 0.7154300867355394, "grad_norm": 0.341796875, "learning_rate": 4.539868986585022e-05, "loss": 1.4805, "step": 6475 }, { "epoch": 0.7159825424009724, "grad_norm": 0.3125, "learning_rate": 4.523722287783636e-05, "loss": 1.4233, "step": 6480 }, { "epoch": 0.7165349980664052, "grad_norm": 0.30859375, "learning_rate": 4.5075959567252335e-05, "loss": 1.5138, "step": 6485 }, { "epoch": 0.717087453731838, "grad_norm": 0.30859375, "learning_rate": 4.491490053387958e-05, "loss": 1.3541, "step": 6490 }, { "epoch": 0.7176399093972708, "grad_norm": 0.322265625, "learning_rate": 4.475404637673974e-05, "loss": 1.3962, "step": 6495 }, { "epoch": 0.7181923650627037, "grad_norm": 0.3125, "learning_rate": 4.459339769409252e-05, "loss": 1.3973, "step": 6500 }, { "epoch": 0.7187448207281366, "grad_norm": 0.318359375, "learning_rate": 4.443295508343336e-05, "loss": 1.4353, "step": 6505 }, { "epoch": 0.7192972763935694, "grad_norm": 0.310546875, "learning_rate": 4.427271914149128e-05, "loss": 1.4758, "step": 6510 }, { "epoch": 0.7198497320590023, "grad_norm": 0.30859375, "learning_rate": 4.41126904642267e-05, "loss": 1.3707, "step": 6515 }, { "epoch": 0.7204021877244351, "grad_norm": 0.3125, "learning_rate": 4.395286964682903e-05, "loss": 1.4377, "step": 6520 }, { "epoch": 0.7209546433898679, "grad_norm": 0.30859375, "learning_rate": 4.379325728371473e-05, "loss": 1.4025, "step": 6525 }, { "epoch": 0.7215070990553009, "grad_norm": 0.328125, "learning_rate": 4.363385396852491e-05, "loss": 1.3887, "step": 6530 }, { "epoch": 0.7220595547207337, "grad_norm": 0.314453125, "learning_rate": 4.347466029412316e-05, "loss": 1.4631, "step": 6535 }, { "epoch": 0.7226120103861665, "grad_norm": 0.302734375, "learning_rate": 4.331567685259338e-05, "loss": 1.3674, "step": 6540 }, { "epoch": 0.7231644660515993, "grad_norm": 0.3125, "learning_rate": 4.3156904235237574e-05, "loss": 1.4344, "step": 6545 }, { "epoch": 0.7237169217170322, "grad_norm": 0.3203125, "learning_rate": 4.2998343032573596e-05, "loss": 1.47, "step": 6550 }, { "epoch": 0.7242693773824651, "grad_norm": 0.310546875, "learning_rate": 4.2839993834333014e-05, "loss": 1.4782, "step": 6555 }, { "epoch": 0.7248218330478979, "grad_norm": 0.291015625, "learning_rate": 4.2681857229458885e-05, "loss": 1.3742, "step": 6560 }, { "epoch": 0.7253742887133308, "grad_norm": 0.33984375, "learning_rate": 4.25239338061036e-05, "loss": 1.486, "step": 6565 }, { "epoch": 0.7259267443787636, "grad_norm": 0.306640625, "learning_rate": 4.236622415162668e-05, "loss": 1.4639, "step": 6570 }, { "epoch": 0.7264792000441964, "grad_norm": 0.326171875, "learning_rate": 4.220872885259247e-05, "loss": 1.3645, "step": 6575 }, { "epoch": 0.7270316557096294, "grad_norm": 0.322265625, "learning_rate": 4.20514484947682e-05, "loss": 1.351, "step": 6580 }, { "epoch": 0.7275841113750622, "grad_norm": 0.302734375, "learning_rate": 4.189438366312162e-05, "loss": 1.4509, "step": 6585 }, { "epoch": 0.728136567040495, "grad_norm": 0.306640625, "learning_rate": 4.17375349418189e-05, "loss": 1.3844, "step": 6590 }, { "epoch": 0.7286890227059278, "grad_norm": 0.3125, "learning_rate": 4.158090291422243e-05, "loss": 1.4377, "step": 6595 }, { "epoch": 0.7292414783713607, "grad_norm": 0.296875, "learning_rate": 4.142448816288864e-05, "loss": 1.2994, "step": 6600 }, { "epoch": 0.7297939340367936, "grad_norm": 0.296875, "learning_rate": 4.1268291269565885e-05, "loss": 1.3902, "step": 6605 }, { "epoch": 0.7303463897022264, "grad_norm": 0.30859375, "learning_rate": 4.111231281519222e-05, "loss": 1.4172, "step": 6610 }, { "epoch": 0.7308988453676593, "grad_norm": 0.296875, "learning_rate": 4.095655337989329e-05, "loss": 1.4288, "step": 6615 }, { "epoch": 0.7314513010330921, "grad_norm": 0.314453125, "learning_rate": 4.080101354298016e-05, "loss": 1.4087, "step": 6620 }, { "epoch": 0.7320037566985249, "grad_norm": 0.310546875, "learning_rate": 4.0645693882947046e-05, "loss": 1.4225, "step": 6625 }, { "epoch": 0.7325562123639577, "grad_norm": 0.318359375, "learning_rate": 4.0490594977469406e-05, "loss": 1.3727, "step": 6630 }, { "epoch": 0.7331086680293907, "grad_norm": 0.31640625, "learning_rate": 4.0335717403401576e-05, "loss": 1.4652, "step": 6635 }, { "epoch": 0.7336611236948235, "grad_norm": 0.33203125, "learning_rate": 4.018106173677473e-05, "loss": 1.3582, "step": 6640 }, { "epoch": 0.7342135793602563, "grad_norm": 0.3125, "learning_rate": 4.00266285527947e-05, "loss": 1.3786, "step": 6645 }, { "epoch": 0.7347660350256892, "grad_norm": 0.3203125, "learning_rate": 3.987241842583983e-05, "loss": 1.4383, "step": 6650 }, { "epoch": 0.735318490691122, "grad_norm": 0.30859375, "learning_rate": 3.971843192945889e-05, "loss": 1.4522, "step": 6655 }, { "epoch": 0.7358709463565549, "grad_norm": 0.328125, "learning_rate": 3.9564669636368866e-05, "loss": 1.4472, "step": 6660 }, { "epoch": 0.7364234020219877, "grad_norm": 0.3125, "learning_rate": 3.9411132118452896e-05, "loss": 1.4341, "step": 6665 }, { "epoch": 0.7369758576874206, "grad_norm": 0.310546875, "learning_rate": 3.9257819946758135e-05, "loss": 1.4202, "step": 6670 }, { "epoch": 0.7375283133528534, "grad_norm": 0.306640625, "learning_rate": 3.910473369149361e-05, "loss": 1.4052, "step": 6675 }, { "epoch": 0.7380807690182862, "grad_norm": 0.318359375, "learning_rate": 3.895187392202804e-05, "loss": 1.466, "step": 6680 }, { "epoch": 0.7386332246837192, "grad_norm": 0.3203125, "learning_rate": 3.8799241206887836e-05, "loss": 1.3813, "step": 6685 }, { "epoch": 0.739185680349152, "grad_norm": 0.33203125, "learning_rate": 3.864683611375497e-05, "loss": 1.3681, "step": 6690 }, { "epoch": 0.7397381360145848, "grad_norm": 0.296875, "learning_rate": 3.849465920946475e-05, "loss": 1.4094, "step": 6695 }, { "epoch": 0.7402905916800177, "grad_norm": 0.310546875, "learning_rate": 3.834271106000385e-05, "loss": 1.3916, "step": 6700 }, { "epoch": 0.7408430473454505, "grad_norm": 0.33203125, "learning_rate": 3.819099223050813e-05, "loss": 1.3616, "step": 6705 }, { "epoch": 0.7413955030108834, "grad_norm": 0.318359375, "learning_rate": 3.8039503285260506e-05, "loss": 1.3636, "step": 6710 }, { "epoch": 0.7419479586763162, "grad_norm": 0.3125, "learning_rate": 3.788824478768893e-05, "loss": 1.3739, "step": 6715 }, { "epoch": 0.7425004143417491, "grad_norm": 0.298828125, "learning_rate": 3.773721730036426e-05, "loss": 1.4124, "step": 6720 }, { "epoch": 0.7430528700071819, "grad_norm": 0.32421875, "learning_rate": 3.758642138499819e-05, "loss": 1.4013, "step": 6725 }, { "epoch": 0.7436053256726147, "grad_norm": 0.283203125, "learning_rate": 3.743585760244104e-05, "loss": 1.3522, "step": 6730 }, { "epoch": 0.7441577813380477, "grad_norm": 0.330078125, "learning_rate": 3.728552651267985e-05, "loss": 1.3858, "step": 6735 }, { "epoch": 0.7447102370034805, "grad_norm": 0.33984375, "learning_rate": 3.7135428674836184e-05, "loss": 1.4384, "step": 6740 }, { "epoch": 0.7452626926689133, "grad_norm": 0.291015625, "learning_rate": 3.698556464716411e-05, "loss": 1.4432, "step": 6745 }, { "epoch": 0.7458151483343461, "grad_norm": 0.30859375, "learning_rate": 3.683593498704801e-05, "loss": 1.3553, "step": 6750 }, { "epoch": 0.746367603999779, "grad_norm": 0.361328125, "learning_rate": 3.6686540251000756e-05, "loss": 1.4585, "step": 6755 }, { "epoch": 0.7469200596652119, "grad_norm": 0.310546875, "learning_rate": 3.6537380994661295e-05, "loss": 1.4168, "step": 6760 }, { "epoch": 0.7474725153306447, "grad_norm": 0.3203125, "learning_rate": 3.638845777279286e-05, "loss": 1.3934, "step": 6765 }, { "epoch": 0.7480249709960776, "grad_norm": 0.322265625, "learning_rate": 3.623977113928081e-05, "loss": 1.3967, "step": 6770 }, { "epoch": 0.7485774266615104, "grad_norm": 0.326171875, "learning_rate": 3.6091321647130484e-05, "loss": 1.4235, "step": 6775 }, { "epoch": 0.7491298823269432, "grad_norm": 0.33203125, "learning_rate": 3.59431098484653e-05, "loss": 1.365, "step": 6780 }, { "epoch": 0.7496823379923762, "grad_norm": 0.306640625, "learning_rate": 3.579513629452464e-05, "loss": 1.3719, "step": 6785 }, { "epoch": 0.750234793657809, "grad_norm": 0.314453125, "learning_rate": 3.564740153566176e-05, "loss": 1.4067, "step": 6790 }, { "epoch": 0.7507872493232418, "grad_norm": 0.326171875, "learning_rate": 3.5499906121341785e-05, "loss": 1.3539, "step": 6795 }, { "epoch": 0.7513397049886746, "grad_norm": 0.3359375, "learning_rate": 3.535265060013965e-05, "loss": 1.4184, "step": 6800 }, { "epoch": 0.7518921606541075, "grad_norm": 0.31640625, "learning_rate": 3.520563551973806e-05, "loss": 1.4005, "step": 6805 }, { "epoch": 0.7524446163195404, "grad_norm": 0.373046875, "learning_rate": 3.5058861426925447e-05, "loss": 1.4478, "step": 6810 }, { "epoch": 0.7529970719849732, "grad_norm": 0.31640625, "learning_rate": 3.491232886759398e-05, "loss": 1.3996, "step": 6815 }, { "epoch": 0.7535495276504061, "grad_norm": 0.298828125, "learning_rate": 3.4766038386737506e-05, "loss": 1.4403, "step": 6820 }, { "epoch": 0.7541019833158389, "grad_norm": 0.3046875, "learning_rate": 3.461999052844942e-05, "loss": 1.4721, "step": 6825 }, { "epoch": 0.7546544389812717, "grad_norm": 0.337890625, "learning_rate": 3.447418583592084e-05, "loss": 1.4444, "step": 6830 }, { "epoch": 0.7552068946467047, "grad_norm": 0.31640625, "learning_rate": 3.432862485143846e-05, "loss": 1.3966, "step": 6835 }, { "epoch": 0.7557593503121375, "grad_norm": 0.31640625, "learning_rate": 3.418330811638255e-05, "loss": 1.4488, "step": 6840 }, { "epoch": 0.7563118059775703, "grad_norm": 0.306640625, "learning_rate": 3.4038236171224946e-05, "loss": 1.3696, "step": 6845 }, { "epoch": 0.7568642616430031, "grad_norm": 0.314453125, "learning_rate": 3.389340955552707e-05, "loss": 1.385, "step": 6850 }, { "epoch": 0.757416717308436, "grad_norm": 0.34375, "learning_rate": 3.374882880793785e-05, "loss": 1.4048, "step": 6855 }, { "epoch": 0.7579691729738689, "grad_norm": 0.310546875, "learning_rate": 3.360449446619183e-05, "loss": 1.3815, "step": 6860 }, { "epoch": 0.7585216286393017, "grad_norm": 0.30859375, "learning_rate": 3.346040706710705e-05, "loss": 1.3768, "step": 6865 }, { "epoch": 0.7590740843047346, "grad_norm": 0.326171875, "learning_rate": 3.331656714658313e-05, "loss": 1.4291, "step": 6870 }, { "epoch": 0.7596265399701674, "grad_norm": 0.31640625, "learning_rate": 3.317297523959927e-05, "loss": 1.4714, "step": 6875 }, { "epoch": 0.7601789956356002, "grad_norm": 0.322265625, "learning_rate": 3.302963188021216e-05, "loss": 1.3719, "step": 6880 }, { "epoch": 0.7607314513010331, "grad_norm": 0.322265625, "learning_rate": 3.2886537601554165e-05, "loss": 1.42, "step": 6885 }, { "epoch": 0.761283906966466, "grad_norm": 0.306640625, "learning_rate": 3.274369293583121e-05, "loss": 1.3751, "step": 6890 }, { "epoch": 0.7618363626318988, "grad_norm": 0.310546875, "learning_rate": 3.260109841432085e-05, "loss": 1.3936, "step": 6895 }, { "epoch": 0.7623888182973316, "grad_norm": 0.310546875, "learning_rate": 3.24587545673703e-05, "loss": 1.343, "step": 6900 }, { "epoch": 0.7629412739627645, "grad_norm": 0.3125, "learning_rate": 3.231666192439442e-05, "loss": 1.4089, "step": 6905 }, { "epoch": 0.7634937296281973, "grad_norm": 0.326171875, "learning_rate": 3.217482101387381e-05, "loss": 1.4818, "step": 6910 }, { "epoch": 0.7640461852936302, "grad_norm": 0.78515625, "learning_rate": 3.203323236335277e-05, "loss": 1.4219, "step": 6915 }, { "epoch": 0.764598640959063, "grad_norm": 0.310546875, "learning_rate": 3.189189649943743e-05, "loss": 1.4546, "step": 6920 }, { "epoch": 0.7651510966244959, "grad_norm": 0.314453125, "learning_rate": 3.17508139477937e-05, "loss": 1.3574, "step": 6925 }, { "epoch": 0.7657035522899287, "grad_norm": 0.31640625, "learning_rate": 3.1609985233145334e-05, "loss": 1.3803, "step": 6930 }, { "epoch": 0.7662560079553615, "grad_norm": 0.32421875, "learning_rate": 3.146941087927203e-05, "loss": 1.4591, "step": 6935 }, { "epoch": 0.7668084636207945, "grad_norm": 0.30078125, "learning_rate": 3.132909140900746e-05, "loss": 1.3831, "step": 6940 }, { "epoch": 0.7673609192862273, "grad_norm": 0.30859375, "learning_rate": 3.118902734423731e-05, "loss": 1.3904, "step": 6945 }, { "epoch": 0.7679133749516601, "grad_norm": 0.3125, "learning_rate": 3.104921920589733e-05, "loss": 1.4404, "step": 6950 }, { "epoch": 0.768465830617093, "grad_norm": 0.3125, "learning_rate": 3.0909667513971396e-05, "loss": 1.4067, "step": 6955 }, { "epoch": 0.7690182862825258, "grad_norm": 0.330078125, "learning_rate": 3.077037278748965e-05, "loss": 1.4569, "step": 6960 }, { "epoch": 0.7695707419479587, "grad_norm": 0.3203125, "learning_rate": 3.063133554452645e-05, "loss": 1.366, "step": 6965 }, { "epoch": 0.7701231976133915, "grad_norm": 0.3046875, "learning_rate": 3.0492556302198526e-05, "loss": 1.3561, "step": 6970 }, { "epoch": 0.7706756532788244, "grad_norm": 0.328125, "learning_rate": 3.0354035576663043e-05, "loss": 1.46, "step": 6975 }, { "epoch": 0.7712281089442572, "grad_norm": 0.31640625, "learning_rate": 3.0215773883115706e-05, "loss": 1.4398, "step": 6980 }, { "epoch": 0.77178056460969, "grad_norm": 0.314453125, "learning_rate": 3.0077771735788684e-05, "loss": 1.4308, "step": 6985 }, { "epoch": 0.772333020275123, "grad_norm": 0.310546875, "learning_rate": 2.9940029647948963e-05, "loss": 1.4168, "step": 6990 }, { "epoch": 0.7728854759405558, "grad_norm": 0.31640625, "learning_rate": 2.9802548131896236e-05, "loss": 1.4504, "step": 6995 }, { "epoch": 0.7734379316059886, "grad_norm": 0.306640625, "learning_rate": 2.9665327698961077e-05, "loss": 1.4844, "step": 7000 }, { "epoch": 0.7739903872714214, "grad_norm": 0.3125, "learning_rate": 2.9528368859502996e-05, "loss": 1.4483, "step": 7005 }, { "epoch": 0.7745428429368543, "grad_norm": 0.30078125, "learning_rate": 2.93916721229086e-05, "loss": 1.3185, "step": 7010 }, { "epoch": 0.7750952986022872, "grad_norm": 0.314453125, "learning_rate": 2.9255237997589657e-05, "loss": 1.4033, "step": 7015 }, { "epoch": 0.77564775426772, "grad_norm": 0.3203125, "learning_rate": 2.9119066990981193e-05, "loss": 1.3294, "step": 7020 }, { "epoch": 0.7762002099331529, "grad_norm": 0.32421875, "learning_rate": 2.8983159609539635e-05, "loss": 1.3608, "step": 7025 }, { "epoch": 0.7767526655985857, "grad_norm": 0.298828125, "learning_rate": 2.884751635874098e-05, "loss": 1.3886, "step": 7030 }, { "epoch": 0.7773051212640185, "grad_norm": 0.3203125, "learning_rate": 2.8712137743078695e-05, "loss": 1.3314, "step": 7035 }, { "epoch": 0.7778575769294515, "grad_norm": 0.328125, "learning_rate": 2.8577024266062146e-05, "loss": 1.3881, "step": 7040 }, { "epoch": 0.7784100325948843, "grad_norm": 0.310546875, "learning_rate": 2.844217643021454e-05, "loss": 1.3882, "step": 7045 }, { "epoch": 0.7789624882603171, "grad_norm": 0.326171875, "learning_rate": 2.8307594737071052e-05, "loss": 1.3965, "step": 7050 }, { "epoch": 0.7795149439257499, "grad_norm": 0.33203125, "learning_rate": 2.8173279687177057e-05, "loss": 1.4129, "step": 7055 }, { "epoch": 0.7800673995911828, "grad_norm": 0.318359375, "learning_rate": 2.8039231780086183e-05, "loss": 1.413, "step": 7060 }, { "epoch": 0.7806198552566157, "grad_norm": 0.296875, "learning_rate": 2.7905451514358472e-05, "loss": 1.3399, "step": 7065 }, { "epoch": 0.7811723109220485, "grad_norm": 0.318359375, "learning_rate": 2.7771939387558554e-05, "loss": 1.3916, "step": 7070 }, { "epoch": 0.7817247665874814, "grad_norm": 0.326171875, "learning_rate": 2.7638695896253774e-05, "loss": 1.3693, "step": 7075 }, { "epoch": 0.7822772222529142, "grad_norm": 0.345703125, "learning_rate": 2.7505721536012353e-05, "loss": 1.3703, "step": 7080 }, { "epoch": 0.782829677918347, "grad_norm": 0.380859375, "learning_rate": 2.7373016801401576e-05, "loss": 1.4043, "step": 7085 }, { "epoch": 0.78338213358378, "grad_norm": 0.30859375, "learning_rate": 2.7240582185985798e-05, "loss": 1.4615, "step": 7090 }, { "epoch": 0.7839345892492128, "grad_norm": 0.3046875, "learning_rate": 2.7108418182324857e-05, "loss": 1.4332, "step": 7095 }, { "epoch": 0.7844870449146456, "grad_norm": 0.3125, "learning_rate": 2.6976525281972078e-05, "loss": 1.4364, "step": 7100 }, { "epoch": 0.7850395005800784, "grad_norm": 0.30859375, "learning_rate": 2.68449039754724e-05, "loss": 1.4757, "step": 7105 }, { "epoch": 0.7855919562455113, "grad_norm": 0.314453125, "learning_rate": 2.6713554752360802e-05, "loss": 1.4353, "step": 7110 }, { "epoch": 0.7861444119109442, "grad_norm": 0.32421875, "learning_rate": 2.6582478101160167e-05, "loss": 1.3812, "step": 7115 }, { "epoch": 0.786696867576377, "grad_norm": 0.322265625, "learning_rate": 2.6451674509379643e-05, "loss": 1.4036, "step": 7120 }, { "epoch": 0.7872493232418099, "grad_norm": 0.32421875, "learning_rate": 2.632114446351286e-05, "loss": 1.4684, "step": 7125 }, { "epoch": 0.7878017789072427, "grad_norm": 0.32421875, "learning_rate": 2.619088844903592e-05, "loss": 1.4453, "step": 7130 }, { "epoch": 0.7883542345726755, "grad_norm": 0.32421875, "learning_rate": 2.606090695040586e-05, "loss": 1.4662, "step": 7135 }, { "epoch": 0.7889066902381084, "grad_norm": 0.330078125, "learning_rate": 2.5931200451058678e-05, "loss": 1.3586, "step": 7140 }, { "epoch": 0.7894591459035413, "grad_norm": 0.337890625, "learning_rate": 2.5801769433407565e-05, "loss": 1.4184, "step": 7145 }, { "epoch": 0.7900116015689741, "grad_norm": 0.31640625, "learning_rate": 2.567261437884112e-05, "loss": 1.2987, "step": 7150 }, { "epoch": 0.7905640572344069, "grad_norm": 0.330078125, "learning_rate": 2.5543735767721576e-05, "loss": 1.4084, "step": 7155 }, { "epoch": 0.7911165128998398, "grad_norm": 0.3203125, "learning_rate": 2.5415134079383006e-05, "loss": 1.3856, "step": 7160 }, { "epoch": 0.7916689685652727, "grad_norm": 0.349609375, "learning_rate": 2.5286809792129496e-05, "loss": 1.3802, "step": 7165 }, { "epoch": 0.7922214242307055, "grad_norm": 0.310546875, "learning_rate": 2.5158763383233443e-05, "loss": 1.3617, "step": 7170 }, { "epoch": 0.7927738798961383, "grad_norm": 0.314453125, "learning_rate": 2.5030995328933726e-05, "loss": 1.4292, "step": 7175 }, { "epoch": 0.7933263355615712, "grad_norm": 0.33203125, "learning_rate": 2.490350610443396e-05, "loss": 1.4079, "step": 7180 }, { "epoch": 0.793878791227004, "grad_norm": 0.3359375, "learning_rate": 2.477629618390066e-05, "loss": 1.4079, "step": 7185 }, { "epoch": 0.7944312468924368, "grad_norm": 0.333984375, "learning_rate": 2.4649366040461597e-05, "loss": 1.376, "step": 7190 }, { "epoch": 0.7949837025578698, "grad_norm": 0.302734375, "learning_rate": 2.4522716146203974e-05, "loss": 1.4086, "step": 7195 }, { "epoch": 0.7955361582233026, "grad_norm": 0.31640625, "learning_rate": 2.4396346972172634e-05, "loss": 1.3506, "step": 7200 }, { "epoch": 0.7960886138887354, "grad_norm": 0.3203125, "learning_rate": 2.4270258988368376e-05, "loss": 1.3757, "step": 7205 }, { "epoch": 0.7966410695541682, "grad_norm": 0.341796875, "learning_rate": 2.4144452663746176e-05, "loss": 1.4284, "step": 7210 }, { "epoch": 0.7971935252196011, "grad_norm": 0.30078125, "learning_rate": 2.401892846621344e-05, "loss": 1.4101, "step": 7215 }, { "epoch": 0.797745980885034, "grad_norm": 0.32421875, "learning_rate": 2.3893686862628263e-05, "loss": 1.3381, "step": 7220 }, { "epoch": 0.7982984365504668, "grad_norm": 0.30859375, "learning_rate": 2.3768728318797684e-05, "loss": 1.4574, "step": 7225 }, { "epoch": 0.7988508922158997, "grad_norm": 0.33203125, "learning_rate": 2.364405329947603e-05, "loss": 1.3972, "step": 7230 }, { "epoch": 0.7994033478813325, "grad_norm": 0.314453125, "learning_rate": 2.3519662268363006e-05, "loss": 1.4289, "step": 7235 }, { "epoch": 0.7999558035467653, "grad_norm": 0.3359375, "learning_rate": 2.339555568810221e-05, "loss": 1.3185, "step": 7240 }, { "epoch": 0.8005082592121983, "grad_norm": 0.322265625, "learning_rate": 2.3271734020279225e-05, "loss": 1.4884, "step": 7245 }, { "epoch": 0.8010607148776311, "grad_norm": 0.31640625, "learning_rate": 2.3148197725419983e-05, "loss": 1.4936, "step": 7250 }, { "epoch": 0.8016131705430639, "grad_norm": 0.3203125, "learning_rate": 2.3024947262989038e-05, "loss": 1.4092, "step": 7255 }, { "epoch": 0.8021656262084967, "grad_norm": 0.310546875, "learning_rate": 2.2901983091387867e-05, "loss": 1.3706, "step": 7260 }, { "epoch": 0.8027180818739296, "grad_norm": 0.337890625, "learning_rate": 2.2779305667953154e-05, "loss": 1.4157, "step": 7265 }, { "epoch": 0.8032705375393625, "grad_norm": 0.298828125, "learning_rate": 2.2656915448955053e-05, "loss": 1.4659, "step": 7270 }, { "epoch": 0.8038229932047953, "grad_norm": 0.322265625, "learning_rate": 2.253481288959558e-05, "loss": 1.4226, "step": 7275 }, { "epoch": 0.8043754488702282, "grad_norm": 0.314453125, "learning_rate": 2.241299844400684e-05, "loss": 1.43, "step": 7280 }, { "epoch": 0.804927904535661, "grad_norm": 0.3203125, "learning_rate": 2.2291472565249384e-05, "loss": 1.2571, "step": 7285 }, { "epoch": 0.8054803602010938, "grad_norm": 0.33203125, "learning_rate": 2.217023570531045e-05, "loss": 1.3901, "step": 7290 }, { "epoch": 0.8060328158665268, "grad_norm": 0.30859375, "learning_rate": 2.2049288315102412e-05, "loss": 1.3579, "step": 7295 }, { "epoch": 0.8065852715319596, "grad_norm": 0.30078125, "learning_rate": 2.1928630844460973e-05, "loss": 1.3872, "step": 7300 }, { "epoch": 0.8071377271973924, "grad_norm": 0.298828125, "learning_rate": 2.1808263742143585e-05, "loss": 1.4049, "step": 7305 }, { "epoch": 0.8076901828628252, "grad_norm": 0.310546875, "learning_rate": 2.1688187455827736e-05, "loss": 1.39, "step": 7310 }, { "epoch": 0.8082426385282581, "grad_norm": 0.341796875, "learning_rate": 2.1568402432109257e-05, "loss": 1.4856, "step": 7315 }, { "epoch": 0.808795094193691, "grad_norm": 0.322265625, "learning_rate": 2.1448909116500747e-05, "loss": 1.4358, "step": 7320 }, { "epoch": 0.8093475498591238, "grad_norm": 0.3203125, "learning_rate": 2.1329707953429822e-05, "loss": 1.4261, "step": 7325 }, { "epoch": 0.8099000055245567, "grad_norm": 0.31640625, "learning_rate": 2.1210799386237535e-05, "loss": 1.4537, "step": 7330 }, { "epoch": 0.8104524611899895, "grad_norm": 0.341796875, "learning_rate": 2.1092183857176683e-05, "loss": 1.4784, "step": 7335 }, { "epoch": 0.8110049168554223, "grad_norm": 0.30078125, "learning_rate": 2.097386180741019e-05, "loss": 1.4695, "step": 7340 }, { "epoch": 0.8115573725208552, "grad_norm": 0.318359375, "learning_rate": 2.0855833677009384e-05, "loss": 1.4501, "step": 7345 }, { "epoch": 0.8121098281862881, "grad_norm": 0.314453125, "learning_rate": 2.0738099904952512e-05, "loss": 1.3878, "step": 7350 }, { "epoch": 0.8126622838517209, "grad_norm": 0.3125, "learning_rate": 2.0620660929123004e-05, "loss": 1.4786, "step": 7355 }, { "epoch": 0.8132147395171537, "grad_norm": 0.31640625, "learning_rate": 2.0503517186307842e-05, "loss": 1.3569, "step": 7360 }, { "epoch": 0.8137671951825866, "grad_norm": 0.3125, "learning_rate": 2.0386669112195976e-05, "loss": 1.3043, "step": 7365 }, { "epoch": 0.8143196508480195, "grad_norm": 0.314453125, "learning_rate": 2.0270117141376664e-05, "loss": 1.3695, "step": 7370 }, { "epoch": 0.8148721065134523, "grad_norm": 0.318359375, "learning_rate": 2.0153861707337906e-05, "loss": 1.4239, "step": 7375 }, { "epoch": 0.8154245621788851, "grad_norm": 0.310546875, "learning_rate": 2.0037903242464785e-05, "loss": 1.4389, "step": 7380 }, { "epoch": 0.815977017844318, "grad_norm": 0.30078125, "learning_rate": 1.9922242178037864e-05, "loss": 1.349, "step": 7385 }, { "epoch": 0.8165294735097508, "grad_norm": 0.30078125, "learning_rate": 1.9806878944231643e-05, "loss": 1.4655, "step": 7390 }, { "epoch": 0.8170819291751837, "grad_norm": 0.322265625, "learning_rate": 1.9691813970112827e-05, "loss": 1.4086, "step": 7395 }, { "epoch": 0.8176343848406166, "grad_norm": 0.322265625, "learning_rate": 1.9577047683638873e-05, "loss": 1.4064, "step": 7400 }, { "epoch": 0.8181868405060494, "grad_norm": 0.33203125, "learning_rate": 1.9462580511656338e-05, "loss": 1.472, "step": 7405 }, { "epoch": 0.8187392961714822, "grad_norm": 0.294921875, "learning_rate": 1.934841287989928e-05, "loss": 1.374, "step": 7410 }, { "epoch": 0.819291751836915, "grad_norm": 0.345703125, "learning_rate": 1.9234545212987688e-05, "loss": 1.4272, "step": 7415 }, { "epoch": 0.819844207502348, "grad_norm": 0.302734375, "learning_rate": 1.91209779344259e-05, "loss": 1.4148, "step": 7420 }, { "epoch": 0.8203966631677808, "grad_norm": 0.29296875, "learning_rate": 1.900771146660103e-05, "loss": 1.4199, "step": 7425 }, { "epoch": 0.8209491188332136, "grad_norm": 0.32421875, "learning_rate": 1.88947462307814e-05, "loss": 1.4539, "step": 7430 }, { "epoch": 0.8215015744986465, "grad_norm": 0.306640625, "learning_rate": 1.8782082647114962e-05, "loss": 1.3607, "step": 7435 }, { "epoch": 0.8220540301640793, "grad_norm": 0.3125, "learning_rate": 1.8669721134627748e-05, "loss": 1.3939, "step": 7440 }, { "epoch": 0.8226064858295122, "grad_norm": 0.30859375, "learning_rate": 1.855766211122234e-05, "loss": 1.3466, "step": 7445 }, { "epoch": 0.8231589414949451, "grad_norm": 0.298828125, "learning_rate": 1.8445905993676183e-05, "loss": 1.4769, "step": 7450 }, { "epoch": 0.8237113971603779, "grad_norm": 0.31640625, "learning_rate": 1.8334453197640224e-05, "loss": 1.4343, "step": 7455 }, { "epoch": 0.8242638528258107, "grad_norm": 0.330078125, "learning_rate": 1.8223304137637243e-05, "loss": 1.4035, "step": 7460 }, { "epoch": 0.8248163084912435, "grad_norm": 0.306640625, "learning_rate": 1.8112459227060386e-05, "loss": 1.4191, "step": 7465 }, { "epoch": 0.8253687641566765, "grad_norm": 0.298828125, "learning_rate": 1.8001918878171532e-05, "loss": 1.3589, "step": 7470 }, { "epoch": 0.8259212198221093, "grad_norm": 0.30078125, "learning_rate": 1.789168350209983e-05, "loss": 1.4379, "step": 7475 }, { "epoch": 0.8264736754875421, "grad_norm": 0.30859375, "learning_rate": 1.778175350884016e-05, "loss": 1.4714, "step": 7480 }, { "epoch": 0.827026131152975, "grad_norm": 0.326171875, "learning_rate": 1.767212930725163e-05, "loss": 1.3933, "step": 7485 }, { "epoch": 0.8275785868184078, "grad_norm": 0.306640625, "learning_rate": 1.756281130505595e-05, "loss": 1.3793, "step": 7490 }, { "epoch": 0.8281310424838406, "grad_norm": 0.345703125, "learning_rate": 1.745379990883603e-05, "loss": 1.4228, "step": 7495 }, { "epoch": 0.8286834981492736, "grad_norm": 0.30859375, "learning_rate": 1.7345095524034484e-05, "loss": 1.4343, "step": 7500 }, { "epoch": 0.8292359538147064, "grad_norm": 0.29296875, "learning_rate": 1.723669855495199e-05, "loss": 1.3563, "step": 7505 }, { "epoch": 0.8297884094801392, "grad_norm": 0.31640625, "learning_rate": 1.712860940474591e-05, "loss": 1.3763, "step": 7510 }, { "epoch": 0.830340865145572, "grad_norm": 0.30859375, "learning_rate": 1.702082847542873e-05, "loss": 1.4033, "step": 7515 }, { "epoch": 0.8308933208110049, "grad_norm": 0.306640625, "learning_rate": 1.6913356167866578e-05, "loss": 1.3725, "step": 7520 }, { "epoch": 0.8314457764764378, "grad_norm": 0.3359375, "learning_rate": 1.680619288177775e-05, "loss": 1.4081, "step": 7525 }, { "epoch": 0.8319982321418706, "grad_norm": 0.328125, "learning_rate": 1.6699339015731185e-05, "loss": 1.4427, "step": 7530 }, { "epoch": 0.8325506878073035, "grad_norm": 0.2890625, "learning_rate": 1.659279496714503e-05, "loss": 1.3909, "step": 7535 }, { "epoch": 0.8331031434727363, "grad_norm": 0.3359375, "learning_rate": 1.648656113228515e-05, "loss": 1.4603, "step": 7540 }, { "epoch": 0.8336555991381691, "grad_norm": 0.34765625, "learning_rate": 1.6380637906263574e-05, "loss": 1.3975, "step": 7545 }, { "epoch": 0.834208054803602, "grad_norm": 0.384765625, "learning_rate": 1.6275025683037148e-05, "loss": 1.3924, "step": 7550 }, { "epoch": 0.8347605104690349, "grad_norm": 0.31640625, "learning_rate": 1.616972485540601e-05, "loss": 1.4571, "step": 7555 }, { "epoch": 0.8353129661344677, "grad_norm": 0.427734375, "learning_rate": 1.6064735815012145e-05, "loss": 1.4475, "step": 7560 }, { "epoch": 0.8358654217999005, "grad_norm": 0.3203125, "learning_rate": 1.5960058952337887e-05, "loss": 1.3907, "step": 7565 }, { "epoch": 0.8364178774653334, "grad_norm": 0.330078125, "learning_rate": 1.585569465670451e-05, "loss": 1.4703, "step": 7570 }, { "epoch": 0.8369703331307663, "grad_norm": 0.30859375, "learning_rate": 1.575164331627079e-05, "loss": 1.3927, "step": 7575 }, { "epoch": 0.8375227887961991, "grad_norm": 0.3125, "learning_rate": 1.5647905318031507e-05, "loss": 1.3797, "step": 7580 }, { "epoch": 0.838075244461632, "grad_norm": 0.31640625, "learning_rate": 1.554448104781606e-05, "loss": 1.4504, "step": 7585 }, { "epoch": 0.8386277001270648, "grad_norm": 0.318359375, "learning_rate": 1.5441370890287022e-05, "loss": 1.3753, "step": 7590 }, { "epoch": 0.8391801557924976, "grad_norm": 0.294921875, "learning_rate": 1.5338575228938614e-05, "loss": 1.3978, "step": 7595 }, { "epoch": 0.8397326114579305, "grad_norm": 0.333984375, "learning_rate": 1.523609444609545e-05, "loss": 1.4968, "step": 7600 }, { "epoch": 0.8402850671233634, "grad_norm": 0.296875, "learning_rate": 1.5133928922911012e-05, "loss": 1.34, "step": 7605 }, { "epoch": 0.8408375227887962, "grad_norm": 0.328125, "learning_rate": 1.5032079039366209e-05, "loss": 1.4587, "step": 7610 }, { "epoch": 0.841389978454229, "grad_norm": 0.310546875, "learning_rate": 1.4930545174268062e-05, "loss": 1.4655, "step": 7615 }, { "epoch": 0.8419424341196619, "grad_norm": 0.306640625, "learning_rate": 1.4829327705248164e-05, "loss": 1.3438, "step": 7620 }, { "epoch": 0.8424948897850948, "grad_norm": 0.3203125, "learning_rate": 1.4728427008761402e-05, "loss": 1.469, "step": 7625 }, { "epoch": 0.8430473454505276, "grad_norm": 0.318359375, "learning_rate": 1.4627843460084478e-05, "loss": 1.4929, "step": 7630 }, { "epoch": 0.8435998011159604, "grad_norm": 0.318359375, "learning_rate": 1.4527577433314532e-05, "loss": 1.3739, "step": 7635 }, { "epoch": 0.8441522567813933, "grad_norm": 0.3203125, "learning_rate": 1.4427629301367773e-05, "loss": 1.4415, "step": 7640 }, { "epoch": 0.8447047124468261, "grad_norm": 0.3046875, "learning_rate": 1.4327999435978068e-05, "loss": 1.388, "step": 7645 }, { "epoch": 0.845257168112259, "grad_norm": 0.3515625, "learning_rate": 1.422868820769554e-05, "loss": 1.439, "step": 7650 }, { "epoch": 0.8458096237776919, "grad_norm": 0.30859375, "learning_rate": 1.4129695985885228e-05, "loss": 1.3525, "step": 7655 }, { "epoch": 0.8463620794431247, "grad_norm": 0.302734375, "learning_rate": 1.403102313872573e-05, "loss": 1.4214, "step": 7660 }, { "epoch": 0.8469145351085575, "grad_norm": 0.3125, "learning_rate": 1.3932670033207784e-05, "loss": 1.4142, "step": 7665 }, { "epoch": 0.8474669907739903, "grad_norm": 0.330078125, "learning_rate": 1.3834637035132903e-05, "loss": 1.4412, "step": 7670 }, { "epoch": 0.8480194464394233, "grad_norm": 0.3125, "learning_rate": 1.373692450911207e-05, "loss": 1.3676, "step": 7675 }, { "epoch": 0.8485719021048561, "grad_norm": 0.310546875, "learning_rate": 1.3639532818564327e-05, "loss": 1.305, "step": 7680 }, { "epoch": 0.8491243577702889, "grad_norm": 0.310546875, "learning_rate": 1.3542462325715443e-05, "loss": 1.3647, "step": 7685 }, { "epoch": 0.8496768134357218, "grad_norm": 0.30859375, "learning_rate": 1.344571339159657e-05, "loss": 1.4363, "step": 7690 }, { "epoch": 0.8502292691011546, "grad_norm": 0.34375, "learning_rate": 1.3349286376042914e-05, "loss": 1.3643, "step": 7695 }, { "epoch": 0.8507817247665875, "grad_norm": 0.310546875, "learning_rate": 1.3253181637692324e-05, "loss": 1.374, "step": 7700 }, { "epoch": 0.8513341804320204, "grad_norm": 0.341796875, "learning_rate": 1.3157399533984082e-05, "loss": 1.4333, "step": 7705 }, { "epoch": 0.8518866360974532, "grad_norm": 0.32421875, "learning_rate": 1.3061940421157459e-05, "loss": 1.3429, "step": 7710 }, { "epoch": 0.852439091762886, "grad_norm": 0.36328125, "learning_rate": 1.2966804654250465e-05, "loss": 1.3501, "step": 7715 }, { "epoch": 0.8529915474283188, "grad_norm": 0.302734375, "learning_rate": 1.287199258709848e-05, "loss": 1.3569, "step": 7720 }, { "epoch": 0.8535440030937518, "grad_norm": 0.3359375, "learning_rate": 1.2777504572332976e-05, "loss": 1.3849, "step": 7725 }, { "epoch": 0.8540964587591846, "grad_norm": 0.302734375, "learning_rate": 1.2683340961380163e-05, "loss": 1.441, "step": 7730 }, { "epoch": 0.8546489144246174, "grad_norm": 0.310546875, "learning_rate": 1.2589502104459738e-05, "loss": 1.4048, "step": 7735 }, { "epoch": 0.8552013700900503, "grad_norm": 0.310546875, "learning_rate": 1.249598835058352e-05, "loss": 1.3855, "step": 7740 }, { "epoch": 0.8557538257554831, "grad_norm": 0.330078125, "learning_rate": 1.2402800047554208e-05, "loss": 1.4787, "step": 7745 }, { "epoch": 0.856306281420916, "grad_norm": 0.337890625, "learning_rate": 1.2309937541964057e-05, "loss": 1.3971, "step": 7750 }, { "epoch": 0.8568587370863489, "grad_norm": 0.306640625, "learning_rate": 1.2217401179193556e-05, "loss": 1.433, "step": 7755 }, { "epoch": 0.8574111927517817, "grad_norm": 0.314453125, "learning_rate": 1.2125191303410221e-05, "loss": 1.3942, "step": 7760 }, { "epoch": 0.8579636484172145, "grad_norm": 0.31640625, "learning_rate": 1.2033308257567289e-05, "loss": 1.4165, "step": 7765 }, { "epoch": 0.8585161040826473, "grad_norm": 0.302734375, "learning_rate": 1.1941752383402394e-05, "loss": 1.4166, "step": 7770 }, { "epoch": 0.8590685597480802, "grad_norm": 0.30078125, "learning_rate": 1.1850524021436337e-05, "loss": 1.3896, "step": 7775 }, { "epoch": 0.8596210154135131, "grad_norm": 0.306640625, "learning_rate": 1.175962351097184e-05, "loss": 1.4108, "step": 7780 }, { "epoch": 0.8601734710789459, "grad_norm": 0.302734375, "learning_rate": 1.166905119009223e-05, "loss": 1.3806, "step": 7785 }, { "epoch": 0.8607259267443788, "grad_norm": 0.318359375, "learning_rate": 1.1578807395660207e-05, "loss": 1.476, "step": 7790 }, { "epoch": 0.8612783824098116, "grad_norm": 0.30859375, "learning_rate": 1.1488892463316615e-05, "loss": 1.3664, "step": 7795 }, { "epoch": 0.8618308380752444, "grad_norm": 0.318359375, "learning_rate": 1.1399306727479164e-05, "loss": 1.3496, "step": 7800 }, { "epoch": 0.8623832937406773, "grad_norm": 0.322265625, "learning_rate": 1.1310050521341198e-05, "loss": 1.4572, "step": 7805 }, { "epoch": 0.8629357494061102, "grad_norm": 0.326171875, "learning_rate": 1.1221124176870412e-05, "loss": 1.3888, "step": 7810 }, { "epoch": 0.863488205071543, "grad_norm": 0.306640625, "learning_rate": 1.1132528024807686e-05, "loss": 1.4096, "step": 7815 }, { "epoch": 0.8640406607369758, "grad_norm": 0.3203125, "learning_rate": 1.1044262394665872e-05, "loss": 1.4765, "step": 7820 }, { "epoch": 0.8645931164024087, "grad_norm": 0.298828125, "learning_rate": 1.0956327614728457e-05, "loss": 1.3902, "step": 7825 }, { "epoch": 0.8651455720678416, "grad_norm": 0.32421875, "learning_rate": 1.0868724012048438e-05, "loss": 1.4174, "step": 7830 }, { "epoch": 0.8656980277332744, "grad_norm": 0.322265625, "learning_rate": 1.078145191244706e-05, "loss": 1.3627, "step": 7835 }, { "epoch": 0.8662504833987072, "grad_norm": 0.302734375, "learning_rate": 1.069451164051264e-05, "loss": 1.3879, "step": 7840 }, { "epoch": 0.8668029390641401, "grad_norm": 0.30078125, "learning_rate": 1.0607903519599328e-05, "loss": 1.3847, "step": 7845 }, { "epoch": 0.8673553947295729, "grad_norm": 0.318359375, "learning_rate": 1.052162787182588e-05, "loss": 1.3491, "step": 7850 }, { "epoch": 0.8679078503950058, "grad_norm": 0.32421875, "learning_rate": 1.0435685018074548e-05, "loss": 1.4534, "step": 7855 }, { "epoch": 0.8684603060604387, "grad_norm": 0.3125, "learning_rate": 1.0350075277989812e-05, "loss": 1.4259, "step": 7860 }, { "epoch": 0.8690127617258715, "grad_norm": 0.296875, "learning_rate": 1.026479896997723e-05, "loss": 1.3976, "step": 7865 }, { "epoch": 0.8695652173913043, "grad_norm": 0.333984375, "learning_rate": 1.0179856411202204e-05, "loss": 1.4493, "step": 7870 }, { "epoch": 0.8701176730567372, "grad_norm": 0.302734375, "learning_rate": 1.0095247917588869e-05, "loss": 1.4796, "step": 7875 }, { "epoch": 0.8706701287221701, "grad_norm": 0.3125, "learning_rate": 1.0010973803818857e-05, "loss": 1.3621, "step": 7880 }, { "epoch": 0.8712225843876029, "grad_norm": 0.306640625, "learning_rate": 9.927034383330159e-06, "loss": 1.3101, "step": 7885 }, { "epoch": 0.8717750400530357, "grad_norm": 0.298828125, "learning_rate": 9.843429968315943e-06, "loss": 1.4251, "step": 7890 }, { "epoch": 0.8723274957184686, "grad_norm": 0.33203125, "learning_rate": 9.760160869723456e-06, "loss": 1.5168, "step": 7895 }, { "epoch": 0.8728799513839014, "grad_norm": 0.322265625, "learning_rate": 9.677227397252708e-06, "loss": 1.4325, "step": 7900 }, { "epoch": 0.8734324070493343, "grad_norm": 0.32421875, "learning_rate": 9.594629859355519e-06, "loss": 1.4289, "step": 7905 }, { "epoch": 0.8739848627147672, "grad_norm": 0.3203125, "learning_rate": 9.512368563234241e-06, "loss": 1.4303, "step": 7910 }, { "epoch": 0.8745373183802, "grad_norm": 0.3203125, "learning_rate": 9.430443814840662e-06, "loss": 1.4177, "step": 7915 }, { "epoch": 0.8750897740456328, "grad_norm": 0.30859375, "learning_rate": 9.348855918874844e-06, "loss": 1.4034, "step": 7920 }, { "epoch": 0.8756422297110656, "grad_norm": 0.302734375, "learning_rate": 9.267605178784033e-06, "loss": 1.442, "step": 7925 }, { "epoch": 0.8761946853764986, "grad_norm": 0.30859375, "learning_rate": 9.186691896761479e-06, "loss": 1.313, "step": 7930 }, { "epoch": 0.8767471410419314, "grad_norm": 0.296875, "learning_rate": 9.106116373745332e-06, "loss": 1.4715, "step": 7935 }, { "epoch": 0.8772995967073642, "grad_norm": 0.322265625, "learning_rate": 9.025878909417552e-06, "loss": 1.4123, "step": 7940 }, { "epoch": 0.8778520523727971, "grad_norm": 0.314453125, "learning_rate": 8.94597980220273e-06, "loss": 1.3528, "step": 7945 }, { "epoch": 0.8784045080382299, "grad_norm": 0.310546875, "learning_rate": 8.866419349267064e-06, "loss": 1.4126, "step": 7950 }, { "epoch": 0.8789569637036628, "grad_norm": 0.322265625, "learning_rate": 8.787197846517148e-06, "loss": 1.4004, "step": 7955 }, { "epoch": 0.8795094193690957, "grad_norm": 0.302734375, "learning_rate": 8.70831558859897e-06, "loss": 1.4267, "step": 7960 }, { "epoch": 0.8800618750345285, "grad_norm": 0.333984375, "learning_rate": 8.629772868896779e-06, "loss": 1.3844, "step": 7965 }, { "epoch": 0.8806143306999613, "grad_norm": 0.328125, "learning_rate": 8.55156997953197e-06, "loss": 1.3774, "step": 7970 }, { "epoch": 0.8811667863653941, "grad_norm": 0.32421875, "learning_rate": 8.473707211362026e-06, "loss": 1.4431, "step": 7975 }, { "epoch": 0.8817192420308271, "grad_norm": 0.330078125, "learning_rate": 8.396184853979416e-06, "loss": 1.3998, "step": 7980 }, { "epoch": 0.8822716976962599, "grad_norm": 0.310546875, "learning_rate": 8.319003195710574e-06, "loss": 1.4928, "step": 7985 }, { "epoch": 0.8828241533616927, "grad_norm": 0.306640625, "learning_rate": 8.242162523614716e-06, "loss": 1.3697, "step": 7990 }, { "epoch": 0.8833766090271256, "grad_norm": 0.296875, "learning_rate": 8.165663123482903e-06, "loss": 1.3745, "step": 7995 }, { "epoch": 0.8839290646925584, "grad_norm": 0.30859375, "learning_rate": 8.089505279836873e-06, "loss": 1.4191, "step": 8000 }, { "epoch": 0.8844815203579913, "grad_norm": 0.3125, "learning_rate": 8.013689275928037e-06, "loss": 1.3809, "step": 8005 }, { "epoch": 0.8850339760234242, "grad_norm": 0.365234375, "learning_rate": 7.938215393736414e-06, "loss": 1.3693, "step": 8010 }, { "epoch": 0.885586431688857, "grad_norm": 0.341796875, "learning_rate": 7.86308391396956e-06, "loss": 1.4136, "step": 8015 }, { "epoch": 0.8861388873542898, "grad_norm": 0.296875, "learning_rate": 7.788295116061584e-06, "loss": 1.3523, "step": 8020 }, { "epoch": 0.8866913430197226, "grad_norm": 0.326171875, "learning_rate": 7.713849278172047e-06, "loss": 1.3841, "step": 8025 }, { "epoch": 0.8872437986851556, "grad_norm": 0.306640625, "learning_rate": 7.639746677184945e-06, "loss": 1.46, "step": 8030 }, { "epoch": 0.8877962543505884, "grad_norm": 0.314453125, "learning_rate": 7.5659875887076905e-06, "loss": 1.4176, "step": 8035 }, { "epoch": 0.8883487100160212, "grad_norm": 0.328125, "learning_rate": 7.492572287070088e-06, "loss": 1.4437, "step": 8040 }, { "epoch": 0.888901165681454, "grad_norm": 0.3359375, "learning_rate": 7.419501045323296e-06, "loss": 1.478, "step": 8045 }, { "epoch": 0.8894536213468869, "grad_norm": 0.31640625, "learning_rate": 7.346774135238832e-06, "loss": 1.3829, "step": 8050 }, { "epoch": 0.8900060770123197, "grad_norm": 0.30078125, "learning_rate": 7.274391827307547e-06, "loss": 1.4267, "step": 8055 }, { "epoch": 0.8905585326777526, "grad_norm": 0.291015625, "learning_rate": 7.202354390738608e-06, "loss": 1.3574, "step": 8060 }, { "epoch": 0.8911109883431855, "grad_norm": 0.328125, "learning_rate": 7.130662093458529e-06, "loss": 1.5083, "step": 8065 }, { "epoch": 0.8916634440086183, "grad_norm": 0.298828125, "learning_rate": 7.059315202110173e-06, "loss": 1.483, "step": 8070 }, { "epoch": 0.8922158996740511, "grad_norm": 0.318359375, "learning_rate": 6.98831398205172e-06, "loss": 1.4793, "step": 8075 }, { "epoch": 0.892768355339484, "grad_norm": 0.3203125, "learning_rate": 6.917658697355722e-06, "loss": 1.4368, "step": 8080 }, { "epoch": 0.8933208110049169, "grad_norm": 0.3203125, "learning_rate": 6.8473496108080845e-06, "loss": 1.3433, "step": 8085 }, { "epoch": 0.8938732666703497, "grad_norm": 0.3046875, "learning_rate": 6.777386983907152e-06, "loss": 1.4376, "step": 8090 }, { "epoch": 0.8944257223357825, "grad_norm": 0.31640625, "learning_rate": 6.7077710768626455e-06, "loss": 1.5059, "step": 8095 }, { "epoch": 0.8949781780012154, "grad_norm": 0.302734375, "learning_rate": 6.638502148594772e-06, "loss": 1.4354, "step": 8100 }, { "epoch": 0.8955306336666482, "grad_norm": 0.296875, "learning_rate": 6.5695804567332044e-06, "loss": 1.4234, "step": 8105 }, { "epoch": 0.8960830893320811, "grad_norm": 0.33203125, "learning_rate": 6.501006257616205e-06, "loss": 1.4147, "step": 8110 }, { "epoch": 0.896635544997514, "grad_norm": 0.33203125, "learning_rate": 6.432779806289535e-06, "loss": 1.4996, "step": 8115 }, { "epoch": 0.8971880006629468, "grad_norm": 0.3125, "learning_rate": 6.364901356505648e-06, "loss": 1.36, "step": 8120 }, { "epoch": 0.8977404563283796, "grad_norm": 0.318359375, "learning_rate": 6.297371160722676e-06, "loss": 1.397, "step": 8125 }, { "epoch": 0.8982929119938124, "grad_norm": 0.326171875, "learning_rate": 6.230189470103498e-06, "loss": 1.4007, "step": 8130 }, { "epoch": 0.8988453676592454, "grad_norm": 0.3125, "learning_rate": 6.163356534514808e-06, "loss": 1.4182, "step": 8135 }, { "epoch": 0.8993978233246782, "grad_norm": 0.314453125, "learning_rate": 6.096872602526182e-06, "loss": 1.4674, "step": 8140 }, { "epoch": 0.899950278990111, "grad_norm": 0.310546875, "learning_rate": 6.030737921409169e-06, "loss": 1.3817, "step": 8145 }, { "epoch": 0.9005027346555439, "grad_norm": 0.314453125, "learning_rate": 5.964952737136353e-06, "loss": 1.366, "step": 8150 }, { "epoch": 0.9010551903209767, "grad_norm": 0.3046875, "learning_rate": 5.899517294380441e-06, "loss": 1.3948, "step": 8155 }, { "epoch": 0.9016076459864096, "grad_norm": 0.314453125, "learning_rate": 5.834431836513388e-06, "loss": 1.4826, "step": 8160 }, { "epoch": 0.9021601016518425, "grad_norm": 0.3359375, "learning_rate": 5.769696605605379e-06, "loss": 1.4108, "step": 8165 }, { "epoch": 0.9027125573172753, "grad_norm": 0.306640625, "learning_rate": 5.705311842424133e-06, "loss": 1.444, "step": 8170 }, { "epoch": 0.9032650129827081, "grad_norm": 0.326171875, "learning_rate": 5.641277786433796e-06, "loss": 1.3812, "step": 8175 }, { "epoch": 0.9038174686481409, "grad_norm": 0.3203125, "learning_rate": 5.577594675794162e-06, "loss": 1.451, "step": 8180 }, { "epoch": 0.9043699243135739, "grad_norm": 0.30078125, "learning_rate": 5.514262747359778e-06, "loss": 1.3467, "step": 8185 }, { "epoch": 0.9049223799790067, "grad_norm": 0.310546875, "learning_rate": 5.451282236679045e-06, "loss": 1.3463, "step": 8190 }, { "epoch": 0.9054748356444395, "grad_norm": 0.322265625, "learning_rate": 5.388653377993324e-06, "loss": 1.415, "step": 8195 }, { "epoch": 0.9060272913098724, "grad_norm": 0.32421875, "learning_rate": 5.326376404236133e-06, "loss": 1.4425, "step": 8200 }, { "epoch": 0.9065797469753052, "grad_norm": 0.333984375, "learning_rate": 5.264451547032212e-06, "loss": 1.2929, "step": 8205 }, { "epoch": 0.9071322026407381, "grad_norm": 0.30078125, "learning_rate": 5.202879036696662e-06, "loss": 1.4322, "step": 8210 }, { "epoch": 0.907684658306171, "grad_norm": 0.32421875, "learning_rate": 5.141659102234131e-06, "loss": 1.3896, "step": 8215 }, { "epoch": 0.9082371139716038, "grad_norm": 0.30859375, "learning_rate": 5.080791971337972e-06, "loss": 1.3918, "step": 8220 }, { "epoch": 0.9087895696370366, "grad_norm": 0.318359375, "learning_rate": 5.020277870389312e-06, "loss": 1.3893, "step": 8225 }, { "epoch": 0.9093420253024694, "grad_norm": 0.330078125, "learning_rate": 4.960117024456323e-06, "loss": 1.4313, "step": 8230 }, { "epoch": 0.9098944809679024, "grad_norm": 0.310546875, "learning_rate": 4.9003096572932785e-06, "loss": 1.4205, "step": 8235 }, { "epoch": 0.9104469366333352, "grad_norm": 0.32421875, "learning_rate": 4.840855991339799e-06, "loss": 1.3035, "step": 8240 }, { "epoch": 0.910999392298768, "grad_norm": 0.30078125, "learning_rate": 4.781756247719982e-06, "loss": 1.4739, "step": 8245 }, { "epoch": 0.9115518479642009, "grad_norm": 0.294921875, "learning_rate": 4.7230106462415876e-06, "loss": 1.4569, "step": 8250 }, { "epoch": 0.9121043036296337, "grad_norm": 0.32421875, "learning_rate": 4.6646194053952656e-06, "loss": 1.4513, "step": 8255 }, { "epoch": 0.9126567592950666, "grad_norm": 0.31640625, "learning_rate": 4.6065827423536375e-06, "loss": 1.5213, "step": 8260 }, { "epoch": 0.9132092149604994, "grad_norm": 0.328125, "learning_rate": 4.548900872970607e-06, "loss": 1.4524, "step": 8265 }, { "epoch": 0.9137616706259323, "grad_norm": 0.328125, "learning_rate": 4.491574011780497e-06, "loss": 1.446, "step": 8270 }, { "epoch": 0.9143141262913651, "grad_norm": 0.314453125, "learning_rate": 4.434602371997243e-06, "loss": 1.4165, "step": 8275 }, { "epoch": 0.9148665819567979, "grad_norm": 0.3125, "learning_rate": 4.3779861655136255e-06, "loss": 1.4219, "step": 8280 }, { "epoch": 0.9154190376222309, "grad_norm": 0.310546875, "learning_rate": 4.321725602900473e-06, "loss": 1.4697, "step": 8285 }, { "epoch": 0.9159714932876637, "grad_norm": 0.310546875, "learning_rate": 4.265820893405892e-06, "loss": 1.3736, "step": 8290 }, { "epoch": 0.9165239489530965, "grad_norm": 0.302734375, "learning_rate": 4.210272244954449e-06, "loss": 1.4606, "step": 8295 }, { "epoch": 0.9170764046185294, "grad_norm": 0.314453125, "learning_rate": 4.1550798641464605e-06, "loss": 1.3963, "step": 8300 }, { "epoch": 0.9176288602839622, "grad_norm": 0.3203125, "learning_rate": 4.100243956257144e-06, "loss": 1.3756, "step": 8305 }, { "epoch": 0.9181813159493951, "grad_norm": 0.3203125, "learning_rate": 4.045764725235956e-06, "loss": 1.4139, "step": 8310 }, { "epoch": 0.9187337716148279, "grad_norm": 0.3125, "learning_rate": 3.991642373705695e-06, "loss": 1.3603, "step": 8315 }, { "epoch": 0.9192862272802608, "grad_norm": 0.31640625, "learning_rate": 3.937877102961918e-06, "loss": 1.4415, "step": 8320 }, { "epoch": 0.9198386829456936, "grad_norm": 0.3125, "learning_rate": 3.884469112972033e-06, "loss": 1.5217, "step": 8325 }, { "epoch": 0.9203911386111264, "grad_norm": 0.30859375, "learning_rate": 3.83141860237467e-06, "loss": 1.498, "step": 8330 }, { "epoch": 0.9209435942765593, "grad_norm": 0.310546875, "learning_rate": 3.7787257684788745e-06, "loss": 1.3353, "step": 8335 }, { "epoch": 0.9214960499419922, "grad_norm": 0.32421875, "learning_rate": 3.7263908072634025e-06, "loss": 1.432, "step": 8340 }, { "epoch": 0.922048505607425, "grad_norm": 0.302734375, "learning_rate": 3.6744139133759957e-06, "loss": 1.4104, "step": 8345 }, { "epoch": 0.9226009612728578, "grad_norm": 0.3359375, "learning_rate": 3.6227952801326404e-06, "loss": 1.505, "step": 8350 }, { "epoch": 0.9231534169382907, "grad_norm": 0.322265625, "learning_rate": 3.571535099516832e-06, "loss": 1.3811, "step": 8355 }, { "epoch": 0.9237058726037235, "grad_norm": 0.3125, "learning_rate": 3.520633562178932e-06, "loss": 1.379, "step": 8360 }, { "epoch": 0.9242583282691564, "grad_norm": 0.31640625, "learning_rate": 3.470090857435371e-06, "loss": 1.4132, "step": 8365 }, { "epoch": 0.9248107839345893, "grad_norm": 0.3515625, "learning_rate": 3.419907173268e-06, "loss": 1.4266, "step": 8370 }, { "epoch": 0.9253632396000221, "grad_norm": 0.3046875, "learning_rate": 3.3700826963233735e-06, "loss": 1.4106, "step": 8375 }, { "epoch": 0.9259156952654549, "grad_norm": 0.3125, "learning_rate": 3.320617611912069e-06, "loss": 1.478, "step": 8380 }, { "epoch": 0.9264681509308877, "grad_norm": 0.310546875, "learning_rate": 3.271512104007979e-06, "loss": 1.398, "step": 8385 }, { "epoch": 0.9270206065963207, "grad_norm": 0.3203125, "learning_rate": 3.2227663552476194e-06, "loss": 1.409, "step": 8390 }, { "epoch": 0.9275730622617535, "grad_norm": 0.302734375, "learning_rate": 3.174380546929501e-06, "loss": 1.3739, "step": 8395 }, { "epoch": 0.9281255179271863, "grad_norm": 0.3046875, "learning_rate": 3.1263548590133917e-06, "loss": 1.4024, "step": 8400 }, { "epoch": 0.9286779735926192, "grad_norm": 0.345703125, "learning_rate": 3.0786894701196777e-06, "loss": 1.3786, "step": 8405 }, { "epoch": 0.929230429258052, "grad_norm": 0.330078125, "learning_rate": 3.031384557528716e-06, "loss": 1.4136, "step": 8410 }, { "epoch": 0.9297828849234849, "grad_norm": 0.3125, "learning_rate": 2.9844402971801242e-06, "loss": 1.3836, "step": 8415 }, { "epoch": 0.9303353405889178, "grad_norm": 0.298828125, "learning_rate": 2.9378568636721835e-06, "loss": 1.3386, "step": 8420 }, { "epoch": 0.9308877962543506, "grad_norm": 0.30078125, "learning_rate": 2.8916344302611586e-06, "loss": 1.3776, "step": 8425 }, { "epoch": 0.9314402519197834, "grad_norm": 0.302734375, "learning_rate": 2.845773168860644e-06, "loss": 1.3683, "step": 8430 }, { "epoch": 0.9319927075852162, "grad_norm": 0.3125, "learning_rate": 2.800273250040952e-06, "loss": 1.3795, "step": 8435 }, { "epoch": 0.9325451632506492, "grad_norm": 0.322265625, "learning_rate": 2.755134843028462e-06, "loss": 1.5185, "step": 8440 }, { "epoch": 0.933097618916082, "grad_norm": 0.310546875, "learning_rate": 2.710358115705003e-06, "loss": 1.355, "step": 8445 }, { "epoch": 0.9336500745815148, "grad_norm": 0.357421875, "learning_rate": 2.6659432346072156e-06, "loss": 1.4586, "step": 8450 }, { "epoch": 0.9342025302469477, "grad_norm": 0.314453125, "learning_rate": 2.6218903649259163e-06, "loss": 1.3691, "step": 8455 }, { "epoch": 0.9347549859123805, "grad_norm": 0.30859375, "learning_rate": 2.578199670505532e-06, "loss": 1.4656, "step": 8460 }, { "epoch": 0.9353074415778134, "grad_norm": 0.330078125, "learning_rate": 2.5348713138434564e-06, "loss": 1.4454, "step": 8465 }, { "epoch": 0.9358598972432463, "grad_norm": 0.302734375, "learning_rate": 2.4919054560894383e-06, "loss": 1.3853, "step": 8470 }, { "epoch": 0.9364123529086791, "grad_norm": 0.31640625, "learning_rate": 2.4493022570450164e-06, "loss": 1.457, "step": 8475 }, { "epoch": 0.9369648085741119, "grad_norm": 0.30859375, "learning_rate": 2.4070618751628748e-06, "loss": 1.4484, "step": 8480 }, { "epoch": 0.9375172642395447, "grad_norm": 0.291015625, "learning_rate": 2.365184467546333e-06, "loss": 1.3214, "step": 8485 }, { "epoch": 0.9380697199049777, "grad_norm": 0.322265625, "learning_rate": 2.3236701899486566e-06, "loss": 1.4981, "step": 8490 }, { "epoch": 0.9386221755704105, "grad_norm": 0.306640625, "learning_rate": 2.28251919677257e-06, "loss": 1.343, "step": 8495 }, { "epoch": 0.9391746312358433, "grad_norm": 0.30859375, "learning_rate": 2.2417316410696333e-06, "loss": 1.4634, "step": 8500 }, { "epoch": 0.9397270869012762, "grad_norm": 0.361328125, "learning_rate": 2.2013076745396765e-06, "loss": 1.3891, "step": 8505 }, { "epoch": 0.940279542566709, "grad_norm": 0.31640625, "learning_rate": 2.161247447530268e-06, "loss": 1.418, "step": 8510 }, { "epoch": 0.9408319982321419, "grad_norm": 0.318359375, "learning_rate": 2.121551109036124e-06, "loss": 1.3871, "step": 8515 }, { "epoch": 0.9413844538975747, "grad_norm": 0.94921875, "learning_rate": 2.0822188066985214e-06, "loss": 1.3596, "step": 8520 }, { "epoch": 0.9419369095630076, "grad_norm": 0.32421875, "learning_rate": 2.043250686804865e-06, "loss": 1.4703, "step": 8525 }, { "epoch": 0.9424893652284404, "grad_norm": 0.302734375, "learning_rate": 2.004646894287987e-06, "loss": 1.4486, "step": 8530 }, { "epoch": 0.9430418208938732, "grad_norm": 0.318359375, "learning_rate": 1.9664075727257593e-06, "loss": 1.4389, "step": 8535 }, { "epoch": 0.9435942765593062, "grad_norm": 0.318359375, "learning_rate": 1.928532864340438e-06, "loss": 1.3897, "step": 8540 }, { "epoch": 0.944146732224739, "grad_norm": 0.31640625, "learning_rate": 1.891022909998208e-06, "loss": 1.361, "step": 8545 }, { "epoch": 0.9446991878901718, "grad_norm": 0.33203125, "learning_rate": 1.8538778492086407e-06, "loss": 1.3845, "step": 8550 }, { "epoch": 0.9452516435556046, "grad_norm": 0.30078125, "learning_rate": 1.8170978201241474e-06, "loss": 1.3632, "step": 8555 }, { "epoch": 0.9458040992210375, "grad_norm": 0.31640625, "learning_rate": 1.7806829595395147e-06, "loss": 1.4402, "step": 8560 }, { "epoch": 0.9463565548864704, "grad_norm": 0.298828125, "learning_rate": 1.7446334028913491e-06, "loss": 1.4234, "step": 8565 }, { "epoch": 0.9469090105519032, "grad_norm": 0.306640625, "learning_rate": 1.7089492842576106e-06, "loss": 1.3891, "step": 8570 }, { "epoch": 0.9474614662173361, "grad_norm": 0.326171875, "learning_rate": 1.6736307363570903e-06, "loss": 1.336, "step": 8575 }, { "epoch": 0.9480139218827689, "grad_norm": 0.306640625, "learning_rate": 1.638677890548912e-06, "loss": 1.4111, "step": 8580 }, { "epoch": 0.9485663775482017, "grad_norm": 0.30078125, "learning_rate": 1.6040908768320872e-06, "loss": 1.4412, "step": 8585 }, { "epoch": 0.9491188332136347, "grad_norm": 0.310546875, "learning_rate": 1.5698698238449716e-06, "loss": 1.3974, "step": 8590 }, { "epoch": 0.9496712888790675, "grad_norm": 0.322265625, "learning_rate": 1.5360148588648093e-06, "loss": 1.2959, "step": 8595 }, { "epoch": 0.9502237445445003, "grad_norm": 0.314453125, "learning_rate": 1.5025261078073005e-06, "loss": 1.4659, "step": 8600 }, { "epoch": 0.9507762002099331, "grad_norm": 0.298828125, "learning_rate": 1.469403695226057e-06, "loss": 1.4154, "step": 8605 }, { "epoch": 0.951328655875366, "grad_norm": 0.318359375, "learning_rate": 1.436647744312214e-06, "loss": 1.4045, "step": 8610 }, { "epoch": 0.9518811115407988, "grad_norm": 0.326171875, "learning_rate": 1.4042583768939298e-06, "loss": 1.358, "step": 8615 }, { "epoch": 0.9524335672062317, "grad_norm": 0.326171875, "learning_rate": 1.3722357134359099e-06, "loss": 1.3804, "step": 8620 }, { "epoch": 0.9529860228716646, "grad_norm": 0.34375, "learning_rate": 1.3405798730390273e-06, "loss": 1.4756, "step": 8625 }, { "epoch": 0.9535384785370974, "grad_norm": 0.306640625, "learning_rate": 1.3092909734398251e-06, "loss": 1.4696, "step": 8630 }, { "epoch": 0.9540909342025302, "grad_norm": 0.318359375, "learning_rate": 1.278369131010093e-06, "loss": 1.3755, "step": 8635 }, { "epoch": 0.954643389867963, "grad_norm": 0.3125, "learning_rate": 1.2478144607564469e-06, "loss": 1.419, "step": 8640 }, { "epoch": 0.955195845533396, "grad_norm": 0.31640625, "learning_rate": 1.2176270763198828e-06, "loss": 1.4191, "step": 8645 }, { "epoch": 0.9557483011988288, "grad_norm": 0.322265625, "learning_rate": 1.187807089975379e-06, "loss": 1.3157, "step": 8650 }, { "epoch": 0.9563007568642616, "grad_norm": 0.330078125, "learning_rate": 1.1583546126314293e-06, "loss": 1.3631, "step": 8655 }, { "epoch": 0.9568532125296945, "grad_norm": 0.369140625, "learning_rate": 1.1292697538296982e-06, "loss": 1.4025, "step": 8660 }, { "epoch": 0.9574056681951273, "grad_norm": 0.3203125, "learning_rate": 1.100552621744555e-06, "loss": 1.3619, "step": 8665 }, { "epoch": 0.9579581238605602, "grad_norm": 0.3125, "learning_rate": 1.0722033231826967e-06, "loss": 1.4364, "step": 8670 }, { "epoch": 0.9585105795259931, "grad_norm": 0.3125, "learning_rate": 1.0442219635827587e-06, "loss": 1.4032, "step": 8675 }, { "epoch": 0.9590630351914259, "grad_norm": 0.34765625, "learning_rate": 1.016608647014916e-06, "loss": 1.4398, "step": 8680 }, { "epoch": 0.9596154908568587, "grad_norm": 0.302734375, "learning_rate": 9.893634761804827e-07, "loss": 1.4672, "step": 8685 }, { "epoch": 0.9601679465222915, "grad_norm": 0.310546875, "learning_rate": 9.624865524115346e-07, "loss": 1.3971, "step": 8690 }, { "epoch": 0.9607204021877245, "grad_norm": 0.318359375, "learning_rate": 9.359779756705544e-07, "loss": 1.5505, "step": 8695 }, { "epoch": 0.9612728578531573, "grad_norm": 0.3203125, "learning_rate": 9.098378445500322e-07, "loss": 1.3814, "step": 8700 }, { "epoch": 0.9618253135185901, "grad_norm": 0.333984375, "learning_rate": 8.840662562721314e-07, "loss": 1.5304, "step": 8705 }, { "epoch": 0.962377769184023, "grad_norm": 0.322265625, "learning_rate": 8.586633066882565e-07, "loss": 1.4123, "step": 8710 }, { "epoch": 0.9629302248494558, "grad_norm": 0.3125, "learning_rate": 8.336290902788091e-07, "loss": 1.421, "step": 8715 }, { "epoch": 0.9634826805148887, "grad_norm": 0.314453125, "learning_rate": 8.089637001527317e-07, "loss": 1.3782, "step": 8720 }, { "epoch": 0.9640351361803216, "grad_norm": 0.298828125, "learning_rate": 7.84667228047209e-07, "loss": 1.4276, "step": 8725 }, { "epoch": 0.9645875918457544, "grad_norm": 0.318359375, "learning_rate": 7.607397643273229e-07, "loss": 1.426, "step": 8730 }, { "epoch": 0.9651400475111872, "grad_norm": 0.333984375, "learning_rate": 7.371813979857312e-07, "loss": 1.4849, "step": 8735 }, { "epoch": 0.96569250317662, "grad_norm": 0.298828125, "learning_rate": 7.139922166422896e-07, "loss": 1.3483, "step": 8740 }, { "epoch": 0.966244958842053, "grad_norm": 0.322265625, "learning_rate": 6.911723065437858e-07, "loss": 1.406, "step": 8745 }, { "epoch": 0.9667974145074858, "grad_norm": 0.31640625, "learning_rate": 6.687217525635614e-07, "loss": 1.4094, "step": 8750 }, { "epoch": 0.9673498701729186, "grad_norm": 0.318359375, "learning_rate": 6.466406382012457e-07, "loss": 1.3523, "step": 8755 }, { "epoch": 0.9679023258383515, "grad_norm": 0.3203125, "learning_rate": 6.249290455824231e-07, "loss": 1.3543, "step": 8760 }, { "epoch": 0.9684547815037843, "grad_norm": 0.35546875, "learning_rate": 6.03587055458299e-07, "loss": 1.4777, "step": 8765 }, { "epoch": 0.9690072371692172, "grad_norm": 0.3203125, "learning_rate": 5.826147472054677e-07, "loss": 1.4154, "step": 8770 }, { "epoch": 0.96955969283465, "grad_norm": 0.3125, "learning_rate": 5.620121988255567e-07, "loss": 1.4028, "step": 8775 }, { "epoch": 0.9701121485000829, "grad_norm": 0.302734375, "learning_rate": 5.417794869449377e-07, "loss": 1.4488, "step": 8780 }, { "epoch": 0.9706646041655157, "grad_norm": 0.298828125, "learning_rate": 5.219166868145164e-07, "loss": 1.3658, "step": 8785 }, { "epoch": 0.9712170598309485, "grad_norm": 0.310546875, "learning_rate": 5.024238723093322e-07, "loss": 1.4321, "step": 8790 }, { "epoch": 0.9717695154963815, "grad_norm": 0.337890625, "learning_rate": 4.833011159284029e-07, "loss": 1.3371, "step": 8795 }, { "epoch": 0.9723219711618143, "grad_norm": 0.31640625, "learning_rate": 4.645484887943696e-07, "loss": 1.4278, "step": 8800 }, { "epoch": 0.9728744268272471, "grad_norm": 0.32421875, "learning_rate": 4.461660606532747e-07, "loss": 1.4344, "step": 8805 }, { "epoch": 0.97342688249268, "grad_norm": 0.283203125, "learning_rate": 4.281538998742951e-07, "loss": 1.4349, "step": 8810 }, { "epoch": 0.9739793381581128, "grad_norm": 0.30078125, "learning_rate": 4.1051207344946495e-07, "loss": 1.385, "step": 8815 }, { "epoch": 0.9745317938235457, "grad_norm": 0.318359375, "learning_rate": 3.9324064699348686e-07, "loss": 1.4262, "step": 8820 }, { "epoch": 0.9750842494889785, "grad_norm": 0.318359375, "learning_rate": 3.763396847433875e-07, "loss": 1.3442, "step": 8825 }, { "epoch": 0.9756367051544114, "grad_norm": 0.3046875, "learning_rate": 3.59809249558396e-07, "loss": 1.4551, "step": 8830 }, { "epoch": 0.9761891608198442, "grad_norm": 0.328125, "learning_rate": 3.436494029196102e-07, "loss": 1.3699, "step": 8835 }, { "epoch": 0.976741616485277, "grad_norm": 0.345703125, "learning_rate": 3.278602049298418e-07, "loss": 1.4721, "step": 8840 }, { "epoch": 0.97729407215071, "grad_norm": 0.30859375, "learning_rate": 3.1244171431332737e-07, "loss": 1.3806, "step": 8845 }, { "epoch": 0.9778465278161428, "grad_norm": 0.306640625, "learning_rate": 2.9739398841557295e-07, "loss": 1.4959, "step": 8850 }, { "epoch": 0.9783989834815756, "grad_norm": 0.310546875, "learning_rate": 2.8271708320309895e-07, "loss": 1.4017, "step": 8855 }, { "epoch": 0.9789514391470084, "grad_norm": 0.328125, "learning_rate": 2.6841105326324e-07, "loss": 1.4334, "step": 8860 }, { "epoch": 0.9795038948124413, "grad_norm": 0.3515625, "learning_rate": 2.544759518039674e-07, "loss": 1.3534, "step": 8865 }, { "epoch": 0.9800563504778742, "grad_norm": 0.294921875, "learning_rate": 2.409118306536229e-07, "loss": 1.4502, "step": 8870 }, { "epoch": 0.980608806143307, "grad_norm": 0.31640625, "learning_rate": 2.277187402608405e-07, "loss": 1.46, "step": 8875 }, { "epoch": 0.9811612618087399, "grad_norm": 0.31640625, "learning_rate": 2.1489672969423614e-07, "loss": 1.445, "step": 8880 }, { "epoch": 0.9817137174741727, "grad_norm": 0.33203125, "learning_rate": 2.0244584664229628e-07, "loss": 1.4572, "step": 8885 }, { "epoch": 0.9822661731396055, "grad_norm": 0.3046875, "learning_rate": 1.9036613741320043e-07, "loss": 1.5019, "step": 8890 }, { "epoch": 0.9828186288050383, "grad_norm": 0.31640625, "learning_rate": 1.7865764693461017e-07, "loss": 1.3375, "step": 8895 }, { "epoch": 0.9833710844704713, "grad_norm": 0.3125, "learning_rate": 1.6732041875354709e-07, "loss": 1.3856, "step": 8900 }, { "epoch": 0.9839235401359041, "grad_norm": 0.3203125, "learning_rate": 1.563544950361928e-07, "loss": 1.3956, "step": 8905 }, { "epoch": 0.9844759958013369, "grad_norm": 0.3046875, "learning_rate": 1.457599165677559e-07, "loss": 1.3132, "step": 8910 }, { "epoch": 0.9850284514667698, "grad_norm": 0.29296875, "learning_rate": 1.3553672275230523e-07, "loss": 1.3367, "step": 8915 }, { "epoch": 0.9855809071322026, "grad_norm": 0.306640625, "learning_rate": 1.2568495161264793e-07, "loss": 1.4318, "step": 8920 }, { "epoch": 0.9861333627976355, "grad_norm": 0.330078125, "learning_rate": 1.1620463979014062e-07, "loss": 1.4308, "step": 8925 }, { "epoch": 0.9866858184630684, "grad_norm": 0.326171875, "learning_rate": 1.070958225446006e-07, "loss": 1.4621, "step": 8930 }, { "epoch": 0.9872382741285012, "grad_norm": 0.3125, "learning_rate": 9.835853375418368e-08, "loss": 1.3236, "step": 8935 }, { "epoch": 0.987790729793934, "grad_norm": 0.3046875, "learning_rate": 8.999280591518444e-08, "loss": 1.4286, "step": 8940 }, { "epoch": 0.9883431854593668, "grad_norm": 0.283203125, "learning_rate": 8.199867014198059e-08, "loss": 1.4852, "step": 8945 }, { "epoch": 0.9888956411247998, "grad_norm": 0.318359375, "learning_rate": 7.437615616692206e-08, "loss": 1.4493, "step": 8950 }, { "epoch": 0.9894480967902326, "grad_norm": 0.3125, "learning_rate": 6.712529234016441e-08, "loss": 1.3923, "step": 8955 }, { "epoch": 0.9900005524556654, "grad_norm": 0.3046875, "learning_rate": 6.024610562962441e-08, "loss": 1.4233, "step": 8960 }, { "epoch": 0.9905530081210983, "grad_norm": 0.31640625, "learning_rate": 5.3738621620824657e-08, "loss": 1.4113, "step": 8965 }, { "epoch": 0.9911054637865311, "grad_norm": 0.318359375, "learning_rate": 4.7602864516849144e-08, "loss": 1.3821, "step": 8970 }, { "epoch": 0.991657919451964, "grad_norm": 0.31640625, "learning_rate": 4.183885713822111e-08, "loss": 1.4023, "step": 8975 }, { "epoch": 0.9922103751173968, "grad_norm": 0.30859375, "learning_rate": 3.6446620922825356e-08, "loss": 1.3899, "step": 8980 }, { "epoch": 0.9927628307828297, "grad_norm": 0.34375, "learning_rate": 3.142617592583052e-08, "loss": 1.4127, "step": 8985 }, { "epoch": 0.9933152864482625, "grad_norm": 0.3046875, "learning_rate": 2.677754081961137e-08, "loss": 1.4235, "step": 8990 }, { "epoch": 0.9938677421136953, "grad_norm": 0.318359375, "learning_rate": 2.2500732893704358e-08, "loss": 1.4802, "step": 8995 }, { "epoch": 0.9944201977791283, "grad_norm": 0.330078125, "learning_rate": 1.8595768054674444e-08, "loss": 1.3623, "step": 9000 }, { "epoch": 0.9949726534445611, "grad_norm": 0.310546875, "learning_rate": 1.506266082615948e-08, "loss": 1.3708, "step": 9005 }, { "epoch": 0.9955251091099939, "grad_norm": 0.326171875, "learning_rate": 1.1901424348714774e-08, "loss": 1.3615, "step": 9010 }, { "epoch": 0.9960775647754267, "grad_norm": 0.306640625, "learning_rate": 9.112070379835302e-09, "loss": 1.3391, "step": 9015 }, { "epoch": 0.9966300204408596, "grad_norm": 0.328125, "learning_rate": 6.6946092938668934e-09, "loss": 1.4416, "step": 9020 }, { "epoch": 0.9971824761062925, "grad_norm": 0.3203125, "learning_rate": 4.649050082006223e-09, "loss": 1.4236, "step": 9025 }, { "epoch": 0.9977349317717253, "grad_norm": 0.306640625, "learning_rate": 2.975400352211999e-09, "loss": 1.3989, "step": 9030 }, { "epoch": 0.9982873874371582, "grad_norm": 0.318359375, "learning_rate": 1.6736663292604704e-09, "loss": 1.3484, "step": 9035 }, { "epoch": 0.998839843102591, "grad_norm": 0.310546875, "learning_rate": 7.438528546344082e-10, "loss": 1.3898, "step": 9040 }, { "epoch": 0.9993922987680238, "grad_norm": 0.3046875, "learning_rate": 1.8596338656751145e-10, "loss": 1.3651, "step": 9045 }, { "epoch": 0.9999447544334568, "grad_norm": 0.369140625, "learning_rate": 0.0, "loss": 1.4435, "step": 9050 }, { "epoch": 0.9999447544334568, "eval_loss": 1.4126445055007935, "eval_runtime": 1823.1317, "eval_samples_per_second": 3.953, "eval_steps_per_second": 0.494, "step": 9050 }, { "epoch": 0.9999447544334568, "step": 9050, "total_flos": 6.363322205866033e+18, "train_loss": 1.4676292563538524, "train_runtime": 67905.3687, "train_samples_per_second": 1.066, "train_steps_per_second": 0.133 } ], "logging_steps": 5, "max_steps": 9050, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 6.363322205866033e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }