{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999756275895686,
  "eval_steps": 500,
  "global_step": 2051,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004874482086278333,
      "grad_norm": 0.3594866991043091,
      "learning_rate": 2e-05,
      "loss": 1.5386,
      "step": 1
    },
    {
      "epoch": 0.0009748964172556666,
      "grad_norm": 0.3198799788951874,
      "learning_rate": 4e-05,
      "loss": 1.547,
      "step": 2
    },
    {
      "epoch": 0.0014623446258835,
      "grad_norm": 0.3148403763771057,
      "learning_rate": 6e-05,
      "loss": 1.3611,
      "step": 3
    },
    {
      "epoch": 0.0019497928345113332,
      "grad_norm": 0.3041052222251892,
      "learning_rate": 8e-05,
      "loss": 1.3769,
      "step": 4
    },
    {
      "epoch": 0.0024372410431391664,
      "grad_norm": 0.3455198109149933,
      "learning_rate": 0.0001,
      "loss": 1.5368,
      "step": 5
    },
    {
      "epoch": 0.002924689251767,
      "grad_norm": 0.33201417326927185,
      "learning_rate": 0.00012,
      "loss": 1.6664,
      "step": 6
    },
    {
      "epoch": 0.003412137460394833,
      "grad_norm": 0.30785706639289856,
      "learning_rate": 0.00014,
      "loss": 1.5451,
      "step": 7
    },
    {
      "epoch": 0.0038995856690226664,
      "grad_norm": 0.36058735847473145,
      "learning_rate": 0.00016,
      "loss": 1.5256,
      "step": 8
    },
    {
      "epoch": 0.004387033877650499,
      "grad_norm": 0.38930457830429077,
      "learning_rate": 0.00018,
      "loss": 1.4659,
      "step": 9
    },
    {
      "epoch": 0.004874482086278333,
      "grad_norm": 0.3993895649909973,
      "learning_rate": 0.0002,
      "loss": 1.5789,
      "step": 10
    },
    {
      "epoch": 0.005361930294906166,
      "grad_norm": 0.44698166847229004,
      "learning_rate": 0.00019999988153674691,
      "loss": 1.4087,
      "step": 11
    },
    {
      "epoch": 0.005849378503534,
      "grad_norm": 0.5011526942253113,
      "learning_rate": 0.0001999995261472683,
      "loss": 1.5465,
      "step": 12
    },
    {
      "epoch": 0.006336826712161832,
      "grad_norm": 0.3983900249004364,
      "learning_rate": 0.00019999893383240616,
      "loss": 1.5136,
      "step": 13
    },
    {
      "epoch": 0.006824274920789666,
      "grad_norm": 0.5348988771438599,
      "learning_rate": 0.00019999810459356388,
      "loss": 1.317,
      "step": 14
    },
    {
      "epoch": 0.007311723129417499,
      "grad_norm": 0.36835604906082153,
      "learning_rate": 0.00019999703843270612,
      "loss": 1.2642,
      "step": 15
    },
    {
      "epoch": 0.007799171338045333,
      "grad_norm": 0.5402485728263855,
      "learning_rate": 0.0001999957353523589,
      "loss": 1.4507,
      "step": 16
    },
    {
      "epoch": 0.008286619546673165,
      "grad_norm": 0.37945985794067383,
      "learning_rate": 0.00019999419535560956,
      "loss": 1.3151,
      "step": 17
    },
    {
      "epoch": 0.008774067755300999,
      "grad_norm": 0.401275634765625,
      "learning_rate": 0.00019999241844610678,
      "loss": 1.4396,
      "step": 18
    },
    {
      "epoch": 0.009261515963928832,
      "grad_norm": 0.30488014221191406,
      "learning_rate": 0.0001999904046280605,
      "loss": 1.3752,
      "step": 19
    },
    {
      "epoch": 0.009748964172556666,
      "grad_norm": 0.4626210629940033,
      "learning_rate": 0.00019998815390624204,
      "loss": 1.4507,
      "step": 20
    },
    {
      "epoch": 0.0102364123811845,
      "grad_norm": 0.45440447330474854,
      "learning_rate": 0.00019998566628598392,
      "loss": 1.3766,
      "step": 21
    },
    {
      "epoch": 0.010723860589812333,
      "grad_norm": 0.4724906384944916,
      "learning_rate": 0.00019998294177317995,
      "loss": 1.4923,
      "step": 22
    },
    {
      "epoch": 0.011211308798440166,
      "grad_norm": 0.38014769554138184,
      "learning_rate": 0.00019997998037428526,
      "loss": 1.3906,
      "step": 23
    },
    {
      "epoch": 0.011698757007068,
      "grad_norm": 0.41498008370399475,
      "learning_rate": 0.0001999767820963162,
      "loss": 1.3639,
      "step": 24
    },
    {
      "epoch": 0.012186205215695833,
      "grad_norm": 0.41264939308166504,
      "learning_rate": 0.00019997334694685028,
      "loss": 1.3366,
      "step": 25
    },
    {
      "epoch": 0.012673653424323665,
      "grad_norm": 0.2975503206253052,
      "learning_rate": 0.00019996967493402632,
      "loss": 1.2275,
      "step": 26
    },
    {
      "epoch": 0.013161101632951498,
      "grad_norm": 0.26862838864326477,
      "learning_rate": 0.0001999657660665443,
      "loss": 1.3027,
      "step": 27
    },
    {
      "epoch": 0.013648549841579332,
      "grad_norm": 0.3438441753387451,
      "learning_rate": 0.00019996162035366535,
      "loss": 1.2117,
      "step": 28
    },
    {
      "epoch": 0.014135998050207165,
      "grad_norm": 0.26122623682022095,
      "learning_rate": 0.00019995723780521173,
      "loss": 1.3813,
      "step": 29
    },
    {
      "epoch": 0.014623446258834999,
      "grad_norm": 0.37747007608413696,
      "learning_rate": 0.00019995261843156693,
      "loss": 1.3035,
      "step": 30
    },
    {
      "epoch": 0.015110894467462832,
      "grad_norm": 0.36326363682746887,
      "learning_rate": 0.0001999477622436754,
      "loss": 1.4155,
      "step": 31
    },
    {
      "epoch": 0.015598342676090666,
      "grad_norm": 0.25670093297958374,
      "learning_rate": 0.00019994266925304279,
      "loss": 1.3552,
      "step": 32
    },
    {
      "epoch": 0.0160857908847185,
      "grad_norm": 0.31610992550849915,
      "learning_rate": 0.0001999373394717357,
      "loss": 1.5197,
      "step": 33
    },
    {
      "epoch": 0.01657323909334633,
      "grad_norm": 0.3175552189350128,
      "learning_rate": 0.00019993177291238186,
      "loss": 1.3403,
      "step": 34
    },
    {
      "epoch": 0.017060687301974166,
      "grad_norm": 0.30159491300582886,
      "learning_rate": 0.00019992596958816984,
      "loss": 1.3754,
      "step": 35
    },
    {
      "epoch": 0.017548135510601998,
      "grad_norm": 0.2562412917613983,
      "learning_rate": 0.00019991992951284932,
      "loss": 1.2276,
      "step": 36
    },
    {
      "epoch": 0.018035583719229833,
      "grad_norm": 0.4372316598892212,
      "learning_rate": 0.0001999136527007308,
      "loss": 1.3332,
      "step": 37
    },
    {
      "epoch": 0.018523031927857665,
      "grad_norm": 0.2722485363483429,
      "learning_rate": 0.00019990713916668575,
      "loss": 1.3357,
      "step": 38
    },
    {
      "epoch": 0.0190104801364855,
      "grad_norm": 0.3781321346759796,
      "learning_rate": 0.00019990038892614642,
      "loss": 1.2892,
      "step": 39
    },
    {
      "epoch": 0.01949792834511333,
      "grad_norm": 0.3160063326358795,
      "learning_rate": 0.00019989340199510594,
      "loss": 1.2825,
      "step": 40
    },
    {
      "epoch": 0.019985376553741167,
      "grad_norm": 0.3378397226333618,
      "learning_rate": 0.00019988617839011816,
      "loss": 1.3731,
      "step": 41
    },
    {
      "epoch": 0.020472824762369,
      "grad_norm": 0.39730527997016907,
      "learning_rate": 0.00019987871812829778,
      "loss": 1.4537,
      "step": 42
    },
    {
      "epoch": 0.02096027297099683,
      "grad_norm": 0.3675004541873932,
      "learning_rate": 0.0001998710212273201,
      "loss": 1.3518,
      "step": 43
    },
    {
      "epoch": 0.021447721179624665,
      "grad_norm": 0.3094078302383423,
      "learning_rate": 0.00019986308770542115,
      "loss": 1.2811,
      "step": 44
    },
    {
      "epoch": 0.021935169388252497,
      "grad_norm": 0.3008870780467987,
      "learning_rate": 0.0001998549175813975,
      "loss": 1.2485,
      "step": 45
    },
    {
      "epoch": 0.022422617596880332,
      "grad_norm": 0.3380783200263977,
      "learning_rate": 0.00019984651087460637,
      "loss": 1.4253,
      "step": 46
    },
    {
      "epoch": 0.022910065805508164,
      "grad_norm": 0.29196858406066895,
      "learning_rate": 0.00019983786760496548,
      "loss": 1.3116,
      "step": 47
    },
    {
      "epoch": 0.023397514014136,
      "grad_norm": 0.2729412615299225,
      "learning_rate": 0.000199828987792953,
      "loss": 1.2858,
      "step": 48
    },
    {
      "epoch": 0.02388496222276383,
      "grad_norm": 0.28062939643859863,
      "learning_rate": 0.00019981987145960755,
      "loss": 1.2848,
      "step": 49
    },
    {
      "epoch": 0.024372410431391666,
      "grad_norm": 0.3024255335330963,
      "learning_rate": 0.00019981051862652822,
      "loss": 1.3813,
      "step": 50
    },
    {
      "epoch": 0.024859858640019498,
      "grad_norm": 0.2562197744846344,
      "learning_rate": 0.00019980092931587423,
      "loss": 1.3828,
      "step": 51
    },
    {
      "epoch": 0.02534730684864733,
      "grad_norm": 0.2957954406738281,
      "learning_rate": 0.00019979110355036533,
      "loss": 1.3334,
      "step": 52
    },
    {
      "epoch": 0.025834755057275165,
      "grad_norm": 0.33199235796928406,
      "learning_rate": 0.00019978104135328126,
      "loss": 1.2957,
      "step": 53
    },
    {
      "epoch": 0.026322203265902996,
      "grad_norm": 0.28630098700523376,
      "learning_rate": 0.0001997707427484621,
      "loss": 1.2411,
      "step": 54
    },
    {
      "epoch": 0.02680965147453083,
      "grad_norm": 0.2983357310295105,
      "learning_rate": 0.00019976020776030787,
      "loss": 1.3413,
      "step": 55
    },
    {
      "epoch": 0.027297099683158663,
      "grad_norm": 0.2890589237213135,
      "learning_rate": 0.0001997494364137789,
      "loss": 1.3473,
      "step": 56
    },
    {
      "epoch": 0.0277845478917865,
      "grad_norm": 0.34288671612739563,
      "learning_rate": 0.00019973842873439527,
      "loss": 1.431,
      "step": 57
    },
    {
      "epoch": 0.02827199610041433,
      "grad_norm": 0.356336385011673,
      "learning_rate": 0.00019972718474823707,
      "loss": 1.3814,
      "step": 58
    },
    {
      "epoch": 0.028759444309042165,
      "grad_norm": 0.29968544840812683,
      "learning_rate": 0.00019971570448194437,
      "loss": 1.2104,
      "step": 59
    },
    {
      "epoch": 0.029246892517669997,
      "grad_norm": 0.31277570128440857,
      "learning_rate": 0.0001997039879627169,
      "loss": 1.3111,
      "step": 60
    },
    {
      "epoch": 0.029734340726297832,
      "grad_norm": 0.29665568470954895,
      "learning_rate": 0.0001996920352183142,
      "loss": 1.2978,
      "step": 61
    },
    {
      "epoch": 0.030221788934925664,
      "grad_norm": 0.2886115312576294,
      "learning_rate": 0.00019967984627705548,
      "loss": 1.2012,
      "step": 62
    },
    {
      "epoch": 0.030709237143553496,
      "grad_norm": 0.3373067378997803,
      "learning_rate": 0.00019966742116781964,
      "loss": 1.2666,
      "step": 63
    },
    {
      "epoch": 0.03119668535218133,
      "grad_norm": 0.3190588355064392,
      "learning_rate": 0.00019965475992004503,
      "loss": 1.3503,
      "step": 64
    },
    {
      "epoch": 0.031684133560809166,
      "grad_norm": 0.290356308221817,
      "learning_rate": 0.00019964186256372945,
      "loss": 1.3847,
      "step": 65
    },
    {
      "epoch": 0.032171581769437,
      "grad_norm": 0.3749725818634033,
      "learning_rate": 0.0001996287291294302,
      "loss": 1.3672,
      "step": 66
    },
    {
      "epoch": 0.03265902997806483,
      "grad_norm": 0.2738218605518341,
      "learning_rate": 0.00019961535964826385,
      "loss": 1.347,
      "step": 67
    },
    {
      "epoch": 0.03314647818669266,
      "grad_norm": 0.23812803626060486,
      "learning_rate": 0.00019960175415190628,
      "loss": 1.2569,
      "step": 68
    },
    {
      "epoch": 0.0336339263953205,
      "grad_norm": 0.31040453910827637,
      "learning_rate": 0.0001995879126725925,
      "loss": 1.2219,
      "step": 69
    },
    {
      "epoch": 0.03412137460394833,
      "grad_norm": 0.35229820013046265,
      "learning_rate": 0.0001995738352431166,
      "loss": 1.293,
      "step": 70
    },
    {
      "epoch": 0.034608822812576163,
      "grad_norm": 0.22976358234882355,
      "learning_rate": 0.00019955952189683177,
      "loss": 1.1595,
      "step": 71
    },
    {
      "epoch": 0.035096271021203995,
      "grad_norm": 0.35664403438568115,
      "learning_rate": 0.00019954497266765016,
      "loss": 1.3777,
      "step": 72
    },
    {
      "epoch": 0.03558371922983183,
      "grad_norm": 0.28703221678733826,
      "learning_rate": 0.00019953018759004268,
      "loss": 1.4143,
      "step": 73
    },
    {
      "epoch": 0.036071167438459666,
      "grad_norm": 0.32665184140205383,
      "learning_rate": 0.0001995151666990392,
      "loss": 1.3514,
      "step": 74
    },
    {
      "epoch": 0.0365586156470875,
      "grad_norm": 0.2924026846885681,
      "learning_rate": 0.00019949991003022808,
      "loss": 1.2566,
      "step": 75
    },
    {
      "epoch": 0.03704606385571533,
      "grad_norm": 0.3736989498138428,
      "learning_rate": 0.00019948441761975645,
      "loss": 1.4126,
      "step": 76
    },
    {
      "epoch": 0.03753351206434316,
      "grad_norm": 0.268311470746994,
      "learning_rate": 0.00019946868950432997,
      "loss": 1.3878,
      "step": 77
    },
    {
      "epoch": 0.038020960272971,
      "grad_norm": 0.3075256049633026,
      "learning_rate": 0.00019945272572121267,
      "loss": 1.1759,
      "step": 78
    },
    {
      "epoch": 0.03850840848159883,
      "grad_norm": 0.28338623046875,
      "learning_rate": 0.00019943652630822703,
      "loss": 1.3394,
      "step": 79
    },
    {
      "epoch": 0.03899585669022666,
      "grad_norm": 0.2940373718738556,
      "learning_rate": 0.00019942009130375369,
      "loss": 1.3232,
      "step": 80
    },
    {
      "epoch": 0.039483304898854495,
      "grad_norm": 0.29318469762802124,
      "learning_rate": 0.0001994034207467316,
      "loss": 1.3109,
      "step": 81
    },
    {
      "epoch": 0.03997075310748233,
      "grad_norm": 0.3388112187385559,
      "learning_rate": 0.00019938651467665773,
      "loss": 1.1776,
      "step": 82
    },
    {
      "epoch": 0.040458201316110165,
      "grad_norm": 0.29703381657600403,
      "learning_rate": 0.00019936937313358696,
      "loss": 1.28,
      "step": 83
    },
    {
      "epoch": 0.040945649524738,
      "grad_norm": 0.3102991580963135,
      "learning_rate": 0.00019935199615813223,
      "loss": 1.2495,
      "step": 84
    },
    {
      "epoch": 0.04143309773336583,
      "grad_norm": 0.3132036328315735,
      "learning_rate": 0.00019933438379146414,
      "loss": 1.1491,
      "step": 85
    },
    {
      "epoch": 0.04192054594199366,
      "grad_norm": 0.37797728180885315,
      "learning_rate": 0.0001993165360753111,
      "loss": 1.3577,
      "step": 86
    },
    {
      "epoch": 0.0424079941506215,
      "grad_norm": 0.3102344274520874,
      "learning_rate": 0.00019929845305195906,
      "loss": 1.2842,
      "step": 87
    },
    {
      "epoch": 0.04289544235924933,
      "grad_norm": 0.2861989736557007,
      "learning_rate": 0.0001992801347642515,
      "loss": 1.2836,
      "step": 88
    },
    {
      "epoch": 0.04338289056787716,
      "grad_norm": 0.3444364666938782,
      "learning_rate": 0.00019926158125558932,
      "loss": 1.3017,
      "step": 89
    },
    {
      "epoch": 0.043870338776504994,
      "grad_norm": 0.3026193678379059,
      "learning_rate": 0.00019924279256993064,
      "loss": 1.1877,
      "step": 90
    },
    {
      "epoch": 0.04435778698513283,
      "grad_norm": 0.3032093942165375,
      "learning_rate": 0.00019922376875179093,
      "loss": 1.2327,
      "step": 91
    },
    {
      "epoch": 0.044845235193760664,
      "grad_norm": 0.2894906997680664,
      "learning_rate": 0.00019920450984624256,
      "loss": 1.1543,
      "step": 92
    },
    {
      "epoch": 0.045332683402388496,
      "grad_norm": 0.328391432762146,
      "learning_rate": 0.00019918501589891502,
      "loss": 1.1122,
      "step": 93
    },
    {
      "epoch": 0.04582013161101633,
      "grad_norm": 0.27859413623809814,
      "learning_rate": 0.00019916528695599465,
      "loss": 1.2589,
      "step": 94
    },
    {
      "epoch": 0.04630757981964416,
      "grad_norm": 0.32805296778678894,
      "learning_rate": 0.0001991453230642246,
      "loss": 1.3096,
      "step": 95
    },
    {
      "epoch": 0.046795028028272,
      "grad_norm": 0.3156702220439911,
      "learning_rate": 0.00019912512427090447,
      "loss": 1.2511,
      "step": 96
    },
    {
      "epoch": 0.04728247623689983,
      "grad_norm": 0.3225579261779785,
      "learning_rate": 0.0001991046906238907,
      "loss": 1.2565,
      "step": 97
    },
    {
      "epoch": 0.04776992444552766,
      "grad_norm": 0.3395773768424988,
      "learning_rate": 0.00019908402217159595,
      "loss": 1.2728,
      "step": 98
    },
    {
      "epoch": 0.04825737265415549,
      "grad_norm": 0.3359152674674988,
      "learning_rate": 0.00019906311896298923,
      "loss": 1.3422,
      "step": 99
    },
    {
      "epoch": 0.04874482086278333,
      "grad_norm": 0.3509308397769928,
      "learning_rate": 0.00019904198104759587,
      "loss": 1.3714,
      "step": 100
    },
    {
      "epoch": 0.049232269071411164,
      "grad_norm": 0.32388588786125183,
      "learning_rate": 0.00019902060847549718,
      "loss": 1.2591,
      "step": 101
    },
    {
      "epoch": 0.049719717280038996,
      "grad_norm": 0.29979950189590454,
      "learning_rate": 0.00019899900129733035,
      "loss": 1.3937,
      "step": 102
    },
    {
      "epoch": 0.05020716548866683,
      "grad_norm": 0.3444582223892212,
      "learning_rate": 0.00019897715956428862,
      "loss": 1.3148,
      "step": 103
    },
    {
      "epoch": 0.05069461369729466,
      "grad_norm": 0.31882134079933167,
      "learning_rate": 0.0001989550833281208,
      "loss": 1.2834,
      "step": 104
    },
    {
      "epoch": 0.0511820619059225,
      "grad_norm": 0.2893996834754944,
      "learning_rate": 0.00019893277264113136,
      "loss": 1.395,
      "step": 105
    },
    {
      "epoch": 0.05166951011455033,
      "grad_norm": 0.30966147780418396,
      "learning_rate": 0.00019891022755618018,
      "loss": 1.2128,
      "step": 106
    },
    {
      "epoch": 0.05215695832317816,
      "grad_norm": 0.2864265441894531,
      "learning_rate": 0.00019888744812668262,
      "loss": 1.4032,
      "step": 107
    },
    {
      "epoch": 0.05264440653180599,
      "grad_norm": 0.3411870300769806,
      "learning_rate": 0.00019886443440660917,
      "loss": 1.3424,
      "step": 108
    },
    {
      "epoch": 0.05313185474043383,
      "grad_norm": 0.29663193225860596,
      "learning_rate": 0.00019884118645048537,
      "loss": 1.2325,
      "step": 109
    },
    {
      "epoch": 0.05361930294906166,
      "grad_norm": 0.28539153933525085,
      "learning_rate": 0.00019881770431339187,
      "loss": 1.2694,
      "step": 110
    },
    {
      "epoch": 0.054106751157689495,
      "grad_norm": 0.32653146982192993,
      "learning_rate": 0.000198793988050964,
      "loss": 1.3857,
      "step": 111
    },
    {
      "epoch": 0.05459419936631733,
      "grad_norm": 0.32705414295196533,
      "learning_rate": 0.00019877003771939192,
      "loss": 1.305,
      "step": 112
    },
    {
      "epoch": 0.055081647574945165,
      "grad_norm": 0.334217369556427,
      "learning_rate": 0.00019874585337542033,
      "loss": 1.2222,
      "step": 113
    },
    {
      "epoch": 0.055569095783573,
      "grad_norm": 0.4032392203807831,
      "learning_rate": 0.0001987214350763483,
      "loss": 1.2944,
      "step": 114
    },
    {
      "epoch": 0.05605654399220083,
      "grad_norm": 0.36320260167121887,
      "learning_rate": 0.00019869678288002927,
      "loss": 1.3496,
      "step": 115
    },
    {
      "epoch": 0.05654399220082866,
      "grad_norm": 0.2804063856601715,
      "learning_rate": 0.00019867189684487092,
      "loss": 1.2681,
      "step": 116
    },
    {
      "epoch": 0.05703144040945649,
      "grad_norm": 0.2654062807559967,
      "learning_rate": 0.0001986467770298347,
      "loss": 1.2292,
      "step": 117
    },
    {
      "epoch": 0.05751888861808433,
      "grad_norm": 0.3031540811061859,
      "learning_rate": 0.00019862142349443622,
      "loss": 1.2431,
      "step": 118
    },
    {
      "epoch": 0.05800633682671216,
      "grad_norm": 0.3151783049106598,
      "learning_rate": 0.00019859583629874467,
      "loss": 1.2,
      "step": 119
    },
    {
      "epoch": 0.058493785035339994,
      "grad_norm": 0.3036620020866394,
      "learning_rate": 0.00019857001550338294,
      "loss": 1.2311,
      "step": 120
    },
    {
      "epoch": 0.058981233243967826,
      "grad_norm": 0.32396501302719116,
      "learning_rate": 0.00019854396116952735,
      "loss": 1.3063,
      "step": 121
    },
    {
      "epoch": 0.059468681452595665,
      "grad_norm": 0.33199331164360046,
      "learning_rate": 0.00019851767335890747,
      "loss": 1.1809,
      "step": 122
    },
    {
      "epoch": 0.059956129661223496,
      "grad_norm": 0.26451751589775085,
      "learning_rate": 0.00019849115213380612,
      "loss": 1.3042,
      "step": 123
    },
    {
      "epoch": 0.06044357786985133,
      "grad_norm": 0.35607317090034485,
      "learning_rate": 0.00019846439755705913,
      "loss": 1.1293,
      "step": 124
    },
    {
      "epoch": 0.06093102607847916,
      "grad_norm": 0.29516515135765076,
      "learning_rate": 0.00019843740969205517,
      "loss": 1.2308,
      "step": 125
    },
    {
      "epoch": 0.06141847428710699,
      "grad_norm": 0.2964312434196472,
      "learning_rate": 0.00019841018860273558,
      "loss": 1.284,
      "step": 126
    },
    {
      "epoch": 0.06190592249573483,
      "grad_norm": 0.2765253186225891,
      "learning_rate": 0.00019838273435359444,
      "loss": 1.315,
      "step": 127
    },
    {
      "epoch": 0.06239337070436266,
      "grad_norm": 0.33663836121559143,
      "learning_rate": 0.00019835504700967807,
      "loss": 1.3234,
      "step": 128
    },
    {
      "epoch": 0.0628808189129905,
      "grad_norm": 0.29934364557266235,
      "learning_rate": 0.00019832712663658518,
      "loss": 1.4297,
      "step": 129
    },
    {
      "epoch": 0.06336826712161833,
      "grad_norm": 0.3140650987625122,
      "learning_rate": 0.00019829897330046653,
      "loss": 1.2515,
      "step": 130
    },
    {
      "epoch": 0.06385571533024616,
      "grad_norm": 0.29144319891929626,
      "learning_rate": 0.0001982705870680248,
      "loss": 1.1624,
      "step": 131
    },
    {
      "epoch": 0.064343163538874,
      "grad_norm": 0.30572909116744995,
      "learning_rate": 0.0001982419680065145,
      "loss": 1.3521,
      "step": 132
    },
    {
      "epoch": 0.06483061174750183,
      "grad_norm": 0.30701354146003723,
      "learning_rate": 0.0001982131161837418,
      "loss": 1.3164,
      "step": 133
    },
    {
      "epoch": 0.06531805995612966,
      "grad_norm": 0.32151156663894653,
      "learning_rate": 0.0001981840316680643,
      "loss": 1.3548,
      "step": 134
    },
    {
      "epoch": 0.0658055081647575,
      "grad_norm": 0.3038557767868042,
      "learning_rate": 0.00019815471452839094,
      "loss": 1.2915,
      "step": 135
    },
    {
      "epoch": 0.06629295637338532,
      "grad_norm": 0.3463563024997711,
      "learning_rate": 0.00019812516483418176,
      "loss": 1.2835,
      "step": 136
    },
    {
      "epoch": 0.06678040458201316,
      "grad_norm": 0.30769020318984985,
      "learning_rate": 0.00019809538265544785,
      "loss": 1.1407,
      "step": 137
    },
    {
      "epoch": 0.067267852790641,
      "grad_norm": 0.31621092557907104,
      "learning_rate": 0.00019806536806275108,
      "loss": 1.2259,
      "step": 138
    },
    {
      "epoch": 0.06775530099926882,
      "grad_norm": 0.31812846660614014,
      "learning_rate": 0.00019803512112720397,
      "loss": 1.1366,
      "step": 139
    },
    {
      "epoch": 0.06824274920789666,
      "grad_norm": 0.2971956133842468,
      "learning_rate": 0.00019800464192046955,
      "loss": 1.3306,
      "step": 140
    },
    {
      "epoch": 0.06873019741652449,
      "grad_norm": 0.3192349374294281,
      "learning_rate": 0.0001979739305147611,
      "loss": 1.3795,
      "step": 141
    },
    {
      "epoch": 0.06921764562515233,
      "grad_norm": 0.2938215732574463,
      "learning_rate": 0.0001979429869828421,
      "loss": 1.2272,
      "step": 142
    },
    {
      "epoch": 0.06970509383378017,
      "grad_norm": 0.3284856677055359,
      "learning_rate": 0.00019791181139802602,
      "loss": 1.2728,
      "step": 143
    },
    {
      "epoch": 0.07019254204240799,
      "grad_norm": 0.30583491921424866,
      "learning_rate": 0.00019788040383417597,
      "loss": 1.1829,
      "step": 144
    },
    {
      "epoch": 0.07067999025103583,
      "grad_norm": 0.31813549995422363,
      "learning_rate": 0.00019784876436570493,
      "loss": 1.1448,
      "step": 145
    },
    {
      "epoch": 0.07116743845966365,
      "grad_norm": 0.2916363477706909,
      "learning_rate": 0.00019781689306757512,
      "loss": 1.2429,
      "step": 146
    },
    {
      "epoch": 0.07165488666829149,
      "grad_norm": 0.34224483370780945,
      "learning_rate": 0.00019778479001529808,
      "loss": 1.2228,
      "step": 147
    },
    {
      "epoch": 0.07214233487691933,
      "grad_norm": 0.26968345046043396,
      "learning_rate": 0.00019775245528493447,
      "loss": 1.2041,
      "step": 148
    },
    {
      "epoch": 0.07262978308554716,
      "grad_norm": 0.32275018095970154,
      "learning_rate": 0.00019771988895309384,
      "loss": 1.4233,
      "step": 149
    },
    {
      "epoch": 0.073117231294175,
      "grad_norm": 0.30032193660736084,
      "learning_rate": 0.00019768709109693443,
      "loss": 1.1385,
      "step": 150
    },
    {
      "epoch": 0.07360467950280283,
      "grad_norm": 0.3605499267578125,
      "learning_rate": 0.00019765406179416312,
      "loss": 1.1803,
      "step": 151
    },
    {
      "epoch": 0.07409212771143066,
      "grad_norm": 0.29714006185531616,
      "learning_rate": 0.00019762080112303504,
      "loss": 1.2595,
      "step": 152
    },
    {
      "epoch": 0.0745795759200585,
      "grad_norm": 0.28255385160446167,
      "learning_rate": 0.00019758730916235356,
      "loss": 1.1942,
      "step": 153
    },
    {
      "epoch": 0.07506702412868632,
      "grad_norm": 0.3305562734603882,
      "learning_rate": 0.00019755358599146994,
      "loss": 1.364,
      "step": 154
    },
    {
      "epoch": 0.07555447233731416,
      "grad_norm": 0.30705878138542175,
      "learning_rate": 0.00019751963169028342,
      "loss": 1.226,
      "step": 155
    },
    {
      "epoch": 0.076041920545942,
      "grad_norm": 0.29158154129981995,
      "learning_rate": 0.00019748544633924064,
      "loss": 1.2687,
      "step": 156
    },
    {
      "epoch": 0.07652936875456982,
      "grad_norm": 0.2785685658454895,
      "learning_rate": 0.00019745103001933583,
      "loss": 1.2601,
      "step": 157
    },
    {
      "epoch": 0.07701681696319766,
      "grad_norm": 0.3190236985683441,
      "learning_rate": 0.00019741638281211033,
      "loss": 1.2195,
      "step": 158
    },
    {
      "epoch": 0.07750426517182549,
      "grad_norm": 0.3836527168750763,
      "learning_rate": 0.00019738150479965257,
      "loss": 1.197,
      "step": 159
    },
    {
      "epoch": 0.07799171338045333,
      "grad_norm": 0.33303093910217285,
      "learning_rate": 0.00019734639606459783,
      "loss": 1.3514,
      "step": 160
    },
    {
      "epoch": 0.07847916158908116,
      "grad_norm": 0.28645503520965576,
      "learning_rate": 0.000197311056690128,
      "loss": 1.2765,
      "step": 161
    },
    {
      "epoch": 0.07896660979770899,
      "grad_norm": 0.3097892701625824,
      "learning_rate": 0.00019727548675997137,
      "loss": 1.3315,
      "step": 162
    },
    {
      "epoch": 0.07945405800633683,
      "grad_norm": 0.27848270535469055,
      "learning_rate": 0.0001972396863584026,
      "loss": 1.2799,
      "step": 163
    },
    {
      "epoch": 0.07994150621496467,
      "grad_norm": 0.25580862164497375,
      "learning_rate": 0.0001972036555702423,
      "loss": 1.2318,
      "step": 164
    },
    {
      "epoch": 0.08042895442359249,
      "grad_norm": 0.3450034260749817,
      "learning_rate": 0.000197167394480857,
      "loss": 1.2851,
      "step": 165
    },
    {
      "epoch": 0.08091640263222033,
      "grad_norm": 0.32682400941848755,
      "learning_rate": 0.00019713090317615876,
      "loss": 1.1519,
      "step": 166
    },
    {
      "epoch": 0.08140385084084815,
      "grad_norm": 0.30758345127105713,
      "learning_rate": 0.0001970941817426052,
      "loss": 1.1295,
      "step": 167
    },
    {
      "epoch": 0.081891299049476,
      "grad_norm": 0.3209025263786316,
      "learning_rate": 0.00019705723026719913,
      "loss": 1.2555,
      "step": 168
    },
    {
      "epoch": 0.08237874725810383,
      "grad_norm": 0.2652692198753357,
      "learning_rate": 0.0001970200488374884,
      "loss": 1.2326,
      "step": 169
    },
    {
      "epoch": 0.08286619546673166,
      "grad_norm": 0.3520825207233429,
      "learning_rate": 0.0001969826375415656,
      "loss": 1.3077,
      "step": 170
    },
    {
      "epoch": 0.0833536436753595,
      "grad_norm": 0.28723257780075073,
      "learning_rate": 0.00019694499646806808,
      "loss": 1.2389,
      "step": 171
    },
    {
      "epoch": 0.08384109188398732,
      "grad_norm": 0.31456753611564636,
      "learning_rate": 0.0001969071257061775,
      "loss": 1.4082,
      "step": 172
    },
    {
      "epoch": 0.08432854009261516,
      "grad_norm": 0.3273375332355499,
      "learning_rate": 0.00019686902534561974,
      "loss": 1.2289,
      "step": 173
    },
    {
      "epoch": 0.084815988301243,
      "grad_norm": 0.3643217384815216,
      "learning_rate": 0.00019683069547666467,
      "loss": 1.3557,
      "step": 174
    },
    {
      "epoch": 0.08530343650987082,
      "grad_norm": 0.33192405104637146,
      "learning_rate": 0.00019679213619012585,
      "loss": 1.251,
      "step": 175
    },
    {
      "epoch": 0.08579088471849866,
      "grad_norm": 0.301046222448349,
      "learning_rate": 0.0001967533475773605,
      "loss": 1.3237,
      "step": 176
    },
    {
      "epoch": 0.08627833292712649,
      "grad_norm": 0.29805856943130493,
      "learning_rate": 0.0001967143297302691,
      "loss": 1.1757,
      "step": 177
    },
    {
      "epoch": 0.08676578113575432,
      "grad_norm": 0.3067397475242615,
      "learning_rate": 0.00019667508274129526,
      "loss": 1.3353,
      "step": 178
    },
    {
      "epoch": 0.08725322934438216,
      "grad_norm": 0.3156089782714844,
      "learning_rate": 0.00019663560670342558,
      "loss": 1.3084,
      "step": 179
    },
    {
      "epoch": 0.08774067755300999,
      "grad_norm": 0.3230333924293518,
      "learning_rate": 0.00019659590171018914,
      "loss": 1.3054,
      "step": 180
    },
    {
      "epoch": 0.08822812576163783,
      "grad_norm": 0.33901235461235046,
      "learning_rate": 0.00019655596785565768,
      "loss": 1.2629,
      "step": 181
    },
    {
      "epoch": 0.08871557397026567,
      "grad_norm": 0.3485647141933441,
      "learning_rate": 0.00019651580523444507,
      "loss": 1.2564,
      "step": 182
    },
    {
      "epoch": 0.08920302217889349,
      "grad_norm": 0.3248312175273895,
      "learning_rate": 0.00019647541394170718,
      "loss": 1.373,
      "step": 183
    },
    {
      "epoch": 0.08969047038752133,
      "grad_norm": 0.3881993293762207,
      "learning_rate": 0.00019643479407314168,
      "loss": 1.2413,
      "step": 184
    },
    {
      "epoch": 0.09017791859614915,
      "grad_norm": 0.36643728613853455,
      "learning_rate": 0.00019639394572498788,
      "loss": 1.1796,
      "step": 185
    },
    {
      "epoch": 0.09066536680477699,
      "grad_norm": 0.3189202845096588,
      "learning_rate": 0.00019635286899402624,
      "loss": 0.9767,
      "step": 186
    },
    {
      "epoch": 0.09115281501340483,
      "grad_norm": 0.3280291259288788,
      "learning_rate": 0.00019631156397757852,
      "loss": 1.3026,
      "step": 187
    },
    {
      "epoch": 0.09164026322203266,
      "grad_norm": 0.2965233027935028,
      "learning_rate": 0.00019627003077350716,
      "loss": 1.2552,
      "step": 188
    },
    {
      "epoch": 0.0921277114306605,
      "grad_norm": 0.35237714648246765,
      "learning_rate": 0.0001962282694802154,
      "loss": 1.3261,
      "step": 189
    },
    {
      "epoch": 0.09261515963928832,
      "grad_norm": 0.34230533242225647,
      "learning_rate": 0.00019618628019664683,
      "loss": 1.186,
      "step": 190
    },
    {
      "epoch": 0.09310260784791616,
      "grad_norm": 0.3429117202758789,
      "learning_rate": 0.0001961440630222851,
      "loss": 1.3247,
      "step": 191
    },
    {
      "epoch": 0.093590056056544,
      "grad_norm": 0.3420495390892029,
      "learning_rate": 0.00019610161805715397,
      "loss": 1.2166,
      "step": 192
    },
    {
      "epoch": 0.09407750426517182,
      "grad_norm": 0.4569028913974762,
      "learning_rate": 0.00019605894540181677,
      "loss": 1.1844,
      "step": 193
    },
    {
      "epoch": 0.09456495247379966,
      "grad_norm": 0.37705421447753906,
      "learning_rate": 0.00019601604515737635,
      "loss": 1.2311,
      "step": 194
    },
    {
      "epoch": 0.0950524006824275,
      "grad_norm": 0.3612574636936188,
      "learning_rate": 0.00019597291742547474,
      "loss": 1.2255,
      "step": 195
    },
    {
      "epoch": 0.09553984889105532,
      "grad_norm": 0.3655405044555664,
      "learning_rate": 0.000195929562308293,
      "loss": 1.1927,
      "step": 196
    },
    {
      "epoch": 0.09602729709968316,
      "grad_norm": 0.3330654203891754,
      "learning_rate": 0.00019588597990855084,
      "loss": 1.2456,
      "step": 197
    },
    {
      "epoch": 0.09651474530831099,
      "grad_norm": 0.3447643518447876,
      "learning_rate": 0.00019584217032950658,
      "loss": 1.1579,
      "step": 198
    },
    {
      "epoch": 0.09700219351693883,
      "grad_norm": 0.36293572187423706,
      "learning_rate": 0.00019579813367495672,
      "loss": 1.2373,
      "step": 199
    },
    {
      "epoch": 0.09748964172556666,
      "grad_norm": 0.35249459743499756,
      "learning_rate": 0.0001957538700492357,
      "loss": 1.2832,
      "step": 200
    },
    {
      "epoch": 0.09797708993419449,
      "grad_norm": 0.4142071008682251,
      "learning_rate": 0.00019570937955721586,
      "loss": 1.1746,
      "step": 201
    },
    {
      "epoch": 0.09846453814282233,
      "grad_norm": 0.29519200325012207,
      "learning_rate": 0.00019566466230430693,
      "loss": 1.2661,
      "step": 202
    },
    {
      "epoch": 0.09895198635145015,
      "grad_norm": 0.39531150460243225,
      "learning_rate": 0.00019561971839645594,
      "loss": 1.2058,
      "step": 203
    },
    {
      "epoch": 0.09943943456007799,
      "grad_norm": 0.3783504366874695,
      "learning_rate": 0.0001955745479401469,
      "loss": 1.2174,
      "step": 204
    },
    {
      "epoch": 0.09992688276870583,
      "grad_norm": 0.3396473824977875,
      "learning_rate": 0.00019552915104240065,
      "loss": 1.248,
      "step": 205
    },
    {
      "epoch": 0.10041433097733365,
      "grad_norm": 0.39432066679000854,
      "learning_rate": 0.00019548352781077443,
      "loss": 1.1647,
      "step": 206
    },
    {
      "epoch": 0.1009017791859615,
      "grad_norm": 0.37768709659576416,
      "learning_rate": 0.0001954376783533618,
      "loss": 1.31,
      "step": 207
    },
    {
      "epoch": 0.10138922739458932,
      "grad_norm": 0.3488147556781769,
      "learning_rate": 0.00019539160277879224,
      "loss": 1.195,
      "step": 208
    },
    {
      "epoch": 0.10187667560321716,
      "grad_norm": 0.3478896915912628,
      "learning_rate": 0.00019534530119623097,
      "loss": 1.2701,
      "step": 209
    },
    {
      "epoch": 0.102364123811845,
      "grad_norm": 0.34134477376937866,
      "learning_rate": 0.00019529877371537882,
      "loss": 1.3193,
      "step": 210
    },
    {
      "epoch": 0.10285157202047282,
      "grad_norm": 0.3868890404701233,
      "learning_rate": 0.00019525202044647162,
      "loss": 1.2999,
      "step": 211
    },
    {
      "epoch": 0.10333902022910066,
      "grad_norm": 0.35992351174354553,
      "learning_rate": 0.00019520504150028032,
      "loss": 1.5135,
      "step": 212
    },
    {
      "epoch": 0.1038264684377285,
      "grad_norm": 0.337139755487442,
      "learning_rate": 0.00019515783698811044,
      "loss": 1.2961,
      "step": 213
    },
    {
      "epoch": 0.10431391664635632,
      "grad_norm": 0.2877628207206726,
      "learning_rate": 0.00019511040702180203,
      "loss": 1.284,
      "step": 214
    },
    {
      "epoch": 0.10480136485498416,
      "grad_norm": 0.3476368188858032,
      "learning_rate": 0.00019506275171372926,
      "loss": 1.3162,
      "step": 215
    },
    {
      "epoch": 0.10528881306361199,
      "grad_norm": 0.2870039641857147,
      "learning_rate": 0.00019501487117680016,
      "loss": 1.0493,
      "step": 216
    },
    {
      "epoch": 0.10577626127223982,
      "grad_norm": 0.3463204801082611,
      "learning_rate": 0.0001949667655244564,
      "loss": 1.2352,
      "step": 217
    },
    {
      "epoch": 0.10626370948086766,
      "grad_norm": 0.31859636306762695,
      "learning_rate": 0.00019491843487067306,
      "loss": 1.2171,
      "step": 218
    },
    {
      "epoch": 0.10675115768949549,
      "grad_norm": 0.29325994849205017,
      "learning_rate": 0.00019486987932995823,
      "loss": 1.3066,
      "step": 219
    },
    {
      "epoch": 0.10723860589812333,
      "grad_norm": 0.3366852402687073,
      "learning_rate": 0.00019482109901735285,
      "loss": 1.2206,
      "step": 220
    },
    {
      "epoch": 0.10772605410675115,
      "grad_norm": 0.3563458025455475,
      "learning_rate": 0.00019477209404843049,
      "loss": 1.1842,
      "step": 221
    },
    {
      "epoch": 0.10821350231537899,
      "grad_norm": 0.2932288646697998,
      "learning_rate": 0.00019472286453929682,
      "loss": 1.1563,
      "step": 222
    },
    {
      "epoch": 0.10870095052400683,
      "grad_norm": 0.3266105353832245,
      "learning_rate": 0.00019467341060658963,
      "loss": 1.1246,
      "step": 223
    },
    {
      "epoch": 0.10918839873263465,
      "grad_norm": 0.33189934492111206,
      "learning_rate": 0.0001946237323674784,
      "loss": 1.3277,
      "step": 224
    },
    {
      "epoch": 0.10967584694126249,
      "grad_norm": 0.3772162199020386,
      "learning_rate": 0.00019457382993966405,
      "loss": 1.1527,
      "step": 225
    },
    {
      "epoch": 0.11016329514989033,
      "grad_norm": 0.34699949622154236,
      "learning_rate": 0.00019452370344137868,
      "loss": 1.2081,
      "step": 226
    },
    {
      "epoch": 0.11065074335851816,
      "grad_norm": 0.3212660253047943,
      "learning_rate": 0.00019447335299138517,
      "loss": 1.237,
      "step": 227
    },
    {
      "epoch": 0.111138191567146,
      "grad_norm": 0.33385413885116577,
      "learning_rate": 0.00019442277870897713,
      "loss": 1.3355,
      "step": 228
    },
    {
      "epoch": 0.11162563977577382,
      "grad_norm": 0.3604995608329773,
      "learning_rate": 0.0001943719807139785,
      "loss": 1.4172,
      "step": 229
    },
    {
      "epoch": 0.11211308798440166,
      "grad_norm": 0.2965205907821655,
      "learning_rate": 0.0001943209591267431,
      "loss": 1.2438,
      "step": 230
    },
    {
      "epoch": 0.1126005361930295,
      "grad_norm": 0.27478981018066406,
      "learning_rate": 0.00019426971406815463,
      "loss": 1.1359,
      "step": 231
    },
    {
      "epoch": 0.11308798440165732,
      "grad_norm": 0.3109903037548065,
      "learning_rate": 0.00019421824565962623,
      "loss": 1.2647,
      "step": 232
    },
    {
      "epoch": 0.11357543261028516,
      "grad_norm": 0.2932765781879425,
      "learning_rate": 0.0001941665540231002,
      "loss": 1.3322,
      "step": 233
    },
    {
      "epoch": 0.11406288081891298,
      "grad_norm": 0.28574731945991516,
      "learning_rate": 0.0001941146392810477,
      "loss": 1.2771,
      "step": 234
    },
    {
      "epoch": 0.11455032902754082,
      "grad_norm": 0.3058600425720215,
      "learning_rate": 0.00019406250155646856,
      "loss": 1.1512,
      "step": 235
    },
    {
      "epoch": 0.11503777723616866,
      "grad_norm": 0.345528781414032,
      "learning_rate": 0.00019401014097289083,
      "loss": 1.3235,
      "step": 236
    },
    {
      "epoch": 0.11552522544479649,
      "grad_norm": 0.28189167380332947,
      "learning_rate": 0.00019395755765437062,
      "loss": 1.159,
      "step": 237
    },
    {
      "epoch": 0.11601267365342433,
      "grad_norm": 0.3478774428367615,
      "learning_rate": 0.00019390475172549176,
      "loss": 1.3016,
      "step": 238
    },
    {
      "epoch": 0.11650012186205215,
      "grad_norm": 0.28645917773246765,
      "learning_rate": 0.0001938517233113655,
      "loss": 1.3106,
      "step": 239
    },
    {
      "epoch": 0.11698757007067999,
      "grad_norm": 0.3461976647377014,
      "learning_rate": 0.0001937984725376302,
      "loss": 1.3887,
      "step": 240
    },
    {
      "epoch": 0.11747501827930783,
      "grad_norm": 0.34896981716156006,
      "learning_rate": 0.00019374499953045104,
      "loss": 1.2542,
      "step": 241
    },
    {
      "epoch": 0.11796246648793565,
      "grad_norm": 0.3654549717903137,
      "learning_rate": 0.00019369130441651978,
      "loss": 1.2987,
      "step": 242
    },
    {
      "epoch": 0.11844991469656349,
      "grad_norm": 0.283341646194458,
      "learning_rate": 0.00019363738732305433,
      "loss": 1.2665,
      "step": 243
    },
    {
      "epoch": 0.11893736290519133,
      "grad_norm": 0.38239938020706177,
      "learning_rate": 0.00019358324837779863,
      "loss": 1.1503,
      "step": 244
    },
    {
      "epoch": 0.11942481111381915,
      "grad_norm": 0.35076814889907837,
      "learning_rate": 0.00019352888770902214,
      "loss": 1.2163,
      "step": 245
    },
    {
      "epoch": 0.11991225932244699,
      "grad_norm": 0.34189239144325256,
      "learning_rate": 0.00019347430544551975,
      "loss": 1.2757,
      "step": 246
    },
    {
      "epoch": 0.12039970753107482,
      "grad_norm": 0.3641584813594818,
      "learning_rate": 0.00019341950171661125,
      "loss": 1.2257,
      "step": 247
    },
    {
      "epoch": 0.12088715573970266,
      "grad_norm": 0.29058733582496643,
      "learning_rate": 0.00019336447665214125,
      "loss": 1.1369,
      "step": 248
    },
    {
      "epoch": 0.1213746039483305,
      "grad_norm": 0.3116576373577118,
      "learning_rate": 0.0001933092303824787,
      "loss": 1.1738,
      "step": 249
    },
    {
      "epoch": 0.12186205215695832,
      "grad_norm": 0.3579969108104706,
      "learning_rate": 0.00019325376303851662,
      "loss": 1.2442,
      "step": 250
    },
    {
      "epoch": 0.12234950036558616,
      "grad_norm": 0.3996794819831848,
      "learning_rate": 0.0001931980747516719,
      "loss": 1.3211,
      "step": 251
    },
    {
      "epoch": 0.12283694857421398,
      "grad_norm": 0.3101326823234558,
      "learning_rate": 0.0001931421656538848,
      "loss": 1.2243,
      "step": 252
    },
    {
      "epoch": 0.12332439678284182,
      "grad_norm": 0.42262008786201477,
      "learning_rate": 0.00019308603587761888,
      "loss": 1.3067,
      "step": 253
    },
    {
      "epoch": 0.12381184499146966,
      "grad_norm": 0.33090803027153015,
      "learning_rate": 0.0001930296855558604,
      "loss": 1.2928,
      "step": 254
    },
    {
      "epoch": 0.12429929320009749,
      "grad_norm": 0.2976021468639374,
      "learning_rate": 0.00019297311482211816,
      "loss": 1.2932,
      "step": 255
    },
    {
      "epoch": 0.12478674140872532,
      "grad_norm": 0.27347099781036377,
      "learning_rate": 0.00019291632381042328,
      "loss": 1.2138,
      "step": 256
    },
    {
      "epoch": 0.12527418961735315,
      "grad_norm": 0.39553186297416687,
      "learning_rate": 0.00019285931265532871,
      "loss": 1.3444,
      "step": 257
    },
    {
      "epoch": 0.125761637825981,
      "grad_norm": 0.3036799132823944,
      "learning_rate": 0.00019280208149190903,
      "loss": 1.1993,
      "step": 258
    },
    {
      "epoch": 0.12624908603460883,
      "grad_norm": 0.31273937225341797,
      "learning_rate": 0.00019274463045575998,
      "loss": 1.1879,
      "step": 259
    },
    {
      "epoch": 0.12673653424323666,
      "grad_norm": 0.3476383686065674,
      "learning_rate": 0.00019268695968299832,
      "loss": 1.3481,
      "step": 260
    },
    {
      "epoch": 0.12722398245186448,
      "grad_norm": 0.30680546164512634,
      "learning_rate": 0.00019262906931026138,
      "loss": 1.3024,
      "step": 261
    },
    {
      "epoch": 0.12771143066049231,
      "grad_norm": 0.3958556056022644,
      "learning_rate": 0.00019257095947470678,
      "loss": 1.3361,
      "step": 262
    },
    {
      "epoch": 0.12819887886912015,
      "grad_norm": 0.2787322700023651,
      "learning_rate": 0.00019251263031401216,
      "loss": 1.1156,
      "step": 263
    },
    {
      "epoch": 0.128686327077748,
      "grad_norm": 0.33875012397766113,
      "learning_rate": 0.00019245408196637477,
      "loss": 1.4094,
      "step": 264
    },
    {
      "epoch": 0.12917377528637583,
      "grad_norm": 0.32169806957244873,
      "learning_rate": 0.00019239531457051112,
      "loss": 1.2505,
      "step": 265
    },
    {
      "epoch": 0.12966122349500367,
      "grad_norm": 0.36968526244163513,
      "learning_rate": 0.00019233632826565677,
      "loss": 1.264,
      "step": 266
    },
    {
      "epoch": 0.13014867170363148,
      "grad_norm": 0.3718290328979492,
      "learning_rate": 0.00019227712319156592,
      "loss": 1.2319,
      "step": 267
    },
    {
      "epoch": 0.13063611991225932,
      "grad_norm": 0.32931840419769287,
      "learning_rate": 0.0001922176994885111,
      "loss": 1.3845,
      "step": 268
    },
    {
      "epoch": 0.13112356812088716,
      "grad_norm": 0.3489207625389099,
      "learning_rate": 0.00019215805729728275,
      "loss": 1.3742,
      "step": 269
    },
    {
      "epoch": 0.131611016329515,
      "grad_norm": 0.30691617727279663,
      "learning_rate": 0.0001920981967591891,
      "loss": 1.2871,
      "step": 270
    },
    {
      "epoch": 0.13209846453814283,
      "grad_norm": 0.4441424012184143,
      "learning_rate": 0.00019203811801605557,
      "loss": 1.194,
      "step": 271
    },
    {
      "epoch": 0.13258591274677065,
      "grad_norm": 0.33883553743362427,
      "learning_rate": 0.0001919778212102247,
      "loss": 1.2959,
      "step": 272
    },
    {
      "epoch": 0.13307336095539848,
      "grad_norm": 0.35645484924316406,
      "learning_rate": 0.00019191730648455556,
      "loss": 1.2679,
      "step": 273
    },
    {
      "epoch": 0.13356080916402632,
      "grad_norm": 0.3160024881362915,
      "learning_rate": 0.00019185657398242356,
      "loss": 1.2795,
      "step": 274
    },
    {
      "epoch": 0.13404825737265416,
      "grad_norm": 0.30766648054122925,
      "learning_rate": 0.00019179562384772012,
      "loss": 1.1325,
      "step": 275
    },
    {
      "epoch": 0.134535705581282,
      "grad_norm": 0.33924242854118347,
      "learning_rate": 0.00019173445622485224,
      "loss": 1.3172,
      "step": 276
    },
    {
      "epoch": 0.1350231537899098,
      "grad_norm": 0.30728015303611755,
      "learning_rate": 0.00019167307125874227,
      "loss": 1.3195,
      "step": 277
    },
    {
      "epoch": 0.13551060199853765,
      "grad_norm": 0.34279197454452515,
      "learning_rate": 0.00019161146909482747,
      "loss": 1.3398,
      "step": 278
    },
    {
      "epoch": 0.1359980502071655,
      "grad_norm": 0.27993321418762207,
      "learning_rate": 0.00019154964987905964,
      "loss": 1.1722,
      "step": 279
    },
    {
      "epoch": 0.13648549841579333,
      "grad_norm": 0.297305166721344,
      "learning_rate": 0.0001914876137579049,
      "loss": 1.2413,
      "step": 280
    },
    {
      "epoch": 0.13697294662442117,
      "grad_norm": 0.3172423243522644,
      "learning_rate": 0.00019142536087834335,
      "loss": 1.2392,
      "step": 281
    },
    {
      "epoch": 0.13746039483304898,
      "grad_norm": 0.39218711853027344,
      "learning_rate": 0.00019136289138786845,
      "loss": 1.1854,
      "step": 282
    },
    {
      "epoch": 0.13794784304167682,
      "grad_norm": 0.3725161850452423,
      "learning_rate": 0.00019130020543448704,
      "loss": 1.3279,
      "step": 283
    },
    {
      "epoch": 0.13843529125030465,
      "grad_norm": 0.3030851483345032,
      "learning_rate": 0.00019123730316671872,
      "loss": 1.2448,
      "step": 284
    },
    {
      "epoch": 0.1389227394589325,
      "grad_norm": 0.29124540090560913,
      "learning_rate": 0.00019117418473359573,
      "loss": 1.1935,
      "step": 285
    },
    {
      "epoch": 0.13941018766756033,
      "grad_norm": 0.3250710964202881,
      "learning_rate": 0.00019111085028466224,
      "loss": 1.2645,
      "step": 286
    },
    {
      "epoch": 0.13989763587618814,
      "grad_norm": 0.3330450654029846,
      "learning_rate": 0.0001910472999699744,
      "loss": 1.1181,
      "step": 287
    },
    {
      "epoch": 0.14038508408481598,
      "grad_norm": 0.2867342233657837,
      "learning_rate": 0.0001909835339400998,
      "loss": 1.1891,
      "step": 288
    },
    {
      "epoch": 0.14087253229344382,
      "grad_norm": 0.3709441125392914,
      "learning_rate": 0.000190919552346117,
      "loss": 1.3854,
      "step": 289
    },
    {
      "epoch": 0.14135998050207166,
      "grad_norm": 0.3034871816635132,
      "learning_rate": 0.00019085535533961537,
      "loss": 1.2462,
      "step": 290
    },
    {
      "epoch": 0.1418474287106995,
      "grad_norm": 0.29863473773002625,
      "learning_rate": 0.00019079094307269468,
      "loss": 1.2237,
      "step": 291
    },
    {
      "epoch": 0.1423348769193273,
      "grad_norm": 0.5280107259750366,
      "learning_rate": 0.00019072631569796463,
      "loss": 1.2578,
      "step": 292
    },
    {
      "epoch": 0.14282232512795515,
      "grad_norm": 0.3158257007598877,
      "learning_rate": 0.00019066147336854457,
      "loss": 1.1757,
      "step": 293
    },
    {
      "epoch": 0.14330977333658299,
      "grad_norm": 0.34107086062431335,
      "learning_rate": 0.0001905964162380632,
      "loss": 1.1711,
      "step": 294
    },
    {
      "epoch": 0.14379722154521082,
      "grad_norm": 0.3253112733364105,
      "learning_rate": 0.0001905311444606581,
      "loss": 1.2229,
      "step": 295
    },
    {
      "epoch": 0.14428466975383866,
      "grad_norm": 0.28953564167022705,
      "learning_rate": 0.00019046565819097545,
      "loss": 1.2001,
      "step": 296
    },
    {
      "epoch": 0.1447721179624665,
      "grad_norm": 0.2937741279602051,
      "learning_rate": 0.0001903999575841695,
      "loss": 1.1535,
      "step": 297
    },
    {
      "epoch": 0.1452595661710943,
      "grad_norm": 0.3405333459377289,
      "learning_rate": 0.00019033404279590244,
      "loss": 1.2373,
      "step": 298
    },
    {
      "epoch": 0.14574701437972215,
      "grad_norm": 0.33529722690582275,
      "learning_rate": 0.00019026791398234392,
      "loss": 1.1769,
      "step": 299
    },
    {
      "epoch": 0.14623446258835,
      "grad_norm": 0.3445146381855011,
      "learning_rate": 0.00019020157130017053,
      "loss": 1.2635,
      "step": 300
    },
    {
      "epoch": 0.14672191079697783,
      "grad_norm": 0.30162152647972107,
      "learning_rate": 0.00019013501490656578,
      "loss": 1.2069,
      "step": 301
    },
    {
      "epoch": 0.14720935900560567,
      "grad_norm": 0.3126993477344513,
      "learning_rate": 0.00019006824495921936,
      "loss": 1.1025,
      "step": 302
    },
    {
      "epoch": 0.14769680721423348,
      "grad_norm": 0.35758090019226074,
      "learning_rate": 0.0001900012616163269,
      "loss": 1.1173,
      "step": 303
    },
    {
      "epoch": 0.14818425542286132,
      "grad_norm": 0.3262757658958435,
      "learning_rate": 0.00018993406503658983,
      "loss": 1.1772,
      "step": 304
    },
    {
      "epoch": 0.14867170363148915,
      "grad_norm": 0.3326362371444702,
      "learning_rate": 0.00018986665537921456,
      "loss": 1.187,
      "step": 305
    },
    {
      "epoch": 0.149159151840117,
      "grad_norm": 0.375229150056839,
      "learning_rate": 0.0001897990328039125,
      "loss": 1.2361,
      "step": 306
    },
    {
      "epoch": 0.14964660004874483,
      "grad_norm": 0.35128822922706604,
      "learning_rate": 0.0001897311974708994,
      "loss": 1.1719,
      "step": 307
    },
    {
      "epoch": 0.15013404825737264,
      "grad_norm": 0.2904835641384125,
      "learning_rate": 0.00018966314954089517,
      "loss": 1.2435,
      "step": 308
    },
    {
      "epoch": 0.15062149646600048,
      "grad_norm": 0.42316755652427673,
      "learning_rate": 0.0001895948891751234,
      "loss": 1.2499,
      "step": 309
    },
    {
      "epoch": 0.15110894467462832,
      "grad_norm": 0.30534741282463074,
      "learning_rate": 0.00018952641653531096,
      "loss": 1.1374,
      "step": 310
    },
    {
      "epoch": 0.15159639288325616,
      "grad_norm": 0.42398756742477417,
      "learning_rate": 0.0001894577317836877,
      "loss": 1.2051,
      "step": 311
    },
    {
      "epoch": 0.152083841091884,
      "grad_norm": 0.3085149824619293,
      "learning_rate": 0.00018938883508298605,
      "loss": 1.289,
      "step": 312
    },
    {
      "epoch": 0.1525712893005118,
      "grad_norm": 0.3194994032382965,
      "learning_rate": 0.00018931972659644045,
      "loss": 1.355,
      "step": 313
    },
    {
      "epoch": 0.15305873750913965,
      "grad_norm": 0.3154197335243225,
      "learning_rate": 0.00018925040648778732,
      "loss": 1.1218,
      "step": 314
    },
    {
      "epoch": 0.15354618571776749,
      "grad_norm": 0.3922419548034668,
      "learning_rate": 0.00018918087492126432,
      "loss": 1.206,
      "step": 315
    },
    {
      "epoch": 0.15403363392639532,
      "grad_norm": 0.37550610303878784,
      "learning_rate": 0.0001891111320616102,
      "loss": 1.2382,
      "step": 316
    },
    {
      "epoch": 0.15452108213502316,
      "grad_norm": 0.300519198179245,
      "learning_rate": 0.00018904117807406424,
      "loss": 1.0981,
      "step": 317
    },
    {
      "epoch": 0.15500853034365097,
      "grad_norm": 0.32601064443588257,
      "learning_rate": 0.000188971013124366,
      "loss": 1.1217,
      "step": 318
    },
    {
      "epoch": 0.1554959785522788,
      "grad_norm": 0.37468162178993225,
      "learning_rate": 0.00018890063737875482,
      "loss": 1.2628,
      "step": 319
    },
    {
      "epoch": 0.15598342676090665,
      "grad_norm": 0.4266589879989624,
      "learning_rate": 0.00018883005100396957,
      "loss": 1.2281,
      "step": 320
    },
    {
      "epoch": 0.1564708749695345,
      "grad_norm": 0.33140459656715393,
      "learning_rate": 0.00018875925416724794,
      "loss": 1.301,
      "step": 321
    },
    {
      "epoch": 0.15695832317816233,
      "grad_norm": 0.28100690245628357,
      "learning_rate": 0.00018868824703632657,
      "loss": 1.3174,
      "step": 322
    },
    {
      "epoch": 0.15744577138679014,
      "grad_norm": 0.2905332148075104,
      "learning_rate": 0.00018861702977944006,
      "loss": 1.2159,
      "step": 323
    },
    {
      "epoch": 0.15793321959541798,
      "grad_norm": 0.3181256353855133,
      "learning_rate": 0.000188545602565321,
      "loss": 1.3239,
      "step": 324
    },
    {
      "epoch": 0.15842066780404582,
      "grad_norm": 0.31099027395248413,
      "learning_rate": 0.0001884739655631994,
      "loss": 1.2747,
      "step": 325
    },
    {
      "epoch": 0.15890811601267366,
      "grad_norm": 0.3274545967578888,
      "learning_rate": 0.00018840211894280228,
      "loss": 1.1979,
      "step": 326
    },
    {
      "epoch": 0.1593955642213015,
      "grad_norm": 0.39264824986457825,
      "learning_rate": 0.0001883300628743534,
      "loss": 1.1653,
      "step": 327
    },
    {
      "epoch": 0.15988301242992933,
      "grad_norm": 0.32801032066345215,
      "learning_rate": 0.00018825779752857258,
      "loss": 1.1604,
      "step": 328
    },
    {
      "epoch": 0.16037046063855714,
      "grad_norm": 0.30960729718208313,
      "learning_rate": 0.00018818532307667566,
      "loss": 1.2533,
      "step": 329
    },
    {
      "epoch": 0.16085790884718498,
      "grad_norm": 0.3749324083328247,
      "learning_rate": 0.00018811263969037377,
      "loss": 1.2268,
      "step": 330
    },
    {
      "epoch": 0.16134535705581282,
      "grad_norm": 0.30750614404678345,
      "learning_rate": 0.0001880397475418732,
      "loss": 1.2403,
      "step": 331
    },
    {
      "epoch": 0.16183280526444066,
      "grad_norm": 0.3399461507797241,
      "learning_rate": 0.00018796664680387467,
      "loss": 1.2749,
      "step": 332
    },
    {
      "epoch": 0.1623202534730685,
      "grad_norm": 0.35204702615737915,
      "learning_rate": 0.0001878933376495733,
      "loss": 1.2544,
      "step": 333
    },
    {
      "epoch": 0.1628077016816963,
      "grad_norm": 0.3700251579284668,
      "learning_rate": 0.0001878198202526578,
      "loss": 1.315,
      "step": 334
    },
    {
      "epoch": 0.16329514989032415,
      "grad_norm": 0.34206223487854004,
      "learning_rate": 0.00018774609478731046,
      "loss": 1.1894,
      "step": 335
    },
    {
      "epoch": 0.163782598098952,
      "grad_norm": 0.3301335871219635,
      "learning_rate": 0.00018767216142820642,
      "loss": 1.1981,
      "step": 336
    },
    {
      "epoch": 0.16427004630757983,
      "grad_norm": 0.35820314288139343,
      "learning_rate": 0.0001875980203505134,
      "loss": 1.2314,
      "step": 337
    },
    {
      "epoch": 0.16475749451620766,
      "grad_norm": 0.4263235926628113,
      "learning_rate": 0.0001875236717298913,
      "loss": 1.3417,
      "step": 338
    },
    {
      "epoch": 0.16524494272483548,
      "grad_norm": 0.3576142191886902,
      "learning_rate": 0.00018744911574249161,
      "loss": 1.1498,
      "step": 339
    },
    {
      "epoch": 0.1657323909334633,
      "grad_norm": 0.3001526892185211,
      "learning_rate": 0.00018737435256495734,
      "loss": 1.177,
      "step": 340
    },
    {
      "epoch": 0.16621983914209115,
      "grad_norm": 0.30892422795295715,
      "learning_rate": 0.00018729938237442223,
      "loss": 1.0733,
      "step": 341
    },
    {
      "epoch": 0.166707287350719,
      "grad_norm": 0.3621678948402405,
      "learning_rate": 0.00018722420534851052,
      "loss": 1.2447,
      "step": 342
    },
    {
      "epoch": 0.16719473555934683,
      "grad_norm": 0.2934684157371521,
      "learning_rate": 0.00018714882166533656,
      "loss": 1.1664,
      "step": 343
    },
    {
      "epoch": 0.16768218376797464,
      "grad_norm": 0.3934597671031952,
      "learning_rate": 0.0001870732315035042,
      "loss": 1.1904,
      "step": 344
    },
    {
      "epoch": 0.16816963197660248,
      "grad_norm": 0.35083746910095215,
      "learning_rate": 0.00018699743504210664,
      "loss": 1.2573,
      "step": 345
    },
    {
      "epoch": 0.16865708018523032,
      "grad_norm": 0.3115769922733307,
      "learning_rate": 0.0001869214324607257,
      "loss": 1.0691,
      "step": 346
    },
    {
      "epoch": 0.16914452839385816,
      "grad_norm": 0.3621445298194885,
      "learning_rate": 0.00018684522393943177,
      "loss": 1.2203,
      "step": 347
    },
    {
      "epoch": 0.169631976602486,
      "grad_norm": 0.34863755106925964,
      "learning_rate": 0.00018676880965878291,
      "loss": 1.2223,
      "step": 348
    },
    {
      "epoch": 0.1701194248111138,
      "grad_norm": 0.3783847391605377,
      "learning_rate": 0.00018669218979982489,
      "loss": 1.2681,
      "step": 349
    },
    {
      "epoch": 0.17060687301974164,
      "grad_norm": 0.311643123626709,
      "learning_rate": 0.00018661536454409042,
      "loss": 1.2847,
      "step": 350
    },
    {
      "epoch": 0.17109432122836948,
      "grad_norm": 0.38355201482772827,
      "learning_rate": 0.00018653833407359893,
      "loss": 1.191,
      "step": 351
    },
    {
      "epoch": 0.17158176943699732,
      "grad_norm": 0.2972296476364136,
      "learning_rate": 0.00018646109857085597,
      "loss": 1.2257,
      "step": 352
    },
    {
      "epoch": 0.17206921764562516,
      "grad_norm": 0.3391678035259247,
      "learning_rate": 0.00018638365821885297,
      "loss": 1.2264,
      "step": 353
    },
    {
      "epoch": 0.17255666585425297,
      "grad_norm": 0.3344445824623108,
      "learning_rate": 0.00018630601320106664,
      "loss": 1.0415,
      "step": 354
    },
    {
      "epoch": 0.1730441140628808,
      "grad_norm": 0.3566620945930481,
      "learning_rate": 0.0001862281637014586,
      "loss": 1.1043,
      "step": 355
    },
    {
      "epoch": 0.17353156227150865,
      "grad_norm": 0.4023699462413788,
      "learning_rate": 0.0001861501099044749,
      "loss": 1.361,
      "step": 356
    },
    {
      "epoch": 0.1740190104801365,
      "grad_norm": 0.3327338695526123,
      "learning_rate": 0.00018607185199504578,
      "loss": 1.1473,
      "step": 357
    },
    {
      "epoch": 0.17450645868876433,
      "grad_norm": 0.3657204806804657,
      "learning_rate": 0.00018599339015858485,
      "loss": 1.1703,
      "step": 358
    },
    {
      "epoch": 0.17499390689739217,
      "grad_norm": 0.3360118865966797,
      "learning_rate": 0.00018591472458098912,
      "loss": 1.1784,
      "step": 359
    },
    {
      "epoch": 0.17548135510601998,
      "grad_norm": 0.38200175762176514,
      "learning_rate": 0.0001858358554486381,
      "loss": 1.3757,
      "step": 360
    },
    {
      "epoch": 0.17596880331464781,
      "grad_norm": 0.3309694528579712,
      "learning_rate": 0.00018575678294839373,
      "loss": 1.1339,
      "step": 361
    },
    {
      "epoch": 0.17645625152327565,
      "grad_norm": 0.34614065289497375,
      "learning_rate": 0.00018567750726759967,
      "loss": 1.2028,
      "step": 362
    },
    {
      "epoch": 0.1769436997319035,
      "grad_norm": 0.30191949009895325,
      "learning_rate": 0.00018559802859408108,
      "loss": 1.1361,
      "step": 363
    },
    {
      "epoch": 0.17743114794053133,
      "grad_norm": 0.3177981674671173,
      "learning_rate": 0.00018551834711614396,
      "loss": 1.3001,
      "step": 364
    },
    {
      "epoch": 0.17791859614915914,
      "grad_norm": 0.31745445728302,
      "learning_rate": 0.00018543846302257485,
      "loss": 1.1994,
      "step": 365
    },
    {
      "epoch": 0.17840604435778698,
      "grad_norm": 0.3130457401275635,
      "learning_rate": 0.00018535837650264037,
      "loss": 1.0229,
      "step": 366
    },
    {
      "epoch": 0.17889349256641482,
      "grad_norm": 0.3922497034072876,
      "learning_rate": 0.0001852780877460867,
      "loss": 1.2234,
      "step": 367
    },
    {
      "epoch": 0.17938094077504266,
      "grad_norm": 0.34110864996910095,
      "learning_rate": 0.00018519759694313916,
      "loss": 1.3665,
      "step": 368
    },
    {
      "epoch": 0.1798683889836705,
      "grad_norm": 0.3519771695137024,
      "learning_rate": 0.00018511690428450183,
      "loss": 1.2674,
      "step": 369
    },
    {
      "epoch": 0.1803558371922983,
      "grad_norm": 0.3103514611721039,
      "learning_rate": 0.000185036009961357,
      "loss": 1.1153,
      "step": 370
    },
    {
      "epoch": 0.18084328540092615,
      "grad_norm": 0.2760174572467804,
      "learning_rate": 0.00018495491416536478,
      "loss": 1.1707,
      "step": 371
    },
    {
      "epoch": 0.18133073360955398,
      "grad_norm": 0.33489930629730225,
      "learning_rate": 0.0001848736170886626,
      "loss": 1.1774,
      "step": 372
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 0.34386131167411804,
      "learning_rate": 0.00018479211892386474,
      "loss": 1.2538,
      "step": 373
    },
    {
      "epoch": 0.18230563002680966,
      "grad_norm": 0.37152737379074097,
      "learning_rate": 0.000184710419864062,
      "loss": 1.2242,
      "step": 374
    },
    {
      "epoch": 0.18279307823543747,
      "grad_norm": 0.29477307200431824,
      "learning_rate": 0.00018462852010282108,
      "loss": 1.3295,
      "step": 375
    },
    {
      "epoch": 0.1832805264440653,
      "grad_norm": 0.35536712408065796,
      "learning_rate": 0.00018454641983418427,
      "loss": 1.1886,
      "step": 376
    },
    {
      "epoch": 0.18376797465269315,
      "grad_norm": 0.30594703555107117,
      "learning_rate": 0.0001844641192526688,
      "loss": 1.0854,
      "step": 377
    },
    {
      "epoch": 0.184255422861321,
      "grad_norm": 0.27677103877067566,
      "learning_rate": 0.0001843816185532666,
      "loss": 1.1368,
      "step": 378
    },
    {
      "epoch": 0.18474287106994883,
      "grad_norm": 0.32766619324684143,
      "learning_rate": 0.00018429891793144375,
      "loss": 1.1257,
      "step": 379
    },
    {
      "epoch": 0.18523031927857664,
      "grad_norm": 0.35552579164505005,
      "learning_rate": 0.00018421601758313982,
      "loss": 1.2394,
      "step": 380
    },
    {
      "epoch": 0.18571776748720448,
      "grad_norm": 0.34337764978408813,
      "learning_rate": 0.0001841329177047678,
      "loss": 1.2604,
      "step": 381
    },
    {
      "epoch": 0.18620521569583232,
      "grad_norm": 0.2597070336341858,
      "learning_rate": 0.0001840496184932133,
      "loss": 1.1631,
      "step": 382
    },
    {
      "epoch": 0.18669266390446015,
      "grad_norm": 0.3200598359107971,
      "learning_rate": 0.00018396612014583423,
      "loss": 1.1131,
      "step": 383
    },
    {
      "epoch": 0.187180112113088,
      "grad_norm": 0.3345329761505127,
      "learning_rate": 0.0001838824228604603,
      "loss": 1.1062,
      "step": 384
    },
    {
      "epoch": 0.1876675603217158,
      "grad_norm": 0.3126945495605469,
      "learning_rate": 0.00018379852683539263,
      "loss": 1.1057,
      "step": 385
    },
    {
      "epoch": 0.18815500853034364,
      "grad_norm": 0.38702392578125,
      "learning_rate": 0.00018371443226940305,
      "loss": 1.2012,
      "step": 386
    },
    {
      "epoch": 0.18864245673897148,
      "grad_norm": 0.3303517997264862,
      "learning_rate": 0.00018363013936173393,
      "loss": 1.1981,
      "step": 387
    },
    {
      "epoch": 0.18912990494759932,
      "grad_norm": 0.3743020296096802,
      "learning_rate": 0.00018354564831209747,
      "loss": 1.251,
      "step": 388
    },
    {
      "epoch": 0.18961735315622716,
      "grad_norm": 0.31129929423332214,
      "learning_rate": 0.0001834609593206754,
      "loss": 1.1852,
      "step": 389
    },
    {
      "epoch": 0.190104801364855,
      "grad_norm": 0.31025078892707825,
      "learning_rate": 0.0001833760725881184,
      "loss": 1.1386,
      "step": 390
    },
    {
      "epoch": 0.1905922495734828,
      "grad_norm": 0.3130515515804291,
      "learning_rate": 0.00018329098831554557,
      "loss": 1.2102,
      "step": 391
    },
    {
      "epoch": 0.19107969778211065,
      "grad_norm": 0.28376305103302,
      "learning_rate": 0.0001832057067045442,
      "loss": 1.1814,
      "step": 392
    },
    {
      "epoch": 0.19156714599073849,
      "grad_norm": 0.32261911034584045,
      "learning_rate": 0.00018312022795716895,
      "loss": 1.3395,
      "step": 393
    },
    {
      "epoch": 0.19205459419936632,
      "grad_norm": 0.3088441491127014,
      "learning_rate": 0.00018303455227594166,
      "loss": 1.2452,
      "step": 394
    },
    {
      "epoch": 0.19254204240799416,
      "grad_norm": 0.35038337111473083,
      "learning_rate": 0.00018294867986385076,
      "loss": 1.182,
      "step": 395
    },
    {
      "epoch": 0.19302949061662197,
      "grad_norm": 0.3092940151691437,
      "learning_rate": 0.00018286261092435068,
      "loss": 1.3214,
      "step": 396
    },
    {
      "epoch": 0.1935169388252498,
      "grad_norm": 0.30770185589790344,
      "learning_rate": 0.00018277634566136165,
      "loss": 1.2192,
      "step": 397
    },
    {
      "epoch": 0.19400438703387765,
      "grad_norm": 0.3027122914791107,
      "learning_rate": 0.00018268988427926885,
      "loss": 1.1351,
      "step": 398
    },
    {
      "epoch": 0.1944918352425055,
      "grad_norm": 0.27010977268218994,
      "learning_rate": 0.00018260322698292224,
      "loss": 1.1809,
      "step": 399
    },
    {
      "epoch": 0.19497928345113333,
      "grad_norm": 0.33920818567276,
      "learning_rate": 0.00018251637397763597,
      "loss": 1.2581,
      "step": 400
    },
    {
      "epoch": 0.19546673165976114,
      "grad_norm": 0.25426357984542847,
      "learning_rate": 0.00018242932546918777,
      "loss": 1.1661,
      "step": 401
    },
    {
      "epoch": 0.19595417986838898,
      "grad_norm": 0.27038267254829407,
      "learning_rate": 0.0001823420816638187,
      "loss": 1.2815,
      "step": 402
    },
    {
      "epoch": 0.19644162807701682,
      "grad_norm": 0.37560659646987915,
      "learning_rate": 0.00018225464276823238,
      "loss": 1.2335,
      "step": 403
    },
    {
      "epoch": 0.19692907628564466,
      "grad_norm": 0.35653063654899597,
      "learning_rate": 0.00018216700898959477,
      "loss": 1.1732,
      "step": 404
    },
    {
      "epoch": 0.1974165244942725,
      "grad_norm": 0.3689841330051422,
      "learning_rate": 0.00018207918053553355,
      "loss": 1.2544,
      "step": 405
    },
    {
      "epoch": 0.1979039727029003,
      "grad_norm": 0.3493061065673828,
      "learning_rate": 0.00018199115761413753,
      "loss": 1.2338,
      "step": 406
    },
    {
      "epoch": 0.19839142091152814,
      "grad_norm": 0.42514804005622864,
      "learning_rate": 0.0001819029404339564,
      "loss": 1.2576,
      "step": 407
    },
    {
      "epoch": 0.19887886912015598,
      "grad_norm": 0.33240950107574463,
      "learning_rate": 0.00018181452920400007,
      "loss": 1.1487,
      "step": 408
    },
    {
      "epoch": 0.19936631732878382,
      "grad_norm": 0.37072715163230896,
      "learning_rate": 0.0001817259241337381,
      "loss": 1.3214,
      "step": 409
    },
    {
      "epoch": 0.19985376553741166,
      "grad_norm": 0.3414487838745117,
      "learning_rate": 0.00018163712543309944,
      "loss": 1.2432,
      "step": 410
    },
    {
      "epoch": 0.20034121374603947,
      "grad_norm": 0.34911903738975525,
      "learning_rate": 0.00018154813331247173,
      "loss": 1.2067,
      "step": 411
    },
    {
      "epoch": 0.2008286619546673,
      "grad_norm": 0.3342655301094055,
      "learning_rate": 0.00018145894798270092,
      "loss": 1.1907,
      "step": 412
    },
    {
      "epoch": 0.20131611016329515,
      "grad_norm": 0.32328155636787415,
      "learning_rate": 0.00018136956965509064,
      "loss": 1.2565,
      "step": 413
    },
    {
      "epoch": 0.201803558371923,
      "grad_norm": 0.33764177560806274,
      "learning_rate": 0.00018127999854140186,
      "loss": 1.186,
      "step": 414
    },
    {
      "epoch": 0.20229100658055083,
      "grad_norm": 0.3673051595687866,
      "learning_rate": 0.00018119023485385234,
      "loss": 1.1403,
      "step": 415
    },
    {
      "epoch": 0.20277845478917864,
      "grad_norm": 0.31863540410995483,
      "learning_rate": 0.000181100278805116,
      "loss": 1.0823,
      "step": 416
    },
    {
      "epoch": 0.20326590299780647,
      "grad_norm": 0.31400150060653687,
      "learning_rate": 0.00018101013060832255,
      "loss": 1.1745,
      "step": 417
    },
    {
      "epoch": 0.2037533512064343,
      "grad_norm": 0.3357588052749634,
      "learning_rate": 0.00018091979047705703,
      "loss": 1.3198,
      "step": 418
    },
    {
      "epoch": 0.20424079941506215,
      "grad_norm": 0.33937594294548035,
      "learning_rate": 0.00018082925862535908,
      "loss": 1.092,
      "step": 419
    },
    {
      "epoch": 0.20472824762369,
      "grad_norm": 0.34498557448387146,
      "learning_rate": 0.0001807385352677227,
      "loss": 1.2231,
      "step": 420
    },
    {
      "epoch": 0.20521569583231783,
      "grad_norm": 0.3494696021080017,
      "learning_rate": 0.00018064762061909554,
      "loss": 1.199,
      "step": 421
    },
    {
      "epoch": 0.20570314404094564,
      "grad_norm": 0.3133601248264313,
      "learning_rate": 0.00018055651489487853,
      "loss": 1.1441,
      "step": 422
    },
    {
      "epoch": 0.20619059224957348,
      "grad_norm": 0.3035077452659607,
      "learning_rate": 0.0001804652183109253,
      "loss": 1.1614,
      "step": 423
    },
    {
      "epoch": 0.20667804045820132,
      "grad_norm": 0.34320294857025146,
      "learning_rate": 0.0001803737310835416,
      "loss": 1.2356,
      "step": 424
    },
    {
      "epoch": 0.20716548866682916,
      "grad_norm": 0.38479337096214294,
      "learning_rate": 0.00018028205342948494,
      "loss": 1.1434,
      "step": 425
    },
    {
      "epoch": 0.207652936875457,
      "grad_norm": 0.30601903796195984,
      "learning_rate": 0.000180190185565964,
      "loss": 1.149,
      "step": 426
    },
    {
      "epoch": 0.2081403850840848,
      "grad_norm": 0.3483642637729645,
      "learning_rate": 0.00018009812771063808,
      "loss": 1.2152,
      "step": 427
    },
    {
      "epoch": 0.20862783329271264,
      "grad_norm": 0.3802853226661682,
      "learning_rate": 0.00018000588008161667,
      "loss": 1.2896,
      "step": 428
    },
    {
      "epoch": 0.20911528150134048,
      "grad_norm": 0.29257655143737793,
      "learning_rate": 0.0001799134428974588,
      "loss": 1.2532,
      "step": 429
    },
    {
      "epoch": 0.20960272970996832,
      "grad_norm": 0.3451422452926636,
      "learning_rate": 0.00017982081637717273,
      "loss": 1.3011,
      "step": 430
    },
    {
      "epoch": 0.21009017791859616,
      "grad_norm": 0.3802796006202698,
      "learning_rate": 0.00017972800074021516,
      "loss": 1.2363,
      "step": 431
    },
    {
      "epoch": 0.21057762612722397,
      "grad_norm": 0.31327179074287415,
      "learning_rate": 0.00017963499620649102,
      "loss": 1.0632,
      "step": 432
    },
    {
      "epoch": 0.2110650743358518,
      "grad_norm": 0.3250715732574463,
      "learning_rate": 0.00017954180299635265,
      "loss": 1.369,
      "step": 433
    },
    {
      "epoch": 0.21155252254447965,
      "grad_norm": 0.334495484828949,
      "learning_rate": 0.00017944842133059947,
      "loss": 1.2512,
      "step": 434
    },
    {
      "epoch": 0.2120399707531075,
      "grad_norm": 0.35229137539863586,
      "learning_rate": 0.0001793548514304774,
      "loss": 1.2477,
      "step": 435
    },
    {
      "epoch": 0.21252741896173533,
      "grad_norm": 0.3113347291946411,
      "learning_rate": 0.00017926109351767836,
      "loss": 1.1872,
      "step": 436
    },
    {
      "epoch": 0.21301486717036314,
      "grad_norm": 0.38488292694091797,
      "learning_rate": 0.00017916714781433964,
      "loss": 1.1569,
      "step": 437
    },
    {
      "epoch": 0.21350231537899098,
      "grad_norm": 0.3689945936203003,
      "learning_rate": 0.0001790730145430436,
      "loss": 1.2452,
      "step": 438
    },
    {
      "epoch": 0.21398976358761881,
      "grad_norm": 0.3782726526260376,
      "learning_rate": 0.00017897869392681685,
      "loss": 1.2962,
      "step": 439
    },
    {
      "epoch": 0.21447721179624665,
      "grad_norm": 0.31284990906715393,
      "learning_rate": 0.00017888418618912993,
      "loss": 1.3208,
      "step": 440
    },
    {
      "epoch": 0.2149646600048745,
      "grad_norm": 0.33481279015541077,
      "learning_rate": 0.00017878949155389676,
      "loss": 1.1801,
      "step": 441
    },
    {
      "epoch": 0.2154521082135023,
      "grad_norm": 0.3809351623058319,
      "learning_rate": 0.00017869461024547394,
      "loss": 1.2612,
      "step": 442
    },
    {
      "epoch": 0.21593955642213014,
      "grad_norm": 0.3469904661178589,
      "learning_rate": 0.00017859954248866056,
      "loss": 1.2604,
      "step": 443
    },
    {
      "epoch": 0.21642700463075798,
      "grad_norm": 0.3663221299648285,
      "learning_rate": 0.00017850428850869725,
      "loss": 1.1382,
      "step": 444
    },
    {
      "epoch": 0.21691445283938582,
      "grad_norm": 0.34318575263023376,
      "learning_rate": 0.000178408848531266,
      "loss": 1.1353,
      "step": 445
    },
    {
      "epoch": 0.21740190104801366,
      "grad_norm": 0.3427807688713074,
      "learning_rate": 0.00017831322278248935,
      "loss": 1.3791,
      "step": 446
    },
    {
      "epoch": 0.21788934925664147,
      "grad_norm": 0.27582523226737976,
      "learning_rate": 0.00017821741148893008,
      "loss": 1.273,
      "step": 447
    },
    {
      "epoch": 0.2183767974652693,
      "grad_norm": 0.3410693109035492,
      "learning_rate": 0.00017812141487759053,
      "loss": 1.1782,
      "step": 448
    },
    {
      "epoch": 0.21886424567389715,
      "grad_norm": 0.41272151470184326,
      "learning_rate": 0.00017802523317591212,
      "loss": 1.3831,
      "step": 449
    },
    {
      "epoch": 0.21935169388252498,
      "grad_norm": 0.35077860951423645,
      "learning_rate": 0.0001779288666117748,
      "loss": 1.1319,
      "step": 450
    },
    {
      "epoch": 0.21983914209115282,
      "grad_norm": 0.3441184461116791,
      "learning_rate": 0.0001778323154134965,
      "loss": 1.2284,
      "step": 451
    },
    {
      "epoch": 0.22032659029978066,
      "grad_norm": 0.2734520137310028,
      "learning_rate": 0.00017773557980983262,
      "loss": 1.1776,
      "step": 452
    },
    {
      "epoch": 0.22081403850840847,
      "grad_norm": 0.2958824038505554,
      "learning_rate": 0.0001776386600299754,
      "loss": 1.1105,
      "step": 453
    },
    {
      "epoch": 0.2213014867170363,
      "grad_norm": 0.3324015438556671,
      "learning_rate": 0.00017754155630355354,
      "loss": 1.1959,
      "step": 454
    },
    {
      "epoch": 0.22178893492566415,
      "grad_norm": 0.38719967007637024,
      "learning_rate": 0.00017744426886063145,
      "loss": 1.1342,
      "step": 455
    },
    {
      "epoch": 0.222276383134292,
      "grad_norm": 0.3017652928829193,
      "learning_rate": 0.00017734679793170895,
      "loss": 1.0807,
      "step": 456
    },
    {
      "epoch": 0.22276383134291983,
      "grad_norm": 0.32308661937713623,
      "learning_rate": 0.00017724914374772042,
      "loss": 1.2144,
      "step": 457
    },
    {
      "epoch": 0.22325127955154764,
      "grad_norm": 0.49217668175697327,
      "learning_rate": 0.00017715130654003452,
      "loss": 1.22,
      "step": 458
    },
    {
      "epoch": 0.22373872776017548,
      "grad_norm": 0.4496031701564789,
      "learning_rate": 0.00017705328654045362,
      "loss": 1.1971,
      "step": 459
    },
    {
      "epoch": 0.22422617596880332,
      "grad_norm": 0.35932832956314087,
      "learning_rate": 0.00017695508398121298,
      "loss": 1.2142,
      "step": 460
    },
    {
      "epoch": 0.22471362417743115,
      "grad_norm": 0.32042956352233887,
      "learning_rate": 0.00017685669909498053,
      "loss": 1.1094,
      "step": 461
    },
    {
      "epoch": 0.225201072386059,
      "grad_norm": 0.3304695785045624,
      "learning_rate": 0.00017675813211485614,
      "loss": 1.1935,
      "step": 462
    },
    {
      "epoch": 0.2256885205946868,
      "grad_norm": 0.33314943313598633,
      "learning_rate": 0.0001766593832743711,
      "loss": 1.2314,
      "step": 463
    },
    {
      "epoch": 0.22617596880331464,
      "grad_norm": 0.32802191376686096,
      "learning_rate": 0.0001765604528074876,
      "loss": 1.3842,
      "step": 464
    },
    {
      "epoch": 0.22666341701194248,
      "grad_norm": 0.39321258664131165,
      "learning_rate": 0.00017646134094859815,
      "loss": 1.2211,
      "step": 465
    },
    {
      "epoch": 0.22715086522057032,
      "grad_norm": 0.3253491222858429,
      "learning_rate": 0.00017636204793252498,
      "loss": 1.1863,
      "step": 466
    },
    {
      "epoch": 0.22763831342919816,
      "grad_norm": 0.3173609972000122,
      "learning_rate": 0.0001762625739945196,
      "loss": 1.1983,
      "step": 467
    },
    {
      "epoch": 0.22812576163782597,
      "grad_norm": 0.3017527163028717,
      "learning_rate": 0.00017616291937026213,
      "loss": 1.1544,
      "step": 468
    },
    {
      "epoch": 0.2286132098464538,
      "grad_norm": 0.2875792682170868,
      "learning_rate": 0.00017606308429586078,
      "loss": 1.1805,
      "step": 469
    },
    {
      "epoch": 0.22910065805508165,
      "grad_norm": 0.37087053060531616,
      "learning_rate": 0.0001759630690078513,
      "loss": 1.1787,
      "step": 470
    },
    {
      "epoch": 0.22958810626370948,
      "grad_norm": 0.3738652169704437,
      "learning_rate": 0.00017586287374319644,
      "loss": 1.2943,
      "step": 471
    },
    {
      "epoch": 0.23007555447233732,
      "grad_norm": 0.34178316593170166,
      "learning_rate": 0.0001757624987392853,
      "loss": 1.1116,
      "step": 472
    },
    {
      "epoch": 0.23056300268096513,
      "grad_norm": 0.3692106306552887,
      "learning_rate": 0.0001756619442339329,
      "loss": 1.2682,
      "step": 473
    },
    {
      "epoch": 0.23105045088959297,
      "grad_norm": 0.3724597990512848,
      "learning_rate": 0.00017556121046537947,
      "loss": 1.2064,
      "step": 474
    },
    {
      "epoch": 0.2315378990982208,
      "grad_norm": 0.35810819268226624,
      "learning_rate": 0.00017546029767229011,
      "loss": 1.1982,
      "step": 475
    },
    {
      "epoch": 0.23202534730684865,
      "grad_norm": 0.3127930760383606,
      "learning_rate": 0.0001753592060937539,
      "loss": 1.2766,
      "step": 476
    },
    {
      "epoch": 0.2325127955154765,
      "grad_norm": 0.29817426204681396,
      "learning_rate": 0.00017525793596928356,
      "loss": 1.0259,
      "step": 477
    },
    {
      "epoch": 0.2330002437241043,
      "grad_norm": 0.4083329439163208,
      "learning_rate": 0.00017515648753881492,
      "loss": 1.2894,
      "step": 478
    },
    {
      "epoch": 0.23348769193273214,
      "grad_norm": 0.3159523904323578,
      "learning_rate": 0.00017505486104270616,
      "loss": 1.0282,
      "step": 479
    },
    {
      "epoch": 0.23397514014135998,
      "grad_norm": 0.3796326518058777,
      "learning_rate": 0.00017495305672173746,
      "loss": 1.278,
      "step": 480
    },
    {
      "epoch": 0.23446258834998782,
      "grad_norm": 0.3640994727611542,
      "learning_rate": 0.00017485107481711012,
      "loss": 1.224,
      "step": 481
    },
    {
      "epoch": 0.23495003655861565,
      "grad_norm": 0.36146458983421326,
      "learning_rate": 0.0001747489155704464,
      "loss": 1.2988,
      "step": 482
    },
    {
      "epoch": 0.2354374847672435,
      "grad_norm": 0.361672967672348,
      "learning_rate": 0.00017464657922378857,
      "loss": 1.2746,
      "step": 483
    },
    {
      "epoch": 0.2359249329758713,
      "grad_norm": 0.3305426239967346,
      "learning_rate": 0.00017454406601959862,
      "loss": 1.1935,
      "step": 484
    },
    {
      "epoch": 0.23641238118449914,
      "grad_norm": 0.3048730194568634,
      "learning_rate": 0.00017444137620075748,
      "loss": 1.2769,
      "step": 485
    },
    {
      "epoch": 0.23689982939312698,
      "grad_norm": 0.3524467647075653,
      "learning_rate": 0.00017433851001056453,
      "loss": 1.2119,
      "step": 486
    },
    {
      "epoch": 0.23738727760175482,
      "grad_norm": 0.36313095688819885,
      "learning_rate": 0.00017423546769273707,
      "loss": 1.1705,
      "step": 487
    },
    {
      "epoch": 0.23787472581038266,
      "grad_norm": 0.3175797164440155,
      "learning_rate": 0.00017413224949140962,
      "loss": 1.2086,
      "step": 488
    },
    {
      "epoch": 0.23836217401901047,
      "grad_norm": 0.382396936416626,
      "learning_rate": 0.00017402885565113353,
      "loss": 1.1239,
      "step": 489
    },
    {
      "epoch": 0.2388496222276383,
      "grad_norm": 0.4053979814052582,
      "learning_rate": 0.00017392528641687615,
      "loss": 1.242,
      "step": 490
    },
    {
      "epoch": 0.23933707043626615,
      "grad_norm": 0.36382076144218445,
      "learning_rate": 0.00017382154203402048,
      "loss": 1.2455,
      "step": 491
    },
    {
      "epoch": 0.23982451864489399,
      "grad_norm": 0.29931992292404175,
      "learning_rate": 0.00017371762274836442,
      "loss": 1.1521,
      "step": 492
    },
    {
      "epoch": 0.24031196685352182,
      "grad_norm": 0.30255070328712463,
      "learning_rate": 0.00017361352880612037,
      "loss": 1.0897,
      "step": 493
    },
    {
      "epoch": 0.24079941506214964,
      "grad_norm": 0.3097134828567505,
      "learning_rate": 0.0001735092604539144,
      "loss": 1.2051,
      "step": 494
    },
    {
      "epoch": 0.24128686327077747,
      "grad_norm": 0.27947649359703064,
      "learning_rate": 0.00017340481793878592,
      "loss": 1.1624,
      "step": 495
    },
    {
      "epoch": 0.2417743114794053,
      "grad_norm": 0.3660297691822052,
      "learning_rate": 0.00017330020150818697,
      "loss": 1.2286,
      "step": 496
    },
    {
      "epoch": 0.24226175968803315,
      "grad_norm": 0.2641275227069855,
      "learning_rate": 0.0001731954114099815,
      "loss": 1.1223,
      "step": 497
    },
    {
      "epoch": 0.242749207896661,
      "grad_norm": 0.2959223687648773,
      "learning_rate": 0.0001730904478924451,
      "loss": 1.3362,
      "step": 498
    },
    {
      "epoch": 0.2432366561052888,
      "grad_norm": 0.2990069091320038,
      "learning_rate": 0.00017298531120426414,
      "loss": 1.259,
      "step": 499
    },
    {
      "epoch": 0.24372410431391664,
      "grad_norm": 0.40709275007247925,
      "learning_rate": 0.0001728800015945353,
      "loss": 1.2636,
      "step": 500
    },
    {
      "epoch": 0.24421155252254448,
      "grad_norm": 0.3209191560745239,
      "learning_rate": 0.00017277451931276496,
      "loss": 1.195,
      "step": 501
    },
    {
      "epoch": 0.24469900073117232,
      "grad_norm": 0.42993372678756714,
      "learning_rate": 0.00017266886460886864,
      "loss": 1.2563,
      "step": 502
    },
    {
      "epoch": 0.24518644893980016,
      "grad_norm": 0.3334101140499115,
      "learning_rate": 0.0001725630377331703,
      "loss": 1.102,
      "step": 503
    },
    {
      "epoch": 0.24567389714842797,
      "grad_norm": 0.3589898943901062,
      "learning_rate": 0.0001724570389364019,
      "loss": 1.225,
      "step": 504
    },
    {
      "epoch": 0.2461613453570558,
      "grad_norm": 0.37592238187789917,
      "learning_rate": 0.00017235086846970264,
      "loss": 1.1681,
      "step": 505
    },
    {
      "epoch": 0.24664879356568364,
      "grad_norm": 0.36380186676979065,
      "learning_rate": 0.0001722445265846185,
      "loss": 1.0927,
      "step": 506
    },
    {
      "epoch": 0.24713624177431148,
      "grad_norm": 0.3600970506668091,
      "learning_rate": 0.00017213801353310163,
      "loss": 1.2181,
      "step": 507
    },
    {
      "epoch": 0.24762368998293932,
      "grad_norm": 0.29900628328323364,
      "learning_rate": 0.00017203132956750963,
      "loss": 1.1451,
      "step": 508
    },
    {
      "epoch": 0.24811113819156713,
      "grad_norm": 0.3672201931476593,
      "learning_rate": 0.00017192447494060513,
      "loss": 1.2164,
      "step": 509
    },
    {
      "epoch": 0.24859858640019497,
      "grad_norm": 0.3158378601074219,
      "learning_rate": 0.00017181744990555506,
      "loss": 1.1199,
      "step": 510
    },
    {
      "epoch": 0.2490860346088228,
      "grad_norm": 0.3321176767349243,
      "learning_rate": 0.0001717102547159301,
      "loss": 1.2702,
      "step": 511
    },
    {
      "epoch": 0.24957348281745065,
      "grad_norm": 0.3186010420322418,
      "learning_rate": 0.00017160288962570402,
      "loss": 1.1354,
      "step": 512
    },
    {
      "epoch": 0.25006093102607846,
      "grad_norm": 0.3101379871368408,
      "learning_rate": 0.0001714953548892533,
      "loss": 1.1087,
      "step": 513
    },
    {
      "epoch": 0.2505483792347063,
      "grad_norm": 0.3541036546230316,
      "learning_rate": 0.00017138765076135608,
      "loss": 1.2284,
      "step": 514
    },
    {
      "epoch": 0.25103582744333414,
      "grad_norm": 0.30092599987983704,
      "learning_rate": 0.00017127977749719207,
      "loss": 1.16,
      "step": 515
    },
    {
      "epoch": 0.251523275651962,
      "grad_norm": 0.3944350481033325,
      "learning_rate": 0.00017117173535234162,
      "loss": 1.2925,
      "step": 516
    },
    {
      "epoch": 0.2520107238605898,
      "grad_norm": 0.33906981348991394,
      "learning_rate": 0.00017106352458278522,
      "loss": 1.1652,
      "step": 517
    },
    {
      "epoch": 0.25249817206921765,
      "grad_norm": 0.3461517095565796,
      "learning_rate": 0.00017095514544490282,
      "loss": 1.0418,
      "step": 518
    },
    {
      "epoch": 0.2529856202778455,
      "grad_norm": 0.3587675988674164,
      "learning_rate": 0.00017084659819547338,
      "loss": 1.2534,
      "step": 519
    },
    {
      "epoch": 0.25347306848647333,
      "grad_norm": 0.32428374886512756,
      "learning_rate": 0.00017073788309167408,
      "loss": 1.16,
      "step": 520
    },
    {
      "epoch": 0.25396051669510117,
      "grad_norm": 0.3599848449230194,
      "learning_rate": 0.00017062900039107977,
      "loss": 1.1422,
      "step": 521
    },
    {
      "epoch": 0.25444796490372895,
      "grad_norm": 0.3633671700954437,
      "learning_rate": 0.00017051995035166252,
      "loss": 1.1323,
      "step": 522
    },
    {
      "epoch": 0.2549354131123568,
      "grad_norm": 0.38934600353240967,
      "learning_rate": 0.0001704107332317907,
      "loss": 1.2117,
      "step": 523
    },
    {
      "epoch": 0.25542286132098463,
      "grad_norm": 0.3670594096183777,
      "learning_rate": 0.0001703013492902287,
      "loss": 1.1839,
      "step": 524
    },
    {
      "epoch": 0.25591030952961247,
      "grad_norm": 0.40726083517074585,
      "learning_rate": 0.00017019179878613596,
      "loss": 1.2079,
      "step": 525
    },
    {
      "epoch": 0.2563977577382403,
      "grad_norm": 0.32286155223846436,
      "learning_rate": 0.00017008208197906674,
      "loss": 1.3053,
      "step": 526
    },
    {
      "epoch": 0.25688520594686814,
      "grad_norm": 0.33716845512390137,
      "learning_rate": 0.00016997219912896923,
      "loss": 1.3214,
      "step": 527
    },
    {
      "epoch": 0.257372654155496,
      "grad_norm": 0.36912742257118225,
      "learning_rate": 0.00016986215049618505,
      "loss": 1.2605,
      "step": 528
    },
    {
      "epoch": 0.2578601023641238,
      "grad_norm": 0.4095950424671173,
      "learning_rate": 0.0001697519363414485,
      "loss": 1.276,
      "step": 529
    },
    {
      "epoch": 0.25834755057275166,
      "grad_norm": 0.4105255603790283,
      "learning_rate": 0.00016964155692588616,
      "loss": 1.231,
      "step": 530
    },
    {
      "epoch": 0.2588349987813795,
      "grad_norm": 0.3191453516483307,
      "learning_rate": 0.00016953101251101618,
      "loss": 1.1161,
      "step": 531
    },
    {
      "epoch": 0.25932244699000734,
      "grad_norm": 0.3601817488670349,
      "learning_rate": 0.00016942030335874753,
      "loss": 1.2283,
      "step": 532
    },
    {
      "epoch": 0.2598098951986351,
      "grad_norm": 0.3143230378627777,
      "learning_rate": 0.00016930942973137952,
      "loss": 1.1582,
      "step": 533
    },
    {
      "epoch": 0.26029734340726296,
      "grad_norm": 0.3640122711658478,
      "learning_rate": 0.00016919839189160122,
      "loss": 1.1361,
      "step": 534
    },
    {
      "epoch": 0.2607847916158908,
      "grad_norm": 0.3497964143753052,
      "learning_rate": 0.00016908719010249064,
      "loss": 1.1434,
      "step": 535
    },
    {
      "epoch": 0.26127223982451864,
      "grad_norm": 0.3731510639190674,
      "learning_rate": 0.00016897582462751433,
      "loss": 1.2552,
      "step": 536
    },
    {
      "epoch": 0.2617596880331465,
      "grad_norm": 0.3403242826461792,
      "learning_rate": 0.00016886429573052664,
      "loss": 1.2285,
      "step": 537
    },
    {
      "epoch": 0.2622471362417743,
      "grad_norm": 0.33967721462249756,
      "learning_rate": 0.00016875260367576903,
      "loss": 1.1403,
      "step": 538
    },
    {
      "epoch": 0.26273458445040215,
      "grad_norm": 0.33196625113487244,
      "learning_rate": 0.00016864074872786962,
      "loss": 1.0713,
      "step": 539
    },
    {
      "epoch": 0.26322203265903,
      "grad_norm": 0.3464784622192383,
      "learning_rate": 0.00016852873115184242,
      "loss": 0.9796,
      "step": 540
    },
    {
      "epoch": 0.26370948086765783,
      "grad_norm": 0.4036043882369995,
      "learning_rate": 0.00016841655121308677,
      "loss": 1.3277,
      "step": 541
    },
    {
      "epoch": 0.26419692907628567,
      "grad_norm": 0.3961755335330963,
      "learning_rate": 0.00016830420917738668,
      "loss": 1.3286,
      "step": 542
    },
    {
      "epoch": 0.26468437728491345,
      "grad_norm": 0.29868268966674805,
      "learning_rate": 0.00016819170531091017,
      "loss": 1.1358,
      "step": 543
    },
    {
      "epoch": 0.2651718254935413,
      "grad_norm": 0.3506234884262085,
      "learning_rate": 0.0001680790398802088,
      "loss": 1.098,
      "step": 544
    },
    {
      "epoch": 0.26565927370216913,
      "grad_norm": 0.3245921730995178,
      "learning_rate": 0.00016796621315221677,
      "loss": 1.3453,
      "step": 545
    },
    {
      "epoch": 0.26614672191079697,
      "grad_norm": 0.37823474407196045,
      "learning_rate": 0.00016785322539425053,
      "loss": 1.1506,
      "step": 546
    },
    {
      "epoch": 0.2666341701194248,
      "grad_norm": 0.3234574794769287,
      "learning_rate": 0.00016774007687400802,
      "loss": 1.1525,
      "step": 547
    },
    {
      "epoch": 0.26712161832805265,
      "grad_norm": 0.33606481552124023,
      "learning_rate": 0.00016762676785956807,
      "loss": 1.2782,
      "step": 548
    },
    {
      "epoch": 0.2676090665366805,
      "grad_norm": 0.3780474364757538,
      "learning_rate": 0.0001675132986193898,
      "loss": 1.1686,
      "step": 549
    },
    {
      "epoch": 0.2680965147453083,
      "grad_norm": 0.34344884753227234,
      "learning_rate": 0.0001673996694223119,
      "loss": 1.2374,
      "step": 550
    },
    {
      "epoch": 0.26858396295393616,
      "grad_norm": 0.3959074914455414,
      "learning_rate": 0.00016728588053755203,
      "loss": 1.2054,
      "step": 551
    },
    {
      "epoch": 0.269071411162564,
      "grad_norm": 0.4201538562774658,
      "learning_rate": 0.00016717193223470623,
      "loss": 1.1499,
      "step": 552
    },
    {
      "epoch": 0.2695588593711918,
      "grad_norm": 0.37865114212036133,
      "learning_rate": 0.00016705782478374827,
      "loss": 1.2507,
      "step": 553
    },
    {
      "epoch": 0.2700463075798196,
      "grad_norm": 0.3289918601512909,
      "learning_rate": 0.0001669435584550289,
      "loss": 1.2065,
      "step": 554
    },
    {
      "epoch": 0.27053375578844746,
      "grad_norm": 0.3060459494590759,
      "learning_rate": 0.0001668291335192754,
      "loss": 1.1876,
      "step": 555
    },
    {
      "epoch": 0.2710212039970753,
      "grad_norm": 0.3648618161678314,
      "learning_rate": 0.00016671455024759067,
      "loss": 1.1132,
      "step": 556
    },
    {
      "epoch": 0.27150865220570314,
      "grad_norm": 0.35501593351364136,
      "learning_rate": 0.00016659980891145298,
      "loss": 1.1747,
      "step": 557
    },
    {
      "epoch": 0.271996100414331,
      "grad_norm": 0.31883859634399414,
      "learning_rate": 0.0001664849097827149,
      "loss": 1.217,
      "step": 558
    },
    {
      "epoch": 0.2724835486229588,
      "grad_norm": 0.27165141701698303,
      "learning_rate": 0.00016636985313360289,
      "loss": 1.1628,
      "step": 559
    },
    {
      "epoch": 0.27297099683158665,
      "grad_norm": 0.3059232831001282,
      "learning_rate": 0.00016625463923671668,
      "loss": 1.043,
      "step": 560
    },
    {
      "epoch": 0.2734584450402145,
      "grad_norm": 0.35888320207595825,
      "learning_rate": 0.0001661392683650286,
      "loss": 1.1712,
      "step": 561
    },
    {
      "epoch": 0.27394589324884233,
      "grad_norm": 0.3877924978733063,
      "learning_rate": 0.0001660237407918827,
      "loss": 1.1422,
      "step": 562
    },
    {
      "epoch": 0.27443334145747017,
      "grad_norm": 0.3447536528110504,
      "learning_rate": 0.0001659080567909945,
      "loss": 1.0769,
      "step": 563
    },
    {
      "epoch": 0.27492078966609795,
      "grad_norm": 0.3665088415145874,
      "learning_rate": 0.00016579221663645007,
      "loss": 1.1923,
      "step": 564
    },
    {
      "epoch": 0.2754082378747258,
      "grad_norm": 0.323263943195343,
      "learning_rate": 0.0001656762206027054,
      "loss": 1.2303,
      "step": 565
    },
    {
      "epoch": 0.27589568608335363,
      "grad_norm": 0.3286653757095337,
      "learning_rate": 0.0001655600689645858,
      "loss": 1.3282,
      "step": 566
    },
    {
      "epoch": 0.27638313429198147,
      "grad_norm": 0.30817481875419617,
      "learning_rate": 0.00016544376199728543,
      "loss": 1.2157,
      "step": 567
    },
    {
      "epoch": 0.2768705825006093,
      "grad_norm": 0.31664392352104187,
      "learning_rate": 0.0001653272999763662,
      "loss": 1.1781,
      "step": 568
    },
    {
      "epoch": 0.27735803070923715,
      "grad_norm": 0.3369152247905731,
      "learning_rate": 0.00016521068317775753,
      "loss": 1.2071,
      "step": 569
    },
    {
      "epoch": 0.277845478917865,
      "grad_norm": 0.4035291075706482,
      "learning_rate": 0.00016509391187775557,
      "loss": 1.1537,
      "step": 570
    },
    {
      "epoch": 0.2783329271264928,
      "grad_norm": 0.2857488691806793,
      "learning_rate": 0.00016497698635302243,
      "loss": 1.1335,
      "step": 571
    },
    {
      "epoch": 0.27882037533512066,
      "grad_norm": 0.3045223653316498,
      "learning_rate": 0.0001648599068805857,
      "loss": 1.2276,
      "step": 572
    },
    {
      "epoch": 0.2793078235437485,
      "grad_norm": 0.3673149347305298,
      "learning_rate": 0.00016474267373783768,
      "loss": 1.3442,
      "step": 573
    },
    {
      "epoch": 0.2797952717523763,
      "grad_norm": 0.464558482170105,
      "learning_rate": 0.00016462528720253482,
      "loss": 1.0838,
      "step": 574
    },
    {
      "epoch": 0.2802827199610041,
      "grad_norm": 0.40606024861335754,
      "learning_rate": 0.00016450774755279678,
      "loss": 1.266,
      "step": 575
    },
    {
      "epoch": 0.28077016816963196,
      "grad_norm": 0.3047773241996765,
      "learning_rate": 0.0001643900550671063,
      "loss": 1.125,
      "step": 576
    },
    {
      "epoch": 0.2812576163782598,
      "grad_norm": 0.30575650930404663,
      "learning_rate": 0.00016427221002430798,
      "loss": 1.0903,
      "step": 577
    },
    {
      "epoch": 0.28174506458688764,
      "grad_norm": 0.3327069580554962,
      "learning_rate": 0.000164154212703608,
      "loss": 1.0927,
      "step": 578
    },
    {
      "epoch": 0.2822325127955155,
      "grad_norm": 0.3694591522216797,
      "learning_rate": 0.0001640360633845733,
      "loss": 1.2099,
      "step": 579
    },
    {
      "epoch": 0.2827199610041433,
      "grad_norm": 0.4236904978752136,
      "learning_rate": 0.0001639177623471309,
      "loss": 1.2688,
      "step": 580
    },
    {
      "epoch": 0.28320740921277116,
      "grad_norm": 0.4387606978416443,
      "learning_rate": 0.00016379930987156735,
      "loss": 1.2561,
      "step": 581
    },
    {
      "epoch": 0.283694857421399,
      "grad_norm": 0.32503339648246765,
      "learning_rate": 0.00016368070623852792,
      "loss": 1.2589,
      "step": 582
    },
    {
      "epoch": 0.28418230563002683,
      "grad_norm": 0.3146470785140991,
      "learning_rate": 0.00016356195172901613,
      "loss": 1.151,
      "step": 583
    },
    {
      "epoch": 0.2846697538386546,
      "grad_norm": 0.28814375400543213,
      "learning_rate": 0.00016344304662439284,
      "loss": 1.1437,
      "step": 584
    },
    {
      "epoch": 0.28515720204728245,
      "grad_norm": 0.342899888753891,
      "learning_rate": 0.0001633239912063757,
      "loss": 1.1183,
      "step": 585
    },
    {
      "epoch": 0.2856446502559103,
      "grad_norm": 0.40479201078414917,
      "learning_rate": 0.00016320478575703864,
      "loss": 1.15,
      "step": 586
    },
    {
      "epoch": 0.28613209846453813,
      "grad_norm": 0.4515096843242645,
      "learning_rate": 0.00016308543055881098,
      "loss": 1.1598,
      "step": 587
    },
    {
      "epoch": 0.28661954667316597,
      "grad_norm": 0.35229426622390747,
      "learning_rate": 0.00016296592589447673,
      "loss": 1.3097,
      "step": 588
    },
    {
      "epoch": 0.2871069948817938,
      "grad_norm": 0.3666463792324066,
      "learning_rate": 0.00016284627204717417,
      "loss": 1.2794,
      "step": 589
    },
    {
      "epoch": 0.28759444309042165,
      "grad_norm": 0.30023884773254395,
      "learning_rate": 0.000162726469300395,
      "loss": 1.1387,
      "step": 590
    },
    {
      "epoch": 0.2880818912990495,
      "grad_norm": 0.3334953784942627,
      "learning_rate": 0.00016260651793798366,
      "loss": 1.1033,
      "step": 591
    },
    {
      "epoch": 0.2885693395076773,
      "grad_norm": 0.3084891736507416,
      "learning_rate": 0.00016248641824413671,
      "loss": 1.2619,
      "step": 592
    },
    {
      "epoch": 0.28905678771630516,
      "grad_norm": 0.35241594910621643,
      "learning_rate": 0.00016236617050340214,
      "loss": 1.1398,
      "step": 593
    },
    {
      "epoch": 0.289544235924933,
      "grad_norm": 0.33038970828056335,
      "learning_rate": 0.00016224577500067877,
      "loss": 1.128,
      "step": 594
    },
    {
      "epoch": 0.2900316841335608,
      "grad_norm": 0.3409155607223511,
      "learning_rate": 0.00016212523202121544,
      "loss": 1.0931,
      "step": 595
    },
    {
      "epoch": 0.2905191323421886,
      "grad_norm": 0.3841816782951355,
      "learning_rate": 0.00016200454185061043,
      "loss": 1.0484,
      "step": 596
    },
    {
      "epoch": 0.29100658055081646,
      "grad_norm": 0.3630054295063019,
      "learning_rate": 0.00016188370477481073,
      "loss": 1.2399,
      "step": 597
    },
    {
      "epoch": 0.2914940287594443,
      "grad_norm": 0.35175657272338867,
      "learning_rate": 0.00016176272108011142,
      "loss": 1.1994,
      "step": 598
    },
    {
      "epoch": 0.29198147696807214,
      "grad_norm": 0.2972811460494995,
      "learning_rate": 0.00016164159105315487,
      "loss": 1.0144,
      "step": 599
    },
    {
      "epoch": 0.2924689251767,
      "grad_norm": 0.32659175992012024,
      "learning_rate": 0.0001615203149809303,
      "loss": 1.1463,
      "step": 600
    },
    {
      "epoch": 0.2929563733853278,
      "grad_norm": 0.3672257363796234,
      "learning_rate": 0.00016139889315077287,
      "loss": 1.0791,
      "step": 601
    },
    {
      "epoch": 0.29344382159395566,
      "grad_norm": 0.3619696795940399,
      "learning_rate": 0.00016127732585036303,
      "loss": 1.2297,
      "step": 602
    },
    {
      "epoch": 0.2939312698025835,
      "grad_norm": 0.34306833148002625,
      "learning_rate": 0.00016115561336772598,
      "loss": 1.1338,
      "step": 603
    },
    {
      "epoch": 0.29441871801121133,
      "grad_norm": 0.3267507553100586,
      "learning_rate": 0.00016103375599123084,
      "loss": 1.0824,
      "step": 604
    },
    {
      "epoch": 0.2949061662198391,
      "grad_norm": 0.3431141972541809,
      "learning_rate": 0.00016091175400959005,
      "loss": 1.181,
      "step": 605
    },
    {
      "epoch": 0.29539361442846696,
      "grad_norm": 0.4020690619945526,
      "learning_rate": 0.00016078960771185856,
      "loss": 1.1829,
      "step": 606
    },
    {
      "epoch": 0.2958810626370948,
      "grad_norm": 0.32137131690979004,
      "learning_rate": 0.00016066731738743344,
      "loss": 1.1541,
      "step": 607
    },
    {
      "epoch": 0.29636851084572263,
      "grad_norm": 0.39603304862976074,
      "learning_rate": 0.00016054488332605283,
      "loss": 1.0096,
      "step": 608
    },
    {
      "epoch": 0.29685595905435047,
      "grad_norm": 0.3227417767047882,
      "learning_rate": 0.00016042230581779547,
      "loss": 1.2699,
      "step": 609
    },
    {
      "epoch": 0.2973434072629783,
      "grad_norm": 0.391846626996994,
      "learning_rate": 0.00016029958515307997,
      "loss": 1.219,
      "step": 610
    },
    {
      "epoch": 0.29783085547160615,
      "grad_norm": 0.33323460817337036,
      "learning_rate": 0.00016017672162266412,
      "loss": 1.1751,
      "step": 611
    },
    {
      "epoch": 0.298318303680234,
      "grad_norm": 0.3665689527988434,
      "learning_rate": 0.0001600537155176442,
      "loss": 1.134,
      "step": 612
    },
    {
      "epoch": 0.2988057518888618,
      "grad_norm": 0.4079165458679199,
      "learning_rate": 0.00015993056712945423,
      "loss": 1.051,
      "step": 613
    },
    {
      "epoch": 0.29929320009748966,
      "grad_norm": 0.4210244417190552,
      "learning_rate": 0.00015980727674986547,
      "loss": 1.3286,
      "step": 614
    },
    {
      "epoch": 0.29978064830611745,
      "grad_norm": 0.35658687353134155,
      "learning_rate": 0.0001596838446709854,
      "loss": 1.1324,
      "step": 615
    },
    {
      "epoch": 0.3002680965147453,
      "grad_norm": 0.30646565556526184,
      "learning_rate": 0.00015956027118525743,
      "loss": 1.1269,
      "step": 616
    },
    {
      "epoch": 0.3007555447233731,
      "grad_norm": 0.4578656852245331,
      "learning_rate": 0.00015943655658545987,
      "loss": 1.505,
      "step": 617
    },
    {
      "epoch": 0.30124299293200096,
      "grad_norm": 0.3560977876186371,
      "learning_rate": 0.00015931270116470537,
      "loss": 1.0863,
      "step": 618
    },
    {
      "epoch": 0.3017304411406288,
      "grad_norm": 0.3431141972541809,
      "learning_rate": 0.00015918870521644025,
      "loss": 1.1088,
      "step": 619
    },
    {
      "epoch": 0.30221788934925664,
      "grad_norm": 0.364183634519577,
      "learning_rate": 0.00015906456903444385,
      "loss": 1.168,
      "step": 620
    },
    {
      "epoch": 0.3027053375578845,
      "grad_norm": 0.33767372369766235,
      "learning_rate": 0.00015894029291282758,
      "loss": 1.1933,
      "step": 621
    },
    {
      "epoch": 0.3031927857665123,
      "grad_norm": 0.3631367087364197,
      "learning_rate": 0.00015881587714603463,
      "loss": 1.0731,
      "step": 622
    },
    {
      "epoch": 0.30368023397514016,
      "grad_norm": 0.3278936445713043,
      "learning_rate": 0.00015869132202883885,
      "loss": 1.169,
      "step": 623
    },
    {
      "epoch": 0.304167682183768,
      "grad_norm": 0.38841626048088074,
      "learning_rate": 0.00015856662785634432,
      "loss": 1.2075,
      "step": 624
    },
    {
      "epoch": 0.30465513039239583,
      "grad_norm": 0.3402353823184967,
      "learning_rate": 0.00015844179492398465,
      "loss": 1.1615,
      "step": 625
    },
    {
      "epoch": 0.3051425786010236,
      "grad_norm": 0.2887095510959625,
      "learning_rate": 0.0001583168235275221,
      "loss": 1.1755,
      "step": 626
    },
    {
      "epoch": 0.30563002680965146,
      "grad_norm": 0.327481746673584,
      "learning_rate": 0.00015819171396304704,
      "loss": 1.0832,
      "step": 627
    },
    {
      "epoch": 0.3061174750182793,
      "grad_norm": 0.33721551299095154,
      "learning_rate": 0.00015806646652697718,
      "loss": 1.1645,
      "step": 628
    },
    {
      "epoch": 0.30660492322690713,
      "grad_norm": 0.36406847834587097,
      "learning_rate": 0.00015794108151605696,
      "loss": 1.2106,
      "step": 629
    },
    {
      "epoch": 0.30709237143553497,
      "grad_norm": 0.3128565847873688,
      "learning_rate": 0.0001578155592273566,
      "loss": 1.2149,
      "step": 630
    },
    {
      "epoch": 0.3075798196441628,
      "grad_norm": 0.31122851371765137,
      "learning_rate": 0.00015768989995827175,
      "loss": 1.2292,
      "step": 631
    },
    {
      "epoch": 0.30806726785279065,
      "grad_norm": 0.37615668773651123,
      "learning_rate": 0.00015756410400652248,
      "loss": 1.3363,
      "step": 632
    },
    {
      "epoch": 0.3085547160614185,
      "grad_norm": 0.3348335027694702,
      "learning_rate": 0.0001574381716701528,
      "loss": 1.1649,
      "step": 633
    },
    {
      "epoch": 0.3090421642700463,
      "grad_norm": 0.3323783278465271,
      "learning_rate": 0.00015731210324752972,
      "loss": 1.1069,
      "step": 634
    },
    {
      "epoch": 0.30952961247867417,
      "grad_norm": 0.37065181136131287,
      "learning_rate": 0.00015718589903734282,
      "loss": 1.2328,
      "step": 635
    },
    {
      "epoch": 0.31001706068730195,
      "grad_norm": 0.3175657391548157,
      "learning_rate": 0.0001570595593386033,
      "loss": 1.0801,
      "step": 636
    },
    {
      "epoch": 0.3105045088959298,
      "grad_norm": 0.30346453189849854,
      "learning_rate": 0.00015693308445064336,
      "loss": 1.1101,
      "step": 637
    },
    {
      "epoch": 0.3109919571045576,
      "grad_norm": 0.31637394428253174,
      "learning_rate": 0.00015680647467311557,
      "loss": 1.1573,
      "step": 638
    },
    {
      "epoch": 0.31147940531318546,
      "grad_norm": 0.2695266306400299,
      "learning_rate": 0.00015667973030599207,
      "loss": 1.0157,
      "step": 639
    },
    {
      "epoch": 0.3119668535218133,
      "grad_norm": 0.34872207045555115,
      "learning_rate": 0.00015655285164956385,
      "loss": 1.2239,
      "step": 640
    },
    {
      "epoch": 0.31245430173044114,
      "grad_norm": 0.3599780797958374,
      "learning_rate": 0.00015642583900444,
      "loss": 1.145,
      "step": 641
    },
    {
      "epoch": 0.312941749939069,
      "grad_norm": 0.3584708869457245,
      "learning_rate": 0.00015629869267154726,
      "loss": 1.3491,
      "step": 642
    },
    {
      "epoch": 0.3134291981476968,
      "grad_norm": 0.3707854449748993,
      "learning_rate": 0.0001561714129521289,
      "loss": 1.1459,
      "step": 643
    },
    {
      "epoch": 0.31391664635632466,
      "grad_norm": 0.3733751177787781,
      "learning_rate": 0.00015604400014774443,
      "loss": 1.2256,
      "step": 644
    },
    {
      "epoch": 0.3144040945649525,
      "grad_norm": 0.39851659536361694,
      "learning_rate": 0.0001559164545602684,
      "loss": 1.1227,
      "step": 645
    },
    {
      "epoch": 0.3148915427735803,
      "grad_norm": 0.3187284469604492,
      "learning_rate": 0.0001557887764918902,
      "loss": 1.1991,
      "step": 646
    },
    {
      "epoch": 0.3153789909822081,
      "grad_norm": 0.4330853521823883,
      "learning_rate": 0.00015566096624511307,
      "loss": 1.1385,
      "step": 647
    },
    {
      "epoch": 0.31586643919083596,
      "grad_norm": 0.3386607766151428,
      "learning_rate": 0.00015553302412275326,
      "loss": 1.1159,
      "step": 648
    },
    {
      "epoch": 0.3163538873994638,
      "grad_norm": 0.36546099185943604,
      "learning_rate": 0.0001554049504279396,
      "loss": 1.1105,
      "step": 649
    },
    {
      "epoch": 0.31684133560809163,
      "grad_norm": 0.34602367877960205,
      "learning_rate": 0.00015527674546411265,
      "loss": 1.0896,
      "step": 650
    },
    {
      "epoch": 0.3173287838167195,
      "grad_norm": 0.4234674274921417,
      "learning_rate": 0.00015514840953502392,
      "loss": 1.084,
      "step": 651
    },
    {
      "epoch": 0.3178162320253473,
      "grad_norm": 0.4006870687007904,
      "learning_rate": 0.0001550199429447353,
      "loss": 1.2597,
      "step": 652
    },
    {
      "epoch": 0.31830368023397515,
      "grad_norm": 0.2935373783111572,
      "learning_rate": 0.0001548913459976181,
      "loss": 1.1391,
      "step": 653
    },
    {
      "epoch": 0.318791128442603,
      "grad_norm": 0.34012746810913086,
      "learning_rate": 0.00015476261899835265,
      "loss": 1.2911,
      "step": 654
    },
    {
      "epoch": 0.3192785766512308,
      "grad_norm": 0.4001348614692688,
      "learning_rate": 0.00015463376225192734,
      "loss": 1.2319,
      "step": 655
    },
    {
      "epoch": 0.31976602485985867,
      "grad_norm": 0.35862889885902405,
      "learning_rate": 0.00015450477606363786,
      "loss": 1.2286,
      "step": 656
    },
    {
      "epoch": 0.32025347306848645,
      "grad_norm": 0.3550409972667694,
      "learning_rate": 0.00015437566073908681,
      "loss": 1.1207,
      "step": 657
    },
    {
      "epoch": 0.3207409212771143,
      "grad_norm": 0.3076868951320648,
      "learning_rate": 0.0001542464165841825,
      "loss": 1.2359,
      "step": 658
    },
    {
      "epoch": 0.3212283694857421,
      "grad_norm": 0.3051203787326813,
      "learning_rate": 0.00015411704390513867,
      "loss": 1.2276,
      "step": 659
    },
    {
      "epoch": 0.32171581769436997,
      "grad_norm": 0.3316993713378906,
      "learning_rate": 0.00015398754300847343,
      "loss": 1.188,
      "step": 660
    },
    {
      "epoch": 0.3222032659029978,
      "grad_norm": 0.35070377588272095,
      "learning_rate": 0.00015385791420100876,
      "loss": 0.951,
      "step": 661
    },
    {
      "epoch": 0.32269071411162564,
      "grad_norm": 0.31650400161743164,
      "learning_rate": 0.00015372815778986971,
      "loss": 1.2612,
      "step": 662
    },
    {
      "epoch": 0.3231781623202535,
      "grad_norm": 0.2900366485118866,
      "learning_rate": 0.00015359827408248356,
      "loss": 1.055,
      "step": 663
    },
    {
      "epoch": 0.3236656105288813,
      "grad_norm": 0.3152225613594055,
      "learning_rate": 0.0001534682633865792,
      "loss": 1.0437,
      "step": 664
    },
    {
      "epoch": 0.32415305873750916,
      "grad_norm": 0.3429870009422302,
      "learning_rate": 0.0001533381260101865,
      "loss": 1.1633,
      "step": 665
    },
    {
      "epoch": 0.324640506946137,
      "grad_norm": 0.36703914403915405,
      "learning_rate": 0.00015320786226163537,
      "loss": 1.254,
      "step": 666
    },
    {
      "epoch": 0.3251279551547648,
      "grad_norm": 0.4383450150489807,
      "learning_rate": 0.00015307747244955517,
      "loss": 1.225,
      "step": 667
    },
    {
      "epoch": 0.3256154033633926,
      "grad_norm": 0.30220985412597656,
      "learning_rate": 0.00015294695688287396,
      "loss": 1.0824,
      "step": 668
    },
    {
      "epoch": 0.32610285157202046,
      "grad_norm": 0.3530774414539337,
      "learning_rate": 0.00015281631587081763,
      "loss": 1.1806,
      "step": 669
    },
    {
      "epoch": 0.3265902997806483,
      "grad_norm": 0.32462990283966064,
      "learning_rate": 0.00015268554972290937,
      "loss": 1.1544,
      "step": 670
    },
    {
      "epoch": 0.32707774798927614,
      "grad_norm": 0.3622325360774994,
      "learning_rate": 0.00015255465874896898,
      "loss": 1.2438,
      "step": 671
    },
    {
      "epoch": 0.327565196197904,
      "grad_norm": 0.3258713185787201,
      "learning_rate": 0.0001524236432591117,
      "loss": 1.1685,
      "step": 672
    },
    {
      "epoch": 0.3280526444065318,
      "grad_norm": 0.3288484811782837,
      "learning_rate": 0.00015229250356374804,
      "loss": 1.163,
      "step": 673
    },
    {
      "epoch": 0.32854009261515965,
      "grad_norm": 0.28387171030044556,
      "learning_rate": 0.0001521612399735827,
      "loss": 1.1612,
      "step": 674
    },
    {
      "epoch": 0.3290275408237875,
      "grad_norm": 0.3023267090320587,
      "learning_rate": 0.0001520298527996139,
      "loss": 1.107,
      "step": 675
    },
    {
      "epoch": 0.32951498903241533,
      "grad_norm": 0.33846303820610046,
      "learning_rate": 0.00015189834235313266,
      "loss": 1.1018,
      "step": 676
    },
    {
      "epoch": 0.3300024372410431,
      "grad_norm": 0.4579784870147705,
      "learning_rate": 0.00015176670894572212,
      "loss": 1.2989,
      "step": 677
    },
    {
      "epoch": 0.33048988544967095,
      "grad_norm": 0.4055747985839844,
      "learning_rate": 0.00015163495288925672,
      "loss": 1.164,
      "step": 678
    },
    {
      "epoch": 0.3309773336582988,
      "grad_norm": 0.310495525598526,
      "learning_rate": 0.00015150307449590143,
      "loss": 1.216,
      "step": 679
    },
    {
      "epoch": 0.3314647818669266,
      "grad_norm": 0.3050582706928253,
      "learning_rate": 0.0001513710740781112,
      "loss": 1.0968,
      "step": 680
    },
    {
      "epoch": 0.33195223007555447,
      "grad_norm": 0.3576156497001648,
      "learning_rate": 0.00015123895194862997,
      "loss": 1.0803,
      "step": 681
    },
    {
      "epoch": 0.3324396782841823,
      "grad_norm": 0.4268052577972412,
      "learning_rate": 0.00015110670842049005,
      "loss": 1.1273,
      "step": 682
    },
    {
      "epoch": 0.33292712649281014,
      "grad_norm": 0.35450395941734314,
      "learning_rate": 0.00015097434380701143,
      "loss": 1.2042,
      "step": 683
    },
    {
      "epoch": 0.333414574701438,
      "grad_norm": 0.3277284502983093,
      "learning_rate": 0.000150841858421801,
      "loss": 1.099,
      "step": 684
    },
    {
      "epoch": 0.3339020229100658,
      "grad_norm": 0.3645627796649933,
      "learning_rate": 0.00015070925257875173,
      "loss": 1.1794,
      "step": 685
    },
    {
      "epoch": 0.33438947111869366,
      "grad_norm": 0.3086869716644287,
      "learning_rate": 0.00015057652659204197,
      "loss": 1.0318,
      "step": 686
    },
    {
      "epoch": 0.3348769193273215,
      "grad_norm": 0.2932779788970947,
      "learning_rate": 0.00015044368077613482,
      "loss": 1.086,
      "step": 687
    },
    {
      "epoch": 0.3353643675359493,
      "grad_norm": 0.3116811513900757,
      "learning_rate": 0.00015031071544577724,
      "loss": 1.0702,
      "step": 688
    },
    {
      "epoch": 0.3358518157445771,
      "grad_norm": 0.36161184310913086,
      "learning_rate": 0.00015017763091599928,
      "loss": 1.1014,
      "step": 689
    },
    {
      "epoch": 0.33633926395320496,
      "grad_norm": 0.3198288381099701,
      "learning_rate": 0.00015004442750211352,
      "loss": 1.2034,
      "step": 690
    },
    {
      "epoch": 0.3368267121618328,
      "grad_norm": 0.350289523601532,
      "learning_rate": 0.00014991110551971414,
      "loss": 1.2719,
      "step": 691
    },
    {
      "epoch": 0.33731416037046064,
      "grad_norm": 0.3655170798301697,
      "learning_rate": 0.0001497776652846762,
      "loss": 1.1432,
      "step": 692
    },
    {
      "epoch": 0.3378016085790885,
      "grad_norm": 0.41630303859710693,
      "learning_rate": 0.0001496441071131551,
      "loss": 1.2402,
      "step": 693
    },
    {
      "epoch": 0.3382890567877163,
      "grad_norm": 0.3537638485431671,
      "learning_rate": 0.00014951043132158546,
      "loss": 1.3121,
      "step": 694
    },
    {
      "epoch": 0.33877650499634415,
      "grad_norm": 0.29230767488479614,
      "learning_rate": 0.00014937663822668065,
      "loss": 0.9822,
      "step": 695
    },
    {
      "epoch": 0.339263953204972,
      "grad_norm": 0.3335493505001068,
      "learning_rate": 0.00014924272814543208,
      "loss": 1.2364,
      "step": 696
    },
    {
      "epoch": 0.33975140141359983,
      "grad_norm": 0.3376023471355438,
      "learning_rate": 0.00014910870139510815,
      "loss": 1.2683,
      "step": 697
    },
    {
      "epoch": 0.3402388496222276,
      "grad_norm": 0.3680487275123596,
      "learning_rate": 0.00014897455829325374,
      "loss": 1.0877,
      "step": 698
    },
    {
      "epoch": 0.34072629783085545,
      "grad_norm": 0.33870601654052734,
      "learning_rate": 0.00014884029915768944,
      "loss": 1.1303,
      "step": 699
    },
    {
      "epoch": 0.3412137460394833,
      "grad_norm": 0.31731757521629333,
      "learning_rate": 0.00014870592430651073,
      "loss": 1.2437,
      "step": 700
    },
    {
      "epoch": 0.34170119424811113,
      "grad_norm": 0.31661751866340637,
      "learning_rate": 0.00014857143405808728,
      "loss": 1.0931,
      "step": 701
    },
    {
      "epoch": 0.34218864245673897,
      "grad_norm": 0.38429930806159973,
      "learning_rate": 0.0001484368287310621,
      "loss": 1.115,
      "step": 702
    },
    {
      "epoch": 0.3426760906653668,
      "grad_norm": 0.4297671914100647,
      "learning_rate": 0.00014830210864435087,
      "loss": 1.2848,
      "step": 703
    },
    {
      "epoch": 0.34316353887399464,
      "grad_norm": 0.2973197400569916,
      "learning_rate": 0.00014816727411714125,
      "loss": 1.0212,
      "step": 704
    },
    {
      "epoch": 0.3436509870826225,
      "grad_norm": 0.3790116310119629,
      "learning_rate": 0.00014803232546889192,
      "loss": 1.1624,
      "step": 705
    },
    {
      "epoch": 0.3441384352912503,
      "grad_norm": 0.3607287108898163,
      "learning_rate": 0.000147897263019332,
      "loss": 1.1087,
      "step": 706
    },
    {
      "epoch": 0.34462588349987816,
      "grad_norm": 0.3548680245876312,
      "learning_rate": 0.00014776208708846026,
      "loss": 1.1597,
      "step": 707
    },
    {
      "epoch": 0.34511333170850594,
      "grad_norm": 0.351642370223999,
      "learning_rate": 0.0001476267979965443,
      "loss": 1.1223,
      "step": 708
    },
    {
      "epoch": 0.3456007799171338,
      "grad_norm": 0.3794901669025421,
      "learning_rate": 0.00014749139606411982,
      "loss": 1.2532,
      "step": 709
    },
    {
      "epoch": 0.3460882281257616,
      "grad_norm": 0.36454519629478455,
      "learning_rate": 0.0001473558816119899,
      "loss": 1.2419,
      "step": 710
    },
    {
      "epoch": 0.34657567633438946,
      "grad_norm": 0.37354356050491333,
      "learning_rate": 0.00014722025496122421,
      "loss": 1.0632,
      "step": 711
    },
    {
      "epoch": 0.3470631245430173,
      "grad_norm": 0.4036533534526825,
      "learning_rate": 0.00014708451643315827,
      "loss": 1.2025,
      "step": 712
    },
    {
      "epoch": 0.34755057275164514,
      "grad_norm": 0.3371066451072693,
      "learning_rate": 0.0001469486663493925,
      "loss": 1.1734,
      "step": 713
    },
    {
      "epoch": 0.348038020960273,
      "grad_norm": 0.34339281916618347,
      "learning_rate": 0.00014681270503179192,
      "loss": 1.0125,
      "step": 714
    },
    {
      "epoch": 0.3485254691689008,
      "grad_norm": 0.3904884457588196,
      "learning_rate": 0.0001466766328024848,
      "loss": 1.2684,
      "step": 715
    },
    {
      "epoch": 0.34901291737752865,
      "grad_norm": 0.3271896541118622,
      "learning_rate": 0.00014654044998386242,
      "loss": 1.2133,
      "step": 716
    },
    {
      "epoch": 0.3495003655861565,
      "grad_norm": 0.4050779938697815,
      "learning_rate": 0.0001464041568985779,
      "loss": 1.0819,
      "step": 717
    },
    {
      "epoch": 0.34998781379478433,
      "grad_norm": 0.35053789615631104,
      "learning_rate": 0.0001462677538695457,
      "loss": 1.1508,
      "step": 718
    },
    {
      "epoch": 0.3504752620034121,
      "grad_norm": 0.3360033929347992,
      "learning_rate": 0.00014613124121994078,
      "loss": 1.2554,
      "step": 719
    },
    {
      "epoch": 0.35096271021203995,
      "grad_norm": 0.2929222583770752,
      "learning_rate": 0.00014599461927319778,
      "loss": 1.1245,
      "step": 720
    },
    {
      "epoch": 0.3514501584206678,
      "grad_norm": 0.3568030595779419,
      "learning_rate": 0.00014585788835301026,
      "loss": 1.1633,
      "step": 721
    },
    {
      "epoch": 0.35193760662929563,
      "grad_norm": 0.3090217709541321,
      "learning_rate": 0.00014572104878333007,
      "loss": 1.1178,
      "step": 722
    },
    {
      "epoch": 0.35242505483792347,
      "grad_norm": 0.3651696741580963,
      "learning_rate": 0.00014558410088836643,
      "loss": 1.2256,
      "step": 723
    },
    {
      "epoch": 0.3529125030465513,
      "grad_norm": 0.4779524803161621,
      "learning_rate": 0.00014544704499258514,
      "loss": 1.2883,
      "step": 724
    },
    {
      "epoch": 0.35339995125517915,
      "grad_norm": 0.31629809737205505,
      "learning_rate": 0.00014530988142070803,
      "loss": 1.1007,
      "step": 725
    },
    {
      "epoch": 0.353887399463807,
      "grad_norm": 0.33082258701324463,
      "learning_rate": 0.00014517261049771187,
      "loss": 1.1855,
      "step": 726
    },
    {
      "epoch": 0.3543748476724348,
      "grad_norm": 0.34488508105278015,
      "learning_rate": 0.0001450352325488279,
      "loss": 1.0958,
      "step": 727
    },
    {
      "epoch": 0.35486229588106266,
      "grad_norm": 0.4402891993522644,
      "learning_rate": 0.00014489774789954094,
      "loss": 1.1953,
      "step": 728
    },
    {
      "epoch": 0.35534974408969044,
      "grad_norm": 0.3120886981487274,
      "learning_rate": 0.00014476015687558846,
      "loss": 1.0842,
      "step": 729
    },
    {
      "epoch": 0.3558371922983183,
      "grad_norm": 0.42847946286201477,
      "learning_rate": 0.00014462245980296018,
      "loss": 1.1887,
      "step": 730
    },
    {
      "epoch": 0.3563246405069461,
      "grad_norm": 0.2847718298435211,
      "learning_rate": 0.00014448465700789685,
      "loss": 1.1547,
      "step": 731
    },
    {
      "epoch": 0.35681208871557396,
      "grad_norm": 0.4204113781452179,
      "learning_rate": 0.00014434674881688995,
      "loss": 1.1289,
      "step": 732
    },
    {
      "epoch": 0.3572995369242018,
      "grad_norm": 0.28889018297195435,
      "learning_rate": 0.00014420873555668045,
      "loss": 1.204,
      "step": 733
    },
    {
      "epoch": 0.35778698513282964,
      "grad_norm": 0.41692429780960083,
      "learning_rate": 0.00014407061755425832,
      "loss": 1.0804,
      "step": 734
    },
    {
      "epoch": 0.3582744333414575,
      "grad_norm": 0.32253298163414,
      "learning_rate": 0.00014393239513686178,
      "loss": 1.2295,
      "step": 735
    },
    {
      "epoch": 0.3587618815500853,
      "grad_norm": 0.4006897211074829,
      "learning_rate": 0.00014379406863197636,
      "loss": 1.1688,
      "step": 736
    },
    {
      "epoch": 0.35924932975871315,
      "grad_norm": 0.4267953932285309,
      "learning_rate": 0.00014365563836733425,
      "loss": 1.2379,
      "step": 737
    },
    {
      "epoch": 0.359736777967341,
      "grad_norm": 0.3543131947517395,
      "learning_rate": 0.00014351710467091336,
      "loss": 1.1089,
      "step": 738
    },
    {
      "epoch": 0.3602242261759688,
      "grad_norm": 0.33696895837783813,
      "learning_rate": 0.00014337846787093679,
      "loss": 1.1694,
      "step": 739
    },
    {
      "epoch": 0.3607116743845966,
      "grad_norm": 0.2915426790714264,
      "learning_rate": 0.00014323972829587183,
      "loss": 1.1127,
      "step": 740
    },
    {
      "epoch": 0.36119912259322445,
      "grad_norm": 0.3513476252555847,
      "learning_rate": 0.00014310088627442937,
      "loss": 1.1797,
      "step": 741
    },
    {
      "epoch": 0.3616865708018523,
      "grad_norm": 0.34113776683807373,
      "learning_rate": 0.00014296194213556289,
      "loss": 1.1351,
      "step": 742
    },
    {
      "epoch": 0.36217401901048013,
      "grad_norm": 0.48686835169792175,
      "learning_rate": 0.0001428228962084679,
      "loss": 1.4038,
      "step": 743
    },
    {
      "epoch": 0.36266146721910797,
      "grad_norm": 0.3965618908405304,
      "learning_rate": 0.00014268374882258112,
      "loss": 1.1309,
      "step": 744
    },
    {
      "epoch": 0.3631489154277358,
      "grad_norm": 0.36199474334716797,
      "learning_rate": 0.0001425445003075795,
      "loss": 1.1132,
      "step": 745
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.315054714679718,
      "learning_rate": 0.00014240515099337976,
      "loss": 1.2075,
      "step": 746
    },
    {
      "epoch": 0.3641238118449915,
      "grad_norm": 0.37648653984069824,
      "learning_rate": 0.00014226570121013733,
      "loss": 1.1172,
      "step": 747
    },
    {
      "epoch": 0.3646112600536193,
      "grad_norm": 0.38015294075012207,
      "learning_rate": 0.00014212615128824564,
      "loss": 1.1411,
      "step": 748
    },
    {
      "epoch": 0.36509870826224716,
      "grad_norm": 0.3627578616142273,
      "learning_rate": 0.00014198650155833556,
      "loss": 1.0399,
      "step": 749
    },
    {
      "epoch": 0.36558615647087495,
      "grad_norm": 0.3377631604671478,
      "learning_rate": 0.00014184675235127427,
      "loss": 1.1939,
      "step": 750
    },
    {
      "epoch": 0.3660736046795028,
      "grad_norm": 0.3698626458644867,
      "learning_rate": 0.0001417069039981647,
      "loss": 1.1762,
      "step": 751
    },
    {
      "epoch": 0.3665610528881306,
      "grad_norm": 0.3546832203865051,
      "learning_rate": 0.0001415669568303446,
      "loss": 1.0773,
      "step": 752
    },
    {
      "epoch": 0.36704850109675846,
      "grad_norm": 0.34372153878211975,
      "learning_rate": 0.00014142691117938593,
      "loss": 1.091,
      "step": 753
    },
    {
      "epoch": 0.3675359493053863,
      "grad_norm": 0.36699178814888,
      "learning_rate": 0.00014128676737709404,
      "loss": 1.2068,
      "step": 754
    },
    {
      "epoch": 0.36802339751401414,
      "grad_norm": 0.39629635214805603,
      "learning_rate": 0.00014114652575550663,
      "loss": 1.2551,
      "step": 755
    },
    {
      "epoch": 0.368510845722642,
      "grad_norm": 0.4091489613056183,
      "learning_rate": 0.0001410061866468934,
      "loss": 1.3286,
      "step": 756
    },
    {
      "epoch": 0.3689982939312698,
      "grad_norm": 0.3547840714454651,
      "learning_rate": 0.00014086575038375475,
      "loss": 1.1421,
      "step": 757
    },
    {
      "epoch": 0.36948574213989765,
      "grad_norm": 0.3419768214225769,
      "learning_rate": 0.00014072521729882153,
      "loss": 1.1423,
      "step": 758
    },
    {
      "epoch": 0.3699731903485255,
      "grad_norm": 0.4046650826931,
      "learning_rate": 0.00014058458772505384,
      "loss": 1.1828,
      "step": 759
    },
    {
      "epoch": 0.3704606385571533,
      "grad_norm": 0.4120461642742157,
      "learning_rate": 0.00014044386199564034,
      "loss": 1.1604,
      "step": 760
    },
    {
      "epoch": 0.3709480867657811,
      "grad_norm": 0.3190382122993469,
      "learning_rate": 0.00014030304044399764,
      "loss": 1.2429,
      "step": 761
    },
    {
      "epoch": 0.37143553497440895,
      "grad_norm": 0.4019441604614258,
      "learning_rate": 0.00014016212340376937,
      "loss": 1.1157,
      "step": 762
    },
    {
      "epoch": 0.3719229831830368,
      "grad_norm": 0.3926464915275574,
      "learning_rate": 0.00014002111120882532,
      "loss": 1.0597,
      "step": 763
    },
    {
      "epoch": 0.37241043139166463,
      "grad_norm": 0.33766475319862366,
      "learning_rate": 0.00013988000419326072,
      "loss": 1.2093,
      "step": 764
    },
    {
      "epoch": 0.37289787960029247,
      "grad_norm": 0.3478360176086426,
      "learning_rate": 0.0001397388026913955,
      "loss": 1.1229,
      "step": 765
    },
    {
      "epoch": 0.3733853278089203,
      "grad_norm": 0.4054871201515198,
      "learning_rate": 0.0001395975070377735,
      "loss": 1.2193,
      "step": 766
    },
    {
      "epoch": 0.37387277601754815,
      "grad_norm": 0.3610527515411377,
      "learning_rate": 0.0001394561175671615,
      "loss": 1.1604,
      "step": 767
    },
    {
      "epoch": 0.374360224226176,
      "grad_norm": 0.3942972421646118,
      "learning_rate": 0.00013931463461454868,
      "loss": 1.0723,
      "step": 768
    },
    {
      "epoch": 0.3748476724348038,
      "grad_norm": 0.38327381014823914,
      "learning_rate": 0.00013917305851514564,
      "loss": 1.1892,
      "step": 769
    },
    {
      "epoch": 0.3753351206434316,
      "grad_norm": 0.3456951677799225,
      "learning_rate": 0.00013903138960438368,
      "loss": 1.2442,
      "step": 770
    },
    {
      "epoch": 0.37582256885205945,
      "grad_norm": 0.3242649435997009,
      "learning_rate": 0.00013888962821791405,
      "loss": 1.1207,
      "step": 771
    },
    {
      "epoch": 0.3763100170606873,
      "grad_norm": 0.2798099219799042,
      "learning_rate": 0.000138747774691607,
      "loss": 0.9834,
      "step": 772
    },
    {
      "epoch": 0.3767974652693151,
      "grad_norm": 0.37123337388038635,
      "learning_rate": 0.00013860582936155112,
      "loss": 1.125,
      "step": 773
    },
    {
      "epoch": 0.37728491347794296,
      "grad_norm": 0.35441383719444275,
      "learning_rate": 0.00013846379256405257,
      "loss": 1.1059,
      "step": 774
    },
    {
      "epoch": 0.3777723616865708,
      "grad_norm": 0.3213593065738678,
      "learning_rate": 0.00013832166463563413,
      "loss": 1.1108,
      "step": 775
    },
    {
      "epoch": 0.37825980989519864,
      "grad_norm": 0.32971900701522827,
      "learning_rate": 0.00013817944591303457,
      "loss": 1.1158,
      "step": 776
    },
    {
      "epoch": 0.3787472581038265,
      "grad_norm": 0.36593112349510193,
      "learning_rate": 0.00013803713673320772,
      "loss": 1.2137,
      "step": 777
    },
    {
      "epoch": 0.3792347063124543,
      "grad_norm": 0.40837281942367554,
      "learning_rate": 0.00013789473743332174,
      "loss": 1.2048,
      "step": 778
    },
    {
      "epoch": 0.37972215452108216,
      "grad_norm": 0.3851155936717987,
      "learning_rate": 0.00013775224835075835,
      "loss": 1.1175,
      "step": 779
    },
    {
      "epoch": 0.38020960272971,
      "grad_norm": 0.352002888917923,
      "learning_rate": 0.00013760966982311192,
      "loss": 1.1423,
      "step": 780
    },
    {
      "epoch": 0.3806970509383378,
      "grad_norm": 0.3558059632778168,
      "learning_rate": 0.0001374670021881888,
      "loss": 1.1444,
      "step": 781
    },
    {
      "epoch": 0.3811844991469656,
      "grad_norm": 0.35093048214912415,
      "learning_rate": 0.0001373242457840064,
      "loss": 1.1088,
      "step": 782
    },
    {
      "epoch": 0.38167194735559345,
      "grad_norm": 0.41846126317977905,
      "learning_rate": 0.00013718140094879253,
      "loss": 1.1784,
      "step": 783
    },
    {
      "epoch": 0.3821593955642213,
      "grad_norm": 0.4293888807296753,
      "learning_rate": 0.00013703846802098443,
      "loss": 1.1374,
      "step": 784
    },
    {
      "epoch": 0.38264684377284913,
      "grad_norm": 0.38579028844833374,
      "learning_rate": 0.0001368954473392281,
      "loss": 1.2998,
      "step": 785
    },
    {
      "epoch": 0.38313429198147697,
      "grad_norm": 0.3546122610569,
      "learning_rate": 0.00013675233924237743,
      "loss": 0.9899,
      "step": 786
    },
    {
      "epoch": 0.3836217401901048,
      "grad_norm": 0.35483241081237793,
      "learning_rate": 0.00013660914406949344,
      "loss": 1.1544,
      "step": 787
    },
    {
      "epoch": 0.38410918839873265,
      "grad_norm": 0.326667845249176,
      "learning_rate": 0.00013646586215984347,
      "loss": 1.0996,
      "step": 788
    },
    {
      "epoch": 0.3845966366073605,
      "grad_norm": 0.29748162627220154,
      "learning_rate": 0.00013632249385290033,
      "loss": 1.0593,
      "step": 789
    },
    {
      "epoch": 0.3850840848159883,
      "grad_norm": 0.4415862560272217,
      "learning_rate": 0.00013617903948834155,
      "loss": 1.261,
      "step": 790
    },
    {
      "epoch": 0.3855715330246161,
      "grad_norm": 0.33770832419395447,
      "learning_rate": 0.00013603549940604853,
      "loss": 1.2808,
      "step": 791
    },
    {
      "epoch": 0.38605898123324395,
      "grad_norm": 0.35436463356018066,
      "learning_rate": 0.0001358918739461058,
      "loss": 1.1541,
      "step": 792
    },
    {
      "epoch": 0.3865464294418718,
      "grad_norm": 0.3231455683708191,
      "learning_rate": 0.0001357481634488001,
      "loss": 1.1652,
      "step": 793
    },
    {
      "epoch": 0.3870338776504996,
      "grad_norm": 0.39417925477027893,
      "learning_rate": 0.0001356043682546197,
      "loss": 1.0421,
      "step": 794
    },
    {
      "epoch": 0.38752132585912746,
      "grad_norm": 0.3667939603328705,
      "learning_rate": 0.00013546048870425356,
      "loss": 1.133,
      "step": 795
    },
    {
      "epoch": 0.3880087740677553,
      "grad_norm": 0.332832932472229,
      "learning_rate": 0.00013531652513859048,
      "loss": 1.0692,
      "step": 796
    },
    {
      "epoch": 0.38849622227638314,
      "grad_norm": 0.42437613010406494,
      "learning_rate": 0.00013517247789871824,
      "loss": 1.1769,
      "step": 797
    },
    {
      "epoch": 0.388983670485011,
      "grad_norm": 0.3812074363231659,
      "learning_rate": 0.000135028347325923,
      "loss": 1.2091,
      "step": 798
    },
    {
      "epoch": 0.3894711186936388,
      "grad_norm": 0.3778877258300781,
      "learning_rate": 0.0001348841337616882,
      "loss": 1.2446,
      "step": 799
    },
    {
      "epoch": 0.38995856690226666,
      "grad_norm": 0.4080727696418762,
      "learning_rate": 0.00013473983754769413,
      "loss": 1.2096,
      "step": 800
    },
    {
      "epoch": 0.39044601511089444,
      "grad_norm": 0.3175933361053467,
      "learning_rate": 0.0001345954590258167,
      "loss": 1.0889,
      "step": 801
    },
    {
      "epoch": 0.3909334633195223,
      "grad_norm": 0.35111868381500244,
      "learning_rate": 0.00013445099853812687,
      "loss": 1.1557,
      "step": 802
    },
    {
      "epoch": 0.3914209115281501,
      "grad_norm": 0.36578190326690674,
      "learning_rate": 0.00013430645642688988,
      "loss": 1.2139,
      "step": 803
    },
    {
      "epoch": 0.39190835973677796,
      "grad_norm": 0.33647310733795166,
      "learning_rate": 0.00013416183303456425,
      "loss": 1.1446,
      "step": 804
    },
    {
      "epoch": 0.3923958079454058,
      "grad_norm": 0.36450833082199097,
      "learning_rate": 0.0001340171287038012,
      "loss": 1.118,
      "step": 805
    },
    {
      "epoch": 0.39288325615403363,
      "grad_norm": 0.3469671308994293,
      "learning_rate": 0.00013387234377744357,
      "loss": 1.2262,
      "step": 806
    },
    {
      "epoch": 0.39337070436266147,
      "grad_norm": 0.3348066210746765,
      "learning_rate": 0.00013372747859852527,
      "loss": 1.1409,
      "step": 807
    },
    {
      "epoch": 0.3938581525712893,
      "grad_norm": 0.35472628474235535,
      "learning_rate": 0.00013358253351027031,
      "loss": 1.1755,
      "step": 808
    },
    {
      "epoch": 0.39434560077991715,
      "grad_norm": 0.32164493203163147,
      "learning_rate": 0.000133437508856092,
      "loss": 1.187,
      "step": 809
    },
    {
      "epoch": 0.394833048988545,
      "grad_norm": 0.32905271649360657,
      "learning_rate": 0.00013329240497959218,
      "loss": 1.1553,
      "step": 810
    },
    {
      "epoch": 0.3953204971971728,
      "grad_norm": 0.39591020345687866,
      "learning_rate": 0.0001331472222245605,
      "loss": 1.2909,
      "step": 811
    },
    {
      "epoch": 0.3958079454058006,
      "grad_norm": 0.3336966633796692,
      "learning_rate": 0.00013300196093497322,
      "loss": 1.2074,
      "step": 812
    },
    {
      "epoch": 0.39629539361442845,
      "grad_norm": 0.3042989671230316,
      "learning_rate": 0.00013285662145499292,
      "loss": 1.1509,
      "step": 813
    },
    {
      "epoch": 0.3967828418230563,
      "grad_norm": 0.3268469572067261,
      "learning_rate": 0.0001327112041289674,
      "loss": 1.1062,
      "step": 814
    },
    {
      "epoch": 0.3972702900316841,
      "grad_norm": 0.33514949679374695,
      "learning_rate": 0.0001325657093014288,
      "loss": 0.9606,
      "step": 815
    },
    {
      "epoch": 0.39775773824031196,
      "grad_norm": 0.4372316598892212,
      "learning_rate": 0.00013242013731709287,
      "loss": 1.1181,
      "step": 816
    },
    {
      "epoch": 0.3982451864489398,
      "grad_norm": 0.2983655631542206,
      "learning_rate": 0.00013227448852085836,
      "loss": 1.2019,
      "step": 817
    },
    {
      "epoch": 0.39873263465756764,
      "grad_norm": 0.3565848171710968,
      "learning_rate": 0.0001321287632578058,
      "loss": 1.129,
      "step": 818
    },
    {
      "epoch": 0.3992200828661955,
      "grad_norm": 0.33070090413093567,
      "learning_rate": 0.00013198296187319695,
      "loss": 1.0851,
      "step": 819
    },
    {
      "epoch": 0.3997075310748233,
      "grad_norm": 0.3758980333805084,
      "learning_rate": 0.00013183708471247395,
      "loss": 1.2546,
      "step": 820
    },
    {
      "epoch": 0.40019497928345116,
      "grad_norm": 0.3357064425945282,
      "learning_rate": 0.00013169113212125848,
      "loss": 1.1528,
      "step": 821
    },
    {
      "epoch": 0.40068242749207894,
      "grad_norm": 0.37070250511169434,
      "learning_rate": 0.00013154510444535092,
      "loss": 1.108,
      "step": 822
    },
    {
      "epoch": 0.4011698757007068,
      "grad_norm": 0.38929644227027893,
      "learning_rate": 0.0001313990020307295,
      "loss": 1.1151,
      "step": 823
    },
    {
      "epoch": 0.4016573239093346,
      "grad_norm": 0.36671310663223267,
      "learning_rate": 0.00013125282522354957,
      "loss": 1.0753,
      "step": 824
    },
    {
      "epoch": 0.40214477211796246,
      "grad_norm": 0.3539426326751709,
      "learning_rate": 0.00013110657437014278,
      "loss": 1.2476,
      "step": 825
    },
    {
      "epoch": 0.4026322203265903,
      "grad_norm": 0.3895106315612793,
      "learning_rate": 0.00013096024981701612,
      "loss": 1.2082,
      "step": 826
    },
    {
      "epoch": 0.40311966853521813,
      "grad_norm": 0.45794007182121277,
      "learning_rate": 0.00013081385191085127,
      "loss": 1.22,
      "step": 827
    },
    {
      "epoch": 0.403607116743846,
      "grad_norm": 0.37870723009109497,
      "learning_rate": 0.0001306673809985037,
      "loss": 1.1618,
      "step": 828
    },
    {
      "epoch": 0.4040945649524738,
      "grad_norm": 0.3808088004589081,
      "learning_rate": 0.00013052083742700173,
      "loss": 1.2146,
      "step": 829
    },
    {
      "epoch": 0.40458201316110165,
      "grad_norm": 0.3768877685070038,
      "learning_rate": 0.000130374221543546,
      "loss": 1.2292,
      "step": 830
    },
    {
      "epoch": 0.4050694613697295,
      "grad_norm": 0.38629233837127686,
      "learning_rate": 0.0001302275336955084,
      "loss": 1.187,
      "step": 831
    },
    {
      "epoch": 0.40555690957835727,
      "grad_norm": 0.3110441267490387,
      "learning_rate": 0.00013008077423043131,
      "loss": 1.3096,
      "step": 832
    },
    {
      "epoch": 0.4060443577869851,
      "grad_norm": 0.3257690668106079,
      "learning_rate": 0.0001299339434960268,
      "loss": 1.1468,
      "step": 833
    },
    {
      "epoch": 0.40653180599561295,
      "grad_norm": 0.3355731666088104,
      "learning_rate": 0.00012978704184017577,
      "loss": 1.2313,
      "step": 834
    },
    {
      "epoch": 0.4070192542042408,
      "grad_norm": 0.2959268093109131,
      "learning_rate": 0.00012964006961092722,
      "loss": 1.0838,
      "step": 835
    },
    {
      "epoch": 0.4075067024128686,
      "grad_norm": 0.3371834456920624,
      "learning_rate": 0.00012949302715649732,
      "loss": 1.1436,
      "step": 836
    },
    {
      "epoch": 0.40799415062149647,
      "grad_norm": 0.29665321111679077,
      "learning_rate": 0.0001293459148252686,
      "loss": 1.1141,
      "step": 837
    },
    {
      "epoch": 0.4084815988301243,
      "grad_norm": 0.29049184918403625,
      "learning_rate": 0.00012919873296578918,
      "loss": 1.1032,
      "step": 838
    },
    {
      "epoch": 0.40896904703875214,
      "grad_norm": 0.301238477230072,
      "learning_rate": 0.00012905148192677188,
      "loss": 1.0863,
      "step": 839
    },
    {
      "epoch": 0.40945649524738,
      "grad_norm": 0.3428902328014374,
      "learning_rate": 0.0001289041620570935,
      "loss": 1.1414,
      "step": 840
    },
    {
      "epoch": 0.4099439434560078,
      "grad_norm": 0.3976534605026245,
      "learning_rate": 0.00012875677370579377,
      "loss": 1.1849,
      "step": 841
    },
    {
      "epoch": 0.41043139166463566,
      "grad_norm": 0.3917904198169708,
      "learning_rate": 0.0001286093172220748,
      "loss": 1.1065,
      "step": 842
    },
    {
      "epoch": 0.41091883987326344,
      "grad_norm": 0.3630690574645996,
      "learning_rate": 0.0001284617929553001,
      "loss": 1.1917,
      "step": 843
    },
    {
      "epoch": 0.4114062880818913,
      "grad_norm": 0.383533775806427,
      "learning_rate": 0.00012831420125499374,
      "loss": 1.184,
      "step": 844
    },
    {
      "epoch": 0.4118937362905191,
      "grad_norm": 0.36599770188331604,
      "learning_rate": 0.0001281665424708396,
      "loss": 1.1461,
      "step": 845
    },
    {
      "epoch": 0.41238118449914696,
      "grad_norm": 0.36383891105651855,
      "learning_rate": 0.0001280188169526805,
      "loss": 1.1423,
      "step": 846
    },
    {
      "epoch": 0.4128686327077748,
      "grad_norm": 0.3431175947189331,
      "learning_rate": 0.00012787102505051727,
      "loss": 1.0849,
      "step": 847
    },
    {
      "epoch": 0.41335608091640264,
      "grad_norm": 0.43728041648864746,
      "learning_rate": 0.00012772316711450815,
      "loss": 1.1391,
      "step": 848
    },
    {
      "epoch": 0.4138435291250305,
      "grad_norm": 0.3861692249774933,
      "learning_rate": 0.00012757524349496778,
      "loss": 1.1677,
      "step": 849
    },
    {
      "epoch": 0.4143309773336583,
      "grad_norm": 0.3486277759075165,
      "learning_rate": 0.00012742725454236646,
      "loss": 1.1588,
      "step": 850
    },
    {
      "epoch": 0.41481842554228615,
      "grad_norm": 0.34165406227111816,
      "learning_rate": 0.0001272792006073292,
      "loss": 1.1943,
      "step": 851
    },
    {
      "epoch": 0.415305873750914,
      "grad_norm": 0.4407804608345032,
      "learning_rate": 0.000127131082040635,
      "loss": 1.1215,
      "step": 852
    },
    {
      "epoch": 0.4157933219595418,
      "grad_norm": 0.322651743888855,
      "learning_rate": 0.00012698289919321605,
      "loss": 1.0882,
      "step": 853
    },
    {
      "epoch": 0.4162807701681696,
      "grad_norm": 0.3131794035434723,
      "learning_rate": 0.00012683465241615678,
      "loss": 1.1753,
      "step": 854
    },
    {
      "epoch": 0.41676821837679745,
      "grad_norm": 0.33760449290275574,
      "learning_rate": 0.00012668634206069304,
      "loss": 1.0094,
      "step": 855
    },
    {
      "epoch": 0.4172556665854253,
      "grad_norm": 0.3074570596218109,
      "learning_rate": 0.00012653796847821147,
      "loss": 1.019,
      "step": 856
    },
    {
      "epoch": 0.4177431147940531,
      "grad_norm": 0.3627846837043762,
      "learning_rate": 0.00012638953202024836,
      "loss": 1.0777,
      "step": 857
    },
    {
      "epoch": 0.41823056300268097,
      "grad_norm": 0.37333551049232483,
      "learning_rate": 0.00012624103303848902,
      "loss": 1.1969,
      "step": 858
    },
    {
      "epoch": 0.4187180112113088,
      "grad_norm": 0.4356592893600464,
      "learning_rate": 0.00012609247188476695,
      "loss": 1.2257,
      "step": 859
    },
    {
      "epoch": 0.41920545941993664,
      "grad_norm": 0.38547471165657043,
      "learning_rate": 0.0001259438489110628,
      "loss": 1.1097,
      "step": 860
    },
    {
      "epoch": 0.4196929076285645,
      "grad_norm": 0.3097715675830841,
      "learning_rate": 0.0001257951644695039,
      "loss": 1.2319,
      "step": 861
    },
    {
      "epoch": 0.4201803558371923,
      "grad_norm": 0.38433364033699036,
      "learning_rate": 0.00012564641891236303,
      "loss": 1.1097,
      "step": 862
    },
    {
      "epoch": 0.4206678040458201,
      "grad_norm": 0.3095664978027344,
      "learning_rate": 0.0001254976125920579,
      "loss": 1.1319,
      "step": 863
    },
    {
      "epoch": 0.42115525225444794,
      "grad_norm": 0.39881959557533264,
      "learning_rate": 0.00012534874586115008,
      "loss": 1.1986,
      "step": 864
    },
    {
      "epoch": 0.4216427004630758,
      "grad_norm": 0.3613760769367218,
      "learning_rate": 0.00012519981907234434,
      "loss": 1.213,
      "step": 865
    },
    {
      "epoch": 0.4221301486717036,
      "grad_norm": 0.3688783347606659,
      "learning_rate": 0.00012505083257848768,
      "loss": 1.1223,
      "step": 866
    },
    {
      "epoch": 0.42261759688033146,
      "grad_norm": 0.39314547181129456,
      "learning_rate": 0.0001249017867325686,
      "loss": 1.2198,
      "step": 867
    },
    {
      "epoch": 0.4231050450889593,
      "grad_norm": 0.33712512254714966,
      "learning_rate": 0.00012475268188771627,
      "loss": 1.2416,
      "step": 868
    },
    {
      "epoch": 0.42359249329758714,
      "grad_norm": 0.39671799540519714,
      "learning_rate": 0.00012460351839719958,
      "loss": 1.0538,
      "step": 869
    },
    {
      "epoch": 0.424079941506215,
      "grad_norm": 0.30700284242630005,
      "learning_rate": 0.0001244542966144263,
      "loss": 1.1185,
      "step": 870
    },
    {
      "epoch": 0.4245673897148428,
      "grad_norm": 0.3273939788341522,
      "learning_rate": 0.00012430501689294246,
      "loss": 1.0733,
      "step": 871
    },
    {
      "epoch": 0.42505483792347065,
      "grad_norm": 0.36876797676086426,
      "learning_rate": 0.00012415567958643127,
      "loss": 1.2011,
      "step": 872
    },
    {
      "epoch": 0.4255422861320985,
      "grad_norm": 0.35608360171318054,
      "learning_rate": 0.00012400628504871235,
      "loss": 1.173,
      "step": 873
    },
    {
      "epoch": 0.4260297343407263,
      "grad_norm": 0.43939632177352905,
      "learning_rate": 0.00012385683363374105,
      "loss": 1.243,
      "step": 874
    },
    {
      "epoch": 0.4265171825493541,
      "grad_norm": 0.3559912443161011,
      "learning_rate": 0.0001237073256956073,
      "loss": 1.216,
      "step": 875
    },
    {
      "epoch": 0.42700463075798195,
      "grad_norm": 0.3987311124801636,
      "learning_rate": 0.0001235577615885351,
      "loss": 1.124,
      "step": 876
    },
    {
      "epoch": 0.4274920789666098,
      "grad_norm": 0.3169601261615753,
      "learning_rate": 0.0001234081416668814,
      "loss": 1.0132,
      "step": 877
    },
    {
      "epoch": 0.42797952717523763,
      "grad_norm": 0.416533499956131,
      "learning_rate": 0.00012325846628513548,
      "loss": 1.1214,
      "step": 878
    },
    {
      "epoch": 0.42846697538386547,
      "grad_norm": 0.5308129787445068,
      "learning_rate": 0.00012310873579791804,
      "loss": 1.2743,
      "step": 879
    },
    {
      "epoch": 0.4289544235924933,
      "grad_norm": 0.3403697609901428,
      "learning_rate": 0.0001229589505599802,
      "loss": 1.1407,
      "step": 880
    },
    {
      "epoch": 0.42944187180112114,
      "grad_norm": 0.35060033202171326,
      "learning_rate": 0.00012280911092620297,
      "loss": 1.1729,
      "step": 881
    },
    {
      "epoch": 0.429929320009749,
      "grad_norm": 0.3814738988876343,
      "learning_rate": 0.0001226592172515961,
      "loss": 1.1063,
      "step": 882
    },
    {
      "epoch": 0.4304167682183768,
      "grad_norm": 0.31343165040016174,
      "learning_rate": 0.0001225092698912975,
      "loss": 1.2022,
      "step": 883
    },
    {
      "epoch": 0.4309042164270046,
      "grad_norm": 0.3462253212928772,
      "learning_rate": 0.00012235926920057218,
      "loss": 1.1213,
      "step": 884
    },
    {
      "epoch": 0.43139166463563244,
      "grad_norm": 0.35463854670524597,
      "learning_rate": 0.0001222092155348115,
      "loss": 1.1719,
      "step": 885
    },
    {
      "epoch": 0.4318791128442603,
      "grad_norm": 0.38044965267181396,
      "learning_rate": 0.00012205910924953241,
      "loss": 1.201,
      "step": 886
    },
    {
      "epoch": 0.4323665610528881,
      "grad_norm": 0.3340301513671875,
      "learning_rate": 0.00012190895070037647,
      "loss": 1.1672,
      "step": 887
    },
    {
      "epoch": 0.43285400926151596,
      "grad_norm": 0.4610227942466736,
      "learning_rate": 0.00012175874024310909,
      "loss": 1.2076,
      "step": 888
    },
    {
      "epoch": 0.4333414574701438,
      "grad_norm": 0.3781220018863678,
      "learning_rate": 0.0001216084782336187,
      "loss": 1.1193,
      "step": 889
    },
    {
      "epoch": 0.43382890567877164,
      "grad_norm": 0.36019018292427063,
      "learning_rate": 0.00012145816502791576,
      "loss": 1.1613,
      "step": 890
    },
    {
      "epoch": 0.4343163538873995,
      "grad_norm": 0.3611498177051544,
      "learning_rate": 0.00012130780098213212,
      "loss": 1.1421,
      "step": 891
    },
    {
      "epoch": 0.4348038020960273,
      "grad_norm": 0.3429311513900757,
      "learning_rate": 0.00012115738645252008,
      "loss": 1.1785,
      "step": 892
    },
    {
      "epoch": 0.43529125030465515,
      "grad_norm": 0.3426477313041687,
      "learning_rate": 0.0001210069217954515,
      "loss": 1.1375,
      "step": 893
    },
    {
      "epoch": 0.43577869851328294,
      "grad_norm": 0.35320284962654114,
      "learning_rate": 0.00012085640736741708,
      "loss": 1.1069,
      "step": 894
    },
    {
      "epoch": 0.4362661467219108,
      "grad_norm": 0.37135952711105347,
      "learning_rate": 0.00012070584352502535,
      "loss": 1.1532,
      "step": 895
    },
    {
      "epoch": 0.4367535949305386,
      "grad_norm": 0.39013463258743286,
      "learning_rate": 0.00012055523062500195,
      "loss": 1.0486,
      "step": 896
    },
    {
      "epoch": 0.43724104313916645,
      "grad_norm": 0.3446867763996124,
      "learning_rate": 0.00012040456902418882,
      "loss": 1.1962,
      "step": 897
    },
    {
      "epoch": 0.4377284913477943,
      "grad_norm": 0.3435569703578949,
      "learning_rate": 0.00012025385907954324,
      "loss": 1.2106,
      "step": 898
    },
    {
      "epoch": 0.43821593955642213,
      "grad_norm": 0.3562975823879242,
      "learning_rate": 0.0001201031011481369,
      "loss": 1.1922,
      "step": 899
    },
    {
      "epoch": 0.43870338776504997,
      "grad_norm": 0.38896191120147705,
      "learning_rate": 0.00011995229558715541,
      "loss": 1.0976,
      "step": 900
    },
    {
      "epoch": 0.4391908359736778,
      "grad_norm": 0.3850138783454895,
      "learning_rate": 0.00011980144275389706,
      "loss": 1.0942,
      "step": 901
    },
    {
      "epoch": 0.43967828418230565,
      "grad_norm": 0.41809114813804626,
      "learning_rate": 0.00011965054300577226,
      "loss": 1.203,
      "step": 902
    },
    {
      "epoch": 0.4401657323909335,
      "grad_norm": 0.3288028836250305,
      "learning_rate": 0.00011949959670030244,
      "loss": 1.1751,
      "step": 903
    },
    {
      "epoch": 0.4406531805995613,
      "grad_norm": 0.3803359568119049,
      "learning_rate": 0.00011934860419511942,
      "loss": 1.1751,
      "step": 904
    },
    {
      "epoch": 0.4411406288081891,
      "grad_norm": 0.30799826979637146,
      "learning_rate": 0.00011919756584796449,
      "loss": 1.1237,
      "step": 905
    },
    {
      "epoch": 0.44162807701681694,
      "grad_norm": 0.4452258348464966,
      "learning_rate": 0.00011904648201668754,
      "loss": 1.1655,
      "step": 906
    },
    {
      "epoch": 0.4421155252254448,
      "grad_norm": 0.3488793671131134,
      "learning_rate": 0.00011889535305924618,
      "loss": 1.1729,
      "step": 907
    },
    {
      "epoch": 0.4426029734340726,
      "grad_norm": 0.42165330052375793,
      "learning_rate": 0.000118744179333705,
      "loss": 1.1175,
      "step": 908
    },
    {
      "epoch": 0.44309042164270046,
      "grad_norm": 0.37287434935569763,
      "learning_rate": 0.00011859296119823459,
      "loss": 1.1214,
      "step": 909
    },
    {
      "epoch": 0.4435778698513283,
      "grad_norm": 0.39584994316101074,
      "learning_rate": 0.00011844169901111082,
      "loss": 1.1547,
      "step": 910
    },
    {
      "epoch": 0.44406531805995614,
      "grad_norm": 0.3444364070892334,
      "learning_rate": 0.00011829039313071393,
      "loss": 1.0781,
      "step": 911
    },
    {
      "epoch": 0.444552766268584,
      "grad_norm": 0.39970532059669495,
      "learning_rate": 0.00011813904391552759,
      "loss": 1.1958,
      "step": 912
    },
    {
      "epoch": 0.4450402144772118,
      "grad_norm": 0.3787233531475067,
      "learning_rate": 0.00011798765172413826,
      "loss": 1.1973,
      "step": 913
    },
    {
      "epoch": 0.44552766268583965,
      "grad_norm": 0.3653548061847687,
      "learning_rate": 0.00011783621691523415,
      "loss": 1.1862,
      "step": 914
    },
    {
      "epoch": 0.44601511089446744,
      "grad_norm": 0.41322481632232666,
      "learning_rate": 0.00011768473984760447,
      "loss": 1.0597,
      "step": 915
    },
    {
      "epoch": 0.4465025591030953,
      "grad_norm": 0.3496185541152954,
      "learning_rate": 0.00011753322088013853,
      "loss": 1.1075,
      "step": 916
    },
    {
      "epoch": 0.4469900073117231,
      "grad_norm": 0.3806665539741516,
      "learning_rate": 0.00011738166037182492,
      "loss": 1.1132,
      "step": 917
    },
    {
      "epoch": 0.44747745552035095,
      "grad_norm": 0.37213313579559326,
      "learning_rate": 0.0001172300586817507,
      "loss": 1.0747,
      "step": 918
    },
    {
      "epoch": 0.4479649037289788,
      "grad_norm": 0.3017314672470093,
      "learning_rate": 0.00011707841616910042,
      "loss": 1.0555,
      "step": 919
    },
    {
      "epoch": 0.44845235193760663,
      "grad_norm": 0.35828927159309387,
      "learning_rate": 0.00011692673319315541,
      "loss": 1.1143,
      "step": 920
    },
    {
      "epoch": 0.44893980014623447,
      "grad_norm": 0.4186861217021942,
      "learning_rate": 0.00011677501011329283,
      "loss": 0.9841,
      "step": 921
    },
    {
      "epoch": 0.4494272483548623,
      "grad_norm": 0.4104650914669037,
      "learning_rate": 0.00011662324728898486,
      "loss": 1.0589,
      "step": 922
    },
    {
      "epoch": 0.44991469656349015,
      "grad_norm": 0.31428661942481995,
      "learning_rate": 0.00011647144507979788,
      "loss": 1.0162,
      "step": 923
    },
    {
      "epoch": 0.450402144772118,
      "grad_norm": 0.35930880904197693,
      "learning_rate": 0.00011631960384539157,
      "loss": 1.1889,
      "step": 924
    },
    {
      "epoch": 0.45088959298074577,
      "grad_norm": 0.38938355445861816,
      "learning_rate": 0.00011616772394551802,
      "loss": 1.1148,
      "step": 925
    },
    {
      "epoch": 0.4513770411893736,
      "grad_norm": 0.3785998821258545,
      "learning_rate": 0.00011601580574002102,
      "loss": 1.14,
      "step": 926
    },
    {
      "epoch": 0.45186448939800145,
      "grad_norm": 0.41007423400878906,
      "learning_rate": 0.000115863849588835,
      "loss": 1.0682,
      "step": 927
    },
    {
      "epoch": 0.4523519376066293,
      "grad_norm": 0.3938082456588745,
      "learning_rate": 0.00011571185585198445,
      "loss": 1.3111,
      "step": 928
    },
    {
      "epoch": 0.4528393858152571,
      "grad_norm": 0.35383960604667664,
      "learning_rate": 0.00011555982488958274,
      "loss": 1.1986,
      "step": 929
    },
    {
      "epoch": 0.45332683402388496,
      "grad_norm": 0.3635252118110657,
      "learning_rate": 0.00011540775706183156,
      "loss": 1.07,
      "step": 930
    },
    {
      "epoch": 0.4538142822325128,
      "grad_norm": 0.3247283399105072,
      "learning_rate": 0.00011525565272901988,
      "loss": 1.1154,
      "step": 931
    },
    {
      "epoch": 0.45430173044114064,
      "grad_norm": 0.36164093017578125,
      "learning_rate": 0.00011510351225152321,
      "loss": 1.0798,
      "step": 932
    },
    {
      "epoch": 0.4547891786497685,
      "grad_norm": 0.3158092498779297,
      "learning_rate": 0.00011495133598980263,
      "loss": 1.065,
      "step": 933
    },
    {
      "epoch": 0.4552766268583963,
      "grad_norm": 0.39821863174438477,
      "learning_rate": 0.00011479912430440409,
      "loss": 1.0929,
      "step": 934
    },
    {
      "epoch": 0.45576407506702415,
      "grad_norm": 0.4439769685268402,
      "learning_rate": 0.00011464687755595736,
      "loss": 1.1168,
      "step": 935
    },
    {
      "epoch": 0.45625152327565194,
      "grad_norm": 0.31098422408103943,
      "learning_rate": 0.00011449459610517537,
      "loss": 1.0636,
      "step": 936
    },
    {
      "epoch": 0.4567389714842798,
      "grad_norm": 0.3414660692214966,
      "learning_rate": 0.00011434228031285328,
      "loss": 1.1077,
      "step": 937
    },
    {
      "epoch": 0.4572264196929076,
      "grad_norm": 0.3762691915035248,
      "learning_rate": 0.00011418993053986748,
      "loss": 1.1334,
      "step": 938
    },
    {
      "epoch": 0.45771386790153545,
      "grad_norm": 0.35831066966056824,
      "learning_rate": 0.00011403754714717505,
      "loss": 1.15,
      "step": 939
    },
    {
      "epoch": 0.4582013161101633,
      "grad_norm": 0.36059486865997314,
      "learning_rate": 0.00011388513049581261,
      "loss": 1.1737,
      "step": 940
    },
    {
      "epoch": 0.45868876431879113,
      "grad_norm": 0.4233929514884949,
      "learning_rate": 0.00011373268094689562,
      "loss": 1.06,
      "step": 941
    },
    {
      "epoch": 0.45917621252741897,
      "grad_norm": 0.34159737825393677,
      "learning_rate": 0.00011358019886161743,
      "loss": 1.1018,
      "step": 942
    },
    {
      "epoch": 0.4596636607360468,
      "grad_norm": 0.3699301779270172,
      "learning_rate": 0.00011342768460124856,
      "loss": 1.0605,
      "step": 943
    },
    {
      "epoch": 0.46015110894467465,
      "grad_norm": 0.33748912811279297,
      "learning_rate": 0.00011327513852713567,
      "loss": 1.1299,
      "step": 944
    },
    {
      "epoch": 0.4606385571533025,
      "grad_norm": 0.3668537735939026,
      "learning_rate": 0.00011312256100070091,
      "loss": 1.048,
      "step": 945
    },
    {
      "epoch": 0.46112600536193027,
      "grad_norm": 0.3471434414386749,
      "learning_rate": 0.00011296995238344084,
      "loss": 1.1807,
      "step": 946
    },
    {
      "epoch": 0.4616134535705581,
      "grad_norm": 0.31438618898391724,
      "learning_rate": 0.00011281731303692575,
      "loss": 1.0701,
      "step": 947
    },
    {
      "epoch": 0.46210090177918595,
      "grad_norm": 0.35458657145500183,
      "learning_rate": 0.00011266464332279864,
      "loss": 1.0558,
      "step": 948
    },
    {
      "epoch": 0.4625883499878138,
      "grad_norm": 0.387273371219635,
      "learning_rate": 0.00011251194360277462,
      "loss": 1.21,
      "step": 949
    },
    {
      "epoch": 0.4630757981964416,
      "grad_norm": 0.3416357636451721,
      "learning_rate": 0.00011235921423863978,
      "loss": 1.0773,
      "step": 950
    },
    {
      "epoch": 0.46356324640506946,
      "grad_norm": 0.3603675663471222,
      "learning_rate": 0.00011220645559225042,
      "loss": 1.1588,
      "step": 951
    },
    {
      "epoch": 0.4640506946136973,
      "grad_norm": 0.39804136753082275,
      "learning_rate": 0.0001120536680255323,
      "loss": 1.0687,
      "step": 952
    },
    {
      "epoch": 0.46453814282232514,
      "grad_norm": 0.3613085448741913,
      "learning_rate": 0.00011190085190047968,
      "loss": 1.1643,
      "step": 953
    },
    {
      "epoch": 0.465025591030953,
      "grad_norm": 0.4029476046562195,
      "learning_rate": 0.00011174800757915444,
      "loss": 1.1701,
      "step": 954
    },
    {
      "epoch": 0.4655130392395808,
      "grad_norm": 0.41172125935554504,
      "learning_rate": 0.00011159513542368529,
      "loss": 1.1043,
      "step": 955
    },
    {
      "epoch": 0.4660004874482086,
      "grad_norm": 0.36017680168151855,
      "learning_rate": 0.00011144223579626689,
      "loss": 1.0258,
      "step": 956
    },
    {
      "epoch": 0.46648793565683644,
      "grad_norm": 0.39928194880485535,
      "learning_rate": 0.00011128930905915897,
      "loss": 1.1159,
      "step": 957
    },
    {
      "epoch": 0.4669753838654643,
      "grad_norm": 0.41062191128730774,
      "learning_rate": 0.00011113635557468555,
      "loss": 1.1225,
      "step": 958
    },
    {
      "epoch": 0.4674628320740921,
      "grad_norm": 0.36912107467651367,
      "learning_rate": 0.00011098337570523396,
      "loss": 1.0881,
      "step": 959
    },
    {
      "epoch": 0.46795028028271995,
      "grad_norm": 0.3492516279220581,
      "learning_rate": 0.00011083036981325403,
      "loss": 1.0598,
      "step": 960
    },
    {
      "epoch": 0.4684377284913478,
      "grad_norm": 0.4227747917175293,
      "learning_rate": 0.00011067733826125729,
      "loss": 1.2674,
      "step": 961
    },
    {
      "epoch": 0.46892517669997563,
      "grad_norm": 0.3656153678894043,
      "learning_rate": 0.00011052428141181604,
      "loss": 1.1262,
      "step": 962
    },
    {
      "epoch": 0.46941262490860347,
      "grad_norm": 0.3131183683872223,
      "learning_rate": 0.00011037119962756257,
      "loss": 1.0621,
      "step": 963
    },
    {
      "epoch": 0.4699000731172313,
      "grad_norm": 0.3493298590183258,
      "learning_rate": 0.00011021809327118817,
      "loss": 1.18,
      "step": 964
    },
    {
      "epoch": 0.47038752132585915,
      "grad_norm": 0.3524402379989624,
      "learning_rate": 0.00011006496270544235,
      "loss": 1.1393,
      "step": 965
    },
    {
      "epoch": 0.470874969534487,
      "grad_norm": 0.3299414813518524,
      "learning_rate": 0.00010991180829313208,
      "loss": 1.0965,
      "step": 966
    },
    {
      "epoch": 0.47136241774311477,
      "grad_norm": 0.3031936585903168,
      "learning_rate": 0.00010975863039712068,
      "loss": 1.0382,
      "step": 967
    },
    {
      "epoch": 0.4718498659517426,
      "grad_norm": 0.31835681200027466,
      "learning_rate": 0.00010960542938032729,
      "loss": 1.115,
      "step": 968
    },
    {
      "epoch": 0.47233731416037045,
      "grad_norm": 0.39866137504577637,
      "learning_rate": 0.00010945220560572562,
      "loss": 1.1216,
      "step": 969
    },
    {
      "epoch": 0.4728247623689983,
      "grad_norm": 0.3873158395290375,
      "learning_rate": 0.00010929895943634343,
      "loss": 1.0729,
      "step": 970
    },
    {
      "epoch": 0.4733122105776261,
      "grad_norm": 0.31303170323371887,
      "learning_rate": 0.00010914569123526157,
      "loss": 1.1368,
      "step": 971
    },
    {
      "epoch": 0.47379965878625396,
      "grad_norm": 0.38255077600479126,
      "learning_rate": 0.00010899240136561299,
      "loss": 1.1969,
      "step": 972
    },
    {
      "epoch": 0.4742871069948818,
      "grad_norm": 0.4179987609386444,
      "learning_rate": 0.00010883909019058203,
      "loss": 1.2155,
      "step": 973
    },
    {
      "epoch": 0.47477455520350964,
      "grad_norm": 0.3629905581474304,
      "learning_rate": 0.00010868575807340351,
      "loss": 1.1925,
      "step": 974
    },
    {
      "epoch": 0.4752620034121375,
      "grad_norm": 0.35936662554740906,
      "learning_rate": 0.00010853240537736184,
      "loss": 1.2603,
      "step": 975
    },
    {
      "epoch": 0.4757494516207653,
      "grad_norm": 0.38720080256462097,
      "learning_rate": 0.00010837903246579022,
      "loss": 1.2434,
      "step": 976
    },
    {
      "epoch": 0.4762368998293931,
      "grad_norm": 0.4208745062351227,
      "learning_rate": 0.00010822563970206973,
      "loss": 1.2084,
      "step": 977
    },
    {
      "epoch": 0.47672434803802094,
      "grad_norm": 0.38924330472946167,
      "learning_rate": 0.00010807222744962849,
      "loss": 0.9611,
      "step": 978
    },
    {
      "epoch": 0.4772117962466488,
      "grad_norm": 0.3220176100730896,
      "learning_rate": 0.00010791879607194078,
      "loss": 1.1464,
      "step": 979
    },
    {
      "epoch": 0.4776992444552766,
      "grad_norm": 0.36164960265159607,
      "learning_rate": 0.00010776534593252616,
      "loss": 1.0847,
      "step": 980
    },
    {
      "epoch": 0.47818669266390446,
      "grad_norm": 0.4573691785335541,
      "learning_rate": 0.0001076118773949488,
      "loss": 1.1267,
      "step": 981
    },
    {
      "epoch": 0.4786741408725323,
      "grad_norm": 0.39894211292266846,
      "learning_rate": 0.00010745839082281621,
      "loss": 1.2592,
      "step": 982
    },
    {
      "epoch": 0.47916158908116013,
      "grad_norm": 0.3292525112628937,
      "learning_rate": 0.00010730488657977884,
      "loss": 1.0486,
      "step": 983
    },
    {
      "epoch": 0.47964903728978797,
      "grad_norm": 0.4128897786140442,
      "learning_rate": 0.00010715136502952893,
      "loss": 1.0303,
      "step": 984
    },
    {
      "epoch": 0.4801364854984158,
      "grad_norm": 0.35075098276138306,
      "learning_rate": 0.00010699782653579973,
      "loss": 1.1236,
      "step": 985
    },
    {
      "epoch": 0.48062393370704365,
      "grad_norm": 0.33849889039993286,
      "learning_rate": 0.00010684427146236457,
      "loss": 1.2145,
      "step": 986
    },
    {
      "epoch": 0.48111138191567143,
      "grad_norm": 0.4169270992279053,
      "learning_rate": 0.00010669070017303618,
      "loss": 1.1762,
      "step": 987
    },
    {
      "epoch": 0.48159883012429927,
      "grad_norm": 0.29890525341033936,
      "learning_rate": 0.0001065371130316656,
      "loss": 1.0611,
      "step": 988
    },
    {
      "epoch": 0.4820862783329271,
      "grad_norm": 0.33520928025245667,
      "learning_rate": 0.00010638351040214156,
      "loss": 1.1776,
      "step": 989
    },
    {
      "epoch": 0.48257372654155495,
      "grad_norm": 0.38371822237968445,
      "learning_rate": 0.00010622989264838934,
      "loss": 1.1733,
      "step": 990
    },
    {
      "epoch": 0.4830611747501828,
      "grad_norm": 0.36155542731285095,
      "learning_rate": 0.00010607626013437009,
      "loss": 1.2099,
      "step": 991
    },
    {
      "epoch": 0.4835486229588106,
      "grad_norm": 0.390323668718338,
      "learning_rate": 0.00010592261322408004,
      "loss": 1.0072,
      "step": 992
    },
    {
      "epoch": 0.48403607116743846,
      "grad_norm": 0.34715718030929565,
      "learning_rate": 0.00010576895228154935,
      "loss": 1.1499,
      "step": 993
    },
    {
      "epoch": 0.4845235193760663,
      "grad_norm": 0.40919023752212524,
      "learning_rate": 0.00010561527767084165,
      "loss": 1.0154,
      "step": 994
    },
    {
      "epoch": 0.48501096758469414,
      "grad_norm": 0.3707391619682312,
      "learning_rate": 0.0001054615897560527,
      "loss": 1.1641,
      "step": 995
    },
    {
      "epoch": 0.485498415793322,
      "grad_norm": 0.394755095243454,
      "learning_rate": 0.00010530788890130995,
      "loss": 1.151,
      "step": 996
    },
    {
      "epoch": 0.4859858640019498,
      "grad_norm": 0.3748216927051544,
      "learning_rate": 0.00010515417547077149,
      "loss": 1.1462,
      "step": 997
    },
    {
      "epoch": 0.4864733122105776,
      "grad_norm": 0.3286537230014801,
      "learning_rate": 0.00010500044982862519,
      "loss": 1.1958,
      "step": 998
    },
    {
      "epoch": 0.48696076041920544,
      "grad_norm": 0.39391735196113586,
      "learning_rate": 0.00010484671233908779,
      "loss": 1.2084,
      "step": 999
    },
    {
      "epoch": 0.4874482086278333,
      "grad_norm": 0.31442493200302124,
      "learning_rate": 0.00010469296336640417,
      "loss": 1.0804,
      "step": 1000
    },
    {
      "epoch": 0.4879356568364611,
      "grad_norm": 0.39494597911834717,
      "learning_rate": 0.00010453920327484641,
      "loss": 1.0454,
      "step": 1001
    },
    {
      "epoch": 0.48842310504508896,
      "grad_norm": 0.3640933930873871,
      "learning_rate": 0.00010438543242871295,
      "loss": 1.1034,
      "step": 1002
    },
    {
      "epoch": 0.4889105532537168,
      "grad_norm": 0.4298470914363861,
      "learning_rate": 0.00010423165119232765,
      "loss": 1.1456,
      "step": 1003
    },
    {
      "epoch": 0.48939800146234463,
      "grad_norm": 0.36693236231803894,
      "learning_rate": 0.000104077859930039,
      "loss": 1.0827,
      "step": 1004
    },
    {
      "epoch": 0.4898854496709725,
      "grad_norm": 0.3547927141189575,
      "learning_rate": 0.0001039240590062193,
      "loss": 1.1993,
      "step": 1005
    },
    {
      "epoch": 0.4903728978796003,
      "grad_norm": 0.339211642742157,
      "learning_rate": 0.00010377024878526369,
      "loss": 1.1297,
      "step": 1006
    },
    {
      "epoch": 0.49086034608822815,
      "grad_norm": 0.35470589995384216,
      "learning_rate": 0.0001036164296315894,
      "loss": 1.1887,
      "step": 1007
    },
    {
      "epoch": 0.49134779429685593,
      "grad_norm": 0.3784750699996948,
      "learning_rate": 0.00010346260190963468,
      "loss": 1.129,
      "step": 1008
    },
    {
      "epoch": 0.49183524250548377,
      "grad_norm": 0.3729405999183655,
      "learning_rate": 0.00010330876598385826,
      "loss": 1.1308,
      "step": 1009
    },
    {
      "epoch": 0.4923226907141116,
      "grad_norm": 0.3662540316581726,
      "learning_rate": 0.00010315492221873819,
      "loss": 1.0715,
      "step": 1010
    },
    {
      "epoch": 0.49281013892273945,
      "grad_norm": 0.37958213686943054,
      "learning_rate": 0.00010300107097877114,
      "loss": 1.0676,
      "step": 1011
    },
    {
      "epoch": 0.4932975871313673,
      "grad_norm": 0.3876712918281555,
      "learning_rate": 0.00010284721262847146,
      "loss": 1.1149,
      "step": 1012
    },
    {
      "epoch": 0.4937850353399951,
      "grad_norm": 0.3441215455532074,
      "learning_rate": 0.00010269334753237038,
      "loss": 1.011,
      "step": 1013
    },
    {
      "epoch": 0.49427248354862297,
      "grad_norm": 0.30004268884658813,
      "learning_rate": 0.0001025394760550151,
      "loss": 1.1088,
      "step": 1014
    },
    {
      "epoch": 0.4947599317572508,
      "grad_norm": 0.3573639690876007,
      "learning_rate": 0.00010238559856096792,
      "loss": 1.2165,
      "step": 1015
    },
    {
      "epoch": 0.49524737996587864,
      "grad_norm": 0.3701191246509552,
      "learning_rate": 0.00010223171541480543,
      "loss": 1.2121,
      "step": 1016
    },
    {
      "epoch": 0.4957348281745065,
      "grad_norm": 0.3917965590953827,
      "learning_rate": 0.00010207782698111757,
      "loss": 1.1857,
      "step": 1017
    },
    {
      "epoch": 0.49622227638313426,
      "grad_norm": 0.3756738305091858,
      "learning_rate": 0.00010192393362450685,
      "loss": 1.1381,
      "step": 1018
    },
    {
      "epoch": 0.4967097245917621,
      "grad_norm": 0.35566744208335876,
      "learning_rate": 0.00010177003570958738,
      "loss": 1.1536,
      "step": 1019
    },
    {
      "epoch": 0.49719717280038994,
      "grad_norm": 0.3449627459049225,
      "learning_rate": 0.00010161613360098417,
      "loss": 1.2262,
      "step": 1020
    },
    {
      "epoch": 0.4976846210090178,
      "grad_norm": 0.3484485149383545,
      "learning_rate": 0.00010146222766333209,
      "loss": 1.1646,
      "step": 1021
    },
    {
      "epoch": 0.4981720692176456,
      "grad_norm": 0.36827757954597473,
      "learning_rate": 0.00010130831826127507,
      "loss": 1.0605,
      "step": 1022
    },
    {
      "epoch": 0.49865951742627346,
      "grad_norm": 0.3689895272254944,
      "learning_rate": 0.00010115440575946533,
      "loss": 1.0536,
      "step": 1023
    },
    {
      "epoch": 0.4991469656349013,
      "grad_norm": 0.3833921551704407,
      "learning_rate": 0.00010100049052256235,
      "loss": 1.0727,
      "step": 1024
    },
    {
      "epoch": 0.49963441384352913,
      "grad_norm": 0.37425750494003296,
      "learning_rate": 0.00010084657291523212,
      "loss": 1.1639,
      "step": 1025
    },
    {
      "epoch": 0.5001218620521569,
      "grad_norm": 0.38358545303344727,
      "learning_rate": 0.00010069265330214626,
      "loss": 1.2006,
      "step": 1026
    },
    {
      "epoch": 0.5006093102607848,
      "grad_norm": 0.37975242733955383,
      "learning_rate": 0.00010053873204798112,
      "loss": 1.2182,
      "step": 1027
    },
    {
      "epoch": 0.5010967584694126,
      "grad_norm": 0.33467257022857666,
      "learning_rate": 0.000100384809517417,
      "loss": 1.0435,
      "step": 1028
    },
    {
      "epoch": 0.5015842066780405,
      "grad_norm": 0.3885016441345215,
      "learning_rate": 0.00010023088607513711,
      "loss": 1.0711,
      "step": 1029
    },
    {
      "epoch": 0.5020716548866683,
      "grad_norm": 0.35991692543029785,
      "learning_rate": 0.00010007696208582688,
      "loss": 1.2578,
      "step": 1030
    },
    {
      "epoch": 0.5025591030952962,
      "grad_norm": 0.42300593852996826,
      "learning_rate": 9.992303791417313e-05,
      "loss": 1.094,
      "step": 1031
    },
    {
      "epoch": 0.503046551303924,
      "grad_norm": 0.3597909212112427,
      "learning_rate": 9.976911392486294e-05,
      "loss": 1.1153,
      "step": 1032
    },
    {
      "epoch": 0.5035339995125518,
      "grad_norm": 0.41715627908706665,
      "learning_rate": 9.961519048258304e-05,
      "loss": 1.2809,
      "step": 1033
    },
    {
      "epoch": 0.5040214477211796,
      "grad_norm": 0.4352867603302002,
      "learning_rate": 9.94612679520189e-05,
      "loss": 1.2658,
      "step": 1034
    },
    {
      "epoch": 0.5045088959298074,
      "grad_norm": 0.3663417398929596,
      "learning_rate": 9.930734669785378e-05,
      "loss": 1.0499,
      "step": 1035
    },
    {
      "epoch": 0.5049963441384353,
      "grad_norm": 0.3860229253768921,
      "learning_rate": 9.915342708476789e-05,
      "loss": 1.1598,
      "step": 1036
    },
    {
      "epoch": 0.5054837923470631,
      "grad_norm": 0.33618807792663574,
      "learning_rate": 9.899950947743767e-05,
      "loss": 1.07,
      "step": 1037
    },
    {
      "epoch": 0.505971240555691,
      "grad_norm": 0.37883251905441284,
      "learning_rate": 9.884559424053472e-05,
      "loss": 1.1359,
      "step": 1038
    },
    {
      "epoch": 0.5064586887643188,
      "grad_norm": 0.37886759638786316,
      "learning_rate": 9.869168173872493e-05,
      "loss": 1.0496,
      "step": 1039
    },
    {
      "epoch": 0.5069461369729467,
      "grad_norm": 0.37057310342788696,
      "learning_rate": 9.853777233666794e-05,
      "loss": 1.1659,
      "step": 1040
    },
    {
      "epoch": 0.5074335851815744,
      "grad_norm": 0.3882743716239929,
      "learning_rate": 9.838386639901584e-05,
      "loss": 0.9907,
      "step": 1041
    },
    {
      "epoch": 0.5079210333902023,
      "grad_norm": 0.3489919900894165,
      "learning_rate": 9.822996429041263e-05,
      "loss": 1.1121,
      "step": 1042
    },
    {
      "epoch": 0.5084084815988301,
      "grad_norm": 0.3696196675300598,
      "learning_rate": 9.807606637549318e-05,
      "loss": 1.0515,
      "step": 1043
    },
    {
      "epoch": 0.5088959298074579,
      "grad_norm": 0.4114032983779907,
      "learning_rate": 9.792217301888245e-05,
      "loss": 1.1051,
      "step": 1044
    },
    {
      "epoch": 0.5093833780160858,
      "grad_norm": 0.3358118236064911,
      "learning_rate": 9.77682845851946e-05,
      "loss": 1.0681,
      "step": 1045
    },
    {
      "epoch": 0.5098708262247136,
      "grad_norm": 0.412111759185791,
      "learning_rate": 9.761440143903209e-05,
      "loss": 1.2802,
      "step": 1046
    },
    {
      "epoch": 0.5103582744333415,
      "grad_norm": 0.3456852436065674,
      "learning_rate": 9.746052394498492e-05,
      "loss": 1.1589,
      "step": 1047
    },
    {
      "epoch": 0.5108457226419693,
      "grad_norm": 0.5172877907752991,
      "learning_rate": 9.730665246762964e-05,
      "loss": 1.151,
      "step": 1048
    },
    {
      "epoch": 0.5113331708505972,
      "grad_norm": 0.403706818819046,
      "learning_rate": 9.715278737152853e-05,
      "loss": 1.0278,
      "step": 1049
    },
    {
      "epoch": 0.5118206190592249,
      "grad_norm": 0.35117772221565247,
      "learning_rate": 9.699892902122886e-05,
      "loss": 1.1306,
      "step": 1050
    },
    {
      "epoch": 0.5123080672678528,
      "grad_norm": 0.31138819456100464,
      "learning_rate": 9.684507778126184e-05,
      "loss": 1.195,
      "step": 1051
    },
    {
      "epoch": 0.5127955154764806,
      "grad_norm": 0.3926421105861664,
      "learning_rate": 9.669123401614174e-05,
      "loss": 1.1606,
      "step": 1052
    },
    {
      "epoch": 0.5132829636851085,
      "grad_norm": 0.36821529269218445,
      "learning_rate": 9.653739809036533e-05,
      "loss": 1.0833,
      "step": 1053
    },
    {
      "epoch": 0.5137704118937363,
      "grad_norm": 0.43311622738838196,
      "learning_rate": 9.638357036841064e-05,
      "loss": 1.121,
      "step": 1054
    },
    {
      "epoch": 0.5142578601023641,
      "grad_norm": 0.35603591799736023,
      "learning_rate": 9.622975121473631e-05,
      "loss": 1.1267,
      "step": 1055
    },
    {
      "epoch": 0.514745308310992,
      "grad_norm": 0.3597733974456787,
      "learning_rate": 9.607594099378072e-05,
      "loss": 1.0459,
      "step": 1056
    },
    {
      "epoch": 0.5152327565196198,
      "grad_norm": 0.32538890838623047,
      "learning_rate": 9.592214006996104e-05,
      "loss": 1.1216,
      "step": 1057
    },
    {
      "epoch": 0.5157202047282476,
      "grad_norm": 0.38531047105789185,
      "learning_rate": 9.57683488076724e-05,
      "loss": 1.0783,
      "step": 1058
    },
    {
      "epoch": 0.5162076529368754,
      "grad_norm": 0.44185683131217957,
      "learning_rate": 9.561456757128707e-05,
      "loss": 1.2387,
      "step": 1059
    },
    {
      "epoch": 0.5166951011455033,
      "grad_norm": 0.32214614748954773,
      "learning_rate": 9.546079672515361e-05,
      "loss": 1.2315,
      "step": 1060
    },
    {
      "epoch": 0.5171825493541311,
      "grad_norm": 0.3528206944465637,
      "learning_rate": 9.530703663359587e-05,
      "loss": 1.0234,
      "step": 1061
    },
    {
      "epoch": 0.517669997562759,
      "grad_norm": 0.4029186964035034,
      "learning_rate": 9.515328766091222e-05,
      "loss": 1.011,
      "step": 1062
    },
    {
      "epoch": 0.5181574457713868,
      "grad_norm": 0.41676968336105347,
      "learning_rate": 9.499955017137484e-05,
      "loss": 1.1195,
      "step": 1063
    },
    {
      "epoch": 0.5186448939800147,
      "grad_norm": 0.4211922287940979,
      "learning_rate": 9.484582452922855e-05,
      "loss": 1.1499,
      "step": 1064
    },
    {
      "epoch": 0.5191323421886425,
      "grad_norm": 0.3917767107486725,
      "learning_rate": 9.469211109869006e-05,
      "loss": 1.0296,
      "step": 1065
    },
    {
      "epoch": 0.5196197903972702,
      "grad_norm": 0.4115936756134033,
      "learning_rate": 9.453841024394733e-05,
      "loss": 1.2161,
      "step": 1066
    },
    {
      "epoch": 0.5201072386058981,
      "grad_norm": 0.3712995946407318,
      "learning_rate": 9.43847223291584e-05,
      "loss": 1.1072,
      "step": 1067
    },
    {
      "epoch": 0.5205946868145259,
      "grad_norm": 0.40360167622566223,
      "learning_rate": 9.423104771845064e-05,
      "loss": 1.1405,
      "step": 1068
    },
    {
      "epoch": 0.5210821350231538,
      "grad_norm": 0.38009801506996155,
      "learning_rate": 9.407738677592e-05,
      "loss": 1.1727,
      "step": 1069
    },
    {
      "epoch": 0.5215695832317816,
      "grad_norm": 0.40101245045661926,
      "learning_rate": 9.392373986562993e-05,
      "loss": 0.9816,
      "step": 1070
    },
    {
      "epoch": 0.5220570314404095,
      "grad_norm": 0.40247631072998047,
      "learning_rate": 9.377010735161069e-05,
      "loss": 1.018,
      "step": 1071
    },
    {
      "epoch": 0.5225444796490373,
      "grad_norm": 0.38892799615859985,
      "learning_rate": 9.361648959785846e-05,
      "loss": 1.1797,
      "step": 1072
    },
    {
      "epoch": 0.5230319278576652,
      "grad_norm": 0.38088804483413696,
      "learning_rate": 9.346288696833441e-05,
      "loss": 1.2571,
      "step": 1073
    },
    {
      "epoch": 0.523519376066293,
      "grad_norm": 0.4315434396266937,
      "learning_rate": 9.330929982696386e-05,
      "loss": 1.0405,
      "step": 1074
    },
    {
      "epoch": 0.5240068242749207,
      "grad_norm": 0.3794160783290863,
      "learning_rate": 9.315572853763544e-05,
      "loss": 1.1522,
      "step": 1075
    },
    {
      "epoch": 0.5244942724835486,
      "grad_norm": 0.3297971189022064,
      "learning_rate": 9.30021734642003e-05,
      "loss": 1.2665,
      "step": 1076
    },
    {
      "epoch": 0.5249817206921764,
      "grad_norm": 0.3364350199699402,
      "learning_rate": 9.28486349704711e-05,
      "loss": 1.067,
      "step": 1077
    },
    {
      "epoch": 0.5254691689008043,
      "grad_norm": 0.3259110748767853,
      "learning_rate": 9.269511342022115e-05,
      "loss": 1.1788,
      "step": 1078
    },
    {
      "epoch": 0.5259566171094321,
      "grad_norm": 0.4151977300643921,
      "learning_rate": 9.25416091771838e-05,
      "loss": 1.0303,
      "step": 1079
    },
    {
      "epoch": 0.52644406531806,
      "grad_norm": 0.29196158051490784,
      "learning_rate": 9.238812260505124e-05,
      "loss": 1.0754,
      "step": 1080
    },
    {
      "epoch": 0.5269315135266878,
      "grad_norm": 0.4657594859600067,
      "learning_rate": 9.223465406747383e-05,
      "loss": 1.1716,
      "step": 1081
    },
    {
      "epoch": 0.5274189617353157,
      "grad_norm": 0.35524073243141174,
      "learning_rate": 9.208120392805926e-05,
      "loss": 0.9974,
      "step": 1082
    },
    {
      "epoch": 0.5279064099439434,
      "grad_norm": 0.40014946460723877,
      "learning_rate": 9.192777255037155e-05,
      "loss": 1.1262,
      "step": 1083
    },
    {
      "epoch": 0.5283938581525713,
      "grad_norm": 0.4188205301761627,
      "learning_rate": 9.177436029793025e-05,
      "loss": 1.1687,
      "step": 1084
    },
    {
      "epoch": 0.5288813063611991,
      "grad_norm": 0.3595307767391205,
      "learning_rate": 9.16209675342098e-05,
      "loss": 1.0407,
      "step": 1085
    },
    {
      "epoch": 0.5293687545698269,
      "grad_norm": 0.37308305501937866,
      "learning_rate": 9.146759462263818e-05,
      "loss": 1.0952,
      "step": 1086
    },
    {
      "epoch": 0.5298562027784548,
      "grad_norm": 0.3582593500614166,
      "learning_rate": 9.131424192659653e-05,
      "loss": 1.1283,
      "step": 1087
    },
    {
      "epoch": 0.5303436509870826,
      "grad_norm": 0.37929674983024597,
      "learning_rate": 9.116090980941796e-05,
      "loss": 1.1726,
      "step": 1088
    },
    {
      "epoch": 0.5308310991957105,
      "grad_norm": 0.35771459341049194,
      "learning_rate": 9.100759863438702e-05,
      "loss": 1.1852,
      "step": 1089
    },
    {
      "epoch": 0.5313185474043383,
      "grad_norm": 0.39543354511260986,
      "learning_rate": 9.085430876473845e-05,
      "loss": 1.1532,
      "step": 1090
    },
    {
      "epoch": 0.5318059956129662,
      "grad_norm": 0.34235867857933044,
      "learning_rate": 9.070104056365657e-05,
      "loss": 1.133,
      "step": 1091
    },
    {
      "epoch": 0.5322934438215939,
      "grad_norm": 0.3834592401981354,
      "learning_rate": 9.054779439427441e-05,
      "loss": 1.2228,
      "step": 1092
    },
    {
      "epoch": 0.5327808920302218,
      "grad_norm": 0.34508809447288513,
      "learning_rate": 9.039457061967276e-05,
      "loss": 1.2762,
      "step": 1093
    },
    {
      "epoch": 0.5332683402388496,
      "grad_norm": 0.39939218759536743,
      "learning_rate": 9.024136960287931e-05,
      "loss": 1.0644,
      "step": 1094
    },
    {
      "epoch": 0.5337557884474775,
      "grad_norm": 0.4075930118560791,
      "learning_rate": 9.008819170686796e-05,
      "loss": 1.132,
      "step": 1095
    },
    {
      "epoch": 0.5342432366561053,
      "grad_norm": 0.3297237455844879,
      "learning_rate": 8.993503729455767e-05,
      "loss": 1.1041,
      "step": 1096
    },
    {
      "epoch": 0.5347306848647331,
      "grad_norm": 0.3641759753227234,
      "learning_rate": 8.978190672881184e-05,
      "loss": 1.0265,
      "step": 1097
    },
    {
      "epoch": 0.535218133073361,
      "grad_norm": 0.40472713112831116,
      "learning_rate": 8.962880037243746e-05,
      "loss": 1.1839,
      "step": 1098
    },
    {
      "epoch": 0.5357055812819888,
      "grad_norm": 0.36499154567718506,
      "learning_rate": 8.947571858818397e-05,
      "loss": 1.2158,
      "step": 1099
    },
    {
      "epoch": 0.5361930294906166,
      "grad_norm": 0.35857489705085754,
      "learning_rate": 8.932266173874276e-05,
      "loss": 1.0693,
      "step": 1100
    },
    {
      "epoch": 0.5366804776992444,
      "grad_norm": 0.3424220681190491,
      "learning_rate": 8.9169630186746e-05,
      "loss": 1.0986,
      "step": 1101
    },
    {
      "epoch": 0.5371679259078723,
      "grad_norm": 0.35947033762931824,
      "learning_rate": 8.901662429476607e-05,
      "loss": 1.143,
      "step": 1102
    },
    {
      "epoch": 0.5376553741165001,
      "grad_norm": 0.3797532021999359,
      "learning_rate": 8.886364442531447e-05,
      "loss": 0.9482,
      "step": 1103
    },
    {
      "epoch": 0.538142822325128,
      "grad_norm": 0.3743857741355896,
      "learning_rate": 8.871069094084102e-05,
      "loss": 1.0776,
      "step": 1104
    },
    {
      "epoch": 0.5386302705337558,
      "grad_norm": 0.34824487566947937,
      "learning_rate": 8.855776420373313e-05,
      "loss": 1.0339,
      "step": 1105
    },
    {
      "epoch": 0.5391177187423836,
      "grad_norm": 0.36332109570503235,
      "learning_rate": 8.840486457631475e-05,
      "loss": 1.1866,
      "step": 1106
    },
    {
      "epoch": 0.5396051669510115,
      "grad_norm": 0.30786654353141785,
      "learning_rate": 8.825199242084558e-05,
      "loss": 1.0333,
      "step": 1107
    },
    {
      "epoch": 0.5400926151596392,
      "grad_norm": 0.33310338854789734,
      "learning_rate": 8.809914809952033e-05,
      "loss": 1.051,
      "step": 1108
    },
    {
      "epoch": 0.5405800633682671,
      "grad_norm": 0.3896653652191162,
      "learning_rate": 8.79463319744677e-05,
      "loss": 1.1292,
      "step": 1109
    },
    {
      "epoch": 0.5410675115768949,
      "grad_norm": 0.35318389534950256,
      "learning_rate": 8.779354440774957e-05,
      "loss": 1.1307,
      "step": 1110
    },
    {
      "epoch": 0.5415549597855228,
      "grad_norm": 0.37618759274482727,
      "learning_rate": 8.764078576136026e-05,
      "loss": 1.0323,
      "step": 1111
    },
    {
      "epoch": 0.5420424079941506,
      "grad_norm": 0.36355626583099365,
      "learning_rate": 8.74880563972254e-05,
      "loss": 1.0024,
      "step": 1112
    },
    {
      "epoch": 0.5425298562027785,
      "grad_norm": 0.37075328826904297,
      "learning_rate": 8.733535667720138e-05,
      "loss": 1.1014,
      "step": 1113
    },
    {
      "epoch": 0.5430173044114063,
      "grad_norm": 0.38809290528297424,
      "learning_rate": 8.718268696307428e-05,
      "loss": 0.9998,
      "step": 1114
    },
    {
      "epoch": 0.5435047526200342,
      "grad_norm": 0.3356851041316986,
      "learning_rate": 8.703004761655917e-05,
      "loss": 1.1497,
      "step": 1115
    },
    {
      "epoch": 0.543992200828662,
      "grad_norm": 0.3875788450241089,
      "learning_rate": 8.687743899929913e-05,
      "loss": 1.3084,
      "step": 1116
    },
    {
      "epoch": 0.5444796490372897,
      "grad_norm": 0.30841848254203796,
      "learning_rate": 8.672486147286432e-05,
      "loss": 1.0747,
      "step": 1117
    },
    {
      "epoch": 0.5449670972459176,
      "grad_norm": 0.3297688066959381,
      "learning_rate": 8.657231539875148e-05,
      "loss": 1.212,
      "step": 1118
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 0.3741207420825958,
      "learning_rate": 8.64198011383826e-05,
      "loss": 1.154,
      "step": 1119
    },
    {
      "epoch": 0.5459419936631733,
      "grad_norm": 0.35746538639068604,
      "learning_rate": 8.626731905310442e-05,
      "loss": 1.2548,
      "step": 1120
    },
    {
      "epoch": 0.5464294418718011,
      "grad_norm": 0.3836494982242584,
      "learning_rate": 8.611486950418741e-05,
      "loss": 1.0074,
      "step": 1121
    },
    {
      "epoch": 0.546916890080429,
      "grad_norm": 0.3392050564289093,
      "learning_rate": 8.596245285282498e-05,
      "loss": 1.0208,
      "step": 1122
    },
    {
      "epoch": 0.5474043382890568,
      "grad_norm": 0.3157902657985687,
      "learning_rate": 8.581006946013252e-05,
      "loss": 1.0768,
      "step": 1123
    },
    {
      "epoch": 0.5478917864976847,
      "grad_norm": 0.3608684539794922,
      "learning_rate": 8.565771968714675e-05,
      "loss": 1.0902,
      "step": 1124
    },
    {
      "epoch": 0.5483792347063124,
      "grad_norm": 0.3302469253540039,
      "learning_rate": 8.550540389482466e-05,
      "loss": 1.1155,
      "step": 1125
    },
    {
      "epoch": 0.5488666829149403,
      "grad_norm": 0.35549214482307434,
      "learning_rate": 8.535312244404269e-05,
      "loss": 1.1235,
      "step": 1126
    },
    {
      "epoch": 0.5493541311235681,
      "grad_norm": 0.37752172350883484,
      "learning_rate": 8.520087569559592e-05,
      "loss": 1.0419,
      "step": 1127
    },
    {
      "epoch": 0.5498415793321959,
      "grad_norm": 0.46155112981796265,
      "learning_rate": 8.504866401019737e-05,
      "loss": 1.2448,
      "step": 1128
    },
    {
      "epoch": 0.5503290275408238,
      "grad_norm": 0.3703531324863434,
      "learning_rate": 8.489648774847683e-05,
      "loss": 1.1997,
      "step": 1129
    },
    {
      "epoch": 0.5508164757494516,
      "grad_norm": 0.38380300998687744,
      "learning_rate": 8.474434727098013e-05,
      "loss": 1.1803,
      "step": 1130
    },
    {
      "epoch": 0.5513039239580795,
      "grad_norm": 0.42736393213272095,
      "learning_rate": 8.459224293816846e-05,
      "loss": 1.2428,
      "step": 1131
    },
    {
      "epoch": 0.5517913721667073,
      "grad_norm": 0.40469399094581604,
      "learning_rate": 8.44401751104173e-05,
      "loss": 1.1447,
      "step": 1132
    },
    {
      "epoch": 0.5522788203753352,
      "grad_norm": 0.4133606255054474,
      "learning_rate": 8.428814414801558e-05,
      "loss": 1.0887,
      "step": 1133
    },
    {
      "epoch": 0.5527662685839629,
      "grad_norm": 0.35687559843063354,
      "learning_rate": 8.413615041116502e-05,
      "loss": 1.0438,
      "step": 1134
    },
    {
      "epoch": 0.5532537167925908,
      "grad_norm": 0.3688930571079254,
      "learning_rate": 8.398419425997903e-05,
      "loss": 1.1282,
      "step": 1135
    },
    {
      "epoch": 0.5537411650012186,
      "grad_norm": 0.29839542508125305,
      "learning_rate": 8.383227605448199e-05,
      "loss": 1.0962,
      "step": 1136
    },
    {
      "epoch": 0.5542286132098464,
      "grad_norm": 0.36062946915626526,
      "learning_rate": 8.368039615460844e-05,
      "loss": 1.1615,
      "step": 1137
    },
    {
      "epoch": 0.5547160614184743,
      "grad_norm": 0.37687721848487854,
      "learning_rate": 8.352855492020215e-05,
      "loss": 1.1831,
      "step": 1138
    },
    {
      "epoch": 0.5552035096271021,
      "grad_norm": 0.3692804276943207,
      "learning_rate": 8.337675271101518e-05,
      "loss": 1.1095,
      "step": 1139
    },
    {
      "epoch": 0.55569095783573,
      "grad_norm": 0.35587865114212036,
      "learning_rate": 8.322498988670718e-05,
      "loss": 1.009,
      "step": 1140
    },
    {
      "epoch": 0.5561784060443578,
      "grad_norm": 0.373296856880188,
      "learning_rate": 8.307326680684461e-05,
      "loss": 1.091,
      "step": 1141
    },
    {
      "epoch": 0.5566658542529856,
      "grad_norm": 0.42510151863098145,
      "learning_rate": 8.29215838308996e-05,
      "loss": 1.1292,
      "step": 1142
    },
    {
      "epoch": 0.5571533024616134,
      "grad_norm": 0.3493712544441223,
      "learning_rate": 8.27699413182493e-05,
      "loss": 1.0554,
      "step": 1143
    },
    {
      "epoch": 0.5576407506702413,
      "grad_norm": 0.39959388971328735,
      "learning_rate": 8.261833962817509e-05,
      "loss": 1.0718,
      "step": 1144
    },
    {
      "epoch": 0.5581281988788691,
      "grad_norm": 0.3636349141597748,
      "learning_rate": 8.246677911986152e-05,
      "loss": 1.1488,
      "step": 1145
    },
    {
      "epoch": 0.558615647087497,
      "grad_norm": 0.3723439574241638,
      "learning_rate": 8.231526015239557e-05,
      "loss": 1.0863,
      "step": 1146
    },
    {
      "epoch": 0.5591030952961248,
      "grad_norm": 0.3718213737010956,
      "learning_rate": 8.216378308476589e-05,
      "loss": 1.0069,
      "step": 1147
    },
    {
      "epoch": 0.5595905435047526,
      "grad_norm": 0.39794471859931946,
      "learning_rate": 8.201234827586178e-05,
      "loss": 1.2078,
      "step": 1148
    },
    {
      "epoch": 0.5600779917133805,
      "grad_norm": 0.34235015511512756,
      "learning_rate": 8.186095608447242e-05,
      "loss": 0.9326,
      "step": 1149
    },
    {
      "epoch": 0.5605654399220082,
      "grad_norm": 0.47578248381614685,
      "learning_rate": 8.170960686928609e-05,
      "loss": 1.259,
      "step": 1150
    },
    {
      "epoch": 0.5610528881306361,
      "grad_norm": 0.40527671575546265,
      "learning_rate": 8.155830098888922e-05,
      "loss": 0.9756,
      "step": 1151
    },
    {
      "epoch": 0.5615403363392639,
      "grad_norm": 0.3799116313457489,
      "learning_rate": 8.140703880176542e-05,
      "loss": 1.0295,
      "step": 1152
    },
    {
      "epoch": 0.5620277845478918,
      "grad_norm": 0.40373730659484863,
      "learning_rate": 8.125582066629502e-05,
      "loss": 1.0046,
      "step": 1153
    },
    {
      "epoch": 0.5625152327565196,
      "grad_norm": 0.32776764035224915,
      "learning_rate": 8.110464694075383e-05,
      "loss": 1.1579,
      "step": 1154
    },
    {
      "epoch": 0.5630026809651475,
      "grad_norm": 0.35236409306526184,
      "learning_rate": 8.09535179833125e-05,
      "loss": 1.0318,
      "step": 1155
    },
    {
      "epoch": 0.5634901291737753,
      "grad_norm": 0.3265283703804016,
      "learning_rate": 8.080243415203552e-05,
      "loss": 1.1214,
      "step": 1156
    },
    {
      "epoch": 0.5639775773824032,
      "grad_norm": 0.3723972737789154,
      "learning_rate": 8.065139580488061e-05,
      "loss": 1.0587,
      "step": 1157
    },
    {
      "epoch": 0.564465025591031,
      "grad_norm": 0.3946072459220886,
      "learning_rate": 8.050040329969761e-05,
      "loss": 0.9452,
      "step": 1158
    },
    {
      "epoch": 0.5649524737996587,
      "grad_norm": 0.3760214149951935,
      "learning_rate": 8.034945699422778e-05,
      "loss": 0.9506,
      "step": 1159
    },
    {
      "epoch": 0.5654399220082866,
      "grad_norm": 0.34320196509361267,
      "learning_rate": 8.019855724610296e-05,
      "loss": 1.1211,
      "step": 1160
    },
    {
      "epoch": 0.5659273702169144,
      "grad_norm": 0.3584868311882019,
      "learning_rate": 8.004770441284462e-05,
      "loss": 1.1019,
      "step": 1161
    },
    {
      "epoch": 0.5664148184255423,
      "grad_norm": 0.3501480519771576,
      "learning_rate": 7.98968988518631e-05,
      "loss": 1.0358,
      "step": 1162
    },
    {
      "epoch": 0.5669022666341701,
      "grad_norm": 0.3758191466331482,
      "learning_rate": 7.974614092045679e-05,
      "loss": 1.0262,
      "step": 1163
    },
    {
      "epoch": 0.567389714842798,
      "grad_norm": 0.34351179003715515,
      "learning_rate": 7.95954309758112e-05,
      "loss": 1.0973,
      "step": 1164
    },
    {
      "epoch": 0.5678771630514258,
      "grad_norm": 0.3510177433490753,
      "learning_rate": 7.944476937499803e-05,
      "loss": 1.2233,
      "step": 1165
    },
    {
      "epoch": 0.5683646112600537,
      "grad_norm": 0.4014304578304291,
      "learning_rate": 7.929415647497466e-05,
      "loss": 1.0638,
      "step": 1166
    },
    {
      "epoch": 0.5688520594686814,
      "grad_norm": 0.3942334055900574,
      "learning_rate": 7.914359263258295e-05,
      "loss": 1.0488,
      "step": 1167
    },
    {
      "epoch": 0.5693395076773092,
      "grad_norm": 0.3765864968299866,
      "learning_rate": 7.899307820454852e-05,
      "loss": 1.1342,
      "step": 1168
    },
    {
      "epoch": 0.5698269558859371,
      "grad_norm": 0.4033709764480591,
      "learning_rate": 7.884261354747994e-05,
      "loss": 1.1466,
      "step": 1169
    },
    {
      "epoch": 0.5703144040945649,
      "grad_norm": 0.41917553544044495,
      "learning_rate": 7.869219901786791e-05,
      "loss": 1.3071,
      "step": 1170
    },
    {
      "epoch": 0.5708018523031928,
      "grad_norm": 0.36985212564468384,
      "learning_rate": 7.854183497208428e-05,
      "loss": 1.011,
      "step": 1171
    },
    {
      "epoch": 0.5712893005118206,
      "grad_norm": 0.39377880096435547,
      "learning_rate": 7.839152176638134e-05,
      "loss": 1.1878,
      "step": 1172
    },
    {
      "epoch": 0.5717767487204485,
      "grad_norm": 0.3966328799724579,
      "learning_rate": 7.824125975689092e-05,
      "loss": 1.1757,
      "step": 1173
    },
    {
      "epoch": 0.5722641969290763,
      "grad_norm": 0.41750824451446533,
      "learning_rate": 7.809104929962357e-05,
      "loss": 1.0878,
      "step": 1174
    },
    {
      "epoch": 0.5727516451377042,
      "grad_norm": 0.44299137592315674,
      "learning_rate": 7.79408907504676e-05,
      "loss": 1.0769,
      "step": 1175
    },
    {
      "epoch": 0.5732390933463319,
      "grad_norm": 0.37647318840026855,
      "learning_rate": 7.779078446518853e-05,
      "loss": 1.074,
      "step": 1176
    },
    {
      "epoch": 0.5737265415549598,
      "grad_norm": 0.34447044134140015,
      "learning_rate": 7.764073079942786e-05,
      "loss": 1.067,
      "step": 1177
    },
    {
      "epoch": 0.5742139897635876,
      "grad_norm": 0.3282855749130249,
      "learning_rate": 7.749073010870252e-05,
      "loss": 1.1691,
      "step": 1178
    },
    {
      "epoch": 0.5747014379722154,
      "grad_norm": 0.38534215092658997,
      "learning_rate": 7.734078274840391e-05,
      "loss": 1.0661,
      "step": 1179
    },
    {
      "epoch": 0.5751888861808433,
      "grad_norm": 0.35340821743011475,
      "learning_rate": 7.719088907379706e-05,
      "loss": 1.2258,
      "step": 1180
    },
    {
      "epoch": 0.5756763343894711,
      "grad_norm": 0.353468120098114,
      "learning_rate": 7.704104944001982e-05,
      "loss": 1.0874,
      "step": 1181
    },
    {
      "epoch": 0.576163782598099,
      "grad_norm": 0.3499086797237396,
      "learning_rate": 7.6891264202082e-05,
      "loss": 1.066,
      "step": 1182
    },
    {
      "epoch": 0.5766512308067268,
      "grad_norm": 0.3244962990283966,
      "learning_rate": 7.674153371486453e-05,
      "loss": 1.1346,
      "step": 1183
    },
    {
      "epoch": 0.5771386790153546,
      "grad_norm": 0.4745854139328003,
      "learning_rate": 7.659185833311864e-05,
      "loss": 1.1834,
      "step": 1184
    },
    {
      "epoch": 0.5776261272239824,
      "grad_norm": 0.3582706153392792,
      "learning_rate": 7.644223841146492e-05,
      "loss": 1.1274,
      "step": 1185
    },
    {
      "epoch": 0.5781135754326103,
      "grad_norm": 0.3575795590877533,
      "learning_rate": 7.629267430439273e-05,
      "loss": 0.9748,
      "step": 1186
    },
    {
      "epoch": 0.5786010236412381,
      "grad_norm": 0.34705471992492676,
      "learning_rate": 7.614316636625899e-05,
      "loss": 0.9847,
      "step": 1187
    },
    {
      "epoch": 0.579088471849866,
      "grad_norm": 0.35855981707572937,
      "learning_rate": 7.599371495128763e-05,
      "loss": 1.2269,
      "step": 1188
    },
    {
      "epoch": 0.5795759200584938,
      "grad_norm": 0.3544778525829315,
      "learning_rate": 7.584432041356875e-05,
      "loss": 1.1568,
      "step": 1189
    },
    {
      "epoch": 0.5800633682671216,
      "grad_norm": 0.37013569474220276,
      "learning_rate": 7.569498310705756e-05,
      "loss": 1.0729,
      "step": 1190
    },
    {
      "epoch": 0.5805508164757495,
      "grad_norm": 0.34105226397514343,
      "learning_rate": 7.554570338557371e-05,
      "loss": 1.0768,
      "step": 1191
    },
    {
      "epoch": 0.5810382646843772,
      "grad_norm": 0.329398512840271,
      "learning_rate": 7.539648160280045e-05,
      "loss": 0.9802,
      "step": 1192
    },
    {
      "epoch": 0.5815257128930051,
      "grad_norm": 0.3747677803039551,
      "learning_rate": 7.524731811228374e-05,
      "loss": 1.1414,
      "step": 1193
    },
    {
      "epoch": 0.5820131611016329,
      "grad_norm": 0.4513600170612335,
      "learning_rate": 7.50982132674314e-05,
      "loss": 1.318,
      "step": 1194
    },
    {
      "epoch": 0.5825006093102608,
      "grad_norm": 0.3539467453956604,
      "learning_rate": 7.494916742151234e-05,
      "loss": 1.1125,
      "step": 1195
    },
    {
      "epoch": 0.5829880575188886,
      "grad_norm": 0.4409025311470032,
      "learning_rate": 7.48001809276557e-05,
      "loss": 1.0658,
      "step": 1196
    },
    {
      "epoch": 0.5834755057275165,
      "grad_norm": 0.46018657088279724,
      "learning_rate": 7.465125413884995e-05,
      "loss": 1.1515,
      "step": 1197
    },
    {
      "epoch": 0.5839629539361443,
      "grad_norm": 0.39541178941726685,
      "learning_rate": 7.450238740794212e-05,
      "loss": 1.0762,
      "step": 1198
    },
    {
      "epoch": 0.5844504021447721,
      "grad_norm": 0.3461151123046875,
      "learning_rate": 7.435358108763698e-05,
      "loss": 1.158,
      "step": 1199
    },
    {
      "epoch": 0.5849378503534,
      "grad_norm": 0.36114776134490967,
      "learning_rate": 7.420483553049613e-05,
      "loss": 1.0179,
      "step": 1200
    },
    {
      "epoch": 0.5854252985620277,
      "grad_norm": 0.4009512960910797,
      "learning_rate": 7.40561510889372e-05,
      "loss": 1.1028,
      "step": 1201
    },
    {
      "epoch": 0.5859127467706556,
      "grad_norm": 0.36980295181274414,
      "learning_rate": 7.39075281152331e-05,
      "loss": 1.1139,
      "step": 1202
    },
    {
      "epoch": 0.5864001949792834,
      "grad_norm": 0.38044968247413635,
      "learning_rate": 7.3758966961511e-05,
      "loss": 1.1793,
      "step": 1203
    },
    {
      "epoch": 0.5868876431879113,
      "grad_norm": 0.3833613395690918,
      "learning_rate": 7.361046797975167e-05,
      "loss": 1.1187,
      "step": 1204
    },
    {
      "epoch": 0.5873750913965391,
      "grad_norm": 0.33397236466407776,
      "learning_rate": 7.346203152178855e-05,
      "loss": 0.9398,
      "step": 1205
    },
    {
      "epoch": 0.587862539605167,
      "grad_norm": 0.4107745587825775,
      "learning_rate": 7.331365793930698e-05,
      "loss": 1.0091,
      "step": 1206
    },
    {
      "epoch": 0.5883499878137948,
      "grad_norm": 0.3655603229999542,
      "learning_rate": 7.316534758384328e-05,
      "loss": 1.1232,
      "step": 1207
    },
    {
      "epoch": 0.5888374360224227,
      "grad_norm": 0.33455514907836914,
      "learning_rate": 7.301710080678398e-05,
      "loss": 1.0878,
      "step": 1208
    },
    {
      "epoch": 0.5893248842310504,
      "grad_norm": 0.4005405306816101,
      "learning_rate": 7.286891795936502e-05,
      "loss": 1.2077,
      "step": 1209
    },
    {
      "epoch": 0.5898123324396782,
      "grad_norm": 0.37796056270599365,
      "learning_rate": 7.272079939267084e-05,
      "loss": 1.2196,
      "step": 1210
    },
    {
      "epoch": 0.5902997806483061,
      "grad_norm": 0.4098646342754364,
      "learning_rate": 7.257274545763355e-05,
      "loss": 1.0526,
      "step": 1211
    },
    {
      "epoch": 0.5907872288569339,
      "grad_norm": 0.4132624864578247,
      "learning_rate": 7.242475650503223e-05,
      "loss": 1.176,
      "step": 1212
    },
    {
      "epoch": 0.5912746770655618,
      "grad_norm": 0.35944047570228577,
      "learning_rate": 7.227683288549187e-05,
      "loss": 1.2631,
      "step": 1213
    },
    {
      "epoch": 0.5917621252741896,
      "grad_norm": 0.3995838463306427,
      "learning_rate": 7.212897494948274e-05,
      "loss": 1.0708,
      "step": 1214
    },
    {
      "epoch": 0.5922495734828175,
      "grad_norm": 0.3299524784088135,
      "learning_rate": 7.198118304731953e-05,
      "loss": 1.0689,
      "step": 1215
    },
    {
      "epoch": 0.5927370216914453,
      "grad_norm": 0.32375314831733704,
      "learning_rate": 7.183345752916042e-05,
      "loss": 1.0989,
      "step": 1216
    },
    {
      "epoch": 0.5932244699000732,
      "grad_norm": 0.3651908338069916,
      "learning_rate": 7.168579874500627e-05,
      "loss": 1.0571,
      "step": 1217
    },
    {
      "epoch": 0.5937119181087009,
      "grad_norm": 0.4099787473678589,
      "learning_rate": 7.153820704469993e-05,
      "loss": 1.1053,
      "step": 1218
    },
    {
      "epoch": 0.5941993663173288,
      "grad_norm": 0.34599772095680237,
      "learning_rate": 7.139068277792523e-05,
      "loss": 1.1592,
      "step": 1219
    },
    {
      "epoch": 0.5946868145259566,
      "grad_norm": 0.4628382921218872,
      "learning_rate": 7.124322629420628e-05,
      "loss": 1.1632,
      "step": 1220
    },
    {
      "epoch": 0.5951742627345844,
      "grad_norm": 0.3830983638763428,
      "learning_rate": 7.109583794290655e-05,
      "loss": 1.1266,
      "step": 1221
    },
    {
      "epoch": 0.5956617109432123,
      "grad_norm": 0.3308325707912445,
      "learning_rate": 7.094851807322813e-05,
      "loss": 1.0726,
      "step": 1222
    },
    {
      "epoch": 0.5961491591518401,
      "grad_norm": 0.3864315450191498,
      "learning_rate": 7.080126703421087e-05,
      "loss": 1.1274,
      "step": 1223
    },
    {
      "epoch": 0.596636607360468,
      "grad_norm": 0.3869364559650421,
      "learning_rate": 7.06540851747314e-05,
      "loss": 1.1278,
      "step": 1224
    },
    {
      "epoch": 0.5971240555690958,
      "grad_norm": 0.3456849157810211,
      "learning_rate": 7.050697284350271e-05,
      "loss": 1.0372,
      "step": 1225
    },
    {
      "epoch": 0.5976115037777237,
      "grad_norm": 0.34329748153686523,
      "learning_rate": 7.035993038907281e-05,
      "loss": 0.9344,
      "step": 1226
    },
    {
      "epoch": 0.5980989519863514,
      "grad_norm": 0.35609936714172363,
      "learning_rate": 7.021295815982424e-05,
      "loss": 1.1151,
      "step": 1227
    },
    {
      "epoch": 0.5985864001949793,
      "grad_norm": 0.38585638999938965,
      "learning_rate": 7.006605650397323e-05,
      "loss": 0.9919,
      "step": 1228
    },
    {
      "epoch": 0.5990738484036071,
      "grad_norm": 0.38502153754234314,
      "learning_rate": 6.991922576956872e-05,
      "loss": 1.15,
      "step": 1229
    },
    {
      "epoch": 0.5995612966122349,
      "grad_norm": 0.44235166907310486,
      "learning_rate": 6.977246630449161e-05,
      "loss": 1.1095,
      "step": 1230
    },
    {
      "epoch": 0.6000487448208628,
      "grad_norm": 0.36498481035232544,
      "learning_rate": 6.9625778456454e-05,
      "loss": 1.0007,
      "step": 1231
    },
    {
      "epoch": 0.6005361930294906,
      "grad_norm": 0.37926092743873596,
      "learning_rate": 6.94791625729983e-05,
      "loss": 1.1216,
      "step": 1232
    },
    {
      "epoch": 0.6010236412381185,
      "grad_norm": 0.3852653503417969,
      "learning_rate": 6.933261900149633e-05,
      "loss": 0.904,
      "step": 1233
    },
    {
      "epoch": 0.6015110894467462,
      "grad_norm": 0.43301111459732056,
      "learning_rate": 6.918614808914874e-05,
      "loss": 1.1529,
      "step": 1234
    },
    {
      "epoch": 0.6019985376553741,
      "grad_norm": 0.385044127702713,
      "learning_rate": 6.90397501829839e-05,
      "loss": 1.2066,
      "step": 1235
    },
    {
      "epoch": 0.6024859858640019,
      "grad_norm": 0.33385398983955383,
      "learning_rate": 6.889342562985725e-05,
      "loss": 1.1449,
      "step": 1236
    },
    {
      "epoch": 0.6029734340726298,
      "grad_norm": 0.377290278673172,
      "learning_rate": 6.874717477645043e-05,
      "loss": 1.2258,
      "step": 1237
    },
    {
      "epoch": 0.6034608822812576,
      "grad_norm": 0.36140093207359314,
      "learning_rate": 6.860099796927055e-05,
      "loss": 1.1105,
      "step": 1238
    },
    {
      "epoch": 0.6039483304898855,
      "grad_norm": 0.4266805946826935,
      "learning_rate": 6.845489555464915e-05,
      "loss": 1.1524,
      "step": 1239
    },
    {
      "epoch": 0.6044357786985133,
      "grad_norm": 0.4107736647129059,
      "learning_rate": 6.830886787874154e-05,
      "loss": 1.3052,
      "step": 1240
    },
    {
      "epoch": 0.6049232269071411,
      "grad_norm": 0.3631647527217865,
      "learning_rate": 6.816291528752606e-05,
      "loss": 0.9635,
      "step": 1241
    },
    {
      "epoch": 0.605410675115769,
      "grad_norm": 0.34765157103538513,
      "learning_rate": 6.801703812680309e-05,
      "loss": 1.0847,
      "step": 1242
    },
    {
      "epoch": 0.6058981233243967,
      "grad_norm": 0.35418158769607544,
      "learning_rate": 6.787123674219422e-05,
      "loss": 1.1145,
      "step": 1243
    },
    {
      "epoch": 0.6063855715330246,
      "grad_norm": 0.42017999291419983,
      "learning_rate": 6.772551147914165e-05,
      "loss": 0.9823,
      "step": 1244
    },
    {
      "epoch": 0.6068730197416524,
      "grad_norm": 0.38183996081352234,
      "learning_rate": 6.757986268290712e-05,
      "loss": 1.1948,
      "step": 1245
    },
    {
      "epoch": 0.6073604679502803,
      "grad_norm": 0.3763807713985443,
      "learning_rate": 6.743429069857123e-05,
      "loss": 1.1282,
      "step": 1246
    },
    {
      "epoch": 0.6078479161589081,
      "grad_norm": 0.4228421151638031,
      "learning_rate": 6.728879587103263e-05,
      "loss": 1.0836,
      "step": 1247
    },
    {
      "epoch": 0.608335364367536,
      "grad_norm": 0.3279537558555603,
      "learning_rate": 6.71433785450071e-05,
      "loss": 1.1569,
      "step": 1248
    },
    {
      "epoch": 0.6088228125761638,
      "grad_norm": 0.34171488881111145,
      "learning_rate": 6.699803906502682e-05,
      "loss": 1.1143,
      "step": 1249
    },
    {
      "epoch": 0.6093102607847917,
      "grad_norm": 0.40301311016082764,
      "learning_rate": 6.685277777543953e-05,
      "loss": 1.1053,
      "step": 1250
    },
    {
      "epoch": 0.6097977089934195,
      "grad_norm": 0.3519479036331177,
      "learning_rate": 6.670759502040782e-05,
      "loss": 1.057,
      "step": 1251
    },
    {
      "epoch": 0.6102851572020472,
      "grad_norm": 0.4781578779220581,
      "learning_rate": 6.656249114390803e-05,
      "loss": 1.2084,
      "step": 1252
    },
    {
      "epoch": 0.6107726054106751,
      "grad_norm": 0.43604883551597595,
      "learning_rate": 6.64174664897297e-05,
      "loss": 1.1374,
      "step": 1253
    },
    {
      "epoch": 0.6112600536193029,
      "grad_norm": 0.34350964426994324,
      "learning_rate": 6.627252140147474e-05,
      "loss": 1.1858,
      "step": 1254
    },
    {
      "epoch": 0.6117475018279308,
      "grad_norm": 0.37544548511505127,
      "learning_rate": 6.612765622255645e-05,
      "loss": 1.224,
      "step": 1255
    },
    {
      "epoch": 0.6122349500365586,
      "grad_norm": 0.40213069319725037,
      "learning_rate": 6.598287129619882e-05,
      "loss": 0.9491,
      "step": 1256
    },
    {
      "epoch": 0.6127223982451865,
      "grad_norm": 0.4172384738922119,
      "learning_rate": 6.583816696543576e-05,
      "loss": 1.1478,
      "step": 1257
    },
    {
      "epoch": 0.6132098464538143,
      "grad_norm": 0.3520037531852722,
      "learning_rate": 6.569354357311014e-05,
      "loss": 1.0307,
      "step": 1258
    },
    {
      "epoch": 0.6136972946624422,
      "grad_norm": 0.4001389741897583,
      "learning_rate": 6.554900146187312e-05,
      "loss": 1.1889,
      "step": 1259
    },
    {
      "epoch": 0.6141847428710699,
      "grad_norm": 0.39461663365364075,
      "learning_rate": 6.540454097418331e-05,
      "loss": 1.0742,
      "step": 1260
    },
    {
      "epoch": 0.6146721910796977,
      "grad_norm": 0.40418586134910583,
      "learning_rate": 6.526016245230589e-05,
      "loss": 1.1277,
      "step": 1261
    },
    {
      "epoch": 0.6151596392883256,
      "grad_norm": 0.35330745577812195,
      "learning_rate": 6.511586623831181e-05,
      "loss": 0.9615,
      "step": 1262
    },
    {
      "epoch": 0.6156470874969534,
      "grad_norm": 0.3787367641925812,
      "learning_rate": 6.497165267407703e-05,
      "loss": 1.1368,
      "step": 1263
    },
    {
      "epoch": 0.6161345357055813,
      "grad_norm": 0.3234662413597107,
      "learning_rate": 6.48275221012818e-05,
      "loss": 1.1478,
      "step": 1264
    },
    {
      "epoch": 0.6166219839142091,
      "grad_norm": 0.3629220426082611,
      "learning_rate": 6.468347486140957e-05,
      "loss": 1.1581,
      "step": 1265
    },
    {
      "epoch": 0.617109432122837,
      "grad_norm": 0.35167911648750305,
      "learning_rate": 6.453951129574644e-05,
      "loss": 1.1511,
      "step": 1266
    },
    {
      "epoch": 0.6175968803314648,
      "grad_norm": 0.39326807856559753,
      "learning_rate": 6.43956317453803e-05,
      "loss": 1.083,
      "step": 1267
    },
    {
      "epoch": 0.6180843285400927,
      "grad_norm": 0.4391644597053528,
      "learning_rate": 6.425183655119993e-05,
      "loss": 1.0469,
      "step": 1268
    },
    {
      "epoch": 0.6185717767487204,
      "grad_norm": 0.3080829381942749,
      "learning_rate": 6.410812605389423e-05,
      "loss": 0.9698,
      "step": 1269
    },
    {
      "epoch": 0.6190592249573483,
      "grad_norm": 0.3172454237937927,
      "learning_rate": 6.396450059395148e-05,
      "loss": 1.1241,
      "step": 1270
    },
    {
      "epoch": 0.6195466731659761,
      "grad_norm": 0.4044014513492584,
      "learning_rate": 6.382096051165847e-05,
      "loss": 0.9526,
      "step": 1271
    },
    {
      "epoch": 0.6200341213746039,
      "grad_norm": 0.34943583607673645,
      "learning_rate": 6.367750614709968e-05,
      "loss": 1.1092,
      "step": 1272
    },
    {
      "epoch": 0.6205215695832318,
      "grad_norm": 0.42065444588661194,
      "learning_rate": 6.353413784015654e-05,
      "loss": 1.1317,
      "step": 1273
    },
    {
      "epoch": 0.6210090177918596,
      "grad_norm": 0.3605908751487732,
      "learning_rate": 6.33908559305066e-05,
      "loss": 0.925,
      "step": 1274
    },
    {
      "epoch": 0.6214964660004875,
      "grad_norm": 0.3660207986831665,
      "learning_rate": 6.324766075762263e-05,
      "loss": 1.0133,
      "step": 1275
    },
    {
      "epoch": 0.6219839142091153,
      "grad_norm": 0.3236888647079468,
      "learning_rate": 6.310455266077193e-05,
      "loss": 1.0744,
      "step": 1276
    },
    {
      "epoch": 0.6224713624177431,
      "grad_norm": 0.3828015923500061,
      "learning_rate": 6.29615319790156e-05,
      "loss": 1.0188,
      "step": 1277
    },
    {
      "epoch": 0.6229588106263709,
      "grad_norm": 0.39991846680641174,
      "learning_rate": 6.28185990512075e-05,
      "loss": 1.0982,
      "step": 1278
    },
    {
      "epoch": 0.6234462588349988,
      "grad_norm": 0.4092521667480469,
      "learning_rate": 6.267575421599359e-05,
      "loss": 1.0448,
      "step": 1279
    },
    {
      "epoch": 0.6239337070436266,
      "grad_norm": 0.40827932953834534,
      "learning_rate": 6.253299781181121e-05,
      "loss": 1.2288,
      "step": 1280
    },
    {
      "epoch": 0.6244211552522545,
      "grad_norm": 0.3454175293445587,
      "learning_rate": 6.239033017688809e-05,
      "loss": 1.0822,
      "step": 1281
    },
    {
      "epoch": 0.6249086034608823,
      "grad_norm": 0.3596000671386719,
      "learning_rate": 6.224775164924164e-05,
      "loss": 1.1887,
      "step": 1282
    },
    {
      "epoch": 0.6253960516695101,
      "grad_norm": 0.31120550632476807,
      "learning_rate": 6.210526256667825e-05,
      "loss": 1.1056,
      "step": 1283
    },
    {
      "epoch": 0.625883499878138,
      "grad_norm": 0.34262052178382874,
      "learning_rate": 6.19628632667923e-05,
      "loss": 1.087,
      "step": 1284
    },
    {
      "epoch": 0.6263709480867657,
      "grad_norm": 0.42662423849105835,
      "learning_rate": 6.182055408696544e-05,
      "loss": 1.0435,
      "step": 1285
    },
    {
      "epoch": 0.6268583962953936,
      "grad_norm": 0.43115729093551636,
      "learning_rate": 6.167833536436588e-05,
      "loss": 1.1769,
      "step": 1286
    },
    {
      "epoch": 0.6273458445040214,
      "grad_norm": 0.34297922253608704,
      "learning_rate": 6.153620743594746e-05,
      "loss": 1.0313,
      "step": 1287
    },
    {
      "epoch": 0.6278332927126493,
      "grad_norm": 0.4172343313694,
      "learning_rate": 6.139417063844892e-05,
      "loss": 1.1047,
      "step": 1288
    },
    {
      "epoch": 0.6283207409212771,
      "grad_norm": 0.3697414994239807,
      "learning_rate": 6.125222530839301e-05,
      "loss": 1.2737,
      "step": 1289
    },
    {
      "epoch": 0.628808189129905,
      "grad_norm": 0.39292341470718384,
      "learning_rate": 6.111037178208597e-05,
      "loss": 1.0398,
      "step": 1290
    },
    {
      "epoch": 0.6292956373385328,
      "grad_norm": 0.34564071893692017,
      "learning_rate": 6.0968610395616345e-05,
      "loss": 1.0393,
      "step": 1291
    },
    {
      "epoch": 0.6297830855471606,
      "grad_norm": 0.39844003319740295,
      "learning_rate": 6.082694148485437e-05,
      "loss": 1.0699,
      "step": 1292
    },
    {
      "epoch": 0.6302705337557885,
      "grad_norm": 0.32988882064819336,
      "learning_rate": 6.068536538545133e-05,
      "loss": 0.9944,
      "step": 1293
    },
    {
      "epoch": 0.6307579819644162,
      "grad_norm": 0.4675695598125458,
      "learning_rate": 6.054388243283853e-05,
      "loss": 1.1975,
      "step": 1294
    },
    {
      "epoch": 0.6312454301730441,
      "grad_norm": 0.43154412508010864,
      "learning_rate": 6.040249296222653e-05,
      "loss": 1.0772,
      "step": 1295
    },
    {
      "epoch": 0.6317328783816719,
      "grad_norm": 0.44753265380859375,
      "learning_rate": 6.026119730860451e-05,
      "loss": 1.0963,
      "step": 1296
    },
    {
      "epoch": 0.6322203265902998,
      "grad_norm": 0.40827831625938416,
      "learning_rate": 6.011999580673931e-05,
      "loss": 1.1346,
      "step": 1297
    },
    {
      "epoch": 0.6327077747989276,
      "grad_norm": 0.34523019194602966,
      "learning_rate": 5.9978888791174705e-05,
      "loss": 1.0493,
      "step": 1298
    },
    {
      "epoch": 0.6331952230075555,
      "grad_norm": 0.3623436987400055,
      "learning_rate": 5.983787659623064e-05,
      "loss": 1.1105,
      "step": 1299
    },
    {
      "epoch": 0.6336826712161833,
      "grad_norm": 0.3779457211494446,
      "learning_rate": 5.969695955600236e-05,
      "loss": 1.1782,
      "step": 1300
    },
    {
      "epoch": 0.6341701194248112,
      "grad_norm": 0.4165879487991333,
      "learning_rate": 5.955613800435971e-05,
      "loss": 1.1784,
      "step": 1301
    },
    {
      "epoch": 0.634657567633439,
      "grad_norm": 0.3469805121421814,
      "learning_rate": 5.94154122749462e-05,
      "loss": 1.2509,
      "step": 1302
    },
    {
      "epoch": 0.6351450158420667,
      "grad_norm": 0.3427400588989258,
      "learning_rate": 5.9274782701178496e-05,
      "loss": 1.1735,
      "step": 1303
    },
    {
      "epoch": 0.6356324640506946,
      "grad_norm": 0.40498340129852295,
      "learning_rate": 5.913424961624528e-05,
      "loss": 1.1969,
      "step": 1304
    },
    {
      "epoch": 0.6361199122593224,
      "grad_norm": 0.4550219476222992,
      "learning_rate": 5.899381335310663e-05,
      "loss": 1.1946,
      "step": 1305
    },
    {
      "epoch": 0.6366073604679503,
      "grad_norm": 0.36041921377182007,
      "learning_rate": 5.885347424449337e-05,
      "loss": 1.0194,
      "step": 1306
    },
    {
      "epoch": 0.6370948086765781,
      "grad_norm": 0.38736119866371155,
      "learning_rate": 5.871323262290599e-05,
      "loss": 1.0996,
      "step": 1307
    },
    {
      "epoch": 0.637582256885206,
      "grad_norm": 0.37170708179473877,
      "learning_rate": 5.857308882061406e-05,
      "loss": 1.1053,
      "step": 1308
    },
    {
      "epoch": 0.6380697050938338,
      "grad_norm": 0.35190945863723755,
      "learning_rate": 5.843304316965543e-05,
      "loss": 1.2578,
      "step": 1309
    },
    {
      "epoch": 0.6385571533024617,
      "grad_norm": 0.36703935265541077,
      "learning_rate": 5.829309600183536e-05,
      "loss": 1.0586,
      "step": 1310
    },
    {
      "epoch": 0.6390446015110894,
      "grad_norm": 0.38557273149490356,
      "learning_rate": 5.8153247648725715e-05,
      "loss": 1.1757,
      "step": 1311
    },
    {
      "epoch": 0.6395320497197173,
      "grad_norm": 0.47165647149086,
      "learning_rate": 5.801349844166443e-05,
      "loss": 1.1128,
      "step": 1312
    },
    {
      "epoch": 0.6400194979283451,
      "grad_norm": 0.380670428276062,
      "learning_rate": 5.7873848711754345e-05,
      "loss": 1.1902,
      "step": 1313
    },
    {
      "epoch": 0.6405069461369729,
      "grad_norm": 0.3996541500091553,
      "learning_rate": 5.773429878986272e-05,
      "loss": 1.1026,
      "step": 1314
    },
    {
      "epoch": 0.6409943943456008,
      "grad_norm": 0.3766230642795563,
      "learning_rate": 5.759484900662027e-05,
      "loss": 1.2191,
      "step": 1315
    },
    {
      "epoch": 0.6414818425542286,
      "grad_norm": 0.31586742401123047,
      "learning_rate": 5.745549969242052e-05,
      "loss": 1.1092,
      "step": 1316
    },
    {
      "epoch": 0.6419692907628565,
      "grad_norm": 0.40071529150009155,
      "learning_rate": 5.731625117741892e-05,
      "loss": 0.9477,
      "step": 1317
    },
    {
      "epoch": 0.6424567389714843,
      "grad_norm": 0.4465852975845337,
      "learning_rate": 5.7177103791532096e-05,
      "loss": 1.1996,
      "step": 1318
    },
    {
      "epoch": 0.6429441871801121,
      "grad_norm": 0.38537243008613586,
      "learning_rate": 5.7038057864437144e-05,
      "loss": 1.0195,
      "step": 1319
    },
    {
      "epoch": 0.6434316353887399,
      "grad_norm": 0.3876956105232239,
      "learning_rate": 5.689911372557067e-05,
      "loss": 1.176,
      "step": 1320
    },
    {
      "epoch": 0.6439190835973678,
      "grad_norm": 0.3757091164588928,
      "learning_rate": 5.676027170412816e-05,
      "loss": 1.0577,
      "step": 1321
    },
    {
      "epoch": 0.6444065318059956,
      "grad_norm": 0.4040318727493286,
      "learning_rate": 5.6621532129063224e-05,
      "loss": 1.0778,
      "step": 1322
    },
    {
      "epoch": 0.6448939800146234,
      "grad_norm": 0.41619521379470825,
      "learning_rate": 5.648289532908666e-05,
      "loss": 1.2163,
      "step": 1323
    },
    {
      "epoch": 0.6453814282232513,
      "grad_norm": 0.4285852611064911,
      "learning_rate": 5.634436163266579e-05,
      "loss": 1.1286,
      "step": 1324
    },
    {
      "epoch": 0.6458688764318791,
      "grad_norm": 0.38491562008857727,
      "learning_rate": 5.620593136802365e-05,
      "loss": 1.2055,
      "step": 1325
    },
    {
      "epoch": 0.646356324640507,
      "grad_norm": 0.4486836791038513,
      "learning_rate": 5.6067604863138245e-05,
      "loss": 1.1919,
      "step": 1326
    },
    {
      "epoch": 0.6468437728491347,
      "grad_norm": 0.36280357837677,
      "learning_rate": 5.592938244574169e-05,
      "loss": 1.3017,
      "step": 1327
    },
    {
      "epoch": 0.6473312210577626,
      "grad_norm": 0.35584884881973267,
      "learning_rate": 5.579126444331959e-05,
      "loss": 1.0426,
      "step": 1328
    },
    {
      "epoch": 0.6478186692663904,
      "grad_norm": 0.3492376208305359,
      "learning_rate": 5.5653251183110075e-05,
      "loss": 1.1458,
      "step": 1329
    },
    {
      "epoch": 0.6483061174750183,
      "grad_norm": 0.46781599521636963,
      "learning_rate": 5.551534299210315e-05,
      "loss": 1.0435,
      "step": 1330
    },
    {
      "epoch": 0.6487935656836461,
      "grad_norm": 0.47177448868751526,
      "learning_rate": 5.5377540197039866e-05,
      "loss": 1.0767,
      "step": 1331
    },
    {
      "epoch": 0.649281013892274,
      "grad_norm": 0.3602699935436249,
      "learning_rate": 5.523984312441157e-05,
      "loss": 1.1655,
      "step": 1332
    },
    {
      "epoch": 0.6497684621009018,
      "grad_norm": 0.39494967460632324,
      "learning_rate": 5.510225210045914e-05,
      "loss": 1.1493,
      "step": 1333
    },
    {
      "epoch": 0.6502559103095296,
      "grad_norm": 0.3143066167831421,
      "learning_rate": 5.496476745117211e-05,
      "loss": 1.1146,
      "step": 1334
    },
    {
      "epoch": 0.6507433585181575,
      "grad_norm": 0.3416048288345337,
      "learning_rate": 5.4827389502288166e-05,
      "loss": 1.0537,
      "step": 1335
    },
    {
      "epoch": 0.6512308067267852,
      "grad_norm": 0.4054270088672638,
      "learning_rate": 5.4690118579292015e-05,
      "loss": 1.1853,
      "step": 1336
    },
    {
      "epoch": 0.6517182549354131,
      "grad_norm": 0.43888169527053833,
      "learning_rate": 5.455295500741484e-05,
      "loss": 1.0151,
      "step": 1337
    },
    {
      "epoch": 0.6522057031440409,
      "grad_norm": 0.4000617563724518,
      "learning_rate": 5.441589911163358e-05,
      "loss": 1.0849,
      "step": 1338
    },
    {
      "epoch": 0.6526931513526688,
      "grad_norm": 0.3413247764110565,
      "learning_rate": 5.427895121666993e-05,
      "loss": 1.1687,
      "step": 1339
    },
    {
      "epoch": 0.6531805995612966,
      "grad_norm": 0.3506993055343628,
      "learning_rate": 5.414211164698976e-05,
      "loss": 1.153,
      "step": 1340
    },
    {
      "epoch": 0.6536680477699245,
      "grad_norm": 0.37291571497917175,
      "learning_rate": 5.400538072680228e-05,
      "loss": 1.1409,
      "step": 1341
    },
    {
      "epoch": 0.6541554959785523,
      "grad_norm": 0.4084410071372986,
      "learning_rate": 5.386875878005927e-05,
      "loss": 1.12,
      "step": 1342
    },
    {
      "epoch": 0.6546429441871802,
      "grad_norm": 0.38073453307151794,
      "learning_rate": 5.3732246130454356e-05,
      "loss": 1.0577,
      "step": 1343
    },
    {
      "epoch": 0.655130392395808,
      "grad_norm": 0.3509850800037384,
      "learning_rate": 5.3595843101422136e-05,
      "loss": 1.0801,
      "step": 1344
    },
    {
      "epoch": 0.6556178406044357,
      "grad_norm": 0.40296420454978943,
      "learning_rate": 5.3459550016137626e-05,
      "loss": 1.1404,
      "step": 1345
    },
    {
      "epoch": 0.6561052888130636,
      "grad_norm": 0.3997831344604492,
      "learning_rate": 5.332336719751523e-05,
      "loss": 1.0855,
      "step": 1346
    },
    {
      "epoch": 0.6565927370216914,
      "grad_norm": 0.4008702337741852,
      "learning_rate": 5.318729496820809e-05,
      "loss": 1.1426,
      "step": 1347
    },
    {
      "epoch": 0.6570801852303193,
      "grad_norm": 0.45951130986213684,
      "learning_rate": 5.305133365060748e-05,
      "loss": 1.1355,
      "step": 1348
    },
    {
      "epoch": 0.6575676334389471,
      "grad_norm": 0.34970512986183167,
      "learning_rate": 5.291548356684177e-05,
      "loss": 1.0636,
      "step": 1349
    },
    {
      "epoch": 0.658055081647575,
      "grad_norm": 0.3302462697029114,
      "learning_rate": 5.277974503877579e-05,
      "loss": 1.1759,
      "step": 1350
    },
    {
      "epoch": 0.6585425298562028,
      "grad_norm": 0.38779914379119873,
      "learning_rate": 5.264411838801011e-05,
      "loss": 1.2124,
      "step": 1351
    },
    {
      "epoch": 0.6590299780648307,
      "grad_norm": 0.3708178699016571,
      "learning_rate": 5.250860393588022e-05,
      "loss": 1.1036,
      "step": 1352
    },
    {
      "epoch": 0.6595174262734584,
      "grad_norm": 0.3890998363494873,
      "learning_rate": 5.23732020034557e-05,
      "loss": 1.153,
      "step": 1353
    },
    {
      "epoch": 0.6600048744820862,
      "grad_norm": 0.37718912959098816,
      "learning_rate": 5.223791291153974e-05,
      "loss": 1.0722,
      "step": 1354
    },
    {
      "epoch": 0.6604923226907141,
      "grad_norm": 0.43145954608917236,
      "learning_rate": 5.210273698066801e-05,
      "loss": 1.1016,
      "step": 1355
    },
    {
      "epoch": 0.6609797708993419,
      "grad_norm": 0.4002271592617035,
      "learning_rate": 5.196767453110811e-05,
      "loss": 1.2884,
      "step": 1356
    },
    {
      "epoch": 0.6614672191079698,
      "grad_norm": 0.3822720944881439,
      "learning_rate": 5.1832725882858745e-05,
      "loss": 1.0398,
      "step": 1357
    },
    {
      "epoch": 0.6619546673165976,
      "grad_norm": 0.3739412724971771,
      "learning_rate": 5.169789135564915e-05,
      "loss": 1.1957,
      "step": 1358
    },
    {
      "epoch": 0.6624421155252255,
      "grad_norm": 0.3349688649177551,
      "learning_rate": 5.156317126893795e-05,
      "loss": 1.0651,
      "step": 1359
    },
    {
      "epoch": 0.6629295637338533,
      "grad_norm": 0.44151395559310913,
      "learning_rate": 5.142856594191274e-05,
      "loss": 1.2503,
      "step": 1360
    },
    {
      "epoch": 0.6634170119424811,
      "grad_norm": 0.41473668813705444,
      "learning_rate": 5.129407569348927e-05,
      "loss": 1.1377,
      "step": 1361
    },
    {
      "epoch": 0.6639044601511089,
      "grad_norm": 0.3414384722709656,
      "learning_rate": 5.115970084231059e-05,
      "loss": 1.004,
      "step": 1362
    },
    {
      "epoch": 0.6643919083597368,
      "grad_norm": 0.4229922592639923,
      "learning_rate": 5.102544170674628e-05,
      "loss": 1.1093,
      "step": 1363
    },
    {
      "epoch": 0.6648793565683646,
      "grad_norm": 0.31604325771331787,
      "learning_rate": 5.089129860489188e-05,
      "loss": 1.094,
      "step": 1364
    },
    {
      "epoch": 0.6653668047769924,
      "grad_norm": 0.40494808554649353,
      "learning_rate": 5.075727185456793e-05,
      "loss": 1.1109,
      "step": 1365
    },
    {
      "epoch": 0.6658542529856203,
      "grad_norm": 0.45369645953178406,
      "learning_rate": 5.062336177331934e-05,
      "loss": 1.1929,
      "step": 1366
    },
    {
      "epoch": 0.6663417011942481,
      "grad_norm": 0.35073715448379517,
      "learning_rate": 5.048956867841459e-05,
      "loss": 1.1078,
      "step": 1367
    },
    {
      "epoch": 0.666829149402876,
      "grad_norm": 0.3622107207775116,
      "learning_rate": 5.035589288684495e-05,
      "loss": 1.1053,
      "step": 1368
    },
    {
      "epoch": 0.6673165976115037,
      "grad_norm": 0.3263455033302307,
      "learning_rate": 5.0222334715323825e-05,
      "loss": 1.1512,
      "step": 1369
    },
    {
      "epoch": 0.6678040458201316,
      "grad_norm": 0.35704344511032104,
      "learning_rate": 5.0088894480285887e-05,
      "loss": 1.1933,
      "step": 1370
    },
    {
      "epoch": 0.6682914940287594,
      "grad_norm": 0.36799290776252747,
      "learning_rate": 4.9955572497886505e-05,
      "loss": 1.1541,
      "step": 1371
    },
    {
      "epoch": 0.6687789422373873,
      "grad_norm": 0.3307376205921173,
      "learning_rate": 4.982236908400074e-05,
      "loss": 1.046,
      "step": 1372
    },
    {
      "epoch": 0.6692663904460151,
      "grad_norm": 0.3832852244377136,
      "learning_rate": 4.968928455422277e-05,
      "loss": 1.1152,
      "step": 1373
    },
    {
      "epoch": 0.669753838654643,
      "grad_norm": 0.3245486319065094,
      "learning_rate": 4.955631922386517e-05,
      "loss": 1.0141,
      "step": 1374
    },
    {
      "epoch": 0.6702412868632708,
      "grad_norm": 0.3602396249771118,
      "learning_rate": 4.9423473407958035e-05,
      "loss": 0.968,
      "step": 1375
    },
    {
      "epoch": 0.6707287350718986,
      "grad_norm": 0.40289953351020813,
      "learning_rate": 4.929074742124831e-05,
      "loss": 1.0,
      "step": 1376
    },
    {
      "epoch": 0.6712161832805265,
      "grad_norm": 0.363689661026001,
      "learning_rate": 4.915814157819903e-05,
      "loss": 1.1995,
      "step": 1377
    },
    {
      "epoch": 0.6717036314891542,
      "grad_norm": 0.431208074092865,
      "learning_rate": 4.902565619298859e-05,
      "loss": 1.0191,
      "step": 1378
    },
    {
      "epoch": 0.6721910796977821,
      "grad_norm": 0.47906234860420227,
      "learning_rate": 4.889329157950996e-05,
      "loss": 1.0698,
      "step": 1379
    },
    {
      "epoch": 0.6726785279064099,
      "grad_norm": 0.4144451916217804,
      "learning_rate": 4.876104805137005e-05,
      "loss": 1.0121,
      "step": 1380
    },
    {
      "epoch": 0.6731659761150378,
      "grad_norm": 0.3713430166244507,
      "learning_rate": 4.86289259218888e-05,
      "loss": 1.345,
      "step": 1381
    },
    {
      "epoch": 0.6736534243236656,
      "grad_norm": 0.40037861466407776,
      "learning_rate": 4.849692550409857e-05,
      "loss": 1.1318,
      "step": 1382
    },
    {
      "epoch": 0.6741408725322935,
      "grad_norm": 0.41233551502227783,
      "learning_rate": 4.836504711074328e-05,
      "loss": 1.1669,
      "step": 1383
    },
    {
      "epoch": 0.6746283207409213,
      "grad_norm": 0.36600425839424133,
      "learning_rate": 4.8233291054277905e-05,
      "loss": 1.1323,
      "step": 1384
    },
    {
      "epoch": 0.675115768949549,
      "grad_norm": 0.4426422417163849,
      "learning_rate": 4.8101657646867396e-05,
      "loss": 1.1922,
      "step": 1385
    },
    {
      "epoch": 0.675603217158177,
      "grad_norm": 0.39272043108940125,
      "learning_rate": 4.797014720038614e-05,
      "loss": 1.1978,
      "step": 1386
    },
    {
      "epoch": 0.6760906653668047,
      "grad_norm": 0.3592641353607178,
      "learning_rate": 4.783876002641734e-05,
      "loss": 1.1467,
      "step": 1387
    },
    {
      "epoch": 0.6765781135754326,
      "grad_norm": 0.37689095735549927,
      "learning_rate": 4.7707496436252e-05,
      "loss": 1.0715,
      "step": 1388
    },
    {
      "epoch": 0.6770655617840604,
      "grad_norm": 0.4962100386619568,
      "learning_rate": 4.7576356740888315e-05,
      "loss": 1.1173,
      "step": 1389
    },
    {
      "epoch": 0.6775530099926883,
      "grad_norm": 0.3450334966182709,
      "learning_rate": 4.744534125103106e-05,
      "loss": 1.1435,
      "step": 1390
    },
    {
      "epoch": 0.6780404582013161,
      "grad_norm": 0.3096364736557007,
      "learning_rate": 4.7314450277090626e-05,
      "loss": 0.957,
      "step": 1391
    },
    {
      "epoch": 0.678527906409944,
      "grad_norm": 0.32999417185783386,
      "learning_rate": 4.7183684129182414e-05,
      "loss": 1.12,
      "step": 1392
    },
    {
      "epoch": 0.6790153546185718,
      "grad_norm": 0.45688876509666443,
      "learning_rate": 4.705304311712609e-05,
      "loss": 1.0668,
      "step": 1393
    },
    {
      "epoch": 0.6795028028271997,
      "grad_norm": 0.4175863564014435,
      "learning_rate": 4.692252755044485e-05,
      "loss": 1.0344,
      "step": 1394
    },
    {
      "epoch": 0.6799902510358274,
      "grad_norm": 0.36583220958709717,
      "learning_rate": 4.679213773836463e-05,
      "loss": 1.0781,
      "step": 1395
    },
    {
      "epoch": 0.6804776992444552,
      "grad_norm": 0.3702552616596222,
      "learning_rate": 4.666187398981351e-05,
      "loss": 1.0035,
      "step": 1396
    },
    {
      "epoch": 0.6809651474530831,
      "grad_norm": 0.45179715752601624,
      "learning_rate": 4.6531736613420826e-05,
      "loss": 1.0166,
      "step": 1397
    },
    {
      "epoch": 0.6814525956617109,
      "grad_norm": 0.2747941017150879,
      "learning_rate": 4.6401725917516505e-05,
      "loss": 1.0981,
      "step": 1398
    },
    {
      "epoch": 0.6819400438703388,
      "grad_norm": 0.34443342685699463,
      "learning_rate": 4.62718422101303e-05,
      "loss": 0.9779,
      "step": 1399
    },
    {
      "epoch": 0.6824274920789666,
      "grad_norm": 0.4068431258201599,
      "learning_rate": 4.614208579899123e-05,
      "loss": 1.0938,
      "step": 1400
    },
    {
      "epoch": 0.6829149402875945,
      "grad_norm": 0.4701009690761566,
      "learning_rate": 4.601245699152659e-05,
      "loss": 1.1205,
      "step": 1401
    },
    {
      "epoch": 0.6834023884962223,
      "grad_norm": 0.3799245357513428,
      "learning_rate": 4.5882956094861375e-05,
      "loss": 1.1747,
      "step": 1402
    },
    {
      "epoch": 0.6838898367048502,
      "grad_norm": 0.345043420791626,
      "learning_rate": 4.5753583415817536e-05,
      "loss": 1.0841,
      "step": 1403
    },
    {
      "epoch": 0.6843772849134779,
      "grad_norm": 0.3221161365509033,
      "learning_rate": 4.562433926091325e-05,
      "loss": 1.0883,
      "step": 1404
    },
    {
      "epoch": 0.6848647331221058,
      "grad_norm": 0.35330602526664734,
      "learning_rate": 4.549522393636214e-05,
      "loss": 1.1009,
      "step": 1405
    },
    {
      "epoch": 0.6853521813307336,
      "grad_norm": 0.3911016881465912,
      "learning_rate": 4.536623774807269e-05,
      "loss": 1.0455,
      "step": 1406
    },
    {
      "epoch": 0.6858396295393614,
      "grad_norm": 0.3859974145889282,
      "learning_rate": 4.523738100164736e-05,
      "loss": 1.0063,
      "step": 1407
    },
    {
      "epoch": 0.6863270777479893,
      "grad_norm": 0.39257508516311646,
      "learning_rate": 4.5108654002381875e-05,
      "loss": 1.0503,
      "step": 1408
    },
    {
      "epoch": 0.6868145259566171,
      "grad_norm": 0.36935654282569885,
      "learning_rate": 4.4980057055264714e-05,
      "loss": 0.9985,
      "step": 1409
    },
    {
      "epoch": 0.687301974165245,
      "grad_norm": 0.3954034745693207,
      "learning_rate": 4.485159046497607e-05,
      "loss": 1.1401,
      "step": 1410
    },
    {
      "epoch": 0.6877894223738727,
      "grad_norm": 0.3816031813621521,
      "learning_rate": 4.4723254535887395e-05,
      "loss": 1.1157,
      "step": 1411
    },
    {
      "epoch": 0.6882768705825006,
      "grad_norm": 0.42583298683166504,
      "learning_rate": 4.459504957206041e-05,
      "loss": 1.1594,
      "step": 1412
    },
    {
      "epoch": 0.6887643187911284,
      "grad_norm": 0.45978638529777527,
      "learning_rate": 4.446697587724677e-05,
      "loss": 1.048,
      "step": 1413
    },
    {
      "epoch": 0.6892517669997563,
      "grad_norm": 0.3576642572879791,
      "learning_rate": 4.433903375488697e-05,
      "loss": 0.9329,
      "step": 1414
    },
    {
      "epoch": 0.6897392152083841,
      "grad_norm": 0.3792358934879303,
      "learning_rate": 4.421122350810978e-05,
      "loss": 1.1325,
      "step": 1415
    },
    {
      "epoch": 0.6902266634170119,
      "grad_norm": 0.3718627989292145,
      "learning_rate": 4.4083545439731614e-05,
      "loss": 1.1786,
      "step": 1416
    },
    {
      "epoch": 0.6907141116256398,
      "grad_norm": 0.3628930449485779,
      "learning_rate": 4.395599985225561e-05,
      "loss": 1.0596,
      "step": 1417
    },
    {
      "epoch": 0.6912015598342676,
      "grad_norm": 0.38017037510871887,
      "learning_rate": 4.38285870478711e-05,
      "loss": 1.2319,
      "step": 1418
    },
    {
      "epoch": 0.6916890080428955,
      "grad_norm": 0.49403607845306396,
      "learning_rate": 4.370130732845277e-05,
      "loss": 1.111,
      "step": 1419
    },
    {
      "epoch": 0.6921764562515232,
      "grad_norm": 0.4223406910896301,
      "learning_rate": 4.357416099556002e-05,
      "loss": 1.2386,
      "step": 1420
    },
    {
      "epoch": 0.6926639044601511,
      "grad_norm": 0.39490723609924316,
      "learning_rate": 4.344714835043618e-05,
      "loss": 1.1064,
      "step": 1421
    },
    {
      "epoch": 0.6931513526687789,
      "grad_norm": 0.42644432187080383,
      "learning_rate": 4.332026969400794e-05,
      "loss": 1.2032,
      "step": 1422
    },
    {
      "epoch": 0.6936388008774068,
      "grad_norm": 0.40960004925727844,
      "learning_rate": 4.3193525326884435e-05,
      "loss": 1.1461,
      "step": 1423
    },
    {
      "epoch": 0.6941262490860346,
      "grad_norm": 0.4543960690498352,
      "learning_rate": 4.306691554935667e-05,
      "loss": 1.0211,
      "step": 1424
    },
    {
      "epoch": 0.6946136972946625,
      "grad_norm": 0.42792388796806335,
      "learning_rate": 4.294044066139671e-05,
      "loss": 1.1311,
      "step": 1425
    },
    {
      "epoch": 0.6951011455032903,
      "grad_norm": 0.37301984429359436,
      "learning_rate": 4.281410096265719e-05,
      "loss": 1.0813,
      "step": 1426
    },
    {
      "epoch": 0.6955885937119181,
      "grad_norm": 0.370306134223938,
      "learning_rate": 4.268789675247029e-05,
      "loss": 1.1299,
      "step": 1427
    },
    {
      "epoch": 0.696076041920546,
      "grad_norm": 0.42097169160842896,
      "learning_rate": 4.256182832984724e-05,
      "loss": 1.1097,
      "step": 1428
    },
    {
      "epoch": 0.6965634901291737,
      "grad_norm": 0.4374851882457733,
      "learning_rate": 4.243589599347755e-05,
      "loss": 1.1263,
      "step": 1429
    },
    {
      "epoch": 0.6970509383378016,
      "grad_norm": 0.3310086727142334,
      "learning_rate": 4.23101000417283e-05,
      "loss": 1.103,
      "step": 1430
    },
    {
      "epoch": 0.6975383865464294,
      "grad_norm": 0.3817380368709564,
      "learning_rate": 4.218444077264342e-05,
      "loss": 0.95,
      "step": 1431
    },
    {
      "epoch": 0.6980258347550573,
      "grad_norm": 0.35832837224006653,
      "learning_rate": 4.205891848394308e-05,
      "loss": 1.1271,
      "step": 1432
    },
    {
      "epoch": 0.6985132829636851,
      "grad_norm": 0.3595336973667145,
      "learning_rate": 4.193353347302282e-05,
      "loss": 1.0738,
      "step": 1433
    },
    {
      "epoch": 0.699000731172313,
      "grad_norm": 0.33713003993034363,
      "learning_rate": 4.180828603695296e-05,
      "loss": 1.0913,
      "step": 1434
    },
    {
      "epoch": 0.6994881793809408,
      "grad_norm": 0.31479769945144653,
      "learning_rate": 4.16831764724779e-05,
      "loss": 1.096,
      "step": 1435
    },
    {
      "epoch": 0.6999756275895687,
      "grad_norm": 0.33350107073783875,
      "learning_rate": 4.155820507601536e-05,
      "loss": 1.0534,
      "step": 1436
    },
    {
      "epoch": 0.7004630757981964,
      "grad_norm": 0.3710598647594452,
      "learning_rate": 4.143337214365572e-05,
      "loss": 1.0831,
      "step": 1437
    },
    {
      "epoch": 0.7009505240068242,
      "grad_norm": 0.41655072569847107,
      "learning_rate": 4.130867797116118e-05,
      "loss": 1.0895,
      "step": 1438
    },
    {
      "epoch": 0.7014379722154521,
      "grad_norm": 0.3327270746231079,
      "learning_rate": 4.1184122853965415e-05,
      "loss": 1.1405,
      "step": 1439
    },
    {
      "epoch": 0.7019254204240799,
      "grad_norm": 0.44724804162979126,
      "learning_rate": 4.105970708717244e-05,
      "loss": 1.173,
      "step": 1440
    },
    {
      "epoch": 0.7024128686327078,
      "grad_norm": 0.47310054302215576,
      "learning_rate": 4.093543096555616e-05,
      "loss": 1.2473,
      "step": 1441
    },
    {
      "epoch": 0.7029003168413356,
      "grad_norm": 0.3690038323402405,
      "learning_rate": 4.081129478355975e-05,
      "loss": 1.2197,
      "step": 1442
    },
    {
      "epoch": 0.7033877650499635,
      "grad_norm": 0.3522759974002838,
      "learning_rate": 4.0687298835294663e-05,
      "loss": 1.0677,
      "step": 1443
    },
    {
      "epoch": 0.7038752132585913,
      "grad_norm": 0.3102143406867981,
      "learning_rate": 4.0563443414540136e-05,
      "loss": 1.1593,
      "step": 1444
    },
    {
      "epoch": 0.7043626614672192,
      "grad_norm": 0.3423633873462677,
      "learning_rate": 4.0439728814742596e-05,
      "loss": 1.247,
      "step": 1445
    },
    {
      "epoch": 0.7048501096758469,
      "grad_norm": 0.4625047445297241,
      "learning_rate": 4.031615532901463e-05,
      "loss": 1.1411,
      "step": 1446
    },
    {
      "epoch": 0.7053375578844747,
      "grad_norm": 0.35930484533309937,
      "learning_rate": 4.019272325013456e-05,
      "loss": 1.0514,
      "step": 1447
    },
    {
      "epoch": 0.7058250060931026,
      "grad_norm": 0.42370128631591797,
      "learning_rate": 4.0069432870545776e-05,
      "loss": 0.9846,
      "step": 1448
    },
    {
      "epoch": 0.7063124543017304,
      "grad_norm": 0.513253927230835,
      "learning_rate": 3.994628448235583e-05,
      "loss": 1.0176,
      "step": 1449
    },
    {
      "epoch": 0.7067999025103583,
      "grad_norm": 0.32254430651664734,
      "learning_rate": 3.9823278377335914e-05,
      "loss": 0.9944,
      "step": 1450
    },
    {
      "epoch": 0.7072873507189861,
      "grad_norm": 0.36206114292144775,
      "learning_rate": 3.970041484692003e-05,
      "loss": 1.0255,
      "step": 1451
    },
    {
      "epoch": 0.707774798927614,
      "grad_norm": 0.40832626819610596,
      "learning_rate": 3.957769418220455e-05,
      "loss": 1.0763,
      "step": 1452
    },
    {
      "epoch": 0.7082622471362418,
      "grad_norm": 0.3668363094329834,
      "learning_rate": 3.945511667394719e-05,
      "loss": 1.05,
      "step": 1453
    },
    {
      "epoch": 0.7087496953448696,
      "grad_norm": 0.3837505578994751,
      "learning_rate": 3.9332682612566585e-05,
      "loss": 1.3172,
      "step": 1454
    },
    {
      "epoch": 0.7092371435534974,
      "grad_norm": 0.3345847427845001,
      "learning_rate": 3.921039228814145e-05,
      "loss": 1.0437,
      "step": 1455
    },
    {
      "epoch": 0.7097245917621253,
      "grad_norm": 0.38669687509536743,
      "learning_rate": 3.908824599041001e-05,
      "loss": 1.1198,
      "step": 1456
    },
    {
      "epoch": 0.7102120399707531,
      "grad_norm": 0.3960307240486145,
      "learning_rate": 3.896624400876917e-05,
      "loss": 1.1377,
      "step": 1457
    },
    {
      "epoch": 0.7106994881793809,
      "grad_norm": 0.43880727887153625,
      "learning_rate": 3.884438663227403e-05,
      "loss": 1.1162,
      "step": 1458
    },
    {
      "epoch": 0.7111869363880088,
      "grad_norm": 0.4575899541378021,
      "learning_rate": 3.8722674149636986e-05,
      "loss": 1.1324,
      "step": 1459
    },
    {
      "epoch": 0.7116743845966366,
      "grad_norm": 0.359183669090271,
      "learning_rate": 3.860110684922713e-05,
      "loss": 1.0138,
      "step": 1460
    },
    {
      "epoch": 0.7121618328052645,
      "grad_norm": 0.4640527367591858,
      "learning_rate": 3.847968501906969e-05,
      "loss": 1.0692,
      "step": 1461
    },
    {
      "epoch": 0.7126492810138922,
      "grad_norm": 0.3558286130428314,
      "learning_rate": 3.835840894684514e-05,
      "loss": 1.2269,
      "step": 1462
    },
    {
      "epoch": 0.7131367292225201,
      "grad_norm": 0.4080779254436493,
      "learning_rate": 3.8237278919888616e-05,
      "loss": 1.0738,
      "step": 1463
    },
    {
      "epoch": 0.7136241774311479,
      "grad_norm": 0.3963766098022461,
      "learning_rate": 3.8116295225189305e-05,
      "loss": 1.0178,
      "step": 1464
    },
    {
      "epoch": 0.7141116256397758,
      "grad_norm": 0.3747573792934418,
      "learning_rate": 3.799545814938959e-05,
      "loss": 0.9247,
      "step": 1465
    },
    {
      "epoch": 0.7145990738484036,
      "grad_norm": 0.3791253864765167,
      "learning_rate": 3.787476797878459e-05,
      "loss": 1.0676,
      "step": 1466
    },
    {
      "epoch": 0.7150865220570315,
      "grad_norm": 0.35384225845336914,
      "learning_rate": 3.775422499932123e-05,
      "loss": 1.1025,
      "step": 1467
    },
    {
      "epoch": 0.7155739702656593,
      "grad_norm": 0.35406047105789185,
      "learning_rate": 3.763382949659787e-05,
      "loss": 1.0788,
      "step": 1468
    },
    {
      "epoch": 0.7160614184742871,
      "grad_norm": 0.40216243267059326,
      "learning_rate": 3.7513581755863336e-05,
      "loss": 1.1128,
      "step": 1469
    },
    {
      "epoch": 0.716548866682915,
      "grad_norm": 0.37562674283981323,
      "learning_rate": 3.739348206201635e-05,
      "loss": 1.1785,
      "step": 1470
    },
    {
      "epoch": 0.7170363148915427,
      "grad_norm": 0.4050292372703552,
      "learning_rate": 3.7273530699605044e-05,
      "loss": 1.0603,
      "step": 1471
    },
    {
      "epoch": 0.7175237631001706,
      "grad_norm": 0.34335386753082275,
      "learning_rate": 3.715372795282587e-05,
      "loss": 1.119,
      "step": 1472
    },
    {
      "epoch": 0.7180112113087984,
      "grad_norm": 0.4139067828655243,
      "learning_rate": 3.7034074105523284e-05,
      "loss": 1.0752,
      "step": 1473
    },
    {
      "epoch": 0.7184986595174263,
      "grad_norm": 0.42542508244514465,
      "learning_rate": 3.691456944118906e-05,
      "loss": 1.0176,
      "step": 1474
    },
    {
      "epoch": 0.7189861077260541,
      "grad_norm": 0.3980538249015808,
      "learning_rate": 3.679521424296137e-05,
      "loss": 1.0894,
      "step": 1475
    },
    {
      "epoch": 0.719473555934682,
      "grad_norm": 0.4451710283756256,
      "learning_rate": 3.6676008793624296e-05,
      "loss": 1.0847,
      "step": 1476
    },
    {
      "epoch": 0.7199610041433098,
      "grad_norm": 0.4051099717617035,
      "learning_rate": 3.6556953375607186e-05,
      "loss": 1.2075,
      "step": 1477
    },
    {
      "epoch": 0.7204484523519376,
      "grad_norm": 0.45044445991516113,
      "learning_rate": 3.643804827098388e-05,
      "loss": 1.184,
      "step": 1478
    },
    {
      "epoch": 0.7209359005605654,
      "grad_norm": 0.3991248607635498,
      "learning_rate": 3.631929376147207e-05,
      "loss": 1.1447,
      "step": 1479
    },
    {
      "epoch": 0.7214233487691932,
      "grad_norm": 0.38065215945243835,
      "learning_rate": 3.620069012843267e-05,
      "loss": 1.2044,
      "step": 1480
    },
    {
      "epoch": 0.7219107969778211,
      "grad_norm": 0.42019620537757874,
      "learning_rate": 3.608223765286912e-05,
      "loss": 1.1288,
      "step": 1481
    },
    {
      "epoch": 0.7223982451864489,
      "grad_norm": 0.4416203200817108,
      "learning_rate": 3.596393661542674e-05,
      "loss": 1.1556,
      "step": 1482
    },
    {
      "epoch": 0.7228856933950768,
      "grad_norm": 0.48135778307914734,
      "learning_rate": 3.5845787296392006e-05,
      "loss": 1.165,
      "step": 1483
    },
    {
      "epoch": 0.7233731416037046,
      "grad_norm": 0.3307107985019684,
      "learning_rate": 3.572778997569204e-05,
      "loss": 1.0351,
      "step": 1484
    },
    {
      "epoch": 0.7238605898123325,
      "grad_norm": 0.36838921904563904,
      "learning_rate": 3.5609944932893736e-05,
      "loss": 1.0249,
      "step": 1485
    },
    {
      "epoch": 0.7243480380209603,
      "grad_norm": 0.3509773910045624,
      "learning_rate": 3.54922524472032e-05,
      "loss": 1.2437,
      "step": 1486
    },
    {
      "epoch": 0.7248354862295882,
      "grad_norm": 0.39572563767433167,
      "learning_rate": 3.5374712797465214e-05,
      "loss": 1.0417,
      "step": 1487
    },
    {
      "epoch": 0.7253229344382159,
      "grad_norm": 0.4869961440563202,
      "learning_rate": 3.5257326262162304e-05,
      "loss": 1.1738,
      "step": 1488
    },
    {
      "epoch": 0.7258103826468437,
      "grad_norm": 0.34554779529571533,
      "learning_rate": 3.51400931194143e-05,
      "loss": 1.1651,
      "step": 1489
    },
    {
      "epoch": 0.7262978308554716,
      "grad_norm": 0.3999471366405487,
      "learning_rate": 3.502301364697758e-05,
      "loss": 1.1151,
      "step": 1490
    },
    {
      "epoch": 0.7267852790640994,
      "grad_norm": 0.3819086253643036,
      "learning_rate": 3.490608812224446e-05,
      "loss": 1.0452,
      "step": 1491
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.30517885088920593,
      "learning_rate": 3.47893168222425e-05,
      "loss": 1.0678,
      "step": 1492
    },
    {
      "epoch": 0.7277601754813551,
      "grad_norm": 0.3606143891811371,
      "learning_rate": 3.467270002363382e-05,
      "loss": 1.0806,
      "step": 1493
    },
    {
      "epoch": 0.728247623689983,
      "grad_norm": 0.41307225823402405,
      "learning_rate": 3.45562380027146e-05,
      "loss": 1.2821,
      "step": 1494
    },
    {
      "epoch": 0.7287350718986108,
      "grad_norm": 0.4158805310726166,
      "learning_rate": 3.44399310354142e-05,
      "loss": 1.2177,
      "step": 1495
    },
    {
      "epoch": 0.7292225201072386,
      "grad_norm": 0.32801222801208496,
      "learning_rate": 3.432377939729462e-05,
      "loss": 1.0945,
      "step": 1496
    },
    {
      "epoch": 0.7297099683158664,
      "grad_norm": 0.4302747845649719,
      "learning_rate": 3.420778336354995e-05,
      "loss": 1.1308,
      "step": 1497
    },
    {
      "epoch": 0.7301974165244943,
      "grad_norm": 0.3798311948776245,
      "learning_rate": 3.409194320900554e-05,
      "loss": 1.1171,
      "step": 1498
    },
    {
      "epoch": 0.7306848647331221,
      "grad_norm": 0.41932743787765503,
      "learning_rate": 3.397625920811731e-05,
      "loss": 1.1425,
      "step": 1499
    },
    {
      "epoch": 0.7311723129417499,
      "grad_norm": 0.33966439962387085,
      "learning_rate": 3.386073163497144e-05,
      "loss": 1.0847,
      "step": 1500
    },
    {
      "epoch": 0.7316597611503778,
      "grad_norm": 0.3647656738758087,
      "learning_rate": 3.374536076328333e-05,
      "loss": 0.9508,
      "step": 1501
    },
    {
      "epoch": 0.7321472093590056,
      "grad_norm": 0.3434484302997589,
      "learning_rate": 3.3630146866397104e-05,
      "loss": 0.9775,
      "step": 1502
    },
    {
      "epoch": 0.7326346575676335,
      "grad_norm": 0.42506644129753113,
      "learning_rate": 3.351509021728512e-05,
      "loss": 1.1988,
      "step": 1503
    },
    {
      "epoch": 0.7331221057762612,
      "grad_norm": 0.4124164879322052,
      "learning_rate": 3.340019108854703e-05,
      "loss": 1.0521,
      "step": 1504
    },
    {
      "epoch": 0.7336095539848891,
      "grad_norm": 0.38989806175231934,
      "learning_rate": 3.328544975240932e-05,
      "loss": 1.1389,
      "step": 1505
    },
    {
      "epoch": 0.7340970021935169,
      "grad_norm": 0.40123671293258667,
      "learning_rate": 3.3170866480724637e-05,
      "loss": 1.0026,
      "step": 1506
    },
    {
      "epoch": 0.7345844504021448,
      "grad_norm": 0.403225839138031,
      "learning_rate": 3.3056441544971115e-05,
      "loss": 1.1522,
      "step": 1507
    },
    {
      "epoch": 0.7350718986107726,
      "grad_norm": 0.42019128799438477,
      "learning_rate": 3.294217521625177e-05,
      "loss": 1.1012,
      "step": 1508
    },
    {
      "epoch": 0.7355593468194004,
      "grad_norm": 0.35564446449279785,
      "learning_rate": 3.282806776529378e-05,
      "loss": 1.1675,
      "step": 1509
    },
    {
      "epoch": 0.7360467950280283,
      "grad_norm": 0.479063481092453,
      "learning_rate": 3.2714119462448e-05,
      "loss": 1.1614,
      "step": 1510
    },
    {
      "epoch": 0.7365342432366561,
      "grad_norm": 0.46616676449775696,
      "learning_rate": 3.260033057768814e-05,
      "loss": 1.1798,
      "step": 1511
    },
    {
      "epoch": 0.737021691445284,
      "grad_norm": 0.3975960314273834,
      "learning_rate": 3.248670138061021e-05,
      "loss": 1.0032,
      "step": 1512
    },
    {
      "epoch": 0.7375091396539117,
      "grad_norm": 0.3194623291492462,
      "learning_rate": 3.237323214043193e-05,
      "loss": 1.0289,
      "step": 1513
    },
    {
      "epoch": 0.7379965878625396,
      "grad_norm": 0.3215973377227783,
      "learning_rate": 3.2259923125992e-05,
      "loss": 1.0274,
      "step": 1514
    },
    {
      "epoch": 0.7384840360711674,
      "grad_norm": 0.4231413006782532,
      "learning_rate": 3.21467746057495e-05,
      "loss": 1.0905,
      "step": 1515
    },
    {
      "epoch": 0.7389714842797953,
      "grad_norm": 0.45163217186927795,
      "learning_rate": 3.203378684778326e-05,
      "loss": 1.1281,
      "step": 1516
    },
    {
      "epoch": 0.7394589324884231,
      "grad_norm": 0.35993802547454834,
      "learning_rate": 3.192096011979124e-05,
      "loss": 1.1051,
      "step": 1517
    },
    {
      "epoch": 0.739946380697051,
      "grad_norm": 0.33846375346183777,
      "learning_rate": 3.180829468908986e-05,
      "loss": 1.0615,
      "step": 1518
    },
    {
      "epoch": 0.7404338289056788,
      "grad_norm": 0.38558444380760193,
      "learning_rate": 3.169579082261335e-05,
      "loss": 1.2364,
      "step": 1519
    },
    {
      "epoch": 0.7409212771143066,
      "grad_norm": 0.41832321882247925,
      "learning_rate": 3.158344878691325e-05,
      "loss": 1.1329,
      "step": 1520
    },
    {
      "epoch": 0.7414087253229344,
      "grad_norm": 0.4375525712966919,
      "learning_rate": 3.147126884815761e-05,
      "loss": 1.052,
      "step": 1521
    },
    {
      "epoch": 0.7418961735315622,
      "grad_norm": 0.3645784258842468,
      "learning_rate": 3.1359251272130384e-05,
      "loss": 1.0807,
      "step": 1522
    },
    {
      "epoch": 0.7423836217401901,
      "grad_norm": 0.3972663879394531,
      "learning_rate": 3.124739632423098e-05,
      "loss": 1.0337,
      "step": 1523
    },
    {
      "epoch": 0.7428710699488179,
      "grad_norm": 0.33198773860931396,
      "learning_rate": 3.113570426947342e-05,
      "loss": 1.0686,
      "step": 1524
    },
    {
      "epoch": 0.7433585181574458,
      "grad_norm": 0.417258620262146,
      "learning_rate": 3.1024175372485685e-05,
      "loss": 1.0509,
      "step": 1525
    },
    {
      "epoch": 0.7438459663660736,
      "grad_norm": 0.4219992458820343,
      "learning_rate": 3.091280989750937e-05,
      "loss": 1.1581,
      "step": 1526
    },
    {
      "epoch": 0.7443334145747015,
      "grad_norm": 0.4250520169734955,
      "learning_rate": 3.080160810839881e-05,
      "loss": 1.1641,
      "step": 1527
    },
    {
      "epoch": 0.7448208627833293,
      "grad_norm": 0.38769063353538513,
      "learning_rate": 3.069057026862048e-05,
      "loss": 1.0963,
      "step": 1528
    },
    {
      "epoch": 0.7453083109919572,
      "grad_norm": 0.3646620512008667,
      "learning_rate": 3.057969664125248e-05,
      "loss": 1.1754,
      "step": 1529
    },
    {
      "epoch": 0.7457957592005849,
      "grad_norm": 0.4347061514854431,
      "learning_rate": 3.0468987488983837e-05,
      "loss": 1.1011,
      "step": 1530
    },
    {
      "epoch": 0.7462832074092127,
      "grad_norm": 0.3439730107784271,
      "learning_rate": 3.035844307411384e-05,
      "loss": 1.0675,
      "step": 1531
    },
    {
      "epoch": 0.7467706556178406,
      "grad_norm": 0.335822194814682,
      "learning_rate": 3.024806365855154e-05,
      "loss": 1.0758,
      "step": 1532
    },
    {
      "epoch": 0.7472581038264684,
      "grad_norm": 0.3784874975681305,
      "learning_rate": 3.013784950381501e-05,
      "loss": 1.049,
      "step": 1533
    },
    {
      "epoch": 0.7477455520350963,
      "grad_norm": 0.35510337352752686,
      "learning_rate": 3.0027800871030797e-05,
      "loss": 1.0305,
      "step": 1534
    },
    {
      "epoch": 0.7482330002437241,
      "grad_norm": 0.3230135440826416,
      "learning_rate": 2.9917918020933267e-05,
      "loss": 1.0568,
      "step": 1535
    },
    {
      "epoch": 0.748720448452352,
      "grad_norm": 0.3796325922012329,
      "learning_rate": 2.9808201213864062e-05,
      "loss": 1.0139,
      "step": 1536
    },
    {
      "epoch": 0.7492078966609798,
      "grad_norm": 0.3692516088485718,
      "learning_rate": 2.9698650709771347e-05,
      "loss": 1.0474,
      "step": 1537
    },
    {
      "epoch": 0.7496953448696076,
      "grad_norm": 0.39915838837623596,
      "learning_rate": 2.958926676820929e-05,
      "loss": 1.0897,
      "step": 1538
    },
    {
      "epoch": 0.7501827930782354,
      "grad_norm": 0.41971927881240845,
      "learning_rate": 2.9480049648337493e-05,
      "loss": 1.0735,
      "step": 1539
    },
    {
      "epoch": 0.7506702412868632,
      "grad_norm": 0.3538713753223419,
      "learning_rate": 2.9370999608920237e-05,
      "loss": 1.189,
      "step": 1540
    },
    {
      "epoch": 0.7511576894954911,
      "grad_norm": 0.4109525978565216,
      "learning_rate": 2.9262116908325965e-05,
      "loss": 1.1621,
      "step": 1541
    },
    {
      "epoch": 0.7516451377041189,
      "grad_norm": 0.4208962023258209,
      "learning_rate": 2.915340180452666e-05,
      "loss": 1.0659,
      "step": 1542
    },
    {
      "epoch": 0.7521325859127468,
      "grad_norm": 0.39982888102531433,
      "learning_rate": 2.9044854555097212e-05,
      "loss": 1.156,
      "step": 1543
    },
    {
      "epoch": 0.7526200341213746,
      "grad_norm": 0.43298467993736267,
      "learning_rate": 2.8936475417214794e-05,
      "loss": 1.0843,
      "step": 1544
    },
    {
      "epoch": 0.7531074823300025,
      "grad_norm": 0.38478174805641174,
      "learning_rate": 2.8828264647658388e-05,
      "loss": 1.21,
      "step": 1545
    },
    {
      "epoch": 0.7535949305386302,
      "grad_norm": 0.42979133129119873,
      "learning_rate": 2.8720222502807946e-05,
      "loss": 0.9725,
      "step": 1546
    },
    {
      "epoch": 0.7540823787472581,
      "grad_norm": 0.3768276274204254,
      "learning_rate": 2.861234923864394e-05,
      "loss": 1.0635,
      "step": 1547
    },
    {
      "epoch": 0.7545698269558859,
      "grad_norm": 0.39537665247917175,
      "learning_rate": 2.850464511074672e-05,
      "loss": 1.1399,
      "step": 1548
    },
    {
      "epoch": 0.7550572751645138,
      "grad_norm": 0.3813226521015167,
      "learning_rate": 2.8397110374295955e-05,
      "loss": 1.0786,
      "step": 1549
    },
    {
      "epoch": 0.7555447233731416,
      "grad_norm": 0.3653690218925476,
      "learning_rate": 2.828974528406991e-05,
      "loss": 1.0643,
      "step": 1550
    },
    {
      "epoch": 0.7560321715817694,
      "grad_norm": 0.3394027054309845,
      "learning_rate": 2.818255009444496e-05,
      "loss": 0.9943,
      "step": 1551
    },
    {
      "epoch": 0.7565196197903973,
      "grad_norm": 0.40031924843788147,
      "learning_rate": 2.8075525059394893e-05,
      "loss": 1.1436,
      "step": 1552
    },
    {
      "epoch": 0.7570070679990251,
      "grad_norm": 0.33379706740379333,
      "learning_rate": 2.7968670432490408e-05,
      "loss": 1.044,
      "step": 1553
    },
    {
      "epoch": 0.757494516207653,
      "grad_norm": 0.3941287398338318,
      "learning_rate": 2.7861986466898395e-05,
      "loss": 1.1144,
      "step": 1554
    },
    {
      "epoch": 0.7579819644162807,
      "grad_norm": 0.3709263801574707,
      "learning_rate": 2.7755473415381517e-05,
      "loss": 1.0881,
      "step": 1555
    },
    {
      "epoch": 0.7584694126249086,
      "grad_norm": 0.4325391352176666,
      "learning_rate": 2.7649131530297388e-05,
      "loss": 1.1278,
      "step": 1556
    },
    {
      "epoch": 0.7589568608335364,
      "grad_norm": 0.36034736037254333,
      "learning_rate": 2.7542961063598104e-05,
      "loss": 1.1199,
      "step": 1557
    },
    {
      "epoch": 0.7594443090421643,
      "grad_norm": 0.39042332768440247,
      "learning_rate": 2.7436962266829715e-05,
      "loss": 1.1726,
      "step": 1558
    },
    {
      "epoch": 0.7599317572507921,
      "grad_norm": 0.3944944739341736,
      "learning_rate": 2.733113539113139e-05,
      "loss": 1.1469,
      "step": 1559
    },
    {
      "epoch": 0.76041920545942,
      "grad_norm": 0.3531627058982849,
      "learning_rate": 2.722548068723506e-05,
      "loss": 0.9875,
      "step": 1560
    },
    {
      "epoch": 0.7609066536680478,
      "grad_norm": 0.3481125831604004,
      "learning_rate": 2.711999840546472e-05,
      "loss": 1.2121,
      "step": 1561
    },
    {
      "epoch": 0.7613941018766756,
      "grad_norm": 0.4217544198036194,
      "learning_rate": 2.7014688795735898e-05,
      "loss": 1.1031,
      "step": 1562
    },
    {
      "epoch": 0.7618815500853034,
      "grad_norm": 0.3160876929759979,
      "learning_rate": 2.690955210755495e-05,
      "loss": 1.0948,
      "step": 1563
    },
    {
      "epoch": 0.7623689982939312,
      "grad_norm": 0.35992559790611267,
      "learning_rate": 2.680458859001852e-05,
      "loss": 1.1592,
      "step": 1564
    },
    {
      "epoch": 0.7628564465025591,
      "grad_norm": 0.34475815296173096,
      "learning_rate": 2.6699798491813065e-05,
      "loss": 1.1727,
      "step": 1565
    },
    {
      "epoch": 0.7633438947111869,
      "grad_norm": 0.4077177345752716,
      "learning_rate": 2.6595182061214075e-05,
      "loss": 1.0463,
      "step": 1566
    },
    {
      "epoch": 0.7638313429198148,
      "grad_norm": 0.35198870301246643,
      "learning_rate": 2.6490739546085607e-05,
      "loss": 0.9737,
      "step": 1567
    },
    {
      "epoch": 0.7643187911284426,
      "grad_norm": 0.39966869354248047,
      "learning_rate": 2.6386471193879658e-05,
      "loss": 1.0667,
      "step": 1568
    },
    {
      "epoch": 0.7648062393370705,
      "grad_norm": 0.4429773986339569,
      "learning_rate": 2.6282377251635604e-05,
      "loss": 1.1457,
      "step": 1569
    },
    {
      "epoch": 0.7652936875456983,
      "grad_norm": 0.39318573474884033,
      "learning_rate": 2.617845796597954e-05,
      "loss": 1.0209,
      "step": 1570
    },
    {
      "epoch": 0.765781135754326,
      "grad_norm": 0.5061572194099426,
      "learning_rate": 2.6074713583123866e-05,
      "loss": 1.1212,
      "step": 1571
    },
    {
      "epoch": 0.7662685839629539,
      "grad_norm": 0.43377435207366943,
      "learning_rate": 2.5971144348866494e-05,
      "loss": 1.2506,
      "step": 1572
    },
    {
      "epoch": 0.7667560321715817,
      "grad_norm": 0.39093247056007385,
      "learning_rate": 2.5867750508590382e-05,
      "loss": 1.1338,
      "step": 1573
    },
    {
      "epoch": 0.7672434803802096,
      "grad_norm": 0.37731918692588806,
      "learning_rate": 2.5764532307262934e-05,
      "loss": 1.1943,
      "step": 1574
    },
    {
      "epoch": 0.7677309285888374,
      "grad_norm": 0.4303186237812042,
      "learning_rate": 2.566148998943547e-05,
      "loss": 1.0712,
      "step": 1575
    },
    {
      "epoch": 0.7682183767974653,
      "grad_norm": 0.4580343961715698,
      "learning_rate": 2.555862379924253e-05,
      "loss": 0.9849,
      "step": 1576
    },
    {
      "epoch": 0.7687058250060931,
      "grad_norm": 0.4869823157787323,
      "learning_rate": 2.5455933980401393e-05,
      "loss": 1.1867,
      "step": 1577
    },
    {
      "epoch": 0.769193273214721,
      "grad_norm": 0.42554041743278503,
      "learning_rate": 2.5353420776211447e-05,
      "loss": 1.0575,
      "step": 1578
    },
    {
      "epoch": 0.7696807214233488,
      "grad_norm": 0.41002970933914185,
      "learning_rate": 2.525108442955364e-05,
      "loss": 1.0154,
      "step": 1579
    },
    {
      "epoch": 0.7701681696319767,
      "grad_norm": 0.4180346429347992,
      "learning_rate": 2.514892518288988e-05,
      "loss": 1.0725,
      "step": 1580
    },
    {
      "epoch": 0.7706556178406044,
      "grad_norm": 0.4667299687862396,
      "learning_rate": 2.504694327826258e-05,
      "loss": 0.9928,
      "step": 1581
    },
    {
      "epoch": 0.7711430660492322,
      "grad_norm": 0.34292903542518616,
      "learning_rate": 2.4945138957293835e-05,
      "loss": 0.9856,
      "step": 1582
    },
    {
      "epoch": 0.7716305142578601,
      "grad_norm": 0.39459550380706787,
      "learning_rate": 2.484351246118507e-05,
      "loss": 1.2468,
      "step": 1583
    },
    {
      "epoch": 0.7721179624664879,
      "grad_norm": 0.3772342801094055,
      "learning_rate": 2.474206403071644e-05,
      "loss": 1.1554,
      "step": 1584
    },
    {
      "epoch": 0.7726054106751158,
      "grad_norm": 0.43720147013664246,
      "learning_rate": 2.464079390624615e-05,
      "loss": 1.1343,
      "step": 1585
    },
    {
      "epoch": 0.7730928588837436,
      "grad_norm": 0.510027289390564,
      "learning_rate": 2.4539702327709936e-05,
      "loss": 1.2158,
      "step": 1586
    },
    {
      "epoch": 0.7735803070923715,
      "grad_norm": 0.41168540716171265,
      "learning_rate": 2.4438789534620522e-05,
      "loss": 1.1311,
      "step": 1587
    },
    {
      "epoch": 0.7740677553009992,
      "grad_norm": 0.3641628921031952,
      "learning_rate": 2.4338055766067135e-05,
      "loss": 1.0413,
      "step": 1588
    },
    {
      "epoch": 0.7745552035096271,
      "grad_norm": 0.33775594830513,
      "learning_rate": 2.4237501260714734e-05,
      "loss": 1.039,
      "step": 1589
    },
    {
      "epoch": 0.7750426517182549,
      "grad_norm": 0.3569764196872711,
      "learning_rate": 2.413712625680358e-05,
      "loss": 1.159,
      "step": 1590
    },
    {
      "epoch": 0.7755300999268828,
      "grad_norm": 0.46986496448516846,
      "learning_rate": 2.403693099214871e-05,
      "loss": 1.2067,
      "step": 1591
    },
    {
      "epoch": 0.7760175481355106,
      "grad_norm": 0.39282193779945374,
      "learning_rate": 2.393691570413924e-05,
      "loss": 1.1314,
      "step": 1592
    },
    {
      "epoch": 0.7765049963441384,
      "grad_norm": 0.36249709129333496,
      "learning_rate": 2.3837080629737884e-05,
      "loss": 1.0121,
      "step": 1593
    },
    {
      "epoch": 0.7769924445527663,
      "grad_norm": 0.4231277406215668,
      "learning_rate": 2.3737426005480414e-05,
      "loss": 1.0772,
      "step": 1594
    },
    {
      "epoch": 0.7774798927613941,
      "grad_norm": 0.38033294677734375,
      "learning_rate": 2.3637952067475043e-05,
      "loss": 1.1995,
      "step": 1595
    },
    {
      "epoch": 0.777967340970022,
      "grad_norm": 0.33983561396598816,
      "learning_rate": 2.353865905140187e-05,
      "loss": 1.0918,
      "step": 1596
    },
    {
      "epoch": 0.7784547891786497,
      "grad_norm": 0.448473185300827,
      "learning_rate": 2.343954719251241e-05,
      "loss": 1.0779,
      "step": 1597
    },
    {
      "epoch": 0.7789422373872776,
      "grad_norm": 0.4694695472717285,
      "learning_rate": 2.3340616725628926e-05,
      "loss": 1.0535,
      "step": 1598
    },
    {
      "epoch": 0.7794296855959054,
      "grad_norm": 0.39060917496681213,
      "learning_rate": 2.32418678851439e-05,
      "loss": 1.1766,
      "step": 1599
    },
    {
      "epoch": 0.7799171338045333,
      "grad_norm": 0.3499775528907776,
      "learning_rate": 2.3143300905019484e-05,
      "loss": 1.0203,
      "step": 1600
    },
    {
      "epoch": 0.7804045820131611,
      "grad_norm": 0.3471561074256897,
      "learning_rate": 2.3044916018787032e-05,
      "loss": 1.0232,
      "step": 1601
    },
    {
      "epoch": 0.7808920302217889,
      "grad_norm": 0.4390745759010315,
      "learning_rate": 2.29467134595464e-05,
      "loss": 1.1864,
      "step": 1602
    },
    {
      "epoch": 0.7813794784304168,
      "grad_norm": 0.41209185123443604,
      "learning_rate": 2.2848693459965475e-05,
      "loss": 1.0051,
      "step": 1603
    },
    {
      "epoch": 0.7818669266390446,
      "grad_norm": 0.38516929745674133,
      "learning_rate": 2.2750856252279608e-05,
      "loss": 1.0338,
      "step": 1604
    },
    {
      "epoch": 0.7823543748476725,
      "grad_norm": 0.42860835790634155,
      "learning_rate": 2.26532020682911e-05,
      "loss": 1.1316,
      "step": 1605
    },
    {
      "epoch": 0.7828418230563002,
      "grad_norm": 0.330546110868454,
      "learning_rate": 2.255573113936855e-05,
      "loss": 1.013,
      "step": 1606
    },
    {
      "epoch": 0.7833292712649281,
      "grad_norm": 0.39379894733428955,
      "learning_rate": 2.2458443696446484e-05,
      "loss": 1.1574,
      "step": 1607
    },
    {
      "epoch": 0.7838167194735559,
      "grad_norm": 0.4301210939884186,
      "learning_rate": 2.236133997002462e-05,
      "loss": 1.1746,
      "step": 1608
    },
    {
      "epoch": 0.7843041676821838,
      "grad_norm": 0.36347338557243347,
      "learning_rate": 2.226442019016739e-05,
      "loss": 0.9925,
      "step": 1609
    },
    {
      "epoch": 0.7847916158908116,
      "grad_norm": 0.33933982253074646,
      "learning_rate": 2.21676845865035e-05,
      "loss": 0.9947,
      "step": 1610
    },
    {
      "epoch": 0.7852790640994395,
      "grad_norm": 0.40830859541893005,
      "learning_rate": 2.207113338822524e-05,
      "loss": 1.1907,
      "step": 1611
    },
    {
      "epoch": 0.7857665123080673,
      "grad_norm": 0.4412747621536255,
      "learning_rate": 2.197476682408792e-05,
      "loss": 1.0358,
      "step": 1612
    },
    {
      "epoch": 0.786253960516695,
      "grad_norm": 0.36393675208091736,
      "learning_rate": 2.18785851224095e-05,
      "loss": 1.13,
      "step": 1613
    },
    {
      "epoch": 0.7867414087253229,
      "grad_norm": 0.409282922744751,
      "learning_rate": 2.1782588511069957e-05,
      "loss": 1.061,
      "step": 1614
    },
    {
      "epoch": 0.7872288569339507,
      "grad_norm": 0.3050946891307831,
      "learning_rate": 2.168677721751069e-05,
      "loss": 1.1321,
      "step": 1615
    },
    {
      "epoch": 0.7877163051425786,
      "grad_norm": 0.40699470043182373,
      "learning_rate": 2.1591151468734027e-05,
      "loss": 1.0106,
      "step": 1616
    },
    {
      "epoch": 0.7882037533512064,
      "grad_norm": 0.3460084795951843,
      "learning_rate": 2.149571149130276e-05,
      "loss": 1.1958,
      "step": 1617
    },
    {
      "epoch": 0.7886912015598343,
      "grad_norm": 0.4064023494720459,
      "learning_rate": 2.1400457511339467e-05,
      "loss": 0.925,
      "step": 1618
    },
    {
      "epoch": 0.7891786497684621,
      "grad_norm": 0.327619731426239,
      "learning_rate": 2.1305389754526074e-05,
      "loss": 1.0711,
      "step": 1619
    },
    {
      "epoch": 0.78966609797709,
      "grad_norm": 0.4511876106262207,
      "learning_rate": 2.1210508446103293e-05,
      "loss": 0.9372,
      "step": 1620
    },
    {
      "epoch": 0.7901535461857178,
      "grad_norm": 0.44113391637802124,
      "learning_rate": 2.111581381087011e-05,
      "loss": 1.1551,
      "step": 1621
    },
    {
      "epoch": 0.7906409943943457,
      "grad_norm": 0.4107889235019684,
      "learning_rate": 2.1021306073183167e-05,
      "loss": 1.0985,
      "step": 1622
    },
    {
      "epoch": 0.7911284426029734,
      "grad_norm": 0.3807615339756012,
      "learning_rate": 2.0926985456956417e-05,
      "loss": 1.0919,
      "step": 1623
    },
    {
      "epoch": 0.7916158908116012,
      "grad_norm": 0.3942524194717407,
      "learning_rate": 2.0832852185660356e-05,
      "loss": 1.299,
      "step": 1624
    },
    {
      "epoch": 0.7921033390202291,
      "grad_norm": 0.34323641657829285,
      "learning_rate": 2.073890648232164e-05,
      "loss": 0.9839,
      "step": 1625
    },
    {
      "epoch": 0.7925907872288569,
      "grad_norm": 0.32075735926628113,
      "learning_rate": 2.06451485695226e-05,
      "loss": 1.0878,
      "step": 1626
    },
    {
      "epoch": 0.7930782354374848,
      "grad_norm": 0.38971444964408875,
      "learning_rate": 2.055157866940054e-05,
      "loss": 0.9919,
      "step": 1627
    },
    {
      "epoch": 0.7935656836461126,
      "grad_norm": 0.3504349887371063,
      "learning_rate": 2.0458197003647373e-05,
      "loss": 1.1747,
      "step": 1628
    },
    {
      "epoch": 0.7940531318547405,
      "grad_norm": 0.35090172290802,
      "learning_rate": 2.0365003793509007e-05,
      "loss": 1.1011,
      "step": 1629
    },
    {
      "epoch": 0.7945405800633683,
      "grad_norm": 0.34960517287254333,
      "learning_rate": 2.0271999259784858e-05,
      "loss": 0.9365,
      "step": 1630
    },
    {
      "epoch": 0.7950280282719961,
      "grad_norm": 0.31795409321784973,
      "learning_rate": 2.0179183622827312e-05,
      "loss": 1.0213,
      "step": 1631
    },
    {
      "epoch": 0.7955154764806239,
      "grad_norm": 0.3856673836708069,
      "learning_rate": 2.0086557102541203e-05,
      "loss": 1.0721,
      "step": 1632
    },
    {
      "epoch": 0.7960029246892517,
      "grad_norm": 0.40313300490379333,
      "learning_rate": 1.9994119918383358e-05,
      "loss": 1.1315,
      "step": 1633
    },
    {
      "epoch": 0.7964903728978796,
      "grad_norm": 0.37088483572006226,
      "learning_rate": 1.9901872289361935e-05,
      "loss": 1.1286,
      "step": 1634
    },
    {
      "epoch": 0.7969778211065074,
      "grad_norm": 0.3714759945869446,
      "learning_rate": 1.9809814434036e-05,
      "loss": 1.1514,
      "step": 1635
    },
    {
      "epoch": 0.7974652693151353,
      "grad_norm": 0.40931692719459534,
      "learning_rate": 1.9717946570515066e-05,
      "loss": 1.1017,
      "step": 1636
    },
    {
      "epoch": 0.7979527175237631,
      "grad_norm": 0.3661656677722931,
      "learning_rate": 1.962626891645841e-05,
      "loss": 1.0751,
      "step": 1637
    },
    {
      "epoch": 0.798440165732391,
      "grad_norm": 0.38110336661338806,
      "learning_rate": 1.9534781689074722e-05,
      "loss": 1.1106,
      "step": 1638
    },
    {
      "epoch": 0.7989276139410187,
      "grad_norm": 0.3157220780849457,
      "learning_rate": 1.944348510512147e-05,
      "loss": 1.0537,
      "step": 1639
    },
    {
      "epoch": 0.7994150621496466,
      "grad_norm": 0.40534940361976624,
      "learning_rate": 1.9352379380904473e-05,
      "loss": 1.0117,
      "step": 1640
    },
    {
      "epoch": 0.7999025103582744,
      "grad_norm": 0.3753153681755066,
      "learning_rate": 1.9261464732277334e-05,
      "loss": 1.1414,
      "step": 1641
    },
    {
      "epoch": 0.8003899585669023,
      "grad_norm": 0.4216148257255554,
      "learning_rate": 1.9170741374640933e-05,
      "loss": 1.1454,
      "step": 1642
    },
    {
      "epoch": 0.8008774067755301,
      "grad_norm": 0.4276210367679596,
      "learning_rate": 1.9080209522942992e-05,
      "loss": 1.065,
      "step": 1643
    },
    {
      "epoch": 0.8013648549841579,
      "grad_norm": 0.4728935956954956,
      "learning_rate": 1.8989869391677462e-05,
      "loss": 1.1932,
      "step": 1644
    },
    {
      "epoch": 0.8018523031927858,
      "grad_norm": 0.3567184507846832,
      "learning_rate": 1.8899721194884035e-05,
      "loss": 1.0522,
      "step": 1645
    },
    {
      "epoch": 0.8023397514014136,
      "grad_norm": 0.3507539629936218,
      "learning_rate": 1.8809765146147697e-05,
      "loss": 1.0161,
      "step": 1646
    },
    {
      "epoch": 0.8028271996100415,
      "grad_norm": 0.3791554868221283,
      "learning_rate": 1.872000145859816e-05,
      "loss": 1.0541,
      "step": 1647
    },
    {
      "epoch": 0.8033146478186692,
      "grad_norm": 0.38745689392089844,
      "learning_rate": 1.863043034490938e-05,
      "loss": 1.1743,
      "step": 1648
    },
    {
      "epoch": 0.8038020960272971,
      "grad_norm": 0.3388438820838928,
      "learning_rate": 1.854105201729912e-05,
      "loss": 1.0733,
      "step": 1649
    },
    {
      "epoch": 0.8042895442359249,
      "grad_norm": 0.38149645924568176,
      "learning_rate": 1.8451866687528284e-05,
      "loss": 1.185,
      "step": 1650
    },
    {
      "epoch": 0.8047769924445528,
      "grad_norm": 0.39097803831100464,
      "learning_rate": 1.8362874566900556e-05,
      "loss": 1.034,
      "step": 1651
    },
    {
      "epoch": 0.8052644406531806,
      "grad_norm": 0.4000914990901947,
      "learning_rate": 1.8274075866261896e-05,
      "loss": 1.143,
      "step": 1652
    },
    {
      "epoch": 0.8057518888618085,
      "grad_norm": 0.40363654494285583,
      "learning_rate": 1.818547079599995e-05,
      "loss": 1.0637,
      "step": 1653
    },
    {
      "epoch": 0.8062393370704363,
      "grad_norm": 0.352938175201416,
      "learning_rate": 1.8097059566043595e-05,
      "loss": 1.0713,
      "step": 1654
    },
    {
      "epoch": 0.806726785279064,
      "grad_norm": 0.37018442153930664,
      "learning_rate": 1.800884238586248e-05,
      "loss": 1.0661,
      "step": 1655
    },
    {
      "epoch": 0.807214233487692,
      "grad_norm": 0.4333856701850891,
      "learning_rate": 1.7920819464466486e-05,
      "loss": 1.1136,
      "step": 1656
    },
    {
      "epoch": 0.8077016816963197,
      "grad_norm": 0.4050823748111725,
      "learning_rate": 1.7832991010405243e-05,
      "loss": 1.0066,
      "step": 1657
    },
    {
      "epoch": 0.8081891299049476,
      "grad_norm": 0.3594289720058441,
      "learning_rate": 1.7745357231767622e-05,
      "loss": 1.0689,
      "step": 1658
    },
    {
      "epoch": 0.8086765781135754,
      "grad_norm": 0.2837337255477905,
      "learning_rate": 1.7657918336181324e-05,
      "loss": 1.0588,
      "step": 1659
    },
    {
      "epoch": 0.8091640263222033,
      "grad_norm": 0.41181838512420654,
      "learning_rate": 1.7570674530812224e-05,
      "loss": 1.1877,
      "step": 1660
    },
    {
      "epoch": 0.8096514745308311,
      "grad_norm": 0.3461904525756836,
      "learning_rate": 1.748362602236403e-05,
      "loss": 1.1245,
      "step": 1661
    },
    {
      "epoch": 0.810138922739459,
      "grad_norm": 0.486634761095047,
      "learning_rate": 1.7396773017077748e-05,
      "loss": 1.1609,
      "step": 1662
    },
    {
      "epoch": 0.8106263709480868,
      "grad_norm": 0.33511972427368164,
      "learning_rate": 1.731011572073117e-05,
      "loss": 1.0115,
      "step": 1663
    },
    {
      "epoch": 0.8111138191567145,
      "grad_norm": 0.5276228785514832,
      "learning_rate": 1.7223654338638385e-05,
      "loss": 1.0923,
      "step": 1664
    },
    {
      "epoch": 0.8116012673653424,
      "grad_norm": 0.4225917160511017,
      "learning_rate": 1.7137389075649335e-05,
      "loss": 1.1583,
      "step": 1665
    },
    {
      "epoch": 0.8120887155739702,
      "grad_norm": 0.3635517954826355,
      "learning_rate": 1.7051320136149286e-05,
      "loss": 1.0727,
      "step": 1666
    },
    {
      "epoch": 0.8125761637825981,
      "grad_norm": 0.3625679314136505,
      "learning_rate": 1.696544772405836e-05,
      "loss": 0.9549,
      "step": 1667
    },
    {
      "epoch": 0.8130636119912259,
      "grad_norm": 0.3724707365036011,
      "learning_rate": 1.6879772042831065e-05,
      "loss": 1.0298,
      "step": 1668
    },
    {
      "epoch": 0.8135510601998538,
      "grad_norm": 0.4418652057647705,
      "learning_rate": 1.679429329545582e-05,
      "loss": 1.1513,
      "step": 1669
    },
    {
      "epoch": 0.8140385084084816,
      "grad_norm": 0.4359528422355652,
      "learning_rate": 1.6709011684454435e-05,
      "loss": 1.0941,
      "step": 1670
    },
    {
      "epoch": 0.8145259566171095,
      "grad_norm": 0.3722599148750305,
      "learning_rate": 1.662392741188161e-05,
      "loss": 1.1031,
      "step": 1671
    },
    {
      "epoch": 0.8150134048257373,
      "grad_norm": 0.37397316098213196,
      "learning_rate": 1.6539040679324623e-05,
      "loss": 1.1302,
      "step": 1672
    },
    {
      "epoch": 0.8155008530343651,
      "grad_norm": 0.3645596206188202,
      "learning_rate": 1.6454351687902557e-05,
      "loss": 1.0137,
      "step": 1673
    },
    {
      "epoch": 0.8159883012429929,
      "grad_norm": 0.38736292719841003,
      "learning_rate": 1.63698606382661e-05,
      "loss": 1.0888,
      "step": 1674
    },
    {
      "epoch": 0.8164757494516207,
      "grad_norm": 0.3472338318824768,
      "learning_rate": 1.6285567730596974e-05,
      "loss": 0.9184,
      "step": 1675
    },
    {
      "epoch": 0.8169631976602486,
      "grad_norm": 0.36622050404548645,
      "learning_rate": 1.6201473164607396e-05,
      "loss": 0.8993,
      "step": 1676
    },
    {
      "epoch": 0.8174506458688764,
      "grad_norm": 0.3877585232257843,
      "learning_rate": 1.6117577139539676e-05,
      "loss": 1.1695,
      "step": 1677
    },
    {
      "epoch": 0.8179380940775043,
      "grad_norm": 0.38888975977897644,
      "learning_rate": 1.6033879854165766e-05,
      "loss": 1.1307,
      "step": 1678
    },
    {
      "epoch": 0.8184255422861321,
      "grad_norm": 0.415061354637146,
      "learning_rate": 1.5950381506786714e-05,
      "loss": 1.0329,
      "step": 1679
    },
    {
      "epoch": 0.81891299049476,
      "grad_norm": 0.34671249985694885,
      "learning_rate": 1.5867082295232216e-05,
      "loss": 1.0966,
      "step": 1680
    },
    {
      "epoch": 0.8194004387033877,
      "grad_norm": 0.45664656162261963,
      "learning_rate": 1.5783982416860198e-05,
      "loss": 1.0137,
      "step": 1681
    },
    {
      "epoch": 0.8198878869120156,
      "grad_norm": 0.4034433960914612,
      "learning_rate": 1.5701082068556304e-05,
      "loss": 1.1861,
      "step": 1682
    },
    {
      "epoch": 0.8203753351206434,
      "grad_norm": 0.3839215636253357,
      "learning_rate": 1.561838144673341e-05,
      "loss": 1.2001,
      "step": 1683
    },
    {
      "epoch": 0.8208627833292713,
      "grad_norm": 0.330584317445755,
      "learning_rate": 1.553588074733121e-05,
      "loss": 1.1973,
      "step": 1684
    },
    {
      "epoch": 0.8213502315378991,
      "grad_norm": 0.3817874491214752,
      "learning_rate": 1.5453580165815762e-05,
      "loss": 0.9829,
      "step": 1685
    },
    {
      "epoch": 0.8218376797465269,
      "grad_norm": 0.3511894941329956,
      "learning_rate": 1.537147989717893e-05,
      "loss": 1.1028,
      "step": 1686
    },
    {
      "epoch": 0.8223251279551548,
      "grad_norm": 0.38406670093536377,
      "learning_rate": 1.528958013593801e-05,
      "loss": 1.0144,
      "step": 1687
    },
    {
      "epoch": 0.8228125761637826,
      "grad_norm": 0.4310559630393982,
      "learning_rate": 1.5207881076135267e-05,
      "loss": 1.0646,
      "step": 1688
    },
    {
      "epoch": 0.8233000243724105,
      "grad_norm": 0.3396536111831665,
      "learning_rate": 1.5126382911337422e-05,
      "loss": 1.1363,
      "step": 1689
    },
    {
      "epoch": 0.8237874725810382,
      "grad_norm": 0.3851539194583893,
      "learning_rate": 1.5045085834635231e-05,
      "loss": 1.0915,
      "step": 1690
    },
    {
      "epoch": 0.8242749207896661,
      "grad_norm": 0.4351629614830017,
      "learning_rate": 1.4963990038643005e-05,
      "loss": 1.0605,
      "step": 1691
    },
    {
      "epoch": 0.8247623689982939,
      "grad_norm": 0.3781146705150604,
      "learning_rate": 1.4883095715498185e-05,
      "loss": 1.1243,
      "step": 1692
    },
    {
      "epoch": 0.8252498172069218,
      "grad_norm": 0.41910192370414734,
      "learning_rate": 1.480240305686087e-05,
      "loss": 1.1733,
      "step": 1693
    },
    {
      "epoch": 0.8257372654155496,
      "grad_norm": 0.4027787148952484,
      "learning_rate": 1.4721912253913328e-05,
      "loss": 1.0486,
      "step": 1694
    },
    {
      "epoch": 0.8262247136241774,
      "grad_norm": 0.4166243374347687,
      "learning_rate": 1.4641623497359658e-05,
      "loss": 1.1038,
      "step": 1695
    },
    {
      "epoch": 0.8267121618328053,
      "grad_norm": 0.3793354630470276,
      "learning_rate": 1.4561536977425172e-05,
      "loss": 1.1123,
      "step": 1696
    },
    {
      "epoch": 0.827199610041433,
      "grad_norm": 0.36616671085357666,
      "learning_rate": 1.4481652883856056e-05,
      "loss": 1.2744,
      "step": 1697
    },
    {
      "epoch": 0.827687058250061,
      "grad_norm": 0.4479994475841522,
      "learning_rate": 1.4401971405918957e-05,
      "loss": 1.2132,
      "step": 1698
    },
    {
      "epoch": 0.8281745064586887,
      "grad_norm": 0.35453662276268005,
      "learning_rate": 1.432249273240035e-05,
      "loss": 1.109,
      "step": 1699
    },
    {
      "epoch": 0.8286619546673166,
      "grad_norm": 0.34129640460014343,
      "learning_rate": 1.4243217051606283e-05,
      "loss": 1.0581,
      "step": 1700
    },
    {
      "epoch": 0.8291494028759444,
      "grad_norm": 0.4323829710483551,
      "learning_rate": 1.4164144551361902e-05,
      "loss": 1.0317,
      "step": 1701
    },
    {
      "epoch": 0.8296368510845723,
      "grad_norm": 0.43410375714302063,
      "learning_rate": 1.40852754190109e-05,
      "loss": 1.0699,
      "step": 1702
    },
    {
      "epoch": 0.8301242992932001,
      "grad_norm": 0.42333585023880005,
      "learning_rate": 1.400660984141513e-05,
      "loss": 1.1351,
      "step": 1703
    },
    {
      "epoch": 0.830611747501828,
      "grad_norm": 0.4202088415622711,
      "learning_rate": 1.3928148004954244e-05,
      "loss": 1.1727,
      "step": 1704
    },
    {
      "epoch": 0.8310991957104558,
      "grad_norm": 0.3802170157432556,
      "learning_rate": 1.3849890095525108e-05,
      "loss": 1.0649,
      "step": 1705
    },
    {
      "epoch": 0.8315866439190835,
      "grad_norm": 0.40519291162490845,
      "learning_rate": 1.377183629854144e-05,
      "loss": 1.1673,
      "step": 1706
    },
    {
      "epoch": 0.8320740921277114,
      "grad_norm": 0.3862631618976593,
      "learning_rate": 1.369398679893339e-05,
      "loss": 1.1161,
      "step": 1707
    },
    {
      "epoch": 0.8325615403363392,
      "grad_norm": 0.3797990083694458,
      "learning_rate": 1.3616341781147046e-05,
      "loss": 1.1403,
      "step": 1708
    },
    {
      "epoch": 0.8330489885449671,
      "grad_norm": 0.3590017259120941,
      "learning_rate": 1.3538901429144057e-05,
      "loss": 1.2033,
      "step": 1709
    },
    {
      "epoch": 0.8335364367535949,
      "grad_norm": 0.39021775126457214,
      "learning_rate": 1.3461665926401091e-05,
      "loss": 1.0198,
      "step": 1710
    },
    {
      "epoch": 0.8340238849622228,
      "grad_norm": 0.4277026951313019,
      "learning_rate": 1.3384635455909588e-05,
      "loss": 1.1402,
      "step": 1711
    },
    {
      "epoch": 0.8345113331708506,
      "grad_norm": 0.3869093358516693,
      "learning_rate": 1.330781020017513e-05,
      "loss": 1.0954,
      "step": 1712
    },
    {
      "epoch": 0.8349987813794785,
      "grad_norm": 0.5378305315971375,
      "learning_rate": 1.3231190341217081e-05,
      "loss": 1.1328,
      "step": 1713
    },
    {
      "epoch": 0.8354862295881063,
      "grad_norm": 0.4125831127166748,
      "learning_rate": 1.3154776060568252e-05,
      "loss": 1.0873,
      "step": 1714
    },
    {
      "epoch": 0.8359736777967341,
      "grad_norm": 0.36874914169311523,
      "learning_rate": 1.3078567539274288e-05,
      "loss": 1.0784,
      "step": 1715
    },
    {
      "epoch": 0.8364611260053619,
      "grad_norm": 0.37857338786125183,
      "learning_rate": 1.3002564957893393e-05,
      "loss": 1.1726,
      "step": 1716
    },
    {
      "epoch": 0.8369485742139897,
      "grad_norm": 0.347371369600296,
      "learning_rate": 1.2926768496495811e-05,
      "loss": 1.1752,
      "step": 1717
    },
    {
      "epoch": 0.8374360224226176,
      "grad_norm": 0.3918173909187317,
      "learning_rate": 1.2851178334663471e-05,
      "loss": 1.1114,
      "step": 1718
    },
    {
      "epoch": 0.8379234706312454,
      "grad_norm": 0.3389439880847931,
      "learning_rate": 1.277579465148946e-05,
      "loss": 1.0704,
      "step": 1719
    },
    {
      "epoch": 0.8384109188398733,
      "grad_norm": 0.42370593547821045,
      "learning_rate": 1.270061762557776e-05,
      "loss": 1.1535,
      "step": 1720
    },
    {
      "epoch": 0.8388983670485011,
      "grad_norm": 0.3641960620880127,
      "learning_rate": 1.2625647435042654e-05,
      "loss": 1.1482,
      "step": 1721
    },
    {
      "epoch": 0.839385815257129,
      "grad_norm": 0.3834351599216461,
      "learning_rate": 1.2550884257508389e-05,
      "loss": 1.031,
      "step": 1722
    },
    {
      "epoch": 0.8398732634657567,
      "grad_norm": 0.3766709566116333,
      "learning_rate": 1.2476328270108716e-05,
      "loss": 1.0101,
      "step": 1723
    },
    {
      "epoch": 0.8403607116743846,
      "grad_norm": 0.3967483341693878,
      "learning_rate": 1.2401979649486595e-05,
      "loss": 1.1647,
      "step": 1724
    },
    {
      "epoch": 0.8408481598830124,
      "grad_norm": 0.3371667265892029,
      "learning_rate": 1.2327838571793604e-05,
      "loss": 1.0442,
      "step": 1725
    },
    {
      "epoch": 0.8413356080916402,
      "grad_norm": 0.5255165100097656,
      "learning_rate": 1.2253905212689553e-05,
      "loss": 1.161,
      "step": 1726
    },
    {
      "epoch": 0.8418230563002681,
      "grad_norm": 0.3469902575016022,
      "learning_rate": 1.2180179747342213e-05,
      "loss": 1.0652,
      "step": 1727
    },
    {
      "epoch": 0.8423105045088959,
      "grad_norm": 0.40540820360183716,
      "learning_rate": 1.2106662350426746e-05,
      "loss": 1.1305,
      "step": 1728
    },
    {
      "epoch": 0.8427979527175238,
      "grad_norm": 0.3705641031265259,
      "learning_rate": 1.203335319612533e-05,
      "loss": 1.0239,
      "step": 1729
    },
    {
      "epoch": 0.8432854009261516,
      "grad_norm": 0.3371503949165344,
      "learning_rate": 1.196025245812682e-05,
      "loss": 1.0642,
      "step": 1730
    },
    {
      "epoch": 0.8437728491347795,
      "grad_norm": 0.42029133439064026,
      "learning_rate": 1.1887360309626227e-05,
      "loss": 1.0277,
      "step": 1731
    },
    {
      "epoch": 0.8442602973434072,
      "grad_norm": 0.42342185974121094,
      "learning_rate": 1.1814676923324364e-05,
      "loss": 1.1448,
      "step": 1732
    },
    {
      "epoch": 0.8447477455520351,
      "grad_norm": 0.39126384258270264,
      "learning_rate": 1.1742202471427443e-05,
      "loss": 1.0664,
      "step": 1733
    },
    {
      "epoch": 0.8452351937606629,
      "grad_norm": 0.40211716294288635,
      "learning_rate": 1.1669937125646646e-05,
      "loss": 1.0426,
      "step": 1734
    },
    {
      "epoch": 0.8457226419692908,
      "grad_norm": 0.3491050899028778,
      "learning_rate": 1.1597881057197735e-05,
      "loss": 1.1108,
      "step": 1735
    },
    {
      "epoch": 0.8462100901779186,
      "grad_norm": 0.4353748559951782,
      "learning_rate": 1.1526034436800614e-05,
      "loss": 1.0399,
      "step": 1736
    },
    {
      "epoch": 0.8466975383865464,
      "grad_norm": 0.4242027699947357,
      "learning_rate": 1.1454397434679021e-05,
      "loss": 0.9967,
      "step": 1737
    },
    {
      "epoch": 0.8471849865951743,
      "grad_norm": 0.390888512134552,
      "learning_rate": 1.1382970220559963e-05,
      "loss": 1.1165,
      "step": 1738
    },
    {
      "epoch": 0.847672434803802,
      "grad_norm": 0.36227092146873474,
      "learning_rate": 1.1311752963673439e-05,
      "loss": 1.0547,
      "step": 1739
    },
    {
      "epoch": 0.84815988301243,
      "grad_norm": 0.37730830907821655,
      "learning_rate": 1.1240745832752042e-05,
      "loss": 1.1506,
      "step": 1740
    },
    {
      "epoch": 0.8486473312210577,
      "grad_norm": 0.38817811012268066,
      "learning_rate": 1.1169948996030467e-05,
      "loss": 1.282,
      "step": 1741
    },
    {
      "epoch": 0.8491347794296856,
      "grad_norm": 0.45126134157180786,
      "learning_rate": 1.109936262124518e-05,
      "loss": 1.2197,
      "step": 1742
    },
    {
      "epoch": 0.8496222276383134,
      "grad_norm": 0.3940946161746979,
      "learning_rate": 1.1028986875634028e-05,
      "loss": 0.9121,
      "step": 1743
    },
    {
      "epoch": 0.8501096758469413,
      "grad_norm": 0.4234504699707031,
      "learning_rate": 1.095882192593579e-05,
      "loss": 1.1229,
      "step": 1744
    },
    {
      "epoch": 0.8505971240555691,
      "grad_norm": 0.3620837330818176,
      "learning_rate": 1.0888867938389813e-05,
      "loss": 1.0812,
      "step": 1745
    },
    {
      "epoch": 0.851084572264197,
      "grad_norm": 0.3606151342391968,
      "learning_rate": 1.0819125078735681e-05,
      "loss": 1.1089,
      "step": 1746
    },
    {
      "epoch": 0.8515720204728248,
      "grad_norm": 0.4045717716217041,
      "learning_rate": 1.0749593512212697e-05,
      "loss": 1.0729,
      "step": 1747
    },
    {
      "epoch": 0.8520594686814525,
      "grad_norm": 0.524031937122345,
      "learning_rate": 1.068027340355956e-05,
      "loss": 1.1044,
      "step": 1748
    },
    {
      "epoch": 0.8525469168900804,
      "grad_norm": 0.3646783232688904,
      "learning_rate": 1.0611164917013972e-05,
      "loss": 1.0587,
      "step": 1749
    },
    {
      "epoch": 0.8530343650987082,
      "grad_norm": 0.3999814987182617,
      "learning_rate": 1.054226821631229e-05,
      "loss": 1.1673,
      "step": 1750
    },
    {
      "epoch": 0.8535218133073361,
      "grad_norm": 0.37726038694381714,
      "learning_rate": 1.0473583464689074e-05,
      "loss": 1.158,
      "step": 1751
    },
    {
      "epoch": 0.8540092615159639,
      "grad_norm": 0.3894549310207367,
      "learning_rate": 1.0405110824876619e-05,
      "loss": 1.1205,
      "step": 1752
    },
    {
      "epoch": 0.8544967097245918,
      "grad_norm": 0.4052984416484833,
      "learning_rate": 1.0336850459104852e-05,
      "loss": 1.0923,
      "step": 1753
    },
    {
      "epoch": 0.8549841579332196,
      "grad_norm": 0.36014947295188904,
      "learning_rate": 1.0268802529100619e-05,
      "loss": 1.0199,
      "step": 1754
    },
    {
      "epoch": 0.8554716061418475,
      "grad_norm": 0.38177689909935,
      "learning_rate": 1.0200967196087508e-05,
      "loss": 1.1592,
      "step": 1755
    },
    {
      "epoch": 0.8559590543504753,
      "grad_norm": 0.39407598972320557,
      "learning_rate": 1.0133344620785435e-05,
      "loss": 1.0895,
      "step": 1756
    },
    {
      "epoch": 0.856446502559103,
      "grad_norm": 0.37637194991111755,
      "learning_rate": 1.0065934963410173e-05,
      "loss": 1.1463,
      "step": 1757
    },
    {
      "epoch": 0.8569339507677309,
      "grad_norm": 0.4099627733230591,
      "learning_rate": 9.99873838367309e-06,
      "loss": 1.2048,
      "step": 1758
    },
    {
      "epoch": 0.8574213989763587,
      "grad_norm": 0.3457520306110382,
      "learning_rate": 9.931755040780676e-06,
      "loss": 1.0743,
      "step": 1759
    },
    {
      "epoch": 0.8579088471849866,
      "grad_norm": 0.5198414921760559,
      "learning_rate": 9.864985093434243e-06,
      "loss": 1.202,
      "step": 1760
    },
    {
      "epoch": 0.8583962953936144,
      "grad_norm": 0.35789576172828674,
      "learning_rate": 9.798428699829476e-06,
      "loss": 0.9899,
      "step": 1761
    },
    {
      "epoch": 0.8588837436022423,
      "grad_norm": 0.36002370715141296,
      "learning_rate": 9.732086017656117e-06,
      "loss": 1.0878,
      "step": 1762
    },
    {
      "epoch": 0.8593711918108701,
      "grad_norm": 0.3624090254306793,
      "learning_rate": 9.665957204097575e-06,
      "loss": 1.0194,
      "step": 1763
    },
    {
      "epoch": 0.859858640019498,
      "grad_norm": 0.3344210982322693,
      "learning_rate": 9.600042415830535e-06,
      "loss": 1.0616,
      "step": 1764
    },
    {
      "epoch": 0.8603460882281257,
      "grad_norm": 0.3071996867656708,
      "learning_rate": 9.534341809024583e-06,
      "loss": 1.0228,
      "step": 1765
    },
    {
      "epoch": 0.8608335364367536,
      "grad_norm": 0.34209540486335754,
      "learning_rate": 9.468855539341904e-06,
      "loss": 1.0505,
      "step": 1766
    },
    {
      "epoch": 0.8613209846453814,
      "grad_norm": 0.4070102572441101,
      "learning_rate": 9.403583761936806e-06,
      "loss": 1.0844,
      "step": 1767
    },
    {
      "epoch": 0.8618084328540092,
      "grad_norm": 0.3737240731716156,
      "learning_rate": 9.338526631455447e-06,
      "loss": 1.006,
      "step": 1768
    },
    {
      "epoch": 0.8622958810626371,
      "grad_norm": 0.3421536684036255,
      "learning_rate": 9.273684302035402e-06,
      "loss": 1.0558,
      "step": 1769
    },
    {
      "epoch": 0.8627833292712649,
      "grad_norm": 0.3916158676147461,
      "learning_rate": 9.209056927305337e-06,
      "loss": 1.0472,
      "step": 1770
    },
    {
      "epoch": 0.8632707774798928,
      "grad_norm": 0.3504614233970642,
      "learning_rate": 9.144644660384615e-06,
      "loss": 1.2017,
      "step": 1771
    },
    {
      "epoch": 0.8637582256885206,
      "grad_norm": 0.3385731279850006,
      "learning_rate": 9.080447653883007e-06,
      "loss": 1.1729,
      "step": 1772
    },
    {
      "epoch": 0.8642456738971485,
      "grad_norm": 0.40618082880973816,
      "learning_rate": 9.01646605990022e-06,
      "loss": 0.998,
      "step": 1773
    },
    {
      "epoch": 0.8647331221057762,
      "grad_norm": 0.3386068046092987,
      "learning_rate": 8.952700030025597e-06,
      "loss": 0.9828,
      "step": 1774
    },
    {
      "epoch": 0.8652205703144041,
      "grad_norm": 0.34959864616394043,
      "learning_rate": 8.889149715337774e-06,
      "loss": 0.9908,
      "step": 1775
    },
    {
      "epoch": 0.8657080185230319,
      "grad_norm": 0.3689625859260559,
      "learning_rate": 8.825815266404302e-06,
      "loss": 1.1452,
      "step": 1776
    },
    {
      "epoch": 0.8661954667316598,
      "grad_norm": 0.35879069566726685,
      "learning_rate": 8.76269683328127e-06,
      "loss": 1.0694,
      "step": 1777
    },
    {
      "epoch": 0.8666829149402876,
      "grad_norm": 0.41808176040649414,
      "learning_rate": 8.699794565512975e-06,
      "loss": 1.0624,
      "step": 1778
    },
    {
      "epoch": 0.8671703631489154,
      "grad_norm": 0.4924822449684143,
      "learning_rate": 8.637108612131572e-06,
      "loss": 1.1,
      "step": 1779
    },
    {
      "epoch": 0.8676578113575433,
      "grad_norm": 0.3990834057331085,
      "learning_rate": 8.57463912165668e-06,
      "loss": 1.0657,
      "step": 1780
    },
    {
      "epoch": 0.8681452595661711,
      "grad_norm": 0.4121594727039337,
      "learning_rate": 8.512386242095083e-06,
      "loss": 1.0388,
      "step": 1781
    },
    {
      "epoch": 0.868632707774799,
      "grad_norm": 0.33079513907432556,
      "learning_rate": 8.450350120940365e-06,
      "loss": 1.0413,
      "step": 1782
    },
    {
      "epoch": 0.8691201559834267,
      "grad_norm": 0.38366904854774475,
      "learning_rate": 8.388530905172553e-06,
      "loss": 1.1262,
      "step": 1783
    },
    {
      "epoch": 0.8696076041920546,
      "grad_norm": 0.40860646963119507,
      "learning_rate": 8.32692874125771e-06,
      "loss": 1.0713,
      "step": 1784
    },
    {
      "epoch": 0.8700950524006824,
      "grad_norm": 0.36312568187713623,
      "learning_rate": 8.265543775147767e-06,
      "loss": 1.1937,
      "step": 1785
    },
    {
      "epoch": 0.8705825006093103,
      "grad_norm": 0.36420392990112305,
      "learning_rate": 8.204376152279914e-06,
      "loss": 1.0061,
      "step": 1786
    },
    {
      "epoch": 0.8710699488179381,
      "grad_norm": 0.35609522461891174,
      "learning_rate": 8.14342601757645e-06,
      "loss": 1.1563,
      "step": 1787
    },
    {
      "epoch": 0.8715573970265659,
      "grad_norm": 0.3678031265735626,
      "learning_rate": 8.082693515444462e-06,
      "loss": 0.9554,
      "step": 1788
    },
    {
      "epoch": 0.8720448452351938,
      "grad_norm": 0.42413562536239624,
      "learning_rate": 8.022178789775315e-06,
      "loss": 1.0543,
      "step": 1789
    },
    {
      "epoch": 0.8725322934438215,
      "grad_norm": 0.3686645030975342,
      "learning_rate": 7.961881983944431e-06,
      "loss": 1.0988,
      "step": 1790
    },
    {
      "epoch": 0.8730197416524494,
      "grad_norm": 0.4381706714630127,
      "learning_rate": 7.9018032408109e-06,
      "loss": 1.0733,
      "step": 1791
    },
    {
      "epoch": 0.8735071898610772,
      "grad_norm": 0.4402759075164795,
      "learning_rate": 7.841942702717253e-06,
      "loss": 1.1313,
      "step": 1792
    },
    {
      "epoch": 0.8739946380697051,
      "grad_norm": 0.4035143256187439,
      "learning_rate": 7.782300511488928e-06,
      "loss": 0.9872,
      "step": 1793
    },
    {
      "epoch": 0.8744820862783329,
      "grad_norm": 0.3443876802921295,
      "learning_rate": 7.722876808434087e-06,
      "loss": 1.0903,
      "step": 1794
    },
    {
      "epoch": 0.8749695344869608,
      "grad_norm": 0.4076747000217438,
      "learning_rate": 7.663671734343247e-06,
      "loss": 1.1202,
      "step": 1795
    },
    {
      "epoch": 0.8754569826955886,
      "grad_norm": 0.39271125197410583,
      "learning_rate": 7.6046854294889e-06,
      "loss": 1.1263,
      "step": 1796
    },
    {
      "epoch": 0.8759444309042165,
      "grad_norm": 0.4301184117794037,
      "learning_rate": 7.545918033625254e-06,
      "loss": 1.2152,
      "step": 1797
    },
    {
      "epoch": 0.8764318791128443,
      "grad_norm": 0.36459746956825256,
      "learning_rate": 7.487369685987844e-06,
      "loss": 1.1336,
      "step": 1798
    },
    {
      "epoch": 0.876919327321472,
      "grad_norm": 0.4065280556678772,
      "learning_rate": 7.42904052529324e-06,
      "loss": 1.1319,
      "step": 1799
    },
    {
      "epoch": 0.8774067755300999,
      "grad_norm": 0.37590742111206055,
      "learning_rate": 7.370930689738642e-06,
      "loss": 1.0246,
      "step": 1800
    },
    {
      "epoch": 0.8778942237387277,
      "grad_norm": 0.40792787075042725,
      "learning_rate": 7.3130403170016955e-06,
      "loss": 1.0296,
      "step": 1801
    },
    {
      "epoch": 0.8783816719473556,
      "grad_norm": 0.35659080743789673,
      "learning_rate": 7.255369544240021e-06,
      "loss": 1.008,
      "step": 1802
    },
    {
      "epoch": 0.8788691201559834,
      "grad_norm": 0.36563870310783386,
      "learning_rate": 7.197918508090973e-06,
      "loss": 1.0822,
      "step": 1803
    },
    {
      "epoch": 0.8793565683646113,
      "grad_norm": 0.3770640194416046,
      "learning_rate": 7.140687344671282e-06,
      "loss": 1.1595,
      "step": 1804
    },
    {
      "epoch": 0.8798440165732391,
      "grad_norm": 0.45971986651420593,
      "learning_rate": 7.0836761895767265e-06,
      "loss": 1.1517,
      "step": 1805
    },
    {
      "epoch": 0.880331464781867,
      "grad_norm": 0.32596591114997864,
      "learning_rate": 7.02688517788187e-06,
      "loss": 1.0074,
      "step": 1806
    },
    {
      "epoch": 0.8808189129904948,
      "grad_norm": 0.3988722562789917,
      "learning_rate": 6.970314444139636e-06,
      "loss": 1.1779,
      "step": 1807
    },
    {
      "epoch": 0.8813063611991226,
      "grad_norm": 0.3868488073348999,
      "learning_rate": 6.913964122381134e-06,
      "loss": 1.0499,
      "step": 1808
    },
    {
      "epoch": 0.8817938094077504,
      "grad_norm": 0.3938734531402588,
      "learning_rate": 6.8578343461151885e-06,
      "loss": 0.9922,
      "step": 1809
    },
    {
      "epoch": 0.8822812576163782,
      "grad_norm": 0.412203311920166,
      "learning_rate": 6.8019252483281e-06,
      "loss": 1.0873,
      "step": 1810
    },
    {
      "epoch": 0.8827687058250061,
      "grad_norm": 0.4144209027290344,
      "learning_rate": 6.746236961483399e-06,
      "loss": 1.0318,
      "step": 1811
    },
    {
      "epoch": 0.8832561540336339,
      "grad_norm": 0.4311385452747345,
      "learning_rate": 6.690769617521342e-06,
      "loss": 1.2014,
      "step": 1812
    },
    {
      "epoch": 0.8837436022422618,
      "grad_norm": 0.4309900104999542,
      "learning_rate": 6.635523347858763e-06,
      "loss": 1.0218,
      "step": 1813
    },
    {
      "epoch": 0.8842310504508896,
      "grad_norm": 0.43849343061447144,
      "learning_rate": 6.580498283388758e-06,
      "loss": 1.1611,
      "step": 1814
    },
    {
      "epoch": 0.8847184986595175,
      "grad_norm": 0.37283089756965637,
      "learning_rate": 6.525694554480277e-06,
      "loss": 1.0965,
      "step": 1815
    },
    {
      "epoch": 0.8852059468681452,
      "grad_norm": 0.3518564999103546,
      "learning_rate": 6.471112290977877e-06,
      "loss": 1.1687,
      "step": 1816
    },
    {
      "epoch": 0.8856933950767731,
      "grad_norm": 0.3284810483455658,
      "learning_rate": 6.416751622201389e-06,
      "loss": 1.0962,
      "step": 1817
    },
    {
      "epoch": 0.8861808432854009,
      "grad_norm": 0.39844444394111633,
      "learning_rate": 6.362612676945678e-06,
      "loss": 0.9922,
      "step": 1818
    },
    {
      "epoch": 0.8866682914940287,
      "grad_norm": 0.3707285523414612,
      "learning_rate": 6.308695583480251e-06,
      "loss": 1.0288,
      "step": 1819
    },
    {
      "epoch": 0.8871557397026566,
      "grad_norm": 0.3834349513053894,
      "learning_rate": 6.2550004695489775e-06,
      "loss": 1.169,
      "step": 1820
    },
    {
      "epoch": 0.8876431879112844,
      "grad_norm": 0.37593066692352295,
      "learning_rate": 6.201527462369827e-06,
      "loss": 0.9882,
      "step": 1821
    },
    {
      "epoch": 0.8881306361199123,
      "grad_norm": 0.3793316185474396,
      "learning_rate": 6.1482766886345134e-06,
      "loss": 1.0577,
      "step": 1822
    },
    {
      "epoch": 0.8886180843285401,
      "grad_norm": 0.2997931241989136,
      "learning_rate": 6.095248274508236e-06,
      "loss": 0.9785,
      "step": 1823
    },
    {
      "epoch": 0.889105532537168,
      "grad_norm": 0.3628780245780945,
      "learning_rate": 6.042442345629384e-06,
      "loss": 1.0471,
      "step": 1824
    },
    {
      "epoch": 0.8895929807457957,
      "grad_norm": 0.37528806924819946,
      "learning_rate": 5.989859027109179e-06,
      "loss": 1.1316,
      "step": 1825
    },
    {
      "epoch": 0.8900804289544236,
      "grad_norm": 0.3268503248691559,
      "learning_rate": 5.937498443531442e-06,
      "loss": 0.9988,
      "step": 1826
    },
    {
      "epoch": 0.8905678771630514,
      "grad_norm": 0.4406483471393585,
      "learning_rate": 5.885360718952293e-06,
      "loss": 1.0693,
      "step": 1827
    },
    {
      "epoch": 0.8910553253716793,
      "grad_norm": 0.37791526317596436,
      "learning_rate": 5.83344597689981e-06,
      "loss": 1.044,
      "step": 1828
    },
    {
      "epoch": 0.8915427735803071,
      "grad_norm": 0.35093453526496887,
      "learning_rate": 5.78175434037378e-06,
      "loss": 0.9856,
      "step": 1829
    },
    {
      "epoch": 0.8920302217889349,
      "grad_norm": 0.39958176016807556,
      "learning_rate": 5.73028593184538e-06,
      "loss": 1.0952,
      "step": 1830
    },
    {
      "epoch": 0.8925176699975628,
      "grad_norm": 0.34690409898757935,
      "learning_rate": 5.679040873256924e-06,
      "loss": 1.0718,
      "step": 1831
    },
    {
      "epoch": 0.8930051182061906,
      "grad_norm": 0.3513603210449219,
      "learning_rate": 5.628019286021535e-06,
      "loss": 1.0157,
      "step": 1832
    },
    {
      "epoch": 0.8934925664148184,
      "grad_norm": 0.40844032168388367,
      "learning_rate": 5.5772212910228606e-06,
      "loss": 1.0604,
      "step": 1833
    },
    {
      "epoch": 0.8939800146234462,
      "grad_norm": 0.3466549813747406,
      "learning_rate": 5.526647008614849e-06,
      "loss": 1.1159,
      "step": 1834
    },
    {
      "epoch": 0.8944674628320741,
      "grad_norm": 0.37515002489089966,
      "learning_rate": 5.476296558621363e-06,
      "loss": 1.0628,
      "step": 1835
    },
    {
      "epoch": 0.8949549110407019,
      "grad_norm": 0.34914758801460266,
      "learning_rate": 5.426170060335944e-06,
      "loss": 0.9619,
      "step": 1836
    },
    {
      "epoch": 0.8954423592493298,
      "grad_norm": 0.38144829869270325,
      "learning_rate": 5.376267632521592e-06,
      "loss": 1.157,
      "step": 1837
    },
    {
      "epoch": 0.8959298074579576,
      "grad_norm": 0.3572610318660736,
      "learning_rate": 5.326589393410386e-06,
      "loss": 1.0672,
      "step": 1838
    },
    {
      "epoch": 0.8964172556665855,
      "grad_norm": 0.46000754833221436,
      "learning_rate": 5.277135460703198e-06,
      "loss": 1.1727,
      "step": 1839
    },
    {
      "epoch": 0.8969047038752133,
      "grad_norm": 0.3217918276786804,
      "learning_rate": 5.2279059515695336e-06,
      "loss": 0.9638,
      "step": 1840
    },
    {
      "epoch": 0.897392152083841,
      "grad_norm": 0.38089826703071594,
      "learning_rate": 5.17890098264715e-06,
      "loss": 0.99,
      "step": 1841
    },
    {
      "epoch": 0.8978796002924689,
      "grad_norm": 0.3441983759403229,
      "learning_rate": 5.1301206700417935e-06,
      "loss": 1.0239,
      "step": 1842
    },
    {
      "epoch": 0.8983670485010967,
      "grad_norm": 0.4059714078903198,
      "learning_rate": 5.08156512932696e-06,
      "loss": 1.112,
      "step": 1843
    },
    {
      "epoch": 0.8988544967097246,
      "grad_norm": 0.42819744348526,
      "learning_rate": 5.033234475543613e-06,
      "loss": 1.2109,
      "step": 1844
    },
    {
      "epoch": 0.8993419449183524,
      "grad_norm": 0.34663084149360657,
      "learning_rate": 4.985128823199858e-06,
      "loss": 0.9653,
      "step": 1845
    },
    {
      "epoch": 0.8998293931269803,
      "grad_norm": 0.41473668813705444,
      "learning_rate": 4.937248286270757e-06,
      "loss": 0.9576,
      "step": 1846
    },
    {
      "epoch": 0.9003168413356081,
      "grad_norm": 0.3552911877632141,
      "learning_rate": 4.8895929781979765e-06,
      "loss": 1.0767,
      "step": 1847
    },
    {
      "epoch": 0.900804289544236,
      "grad_norm": 0.3645229637622833,
      "learning_rate": 4.8421630118895775e-06,
      "loss": 1.1575,
      "step": 1848
    },
    {
      "epoch": 0.9012917377528638,
      "grad_norm": 0.38062170147895813,
      "learning_rate": 4.794958499719704e-06,
      "loss": 1.0531,
      "step": 1849
    },
    {
      "epoch": 0.9017791859614915,
      "grad_norm": 0.3905470073223114,
      "learning_rate": 4.747979553528404e-06,
      "loss": 0.9985,
      "step": 1850
    },
    {
      "epoch": 0.9022666341701194,
      "grad_norm": 0.39329779148101807,
      "learning_rate": 4.701226284621218e-06,
      "loss": 1.0583,
      "step": 1851
    },
    {
      "epoch": 0.9027540823787472,
      "grad_norm": 0.34848731756210327,
      "learning_rate": 4.654698803769031e-06,
      "loss": 1.0841,
      "step": 1852
    },
    {
      "epoch": 0.9032415305873751,
      "grad_norm": 0.3833371102809906,
      "learning_rate": 4.608397221207794e-06,
      "loss": 1.0772,
      "step": 1853
    },
    {
      "epoch": 0.9037289787960029,
      "grad_norm": 0.3949624001979828,
      "learning_rate": 4.5623216466382235e-06,
      "loss": 1.0457,
      "step": 1854
    },
    {
      "epoch": 0.9042164270046308,
      "grad_norm": 0.3338606059551239,
      "learning_rate": 4.516472189225574e-06,
      "loss": 0.9934,
      "step": 1855
    },
    {
      "epoch": 0.9047038752132586,
      "grad_norm": 0.41134586930274963,
      "learning_rate": 4.4708489575993496e-06,
      "loss": 0.9311,
      "step": 1856
    },
    {
      "epoch": 0.9051913234218865,
      "grad_norm": 0.31631216406822205,
      "learning_rate": 4.425452059853086e-06,
      "loss": 1.0665,
      "step": 1857
    },
    {
      "epoch": 0.9056787716305142,
      "grad_norm": 0.3777748942375183,
      "learning_rate": 4.3802816035440786e-06,
      "loss": 1.1026,
      "step": 1858
    },
    {
      "epoch": 0.9061662198391421,
      "grad_norm": 0.37006500363349915,
      "learning_rate": 4.335337695693076e-06,
      "loss": 1.0635,
      "step": 1859
    },
    {
      "epoch": 0.9066536680477699,
      "grad_norm": 0.34530532360076904,
      "learning_rate": 4.290620442784144e-06,
      "loss": 1.0578,
      "step": 1860
    },
    {
      "epoch": 0.9071411162563977,
      "grad_norm": 0.34728294610977173,
      "learning_rate": 4.246129950764299e-06,
      "loss": 0.9875,
      "step": 1861
    },
    {
      "epoch": 0.9076285644650256,
      "grad_norm": 0.37285178899765015,
      "learning_rate": 4.201866325043291e-06,
      "loss": 1.0715,
      "step": 1862
    },
    {
      "epoch": 0.9081160126736534,
      "grad_norm": 0.47885051369667053,
      "learning_rate": 4.157829670493418e-06,
      "loss": 1.0613,
      "step": 1863
    },
    {
      "epoch": 0.9086034608822813,
      "grad_norm": 0.3461344838142395,
      "learning_rate": 4.114020091449166e-06,
      "loss": 1.165,
      "step": 1864
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.38396957516670227,
      "learning_rate": 4.07043769170703e-06,
      "loss": 1.1602,
      "step": 1865
    },
    {
      "epoch": 0.909578357299537,
      "grad_norm": 0.3966604769229889,
      "learning_rate": 4.027082574525276e-06,
      "loss": 1.0622,
      "step": 1866
    },
    {
      "epoch": 0.9100658055081647,
      "grad_norm": 0.3196715712547302,
      "learning_rate": 3.983954842623683e-06,
      "loss": 1.0808,
      "step": 1867
    },
    {
      "epoch": 0.9105532537167926,
      "grad_norm": 0.3249403238296509,
      "learning_rate": 3.941054598183247e-06,
      "loss": 1.0614,
      "step": 1868
    },
    {
      "epoch": 0.9110407019254204,
      "grad_norm": 0.3917873501777649,
      "learning_rate": 3.898381942846041e-06,
      "loss": 1.0006,
      "step": 1869
    },
    {
      "epoch": 0.9115281501340483,
      "grad_norm": 0.46330371499061584,
      "learning_rate": 3.855936977714902e-06,
      "loss": 1.0365,
      "step": 1870
    },
    {
      "epoch": 0.9120155983426761,
      "grad_norm": 0.3643684983253479,
      "learning_rate": 3.8137198033531996e-06,
      "loss": 1.0078,
      "step": 1871
    },
    {
      "epoch": 0.9125030465513039,
      "grad_norm": 0.42036598920822144,
      "learning_rate": 3.7717305197845885e-06,
      "loss": 1.0816,
      "step": 1872
    },
    {
      "epoch": 0.9129904947599318,
      "grad_norm": 0.31484517455101013,
      "learning_rate": 3.7299692264928354e-06,
      "loss": 0.9581,
      "step": 1873
    },
    {
      "epoch": 0.9134779429685596,
      "grad_norm": 0.4296029806137085,
      "learning_rate": 3.688436022421504e-06,
      "loss": 1.0468,
      "step": 1874
    },
    {
      "epoch": 0.9139653911771874,
      "grad_norm": 0.32022643089294434,
      "learning_rate": 3.6471310059737583e-06,
      "loss": 0.9181,
      "step": 1875
    },
    {
      "epoch": 0.9144528393858152,
      "grad_norm": 0.4117008447647095,
      "learning_rate": 3.6060542750121493e-06,
      "loss": 1.1994,
      "step": 1876
    },
    {
      "epoch": 0.9149402875944431,
      "grad_norm": 0.4310991168022156,
      "learning_rate": 3.565205926858317e-06,
      "loss": 1.0439,
      "step": 1877
    },
    {
      "epoch": 0.9154277358030709,
      "grad_norm": 0.4342654347419739,
      "learning_rate": 3.5245860582928334e-06,
      "loss": 1.011,
      "step": 1878
    },
    {
      "epoch": 0.9159151840116988,
      "grad_norm": 0.37502872943878174,
      "learning_rate": 3.484194765554949e-06,
      "loss": 1.1992,
      "step": 1879
    },
    {
      "epoch": 0.9164026322203266,
      "grad_norm": 0.4077601730823517,
      "learning_rate": 3.444032144342324e-06,
      "loss": 1.2782,
      "step": 1880
    },
    {
      "epoch": 0.9168900804289544,
      "grad_norm": 0.4238269329071045,
      "learning_rate": 3.4040982898108644e-06,
      "loss": 1.1753,
      "step": 1881
    },
    {
      "epoch": 0.9173775286375823,
      "grad_norm": 0.382989764213562,
      "learning_rate": 3.364393296574453e-06,
      "loss": 1.0529,
      "step": 1882
    },
    {
      "epoch": 0.91786497684621,
      "grad_norm": 0.38156425952911377,
      "learning_rate": 3.3249172587047406e-06,
      "loss": 1.0902,
      "step": 1883
    },
    {
      "epoch": 0.9183524250548379,
      "grad_norm": 0.39040836691856384,
      "learning_rate": 3.2856702697309337e-06,
      "loss": 1.0067,
      "step": 1884
    },
    {
      "epoch": 0.9188398732634657,
      "grad_norm": 0.4316060543060303,
      "learning_rate": 3.2466524226395177e-06,
      "loss": 1.2456,
      "step": 1885
    },
    {
      "epoch": 0.9193273214720936,
      "grad_norm": 0.4829002618789673,
      "learning_rate": 3.2078638098741674e-06,
      "loss": 0.9577,
      "step": 1886
    },
    {
      "epoch": 0.9198147696807214,
      "grad_norm": 0.4624776840209961,
      "learning_rate": 3.1693045233353593e-06,
      "loss": 1.1169,
      "step": 1887
    },
    {
      "epoch": 0.9203022178893493,
      "grad_norm": 0.37384673953056335,
      "learning_rate": 3.1309746543802474e-06,
      "loss": 1.051,
      "step": 1888
    },
    {
      "epoch": 0.9207896660979771,
      "grad_norm": 0.3338543176651001,
      "learning_rate": 3.0928742938224896e-06,
      "loss": 1.1515,
      "step": 1889
    },
    {
      "epoch": 0.921277114306605,
      "grad_norm": 0.4934646189212799,
      "learning_rate": 3.0550035319319215e-06,
      "loss": 0.9284,
      "step": 1890
    },
    {
      "epoch": 0.9217645625152328,
      "grad_norm": 0.39777880907058716,
      "learning_rate": 3.017362458434403e-06,
      "loss": 1.0181,
      "step": 1891
    },
    {
      "epoch": 0.9222520107238605,
      "grad_norm": 0.4428083300590515,
      "learning_rate": 2.9799511625116294e-06,
      "loss": 1.1124,
      "step": 1892
    },
    {
      "epoch": 0.9227394589324884,
      "grad_norm": 0.4063623249530792,
      "learning_rate": 2.9427697328008763e-06,
      "loss": 1.0634,
      "step": 1893
    },
    {
      "epoch": 0.9232269071411162,
      "grad_norm": 0.36762094497680664,
      "learning_rate": 2.905818257394799e-06,
      "loss": 1.0807,
      "step": 1894
    },
    {
      "epoch": 0.9237143553497441,
      "grad_norm": 0.4164426624774933,
      "learning_rate": 2.869096823841244e-06,
      "loss": 1.1103,
      "step": 1895
    },
    {
      "epoch": 0.9242018035583719,
      "grad_norm": 0.43294432759284973,
      "learning_rate": 2.832605519143017e-06,
      "loss": 1.0317,
      "step": 1896
    },
    {
      "epoch": 0.9246892517669998,
      "grad_norm": 0.3280065059661865,
      "learning_rate": 2.7963444297576912e-06,
      "loss": 1.1099,
      "step": 1897
    },
    {
      "epoch": 0.9251766999756276,
      "grad_norm": 0.5042917728424072,
      "learning_rate": 2.760313641597401e-06,
      "loss": 1.173,
      "step": 1898
    },
    {
      "epoch": 0.9256641481842555,
      "grad_norm": 0.5354404449462891,
      "learning_rate": 2.7245132400286366e-06,
      "loss": 1.098,
      "step": 1899
    },
    {
      "epoch": 0.9261515963928832,
      "grad_norm": 0.4489479959011078,
      "learning_rate": 2.6889433098720273e-06,
      "loss": 1.0235,
      "step": 1900
    },
    {
      "epoch": 0.9266390446015111,
      "grad_norm": 0.3835557997226715,
      "learning_rate": 2.6536039354021715e-06,
      "loss": 1.05,
      "step": 1901
    },
    {
      "epoch": 0.9271264928101389,
      "grad_norm": 0.37679949402809143,
      "learning_rate": 2.618495200347426e-06,
      "loss": 0.8709,
      "step": 1902
    },
    {
      "epoch": 0.9276139410187667,
      "grad_norm": 0.38232672214508057,
      "learning_rate": 2.5836171878896755e-06,
      "loss": 1.2003,
      "step": 1903
    },
    {
      "epoch": 0.9281013892273946,
      "grad_norm": 0.31981360912323,
      "learning_rate": 2.548969980664173e-06,
      "loss": 0.9475,
      "step": 1904
    },
    {
      "epoch": 0.9285888374360224,
      "grad_norm": 0.40195339918136597,
      "learning_rate": 2.514553660759356e-06,
      "loss": 1.0274,
      "step": 1905
    },
    {
      "epoch": 0.9290762856446503,
      "grad_norm": 0.332801878452301,
      "learning_rate": 2.4803683097165964e-06,
      "loss": 0.9474,
      "step": 1906
    },
    {
      "epoch": 0.9295637338532781,
      "grad_norm": 0.3179659843444824,
      "learning_rate": 2.446414008530051e-06,
      "loss": 1.0433,
      "step": 1907
    },
    {
      "epoch": 0.930051182061906,
      "grad_norm": 0.39270058274269104,
      "learning_rate": 2.41269083764647e-06,
      "loss": 1.0861,
      "step": 1908
    },
    {
      "epoch": 0.9305386302705337,
      "grad_norm": 0.35846665501594543,
      "learning_rate": 2.379198876964961e-06,
      "loss": 1.0013,
      "step": 1909
    },
    {
      "epoch": 0.9310260784791616,
      "grad_norm": 0.3707181513309479,
      "learning_rate": 2.345938205836884e-06,
      "loss": 0.9876,
      "step": 1910
    },
    {
      "epoch": 0.9315135266877894,
      "grad_norm": 0.41161656379699707,
      "learning_rate": 2.3129089030655584e-06,
      "loss": 1.1426,
      "step": 1911
    },
    {
      "epoch": 0.9320009748964172,
      "grad_norm": 0.42365649342536926,
      "learning_rate": 2.280111046906175e-06,
      "loss": 1.1656,
      "step": 1912
    },
    {
      "epoch": 0.9324884231050451,
      "grad_norm": 0.3958723545074463,
      "learning_rate": 2.2475447150655415e-06,
      "loss": 1.1817,
      "step": 1913
    },
    {
      "epoch": 0.9329758713136729,
      "grad_norm": 0.4035016894340515,
      "learning_rate": 2.215209984701927e-06,
      "loss": 1.066,
      "step": 1914
    },
    {
      "epoch": 0.9334633195223008,
      "grad_norm": 0.37086769938468933,
      "learning_rate": 2.1831069324248942e-06,
      "loss": 1.0681,
      "step": 1915
    },
    {
      "epoch": 0.9339507677309286,
      "grad_norm": 0.4109595715999603,
      "learning_rate": 2.151235634295079e-06,
      "loss": 1.1106,
      "step": 1916
    },
    {
      "epoch": 0.9344382159395564,
      "grad_norm": 0.37377893924713135,
      "learning_rate": 2.119596165824023e-06,
      "loss": 0.965,
      "step": 1917
    },
    {
      "epoch": 0.9349256641481842,
      "grad_norm": 0.34386566281318665,
      "learning_rate": 2.088188601974017e-06,
      "loss": 1.0639,
      "step": 1918
    },
    {
      "epoch": 0.9354131123568121,
      "grad_norm": 0.3515344262123108,
      "learning_rate": 2.057013017157905e-06,
      "loss": 1.0336,
      "step": 1919
    },
    {
      "epoch": 0.9359005605654399,
      "grad_norm": 0.3488283157348633,
      "learning_rate": 2.0260694852389015e-06,
      "loss": 1.0284,
      "step": 1920
    },
    {
      "epoch": 0.9363880087740678,
      "grad_norm": 0.32755133509635925,
      "learning_rate": 1.995358079530463e-06,
      "loss": 1.0561,
      "step": 1921
    },
    {
      "epoch": 0.9368754569826956,
      "grad_norm": 0.3984243869781494,
      "learning_rate": 1.9648788727960276e-06,
      "loss": 1.0979,
      "step": 1922
    },
    {
      "epoch": 0.9373629051913234,
      "grad_norm": 0.3786798119544983,
      "learning_rate": 1.9346319372489318e-06,
      "loss": 1.0841,
      "step": 1923
    },
    {
      "epoch": 0.9378503533999513,
      "grad_norm": 0.3391832113265991,
      "learning_rate": 1.9046173445521509e-06,
      "loss": 1.0141,
      "step": 1924
    },
    {
      "epoch": 0.938337801608579,
      "grad_norm": 0.4297967255115509,
      "learning_rate": 1.874835165818256e-06,
      "loss": 1.1618,
      "step": 1925
    },
    {
      "epoch": 0.9388252498172069,
      "grad_norm": 0.4208715260028839,
      "learning_rate": 1.8452854716090928e-06,
      "loss": 1.1664,
      "step": 1926
    },
    {
      "epoch": 0.9393126980258347,
      "grad_norm": 0.39006009697914124,
      "learning_rate": 1.8159683319357135e-06,
      "loss": 1.1445,
      "step": 1927
    },
    {
      "epoch": 0.9398001462344626,
      "grad_norm": 0.33365562558174133,
      "learning_rate": 1.7868838162582114e-06,
      "loss": 1.1154,
      "step": 1928
    },
    {
      "epoch": 0.9402875944430904,
      "grad_norm": 0.4178657531738281,
      "learning_rate": 1.7580319934855094e-06,
      "loss": 1.1662,
      "step": 1929
    },
    {
      "epoch": 0.9407750426517183,
      "grad_norm": 0.47014760971069336,
      "learning_rate": 1.729412931975205e-06,
      "loss": 1.0061,
      "step": 1930
    },
    {
      "epoch": 0.9412624908603461,
      "grad_norm": 0.3823905885219574,
      "learning_rate": 1.7010266995334811e-06,
      "loss": 1.0879,
      "step": 1931
    },
    {
      "epoch": 0.941749939068974,
      "grad_norm": 0.41770321130752563,
      "learning_rate": 1.6728733634148064e-06,
      "loss": 1.0673,
      "step": 1932
    },
    {
      "epoch": 0.9422373872776018,
      "grad_norm": 0.36765381693840027,
      "learning_rate": 1.644952990321913e-06,
      "loss": 1.0879,
      "step": 1933
    },
    {
      "epoch": 0.9427248354862295,
      "grad_norm": 0.4739663600921631,
      "learning_rate": 1.6172656464055747e-06,
      "loss": 1.1542,
      "step": 1934
    },
    {
      "epoch": 0.9432122836948574,
      "grad_norm": 0.4071044325828552,
      "learning_rate": 1.589811397264429e-06,
      "loss": 1.1813,
      "step": 1935
    },
    {
      "epoch": 0.9436997319034852,
      "grad_norm": 0.35680529475212097,
      "learning_rate": 1.5625903079448667e-06,
      "loss": 1.1284,
      "step": 1936
    },
    {
      "epoch": 0.9441871801121131,
      "grad_norm": 0.41342172026634216,
      "learning_rate": 1.5356024429408867e-06,
      "loss": 1.1501,
      "step": 1937
    },
    {
      "epoch": 0.9446746283207409,
      "grad_norm": 0.3705962598323822,
      "learning_rate": 1.5088478661938855e-06,
      "loss": 1.0652,
      "step": 1938
    },
    {
      "epoch": 0.9451620765293688,
      "grad_norm": 0.37899836897850037,
      "learning_rate": 1.4823266410925463e-06,
      "loss": 1.018,
      "step": 1939
    },
    {
      "epoch": 0.9456495247379966,
      "grad_norm": 0.38751640915870667,
      "learning_rate": 1.4560388304726613e-06,
      "loss": 1.1265,
      "step": 1940
    },
    {
      "epoch": 0.9461369729466245,
      "grad_norm": 0.32274675369262695,
      "learning_rate": 1.429984496617054e-06,
      "loss": 1.0278,
      "step": 1941
    },
    {
      "epoch": 0.9466244211552522,
      "grad_norm": 0.37253156304359436,
      "learning_rate": 1.4041637012553345e-06,
      "loss": 1.062,
      "step": 1942
    },
    {
      "epoch": 0.94711186936388,
      "grad_norm": 0.38027337193489075,
      "learning_rate": 1.3785765055638e-06,
      "loss": 1.1756,
      "step": 1943
    },
    {
      "epoch": 0.9475993175725079,
      "grad_norm": 0.38690176606178284,
      "learning_rate": 1.3532229701653242e-06,
      "loss": 1.0456,
      "step": 1944
    },
    {
      "epoch": 0.9480867657811357,
      "grad_norm": 0.3302153944969177,
      "learning_rate": 1.3281031551291233e-06,
      "loss": 0.9826,
      "step": 1945
    },
    {
      "epoch": 0.9485742139897636,
      "grad_norm": 0.34769314527511597,
      "learning_rate": 1.303217119970712e-06,
      "loss": 1.1577,
      "step": 1946
    },
    {
      "epoch": 0.9490616621983914,
      "grad_norm": 0.33927199244499207,
      "learning_rate": 1.2785649236517038e-06,
      "loss": 0.9491,
      "step": 1947
    },
    {
      "epoch": 0.9495491104070193,
      "grad_norm": 0.37198346853256226,
      "learning_rate": 1.2541466245796885e-06,
      "loss": 1.0319,
      "step": 1948
    },
    {
      "epoch": 0.9500365586156471,
      "grad_norm": 0.39865556359291077,
      "learning_rate": 1.2299622806080767e-06,
      "loss": 0.9899,
      "step": 1949
    },
    {
      "epoch": 0.950524006824275,
      "grad_norm": 0.4067472517490387,
      "learning_rate": 1.2060119490360123e-06,
      "loss": 1.1005,
      "step": 1950
    },
    {
      "epoch": 0.9510114550329027,
      "grad_norm": 0.388704389333725,
      "learning_rate": 1.1822956866081702e-06,
      "loss": 0.9327,
      "step": 1951
    },
    {
      "epoch": 0.9514989032415306,
      "grad_norm": 0.4057963788509369,
      "learning_rate": 1.1588135495146478e-06,
      "loss": 1.1373,
      "step": 1952
    },
    {
      "epoch": 0.9519863514501584,
      "grad_norm": 0.42301106452941895,
      "learning_rate": 1.1355655933908638e-06,
      "loss": 1.0342,
      "step": 1953
    },
    {
      "epoch": 0.9524737996587862,
      "grad_norm": 0.39528149366378784,
      "learning_rate": 1.11255187331738e-06,
      "loss": 1.1442,
      "step": 1954
    },
    {
      "epoch": 0.9529612478674141,
      "grad_norm": 0.39889761805534363,
      "learning_rate": 1.0897724438198142e-06,
      "loss": 1.0789,
      "step": 1955
    },
    {
      "epoch": 0.9534486960760419,
      "grad_norm": 0.42788776755332947,
      "learning_rate": 1.067227358868661e-06,
      "loss": 1.0739,
      "step": 1956
    },
    {
      "epoch": 0.9539361442846698,
      "grad_norm": 0.42129066586494446,
      "learning_rate": 1.0449166718792147e-06,
      "loss": 1.0375,
      "step": 1957
    },
    {
      "epoch": 0.9544235924932976,
      "grad_norm": 0.4017135798931122,
      "learning_rate": 1.0228404357113919e-06,
      "loss": 1.1505,
      "step": 1958
    },
    {
      "epoch": 0.9549110407019255,
      "grad_norm": 0.38789933919906616,
      "learning_rate": 1.0009987026696532e-06,
      "loss": 1.0732,
      "step": 1959
    },
    {
      "epoch": 0.9553984889105532,
      "grad_norm": 0.4593614935874939,
      "learning_rate": 9.793915245028596e-07,
      "loss": 1.242,
      "step": 1960
    },
    {
      "epoch": 0.9558859371191811,
      "grad_norm": 0.3683159649372101,
      "learning_rate": 9.580189524041272e-07,
      "loss": 1.1475,
      "step": 1961
    },
    {
      "epoch": 0.9563733853278089,
      "grad_norm": 0.3410833179950714,
      "learning_rate": 9.368810370107617e-07,
      "loss": 1.0107,
      "step": 1962
    },
    {
      "epoch": 0.9568608335364368,
      "grad_norm": 0.37654101848602295,
      "learning_rate": 9.159778284040799e-07,
      "loss": 1.0231,
      "step": 1963
    },
    {
      "epoch": 0.9573482817450646,
      "grad_norm": 0.41946423053741455,
      "learning_rate": 8.953093761093323e-07,
      "loss": 1.0215,
      "step": 1964
    },
    {
      "epoch": 0.9578357299536924,
      "grad_norm": 0.4294183552265167,
      "learning_rate": 8.748757290955478e-07,
      "loss": 1.0651,
      "step": 1965
    },
    {
      "epoch": 0.9583231781623203,
      "grad_norm": 0.4403562843799591,
      "learning_rate": 8.546769357754447e-07,
      "loss": 1.0721,
      "step": 1966
    },
    {
      "epoch": 0.958810626370948,
      "grad_norm": 0.504021167755127,
      "learning_rate": 8.347130440053419e-07,
      "loss": 1.0633,
      "step": 1967
    },
    {
      "epoch": 0.9592980745795759,
      "grad_norm": 0.37429291009902954,
      "learning_rate": 8.149841010849923e-07,
      "loss": 0.993,
      "step": 1968
    },
    {
      "epoch": 0.9597855227882037,
      "grad_norm": 0.40227779746055603,
      "learning_rate": 7.95490153757461e-07,
      "loss": 1.0361,
      "step": 1969
    },
    {
      "epoch": 0.9602729709968316,
      "grad_norm": 0.38147246837615967,
      "learning_rate": 7.762312482091027e-07,
      "loss": 1.04,
      "step": 1970
    },
    {
      "epoch": 0.9607604192054594,
      "grad_norm": 0.3755912184715271,
      "learning_rate": 7.572074300693621e-07,
      "loss": 1.0474,
      "step": 1971
    },
    {
      "epoch": 0.9612478674140873,
      "grad_norm": 0.38925471901893616,
      "learning_rate": 7.384187444107071e-07,
      "loss": 1.1567,
      "step": 1972
    },
    {
      "epoch": 0.9617353156227151,
      "grad_norm": 0.39746856689453125,
      "learning_rate": 7.19865235748507e-07,
      "loss": 1.1433,
      "step": 1973
    },
    {
      "epoch": 0.9622227638313429,
      "grad_norm": 0.34805506467819214,
      "learning_rate": 7.015469480409542e-07,
      "loss": 0.9856,
      "step": 1974
    },
    {
      "epoch": 0.9627102120399708,
      "grad_norm": 0.4245430529117584,
      "learning_rate": 6.834639246889096e-07,
      "loss": 1.0389,
      "step": 1975
    },
    {
      "epoch": 0.9631976602485985,
      "grad_norm": 0.3707633316516876,
      "learning_rate": 6.656162085358686e-07,
      "loss": 1.2374,
      "step": 1976
    },
    {
      "epoch": 0.9636851084572264,
      "grad_norm": 0.433009535074234,
      "learning_rate": 6.480038418677947e-07,
      "loss": 1.0547,
      "step": 1977
    },
    {
      "epoch": 0.9641725566658542,
      "grad_norm": 0.3832406997680664,
      "learning_rate": 6.306268664130533e-07,
      "loss": 1.0589,
      "step": 1978
    },
    {
      "epoch": 0.9646600048744821,
      "grad_norm": 0.3488512635231018,
      "learning_rate": 6.134853233422999e-07,
      "loss": 1.0335,
      "step": 1979
    },
    {
      "epoch": 0.9651474530831099,
      "grad_norm": 0.40218132734298706,
      "learning_rate": 5.965792532683923e-07,
      "loss": 1.0572,
      "step": 1980
    },
    {
      "epoch": 0.9656349012917378,
      "grad_norm": 0.43588852882385254,
      "learning_rate": 5.799086962463007e-07,
      "loss": 1.1437,
      "step": 1981
    },
    {
      "epoch": 0.9661223495003656,
      "grad_norm": 0.3971550166606903,
      "learning_rate": 5.634736917729865e-07,
      "loss": 1.0291,
      "step": 1982
    },
    {
      "epoch": 0.9666097977089935,
      "grad_norm": 0.4068727195262909,
      "learning_rate": 5.472742787873352e-07,
      "loss": 1.1587,
      "step": 1983
    },
    {
      "epoch": 0.9670972459176213,
      "grad_norm": 0.42422133684158325,
      "learning_rate": 5.313104956700565e-07,
      "loss": 1.0437,
      "step": 1984
    },
    {
      "epoch": 0.967584694126249,
      "grad_norm": 0.4129694104194641,
      "learning_rate": 5.155823802435622e-07,
      "loss": 1.1479,
      "step": 1985
    },
    {
      "epoch": 0.9680721423348769,
      "grad_norm": 0.3818117082118988,
      "learning_rate": 5.000899697719552e-07,
      "loss": 1.1197,
      "step": 1986
    },
    {
      "epoch": 0.9685595905435047,
      "grad_norm": 0.4490480124950409,
      "learning_rate": 4.848333009608408e-07,
      "loss": 1.0951,
      "step": 1987
    },
    {
      "epoch": 0.9690470387521326,
      "grad_norm": 0.4608059823513031,
      "learning_rate": 4.6981240995731537e-07,
      "loss": 1.1753,
      "step": 1988
    },
    {
      "epoch": 0.9695344869607604,
      "grad_norm": 0.47589248418807983,
      "learning_rate": 4.550273323498555e-07,
      "loss": 1.0552,
      "step": 1989
    },
    {
      "epoch": 0.9700219351693883,
      "grad_norm": 0.3511136770248413,
      "learning_rate": 4.4047810316822925e-07,
      "loss": 1.0957,
      "step": 1990
    },
    {
      "epoch": 0.9705093833780161,
      "grad_norm": 0.4456351101398468,
      "learning_rate": 4.261647568834182e-07,
      "loss": 1.1183,
      "step": 1991
    },
    {
      "epoch": 0.970996831586644,
      "grad_norm": 0.39235809445381165,
      "learning_rate": 4.1208732740752875e-07,
      "loss": 1.0132,
      "step": 1992
    },
    {
      "epoch": 0.9714842797952717,
      "grad_norm": 0.4110061526298523,
      "learning_rate": 3.9824584809372566e-07,
      "loss": 1.1303,
      "step": 1993
    },
    {
      "epoch": 0.9719717280038996,
      "grad_norm": 0.4614067077636719,
      "learning_rate": 3.84640351736143e-07,
      "loss": 1.1847,
      "step": 1994
    },
    {
      "epoch": 0.9724591762125274,
      "grad_norm": 0.3240770697593689,
      "learning_rate": 3.7127087056980647e-07,
      "loss": 1.0747,
      "step": 1995
    },
    {
      "epoch": 0.9729466244211552,
      "grad_norm": 0.3777024447917938,
      "learning_rate": 3.5813743627055584e-07,
      "loss": 1.209,
      "step": 1996
    },
    {
      "epoch": 0.9734340726297831,
      "grad_norm": 0.3978724777698517,
      "learning_rate": 3.452400799549893e-07,
      "loss": 1.1577,
      "step": 1997
    },
    {
      "epoch": 0.9739215208384109,
      "grad_norm": 0.364408403635025,
      "learning_rate": 3.3257883218035245e-07,
      "loss": 1.0135,
      "step": 1998
    },
    {
      "epoch": 0.9744089690470388,
      "grad_norm": 0.44452327489852905,
      "learning_rate": 3.2015372294450507e-07,
      "loss": 1.2438,
      "step": 1999
    },
    {
      "epoch": 0.9748964172556666,
      "grad_norm": 0.4765107333660126,
      "learning_rate": 3.0796478168582113e-07,
      "loss": 1.0167,
      "step": 2000
    },
    {
      "epoch": 0.9753838654642945,
      "grad_norm": 0.4003845453262329,
      "learning_rate": 2.960120372831221e-07,
      "loss": 1.0883,
      "step": 2001
    },
    {
      "epoch": 0.9758713136729222,
      "grad_norm": 0.49801144003868103,
      "learning_rate": 2.8429551805564394e-07,
      "loss": 1.0007,
      "step": 2002
    },
    {
      "epoch": 0.9763587618815501,
      "grad_norm": 0.3565743863582611,
      "learning_rate": 2.7281525176292565e-07,
      "loss": 1.1447,
      "step": 2003
    },
    {
      "epoch": 0.9768462100901779,
      "grad_norm": 0.37986013293266296,
      "learning_rate": 2.615712656047542e-07,
      "loss": 1.0881,
      "step": 2004
    },
    {
      "epoch": 0.9773336582988057,
      "grad_norm": 0.32770147919654846,
      "learning_rate": 2.5056358622110866e-07,
      "loss": 0.9509,
      "step": 2005
    },
    {
      "epoch": 0.9778211065074336,
      "grad_norm": 0.41189002990722656,
      "learning_rate": 2.3979223969211594e-07,
      "loss": 1.1882,
      "step": 2006
    },
    {
      "epoch": 0.9783085547160614,
      "grad_norm": 0.3875510096549988,
      "learning_rate": 2.2925725153793986e-07,
      "loss": 1.0768,
      "step": 2007
    },
    {
      "epoch": 0.9787960029246893,
      "grad_norm": 0.41866934299468994,
      "learning_rate": 2.1895864671874767e-07,
      "loss": 1.0993,
      "step": 2008
    },
    {
      "epoch": 0.979283451133317,
      "grad_norm": 0.3891562223434448,
      "learning_rate": 2.088964496346879e-07,
      "loss": 1.0648,
      "step": 2009
    },
    {
      "epoch": 0.979770899341945,
      "grad_norm": 0.4654984176158905,
      "learning_rate": 1.9907068412575725e-07,
      "loss": 1.15,
      "step": 2010
    },
    {
      "epoch": 0.9802583475505727,
      "grad_norm": 0.35890793800354004,
      "learning_rate": 1.894813734718004e-07,
      "loss": 1.0544,
      "step": 2011
    },
    {
      "epoch": 0.9807457957592006,
      "grad_norm": 0.36428067088127136,
      "learning_rate": 1.8012854039244353e-07,
      "loss": 1.0215,
      "step": 2012
    },
    {
      "epoch": 0.9812332439678284,
      "grad_norm": 0.36865493655204773,
      "learning_rate": 1.710122070470277e-07,
      "loss": 1.1238,
      "step": 2013
    },
    {
      "epoch": 0.9817206921764563,
      "grad_norm": 0.38733819127082825,
      "learning_rate": 1.6213239503454215e-07,
      "loss": 1.0061,
      "step": 2014
    },
    {
      "epoch": 0.9822081403850841,
      "grad_norm": 0.3741471469402313,
      "learning_rate": 1.5348912539364658e-07,
      "loss": 1.1446,
      "step": 2015
    },
    {
      "epoch": 0.9826955885937119,
      "grad_norm": 0.376385360956192,
      "learning_rate": 1.450824186025157e-07,
      "loss": 1.0023,
      "step": 2016
    },
    {
      "epoch": 0.9831830368023398,
      "grad_norm": 0.3962882459163666,
      "learning_rate": 1.3691229457887257e-07,
      "loss": 0.9648,
      "step": 2017
    },
    {
      "epoch": 0.9836704850109675,
      "grad_norm": 0.3473024070262909,
      "learning_rate": 1.2897877267989966e-07,
      "loss": 1.1262,
      "step": 2018
    },
    {
      "epoch": 0.9841579332195954,
      "grad_norm": 0.3921591341495514,
      "learning_rate": 1.2128187170222792e-07,
      "loss": 1.043,
      "step": 2019
    },
    {
      "epoch": 0.9846453814282232,
      "grad_norm": 0.39327341318130493,
      "learning_rate": 1.1382160988184786e-07,
      "loss": 1.0769,
      "step": 2020
    },
    {
      "epoch": 0.9851328296368511,
      "grad_norm": 0.3739471435546875,
      "learning_rate": 1.0659800489408734e-07,
      "loss": 0.9578,
      "step": 2021
    },
    {
      "epoch": 0.9856202778454789,
      "grad_norm": 0.4034399092197418,
      "learning_rate": 9.961107385360046e-08,
      "loss": 1.1441,
      "step": 2022
    },
    {
      "epoch": 0.9861077260541068,
      "grad_norm": 0.38543474674224854,
      "learning_rate": 9.286083331426776e-08,
      "loss": 1.084,
      "step": 2023
    },
    {
      "epoch": 0.9865951742627346,
      "grad_norm": 0.3662292957305908,
      "learning_rate": 8.634729926920715e-08,
      "loss": 1.0999,
      "step": 2024
    },
    {
      "epoch": 0.9870826224713625,
      "grad_norm": 0.3803950548171997,
      "learning_rate": 8.007048715068521e-08,
      "loss": 1.0877,
      "step": 2025
    },
    {
      "epoch": 0.9875700706799903,
      "grad_norm": 0.34015125036239624,
      "learning_rate": 7.403041183016158e-08,
      "loss": 1.0899,
      "step": 2026
    },
    {
      "epoch": 0.988057518888618,
      "grad_norm": 0.37489980459213257,
      "learning_rate": 6.822708761815566e-08,
      "loss": 1.0894,
      "step": 2027
    },
    {
      "epoch": 0.9885449670972459,
      "grad_norm": 0.3569638133049011,
      "learning_rate": 6.266052826429114e-08,
      "loss": 1.1452,
      "step": 2028
    },
    {
      "epoch": 0.9890324153058737,
      "grad_norm": 0.35684725642204285,
      "learning_rate": 5.733074695721819e-08,
      "loss": 1.0874,
      "step": 2029
    },
    {
      "epoch": 0.9895198635145016,
      "grad_norm": 0.3525896966457367,
      "learning_rate": 5.223775632460237e-08,
      "loss": 1.052,
      "step": 2030
    },
    {
      "epoch": 0.9900073117231294,
      "grad_norm": 0.37919196486473083,
      "learning_rate": 4.738156843309138e-08,
      "loss": 1.0934,
      "step": 2031
    },
    {
      "epoch": 0.9904947599317573,
      "grad_norm": 0.4706897735595703,
      "learning_rate": 4.276219478827059e-08,
      "loss": 1.1683,
      "step": 2032
    },
    {
      "epoch": 0.9909822081403851,
      "grad_norm": 0.39712199568748474,
      "learning_rate": 3.837964633467417e-08,
      "loss": 1.1454,
      "step": 2033
    },
    {
      "epoch": 0.991469656349013,
      "grad_norm": 0.4138774871826172,
      "learning_rate": 3.423393345571846e-08,
      "loss": 1.1165,
      "step": 2034
    },
    {
      "epoch": 0.9919571045576407,
      "grad_norm": 0.38895630836486816,
      "learning_rate": 3.032506597369089e-08,
      "loss": 0.9747,
      "step": 2035
    },
    {
      "epoch": 0.9924445527662685,
      "grad_norm": 0.4397059977054596,
      "learning_rate": 2.6653053149738872e-08,
      "loss": 1.0906,
      "step": 2036
    },
    {
      "epoch": 0.9929320009748964,
      "grad_norm": 0.3787862956523895,
      "learning_rate": 2.321790368382537e-08,
      "loss": 1.0971,
      "step": 2037
    },
    {
      "epoch": 0.9934194491835242,
      "grad_norm": 0.40022116899490356,
      "learning_rate": 2.0019625714740032e-08,
      "loss": 1.2189,
      "step": 2038
    },
    {
      "epoch": 0.9939068973921521,
      "grad_norm": 0.3774934411048889,
      "learning_rate": 1.7058226820054758e-08,
      "loss": 0.9943,
      "step": 2039
    },
    {
      "epoch": 0.9943943456007799,
      "grad_norm": 0.36211833357810974,
      "learning_rate": 1.4333714016090404e-08,
      "loss": 1.128,
      "step": 2040
    },
    {
      "epoch": 0.9948817938094078,
      "grad_norm": 0.35082337260246277,
      "learning_rate": 1.1846093757961196e-08,
      "loss": 1.0167,
      "step": 2041
    },
    {
      "epoch": 0.9953692420180356,
      "grad_norm": 0.3229121267795563,
      "learning_rate": 9.595371939485897e-09,
      "loss": 0.9801,
      "step": 2042
    },
    {
      "epoch": 0.9958566902266635,
      "grad_norm": 0.4398113787174225,
      "learning_rate": 7.581553893221127e-09,
      "loss": 1.0982,
      "step": 2043
    },
    {
      "epoch": 0.9963441384352912,
      "grad_norm": 0.3718317151069641,
      "learning_rate": 5.804644390439151e-09,
      "loss": 1.1165,
      "step": 2044
    },
    {
      "epoch": 0.9968315866439191,
      "grad_norm": 0.4172276258468628,
      "learning_rate": 4.264647641105679e-09,
      "loss": 1.2252,
      "step": 2045
    },
    {
      "epoch": 0.9973190348525469,
      "grad_norm": 0.317125141620636,
      "learning_rate": 2.9615672938909656e-09,
      "loss": 1.1027,
      "step": 2046
    },
    {
      "epoch": 0.9978064830611747,
      "grad_norm": 0.41433629393577576,
      "learning_rate": 1.895406436136504e-09,
      "loss": 0.9324,
      "step": 2047
    },
    {
      "epoch": 0.9982939312698026,
      "grad_norm": 0.35381460189819336,
      "learning_rate": 1.0661675938439253e-09,
      "loss": 1.1131,
      "step": 2048
    },
    {
      "epoch": 0.9987813794784304,
      "grad_norm": 0.422343373298645,
      "learning_rate": 4.738527317194041e-10,
      "loss": 1.0122,
      "step": 2049
    },
    {
      "epoch": 0.9992688276870583,
      "grad_norm": 0.36507782340049744,
      "learning_rate": 1.1846325309594619e-10,
      "loss": 1.1554,
      "step": 2050
    },
    {
      "epoch": 0.999756275895686,
      "grad_norm": 0.4285239577293396,
      "learning_rate": 0.0,
      "loss": 1.3001,
      "step": 2051
    },
    {
      "epoch": 0.999756275895686,
      "eval_loss": 1.0897523164749146,
      "eval_runtime": 134.4524,
      "eval_samples_per_second": 25.704,
      "eval_steps_per_second": 3.213,
      "step": 2051
    }
  ],
  "logging_steps": 1,
  "max_steps": 2051,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.636008272566682e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}