{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.643835616438356,
  "eval_steps": 500,
  "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010958904109589041,
      "grad_norm": 5.003701210021973,
      "learning_rate": 0.0002,
      "loss": 1.3947,
      "step": 1
    },
    {
      "epoch": 0.021917808219178082,
      "grad_norm": 10.884932518005371,
      "learning_rate": 0.0002,
      "loss": 0.657,
      "step": 2
    },
    {
      "epoch": 0.03287671232876712,
      "grad_norm": 1.7771106958389282,
      "learning_rate": 0.0002,
      "loss": 1.1432,
      "step": 3
    },
    {
      "epoch": 0.043835616438356165,
      "grad_norm": 2.750317096710205,
      "learning_rate": 0.0002,
      "loss": 0.6564,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "grad_norm": 1.633827567100525,
      "learning_rate": 0.0002,
      "loss": 0.9156,
      "step": 5
    },
    {
      "epoch": 0.06575342465753424,
      "grad_norm": 1.747514009475708,
      "learning_rate": 0.0002,
      "loss": 0.5935,
      "step": 6
    },
    {
      "epoch": 0.07671232876712329,
      "grad_norm": 1.3404035568237305,
      "learning_rate": 0.0002,
      "loss": 0.5328,
      "step": 7
    },
    {
      "epoch": 0.08767123287671233,
      "grad_norm": 7.1872968673706055,
      "learning_rate": 0.0002,
      "loss": 1.0489,
      "step": 8
    },
    {
      "epoch": 0.09863013698630137,
      "grad_norm": 1.8788518905639648,
      "learning_rate": 0.0002,
      "loss": 0.8711,
      "step": 9
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 1.554734706878662,
      "learning_rate": 0.0002,
      "loss": 0.6025,
      "step": 10
    },
    {
      "epoch": 0.12054794520547946,
      "grad_norm": 1.5181900262832642,
      "learning_rate": 0.0002,
      "loss": 0.9178,
      "step": 11
    },
    {
      "epoch": 0.13150684931506848,
      "grad_norm": 1.74048912525177,
      "learning_rate": 0.0002,
      "loss": 0.8413,
      "step": 12
    },
    {
      "epoch": 0.14246575342465753,
      "grad_norm": 1.5069341659545898,
      "learning_rate": 0.0002,
      "loss": 0.7297,
      "step": 13
    },
    {
      "epoch": 0.15342465753424658,
      "grad_norm": 1.9904654026031494,
      "learning_rate": 0.0002,
      "loss": 0.909,
      "step": 14
    },
    {
      "epoch": 0.1643835616438356,
      "grad_norm": 1.329214334487915,
      "learning_rate": 0.0002,
      "loss": 0.6475,
      "step": 15
    },
    {
      "epoch": 0.17534246575342466,
      "grad_norm": 3.0678200721740723,
      "learning_rate": 0.0002,
      "loss": 0.8995,
      "step": 16
    },
    {
      "epoch": 0.1863013698630137,
      "grad_norm": 1.5679476261138916,
      "learning_rate": 0.0002,
      "loss": 0.6728,
      "step": 17
    },
    {
      "epoch": 0.19726027397260273,
      "grad_norm": 1.196855068206787,
      "learning_rate": 0.0002,
      "loss": 0.4871,
      "step": 18
    },
    {
      "epoch": 0.20821917808219179,
      "grad_norm": 1.8244729042053223,
      "learning_rate": 0.0002,
      "loss": 0.5748,
      "step": 19
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 1.1432626247406006,
      "learning_rate": 0.0002,
      "loss": 0.5982,
      "step": 20
    },
    {
      "epoch": 0.23013698630136986,
      "grad_norm": 1.672303557395935,
      "learning_rate": 0.0002,
      "loss": 0.5639,
      "step": 21
    },
    {
      "epoch": 0.2410958904109589,
      "grad_norm": 1.5940009355545044,
      "learning_rate": 0.0002,
      "loss": 0.6198,
      "step": 22
    },
    {
      "epoch": 0.25205479452054796,
      "grad_norm": 1.710236668586731,
      "learning_rate": 0.0002,
      "loss": 0.8638,
      "step": 23
    },
    {
      "epoch": 0.26301369863013696,
      "grad_norm": 1.7786955833435059,
      "learning_rate": 0.0002,
      "loss": 0.7169,
      "step": 24
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 1.2856895923614502,
      "learning_rate": 0.0002,
      "loss": 0.5522,
      "step": 25
    },
    {
      "epoch": 0.28493150684931506,
      "grad_norm": 1.3859294652938843,
      "learning_rate": 0.0002,
      "loss": 0.6687,
      "step": 26
    },
    {
      "epoch": 0.2958904109589041,
      "grad_norm": 1.3184758424758911,
      "learning_rate": 0.0002,
      "loss": 0.6227,
      "step": 27
    },
    {
      "epoch": 0.30684931506849317,
      "grad_norm": 1.3072282075881958,
      "learning_rate": 0.0002,
      "loss": 0.4417,
      "step": 28
    },
    {
      "epoch": 0.3178082191780822,
      "grad_norm": 1.7866010665893555,
      "learning_rate": 0.0002,
      "loss": 0.7648,
      "step": 29
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 1.0981870889663696,
      "learning_rate": 0.0002,
      "loss": 0.6496,
      "step": 30
    },
    {
      "epoch": 0.33972602739726027,
      "grad_norm": 1.586014747619629,
      "learning_rate": 0.0002,
      "loss": 0.7389,
      "step": 31
    },
    {
      "epoch": 0.3506849315068493,
      "grad_norm": 1.4227101802825928,
      "learning_rate": 0.0002,
      "loss": 0.7764,
      "step": 32
    },
    {
      "epoch": 0.36164383561643837,
      "grad_norm": 1.0654901266098022,
      "learning_rate": 0.0002,
      "loss": 0.7015,
      "step": 33
    },
    {
      "epoch": 0.3726027397260274,
      "grad_norm": 1.22892427444458,
      "learning_rate": 0.0002,
      "loss": 0.6085,
      "step": 34
    },
    {
      "epoch": 0.3835616438356164,
      "grad_norm": 1.1209129095077515,
      "learning_rate": 0.0002,
      "loss": 0.334,
      "step": 35
    },
    {
      "epoch": 0.39452054794520547,
      "grad_norm": 1.3106253147125244,
      "learning_rate": 0.0002,
      "loss": 0.6377,
      "step": 36
    },
    {
      "epoch": 0.4054794520547945,
      "grad_norm": 1.1807057857513428,
      "learning_rate": 0.0002,
      "loss": 0.4686,
      "step": 37
    },
    {
      "epoch": 0.41643835616438357,
      "grad_norm": 1.1512413024902344,
      "learning_rate": 0.0002,
      "loss": 0.5934,
      "step": 38
    },
    {
      "epoch": 0.4273972602739726,
      "grad_norm": 1.2861087322235107,
      "learning_rate": 0.0002,
      "loss": 0.4494,
      "step": 39
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 1.6408014297485352,
      "learning_rate": 0.0002,
      "loss": 0.8894,
      "step": 40
    },
    {
      "epoch": 0.44931506849315067,
      "grad_norm": 1.1644452810287476,
      "learning_rate": 0.0002,
      "loss": 0.9024,
      "step": 41
    },
    {
      "epoch": 0.4602739726027397,
      "grad_norm": 0.989621639251709,
      "learning_rate": 0.0002,
      "loss": 0.4873,
      "step": 42
    },
    {
      "epoch": 0.4712328767123288,
      "grad_norm": 1.2218654155731201,
      "learning_rate": 0.0002,
      "loss": 0.7415,
      "step": 43
    },
    {
      "epoch": 0.4821917808219178,
      "grad_norm": 1.2144018411636353,
      "learning_rate": 0.0002,
      "loss": 0.6073,
      "step": 44
    },
    {
      "epoch": 0.4931506849315068,
      "grad_norm": 1.5843247175216675,
      "learning_rate": 0.0002,
      "loss": 0.8353,
      "step": 45
    },
    {
      "epoch": 0.5041095890410959,
      "grad_norm": 1.3587316274642944,
      "learning_rate": 0.0002,
      "loss": 0.5154,
      "step": 46
    },
    {
      "epoch": 0.5150684931506849,
      "grad_norm": 1.173448085784912,
      "learning_rate": 0.0002,
      "loss": 0.616,
      "step": 47
    },
    {
      "epoch": 0.5260273972602739,
      "grad_norm": 1.6074247360229492,
      "learning_rate": 0.0002,
      "loss": 0.8318,
      "step": 48
    },
    {
      "epoch": 0.536986301369863,
      "grad_norm": 1.0739307403564453,
      "learning_rate": 0.0002,
      "loss": 0.5982,
      "step": 49
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 1.330855131149292,
      "learning_rate": 0.0002,
      "loss": 0.5309,
      "step": 50
    },
    {
      "epoch": 0.5589041095890411,
      "grad_norm": 1.5128343105316162,
      "learning_rate": 0.0002,
      "loss": 0.5063,
      "step": 51
    },
    {
      "epoch": 0.5698630136986301,
      "grad_norm": 1.5110679864883423,
      "learning_rate": 0.0002,
      "loss": 0.5551,
      "step": 52
    },
    {
      "epoch": 0.5808219178082191,
      "grad_norm": 2.263357639312744,
      "learning_rate": 0.0002,
      "loss": 0.6387,
      "step": 53
    },
    {
      "epoch": 0.5917808219178082,
      "grad_norm": 1.3241772651672363,
      "learning_rate": 0.0002,
      "loss": 0.8482,
      "step": 54
    },
    {
      "epoch": 0.6027397260273972,
      "grad_norm": 1.246489405632019,
      "learning_rate": 0.0002,
      "loss": 0.7622,
      "step": 55
    },
    {
      "epoch": 0.6136986301369863,
      "grad_norm": 1.2963398694992065,
      "learning_rate": 0.0002,
      "loss": 0.6943,
      "step": 56
    },
    {
      "epoch": 0.6246575342465753,
      "grad_norm": 1.116220474243164,
      "learning_rate": 0.0002,
      "loss": 0.6305,
      "step": 57
    },
    {
      "epoch": 0.6356164383561644,
      "grad_norm": 1.4782965183258057,
      "learning_rate": 0.0002,
      "loss": 0.7089,
      "step": 58
    },
    {
      "epoch": 0.6465753424657534,
      "grad_norm": 1.207879662513733,
      "learning_rate": 0.0002,
      "loss": 0.8837,
      "step": 59
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 1.0886225700378418,
      "learning_rate": 0.0002,
      "loss": 0.7521,
      "step": 60
    },
    {
      "epoch": 0.6684931506849315,
      "grad_norm": 1.1209737062454224,
      "learning_rate": 0.0002,
      "loss": 0.6905,
      "step": 61
    },
    {
      "epoch": 0.6794520547945205,
      "grad_norm": 1.732853889465332,
      "learning_rate": 0.0002,
      "loss": 0.6397,
      "step": 62
    },
    {
      "epoch": 0.6904109589041096,
      "grad_norm": 1.2688523530960083,
      "learning_rate": 0.0002,
      "loss": 0.647,
      "step": 63
    },
    {
      "epoch": 0.7013698630136986,
      "grad_norm": 1.3005374670028687,
      "learning_rate": 0.0002,
      "loss": 0.6742,
      "step": 64
    },
    {
      "epoch": 0.7123287671232876,
      "grad_norm": 1.3675568103790283,
      "learning_rate": 0.0002,
      "loss": 0.8946,
      "step": 65
    },
    {
      "epoch": 0.7232876712328767,
      "grad_norm": 1.3661890029907227,
      "learning_rate": 0.0002,
      "loss": 0.6946,
      "step": 66
    },
    {
      "epoch": 0.7342465753424657,
      "grad_norm": 1.4970860481262207,
      "learning_rate": 0.0002,
      "loss": 0.6293,
      "step": 67
    },
    {
      "epoch": 0.7452054794520548,
      "grad_norm": 1.445917010307312,
      "learning_rate": 0.0002,
      "loss": 0.8058,
      "step": 68
    },
    {
      "epoch": 0.7561643835616438,
      "grad_norm": 1.6117463111877441,
      "learning_rate": 0.0002,
      "loss": 0.7998,
      "step": 69
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 1.6023530960083008,
      "learning_rate": 0.0002,
      "loss": 0.5355,
      "step": 70
    },
    {
      "epoch": 0.7780821917808219,
      "grad_norm": 1.4635958671569824,
      "learning_rate": 0.0002,
      "loss": 0.6999,
      "step": 71
    },
    {
      "epoch": 0.7890410958904109,
      "grad_norm": 1.4061299562454224,
      "learning_rate": 0.0002,
      "loss": 0.6554,
      "step": 72
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.4091109037399292,
      "learning_rate": 0.0002,
      "loss": 0.6972,
      "step": 73
    },
    {
      "epoch": 0.810958904109589,
      "grad_norm": 1.3066381216049194,
      "learning_rate": 0.0002,
      "loss": 0.742,
      "step": 74
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 0.9933669567108154,
      "learning_rate": 0.0002,
      "loss": 0.6989,
      "step": 75
    },
    {
      "epoch": 0.8328767123287671,
      "grad_norm": 1.2205321788787842,
      "learning_rate": 0.0002,
      "loss": 0.6398,
      "step": 76
    },
    {
      "epoch": 0.8438356164383561,
      "grad_norm": 1.3536911010742188,
      "learning_rate": 0.0002,
      "loss": 0.5861,
      "step": 77
    },
    {
      "epoch": 0.8547945205479452,
      "grad_norm": 1.5119093656539917,
      "learning_rate": 0.0002,
      "loss": 0.9953,
      "step": 78
    },
    {
      "epoch": 0.8657534246575342,
      "grad_norm": 1.0627142190933228,
      "learning_rate": 0.0002,
      "loss": 0.4492,
      "step": 79
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 1.2815035581588745,
      "learning_rate": 0.0002,
      "loss": 0.7471,
      "step": 80
    },
    {
      "epoch": 0.8876712328767123,
      "grad_norm": 1.376985788345337,
      "learning_rate": 0.0002,
      "loss": 0.8526,
      "step": 81
    },
    {
      "epoch": 0.8986301369863013,
      "grad_norm": 1.3588144779205322,
      "learning_rate": 0.0002,
      "loss": 0.7122,
      "step": 82
    },
    {
      "epoch": 0.9095890410958904,
      "grad_norm": 1.378824234008789,
      "learning_rate": 0.0002,
      "loss": 0.8444,
      "step": 83
    },
    {
      "epoch": 0.9205479452054794,
      "grad_norm": 1.5447663068771362,
      "learning_rate": 0.0002,
      "loss": 0.6788,
      "step": 84
    },
    {
      "epoch": 0.9315068493150684,
      "grad_norm": 1.4500224590301514,
      "learning_rate": 0.0002,
      "loss": 0.5721,
      "step": 85
    },
    {
      "epoch": 0.9424657534246575,
      "grad_norm": 1.0830070972442627,
      "learning_rate": 0.0002,
      "loss": 0.657,
      "step": 86
    },
    {
      "epoch": 0.9534246575342465,
      "grad_norm": 1.3003672361373901,
      "learning_rate": 0.0002,
      "loss": 0.4806,
      "step": 87
    },
    {
      "epoch": 0.9643835616438357,
      "grad_norm": 1.1137444972991943,
      "learning_rate": 0.0002,
      "loss": 0.7444,
      "step": 88
    },
    {
      "epoch": 0.9753424657534246,
      "grad_norm": 1.2204691171646118,
      "learning_rate": 0.0002,
      "loss": 0.7924,
      "step": 89
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 1.3225165605545044,
      "learning_rate": 0.0002,
      "loss": 0.7357,
      "step": 90
    },
    {
      "epoch": 0.9972602739726028,
      "grad_norm": 1.2743207216262817,
      "learning_rate": 0.0002,
      "loss": 0.6903,
      "step": 91
    },
    {
      "epoch": 1.0082191780821919,
      "grad_norm": 1.2072831392288208,
      "learning_rate": 0.0002,
      "loss": 0.4617,
      "step": 92
    },
    {
      "epoch": 1.0191780821917809,
      "grad_norm": 1.0190479755401611,
      "learning_rate": 0.0002,
      "loss": 0.4412,
      "step": 93
    },
    {
      "epoch": 1.0301369863013699,
      "grad_norm": 0.8685715198516846,
      "learning_rate": 0.0002,
      "loss": 0.4422,
      "step": 94
    },
    {
      "epoch": 1.0410958904109588,
      "grad_norm": 0.6671916246414185,
      "learning_rate": 0.0002,
      "loss": 0.2872,
      "step": 95
    },
    {
      "epoch": 1.0520547945205478,
      "grad_norm": 0.8552739024162292,
      "learning_rate": 0.0002,
      "loss": 0.2837,
      "step": 96
    },
    {
      "epoch": 1.063013698630137,
      "grad_norm": 0.8662064075469971,
      "learning_rate": 0.0002,
      "loss": 0.2549,
      "step": 97
    },
    {
      "epoch": 1.073972602739726,
      "grad_norm": 1.6159878969192505,
      "learning_rate": 0.0002,
      "loss": 0.4545,
      "step": 98
    },
    {
      "epoch": 1.084931506849315,
      "grad_norm": 1.0922621488571167,
      "learning_rate": 0.0002,
      "loss": 0.2703,
      "step": 99
    },
    {
      "epoch": 1.095890410958904,
      "grad_norm": 0.9011418223381042,
      "learning_rate": 0.0002,
      "loss": 0.1948,
      "step": 100
    },
    {
      "epoch": 1.106849315068493,
      "grad_norm": 1.094281554222107,
      "learning_rate": 0.0002,
      "loss": 0.2967,
      "step": 101
    },
    {
      "epoch": 1.1178082191780823,
      "grad_norm": 0.9296566843986511,
      "learning_rate": 0.0002,
      "loss": 0.2372,
      "step": 102
    },
    {
      "epoch": 1.1287671232876713,
      "grad_norm": 1.2015409469604492,
      "learning_rate": 0.0002,
      "loss": 0.2724,
      "step": 103
    },
    {
      "epoch": 1.1397260273972603,
      "grad_norm": 1.0707019567489624,
      "learning_rate": 0.0002,
      "loss": 0.242,
      "step": 104
    },
    {
      "epoch": 1.1506849315068493,
      "grad_norm": 1.381605863571167,
      "learning_rate": 0.0002,
      "loss": 0.4983,
      "step": 105
    },
    {
      "epoch": 1.1616438356164385,
      "grad_norm": 1.3150050640106201,
      "learning_rate": 0.0002,
      "loss": 0.3801,
      "step": 106
    },
    {
      "epoch": 1.1726027397260275,
      "grad_norm": 1.2527716159820557,
      "learning_rate": 0.0002,
      "loss": 0.3798,
      "step": 107
    },
    {
      "epoch": 1.1835616438356165,
      "grad_norm": 1.2365212440490723,
      "learning_rate": 0.0002,
      "loss": 0.2736,
      "step": 108
    },
    {
      "epoch": 1.1945205479452055,
      "grad_norm": 1.1183747053146362,
      "learning_rate": 0.0002,
      "loss": 0.4753,
      "step": 109
    },
    {
      "epoch": 1.2054794520547945,
      "grad_norm": 0.8566204905509949,
      "learning_rate": 0.0002,
      "loss": 0.2531,
      "step": 110
    },
    {
      "epoch": 1.2164383561643834,
      "grad_norm": 1.0663121938705444,
      "learning_rate": 0.0002,
      "loss": 0.1986,
      "step": 111
    },
    {
      "epoch": 1.2273972602739727,
      "grad_norm": 1.4607142210006714,
      "learning_rate": 0.0002,
      "loss": 0.3589,
      "step": 112
    },
    {
      "epoch": 1.2383561643835617,
      "grad_norm": 0.7903380990028381,
      "learning_rate": 0.0002,
      "loss": 0.1885,
      "step": 113
    },
    {
      "epoch": 1.2493150684931507,
      "grad_norm": 1.3529448509216309,
      "learning_rate": 0.0002,
      "loss": 0.2417,
      "step": 114
    },
    {
      "epoch": 1.2602739726027397,
      "grad_norm": 1.0445804595947266,
      "learning_rate": 0.0002,
      "loss": 0.2208,
      "step": 115
    },
    {
      "epoch": 1.2712328767123289,
      "grad_norm": 1.0864062309265137,
      "learning_rate": 0.0002,
      "loss": 0.2603,
      "step": 116
    },
    {
      "epoch": 1.2821917808219179,
      "grad_norm": 1.0503292083740234,
      "learning_rate": 0.0002,
      "loss": 0.1478,
      "step": 117
    },
    {
      "epoch": 1.2931506849315069,
      "grad_norm": 1.4396042823791504,
      "learning_rate": 0.0002,
      "loss": 0.2482,
      "step": 118
    },
    {
      "epoch": 1.3041095890410959,
      "grad_norm": 1.7265571355819702,
      "learning_rate": 0.0002,
      "loss": 0.3195,
      "step": 119
    },
    {
      "epoch": 1.3150684931506849,
      "grad_norm": 1.2890552282333374,
      "learning_rate": 0.0002,
      "loss": 0.1891,
      "step": 120
    },
    {
      "epoch": 1.3260273972602739,
      "grad_norm": 1.25291109085083,
      "learning_rate": 0.0002,
      "loss": 0.272,
      "step": 121
    },
    {
      "epoch": 1.336986301369863,
      "grad_norm": 1.3044368028640747,
      "learning_rate": 0.0002,
      "loss": 0.3068,
      "step": 122
    },
    {
      "epoch": 1.347945205479452,
      "grad_norm": 1.7130950689315796,
      "learning_rate": 0.0002,
      "loss": 0.5022,
      "step": 123
    },
    {
      "epoch": 1.358904109589041,
      "grad_norm": 2.3856253623962402,
      "learning_rate": 0.0002,
      "loss": 0.2692,
      "step": 124
    },
    {
      "epoch": 1.36986301369863,
      "grad_norm": 1.2418773174285889,
      "learning_rate": 0.0002,
      "loss": 0.3586,
      "step": 125
    },
    {
      "epoch": 1.3808219178082193,
      "grad_norm": 1.4788987636566162,
      "learning_rate": 0.0002,
      "loss": 0.3331,
      "step": 126
    },
    {
      "epoch": 1.3917808219178083,
      "grad_norm": 0.8837617635726929,
      "learning_rate": 0.0002,
      "loss": 0.2599,
      "step": 127
    },
    {
      "epoch": 1.4027397260273973,
      "grad_norm": 1.1440480947494507,
      "learning_rate": 0.0002,
      "loss": 0.4741,
      "step": 128
    },
    {
      "epoch": 1.4136986301369863,
      "grad_norm": 0.924139142036438,
      "learning_rate": 0.0002,
      "loss": 0.3046,
      "step": 129
    },
    {
      "epoch": 1.4246575342465753,
      "grad_norm": 1.0871144533157349,
      "learning_rate": 0.0002,
      "loss": 0.2887,
      "step": 130
    },
    {
      "epoch": 1.4356164383561643,
      "grad_norm": 0.9994255304336548,
      "learning_rate": 0.0002,
      "loss": 0.2292,
      "step": 131
    },
    {
      "epoch": 1.4465753424657535,
      "grad_norm": 1.2388752698898315,
      "learning_rate": 0.0002,
      "loss": 0.2912,
      "step": 132
    },
    {
      "epoch": 1.4575342465753425,
      "grad_norm": 1.0453673601150513,
      "learning_rate": 0.0002,
      "loss": 0.2324,
      "step": 133
    },
    {
      "epoch": 1.4684931506849315,
      "grad_norm": 1.558586597442627,
      "learning_rate": 0.0002,
      "loss": 0.3854,
      "step": 134
    },
    {
      "epoch": 1.4794520547945205,
      "grad_norm": 1.2428361177444458,
      "learning_rate": 0.0002,
      "loss": 0.2,
      "step": 135
    },
    {
      "epoch": 1.4904109589041097,
      "grad_norm": 1.2706862688064575,
      "learning_rate": 0.0002,
      "loss": 0.1539,
      "step": 136
    },
    {
      "epoch": 1.5013698630136987,
      "grad_norm": 1.4815326929092407,
      "learning_rate": 0.0002,
      "loss": 0.4198,
      "step": 137
    },
    {
      "epoch": 1.5123287671232877,
      "grad_norm": 1.3065235614776611,
      "learning_rate": 0.0002,
      "loss": 0.2996,
      "step": 138
    },
    {
      "epoch": 1.5232876712328767,
      "grad_norm": 1.1650217771530151,
      "learning_rate": 0.0002,
      "loss": 0.2343,
      "step": 139
    },
    {
      "epoch": 1.5342465753424657,
      "grad_norm": 2.339799165725708,
      "learning_rate": 0.0002,
      "loss": 0.3193,
      "step": 140
    },
    {
      "epoch": 1.5452054794520547,
      "grad_norm": 1.2828121185302734,
      "learning_rate": 0.0002,
      "loss": 0.3996,
      "step": 141
    },
    {
      "epoch": 1.5561643835616439,
      "grad_norm": 1.0856819152832031,
      "learning_rate": 0.0002,
      "loss": 0.3242,
      "step": 142
    },
    {
      "epoch": 1.5671232876712329,
      "grad_norm": 1.0250024795532227,
      "learning_rate": 0.0002,
      "loss": 0.23,
      "step": 143
    },
    {
      "epoch": 1.5780821917808219,
      "grad_norm": 0.9548241496086121,
      "learning_rate": 0.0002,
      "loss": 0.1995,
      "step": 144
    },
    {
      "epoch": 1.589041095890411,
      "grad_norm": 0.966123104095459,
      "learning_rate": 0.0002,
      "loss": 0.3443,
      "step": 145
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.8860892057418823,
      "learning_rate": 0.0002,
      "loss": 0.3481,
      "step": 146
    },
    {
      "epoch": 1.610958904109589,
      "grad_norm": 1.1538076400756836,
      "learning_rate": 0.0002,
      "loss": 0.2511,
      "step": 147
    },
    {
      "epoch": 1.621917808219178,
      "grad_norm": 1.4117934703826904,
      "learning_rate": 0.0002,
      "loss": 0.3807,
      "step": 148
    },
    {
      "epoch": 1.632876712328767,
      "grad_norm": 1.4486627578735352,
      "learning_rate": 0.0002,
      "loss": 0.2264,
      "step": 149
    },
    {
      "epoch": 1.643835616438356,
      "grad_norm": 0.643312931060791,
      "learning_rate": 0.0002,
      "loss": 0.0966,
      "step": 150
    }
  ],
  "logging_steps": 1,
  "max_steps": 364,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.76287234956329e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}