{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.643835616438356,
"eval_steps": 500,
"global_step": 150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010958904109589041,
"grad_norm": 5.003701210021973,
"learning_rate": 0.0002,
"loss": 1.3947,
"step": 1
},
{
"epoch": 0.021917808219178082,
"grad_norm": 10.884932518005371,
"learning_rate": 0.0002,
"loss": 0.657,
"step": 2
},
{
"epoch": 0.03287671232876712,
"grad_norm": 1.7771106958389282,
"learning_rate": 0.0002,
"loss": 1.1432,
"step": 3
},
{
"epoch": 0.043835616438356165,
"grad_norm": 2.750317096710205,
"learning_rate": 0.0002,
"loss": 0.6564,
"step": 4
},
{
"epoch": 0.0547945205479452,
"grad_norm": 1.633827567100525,
"learning_rate": 0.0002,
"loss": 0.9156,
"step": 5
},
{
"epoch": 0.06575342465753424,
"grad_norm": 1.747514009475708,
"learning_rate": 0.0002,
"loss": 0.5935,
"step": 6
},
{
"epoch": 0.07671232876712329,
"grad_norm": 1.3404035568237305,
"learning_rate": 0.0002,
"loss": 0.5328,
"step": 7
},
{
"epoch": 0.08767123287671233,
"grad_norm": 7.1872968673706055,
"learning_rate": 0.0002,
"loss": 1.0489,
"step": 8
},
{
"epoch": 0.09863013698630137,
"grad_norm": 1.8788518905639648,
"learning_rate": 0.0002,
"loss": 0.8711,
"step": 9
},
{
"epoch": 0.1095890410958904,
"grad_norm": 1.554734706878662,
"learning_rate": 0.0002,
"loss": 0.6025,
"step": 10
},
{
"epoch": 0.12054794520547946,
"grad_norm": 1.5181900262832642,
"learning_rate": 0.0002,
"loss": 0.9178,
"step": 11
},
{
"epoch": 0.13150684931506848,
"grad_norm": 1.74048912525177,
"learning_rate": 0.0002,
"loss": 0.8413,
"step": 12
},
{
"epoch": 0.14246575342465753,
"grad_norm": 1.5069341659545898,
"learning_rate": 0.0002,
"loss": 0.7297,
"step": 13
},
{
"epoch": 0.15342465753424658,
"grad_norm": 1.9904654026031494,
"learning_rate": 0.0002,
"loss": 0.909,
"step": 14
},
{
"epoch": 0.1643835616438356,
"grad_norm": 1.329214334487915,
"learning_rate": 0.0002,
"loss": 0.6475,
"step": 15
},
{
"epoch": 0.17534246575342466,
"grad_norm": 3.0678200721740723,
"learning_rate": 0.0002,
"loss": 0.8995,
"step": 16
},
{
"epoch": 0.1863013698630137,
"grad_norm": 1.5679476261138916,
"learning_rate": 0.0002,
"loss": 0.6728,
"step": 17
},
{
"epoch": 0.19726027397260273,
"grad_norm": 1.196855068206787,
"learning_rate": 0.0002,
"loss": 0.4871,
"step": 18
},
{
"epoch": 0.20821917808219179,
"grad_norm": 1.8244729042053223,
"learning_rate": 0.0002,
"loss": 0.5748,
"step": 19
},
{
"epoch": 0.2191780821917808,
"grad_norm": 1.1432626247406006,
"learning_rate": 0.0002,
"loss": 0.5982,
"step": 20
},
{
"epoch": 0.23013698630136986,
"grad_norm": 1.672303557395935,
"learning_rate": 0.0002,
"loss": 0.5639,
"step": 21
},
{
"epoch": 0.2410958904109589,
"grad_norm": 1.5940009355545044,
"learning_rate": 0.0002,
"loss": 0.6198,
"step": 22
},
{
"epoch": 0.25205479452054796,
"grad_norm": 1.710236668586731,
"learning_rate": 0.0002,
"loss": 0.8638,
"step": 23
},
{
"epoch": 0.26301369863013696,
"grad_norm": 1.7786955833435059,
"learning_rate": 0.0002,
"loss": 0.7169,
"step": 24
},
{
"epoch": 0.273972602739726,
"grad_norm": 1.2856895923614502,
"learning_rate": 0.0002,
"loss": 0.5522,
"step": 25
},
{
"epoch": 0.28493150684931506,
"grad_norm": 1.3859294652938843,
"learning_rate": 0.0002,
"loss": 0.6687,
"step": 26
},
{
"epoch": 0.2958904109589041,
"grad_norm": 1.3184758424758911,
"learning_rate": 0.0002,
"loss": 0.6227,
"step": 27
},
{
"epoch": 0.30684931506849317,
"grad_norm": 1.3072282075881958,
"learning_rate": 0.0002,
"loss": 0.4417,
"step": 28
},
{
"epoch": 0.3178082191780822,
"grad_norm": 1.7866010665893555,
"learning_rate": 0.0002,
"loss": 0.7648,
"step": 29
},
{
"epoch": 0.3287671232876712,
"grad_norm": 1.0981870889663696,
"learning_rate": 0.0002,
"loss": 0.6496,
"step": 30
},
{
"epoch": 0.33972602739726027,
"grad_norm": 1.586014747619629,
"learning_rate": 0.0002,
"loss": 0.7389,
"step": 31
},
{
"epoch": 0.3506849315068493,
"grad_norm": 1.4227101802825928,
"learning_rate": 0.0002,
"loss": 0.7764,
"step": 32
},
{
"epoch": 0.36164383561643837,
"grad_norm": 1.0654901266098022,
"learning_rate": 0.0002,
"loss": 0.7015,
"step": 33
},
{
"epoch": 0.3726027397260274,
"grad_norm": 1.22892427444458,
"learning_rate": 0.0002,
"loss": 0.6085,
"step": 34
},
{
"epoch": 0.3835616438356164,
"grad_norm": 1.1209129095077515,
"learning_rate": 0.0002,
"loss": 0.334,
"step": 35
},
{
"epoch": 0.39452054794520547,
"grad_norm": 1.3106253147125244,
"learning_rate": 0.0002,
"loss": 0.6377,
"step": 36
},
{
"epoch": 0.4054794520547945,
"grad_norm": 1.1807057857513428,
"learning_rate": 0.0002,
"loss": 0.4686,
"step": 37
},
{
"epoch": 0.41643835616438357,
"grad_norm": 1.1512413024902344,
"learning_rate": 0.0002,
"loss": 0.5934,
"step": 38
},
{
"epoch": 0.4273972602739726,
"grad_norm": 1.2861087322235107,
"learning_rate": 0.0002,
"loss": 0.4494,
"step": 39
},
{
"epoch": 0.4383561643835616,
"grad_norm": 1.6408014297485352,
"learning_rate": 0.0002,
"loss": 0.8894,
"step": 40
},
{
"epoch": 0.44931506849315067,
"grad_norm": 1.1644452810287476,
"learning_rate": 0.0002,
"loss": 0.9024,
"step": 41
},
{
"epoch": 0.4602739726027397,
"grad_norm": 0.989621639251709,
"learning_rate": 0.0002,
"loss": 0.4873,
"step": 42
},
{
"epoch": 0.4712328767123288,
"grad_norm": 1.2218654155731201,
"learning_rate": 0.0002,
"loss": 0.7415,
"step": 43
},
{
"epoch": 0.4821917808219178,
"grad_norm": 1.2144018411636353,
"learning_rate": 0.0002,
"loss": 0.6073,
"step": 44
},
{
"epoch": 0.4931506849315068,
"grad_norm": 1.5843247175216675,
"learning_rate": 0.0002,
"loss": 0.8353,
"step": 45
},
{
"epoch": 0.5041095890410959,
"grad_norm": 1.3587316274642944,
"learning_rate": 0.0002,
"loss": 0.5154,
"step": 46
},
{
"epoch": 0.5150684931506849,
"grad_norm": 1.173448085784912,
"learning_rate": 0.0002,
"loss": 0.616,
"step": 47
},
{
"epoch": 0.5260273972602739,
"grad_norm": 1.6074247360229492,
"learning_rate": 0.0002,
"loss": 0.8318,
"step": 48
},
{
"epoch": 0.536986301369863,
"grad_norm": 1.0739307403564453,
"learning_rate": 0.0002,
"loss": 0.5982,
"step": 49
},
{
"epoch": 0.547945205479452,
"grad_norm": 1.330855131149292,
"learning_rate": 0.0002,
"loss": 0.5309,
"step": 50
},
{
"epoch": 0.5589041095890411,
"grad_norm": 1.5128343105316162,
"learning_rate": 0.0002,
"loss": 0.5063,
"step": 51
},
{
"epoch": 0.5698630136986301,
"grad_norm": 1.5110679864883423,
"learning_rate": 0.0002,
"loss": 0.5551,
"step": 52
},
{
"epoch": 0.5808219178082191,
"grad_norm": 2.263357639312744,
"learning_rate": 0.0002,
"loss": 0.6387,
"step": 53
},
{
"epoch": 0.5917808219178082,
"grad_norm": 1.3241772651672363,
"learning_rate": 0.0002,
"loss": 0.8482,
"step": 54
},
{
"epoch": 0.6027397260273972,
"grad_norm": 1.246489405632019,
"learning_rate": 0.0002,
"loss": 0.7622,
"step": 55
},
{
"epoch": 0.6136986301369863,
"grad_norm": 1.2963398694992065,
"learning_rate": 0.0002,
"loss": 0.6943,
"step": 56
},
{
"epoch": 0.6246575342465753,
"grad_norm": 1.116220474243164,
"learning_rate": 0.0002,
"loss": 0.6305,
"step": 57
},
{
"epoch": 0.6356164383561644,
"grad_norm": 1.4782965183258057,
"learning_rate": 0.0002,
"loss": 0.7089,
"step": 58
},
{
"epoch": 0.6465753424657534,
"grad_norm": 1.207879662513733,
"learning_rate": 0.0002,
"loss": 0.8837,
"step": 59
},
{
"epoch": 0.6575342465753424,
"grad_norm": 1.0886225700378418,
"learning_rate": 0.0002,
"loss": 0.7521,
"step": 60
},
{
"epoch": 0.6684931506849315,
"grad_norm": 1.1209737062454224,
"learning_rate": 0.0002,
"loss": 0.6905,
"step": 61
},
{
"epoch": 0.6794520547945205,
"grad_norm": 1.732853889465332,
"learning_rate": 0.0002,
"loss": 0.6397,
"step": 62
},
{
"epoch": 0.6904109589041096,
"grad_norm": 1.2688523530960083,
"learning_rate": 0.0002,
"loss": 0.647,
"step": 63
},
{
"epoch": 0.7013698630136986,
"grad_norm": 1.3005374670028687,
"learning_rate": 0.0002,
"loss": 0.6742,
"step": 64
},
{
"epoch": 0.7123287671232876,
"grad_norm": 1.3675568103790283,
"learning_rate": 0.0002,
"loss": 0.8946,
"step": 65
},
{
"epoch": 0.7232876712328767,
"grad_norm": 1.3661890029907227,
"learning_rate": 0.0002,
"loss": 0.6946,
"step": 66
},
{
"epoch": 0.7342465753424657,
"grad_norm": 1.4970860481262207,
"learning_rate": 0.0002,
"loss": 0.6293,
"step": 67
},
{
"epoch": 0.7452054794520548,
"grad_norm": 1.445917010307312,
"learning_rate": 0.0002,
"loss": 0.8058,
"step": 68
},
{
"epoch": 0.7561643835616438,
"grad_norm": 1.6117463111877441,
"learning_rate": 0.0002,
"loss": 0.7998,
"step": 69
},
{
"epoch": 0.7671232876712328,
"grad_norm": 1.6023530960083008,
"learning_rate": 0.0002,
"loss": 0.5355,
"step": 70
},
{
"epoch": 0.7780821917808219,
"grad_norm": 1.4635958671569824,
"learning_rate": 0.0002,
"loss": 0.6999,
"step": 71
},
{
"epoch": 0.7890410958904109,
"grad_norm": 1.4061299562454224,
"learning_rate": 0.0002,
"loss": 0.6554,
"step": 72
},
{
"epoch": 0.8,
"grad_norm": 1.4091109037399292,
"learning_rate": 0.0002,
"loss": 0.6972,
"step": 73
},
{
"epoch": 0.810958904109589,
"grad_norm": 1.3066381216049194,
"learning_rate": 0.0002,
"loss": 0.742,
"step": 74
},
{
"epoch": 0.821917808219178,
"grad_norm": 0.9933669567108154,
"learning_rate": 0.0002,
"loss": 0.6989,
"step": 75
},
{
"epoch": 0.8328767123287671,
"grad_norm": 1.2205321788787842,
"learning_rate": 0.0002,
"loss": 0.6398,
"step": 76
},
{
"epoch": 0.8438356164383561,
"grad_norm": 1.3536911010742188,
"learning_rate": 0.0002,
"loss": 0.5861,
"step": 77
},
{
"epoch": 0.8547945205479452,
"grad_norm": 1.5119093656539917,
"learning_rate": 0.0002,
"loss": 0.9953,
"step": 78
},
{
"epoch": 0.8657534246575342,
"grad_norm": 1.0627142190933228,
"learning_rate": 0.0002,
"loss": 0.4492,
"step": 79
},
{
"epoch": 0.8767123287671232,
"grad_norm": 1.2815035581588745,
"learning_rate": 0.0002,
"loss": 0.7471,
"step": 80
},
{
"epoch": 0.8876712328767123,
"grad_norm": 1.376985788345337,
"learning_rate": 0.0002,
"loss": 0.8526,
"step": 81
},
{
"epoch": 0.8986301369863013,
"grad_norm": 1.3588144779205322,
"learning_rate": 0.0002,
"loss": 0.7122,
"step": 82
},
{
"epoch": 0.9095890410958904,
"grad_norm": 1.378824234008789,
"learning_rate": 0.0002,
"loss": 0.8444,
"step": 83
},
{
"epoch": 0.9205479452054794,
"grad_norm": 1.5447663068771362,
"learning_rate": 0.0002,
"loss": 0.6788,
"step": 84
},
{
"epoch": 0.9315068493150684,
"grad_norm": 1.4500224590301514,
"learning_rate": 0.0002,
"loss": 0.5721,
"step": 85
},
{
"epoch": 0.9424657534246575,
"grad_norm": 1.0830070972442627,
"learning_rate": 0.0002,
"loss": 0.657,
"step": 86
},
{
"epoch": 0.9534246575342465,
"grad_norm": 1.3003672361373901,
"learning_rate": 0.0002,
"loss": 0.4806,
"step": 87
},
{
"epoch": 0.9643835616438357,
"grad_norm": 1.1137444972991943,
"learning_rate": 0.0002,
"loss": 0.7444,
"step": 88
},
{
"epoch": 0.9753424657534246,
"grad_norm": 1.2204691171646118,
"learning_rate": 0.0002,
"loss": 0.7924,
"step": 89
},
{
"epoch": 0.9863013698630136,
"grad_norm": 1.3225165605545044,
"learning_rate": 0.0002,
"loss": 0.7357,
"step": 90
},
{
"epoch": 0.9972602739726028,
"grad_norm": 1.2743207216262817,
"learning_rate": 0.0002,
"loss": 0.6903,
"step": 91
},
{
"epoch": 1.0082191780821919,
"grad_norm": 1.2072831392288208,
"learning_rate": 0.0002,
"loss": 0.4617,
"step": 92
},
{
"epoch": 1.0191780821917809,
"grad_norm": 1.0190479755401611,
"learning_rate": 0.0002,
"loss": 0.4412,
"step": 93
},
{
"epoch": 1.0301369863013699,
"grad_norm": 0.8685715198516846,
"learning_rate": 0.0002,
"loss": 0.4422,
"step": 94
},
{
"epoch": 1.0410958904109588,
"grad_norm": 0.6671916246414185,
"learning_rate": 0.0002,
"loss": 0.2872,
"step": 95
},
{
"epoch": 1.0520547945205478,
"grad_norm": 0.8552739024162292,
"learning_rate": 0.0002,
"loss": 0.2837,
"step": 96
},
{
"epoch": 1.063013698630137,
"grad_norm": 0.8662064075469971,
"learning_rate": 0.0002,
"loss": 0.2549,
"step": 97
},
{
"epoch": 1.073972602739726,
"grad_norm": 1.6159878969192505,
"learning_rate": 0.0002,
"loss": 0.4545,
"step": 98
},
{
"epoch": 1.084931506849315,
"grad_norm": 1.0922621488571167,
"learning_rate": 0.0002,
"loss": 0.2703,
"step": 99
},
{
"epoch": 1.095890410958904,
"grad_norm": 0.9011418223381042,
"learning_rate": 0.0002,
"loss": 0.1948,
"step": 100
},
{
"epoch": 1.106849315068493,
"grad_norm": 1.094281554222107,
"learning_rate": 0.0002,
"loss": 0.2967,
"step": 101
},
{
"epoch": 1.1178082191780823,
"grad_norm": 0.9296566843986511,
"learning_rate": 0.0002,
"loss": 0.2372,
"step": 102
},
{
"epoch": 1.1287671232876713,
"grad_norm": 1.2015409469604492,
"learning_rate": 0.0002,
"loss": 0.2724,
"step": 103
},
{
"epoch": 1.1397260273972603,
"grad_norm": 1.0707019567489624,
"learning_rate": 0.0002,
"loss": 0.242,
"step": 104
},
{
"epoch": 1.1506849315068493,
"grad_norm": 1.381605863571167,
"learning_rate": 0.0002,
"loss": 0.4983,
"step": 105
},
{
"epoch": 1.1616438356164385,
"grad_norm": 1.3150050640106201,
"learning_rate": 0.0002,
"loss": 0.3801,
"step": 106
},
{
"epoch": 1.1726027397260275,
"grad_norm": 1.2527716159820557,
"learning_rate": 0.0002,
"loss": 0.3798,
"step": 107
},
{
"epoch": 1.1835616438356165,
"grad_norm": 1.2365212440490723,
"learning_rate": 0.0002,
"loss": 0.2736,
"step": 108
},
{
"epoch": 1.1945205479452055,
"grad_norm": 1.1183747053146362,
"learning_rate": 0.0002,
"loss": 0.4753,
"step": 109
},
{
"epoch": 1.2054794520547945,
"grad_norm": 0.8566204905509949,
"learning_rate": 0.0002,
"loss": 0.2531,
"step": 110
},
{
"epoch": 1.2164383561643834,
"grad_norm": 1.0663121938705444,
"learning_rate": 0.0002,
"loss": 0.1986,
"step": 111
},
{
"epoch": 1.2273972602739727,
"grad_norm": 1.4607142210006714,
"learning_rate": 0.0002,
"loss": 0.3589,
"step": 112
},
{
"epoch": 1.2383561643835617,
"grad_norm": 0.7903380990028381,
"learning_rate": 0.0002,
"loss": 0.1885,
"step": 113
},
{
"epoch": 1.2493150684931507,
"grad_norm": 1.3529448509216309,
"learning_rate": 0.0002,
"loss": 0.2417,
"step": 114
},
{
"epoch": 1.2602739726027397,
"grad_norm": 1.0445804595947266,
"learning_rate": 0.0002,
"loss": 0.2208,
"step": 115
},
{
"epoch": 1.2712328767123289,
"grad_norm": 1.0864062309265137,
"learning_rate": 0.0002,
"loss": 0.2603,
"step": 116
},
{
"epoch": 1.2821917808219179,
"grad_norm": 1.0503292083740234,
"learning_rate": 0.0002,
"loss": 0.1478,
"step": 117
},
{
"epoch": 1.2931506849315069,
"grad_norm": 1.4396042823791504,
"learning_rate": 0.0002,
"loss": 0.2482,
"step": 118
},
{
"epoch": 1.3041095890410959,
"grad_norm": 1.7265571355819702,
"learning_rate": 0.0002,
"loss": 0.3195,
"step": 119
},
{
"epoch": 1.3150684931506849,
"grad_norm": 1.2890552282333374,
"learning_rate": 0.0002,
"loss": 0.1891,
"step": 120
},
{
"epoch": 1.3260273972602739,
"grad_norm": 1.25291109085083,
"learning_rate": 0.0002,
"loss": 0.272,
"step": 121
},
{
"epoch": 1.336986301369863,
"grad_norm": 1.3044368028640747,
"learning_rate": 0.0002,
"loss": 0.3068,
"step": 122
},
{
"epoch": 1.347945205479452,
"grad_norm": 1.7130950689315796,
"learning_rate": 0.0002,
"loss": 0.5022,
"step": 123
},
{
"epoch": 1.358904109589041,
"grad_norm": 2.3856253623962402,
"learning_rate": 0.0002,
"loss": 0.2692,
"step": 124
},
{
"epoch": 1.36986301369863,
"grad_norm": 1.2418773174285889,
"learning_rate": 0.0002,
"loss": 0.3586,
"step": 125
},
{
"epoch": 1.3808219178082193,
"grad_norm": 1.4788987636566162,
"learning_rate": 0.0002,
"loss": 0.3331,
"step": 126
},
{
"epoch": 1.3917808219178083,
"grad_norm": 0.8837617635726929,
"learning_rate": 0.0002,
"loss": 0.2599,
"step": 127
},
{
"epoch": 1.4027397260273973,
"grad_norm": 1.1440480947494507,
"learning_rate": 0.0002,
"loss": 0.4741,
"step": 128
},
{
"epoch": 1.4136986301369863,
"grad_norm": 0.924139142036438,
"learning_rate": 0.0002,
"loss": 0.3046,
"step": 129
},
{
"epoch": 1.4246575342465753,
"grad_norm": 1.0871144533157349,
"learning_rate": 0.0002,
"loss": 0.2887,
"step": 130
},
{
"epoch": 1.4356164383561643,
"grad_norm": 0.9994255304336548,
"learning_rate": 0.0002,
"loss": 0.2292,
"step": 131
},
{
"epoch": 1.4465753424657535,
"grad_norm": 1.2388752698898315,
"learning_rate": 0.0002,
"loss": 0.2912,
"step": 132
},
{
"epoch": 1.4575342465753425,
"grad_norm": 1.0453673601150513,
"learning_rate": 0.0002,
"loss": 0.2324,
"step": 133
},
{
"epoch": 1.4684931506849315,
"grad_norm": 1.558586597442627,
"learning_rate": 0.0002,
"loss": 0.3854,
"step": 134
},
{
"epoch": 1.4794520547945205,
"grad_norm": 1.2428361177444458,
"learning_rate": 0.0002,
"loss": 0.2,
"step": 135
},
{
"epoch": 1.4904109589041097,
"grad_norm": 1.2706862688064575,
"learning_rate": 0.0002,
"loss": 0.1539,
"step": 136
},
{
"epoch": 1.5013698630136987,
"grad_norm": 1.4815326929092407,
"learning_rate": 0.0002,
"loss": 0.4198,
"step": 137
},
{
"epoch": 1.5123287671232877,
"grad_norm": 1.3065235614776611,
"learning_rate": 0.0002,
"loss": 0.2996,
"step": 138
},
{
"epoch": 1.5232876712328767,
"grad_norm": 1.1650217771530151,
"learning_rate": 0.0002,
"loss": 0.2343,
"step": 139
},
{
"epoch": 1.5342465753424657,
"grad_norm": 2.339799165725708,
"learning_rate": 0.0002,
"loss": 0.3193,
"step": 140
},
{
"epoch": 1.5452054794520547,
"grad_norm": 1.2828121185302734,
"learning_rate": 0.0002,
"loss": 0.3996,
"step": 141
},
{
"epoch": 1.5561643835616439,
"grad_norm": 1.0856819152832031,
"learning_rate": 0.0002,
"loss": 0.3242,
"step": 142
},
{
"epoch": 1.5671232876712329,
"grad_norm": 1.0250024795532227,
"learning_rate": 0.0002,
"loss": 0.23,
"step": 143
},
{
"epoch": 1.5780821917808219,
"grad_norm": 0.9548241496086121,
"learning_rate": 0.0002,
"loss": 0.1995,
"step": 144
},
{
"epoch": 1.589041095890411,
"grad_norm": 0.966123104095459,
"learning_rate": 0.0002,
"loss": 0.3443,
"step": 145
},
{
"epoch": 1.6,
"grad_norm": 1.8860892057418823,
"learning_rate": 0.0002,
"loss": 0.3481,
"step": 146
},
{
"epoch": 1.610958904109589,
"grad_norm": 1.1538076400756836,
"learning_rate": 0.0002,
"loss": 0.2511,
"step": 147
},
{
"epoch": 1.621917808219178,
"grad_norm": 1.4117934703826904,
"learning_rate": 0.0002,
"loss": 0.3807,
"step": 148
},
{
"epoch": 1.632876712328767,
"grad_norm": 1.4486627578735352,
"learning_rate": 0.0002,
"loss": 0.2264,
"step": 149
},
{
"epoch": 1.643835616438356,
"grad_norm": 0.643312931060791,
"learning_rate": 0.0002,
"loss": 0.0966,
"step": 150
}
],
"logging_steps": 1,
"max_steps": 364,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.76287234956329e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}