t5-base-qlora-finetune-tweetsumm / trainer_state.json
samuellimabraz's picture
End of training
f16d96e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 330,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00909090909090909,
"grad_norm": 4.071784019470215,
"learning_rate": 0.00025,
"loss": 3.1182,
"step": 1
},
{
"epoch": 0.01818181818181818,
"grad_norm": 4.663088321685791,
"learning_rate": 0.0005,
"loss": 3.4235,
"step": 2
},
{
"epoch": 0.02727272727272727,
"grad_norm": 3.732215404510498,
"learning_rate": 0.0004984756097560976,
"loss": 3.1436,
"step": 3
},
{
"epoch": 0.03636363636363636,
"grad_norm": 3.961653232574463,
"learning_rate": 0.0004969512195121951,
"loss": 3.167,
"step": 4
},
{
"epoch": 0.045454545454545456,
"grad_norm": 3.813910484313965,
"learning_rate": 0.0004954268292682927,
"loss": 3.2398,
"step": 5
},
{
"epoch": 0.05454545454545454,
"grad_norm": 3.4221489429473877,
"learning_rate": 0.0004939024390243902,
"loss": 3.0624,
"step": 6
},
{
"epoch": 0.06363636363636363,
"grad_norm": 2.992903232574463,
"learning_rate": 0.0004923780487804878,
"loss": 2.2161,
"step": 7
},
{
"epoch": 0.07272727272727272,
"grad_norm": 4.314737319946289,
"learning_rate": 0.0004908536585365854,
"loss": 3.2214,
"step": 8
},
{
"epoch": 0.08181818181818182,
"grad_norm": 3.5429513454437256,
"learning_rate": 0.0004893292682926829,
"loss": 2.6857,
"step": 9
},
{
"epoch": 0.09090909090909091,
"grad_norm": NaN,
"learning_rate": 0.0004893292682926829,
"loss": 2.4876,
"step": 10
},
{
"epoch": 0.1,
"grad_norm": 3.5051212310791016,
"learning_rate": 0.0004878048780487805,
"loss": 2.6111,
"step": 11
},
{
"epoch": 0.10909090909090909,
"grad_norm": 4.561686038970947,
"learning_rate": 0.00048628048780487805,
"loss": 2.8702,
"step": 12
},
{
"epoch": 0.11818181818181818,
"grad_norm": 3.360241413116455,
"learning_rate": 0.0004847560975609756,
"loss": 2.3469,
"step": 13
},
{
"epoch": 0.12727272727272726,
"grad_norm": 4.544058799743652,
"learning_rate": 0.00048323170731707317,
"loss": 2.4873,
"step": 14
},
{
"epoch": 0.13636363636363635,
"grad_norm": 3.3817787170410156,
"learning_rate": 0.0004817073170731707,
"loss": 2.5345,
"step": 15
},
{
"epoch": 0.14545454545454545,
"grad_norm": 3.405874013900757,
"learning_rate": 0.0004801829268292683,
"loss": 2.1892,
"step": 16
},
{
"epoch": 0.15454545454545454,
"grad_norm": 3.5852108001708984,
"learning_rate": 0.00047865853658536585,
"loss": 2.1381,
"step": 17
},
{
"epoch": 0.16363636363636364,
"grad_norm": 3.0975358486175537,
"learning_rate": 0.0004771341463414634,
"loss": 2.3727,
"step": 18
},
{
"epoch": 0.17272727272727273,
"grad_norm": 3.514904260635376,
"learning_rate": 0.00047560975609756096,
"loss": 2.4526,
"step": 19
},
{
"epoch": 0.18181818181818182,
"grad_norm": 4.176087379455566,
"learning_rate": 0.0004740853658536585,
"loss": 2.2999,
"step": 20
},
{
"epoch": 0.19090909090909092,
"grad_norm": 2.7949447631835938,
"learning_rate": 0.0004725609756097561,
"loss": 2.2605,
"step": 21
},
{
"epoch": 0.2,
"grad_norm": 3.023271322250366,
"learning_rate": 0.00047103658536585364,
"loss": 2.2398,
"step": 22
},
{
"epoch": 0.20909090909090908,
"grad_norm": 3.608038902282715,
"learning_rate": 0.0004695121951219512,
"loss": 2.2346,
"step": 23
},
{
"epoch": 0.21818181818181817,
"grad_norm": 2.7044689655303955,
"learning_rate": 0.0004679878048780488,
"loss": 1.8128,
"step": 24
},
{
"epoch": 0.22727272727272727,
"grad_norm": 2.9385201930999756,
"learning_rate": 0.0004664634146341464,
"loss": 1.9915,
"step": 25
},
{
"epoch": 0.23636363636363636,
"grad_norm": 2.5581564903259277,
"learning_rate": 0.00046493902439024394,
"loss": 2.0105,
"step": 26
},
{
"epoch": 0.24545454545454545,
"grad_norm": 3.774770498275757,
"learning_rate": 0.0004634146341463415,
"loss": 2.4997,
"step": 27
},
{
"epoch": 0.2545454545454545,
"grad_norm": 3.650287628173828,
"learning_rate": 0.00046189024390243906,
"loss": 2.6754,
"step": 28
},
{
"epoch": 0.2636363636363636,
"grad_norm": 3.0995235443115234,
"learning_rate": 0.0004603658536585366,
"loss": 2.5039,
"step": 29
},
{
"epoch": 0.2727272727272727,
"grad_norm": 2.6960413455963135,
"learning_rate": 0.0004588414634146342,
"loss": 2.2671,
"step": 30
},
{
"epoch": 0.2818181818181818,
"grad_norm": 2.8800947666168213,
"learning_rate": 0.00045731707317073173,
"loss": 1.9923,
"step": 31
},
{
"epoch": 0.2909090909090909,
"grad_norm": 2.768280506134033,
"learning_rate": 0.0004557926829268293,
"loss": 2.323,
"step": 32
},
{
"epoch": 0.3,
"grad_norm": 3.0355064868927,
"learning_rate": 0.00045426829268292685,
"loss": 2.1258,
"step": 33
},
{
"epoch": 0.3090909090909091,
"grad_norm": 2.860558032989502,
"learning_rate": 0.0004527439024390244,
"loss": 2.2574,
"step": 34
},
{
"epoch": 0.3181818181818182,
"grad_norm": 3.0588767528533936,
"learning_rate": 0.00045121951219512197,
"loss": 2.2413,
"step": 35
},
{
"epoch": 0.32727272727272727,
"grad_norm": 3.065910577774048,
"learning_rate": 0.00044969512195121953,
"loss": 1.899,
"step": 36
},
{
"epoch": 0.33636363636363636,
"grad_norm": 2.7733941078186035,
"learning_rate": 0.0004481707317073171,
"loss": 2.1158,
"step": 37
},
{
"epoch": 0.34545454545454546,
"grad_norm": 2.811892509460449,
"learning_rate": 0.00044664634146341465,
"loss": 2.48,
"step": 38
},
{
"epoch": 0.35454545454545455,
"grad_norm": 2.3305535316467285,
"learning_rate": 0.0004451219512195122,
"loss": 2.0319,
"step": 39
},
{
"epoch": 0.36363636363636365,
"grad_norm": 2.5622308254241943,
"learning_rate": 0.00044359756097560977,
"loss": 1.9229,
"step": 40
},
{
"epoch": 0.37272727272727274,
"grad_norm": 3.1169259548187256,
"learning_rate": 0.00044207317073170733,
"loss": 2.3218,
"step": 41
},
{
"epoch": 0.38181818181818183,
"grad_norm": 2.607619285583496,
"learning_rate": 0.0004405487804878049,
"loss": 2.5146,
"step": 42
},
{
"epoch": 0.39090909090909093,
"grad_norm": 3.1160788536071777,
"learning_rate": 0.00043902439024390245,
"loss": 2.234,
"step": 43
},
{
"epoch": 0.4,
"grad_norm": 3.0238115787506104,
"learning_rate": 0.0004375,
"loss": 1.6721,
"step": 44
},
{
"epoch": 0.4090909090909091,
"grad_norm": 2.4802684783935547,
"learning_rate": 0.00043597560975609757,
"loss": 1.7682,
"step": 45
},
{
"epoch": 0.41818181818181815,
"grad_norm": 2.3478028774261475,
"learning_rate": 0.00043445121951219513,
"loss": 1.7369,
"step": 46
},
{
"epoch": 0.42727272727272725,
"grad_norm": 3.590702772140503,
"learning_rate": 0.0004329268292682927,
"loss": 2.2643,
"step": 47
},
{
"epoch": 0.43636363636363634,
"grad_norm": 2.9007341861724854,
"learning_rate": 0.00043140243902439025,
"loss": 2.0415,
"step": 48
},
{
"epoch": 0.44545454545454544,
"grad_norm": 2.371359348297119,
"learning_rate": 0.0004298780487804878,
"loss": 1.7783,
"step": 49
},
{
"epoch": 0.45454545454545453,
"grad_norm": 2.953730344772339,
"learning_rate": 0.00042835365853658537,
"loss": 2.3273,
"step": 50
},
{
"epoch": 0.4636363636363636,
"grad_norm": 3.101320266723633,
"learning_rate": 0.0004268292682926829,
"loss": 2.2416,
"step": 51
},
{
"epoch": 0.4727272727272727,
"grad_norm": 2.7180256843566895,
"learning_rate": 0.0004253048780487805,
"loss": 2.3769,
"step": 52
},
{
"epoch": 0.4818181818181818,
"grad_norm": 3.001201868057251,
"learning_rate": 0.00042378048780487805,
"loss": 2.189,
"step": 53
},
{
"epoch": 0.4909090909090909,
"grad_norm": 3.2738707065582275,
"learning_rate": 0.0004222560975609756,
"loss": 2.2839,
"step": 54
},
{
"epoch": 0.5,
"grad_norm": 2.4475655555725098,
"learning_rate": 0.00042073170731707316,
"loss": 1.8438,
"step": 55
},
{
"epoch": 0.509090909090909,
"grad_norm": 2.2483181953430176,
"learning_rate": 0.0004192073170731707,
"loss": 1.6211,
"step": 56
},
{
"epoch": 0.5181818181818182,
"grad_norm": 3.2838337421417236,
"learning_rate": 0.0004176829268292683,
"loss": 2.255,
"step": 57
},
{
"epoch": 0.5272727272727272,
"grad_norm": 2.5919408798217773,
"learning_rate": 0.00041615853658536584,
"loss": 2.0132,
"step": 58
},
{
"epoch": 0.5363636363636364,
"grad_norm": 2.7850112915039062,
"learning_rate": 0.0004146341463414634,
"loss": 2.1505,
"step": 59
},
{
"epoch": 0.5454545454545454,
"grad_norm": 2.3396100997924805,
"learning_rate": 0.00041310975609756096,
"loss": 1.8747,
"step": 60
},
{
"epoch": 0.5545454545454546,
"grad_norm": 2.788200616836548,
"learning_rate": 0.0004115853658536585,
"loss": 2.305,
"step": 61
},
{
"epoch": 0.5636363636363636,
"grad_norm": 2.750195264816284,
"learning_rate": 0.0004100609756097561,
"loss": 1.8235,
"step": 62
},
{
"epoch": 0.5727272727272728,
"grad_norm": 3.041684865951538,
"learning_rate": 0.00040853658536585364,
"loss": 2.1002,
"step": 63
},
{
"epoch": 0.5818181818181818,
"grad_norm": 3.0440473556518555,
"learning_rate": 0.0004070121951219512,
"loss": 1.7584,
"step": 64
},
{
"epoch": 0.5909090909090909,
"grad_norm": 3.090240955352783,
"learning_rate": 0.0004054878048780488,
"loss": 1.8738,
"step": 65
},
{
"epoch": 0.6,
"grad_norm": 2.376418352127075,
"learning_rate": 0.0004039634146341464,
"loss": 1.8655,
"step": 66
},
{
"epoch": 0.6090909090909091,
"grad_norm": 2.83278489112854,
"learning_rate": 0.00040243902439024393,
"loss": 2.6537,
"step": 67
},
{
"epoch": 0.6181818181818182,
"grad_norm": 3.2327969074249268,
"learning_rate": 0.0004009146341463415,
"loss": 1.9686,
"step": 68
},
{
"epoch": 0.6272727272727273,
"grad_norm": 2.8755669593811035,
"learning_rate": 0.00039939024390243905,
"loss": 2.1451,
"step": 69
},
{
"epoch": 0.6363636363636364,
"grad_norm": 2.6152892112731934,
"learning_rate": 0.0003978658536585366,
"loss": 1.5981,
"step": 70
},
{
"epoch": 0.6454545454545455,
"grad_norm": 3.076869010925293,
"learning_rate": 0.0003963414634146342,
"loss": 1.9654,
"step": 71
},
{
"epoch": 0.6545454545454545,
"grad_norm": 2.6173629760742188,
"learning_rate": 0.00039481707317073173,
"loss": 2.0392,
"step": 72
},
{
"epoch": 0.6636363636363637,
"grad_norm": 3.449314594268799,
"learning_rate": 0.0003932926829268293,
"loss": 2.0893,
"step": 73
},
{
"epoch": 0.6727272727272727,
"grad_norm": 2.5918149948120117,
"learning_rate": 0.00039176829268292685,
"loss": 1.969,
"step": 74
},
{
"epoch": 0.6818181818181818,
"grad_norm": 2.780331611633301,
"learning_rate": 0.0003902439024390244,
"loss": 2.0783,
"step": 75
},
{
"epoch": 0.6909090909090909,
"grad_norm": 2.8215320110321045,
"learning_rate": 0.00038871951219512197,
"loss": 1.9011,
"step": 76
},
{
"epoch": 0.7,
"grad_norm": 2.9159982204437256,
"learning_rate": 0.00038719512195121953,
"loss": 1.9432,
"step": 77
},
{
"epoch": 0.7090909090909091,
"grad_norm": 2.684636116027832,
"learning_rate": 0.0003856707317073171,
"loss": 1.9473,
"step": 78
},
{
"epoch": 0.7181818181818181,
"grad_norm": 2.554502248764038,
"learning_rate": 0.00038414634146341465,
"loss": 1.9211,
"step": 79
},
{
"epoch": 0.7272727272727273,
"grad_norm": 2.93910551071167,
"learning_rate": 0.0003826219512195122,
"loss": 1.7052,
"step": 80
},
{
"epoch": 0.7363636363636363,
"grad_norm": 3.246049165725708,
"learning_rate": 0.00038109756097560977,
"loss": 1.9143,
"step": 81
},
{
"epoch": 0.7454545454545455,
"grad_norm": 2.407006025314331,
"learning_rate": 0.00037957317073170733,
"loss": 1.8885,
"step": 82
},
{
"epoch": 0.7545454545454545,
"grad_norm": 3.0212433338165283,
"learning_rate": 0.0003780487804878049,
"loss": 2.2017,
"step": 83
},
{
"epoch": 0.7636363636363637,
"grad_norm": 2.7870118618011475,
"learning_rate": 0.00037652439024390245,
"loss": 1.8038,
"step": 84
},
{
"epoch": 0.7727272727272727,
"grad_norm": 2.3417677879333496,
"learning_rate": 0.000375,
"loss": 1.6184,
"step": 85
},
{
"epoch": 0.7818181818181819,
"grad_norm": 3.359086036682129,
"learning_rate": 0.00037347560975609757,
"loss": 2.3235,
"step": 86
},
{
"epoch": 0.7909090909090909,
"grad_norm": 2.854968786239624,
"learning_rate": 0.0003719512195121951,
"loss": 1.9161,
"step": 87
},
{
"epoch": 0.8,
"grad_norm": 3.0338873863220215,
"learning_rate": 0.0003704268292682927,
"loss": 2.5778,
"step": 88
},
{
"epoch": 0.8090909090909091,
"grad_norm": 2.7215914726257324,
"learning_rate": 0.00036890243902439025,
"loss": 2.2668,
"step": 89
},
{
"epoch": 0.8181818181818182,
"grad_norm": 3.0825934410095215,
"learning_rate": 0.0003673780487804878,
"loss": 2.0312,
"step": 90
},
{
"epoch": 0.8272727272727273,
"grad_norm": 4.025455951690674,
"learning_rate": 0.00036585365853658537,
"loss": 2.0015,
"step": 91
},
{
"epoch": 0.8363636363636363,
"grad_norm": 2.394049882888794,
"learning_rate": 0.0003643292682926829,
"loss": 1.7331,
"step": 92
},
{
"epoch": 0.8454545454545455,
"grad_norm": 3.181750535964966,
"learning_rate": 0.0003628048780487805,
"loss": 1.8799,
"step": 93
},
{
"epoch": 0.8545454545454545,
"grad_norm": 3.0317776203155518,
"learning_rate": 0.00036128048780487804,
"loss": 1.7447,
"step": 94
},
{
"epoch": 0.8636363636363636,
"grad_norm": 2.537506341934204,
"learning_rate": 0.0003597560975609756,
"loss": 1.5733,
"step": 95
},
{
"epoch": 0.8727272727272727,
"grad_norm": 2.7143495082855225,
"learning_rate": 0.00035823170731707316,
"loss": 1.7666,
"step": 96
},
{
"epoch": 0.8818181818181818,
"grad_norm": 2.9140336513519287,
"learning_rate": 0.0003567073170731707,
"loss": 1.5887,
"step": 97
},
{
"epoch": 0.8909090909090909,
"grad_norm": 2.7197532653808594,
"learning_rate": 0.0003551829268292683,
"loss": 2.0022,
"step": 98
},
{
"epoch": 0.9,
"grad_norm": 3.1423003673553467,
"learning_rate": 0.00035365853658536584,
"loss": 1.9328,
"step": 99
},
{
"epoch": 0.9090909090909091,
"grad_norm": 2.541865110397339,
"learning_rate": 0.0003521341463414634,
"loss": 1.7566,
"step": 100
},
{
"epoch": 0.9181818181818182,
"grad_norm": 2.9177896976470947,
"learning_rate": 0.00035060975609756096,
"loss": 1.7848,
"step": 101
},
{
"epoch": 0.9272727272727272,
"grad_norm": 2.671201467514038,
"learning_rate": 0.0003490853658536585,
"loss": 2.0213,
"step": 102
},
{
"epoch": 0.9363636363636364,
"grad_norm": 2.950990676879883,
"learning_rate": 0.0003475609756097561,
"loss": 1.8383,
"step": 103
},
{
"epoch": 0.9454545454545454,
"grad_norm": 2.508028984069824,
"learning_rate": 0.00034603658536585364,
"loss": 1.8774,
"step": 104
},
{
"epoch": 0.9545454545454546,
"grad_norm": 2.4655253887176514,
"learning_rate": 0.0003445121951219512,
"loss": 1.7972,
"step": 105
},
{
"epoch": 0.9636363636363636,
"grad_norm": 3.085402727127075,
"learning_rate": 0.0003429878048780488,
"loss": 1.8712,
"step": 106
},
{
"epoch": 0.9727272727272728,
"grad_norm": 2.305389642715454,
"learning_rate": 0.0003414634146341464,
"loss": 1.48,
"step": 107
},
{
"epoch": 0.9818181818181818,
"grad_norm": 2.7900662422180176,
"learning_rate": 0.00033993902439024393,
"loss": 1.6433,
"step": 108
},
{
"epoch": 0.990909090909091,
"grad_norm": 2.7554140090942383,
"learning_rate": 0.0003384146341463415,
"loss": 2.2131,
"step": 109
},
{
"epoch": 1.0,
"grad_norm": 2.8607048988342285,
"learning_rate": 0.00033689024390243905,
"loss": 2.062,
"step": 110
},
{
"epoch": 1.0,
"eval_f1": 0.8911,
"eval_gen_len": 49.7273,
"eval_loss": 1.8471709489822388,
"eval_precision": 0.8897,
"eval_recall": 0.8927,
"eval_rouge1": 0.4633,
"eval_rouge2": 0.2177,
"eval_rougeL": 0.3919,
"eval_rougeLsum": 0.428,
"eval_runtime": 46.8612,
"eval_samples_per_second": 2.347,
"eval_steps_per_second": 0.299,
"step": 110
},
{
"epoch": 1.009090909090909,
"grad_norm": 2.5493690967559814,
"learning_rate": 0.0003353658536585366,
"loss": 1.9424,
"step": 111
},
{
"epoch": 1.018181818181818,
"grad_norm": 2.030336618423462,
"learning_rate": 0.00033384146341463417,
"loss": 1.4907,
"step": 112
},
{
"epoch": 1.0272727272727273,
"grad_norm": 2.3484795093536377,
"learning_rate": 0.00033231707317073173,
"loss": 1.8514,
"step": 113
},
{
"epoch": 1.0363636363636364,
"grad_norm": 3.1272692680358887,
"learning_rate": 0.0003307926829268293,
"loss": 2.2966,
"step": 114
},
{
"epoch": 1.0454545454545454,
"grad_norm": 2.4133095741271973,
"learning_rate": 0.00032926829268292685,
"loss": 1.5813,
"step": 115
},
{
"epoch": 1.0545454545454545,
"grad_norm": 2.597964286804199,
"learning_rate": 0.0003277439024390244,
"loss": 1.9743,
"step": 116
},
{
"epoch": 1.0636363636363637,
"grad_norm": 2.755969524383545,
"learning_rate": 0.00032621951219512197,
"loss": 1.4676,
"step": 117
},
{
"epoch": 1.0727272727272728,
"grad_norm": 2.3715810775756836,
"learning_rate": 0.00032469512195121953,
"loss": 1.9406,
"step": 118
},
{
"epoch": 1.0818181818181818,
"grad_norm": 2.3229820728302,
"learning_rate": 0.0003231707317073171,
"loss": 1.9787,
"step": 119
},
{
"epoch": 1.0909090909090908,
"grad_norm": 2.724597215652466,
"learning_rate": 0.00032164634146341465,
"loss": 2.1574,
"step": 120
},
{
"epoch": 1.1,
"grad_norm": 3.342278242111206,
"learning_rate": 0.0003201219512195122,
"loss": 1.8255,
"step": 121
},
{
"epoch": 1.1090909090909091,
"grad_norm": 2.4172379970550537,
"learning_rate": 0.00031859756097560977,
"loss": 1.7576,
"step": 122
},
{
"epoch": 1.1181818181818182,
"grad_norm": 3.158111095428467,
"learning_rate": 0.00031707317073170733,
"loss": 1.9855,
"step": 123
},
{
"epoch": 1.1272727272727272,
"grad_norm": 2.9254257678985596,
"learning_rate": 0.0003155487804878049,
"loss": 2.0363,
"step": 124
},
{
"epoch": 1.1363636363636362,
"grad_norm": 2.5448672771453857,
"learning_rate": 0.00031402439024390245,
"loss": 1.8373,
"step": 125
},
{
"epoch": 1.1454545454545455,
"grad_norm": 2.693936347961426,
"learning_rate": 0.0003125,
"loss": 1.8654,
"step": 126
},
{
"epoch": 1.1545454545454545,
"grad_norm": 2.5871312618255615,
"learning_rate": 0.00031097560975609757,
"loss": 1.7372,
"step": 127
},
{
"epoch": 1.1636363636363636,
"grad_norm": 2.709867000579834,
"learning_rate": 0.0003094512195121951,
"loss": 1.8044,
"step": 128
},
{
"epoch": 1.1727272727272728,
"grad_norm": 2.741077423095703,
"learning_rate": 0.0003079268292682927,
"loss": 1.9361,
"step": 129
},
{
"epoch": 1.1818181818181819,
"grad_norm": 2.9570066928863525,
"learning_rate": 0.00030640243902439024,
"loss": 2.0666,
"step": 130
},
{
"epoch": 1.190909090909091,
"grad_norm": 2.356640100479126,
"learning_rate": 0.0003048780487804878,
"loss": 1.7525,
"step": 131
},
{
"epoch": 1.2,
"grad_norm": 2.7299695014953613,
"learning_rate": 0.00030335365853658536,
"loss": 1.8064,
"step": 132
},
{
"epoch": 1.209090909090909,
"grad_norm": 2.3065450191497803,
"learning_rate": 0.0003018292682926829,
"loss": 1.652,
"step": 133
},
{
"epoch": 1.2181818181818183,
"grad_norm": 2.8539586067199707,
"learning_rate": 0.0003003048780487805,
"loss": 1.8285,
"step": 134
},
{
"epoch": 1.2272727272727273,
"grad_norm": 2.843475580215454,
"learning_rate": 0.00029878048780487804,
"loss": 1.9938,
"step": 135
},
{
"epoch": 1.2363636363636363,
"grad_norm": 2.547865390777588,
"learning_rate": 0.0002972560975609756,
"loss": 1.9472,
"step": 136
},
{
"epoch": 1.2454545454545454,
"grad_norm": 2.1262078285217285,
"learning_rate": 0.00029573170731707316,
"loss": 1.3486,
"step": 137
},
{
"epoch": 1.2545454545454544,
"grad_norm": 2.93005633354187,
"learning_rate": 0.0002942073170731707,
"loss": 1.5797,
"step": 138
},
{
"epoch": 1.2636363636363637,
"grad_norm": 2.5727319717407227,
"learning_rate": 0.0002926829268292683,
"loss": 1.8976,
"step": 139
},
{
"epoch": 1.2727272727272727,
"grad_norm": 3.29372501373291,
"learning_rate": 0.00029115853658536584,
"loss": 1.9751,
"step": 140
},
{
"epoch": 1.2818181818181817,
"grad_norm": NaN,
"learning_rate": 0.00029115853658536584,
"loss": 1.9404,
"step": 141
},
{
"epoch": 1.290909090909091,
"grad_norm": 2.868084669113159,
"learning_rate": 0.0002896341463414634,
"loss": 1.5832,
"step": 142
},
{
"epoch": 1.3,
"grad_norm": 2.792365312576294,
"learning_rate": 0.00028810975609756096,
"loss": 2.0512,
"step": 143
},
{
"epoch": 1.309090909090909,
"grad_norm": 2.333376169204712,
"learning_rate": 0.0002865853658536585,
"loss": 1.5019,
"step": 144
},
{
"epoch": 1.3181818181818181,
"grad_norm": 3.5344386100769043,
"learning_rate": 0.0002850609756097561,
"loss": 2.0819,
"step": 145
},
{
"epoch": 1.3272727272727272,
"grad_norm": 2.7761566638946533,
"learning_rate": 0.00028353658536585364,
"loss": 1.8773,
"step": 146
},
{
"epoch": 1.3363636363636364,
"grad_norm": 2.796036720275879,
"learning_rate": 0.0002820121951219512,
"loss": 1.8373,
"step": 147
},
{
"epoch": 1.3454545454545455,
"grad_norm": 3.2006378173828125,
"learning_rate": 0.0002804878048780488,
"loss": 2.0195,
"step": 148
},
{
"epoch": 1.3545454545454545,
"grad_norm": 3.6497931480407715,
"learning_rate": 0.00027896341463414637,
"loss": 1.7754,
"step": 149
},
{
"epoch": 1.3636363636363638,
"grad_norm": 3.0451290607452393,
"learning_rate": 0.00027743902439024393,
"loss": 1.6708,
"step": 150
},
{
"epoch": 1.3727272727272728,
"grad_norm": 3.2684431076049805,
"learning_rate": 0.0002759146341463415,
"loss": 2.0718,
"step": 151
},
{
"epoch": 1.3818181818181818,
"grad_norm": 2.6152052879333496,
"learning_rate": 0.00027439024390243905,
"loss": 1.87,
"step": 152
},
{
"epoch": 1.3909090909090909,
"grad_norm": 2.713304281234741,
"learning_rate": 0.0002728658536585366,
"loss": 1.707,
"step": 153
},
{
"epoch": 1.4,
"grad_norm": 2.6585283279418945,
"learning_rate": 0.00027134146341463417,
"loss": 1.8551,
"step": 154
},
{
"epoch": 1.4090909090909092,
"grad_norm": 2.903383255004883,
"learning_rate": 0.00026981707317073173,
"loss": 1.9101,
"step": 155
},
{
"epoch": 1.4181818181818182,
"grad_norm": 2.6489484310150146,
"learning_rate": 0.0002682926829268293,
"loss": 1.8333,
"step": 156
},
{
"epoch": 1.4272727272727272,
"grad_norm": 3.004567861557007,
"learning_rate": 0.00026676829268292685,
"loss": 1.4886,
"step": 157
},
{
"epoch": 1.4363636363636363,
"grad_norm": 2.774327278137207,
"learning_rate": 0.0002652439024390244,
"loss": 1.8893,
"step": 158
},
{
"epoch": 1.4454545454545453,
"grad_norm": 2.8652050495147705,
"learning_rate": 0.00026371951219512197,
"loss": 1.8702,
"step": 159
},
{
"epoch": 1.4545454545454546,
"grad_norm": 2.2331173419952393,
"learning_rate": 0.00026219512195121953,
"loss": 1.9589,
"step": 160
},
{
"epoch": 1.4636363636363636,
"grad_norm": 2.5273420810699463,
"learning_rate": 0.0002606707317073171,
"loss": 1.7515,
"step": 161
},
{
"epoch": 1.4727272727272727,
"grad_norm": 3.0862534046173096,
"learning_rate": 0.00025914634146341465,
"loss": 1.4377,
"step": 162
},
{
"epoch": 1.481818181818182,
"grad_norm": 3.167388677597046,
"learning_rate": 0.0002576219512195122,
"loss": 2.2095,
"step": 163
},
{
"epoch": 1.490909090909091,
"grad_norm": 2.9668028354644775,
"learning_rate": 0.00025609756097560977,
"loss": 1.7428,
"step": 164
},
{
"epoch": 1.5,
"grad_norm": 2.4375245571136475,
"learning_rate": 0.0002545731707317073,
"loss": 1.392,
"step": 165
},
{
"epoch": 1.509090909090909,
"grad_norm": 2.71293568611145,
"learning_rate": 0.0002530487804878049,
"loss": 1.9706,
"step": 166
},
{
"epoch": 1.518181818181818,
"grad_norm": 2.6570401191711426,
"learning_rate": 0.00025152439024390245,
"loss": 1.8777,
"step": 167
},
{
"epoch": 1.5272727272727273,
"grad_norm": 2.379110813140869,
"learning_rate": 0.00025,
"loss": 1.4582,
"step": 168
},
{
"epoch": 1.5363636363636364,
"grad_norm": 3.0288100242614746,
"learning_rate": 0.00024847560975609756,
"loss": 1.9216,
"step": 169
},
{
"epoch": 1.5454545454545454,
"grad_norm": 2.574794054031372,
"learning_rate": 0.0002469512195121951,
"loss": 1.5392,
"step": 170
},
{
"epoch": 1.5545454545454547,
"grad_norm": 2.8772997856140137,
"learning_rate": 0.0002454268292682927,
"loss": 1.8662,
"step": 171
},
{
"epoch": 1.5636363636363635,
"grad_norm": 3.186906099319458,
"learning_rate": 0.00024390243902439024,
"loss": 1.7958,
"step": 172
},
{
"epoch": 1.5727272727272728,
"grad_norm": 2.5595905780792236,
"learning_rate": 0.0002423780487804878,
"loss": 1.6044,
"step": 173
},
{
"epoch": 1.5818181818181818,
"grad_norm": 2.952155828475952,
"learning_rate": 0.00024085365853658536,
"loss": 1.9509,
"step": 174
},
{
"epoch": 1.5909090909090908,
"grad_norm": 2.6648874282836914,
"learning_rate": 0.00023932926829268292,
"loss": 1.8644,
"step": 175
},
{
"epoch": 1.6,
"grad_norm": 2.649273157119751,
"learning_rate": 0.00023780487804878048,
"loss": 1.9846,
"step": 176
},
{
"epoch": 1.6090909090909091,
"grad_norm": 3.196133852005005,
"learning_rate": 0.00023628048780487804,
"loss": 1.689,
"step": 177
},
{
"epoch": 1.6181818181818182,
"grad_norm": 2.5493838787078857,
"learning_rate": 0.0002347560975609756,
"loss": 1.6899,
"step": 178
},
{
"epoch": 1.6272727272727274,
"grad_norm": 4.60580587387085,
"learning_rate": 0.0002332317073170732,
"loss": 1.8183,
"step": 179
},
{
"epoch": 1.6363636363636362,
"grad_norm": 2.7253472805023193,
"learning_rate": 0.00023170731707317075,
"loss": 1.7829,
"step": 180
},
{
"epoch": 1.6454545454545455,
"grad_norm": 2.9373483657836914,
"learning_rate": 0.0002301829268292683,
"loss": 1.7837,
"step": 181
},
{
"epoch": 1.6545454545454545,
"grad_norm": 2.4897494316101074,
"learning_rate": 0.00022865853658536587,
"loss": 1.4675,
"step": 182
},
{
"epoch": 1.6636363636363636,
"grad_norm": 2.3043053150177,
"learning_rate": 0.00022713414634146343,
"loss": 1.5805,
"step": 183
},
{
"epoch": 1.6727272727272728,
"grad_norm": 3.0130205154418945,
"learning_rate": 0.00022560975609756099,
"loss": 2.1577,
"step": 184
},
{
"epoch": 1.6818181818181817,
"grad_norm": 3.0861997604370117,
"learning_rate": 0.00022408536585365855,
"loss": 2.011,
"step": 185
},
{
"epoch": 1.690909090909091,
"grad_norm": 2.6685359477996826,
"learning_rate": 0.0002225609756097561,
"loss": 1.9333,
"step": 186
},
{
"epoch": 1.7,
"grad_norm": 2.309566020965576,
"learning_rate": 0.00022103658536585367,
"loss": 1.7602,
"step": 187
},
{
"epoch": 1.709090909090909,
"grad_norm": 2.686500072479248,
"learning_rate": 0.00021951219512195122,
"loss": 1.9934,
"step": 188
},
{
"epoch": 1.7181818181818183,
"grad_norm": 2.0740697383880615,
"learning_rate": 0.00021798780487804878,
"loss": 1.3903,
"step": 189
},
{
"epoch": 1.7272727272727273,
"grad_norm": 2.9675910472869873,
"learning_rate": 0.00021646341463414634,
"loss": 1.8017,
"step": 190
},
{
"epoch": 1.7363636363636363,
"grad_norm": 2.5559232234954834,
"learning_rate": 0.0002149390243902439,
"loss": 1.8526,
"step": 191
},
{
"epoch": 1.7454545454545456,
"grad_norm": 3.0341475009918213,
"learning_rate": 0.00021341463414634146,
"loss": 1.9374,
"step": 192
},
{
"epoch": 1.7545454545454544,
"grad_norm": 2.351853609085083,
"learning_rate": 0.00021189024390243902,
"loss": 1.695,
"step": 193
},
{
"epoch": 1.7636363636363637,
"grad_norm": 2.8605730533599854,
"learning_rate": 0.00021036585365853658,
"loss": 2.1981,
"step": 194
},
{
"epoch": 1.7727272727272727,
"grad_norm": 2.4615988731384277,
"learning_rate": 0.00020884146341463414,
"loss": 1.8249,
"step": 195
},
{
"epoch": 1.7818181818181817,
"grad_norm": 2.9569573402404785,
"learning_rate": 0.0002073170731707317,
"loss": 2.0083,
"step": 196
},
{
"epoch": 1.790909090909091,
"grad_norm": 2.4674253463745117,
"learning_rate": 0.00020579268292682926,
"loss": 1.4235,
"step": 197
},
{
"epoch": 1.8,
"grad_norm": 2.693068027496338,
"learning_rate": 0.00020426829268292682,
"loss": 2.0025,
"step": 198
},
{
"epoch": 1.809090909090909,
"grad_norm": 2.6479923725128174,
"learning_rate": 0.0002027439024390244,
"loss": 1.8531,
"step": 199
},
{
"epoch": 1.8181818181818183,
"grad_norm": 2.5726828575134277,
"learning_rate": 0.00020121951219512197,
"loss": 1.9046,
"step": 200
},
{
"epoch": 1.8272727272727272,
"grad_norm": 2.286144971847534,
"learning_rate": 0.00019969512195121953,
"loss": 1.8147,
"step": 201
},
{
"epoch": 1.8363636363636364,
"grad_norm": 2.4627556800842285,
"learning_rate": 0.0001981707317073171,
"loss": 1.7151,
"step": 202
},
{
"epoch": 1.8454545454545455,
"grad_norm": 2.3641176223754883,
"learning_rate": 0.00019664634146341465,
"loss": 1.6279,
"step": 203
},
{
"epoch": 1.8545454545454545,
"grad_norm": 2.3488380908966064,
"learning_rate": 0.0001951219512195122,
"loss": 1.7368,
"step": 204
},
{
"epoch": 1.8636363636363638,
"grad_norm": 2.626936435699463,
"learning_rate": 0.00019359756097560977,
"loss": 1.6347,
"step": 205
},
{
"epoch": 1.8727272727272726,
"grad_norm": 2.7492589950561523,
"learning_rate": 0.00019207317073170733,
"loss": 1.8449,
"step": 206
},
{
"epoch": 1.8818181818181818,
"grad_norm": 2.2747907638549805,
"learning_rate": 0.00019054878048780488,
"loss": 1.669,
"step": 207
},
{
"epoch": 1.8909090909090909,
"grad_norm": 2.540228843688965,
"learning_rate": 0.00018902439024390244,
"loss": 1.6892,
"step": 208
},
{
"epoch": 1.9,
"grad_norm": 2.521294593811035,
"learning_rate": 0.0001875,
"loss": 1.7682,
"step": 209
},
{
"epoch": 1.9090909090909092,
"grad_norm": 2.7691824436187744,
"learning_rate": 0.00018597560975609756,
"loss": 1.7694,
"step": 210
},
{
"epoch": 1.9181818181818182,
"grad_norm": 2.7531917095184326,
"learning_rate": 0.00018445121951219512,
"loss": 1.7217,
"step": 211
},
{
"epoch": 1.9272727272727272,
"grad_norm": 2.3607912063598633,
"learning_rate": 0.00018292682926829268,
"loss": 1.9776,
"step": 212
},
{
"epoch": 1.9363636363636365,
"grad_norm": 2.737658739089966,
"learning_rate": 0.00018140243902439024,
"loss": 1.7351,
"step": 213
},
{
"epoch": 1.9454545454545453,
"grad_norm": 2.491176128387451,
"learning_rate": 0.0001798780487804878,
"loss": 1.7922,
"step": 214
},
{
"epoch": 1.9545454545454546,
"grad_norm": 2.4534223079681396,
"learning_rate": 0.00017835365853658536,
"loss": 1.8887,
"step": 215
},
{
"epoch": 1.9636363636363636,
"grad_norm": 3.2689099311828613,
"learning_rate": 0.00017682926829268292,
"loss": 2.1455,
"step": 216
},
{
"epoch": 1.9727272727272727,
"grad_norm": 2.444859743118286,
"learning_rate": 0.00017530487804878048,
"loss": 1.6893,
"step": 217
},
{
"epoch": 1.981818181818182,
"grad_norm": 2.3944003582000732,
"learning_rate": 0.00017378048780487804,
"loss": 1.4158,
"step": 218
},
{
"epoch": 1.990909090909091,
"grad_norm": 2.8656787872314453,
"learning_rate": 0.0001722560975609756,
"loss": 2.1827,
"step": 219
},
{
"epoch": 2.0,
"grad_norm": 2.836941719055176,
"learning_rate": 0.0001707317073170732,
"loss": 1.7853,
"step": 220
},
{
"epoch": 2.0,
"eval_f1": 0.8953,
"eval_gen_len": 49.4273,
"eval_loss": 1.8119523525238037,
"eval_precision": 0.8945,
"eval_recall": 0.8963,
"eval_rouge1": 0.4633,
"eval_rouge2": 0.2203,
"eval_rougeL": 0.3941,
"eval_rougeLsum": 0.4285,
"eval_runtime": 47.0298,
"eval_samples_per_second": 2.339,
"eval_steps_per_second": 0.298,
"step": 220
},
{
"epoch": 2.0090909090909093,
"grad_norm": 2.421740770339966,
"learning_rate": 0.00016920731707317075,
"loss": 1.3819,
"step": 221
},
{
"epoch": 2.018181818181818,
"grad_norm": 2.5827627182006836,
"learning_rate": 0.0001676829268292683,
"loss": 1.7222,
"step": 222
},
{
"epoch": 2.0272727272727273,
"grad_norm": 2.4553208351135254,
"learning_rate": 0.00016615853658536587,
"loss": 1.7625,
"step": 223
},
{
"epoch": 2.036363636363636,
"grad_norm": 2.6801366806030273,
"learning_rate": 0.00016463414634146343,
"loss": 1.6591,
"step": 224
},
{
"epoch": 2.0454545454545454,
"grad_norm": 2.8583321571350098,
"learning_rate": 0.00016310975609756098,
"loss": 1.8691,
"step": 225
},
{
"epoch": 2.0545454545454547,
"grad_norm": 2.8577945232391357,
"learning_rate": 0.00016158536585365854,
"loss": 2.2397,
"step": 226
},
{
"epoch": 2.0636363636363635,
"grad_norm": 2.5665602684020996,
"learning_rate": 0.0001600609756097561,
"loss": 1.6606,
"step": 227
},
{
"epoch": 2.0727272727272728,
"grad_norm": 2.4023563861846924,
"learning_rate": 0.00015853658536585366,
"loss": 1.6621,
"step": 228
},
{
"epoch": 2.081818181818182,
"grad_norm": 2.426421880722046,
"learning_rate": 0.00015701219512195122,
"loss": 1.5207,
"step": 229
},
{
"epoch": 2.090909090909091,
"grad_norm": 2.462782382965088,
"learning_rate": 0.00015548780487804878,
"loss": 1.6258,
"step": 230
},
{
"epoch": 2.1,
"grad_norm": 2.5428097248077393,
"learning_rate": 0.00015396341463414634,
"loss": 1.4525,
"step": 231
},
{
"epoch": 2.109090909090909,
"grad_norm": 2.179856538772583,
"learning_rate": 0.0001524390243902439,
"loss": 1.3824,
"step": 232
},
{
"epoch": 2.118181818181818,
"grad_norm": 2.4684817790985107,
"learning_rate": 0.00015091463414634146,
"loss": 1.5785,
"step": 233
},
{
"epoch": 2.1272727272727274,
"grad_norm": 2.5517914295196533,
"learning_rate": 0.00014939024390243902,
"loss": 1.5097,
"step": 234
},
{
"epoch": 2.1363636363636362,
"grad_norm": 2.6141982078552246,
"learning_rate": 0.00014786585365853658,
"loss": 1.4524,
"step": 235
},
{
"epoch": 2.1454545454545455,
"grad_norm": 2.820064067840576,
"learning_rate": 0.00014634146341463414,
"loss": 1.8698,
"step": 236
},
{
"epoch": 2.1545454545454543,
"grad_norm": 2.5539379119873047,
"learning_rate": 0.0001448170731707317,
"loss": 1.5742,
"step": 237
},
{
"epoch": 2.1636363636363636,
"grad_norm": 2.738567352294922,
"learning_rate": 0.00014329268292682926,
"loss": 1.6458,
"step": 238
},
{
"epoch": 2.172727272727273,
"grad_norm": 2.583866834640503,
"learning_rate": 0.00014176829268292682,
"loss": 1.7928,
"step": 239
},
{
"epoch": 2.1818181818181817,
"grad_norm": 2.7706844806671143,
"learning_rate": 0.0001402439024390244,
"loss": 2.1484,
"step": 240
},
{
"epoch": 2.190909090909091,
"grad_norm": 2.333019495010376,
"learning_rate": 0.00013871951219512197,
"loss": 1.4385,
"step": 241
},
{
"epoch": 2.2,
"grad_norm": 2.4395415782928467,
"learning_rate": 0.00013719512195121953,
"loss": 1.1418,
"step": 242
},
{
"epoch": 2.209090909090909,
"grad_norm": 2.8756039142608643,
"learning_rate": 0.00013567073170731709,
"loss": 2.121,
"step": 243
},
{
"epoch": 2.2181818181818183,
"grad_norm": 2.7827882766723633,
"learning_rate": 0.00013414634146341464,
"loss": 1.829,
"step": 244
},
{
"epoch": 2.227272727272727,
"grad_norm": 2.5495877265930176,
"learning_rate": 0.0001326219512195122,
"loss": 1.8662,
"step": 245
},
{
"epoch": 2.2363636363636363,
"grad_norm": 2.831456422805786,
"learning_rate": 0.00013109756097560976,
"loss": 1.9483,
"step": 246
},
{
"epoch": 2.2454545454545456,
"grad_norm": 3.0142741203308105,
"learning_rate": 0.00012957317073170732,
"loss": 1.7868,
"step": 247
},
{
"epoch": 2.2545454545454544,
"grad_norm": 2.723198652267456,
"learning_rate": 0.00012804878048780488,
"loss": 1.7103,
"step": 248
},
{
"epoch": 2.2636363636363637,
"grad_norm": 3.161470890045166,
"learning_rate": 0.00012652439024390244,
"loss": 1.8972,
"step": 249
},
{
"epoch": 2.2727272727272725,
"grad_norm": 2.5970962047576904,
"learning_rate": 0.000125,
"loss": 2.1127,
"step": 250
},
{
"epoch": 2.2818181818181817,
"grad_norm": 2.5795202255249023,
"learning_rate": 0.00012347560975609756,
"loss": 1.3795,
"step": 251
},
{
"epoch": 2.290909090909091,
"grad_norm": 2.3511993885040283,
"learning_rate": 0.00012195121951219512,
"loss": 1.2534,
"step": 252
},
{
"epoch": 2.3,
"grad_norm": 2.6542067527770996,
"learning_rate": 0.00012042682926829268,
"loss": 1.6894,
"step": 253
},
{
"epoch": 2.309090909090909,
"grad_norm": 4.014543533325195,
"learning_rate": 0.00011890243902439024,
"loss": 1.9333,
"step": 254
},
{
"epoch": 2.3181818181818183,
"grad_norm": 2.828244924545288,
"learning_rate": 0.0001173780487804878,
"loss": 1.7024,
"step": 255
},
{
"epoch": 2.327272727272727,
"grad_norm": 2.9751951694488525,
"learning_rate": 0.00011585365853658537,
"loss": 1.7545,
"step": 256
},
{
"epoch": 2.3363636363636364,
"grad_norm": 2.876709461212158,
"learning_rate": 0.00011432926829268293,
"loss": 2.1827,
"step": 257
},
{
"epoch": 2.3454545454545457,
"grad_norm": 3.0717837810516357,
"learning_rate": 0.00011280487804878049,
"loss": 1.5924,
"step": 258
},
{
"epoch": 2.3545454545454545,
"grad_norm": 2.5759644508361816,
"learning_rate": 0.00011128048780487805,
"loss": 1.8342,
"step": 259
},
{
"epoch": 2.3636363636363638,
"grad_norm": 2.3989076614379883,
"learning_rate": 0.00010975609756097561,
"loss": 1.8479,
"step": 260
},
{
"epoch": 2.3727272727272726,
"grad_norm": 2.9248263835906982,
"learning_rate": 0.00010823170731707317,
"loss": 1.5602,
"step": 261
},
{
"epoch": 2.381818181818182,
"grad_norm": 2.715651512145996,
"learning_rate": 0.00010670731707317073,
"loss": 1.5377,
"step": 262
},
{
"epoch": 2.390909090909091,
"grad_norm": 2.504502534866333,
"learning_rate": 0.00010518292682926829,
"loss": 1.2146,
"step": 263
},
{
"epoch": 2.4,
"grad_norm": 2.516601085662842,
"learning_rate": 0.00010365853658536585,
"loss": 1.7834,
"step": 264
},
{
"epoch": 2.409090909090909,
"grad_norm": 2.362786293029785,
"learning_rate": 0.00010213414634146341,
"loss": 1.5664,
"step": 265
},
{
"epoch": 2.418181818181818,
"grad_norm": 2.057528257369995,
"learning_rate": 0.00010060975609756098,
"loss": 1.5126,
"step": 266
},
{
"epoch": 2.4272727272727272,
"grad_norm": 2.4843454360961914,
"learning_rate": 9.908536585365854e-05,
"loss": 1.883,
"step": 267
},
{
"epoch": 2.4363636363636365,
"grad_norm": 2.3680319786071777,
"learning_rate": 9.75609756097561e-05,
"loss": 1.521,
"step": 268
},
{
"epoch": 2.4454545454545453,
"grad_norm": 2.7291035652160645,
"learning_rate": 9.603658536585366e-05,
"loss": 1.7955,
"step": 269
},
{
"epoch": 2.4545454545454546,
"grad_norm": 2.395080327987671,
"learning_rate": 9.451219512195122e-05,
"loss": 1.7271,
"step": 270
},
{
"epoch": 2.463636363636364,
"grad_norm": 2.8394501209259033,
"learning_rate": 9.298780487804878e-05,
"loss": 1.5939,
"step": 271
},
{
"epoch": 2.4727272727272727,
"grad_norm": 2.4888384342193604,
"learning_rate": 9.146341463414634e-05,
"loss": 1.282,
"step": 272
},
{
"epoch": 2.481818181818182,
"grad_norm": 2.417750835418701,
"learning_rate": 8.99390243902439e-05,
"loss": 1.4742,
"step": 273
},
{
"epoch": 2.4909090909090907,
"grad_norm": 2.7631969451904297,
"learning_rate": 8.841463414634146e-05,
"loss": 1.7823,
"step": 274
},
{
"epoch": 2.5,
"grad_norm": 2.7598719596862793,
"learning_rate": 8.689024390243902e-05,
"loss": 1.7852,
"step": 275
},
{
"epoch": 2.509090909090909,
"grad_norm": 2.4489023685455322,
"learning_rate": 8.53658536585366e-05,
"loss": 1.4942,
"step": 276
},
{
"epoch": 2.518181818181818,
"grad_norm": 2.320030927658081,
"learning_rate": 8.384146341463415e-05,
"loss": 1.5197,
"step": 277
},
{
"epoch": 2.5272727272727273,
"grad_norm": 2.592423677444458,
"learning_rate": 8.231707317073171e-05,
"loss": 1.7495,
"step": 278
},
{
"epoch": 2.536363636363636,
"grad_norm": 2.7762107849121094,
"learning_rate": 8.079268292682927e-05,
"loss": 1.9257,
"step": 279
},
{
"epoch": 2.5454545454545454,
"grad_norm": 2.527858018875122,
"learning_rate": 7.926829268292683e-05,
"loss": 1.5523,
"step": 280
},
{
"epoch": 2.5545454545454547,
"grad_norm": 3.0844714641571045,
"learning_rate": 7.774390243902439e-05,
"loss": 1.5701,
"step": 281
},
{
"epoch": 2.5636363636363635,
"grad_norm": 2.9077203273773193,
"learning_rate": 7.621951219512195e-05,
"loss": 1.4133,
"step": 282
},
{
"epoch": 2.5727272727272728,
"grad_norm": 3.1426498889923096,
"learning_rate": 7.469512195121951e-05,
"loss": 1.6216,
"step": 283
},
{
"epoch": 2.581818181818182,
"grad_norm": 3.0162813663482666,
"learning_rate": 7.317073170731707e-05,
"loss": 1.6784,
"step": 284
},
{
"epoch": 2.590909090909091,
"grad_norm": 2.502703905105591,
"learning_rate": 7.164634146341463e-05,
"loss": 1.6971,
"step": 285
},
{
"epoch": 2.6,
"grad_norm": 2.4731063842773438,
"learning_rate": 7.01219512195122e-05,
"loss": 1.4637,
"step": 286
},
{
"epoch": 2.6090909090909093,
"grad_norm": 2.648430109024048,
"learning_rate": 6.859756097560976e-05,
"loss": 1.5027,
"step": 287
},
{
"epoch": 2.618181818181818,
"grad_norm": 3.18878173828125,
"learning_rate": 6.707317073170732e-05,
"loss": 1.8242,
"step": 288
},
{
"epoch": 2.6272727272727274,
"grad_norm": 2.5465493202209473,
"learning_rate": 6.554878048780488e-05,
"loss": 1.4872,
"step": 289
},
{
"epoch": 2.6363636363636362,
"grad_norm": 2.4700820446014404,
"learning_rate": 6.402439024390244e-05,
"loss": 1.7537,
"step": 290
},
{
"epoch": 2.6454545454545455,
"grad_norm": 3.4680936336517334,
"learning_rate": 6.25e-05,
"loss": 1.8912,
"step": 291
},
{
"epoch": 2.6545454545454543,
"grad_norm": 3.104785442352295,
"learning_rate": 6.097560975609756e-05,
"loss": 2.276,
"step": 292
},
{
"epoch": 2.6636363636363636,
"grad_norm": 3.0287201404571533,
"learning_rate": 5.945121951219512e-05,
"loss": 1.6728,
"step": 293
},
{
"epoch": 2.672727272727273,
"grad_norm": 2.384228229522705,
"learning_rate": 5.792682926829269e-05,
"loss": 1.6343,
"step": 294
},
{
"epoch": 2.6818181818181817,
"grad_norm": 2.866724967956543,
"learning_rate": 5.6402439024390247e-05,
"loss": 2.0956,
"step": 295
},
{
"epoch": 2.690909090909091,
"grad_norm": 2.9918506145477295,
"learning_rate": 5.4878048780487806e-05,
"loss": 1.7989,
"step": 296
},
{
"epoch": 2.7,
"grad_norm": 2.5096092224121094,
"learning_rate": 5.3353658536585366e-05,
"loss": 1.6828,
"step": 297
},
{
"epoch": 2.709090909090909,
"grad_norm": 2.7829229831695557,
"learning_rate": 5.1829268292682925e-05,
"loss": 1.4211,
"step": 298
},
{
"epoch": 2.7181818181818183,
"grad_norm": 2.250296115875244,
"learning_rate": 5.030487804878049e-05,
"loss": 1.4267,
"step": 299
},
{
"epoch": 2.7272727272727275,
"grad_norm": 3.163660764694214,
"learning_rate": 4.878048780487805e-05,
"loss": 2.1689,
"step": 300
},
{
"epoch": 2.7363636363636363,
"grad_norm": 2.386986255645752,
"learning_rate": 4.725609756097561e-05,
"loss": 1.4535,
"step": 301
},
{
"epoch": 2.7454545454545456,
"grad_norm": 2.807040214538574,
"learning_rate": 4.573170731707317e-05,
"loss": 1.5864,
"step": 302
},
{
"epoch": 2.7545454545454544,
"grad_norm": 3.6512951850891113,
"learning_rate": 4.420731707317073e-05,
"loss": 1.6136,
"step": 303
},
{
"epoch": 2.7636363636363637,
"grad_norm": 2.888395071029663,
"learning_rate": 4.26829268292683e-05,
"loss": 1.5037,
"step": 304
},
{
"epoch": 2.7727272727272725,
"grad_norm": 2.2506160736083984,
"learning_rate": 4.1158536585365856e-05,
"loss": 1.2207,
"step": 305
},
{
"epoch": 2.7818181818181817,
"grad_norm": 2.5099334716796875,
"learning_rate": 3.9634146341463416e-05,
"loss": 1.6804,
"step": 306
},
{
"epoch": 2.790909090909091,
"grad_norm": 2.87251615524292,
"learning_rate": 3.8109756097560976e-05,
"loss": 1.8993,
"step": 307
},
{
"epoch": 2.8,
"grad_norm": 2.648142099380493,
"learning_rate": 3.6585365853658535e-05,
"loss": 1.5677,
"step": 308
},
{
"epoch": 2.809090909090909,
"grad_norm": 3.0312211513519287,
"learning_rate": 3.50609756097561e-05,
"loss": 2.1024,
"step": 309
},
{
"epoch": 2.8181818181818183,
"grad_norm": 2.32504940032959,
"learning_rate": 3.353658536585366e-05,
"loss": 1.498,
"step": 310
},
{
"epoch": 2.827272727272727,
"grad_norm": 2.7433340549468994,
"learning_rate": 3.201219512195122e-05,
"loss": 2.0186,
"step": 311
},
{
"epoch": 2.8363636363636364,
"grad_norm": 2.5491738319396973,
"learning_rate": 3.048780487804878e-05,
"loss": 1.5459,
"step": 312
},
{
"epoch": 2.8454545454545457,
"grad_norm": 3.0771000385284424,
"learning_rate": 2.8963414634146343e-05,
"loss": 1.8847,
"step": 313
},
{
"epoch": 2.8545454545454545,
"grad_norm": 2.719658851623535,
"learning_rate": 2.7439024390243903e-05,
"loss": 1.6282,
"step": 314
},
{
"epoch": 2.8636363636363638,
"grad_norm": 2.6213059425354004,
"learning_rate": 2.5914634146341463e-05,
"loss": 1.3235,
"step": 315
},
{
"epoch": 2.8727272727272726,
"grad_norm": 2.4952800273895264,
"learning_rate": 2.4390243902439026e-05,
"loss": 1.6865,
"step": 316
},
{
"epoch": 2.881818181818182,
"grad_norm": 2.896984577178955,
"learning_rate": 2.2865853658536585e-05,
"loss": 1.5933,
"step": 317
},
{
"epoch": 2.8909090909090907,
"grad_norm": 2.54345965385437,
"learning_rate": 2.134146341463415e-05,
"loss": 1.9299,
"step": 318
},
{
"epoch": 2.9,
"grad_norm": 2.8932416439056396,
"learning_rate": 1.9817073170731708e-05,
"loss": 2.0065,
"step": 319
},
{
"epoch": 2.909090909090909,
"grad_norm": 2.6085596084594727,
"learning_rate": 1.8292682926829268e-05,
"loss": 1.3924,
"step": 320
},
{
"epoch": 2.918181818181818,
"grad_norm": 2.9155259132385254,
"learning_rate": 1.676829268292683e-05,
"loss": 2.0032,
"step": 321
},
{
"epoch": 2.9272727272727272,
"grad_norm": 2.5170652866363525,
"learning_rate": 1.524390243902439e-05,
"loss": 1.3955,
"step": 322
},
{
"epoch": 2.9363636363636365,
"grad_norm": 2.5169925689697266,
"learning_rate": 1.3719512195121952e-05,
"loss": 1.5228,
"step": 323
},
{
"epoch": 2.9454545454545453,
"grad_norm": 2.683560848236084,
"learning_rate": 1.2195121951219513e-05,
"loss": 1.6762,
"step": 324
},
{
"epoch": 2.9545454545454546,
"grad_norm": 2.675593614578247,
"learning_rate": 1.0670731707317074e-05,
"loss": 1.6192,
"step": 325
},
{
"epoch": 2.963636363636364,
"grad_norm": 2.860233783721924,
"learning_rate": 9.146341463414634e-06,
"loss": 1.9632,
"step": 326
},
{
"epoch": 2.9727272727272727,
"grad_norm": 2.5503525733947754,
"learning_rate": 7.621951219512195e-06,
"loss": 1.4166,
"step": 327
},
{
"epoch": 2.981818181818182,
"grad_norm": 2.5347251892089844,
"learning_rate": 6.0975609756097564e-06,
"loss": 1.4993,
"step": 328
},
{
"epoch": 2.990909090909091,
"grad_norm": 2.628443479537964,
"learning_rate": 4.573170731707317e-06,
"loss": 1.4424,
"step": 329
},
{
"epoch": 3.0,
"grad_norm": 2.5161614418029785,
"learning_rate": 3.0487804878048782e-06,
"loss": 1.5952,
"step": 330
},
{
"epoch": 3.0,
"eval_f1": 0.8942,
"eval_gen_len": 49.4091,
"eval_loss": 1.7933717966079712,
"eval_precision": 0.8941,
"eval_recall": 0.8945,
"eval_rouge1": 0.4708,
"eval_rouge2": 0.2246,
"eval_rougeL": 0.3984,
"eval_rougeLsum": 0.4357,
"eval_runtime": 47.9405,
"eval_samples_per_second": 2.295,
"eval_steps_per_second": 0.292,
"step": 330
},
{
"epoch": 3.0,
"step": 330,
"total_flos": 2506179136462848.0,
"train_loss": 1.8787952170227513,
"train_runtime": 633.4063,
"train_samples_per_second": 4.163,
"train_steps_per_second": 0.521
}
],
"logging_steps": 1,
"max_steps": 330,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2506179136462848.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}