{
"best_metric": 1.4270155429840088,
"best_model_checkpoint": "autotrain-7t7rk-gfqs1/checkpoint-7366",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 7366,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 17.693096160888672,
"learning_rate": 5.088195386702849e-06,
"loss": 3.139,
"step": 81
},
{
"epoch": 0.02,
"grad_norm": 22.15139389038086,
"learning_rate": 1.0515603799185889e-05,
"loss": 2.5999,
"step": 162
},
{
"epoch": 0.03,
"grad_norm": 14.272784233093262,
"learning_rate": 1.5943012211668928e-05,
"loss": 2.3391,
"step": 243
},
{
"epoch": 0.04,
"grad_norm": 17.81722640991211,
"learning_rate": 2.1438263229308007e-05,
"loss": 2.1259,
"step": 324
},
{
"epoch": 0.05,
"grad_norm": 9.983675956726074,
"learning_rate": 2.6933514246947083e-05,
"loss": 1.8274,
"step": 405
},
{
"epoch": 0.07,
"grad_norm": 10.129060745239258,
"learning_rate": 3.2428765264586165e-05,
"loss": 1.963,
"step": 486
},
{
"epoch": 0.08,
"grad_norm": 7.1694655418396,
"learning_rate": 3.792401628222524e-05,
"loss": 1.8109,
"step": 567
},
{
"epoch": 0.09,
"grad_norm": 11.632865905761719,
"learning_rate": 4.3419267299864316e-05,
"loss": 1.8193,
"step": 648
},
{
"epoch": 0.1,
"grad_norm": 42.26719665527344,
"learning_rate": 4.891451831750339e-05,
"loss": 1.7714,
"step": 729
},
{
"epoch": 0.11,
"grad_norm": 15.210801124572754,
"learning_rate": 4.950972997435511e-05,
"loss": 1.7169,
"step": 810
},
{
"epoch": 0.12,
"grad_norm": 25.737319946289062,
"learning_rate": 4.889877809624378e-05,
"loss": 1.7059,
"step": 891
},
{
"epoch": 0.13,
"grad_norm": 7.352960109710693,
"learning_rate": 4.828782621813245e-05,
"loss": 1.7726,
"step": 972
},
{
"epoch": 0.14,
"grad_norm": 20.277687072753906,
"learning_rate": 4.767687434002112e-05,
"loss": 1.6683,
"step": 1053
},
{
"epoch": 0.15,
"grad_norm": 11.7589750289917,
"learning_rate": 4.706592246190979e-05,
"loss": 1.73,
"step": 1134
},
{
"epoch": 0.16,
"grad_norm": 12.03089714050293,
"learning_rate": 4.6454970583798465e-05,
"loss": 1.6849,
"step": 1215
},
{
"epoch": 0.18,
"grad_norm": 7.4564056396484375,
"learning_rate": 4.5844018705687136e-05,
"loss": 1.6458,
"step": 1296
},
{
"epoch": 0.19,
"grad_norm": 16.240528106689453,
"learning_rate": 4.523306682757581e-05,
"loss": 1.6597,
"step": 1377
},
{
"epoch": 0.2,
"grad_norm": 8.320143699645996,
"learning_rate": 4.4629657565243625e-05,
"loss": 1.6376,
"step": 1458
},
{
"epoch": 0.21,
"grad_norm": 7.235590934753418,
"learning_rate": 4.40187056871323e-05,
"loss": 1.6217,
"step": 1539
},
{
"epoch": 0.22,
"grad_norm": 5.213665008544922,
"learning_rate": 4.340775380902097e-05,
"loss": 1.6757,
"step": 1620
},
{
"epoch": 0.23,
"grad_norm": 7.416939735412598,
"learning_rate": 4.279680193090964e-05,
"loss": 1.6158,
"step": 1701
},
{
"epoch": 0.24,
"grad_norm": 6.5677103996276855,
"learning_rate": 4.218585005279832e-05,
"loss": 1.589,
"step": 1782
},
{
"epoch": 0.25,
"grad_norm": 11.56302261352539,
"learning_rate": 4.157489817468698e-05,
"loss": 1.6241,
"step": 1863
},
{
"epoch": 0.26,
"grad_norm": 10.844891548156738,
"learning_rate": 4.096394629657566e-05,
"loss": 1.6057,
"step": 1944
},
{
"epoch": 0.27,
"grad_norm": 7.110713958740234,
"learning_rate": 4.0352994418464324e-05,
"loss": 1.6382,
"step": 2025
},
{
"epoch": 0.29,
"grad_norm": 10.134081840515137,
"learning_rate": 3.9742042540352995e-05,
"loss": 1.6126,
"step": 2106
},
{
"epoch": 0.3,
"grad_norm": 40.32931137084961,
"learning_rate": 3.9131090662241666e-05,
"loss": 1.5994,
"step": 2187
},
{
"epoch": 0.31,
"grad_norm": 15.259111404418945,
"learning_rate": 3.852013878413034e-05,
"loss": 1.6828,
"step": 2268
},
{
"epoch": 0.32,
"grad_norm": 8.93104362487793,
"learning_rate": 3.7909186906019015e-05,
"loss": 1.6281,
"step": 2349
},
{
"epoch": 0.33,
"grad_norm": 7.933257102966309,
"learning_rate": 3.729823502790768e-05,
"loss": 1.586,
"step": 2430
},
{
"epoch": 0.34,
"grad_norm": 7.876591205596924,
"learning_rate": 3.668728314979635e-05,
"loss": 1.7194,
"step": 2511
},
{
"epoch": 0.35,
"grad_norm": 10.981273651123047,
"learning_rate": 3.607633127168502e-05,
"loss": 1.7405,
"step": 2592
},
{
"epoch": 0.36,
"grad_norm": 5.999240398406982,
"learning_rate": 3.546537939357369e-05,
"loss": 1.6577,
"step": 2673
},
{
"epoch": 0.37,
"grad_norm": 6.238893985748291,
"learning_rate": 3.4854427515462364e-05,
"loss": 1.6111,
"step": 2754
},
{
"epoch": 0.38,
"grad_norm": 8.094114303588867,
"learning_rate": 3.4243475637351035e-05,
"loss": 1.5915,
"step": 2835
},
{
"epoch": 0.4,
"grad_norm": 6.022202014923096,
"learning_rate": 3.363252375923971e-05,
"loss": 1.6025,
"step": 2916
},
{
"epoch": 0.41,
"grad_norm": 12.310922622680664,
"learning_rate": 3.302157188112838e-05,
"loss": 1.5482,
"step": 2997
},
{
"epoch": 0.42,
"grad_norm": 5.6983256340026855,
"learning_rate": 3.241062000301705e-05,
"loss": 1.6938,
"step": 3078
},
{
"epoch": 0.43,
"grad_norm": 11.397314071655273,
"learning_rate": 3.179966812490572e-05,
"loss": 1.5931,
"step": 3159
},
{
"epoch": 0.44,
"grad_norm": 10.81894588470459,
"learning_rate": 3.118871624679439e-05,
"loss": 1.6076,
"step": 3240
},
{
"epoch": 0.45,
"grad_norm": 13.12124252319336,
"learning_rate": 3.0585306984462217e-05,
"loss": 1.6337,
"step": 3321
},
{
"epoch": 0.46,
"grad_norm": 10.692020416259766,
"learning_rate": 2.997435510635088e-05,
"loss": 1.5236,
"step": 3402
},
{
"epoch": 0.47,
"grad_norm": 8.20348834991455,
"learning_rate": 2.9363403228239556e-05,
"loss": 1.665,
"step": 3483
},
{
"epoch": 0.48,
"grad_norm": 9.92470932006836,
"learning_rate": 2.8752451350128223e-05,
"loss": 1.5857,
"step": 3564
},
{
"epoch": 0.49,
"grad_norm": 20.7886962890625,
"learning_rate": 2.8141499472016898e-05,
"loss": 1.5629,
"step": 3645
},
{
"epoch": 0.51,
"grad_norm": 16.494325637817383,
"learning_rate": 2.753054759390557e-05,
"loss": 1.6066,
"step": 3726
},
{
"epoch": 0.52,
"grad_norm": 6.577916145324707,
"learning_rate": 2.6919595715794237e-05,
"loss": 1.571,
"step": 3807
},
{
"epoch": 0.53,
"grad_norm": 7.186463356018066,
"learning_rate": 2.630864383768291e-05,
"loss": 1.5828,
"step": 3888
},
{
"epoch": 0.54,
"grad_norm": 12.30034065246582,
"learning_rate": 2.569769195957158e-05,
"loss": 1.6485,
"step": 3969
},
{
"epoch": 0.55,
"grad_norm": 8.444236755371094,
"learning_rate": 2.5086740081460254e-05,
"loss": 1.5417,
"step": 4050
},
{
"epoch": 0.56,
"grad_norm": 11.730199813842773,
"learning_rate": 2.447578820334892e-05,
"loss": 1.5634,
"step": 4131
},
{
"epoch": 0.57,
"grad_norm": 9.82486629486084,
"learning_rate": 2.3864836325237593e-05,
"loss": 1.6321,
"step": 4212
},
{
"epoch": 0.58,
"grad_norm": 9.35803508758545,
"learning_rate": 2.3253884447126264e-05,
"loss": 1.5536,
"step": 4293
},
{
"epoch": 0.59,
"grad_norm": 8.086362838745117,
"learning_rate": 2.2642932569014935e-05,
"loss": 1.6023,
"step": 4374
},
{
"epoch": 0.6,
"grad_norm": 12.126749038696289,
"learning_rate": 2.2031980690903606e-05,
"loss": 1.5755,
"step": 4455
},
{
"epoch": 0.62,
"grad_norm": 4.729997158050537,
"learning_rate": 2.1421028812792278e-05,
"loss": 1.6292,
"step": 4536
},
{
"epoch": 0.63,
"grad_norm": 5.383386135101318,
"learning_rate": 2.0810076934680945e-05,
"loss": 1.6119,
"step": 4617
},
{
"epoch": 0.64,
"grad_norm": 7.945587635040283,
"learning_rate": 2.019912505656962e-05,
"loss": 1.4745,
"step": 4698
},
{
"epoch": 0.65,
"grad_norm": 10.771860122680664,
"learning_rate": 1.958817317845829e-05,
"loss": 1.5889,
"step": 4779
},
{
"epoch": 0.66,
"grad_norm": 7.598873615264893,
"learning_rate": 1.8977221300346962e-05,
"loss": 1.5232,
"step": 4860
},
{
"epoch": 0.67,
"grad_norm": 9.717412948608398,
"learning_rate": 1.8366269422235634e-05,
"loss": 1.6095,
"step": 4941
},
{
"epoch": 0.68,
"grad_norm": 6.822420597076416,
"learning_rate": 1.77553175441243e-05,
"loss": 1.5692,
"step": 5022
},
{
"epoch": 0.69,
"grad_norm": 9.613419532775879,
"learning_rate": 1.7144365666012973e-05,
"loss": 1.5622,
"step": 5103
},
{
"epoch": 0.7,
"grad_norm": 8.61678409576416,
"learning_rate": 1.6533413787901644e-05,
"loss": 1.5005,
"step": 5184
},
{
"epoch": 0.71,
"grad_norm": 9.557565689086914,
"learning_rate": 1.5922461909790315e-05,
"loss": 1.5246,
"step": 5265
},
{
"epoch": 0.73,
"grad_norm": 6.297652721405029,
"learning_rate": 1.531151003167899e-05,
"loss": 1.5551,
"step": 5346
},
{
"epoch": 0.74,
"grad_norm": 7.161550521850586,
"learning_rate": 1.4700558153567659e-05,
"loss": 1.5132,
"step": 5427
},
{
"epoch": 0.75,
"grad_norm": 5.5995635986328125,
"learning_rate": 1.408960627545633e-05,
"loss": 1.5309,
"step": 5508
},
{
"epoch": 0.76,
"grad_norm": 7.648403644561768,
"learning_rate": 1.3478654397345e-05,
"loss": 1.5823,
"step": 5589
},
{
"epoch": 0.77,
"grad_norm": 12.299725532531738,
"learning_rate": 1.2867702519233671e-05,
"loss": 1.5483,
"step": 5670
},
{
"epoch": 0.78,
"grad_norm": 18.328304290771484,
"learning_rate": 1.2256750641122342e-05,
"loss": 1.5496,
"step": 5751
},
{
"epoch": 0.79,
"grad_norm": 7.374316692352295,
"learning_rate": 1.1645798763011012e-05,
"loss": 1.497,
"step": 5832
},
{
"epoch": 0.8,
"grad_norm": 6.491637706756592,
"learning_rate": 1.1034846884899685e-05,
"loss": 1.6182,
"step": 5913
},
{
"epoch": 0.81,
"grad_norm": 8.2967529296875,
"learning_rate": 1.0423895006788354e-05,
"loss": 1.5618,
"step": 5994
},
{
"epoch": 0.82,
"grad_norm": 8.186725616455078,
"learning_rate": 9.812943128677025e-06,
"loss": 1.5187,
"step": 6075
},
{
"epoch": 0.84,
"grad_norm": 14.838460922241211,
"learning_rate": 9.201991250565696e-06,
"loss": 1.5244,
"step": 6156
},
{
"epoch": 0.85,
"grad_norm": 17.547962188720703,
"learning_rate": 8.591039372454368e-06,
"loss": 1.5382,
"step": 6237
},
{
"epoch": 0.86,
"grad_norm": 10.257816314697266,
"learning_rate": 7.980087494343039e-06,
"loss": 1.5117,
"step": 6318
},
{
"epoch": 0.87,
"grad_norm": 8.583636283874512,
"learning_rate": 7.369135616231709e-06,
"loss": 1.5942,
"step": 6399
},
{
"epoch": 0.88,
"grad_norm": 10.24096393585205,
"learning_rate": 6.75818373812038e-06,
"loss": 1.4925,
"step": 6480
},
{
"epoch": 0.89,
"grad_norm": 71.38775634765625,
"learning_rate": 6.1472318600090515e-06,
"loss": 1.5429,
"step": 6561
},
{
"epoch": 0.9,
"grad_norm": 6.424124240875244,
"learning_rate": 5.536279981897723e-06,
"loss": 1.5851,
"step": 6642
},
{
"epoch": 0.91,
"grad_norm": 9.493167877197266,
"learning_rate": 4.925328103786393e-06,
"loss": 1.5089,
"step": 6723
},
{
"epoch": 0.92,
"grad_norm": 7.073258399963379,
"learning_rate": 4.314376225675064e-06,
"loss": 1.4964,
"step": 6804
},
{
"epoch": 0.93,
"grad_norm": 20.42180061340332,
"learning_rate": 3.703424347563735e-06,
"loss": 1.5215,
"step": 6885
},
{
"epoch": 0.95,
"grad_norm": 12.12634563446045,
"learning_rate": 3.0924724694524062e-06,
"loss": 1.551,
"step": 6966
},
{
"epoch": 0.96,
"grad_norm": 11.100313186645508,
"learning_rate": 2.481520591341077e-06,
"loss": 1.4721,
"step": 7047
},
{
"epoch": 0.97,
"grad_norm": 8.091376304626465,
"learning_rate": 1.8705687132297482e-06,
"loss": 1.4702,
"step": 7128
},
{
"epoch": 0.98,
"grad_norm": 7.810707092285156,
"learning_rate": 1.2596168351184192e-06,
"loss": 1.5667,
"step": 7209
},
{
"epoch": 0.99,
"grad_norm": 8.03673267364502,
"learning_rate": 6.486649570070901e-07,
"loss": 1.477,
"step": 7290
},
{
"epoch": 1.0,
"eval_gen_len": 35.9694,
"eval_loss": 1.4270155429840088,
"eval_rouge1": 46.4301,
"eval_rouge2": 23.4668,
"eval_rougeL": 37.0224,
"eval_rougeLsum": 42.8893,
"eval_runtime": 468.1997,
"eval_samples_per_second": 1.747,
"eval_steps_per_second": 0.438,
"step": 7366
}
],
"logging_steps": 81,
"max_steps": 7366,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 7678847620005888.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}