MarkKisker's picture
Training in progress, epoch 1
9ec2f44 verified
raw
history blame
17.4 kB
{
"best_metric": 0.7780232429504395,
"best_model_checkpoint": "MarkKisker/RoBERTa-base-RottenTomatoes_v2\\checkpoint-1067",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1067,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"grad_norm": 0.03423001989722252,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0018,
"step": 10
},
{
"epoch": 0.02,
"grad_norm": 0.04875793680548668,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0014,
"step": 20
},
{
"epoch": 0.03,
"grad_norm": 20.525543212890625,
"learning_rate": 3e-06,
"loss": 0.1788,
"step": 30
},
{
"epoch": 0.04,
"grad_norm": 0.02198684774339199,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0014,
"step": 40
},
{
"epoch": 0.05,
"grad_norm": 0.09893256425857544,
"learning_rate": 5e-06,
"loss": 0.1576,
"step": 50
},
{
"epoch": 0.06,
"grad_norm": 0.04567793011665344,
"learning_rate": 6e-06,
"loss": 0.1448,
"step": 60
},
{
"epoch": 0.07,
"grad_norm": 0.7623605728149414,
"learning_rate": 7.000000000000001e-06,
"loss": 0.0016,
"step": 70
},
{
"epoch": 0.07,
"grad_norm": 0.023505745455622673,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0017,
"step": 80
},
{
"epoch": 0.08,
"grad_norm": 0.022804420441389084,
"learning_rate": 9e-06,
"loss": 0.1304,
"step": 90
},
{
"epoch": 0.09,
"grad_norm": 0.03228422999382019,
"learning_rate": 1e-05,
"loss": 0.1446,
"step": 100
},
{
"epoch": 0.1,
"grad_norm": 0.23935572803020477,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.0425,
"step": 110
},
{
"epoch": 0.11,
"grad_norm": 7.51793098449707,
"learning_rate": 1.2e-05,
"loss": 0.0024,
"step": 120
},
{
"epoch": 0.12,
"grad_norm": 0.023124126717448235,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.0228,
"step": 130
},
{
"epoch": 0.13,
"grad_norm": 17.817167282104492,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.0753,
"step": 140
},
{
"epoch": 0.14,
"grad_norm": 0.015741823241114616,
"learning_rate": 1.5e-05,
"loss": 0.1112,
"step": 150
},
{
"epoch": 0.15,
"grad_norm": 0.26574474573135376,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0092,
"step": 160
},
{
"epoch": 0.16,
"grad_norm": 4.9772748947143555,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.2384,
"step": 170
},
{
"epoch": 0.17,
"grad_norm": 0.061539579182863235,
"learning_rate": 1.8e-05,
"loss": 0.0012,
"step": 180
},
{
"epoch": 0.18,
"grad_norm": 0.08193587511777878,
"learning_rate": 1.9e-05,
"loss": 0.0812,
"step": 190
},
{
"epoch": 0.19,
"grad_norm": 0.023329803720116615,
"learning_rate": 2e-05,
"loss": 0.1662,
"step": 200
},
{
"epoch": 0.2,
"grad_norm": 0.031778186559677124,
"learning_rate": 2.1e-05,
"loss": 0.0013,
"step": 210
},
{
"epoch": 0.21,
"grad_norm": 0.023606792092323303,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.0547,
"step": 220
},
{
"epoch": 0.22,
"grad_norm": 0.019992610439658165,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.0011,
"step": 230
},
{
"epoch": 0.22,
"grad_norm": 0.09242820739746094,
"learning_rate": 2.4e-05,
"loss": 0.0012,
"step": 240
},
{
"epoch": 0.23,
"grad_norm": 0.01790749281644821,
"learning_rate": 2.5e-05,
"loss": 0.0803,
"step": 250
},
{
"epoch": 0.24,
"grad_norm": 0.013409961014986038,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.0633,
"step": 260
},
{
"epoch": 0.25,
"grad_norm": 0.045994311571121216,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.1399,
"step": 270
},
{
"epoch": 0.26,
"grad_norm": 0.030622974038124084,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.0015,
"step": 280
},
{
"epoch": 0.27,
"grad_norm": 0.052308339625597,
"learning_rate": 2.9e-05,
"loss": 0.0852,
"step": 290
},
{
"epoch": 0.28,
"grad_norm": 0.09171419590711594,
"learning_rate": 3e-05,
"loss": 0.0022,
"step": 300
},
{
"epoch": 0.29,
"grad_norm": 0.024435508996248245,
"learning_rate": 3.1e-05,
"loss": 0.0859,
"step": 310
},
{
"epoch": 0.3,
"grad_norm": 330.25262451171875,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.0497,
"step": 320
},
{
"epoch": 0.31,
"grad_norm": 0.021127384155988693,
"learning_rate": 3.3e-05,
"loss": 0.0818,
"step": 330
},
{
"epoch": 0.32,
"grad_norm": 0.030799318104982376,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.0009,
"step": 340
},
{
"epoch": 0.33,
"grad_norm": 0.0609976127743721,
"learning_rate": 3.5e-05,
"loss": 0.0016,
"step": 350
},
{
"epoch": 0.34,
"grad_norm": 0.013851546682417393,
"learning_rate": 3.6e-05,
"loss": 0.2128,
"step": 360
},
{
"epoch": 0.35,
"grad_norm": 0.012113348580896854,
"learning_rate": 3.7e-05,
"loss": 0.119,
"step": 370
},
{
"epoch": 0.36,
"grad_norm": 0.0172914806753397,
"learning_rate": 3.8e-05,
"loss": 0.2769,
"step": 380
},
{
"epoch": 0.37,
"grad_norm": 0.047122351825237274,
"learning_rate": 3.9000000000000006e-05,
"loss": 0.0015,
"step": 390
},
{
"epoch": 0.37,
"grad_norm": 0.029489964246749878,
"learning_rate": 4e-05,
"loss": 0.0019,
"step": 400
},
{
"epoch": 0.38,
"grad_norm": 0.02585042454302311,
"learning_rate": 4.1e-05,
"loss": 0.0759,
"step": 410
},
{
"epoch": 0.39,
"grad_norm": 0.044562604278326035,
"learning_rate": 4.2e-05,
"loss": 0.0025,
"step": 420
},
{
"epoch": 0.4,
"grad_norm": 0.14535053074359894,
"learning_rate": 4.3e-05,
"loss": 0.0856,
"step": 430
},
{
"epoch": 0.41,
"grad_norm": 14.468594551086426,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.1576,
"step": 440
},
{
"epoch": 0.42,
"grad_norm": 3.1556191444396973,
"learning_rate": 4.5e-05,
"loss": 0.1671,
"step": 450
},
{
"epoch": 0.43,
"grad_norm": 3.5952117443084717,
"learning_rate": 4.600000000000001e-05,
"loss": 0.1422,
"step": 460
},
{
"epoch": 0.44,
"grad_norm": 0.10417389869689941,
"learning_rate": 4.7e-05,
"loss": 0.0705,
"step": 470
},
{
"epoch": 0.45,
"grad_norm": 24.245695114135742,
"learning_rate": 4.8e-05,
"loss": 0.0586,
"step": 480
},
{
"epoch": 0.46,
"grad_norm": 0.6337321400642395,
"learning_rate": 4.9e-05,
"loss": 0.1252,
"step": 490
},
{
"epoch": 0.47,
"grad_norm": 62.256656646728516,
"learning_rate": 5e-05,
"loss": 0.1297,
"step": 500
},
{
"epoch": 0.48,
"grad_norm": 0.2806699573993683,
"learning_rate": 4.9896587383660806e-05,
"loss": 0.1789,
"step": 510
},
{
"epoch": 0.49,
"grad_norm": 0.05291756987571716,
"learning_rate": 4.9793174767321616e-05,
"loss": 0.1434,
"step": 520
},
{
"epoch": 0.5,
"grad_norm": 0.12910176813602448,
"learning_rate": 4.968976215098242e-05,
"loss": 0.2909,
"step": 530
},
{
"epoch": 0.51,
"grad_norm": 9.672201156616211,
"learning_rate": 4.958634953464323e-05,
"loss": 0.178,
"step": 540
},
{
"epoch": 0.52,
"grad_norm": 0.1485089659690857,
"learning_rate": 4.948293691830403e-05,
"loss": 0.0731,
"step": 550
},
{
"epoch": 0.52,
"grad_norm": 0.11018037796020508,
"learning_rate": 4.937952430196484e-05,
"loss": 0.1337,
"step": 560
},
{
"epoch": 0.53,
"grad_norm": 0.047798193991184235,
"learning_rate": 4.9276111685625646e-05,
"loss": 0.0028,
"step": 570
},
{
"epoch": 0.54,
"grad_norm": 3.2003190517425537,
"learning_rate": 4.9172699069286456e-05,
"loss": 0.2401,
"step": 580
},
{
"epoch": 0.55,
"grad_norm": 0.033508703112602234,
"learning_rate": 4.906928645294726e-05,
"loss": 0.057,
"step": 590
},
{
"epoch": 0.56,
"grad_norm": 0.07519116997718811,
"learning_rate": 4.896587383660807e-05,
"loss": 0.1506,
"step": 600
},
{
"epoch": 0.57,
"grad_norm": 0.10883668810129166,
"learning_rate": 4.886246122026887e-05,
"loss": 0.0799,
"step": 610
},
{
"epoch": 0.58,
"grad_norm": 3.15360689163208,
"learning_rate": 4.8759048603929683e-05,
"loss": 0.0904,
"step": 620
},
{
"epoch": 0.59,
"grad_norm": 2.191103219985962,
"learning_rate": 4.865563598759049e-05,
"loss": 0.0639,
"step": 630
},
{
"epoch": 0.6,
"grad_norm": 0.05860808119177818,
"learning_rate": 4.855222337125129e-05,
"loss": 0.2293,
"step": 640
},
{
"epoch": 0.61,
"grad_norm": 180.98397827148438,
"learning_rate": 4.84488107549121e-05,
"loss": 0.1808,
"step": 650
},
{
"epoch": 0.62,
"grad_norm": 127.35352325439453,
"learning_rate": 4.8345398138572904e-05,
"loss": 0.1102,
"step": 660
},
{
"epoch": 0.63,
"grad_norm": 0.03393542766571045,
"learning_rate": 4.8241985522233714e-05,
"loss": 0.2701,
"step": 670
},
{
"epoch": 0.64,
"grad_norm": 0.3284960985183716,
"learning_rate": 4.813857290589452e-05,
"loss": 0.0668,
"step": 680
},
{
"epoch": 0.65,
"grad_norm": 0.05796672776341438,
"learning_rate": 4.803516028955533e-05,
"loss": 0.1625,
"step": 690
},
{
"epoch": 0.66,
"grad_norm": 0.607434093952179,
"learning_rate": 4.793174767321613e-05,
"loss": 0.3021,
"step": 700
},
{
"epoch": 0.67,
"grad_norm": 0.1597072184085846,
"learning_rate": 4.782833505687694e-05,
"loss": 0.1627,
"step": 710
},
{
"epoch": 0.67,
"grad_norm": 0.0897730141878128,
"learning_rate": 4.772492244053775e-05,
"loss": 0.1379,
"step": 720
},
{
"epoch": 0.68,
"grad_norm": 70.59358978271484,
"learning_rate": 4.7621509824198554e-05,
"loss": 0.571,
"step": 730
},
{
"epoch": 0.69,
"grad_norm": 7.284711837768555,
"learning_rate": 4.7518097207859365e-05,
"loss": 0.4225,
"step": 740
},
{
"epoch": 0.7,
"grad_norm": 1.0496598482131958,
"learning_rate": 4.741468459152017e-05,
"loss": 0.0981,
"step": 750
},
{
"epoch": 0.71,
"grad_norm": 0.06985878944396973,
"learning_rate": 4.731127197518098e-05,
"loss": 0.1593,
"step": 760
},
{
"epoch": 0.72,
"grad_norm": 3.0135066509246826,
"learning_rate": 4.720785935884178e-05,
"loss": 0.3379,
"step": 770
},
{
"epoch": 0.73,
"grad_norm": 0.7335708141326904,
"learning_rate": 4.710444674250259e-05,
"loss": 0.4362,
"step": 780
},
{
"epoch": 0.74,
"grad_norm": 47.25736618041992,
"learning_rate": 4.7001034126163395e-05,
"loss": 0.3113,
"step": 790
},
{
"epoch": 0.75,
"grad_norm": 0.23570404946804047,
"learning_rate": 4.6897621509824205e-05,
"loss": 0.2591,
"step": 800
},
{
"epoch": 0.76,
"grad_norm": 1.6712744235992432,
"learning_rate": 4.679420889348501e-05,
"loss": 0.3156,
"step": 810
},
{
"epoch": 0.77,
"grad_norm": 3.803595781326294,
"learning_rate": 4.669079627714581e-05,
"loss": 0.2374,
"step": 820
},
{
"epoch": 0.78,
"grad_norm": 0.053344208747148514,
"learning_rate": 4.658738366080662e-05,
"loss": 0.0993,
"step": 830
},
{
"epoch": 0.79,
"grad_norm": 0.0863012745976448,
"learning_rate": 4.6483971044467425e-05,
"loss": 0.2164,
"step": 840
},
{
"epoch": 0.8,
"grad_norm": 50.73778533935547,
"learning_rate": 4.6380558428128236e-05,
"loss": 0.7427,
"step": 850
},
{
"epoch": 0.81,
"grad_norm": 1.3867453336715698,
"learning_rate": 4.627714581178904e-05,
"loss": 0.4358,
"step": 860
},
{
"epoch": 0.82,
"grad_norm": 2.2237284183502197,
"learning_rate": 4.617373319544985e-05,
"loss": 0.1522,
"step": 870
},
{
"epoch": 0.82,
"grad_norm": 189.51943969726562,
"learning_rate": 4.607032057911065e-05,
"loss": 0.3018,
"step": 880
},
{
"epoch": 0.83,
"grad_norm": 76.69792938232422,
"learning_rate": 4.596690796277146e-05,
"loss": 0.4078,
"step": 890
},
{
"epoch": 0.84,
"grad_norm": 11.872729301452637,
"learning_rate": 4.5863495346432266e-05,
"loss": 0.3047,
"step": 900
},
{
"epoch": 0.85,
"grad_norm": 2.8548457622528076,
"learning_rate": 4.5760082730093076e-05,
"loss": 0.5353,
"step": 910
},
{
"epoch": 0.86,
"grad_norm": 1.8873672485351562,
"learning_rate": 4.565667011375388e-05,
"loss": 0.1907,
"step": 920
},
{
"epoch": 0.87,
"grad_norm": 0.31615641713142395,
"learning_rate": 4.555325749741469e-05,
"loss": 0.0634,
"step": 930
},
{
"epoch": 0.88,
"grad_norm": 44.98077392578125,
"learning_rate": 4.544984488107549e-05,
"loss": 0.2581,
"step": 940
},
{
"epoch": 0.89,
"grad_norm": 0.426419734954834,
"learning_rate": 4.5346432264736296e-05,
"loss": 0.4992,
"step": 950
},
{
"epoch": 0.9,
"grad_norm": 0.7562969923019409,
"learning_rate": 4.5243019648397106e-05,
"loss": 0.3383,
"step": 960
},
{
"epoch": 0.91,
"grad_norm": 0.20380929112434387,
"learning_rate": 4.513960703205791e-05,
"loss": 0.1892,
"step": 970
},
{
"epoch": 0.92,
"grad_norm": 2.884596586227417,
"learning_rate": 4.503619441571872e-05,
"loss": 0.3536,
"step": 980
},
{
"epoch": 0.93,
"grad_norm": 14.720122337341309,
"learning_rate": 4.493278179937952e-05,
"loss": 0.4856,
"step": 990
},
{
"epoch": 0.94,
"grad_norm": 0.2174040526151657,
"learning_rate": 4.4829369183040333e-05,
"loss": 0.1159,
"step": 1000
},
{
"epoch": 0.95,
"grad_norm": 0.5567955374717712,
"learning_rate": 4.472595656670114e-05,
"loss": 0.5414,
"step": 1010
},
{
"epoch": 0.96,
"grad_norm": 0.8856528997421265,
"learning_rate": 4.462254395036195e-05,
"loss": 0.2257,
"step": 1020
},
{
"epoch": 0.97,
"grad_norm": 0.36583012342453003,
"learning_rate": 4.451913133402275e-05,
"loss": 0.0835,
"step": 1030
},
{
"epoch": 0.97,
"grad_norm": 2.9916436672210693,
"learning_rate": 4.441571871768356e-05,
"loss": 0.2258,
"step": 1040
},
{
"epoch": 0.98,
"grad_norm": 0.16345125436782837,
"learning_rate": 4.4312306101344364e-05,
"loss": 0.2466,
"step": 1050
},
{
"epoch": 0.99,
"grad_norm": 0.1895245909690857,
"learning_rate": 4.420889348500517e-05,
"loss": 0.4954,
"step": 1060
},
{
"epoch": 1.0,
"eval_accuracy": 0.8030018761726079,
"eval_f1": 0.8080438756855576,
"eval_loss": 0.7780232429504395,
"eval_precision": 0.7864768683274022,
"eval_recall": 0.8308270676691729,
"eval_runtime": 1.4434,
"eval_samples_per_second": 369.265,
"eval_steps_per_second": 46.418,
"step": 1067
}
],
"logging_steps": 10,
"max_steps": 5335,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 350677703472000.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}