|
{
  "best_metric": 0.7780232429504395,
  "best_model_checkpoint": "MarkKisker/RoBERTa-base-RottenTomatoes_v2\\checkpoint-1067",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1067,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
    { "epoch": 0.01, "grad_norm": 0.03423001989722252, "learning_rate": 1.0000000000000002e-06, "loss": 0.0018, "step": 10 },
    { "epoch": 0.02, "grad_norm": 0.04875793680548668, "learning_rate": 2.0000000000000003e-06, "loss": 0.0014, "step": 20 },
    { "epoch": 0.03, "grad_norm": 20.525543212890625, "learning_rate": 3e-06, "loss": 0.1788, "step": 30 },
    { "epoch": 0.04, "grad_norm": 0.02198684774339199, "learning_rate": 4.000000000000001e-06, "loss": 0.0014, "step": 40 },
    { "epoch": 0.05, "grad_norm": 0.09893256425857544, "learning_rate": 5e-06, "loss": 0.1576, "step": 50 },
    { "epoch": 0.06, "grad_norm": 0.04567793011665344, "learning_rate": 6e-06, "loss": 0.1448, "step": 60 },
    { "epoch": 0.07, "grad_norm": 0.7623605728149414, "learning_rate": 7.000000000000001e-06, "loss": 0.0016, "step": 70 },
    { "epoch": 0.07, "grad_norm": 0.023505745455622673, "learning_rate": 8.000000000000001e-06, "loss": 0.0017, "step": 80 },
    { "epoch": 0.08, "grad_norm": 0.022804420441389084, "learning_rate": 9e-06, "loss": 0.1304, "step": 90 },
    { "epoch": 0.09, "grad_norm": 0.03228422999382019, "learning_rate": 1e-05, "loss": 0.1446, "step": 100 },
    { "epoch": 0.1, "grad_norm": 0.23935572803020477, "learning_rate": 1.1000000000000001e-05, "loss": 0.0425, "step": 110 },
    { "epoch": 0.11, "grad_norm": 7.51793098449707, "learning_rate": 1.2e-05, "loss": 0.0024, "step": 120 },
    { "epoch": 0.12, "grad_norm": 0.023124126717448235, "learning_rate": 1.3000000000000001e-05, "loss": 0.0228, "step": 130 },
    { "epoch": 0.13, "grad_norm": 17.817167282104492, "learning_rate": 1.4000000000000001e-05, "loss": 0.0753, "step": 140 },
    { "epoch": 0.14, "grad_norm": 0.015741823241114616, "learning_rate": 1.5e-05, "loss": 0.1112, "step": 150 },
    { "epoch": 0.15, "grad_norm": 0.26574474573135376, "learning_rate": 1.6000000000000003e-05, "loss": 0.0092, "step": 160 },
    { "epoch": 0.16, "grad_norm": 4.9772748947143555, "learning_rate": 1.7000000000000003e-05, "loss": 0.2384, "step": 170 },
    { "epoch": 0.17, "grad_norm": 0.061539579182863235, "learning_rate": 1.8e-05, "loss": 0.0012, "step": 180 },
    { "epoch": 0.18, "grad_norm": 0.08193587511777878, "learning_rate": 1.9e-05, "loss": 0.0812, "step": 190 },
    { "epoch": 0.19, "grad_norm": 0.023329803720116615, "learning_rate": 2e-05, "loss": 0.1662, "step": 200 },
    { "epoch": 0.2, "grad_norm": 0.031778186559677124, "learning_rate": 2.1e-05, "loss": 0.0013, "step": 210 },
    { "epoch": 0.21, "grad_norm": 0.023606792092323303, "learning_rate": 2.2000000000000003e-05, "loss": 0.0547, "step": 220 },
    { "epoch": 0.22, "grad_norm": 0.019992610439658165, "learning_rate": 2.3000000000000003e-05, "loss": 0.0011, "step": 230 },
    { "epoch": 0.22, "grad_norm": 0.09242820739746094, "learning_rate": 2.4e-05, "loss": 0.0012, "step": 240 },
    { "epoch": 0.23, "grad_norm": 0.01790749281644821, "learning_rate": 2.5e-05, "loss": 0.0803, "step": 250 },
    { "epoch": 0.24, "grad_norm": 0.013409961014986038, "learning_rate": 2.6000000000000002e-05, "loss": 0.0633, "step": 260 },
    { "epoch": 0.25, "grad_norm": 0.045994311571121216, "learning_rate": 2.7000000000000002e-05, "loss": 0.1399, "step": 270 },
    { "epoch": 0.26, "grad_norm": 0.030622974038124084, "learning_rate": 2.8000000000000003e-05, "loss": 0.0015, "step": 280 },
    { "epoch": 0.27, "grad_norm": 0.052308339625597, "learning_rate": 2.9e-05, "loss": 0.0852, "step": 290 },
    { "epoch": 0.28, "grad_norm": 0.09171419590711594, "learning_rate": 3e-05, "loss": 0.0022, "step": 300 },
    { "epoch": 0.29, "grad_norm": 0.024435508996248245, "learning_rate": 3.1e-05, "loss": 0.0859, "step": 310 },
    { "epoch": 0.3, "grad_norm": 330.25262451171875, "learning_rate": 3.2000000000000005e-05, "loss": 0.0497, "step": 320 },
    { "epoch": 0.31, "grad_norm": 0.021127384155988693, "learning_rate": 3.3e-05, "loss": 0.0818, "step": 330 },
    { "epoch": 0.32, "grad_norm": 0.030799318104982376, "learning_rate": 3.4000000000000007e-05, "loss": 0.0009, "step": 340 },
    { "epoch": 0.33, "grad_norm": 0.0609976127743721, "learning_rate": 3.5e-05, "loss": 0.0016, "step": 350 },
    { "epoch": 0.34, "grad_norm": 0.013851546682417393, "learning_rate": 3.6e-05, "loss": 0.2128, "step": 360 },
    { "epoch": 0.35, "grad_norm": 0.012113348580896854, "learning_rate": 3.7e-05, "loss": 0.119, "step": 370 },
    { "epoch": 0.36, "grad_norm": 0.0172914806753397, "learning_rate": 3.8e-05, "loss": 0.2769, "step": 380 },
    { "epoch": 0.37, "grad_norm": 0.047122351825237274, "learning_rate": 3.9000000000000006e-05, "loss": 0.0015, "step": 390 },
    { "epoch": 0.37, "grad_norm": 0.029489964246749878, "learning_rate": 4e-05, "loss": 0.0019, "step": 400 },
    { "epoch": 0.38, "grad_norm": 0.02585042454302311, "learning_rate": 4.1e-05, "loss": 0.0759, "step": 410 },
    { "epoch": 0.39, "grad_norm": 0.044562604278326035, "learning_rate": 4.2e-05, "loss": 0.0025, "step": 420 },
    { "epoch": 0.4, "grad_norm": 0.14535053074359894, "learning_rate": 4.3e-05, "loss": 0.0856, "step": 430 },
    { "epoch": 0.41, "grad_norm": 14.468594551086426, "learning_rate": 4.4000000000000006e-05, "loss": 0.1576, "step": 440 },
    { "epoch": 0.42, "grad_norm": 3.1556191444396973, "learning_rate": 4.5e-05, "loss": 0.1671, "step": 450 },
    { "epoch": 0.43, "grad_norm": 3.5952117443084717, "learning_rate": 4.600000000000001e-05, "loss": 0.1422, "step": 460 },
    { "epoch": 0.44, "grad_norm": 0.10417389869689941, "learning_rate": 4.7e-05, "loss": 0.0705, "step": 470 },
    { "epoch": 0.45, "grad_norm": 24.245695114135742, "learning_rate": 4.8e-05, "loss": 0.0586, "step": 480 },
    { "epoch": 0.46, "grad_norm": 0.6337321400642395, "learning_rate": 4.9e-05, "loss": 0.1252, "step": 490 },
    { "epoch": 0.47, "grad_norm": 62.256656646728516, "learning_rate": 5e-05, "loss": 0.1297, "step": 500 },
    { "epoch": 0.48, "grad_norm": 0.2806699573993683, "learning_rate": 4.9896587383660806e-05, "loss": 0.1789, "step": 510 },
    { "epoch": 0.49, "grad_norm": 0.05291756987571716, "learning_rate": 4.9793174767321616e-05, "loss": 0.1434, "step": 520 },
    { "epoch": 0.5, "grad_norm": 0.12910176813602448, "learning_rate": 4.968976215098242e-05, "loss": 0.2909, "step": 530 },
    { "epoch": 0.51, "grad_norm": 9.672201156616211, "learning_rate": 4.958634953464323e-05, "loss": 0.178, "step": 540 },
    { "epoch": 0.52, "grad_norm": 0.1485089659690857, "learning_rate": 4.948293691830403e-05, "loss": 0.0731, "step": 550 },
    { "epoch": 0.52, "grad_norm": 0.11018037796020508, "learning_rate": 4.937952430196484e-05, "loss": 0.1337, "step": 560 },
    { "epoch": 0.53, "grad_norm": 0.047798193991184235, "learning_rate": 4.9276111685625646e-05, "loss": 0.0028, "step": 570 },
    { "epoch": 0.54, "grad_norm": 3.2003190517425537, "learning_rate": 4.9172699069286456e-05, "loss": 0.2401, "step": 580 },
    { "epoch": 0.55, "grad_norm": 0.033508703112602234, "learning_rate": 4.906928645294726e-05, "loss": 0.057, "step": 590 },
    { "epoch": 0.56, "grad_norm": 0.07519116997718811, "learning_rate": 4.896587383660807e-05, "loss": 0.1506, "step": 600 },
    { "epoch": 0.57, "grad_norm": 0.10883668810129166, "learning_rate": 4.886246122026887e-05, "loss": 0.0799, "step": 610 },
    { "epoch": 0.58, "grad_norm": 3.15360689163208, "learning_rate": 4.8759048603929683e-05, "loss": 0.0904, "step": 620 },
    { "epoch": 0.59, "grad_norm": 2.191103219985962, "learning_rate": 4.865563598759049e-05, "loss": 0.0639, "step": 630 },
    { "epoch": 0.6, "grad_norm": 0.05860808119177818, "learning_rate": 4.855222337125129e-05, "loss": 0.2293, "step": 640 },
    { "epoch": 0.61, "grad_norm": 180.98397827148438, "learning_rate": 4.84488107549121e-05, "loss": 0.1808, "step": 650 },
    { "epoch": 0.62, "grad_norm": 127.35352325439453, "learning_rate": 4.8345398138572904e-05, "loss": 0.1102, "step": 660 },
    { "epoch": 0.63, "grad_norm": 0.03393542766571045, "learning_rate": 4.8241985522233714e-05, "loss": 0.2701, "step": 670 },
    { "epoch": 0.64, "grad_norm": 0.3284960985183716, "learning_rate": 4.813857290589452e-05, "loss": 0.0668, "step": 680 },
    { "epoch": 0.65, "grad_norm": 0.05796672776341438, "learning_rate": 4.803516028955533e-05, "loss": 0.1625, "step": 690 },
    { "epoch": 0.66, "grad_norm": 0.607434093952179, "learning_rate": 4.793174767321613e-05, "loss": 0.3021, "step": 700 },
    { "epoch": 0.67, "grad_norm": 0.1597072184085846, "learning_rate": 4.782833505687694e-05, "loss": 0.1627, "step": 710 },
    { "epoch": 0.67, "grad_norm": 0.0897730141878128, "learning_rate": 4.772492244053775e-05, "loss": 0.1379, "step": 720 },
    { "epoch": 0.68, "grad_norm": 70.59358978271484, "learning_rate": 4.7621509824198554e-05, "loss": 0.571, "step": 730 },
    { "epoch": 0.69, "grad_norm": 7.284711837768555, "learning_rate": 4.7518097207859365e-05, "loss": 0.4225, "step": 740 },
    { "epoch": 0.7, "grad_norm": 1.0496598482131958, "learning_rate": 4.741468459152017e-05, "loss": 0.0981, "step": 750 },
    { "epoch": 0.71, "grad_norm": 0.06985878944396973, "learning_rate": 4.731127197518098e-05, "loss": 0.1593, "step": 760 },
    { "epoch": 0.72, "grad_norm": 3.0135066509246826, "learning_rate": 4.720785935884178e-05, "loss": 0.3379, "step": 770 },
    { "epoch": 0.73, "grad_norm": 0.7335708141326904, "learning_rate": 4.710444674250259e-05, "loss": 0.4362, "step": 780 },
    { "epoch": 0.74, "grad_norm": 47.25736618041992, "learning_rate": 4.7001034126163395e-05, "loss": 0.3113, "step": 790 },
    { "epoch": 0.75, "grad_norm": 0.23570404946804047, "learning_rate": 4.6897621509824205e-05, "loss": 0.2591, "step": 800 },
    { "epoch": 0.76, "grad_norm": 1.6712744235992432, "learning_rate": 4.679420889348501e-05, "loss": 0.3156, "step": 810 },
    { "epoch": 0.77, "grad_norm": 3.803595781326294, "learning_rate": 4.669079627714581e-05, "loss": 0.2374, "step": 820 },
    { "epoch": 0.78, "grad_norm": 0.053344208747148514, "learning_rate": 4.658738366080662e-05, "loss": 0.0993, "step": 830 },
    { "epoch": 0.79, "grad_norm": 0.0863012745976448, "learning_rate": 4.6483971044467425e-05, "loss": 0.2164, "step": 840 },
    { "epoch": 0.8, "grad_norm": 50.73778533935547, "learning_rate": 4.6380558428128236e-05, "loss": 0.7427, "step": 850 },
    { "epoch": 0.81, "grad_norm": 1.3867453336715698, "learning_rate": 4.627714581178904e-05, "loss": 0.4358, "step": 860 },
    { "epoch": 0.82, "grad_norm": 2.2237284183502197, "learning_rate": 4.617373319544985e-05, "loss": 0.1522, "step": 870 },
    { "epoch": 0.82, "grad_norm": 189.51943969726562, "learning_rate": 4.607032057911065e-05, "loss": 0.3018, "step": 880 },
    { "epoch": 0.83, "grad_norm": 76.69792938232422, "learning_rate": 4.596690796277146e-05, "loss": 0.4078, "step": 890 },
    { "epoch": 0.84, "grad_norm": 11.872729301452637, "learning_rate": 4.5863495346432266e-05, "loss": 0.3047, "step": 900 },
    { "epoch": 0.85, "grad_norm": 2.8548457622528076, "learning_rate": 4.5760082730093076e-05, "loss": 0.5353, "step": 910 },
    { "epoch": 0.86, "grad_norm": 1.8873672485351562, "learning_rate": 4.565667011375388e-05, "loss": 0.1907, "step": 920 },
    { "epoch": 0.87, "grad_norm": 0.31615641713142395, "learning_rate": 4.555325749741469e-05, "loss": 0.0634, "step": 930 },
    { "epoch": 0.88, "grad_norm": 44.98077392578125, "learning_rate": 4.544984488107549e-05, "loss": 0.2581, "step": 940 },
    { "epoch": 0.89, "grad_norm": 0.426419734954834, "learning_rate": 4.5346432264736296e-05, "loss": 0.4992, "step": 950 },
    { "epoch": 0.9, "grad_norm": 0.7562969923019409, "learning_rate": 4.5243019648397106e-05, "loss": 0.3383, "step": 960 },
    { "epoch": 0.91, "grad_norm": 0.20380929112434387, "learning_rate": 4.513960703205791e-05, "loss": 0.1892, "step": 970 },
    { "epoch": 0.92, "grad_norm": 2.884596586227417, "learning_rate": 4.503619441571872e-05, "loss": 0.3536, "step": 980 },
    { "epoch": 0.93, "grad_norm": 14.720122337341309, "learning_rate": 4.493278179937952e-05, "loss": 0.4856, "step": 990 },
    { "epoch": 0.94, "grad_norm": 0.2174040526151657, "learning_rate": 4.4829369183040333e-05, "loss": 0.1159, "step": 1000 },
    { "epoch": 0.95, "grad_norm": 0.5567955374717712, "learning_rate": 4.472595656670114e-05, "loss": 0.5414, "step": 1010 },
    { "epoch": 0.96, "grad_norm": 0.8856528997421265, "learning_rate": 4.462254395036195e-05, "loss": 0.2257, "step": 1020 },
    { "epoch": 0.97, "grad_norm": 0.36583012342453003, "learning_rate": 4.451913133402275e-05, "loss": 0.0835, "step": 1030 },
    { "epoch": 0.97, "grad_norm": 2.9916436672210693, "learning_rate": 4.441571871768356e-05, "loss": 0.2258, "step": 1040 },
    { "epoch": 0.98, "grad_norm": 0.16345125436782837, "learning_rate": 4.4312306101344364e-05, "loss": 0.2466, "step": 1050 },
    { "epoch": 0.99, "grad_norm": 0.1895245909690857, "learning_rate": 4.420889348500517e-05, "loss": 0.4954, "step": 1060 },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8030018761726079,
      "eval_f1": 0.8080438756855576,
      "eval_loss": 0.7780232429504395,
      "eval_precision": 0.7864768683274022,
      "eval_recall": 0.8308270676691729,
      "eval_runtime": 1.4434,
      "eval_samples_per_second": 369.265,
      "eval_steps_per_second": 46.418,
      "step": 1067
    }
|
  ],
  "logging_steps": 10,
  "max_steps": 5335,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 350677703472000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}