{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.11943863839952225,
"eval_steps": 100,
"global_step": 1200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0009953219866626853,
"grad_norm": 1.912980556488037,
"learning_rate": 9.995023390066686e-06,
"loss": 1.8703,
"step": 10
},
{
"epoch": 0.0019906439733253707,
"grad_norm": 1.866821050643921,
"learning_rate": 9.990046780133374e-06,
"loss": 1.8723,
"step": 20
},
{
"epoch": 0.002985965959988056,
"grad_norm": 2.058809280395508,
"learning_rate": 9.985070170200061e-06,
"loss": 1.8097,
"step": 30
},
{
"epoch": 0.003981287946650741,
"grad_norm": 1.459013819694519,
"learning_rate": 9.980093560266747e-06,
"loss": 1.7456,
"step": 40
},
{
"epoch": 0.004976609933313427,
"grad_norm": 0.9095586538314819,
"learning_rate": 9.975116950333434e-06,
"loss": 1.7195,
"step": 50
},
{
"epoch": 0.005971931919976112,
"grad_norm": 1.1065226793289185,
"learning_rate": 9.970140340400121e-06,
"loss": 1.6502,
"step": 60
},
{
"epoch": 0.0069672539066387975,
"grad_norm": 0.8301252126693726,
"learning_rate": 9.965163730466807e-06,
"loss": 1.5699,
"step": 70
},
{
"epoch": 0.007962575893301483,
"grad_norm": 1.0762828588485718,
"learning_rate": 9.960187120533493e-06,
"loss": 1.5072,
"step": 80
},
{
"epoch": 0.008957897879964169,
"grad_norm": 1.0814900398254395,
"learning_rate": 9.95521051060018e-06,
"loss": 1.4369,
"step": 90
},
{
"epoch": 0.009953219866626855,
"grad_norm": 1.3561326265335083,
"learning_rate": 9.950233900666867e-06,
"loss": 1.3467,
"step": 100
},
{
"epoch": 0.009953219866626855,
"eval_loss": 1.2846794128417969,
"eval_runtime": 147.6242,
"eval_samples_per_second": 1.375,
"eval_steps_per_second": 0.691,
"step": 100
},
{
"epoch": 0.010948541853289539,
"grad_norm": 1.438547968864441,
"learning_rate": 9.945257290733553e-06,
"loss": 1.2222,
"step": 110
},
{
"epoch": 0.011943863839952225,
"grad_norm": 1.402588963508606,
"learning_rate": 9.94028068080024e-06,
"loss": 1.1001,
"step": 120
},
{
"epoch": 0.012939185826614909,
"grad_norm": 1.4357985258102417,
"learning_rate": 9.935304070866926e-06,
"loss": 0.9657,
"step": 130
},
{
"epoch": 0.013934507813277595,
"grad_norm": 2.137953042984009,
"learning_rate": 9.930327460933613e-06,
"loss": 0.8211,
"step": 140
},
{
"epoch": 0.014929829799940281,
"grad_norm": 1.374299168586731,
"learning_rate": 9.925350851000299e-06,
"loss": 0.7142,
"step": 150
},
{
"epoch": 0.015925151786602965,
"grad_norm": 1.1510456800460815,
"learning_rate": 9.920374241066986e-06,
"loss": 0.656,
"step": 160
},
{
"epoch": 0.01692047377326565,
"grad_norm": 1.0226788520812988,
"learning_rate": 9.915397631133673e-06,
"loss": 0.6212,
"step": 170
},
{
"epoch": 0.017915795759928337,
"grad_norm": 0.9365411400794983,
"learning_rate": 9.910421021200359e-06,
"loss": 0.6069,
"step": 180
},
{
"epoch": 0.018911117746591023,
"grad_norm": 0.6880003213882446,
"learning_rate": 9.905444411267046e-06,
"loss": 0.6128,
"step": 190
},
{
"epoch": 0.01990643973325371,
"grad_norm": 1.1190361976623535,
"learning_rate": 9.900467801333732e-06,
"loss": 0.5426,
"step": 200
},
{
"epoch": 0.01990643973325371,
"eval_loss": 0.5788590908050537,
"eval_runtime": 147.511,
"eval_samples_per_second": 1.376,
"eval_steps_per_second": 0.691,
"step": 200
},
{
"epoch": 0.02090176171991639,
"grad_norm": 1.184279441833496,
"learning_rate": 9.895491191400419e-06,
"loss": 0.5887,
"step": 210
},
{
"epoch": 0.021897083706579078,
"grad_norm": 0.7627615928649902,
"learning_rate": 9.890514581467106e-06,
"loss": 0.5433,
"step": 220
},
{
"epoch": 0.022892405693241764,
"grad_norm": 0.7858164310455322,
"learning_rate": 9.885537971533792e-06,
"loss": 0.5843,
"step": 230
},
{
"epoch": 0.02388772767990445,
"grad_norm": 0.695697009563446,
"learning_rate": 9.880561361600478e-06,
"loss": 0.5365,
"step": 240
},
{
"epoch": 0.024883049666567136,
"grad_norm": 0.8994197845458984,
"learning_rate": 9.875584751667165e-06,
"loss": 0.5662,
"step": 250
},
{
"epoch": 0.025878371653229818,
"grad_norm": 0.8016309142112732,
"learning_rate": 9.870608141733852e-06,
"loss": 0.5592,
"step": 260
},
{
"epoch": 0.026873693639892504,
"grad_norm": 0.8534384369850159,
"learning_rate": 9.865631531800538e-06,
"loss": 0.5248,
"step": 270
},
{
"epoch": 0.02786901562655519,
"grad_norm": 0.9857029914855957,
"learning_rate": 9.860654921867225e-06,
"loss": 0.5294,
"step": 280
},
{
"epoch": 0.028864337613217876,
"grad_norm": 0.7766090631484985,
"learning_rate": 9.855678311933912e-06,
"loss": 0.5198,
"step": 290
},
{
"epoch": 0.029859659599880562,
"grad_norm": 0.6832401752471924,
"learning_rate": 9.850701702000598e-06,
"loss": 0.5844,
"step": 300
},
{
"epoch": 0.029859659599880562,
"eval_loss": 0.536589503288269,
"eval_runtime": 147.4968,
"eval_samples_per_second": 1.376,
"eval_steps_per_second": 0.692,
"step": 300
},
{
"epoch": 0.030854981586543248,
"grad_norm": 0.7720848917961121,
"learning_rate": 9.845725092067284e-06,
"loss": 0.5365,
"step": 310
},
{
"epoch": 0.03185030357320593,
"grad_norm": 0.7022100687026978,
"learning_rate": 9.840748482133971e-06,
"loss": 0.4841,
"step": 320
},
{
"epoch": 0.03284562555986862,
"grad_norm": 1.0030310153961182,
"learning_rate": 9.835771872200658e-06,
"loss": 0.4635,
"step": 330
},
{
"epoch": 0.0338409475465313,
"grad_norm": 0.8628882765769958,
"learning_rate": 9.830795262267344e-06,
"loss": 0.4932,
"step": 340
},
{
"epoch": 0.034836269533193985,
"grad_norm": 0.7178316712379456,
"learning_rate": 9.825818652334031e-06,
"loss": 0.6057,
"step": 350
},
{
"epoch": 0.035831591519856675,
"grad_norm": 0.9564626216888428,
"learning_rate": 9.820842042400718e-06,
"loss": 0.5371,
"step": 360
},
{
"epoch": 0.03682691350651936,
"grad_norm": 0.7041760683059692,
"learning_rate": 9.815865432467404e-06,
"loss": 0.513,
"step": 370
},
{
"epoch": 0.037822235493182046,
"grad_norm": 1.0203750133514404,
"learning_rate": 9.81088882253409e-06,
"loss": 0.5118,
"step": 380
},
{
"epoch": 0.03881755747984473,
"grad_norm": 0.8765382170677185,
"learning_rate": 9.805912212600777e-06,
"loss": 0.4529,
"step": 390
},
{
"epoch": 0.03981287946650742,
"grad_norm": 0.9951983690261841,
"learning_rate": 9.800935602667464e-06,
"loss": 0.5336,
"step": 400
},
{
"epoch": 0.03981287946650742,
"eval_loss": 0.5151349306106567,
"eval_runtime": 147.6615,
"eval_samples_per_second": 1.375,
"eval_steps_per_second": 0.691,
"step": 400
},
{
"epoch": 0.0408082014531701,
"grad_norm": 0.7691435813903809,
"learning_rate": 9.79595899273415e-06,
"loss": 0.506,
"step": 410
},
{
"epoch": 0.04180352343983278,
"grad_norm": 1.1955533027648926,
"learning_rate": 9.790982382800837e-06,
"loss": 0.4692,
"step": 420
},
{
"epoch": 0.04279884542649547,
"grad_norm": 1.128085732460022,
"learning_rate": 9.786005772867525e-06,
"loss": 0.4608,
"step": 430
},
{
"epoch": 0.043794167413158155,
"grad_norm": 0.5518949627876282,
"learning_rate": 9.78102916293421e-06,
"loss": 0.5006,
"step": 440
},
{
"epoch": 0.044789489399820845,
"grad_norm": 0.7164484858512878,
"learning_rate": 9.776052553000896e-06,
"loss": 0.4996,
"step": 450
},
{
"epoch": 0.04578481138648353,
"grad_norm": 0.5959630012512207,
"learning_rate": 9.771075943067583e-06,
"loss": 0.4843,
"step": 460
},
{
"epoch": 0.04678013337314621,
"grad_norm": 0.743648111820221,
"learning_rate": 9.76609933313427e-06,
"loss": 0.4363,
"step": 470
},
{
"epoch": 0.0477754553598089,
"grad_norm": 0.8757079243659973,
"learning_rate": 9.761122723200956e-06,
"loss": 0.4665,
"step": 480
},
{
"epoch": 0.04877077734647158,
"grad_norm": 1.0122153759002686,
"learning_rate": 9.756146113267643e-06,
"loss": 0.492,
"step": 490
},
{
"epoch": 0.04976609933313427,
"grad_norm": 0.6179729700088501,
"learning_rate": 9.751169503334329e-06,
"loss": 0.5022,
"step": 500
},
{
"epoch": 0.04976609933313427,
"eval_loss": 0.4993921220302582,
"eval_runtime": 147.7401,
"eval_samples_per_second": 1.374,
"eval_steps_per_second": 0.69,
"step": 500
},
{
"epoch": 0.050761421319796954,
"grad_norm": 0.952812671661377,
"learning_rate": 9.746192893401016e-06,
"loss": 0.4901,
"step": 510
},
{
"epoch": 0.051756743306459636,
"grad_norm": 0.6715916991233826,
"learning_rate": 9.741216283467702e-06,
"loss": 0.5055,
"step": 520
},
{
"epoch": 0.052752065293122326,
"grad_norm": 0.674640953540802,
"learning_rate": 9.736239673534389e-06,
"loss": 0.4874,
"step": 530
},
{
"epoch": 0.05374738727978501,
"grad_norm": 0.7867962718009949,
"learning_rate": 9.731263063601075e-06,
"loss": 0.4956,
"step": 540
},
{
"epoch": 0.0547427092664477,
"grad_norm": 0.9035332202911377,
"learning_rate": 9.726286453667762e-06,
"loss": 0.499,
"step": 550
},
{
"epoch": 0.05573803125311038,
"grad_norm": 0.7009295225143433,
"learning_rate": 9.72130984373445e-06,
"loss": 0.5034,
"step": 560
},
{
"epoch": 0.05673335323977307,
"grad_norm": 0.7018862366676331,
"learning_rate": 9.716333233801135e-06,
"loss": 0.5137,
"step": 570
},
{
"epoch": 0.05772867522643575,
"grad_norm": 0.7812825441360474,
"learning_rate": 9.711356623867822e-06,
"loss": 0.4724,
"step": 580
},
{
"epoch": 0.058723997213098435,
"grad_norm": 0.6245225071907043,
"learning_rate": 9.70638001393451e-06,
"loss": 0.4446,
"step": 590
},
{
"epoch": 0.059719319199761124,
"grad_norm": 0.9083976149559021,
"learning_rate": 9.701403404001195e-06,
"loss": 0.4884,
"step": 600
},
{
"epoch": 0.059719319199761124,
"eval_loss": 0.4891846477985382,
"eval_runtime": 147.5284,
"eval_samples_per_second": 1.376,
"eval_steps_per_second": 0.691,
"step": 600
},
{
"epoch": 0.06071464118642381,
"grad_norm": 0.6195352673530579,
"learning_rate": 9.69642679406788e-06,
"loss": 0.5121,
"step": 610
},
{
"epoch": 0.061709963173086496,
"grad_norm": 0.8068727254867554,
"learning_rate": 9.691450184134568e-06,
"loss": 0.4689,
"step": 620
},
{
"epoch": 0.06270528515974919,
"grad_norm": 1.0427749156951904,
"learning_rate": 9.686473574201255e-06,
"loss": 0.4968,
"step": 630
},
{
"epoch": 0.06370060714641186,
"grad_norm": 0.698349118232727,
"learning_rate": 9.681496964267941e-06,
"loss": 0.4691,
"step": 640
},
{
"epoch": 0.06469592913307455,
"grad_norm": 0.9104384183883667,
"learning_rate": 9.676520354334628e-06,
"loss": 0.4775,
"step": 650
},
{
"epoch": 0.06569125111973724,
"grad_norm": 0.8729726076126099,
"learning_rate": 9.671543744401316e-06,
"loss": 0.5201,
"step": 660
},
{
"epoch": 0.06668657310639992,
"grad_norm": 0.9858236908912659,
"learning_rate": 9.666567134468001e-06,
"loss": 0.4268,
"step": 670
},
{
"epoch": 0.0676818950930626,
"grad_norm": 2.322754383087158,
"learning_rate": 9.661590524534687e-06,
"loss": 0.4744,
"step": 680
},
{
"epoch": 0.0686772170797253,
"grad_norm": 0.9327623248100281,
"learning_rate": 9.656613914601374e-06,
"loss": 0.4355,
"step": 690
},
{
"epoch": 0.06967253906638797,
"grad_norm": 0.6949413418769836,
"learning_rate": 9.651637304668062e-06,
"loss": 0.465,
"step": 700
},
{
"epoch": 0.06967253906638797,
"eval_loss": 0.4817120432853699,
"eval_runtime": 147.5643,
"eval_samples_per_second": 1.376,
"eval_steps_per_second": 0.691,
"step": 700
},
{
"epoch": 0.07066786105305066,
"grad_norm": 0.5208165049552917,
"learning_rate": 9.646660694734747e-06,
"loss": 0.4973,
"step": 710
},
{
"epoch": 0.07166318303971335,
"grad_norm": 0.8434884548187256,
"learning_rate": 9.641684084801434e-06,
"loss": 0.4721,
"step": 720
},
{
"epoch": 0.07265850502637604,
"grad_norm": 0.7161769866943359,
"learning_rate": 9.636707474868122e-06,
"loss": 0.498,
"step": 730
},
{
"epoch": 0.07365382701303871,
"grad_norm": 0.7036088705062866,
"learning_rate": 9.631730864934807e-06,
"loss": 0.4672,
"step": 740
},
{
"epoch": 0.0746491489997014,
"grad_norm": 0.9175013899803162,
"learning_rate": 9.626754255001493e-06,
"loss": 0.4781,
"step": 750
},
{
"epoch": 0.07564447098636409,
"grad_norm": 0.678519606590271,
"learning_rate": 9.62177764506818e-06,
"loss": 0.4048,
"step": 760
},
{
"epoch": 0.07663979297302677,
"grad_norm": 0.6295528411865234,
"learning_rate": 9.616801035134868e-06,
"loss": 0.449,
"step": 770
},
{
"epoch": 0.07763511495968946,
"grad_norm": 0.5424385666847229,
"learning_rate": 9.611824425201553e-06,
"loss": 0.4394,
"step": 780
},
{
"epoch": 0.07863043694635215,
"grad_norm": 0.508836030960083,
"learning_rate": 9.60684781526824e-06,
"loss": 0.4317,
"step": 790
},
{
"epoch": 0.07962575893301484,
"grad_norm": 0.6004147529602051,
"learning_rate": 9.601871205334926e-06,
"loss": 0.4308,
"step": 800
},
{
"epoch": 0.07962575893301484,
"eval_loss": 0.47557342052459717,
"eval_runtime": 147.5812,
"eval_samples_per_second": 1.376,
"eval_steps_per_second": 0.691,
"step": 800
},
{
"epoch": 0.08062108091967751,
"grad_norm": 0.5553786754608154,
"learning_rate": 9.596894595401613e-06,
"loss": 0.4376,
"step": 810
},
{
"epoch": 0.0816164029063402,
"grad_norm": 0.7254445552825928,
"learning_rate": 9.591917985468299e-06,
"loss": 0.4884,
"step": 820
},
{
"epoch": 0.08261172489300289,
"grad_norm": 0.7175013422966003,
"learning_rate": 9.586941375534986e-06,
"loss": 0.4167,
"step": 830
},
{
"epoch": 0.08360704687966557,
"grad_norm": 0.6464620232582092,
"learning_rate": 9.581964765601674e-06,
"loss": 0.4622,
"step": 840
},
{
"epoch": 0.08460236886632826,
"grad_norm": 0.6999176144599915,
"learning_rate": 9.57698815566836e-06,
"loss": 0.4708,
"step": 850
},
{
"epoch": 0.08559769085299095,
"grad_norm": 0.7939727306365967,
"learning_rate": 9.572011545735047e-06,
"loss": 0.4633,
"step": 860
},
{
"epoch": 0.08659301283965362,
"grad_norm": 0.473017156124115,
"learning_rate": 9.567034935801732e-06,
"loss": 0.4585,
"step": 870
},
{
"epoch": 0.08758833482631631,
"grad_norm": 0.7265183329582214,
"learning_rate": 9.56205832586842e-06,
"loss": 0.4485,
"step": 880
},
{
"epoch": 0.088583656812979,
"grad_norm": 0.539735734462738,
"learning_rate": 9.557081715935105e-06,
"loss": 0.475,
"step": 890
},
{
"epoch": 0.08957897879964169,
"grad_norm": 0.7587076425552368,
"learning_rate": 9.552105106001792e-06,
"loss": 0.4347,
"step": 900
},
{
"epoch": 0.08957897879964169,
"eval_loss": 0.4690374732017517,
"eval_runtime": 147.5672,
"eval_samples_per_second": 1.376,
"eval_steps_per_second": 0.691,
"step": 900
},
{
"epoch": 0.09057430078630437,
"grad_norm": 0.7549741864204407,
"learning_rate": 9.547128496068478e-06,
"loss": 0.4434,
"step": 910
},
{
"epoch": 0.09156962277296705,
"grad_norm": 0.686689555644989,
"learning_rate": 9.542151886135165e-06,
"loss": 0.4052,
"step": 920
},
{
"epoch": 0.09256494475962974,
"grad_norm": 1.02870512008667,
"learning_rate": 9.537175276201853e-06,
"loss": 0.4806,
"step": 930
},
{
"epoch": 0.09356026674629242,
"grad_norm": 0.7680675983428955,
"learning_rate": 9.532198666268538e-06,
"loss": 0.4609,
"step": 940
},
{
"epoch": 0.09455558873295511,
"grad_norm": 0.5478435754776001,
"learning_rate": 9.527222056335224e-06,
"loss": 0.4171,
"step": 950
},
{
"epoch": 0.0955509107196178,
"grad_norm": 0.5974985361099243,
"learning_rate": 9.522245446401913e-06,
"loss": 0.4686,
"step": 960
},
{
"epoch": 0.09654623270628049,
"grad_norm": 0.997151792049408,
"learning_rate": 9.517268836468598e-06,
"loss": 0.4676,
"step": 970
},
{
"epoch": 0.09754155469294316,
"grad_norm": 0.6366075277328491,
"learning_rate": 9.512292226535284e-06,
"loss": 0.4467,
"step": 980
},
{
"epoch": 0.09853687667960585,
"grad_norm": 0.5682553052902222,
"learning_rate": 9.507315616601971e-06,
"loss": 0.4772,
"step": 990
},
{
"epoch": 0.09953219866626854,
"grad_norm": 0.5869882106781006,
"learning_rate": 9.502339006668659e-06,
"loss": 0.3976,
"step": 1000
},
{
"epoch": 0.09953219866626854,
"eval_loss": 0.46156319975852966,
"eval_runtime": 147.6656,
"eval_samples_per_second": 1.375,
"eval_steps_per_second": 0.691,
"step": 1000
},
{
"epoch": 0.10052752065293122,
"grad_norm": 0.5758237838745117,
"learning_rate": 9.497362396735344e-06,
"loss": 0.4528,
"step": 1010
},
{
"epoch": 0.10152284263959391,
"grad_norm": 0.700281023979187,
"learning_rate": 9.492385786802032e-06,
"loss": 0.4545,
"step": 1020
},
{
"epoch": 0.1025181646262566,
"grad_norm": 1.1320914030075073,
"learning_rate": 9.487409176868719e-06,
"loss": 0.4331,
"step": 1030
},
{
"epoch": 0.10351348661291927,
"grad_norm": 0.6469867825508118,
"learning_rate": 9.482432566935405e-06,
"loss": 0.3759,
"step": 1040
},
{
"epoch": 0.10450880859958196,
"grad_norm": 0.9471383094787598,
"learning_rate": 9.47745595700209e-06,
"loss": 0.4041,
"step": 1050
},
{
"epoch": 0.10550413058624465,
"grad_norm": 0.5729160904884338,
"learning_rate": 9.472479347068777e-06,
"loss": 0.4871,
"step": 1060
},
{
"epoch": 0.10649945257290734,
"grad_norm": 0.642436683177948,
"learning_rate": 9.467502737135465e-06,
"loss": 0.3893,
"step": 1070
},
{
"epoch": 0.10749477455957002,
"grad_norm": 0.95659339427948,
"learning_rate": 9.46252612720215e-06,
"loss": 0.4486,
"step": 1080
},
{
"epoch": 0.1084900965462327,
"grad_norm": 0.6642667055130005,
"learning_rate": 9.457549517268838e-06,
"loss": 0.5168,
"step": 1090
},
{
"epoch": 0.1094854185328954,
"grad_norm": 0.5805796980857849,
"learning_rate": 9.452572907335525e-06,
"loss": 0.4019,
"step": 1100
},
{
"epoch": 0.1094854185328954,
"eval_loss": 0.4559178054332733,
"eval_runtime": 147.5891,
"eval_samples_per_second": 1.375,
"eval_steps_per_second": 0.691,
"step": 1100
},
{
"epoch": 0.11048074051955807,
"grad_norm": 0.7006909251213074,
"learning_rate": 9.44759629740221e-06,
"loss": 0.457,
"step": 1110
},
{
"epoch": 0.11147606250622076,
"grad_norm": 1.1821540594100952,
"learning_rate": 9.442619687468896e-06,
"loss": 0.3484,
"step": 1120
},
{
"epoch": 0.11247138449288345,
"grad_norm": 0.7232743501663208,
"learning_rate": 9.437643077535584e-06,
"loss": 0.417,
"step": 1130
},
{
"epoch": 0.11346670647954614,
"grad_norm": 0.6104183197021484,
"learning_rate": 9.43266646760227e-06,
"loss": 0.4821,
"step": 1140
},
{
"epoch": 0.11446202846620881,
"grad_norm": 0.5961386561393738,
"learning_rate": 9.427689857668956e-06,
"loss": 0.4834,
"step": 1150
},
{
"epoch": 0.1154573504528715,
"grad_norm": 0.5530894994735718,
"learning_rate": 9.422713247735644e-06,
"loss": 0.443,
"step": 1160
},
{
"epoch": 0.1164526724395342,
"grad_norm": 0.5148622393608093,
"learning_rate": 9.41773663780233e-06,
"loss": 0.4029,
"step": 1170
},
{
"epoch": 0.11744799442619687,
"grad_norm": 0.6148583292961121,
"learning_rate": 9.412760027869017e-06,
"loss": 0.4308,
"step": 1180
},
{
"epoch": 0.11844331641285956,
"grad_norm": 0.7840449213981628,
"learning_rate": 9.407783417935702e-06,
"loss": 0.499,
"step": 1190
},
{
"epoch": 0.11943863839952225,
"grad_norm": 0.6757422089576721,
"learning_rate": 9.40280680800239e-06,
"loss": 0.4263,
"step": 1200
},
{
"epoch": 0.11943863839952225,
"eval_loss": 0.4505193829536438,
"eval_runtime": 147.6664,
"eval_samples_per_second": 1.375,
"eval_steps_per_second": 0.691,
"step": 1200
}
],
"logging_steps": 10,
"max_steps": 20094,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.756627798568276e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}