llama-3-8b-instruct-windo-iter3 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 100,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0032,
"grad_norm": 301517.0427429078,
"learning_rate": 1.5625e-08,
"logits/chosen": -0.2715578079223633,
"logits/rejected": -0.42230841517448425,
"logps/chosen": -74.72806549072266,
"logps/rejected": -86.24398040771484,
"loss": 47111.2656,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.032,
"grad_norm": 262907.88134103786,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -0.464042067527771,
"logits/rejected": -0.4814835786819458,
"logps/chosen": -98.76150512695312,
"logps/rejected": -98.51900482177734,
"loss": 46480.3472,
"rewards/accuracies": 0.4444444477558136,
"rewards/chosen": 3.484352646410116e-06,
"rewards/margins": 4.8643836635164917e-05,
"rewards/rejected": -4.5159493311075494e-05,
"step": 10
},
{
"epoch": 0.064,
"grad_norm": 258146.1292254514,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.5679231882095337,
"logits/rejected": -0.5402768850326538,
"logps/chosen": -120.5081558227539,
"logps/rejected": -118.08524322509766,
"loss": 45353.8531,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.0015855863457545638,
"rewards/margins": -0.00022199496743269265,
"rewards/rejected": -0.001363591174595058,
"step": 20
},
{
"epoch": 0.096,
"grad_norm": 272342.29803302046,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -0.6670567393302917,
"logits/rejected": -0.6592522859573364,
"logps/chosen": -117.73258209228516,
"logps/rejected": -117.10823822021484,
"loss": 45740.5375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.0023494339548051357,
"rewards/margins": 0.0008368989219889045,
"rewards/rejected": -0.0031863327603787184,
"step": 30
},
{
"epoch": 0.128,
"grad_norm": 254349.424821141,
"learning_rate": 4.857142857142857e-07,
"logits/chosen": -0.6472231149673462,
"logits/rejected": -0.6133359670639038,
"logps/chosen": -104.78807067871094,
"logps/rejected": -102.49015045166016,
"loss": 46397.3,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.0014789658598601818,
"rewards/margins": 0.0007001858321018517,
"rewards/rejected": -0.0021791516337543726,
"step": 40
},
{
"epoch": 0.16,
"grad_norm": 317871.6740038316,
"learning_rate": 4.6785714285714283e-07,
"logits/chosen": -0.5559561848640442,
"logits/rejected": -0.4931167662143707,
"logps/chosen": -105.31684875488281,
"logps/rejected": -100.81905364990234,
"loss": 46727.1,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.0029151481576263905,
"rewards/margins": -0.00013320180005393922,
"rewards/rejected": -0.002781946212053299,
"step": 50
},
{
"epoch": 0.192,
"grad_norm": 309432.48532645905,
"learning_rate": 4.5e-07,
"logits/chosen": -0.5389941930770874,
"logits/rejected": -0.5341317653656006,
"logps/chosen": -100.21482849121094,
"logps/rejected": -101.88697814941406,
"loss": 47186.6906,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.005146821960806847,
"rewards/margins": 0.0010053727310150862,
"rewards/rejected": -0.0061521949246525764,
"step": 60
},
{
"epoch": 0.224,
"grad_norm": 279882.5151706957,
"learning_rate": 4.3214285714285713e-07,
"logits/chosen": -0.6586358547210693,
"logits/rejected": -0.642874538898468,
"logps/chosen": -103.6421890258789,
"logps/rejected": -106.9367904663086,
"loss": 47560.4625,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0009041793528012931,
"rewards/margins": 0.0008955754456110299,
"rewards/rejected": -0.001799754798412323,
"step": 70
},
{
"epoch": 0.256,
"grad_norm": 307667.6742927268,
"learning_rate": 4.142857142857143e-07,
"logits/chosen": -0.6012131571769714,
"logits/rejected": -0.608718991279602,
"logps/chosen": -102.5672378540039,
"logps/rejected": -102.2068099975586,
"loss": 47294.5156,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.0008262965711764991,
"rewards/margins": 0.001540123368613422,
"rewards/rejected": -0.00236641988158226,
"step": 80
},
{
"epoch": 0.288,
"grad_norm": 272828.7827659401,
"learning_rate": 3.9642857142857137e-07,
"logits/chosen": -0.5353714823722839,
"logits/rejected": -0.5272339582443237,
"logps/chosen": -89.8922348022461,
"logps/rejected": -96.12710571289062,
"loss": 46162.6719,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.0036609251983463764,
"rewards/margins": 0.0021247321274131536,
"rewards/rejected": -0.005785657558590174,
"step": 90
},
{
"epoch": 0.32,
"grad_norm": 268429.20188699494,
"learning_rate": 3.785714285714285e-07,
"logits/chosen": -0.6908645629882812,
"logits/rejected": -0.6659768223762512,
"logps/chosen": -103.90727233886719,
"logps/rejected": -104.8873291015625,
"loss": 46836.85,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.0008818693459033966,
"rewards/margins": 0.0011547221802175045,
"rewards/rejected": -0.002036591526120901,
"step": 100
},
{
"epoch": 0.352,
"grad_norm": 269671.9409354351,
"learning_rate": 3.607142857142857e-07,
"logits/chosen": -0.5391483902931213,
"logits/rejected": -0.518116295337677,
"logps/chosen": -73.18064880371094,
"logps/rejected": -74.66477966308594,
"loss": 46931.35,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": -0.000710971187800169,
"rewards/margins": -0.00017498522356618196,
"rewards/rejected": -0.0005359860369935632,
"step": 110
},
{
"epoch": 0.384,
"grad_norm": 283739.571196758,
"learning_rate": 3.4285714285714286e-07,
"logits/chosen": -0.6659616827964783,
"logits/rejected": -0.7110891938209534,
"logps/chosen": -105.9618148803711,
"logps/rejected": -113.90108489990234,
"loss": 47461.1406,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.0007265805033966899,
"rewards/margins": 0.0006529040401801467,
"rewards/rejected": -0.001379484310746193,
"step": 120
},
{
"epoch": 0.416,
"grad_norm": 356192.57857636997,
"learning_rate": 3.25e-07,
"logits/chosen": -0.5708358287811279,
"logits/rejected": -0.5911769866943359,
"logps/chosen": -109.13777923583984,
"logps/rejected": -111.34733581542969,
"loss": 48096.3938,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.0033053618390113115,
"rewards/margins": 0.0012148560490459204,
"rewards/rejected": -0.004520217888057232,
"step": 130
},
{
"epoch": 0.448,
"grad_norm": 335211.4928609446,
"learning_rate": 3.0714285714285716e-07,
"logits/chosen": -0.6561946868896484,
"logits/rejected": -0.6476176977157593,
"logps/chosen": -123.64212799072266,
"logps/rejected": -123.40422058105469,
"loss": 45940.975,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.003593811299651861,
"rewards/margins": 0.00053530337754637,
"rewards/rejected": -0.004129114560782909,
"step": 140
},
{
"epoch": 0.48,
"grad_norm": 305395.21981975477,
"learning_rate": 2.892857142857143e-07,
"logits/chosen": -0.5857366323471069,
"logits/rejected": -0.5595449209213257,
"logps/chosen": -93.74351501464844,
"logps/rejected": -93.7242660522461,
"loss": 47410.8,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.002368563786149025,
"rewards/margins": 0.000440702453488484,
"rewards/rejected": -0.0028092663269490004,
"step": 150
},
{
"epoch": 0.512,
"grad_norm": 332600.8895494031,
"learning_rate": 2.714285714285714e-07,
"logits/chosen": -0.5348027944564819,
"logits/rejected": -0.5600031614303589,
"logps/chosen": -93.50711059570312,
"logps/rejected": -97.4880599975586,
"loss": 47739.1156,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.0032862056978046894,
"rewards/margins": 0.001124653615988791,
"rewards/rejected": -0.004410859197378159,
"step": 160
},
{
"epoch": 0.544,
"grad_norm": 331388.7328009726,
"learning_rate": 2.5357142857142855e-07,
"logits/chosen": -0.615580677986145,
"logits/rejected": -0.6166919469833374,
"logps/chosen": -113.16961669921875,
"logps/rejected": -116.9697494506836,
"loss": 47041.25,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.0033803496044129133,
"rewards/margins": 0.0007489208364859223,
"rewards/rejected": -0.0041292705573141575,
"step": 170
},
{
"epoch": 0.576,
"grad_norm": 332044.78524281725,
"learning_rate": 2.357142857142857e-07,
"logits/chosen": -0.6037168502807617,
"logits/rejected": -0.6863250136375427,
"logps/chosen": -119.31378173828125,
"logps/rejected": -126.0621566772461,
"loss": 47535.0344,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.002967274049296975,
"rewards/margins": -0.0003730076423380524,
"rewards/rejected": -0.002594266552478075,
"step": 180
},
{
"epoch": 0.608,
"grad_norm": 319413.8319562671,
"learning_rate": 2.1785714285714284e-07,
"logits/chosen": -0.7273733019828796,
"logits/rejected": -0.7188557982444763,
"logps/chosen": -115.4392318725586,
"logps/rejected": -112.0301284790039,
"loss": 46876.4094,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.002623769221827388,
"rewards/margins": 0.001058573485352099,
"rewards/rejected": -0.003682342590764165,
"step": 190
},
{
"epoch": 0.64,
"grad_norm": 314346.2695610602,
"learning_rate": 2e-07,
"logits/chosen": -0.6322755813598633,
"logits/rejected": -0.5915661454200745,
"logps/chosen": -100.2895278930664,
"logps/rejected": -93.4193344116211,
"loss": 47579.9,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.004441672004759312,
"rewards/margins": 0.00043794940575025976,
"rewards/rejected": -0.00487962132319808,
"step": 200
},
{
"epoch": 0.672,
"grad_norm": 350476.79993683187,
"learning_rate": 1.8214285714285714e-07,
"logits/chosen": -0.6528446078300476,
"logits/rejected": -0.6807696223258972,
"logps/chosen": -88.17680358886719,
"logps/rejected": -88.77709197998047,
"loss": 48939.0469,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.001382711692713201,
"rewards/margins": 0.0008936499943956733,
"rewards/rejected": -0.002276361919939518,
"step": 210
},
{
"epoch": 0.704,
"grad_norm": 333610.0532688813,
"learning_rate": 1.6428571428571429e-07,
"logits/chosen": -0.7356145977973938,
"logits/rejected": -0.7182696461677551,
"logps/chosen": -132.14749145507812,
"logps/rejected": -126.3568344116211,
"loss": 47693.8156,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.0011152013903483748,
"rewards/margins": 0.0019210099708288908,
"rewards/rejected": -0.003036211710423231,
"step": 220
},
{
"epoch": 0.736,
"grad_norm": 298046.1912209505,
"learning_rate": 1.4642857142857143e-07,
"logits/chosen": -0.5968427658081055,
"logits/rejected": -0.5768970847129822,
"logps/chosen": -94.10835266113281,
"logps/rejected": -101.12223815917969,
"loss": 46278.9875,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.0007765673799440265,
"rewards/margins": 0.0012700657825917006,
"rewards/rejected": -0.002046633278951049,
"step": 230
},
{
"epoch": 0.768,
"grad_norm": 288133.9839915777,
"learning_rate": 1.2857142857142855e-07,
"logits/chosen": -0.7007887363433838,
"logits/rejected": -0.6801734566688538,
"logps/chosen": -129.5726776123047,
"logps/rejected": -131.34750366210938,
"loss": 46299.8406,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.0021117436699569225,
"rewards/margins": 0.0007607269217260182,
"rewards/rejected": -0.00287247053347528,
"step": 240
},
{
"epoch": 0.8,
"grad_norm": 311647.07184519153,
"learning_rate": 1.107142857142857e-07,
"logits/chosen": -0.6636364459991455,
"logits/rejected": -0.6806343793869019,
"logps/chosen": -126.0498275756836,
"logps/rejected": -129.86505126953125,
"loss": 46564.025,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.0026524278800934553,
"rewards/margins": 0.0016201415564864874,
"rewards/rejected": -0.00427256990224123,
"step": 250
},
{
"epoch": 0.832,
"grad_norm": 332873.16670732293,
"learning_rate": 9.285714285714286e-08,
"logits/chosen": -0.6928391456604004,
"logits/rejected": -0.7199726700782776,
"logps/chosen": -91.19033813476562,
"logps/rejected": -99.61897277832031,
"loss": 45619.1937,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.0015564201166853309,
"rewards/margins": 0.002241902519017458,
"rewards/rejected": -0.0037983227521181107,
"step": 260
},
{
"epoch": 0.864,
"grad_norm": 321461.36179731,
"learning_rate": 7.5e-08,
"logits/chosen": -0.6582412123680115,
"logits/rejected": -0.6260276436805725,
"logps/chosen": -100.95478820800781,
"logps/rejected": -103.7972412109375,
"loss": 47785.325,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.0012073480756953359,
"rewards/margins": 0.0029904134571552277,
"rewards/rejected": -0.004197761416435242,
"step": 270
},
{
"epoch": 0.896,
"grad_norm": 369055.42210931255,
"learning_rate": 5.714285714285714e-08,
"logits/chosen": -0.8000235557556152,
"logits/rejected": -0.8175070881843567,
"logps/chosen": -112.53459167480469,
"logps/rejected": -116.30067443847656,
"loss": 47248.0156,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0023680843878537416,
"rewards/margins": 0.001865853788331151,
"rewards/rejected": -0.004233937710523605,
"step": 280
},
{
"epoch": 0.928,
"grad_norm": 337372.99771556864,
"learning_rate": 3.9285714285714285e-08,
"logits/chosen": -0.7372657656669617,
"logits/rejected": -0.7545084953308105,
"logps/chosen": -133.76637268066406,
"logps/rejected": -133.70761108398438,
"loss": 46951.3187,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.003720104694366455,
"rewards/margins": 0.0001435236044926569,
"rewards/rejected": -0.0038636289536952972,
"step": 290
},
{
"epoch": 0.96,
"grad_norm": 321067.1618704567,
"learning_rate": 2.142857142857143e-08,
"logits/chosen": -0.5622406601905823,
"logits/rejected": -0.5376971364021301,
"logps/chosen": -113.44036865234375,
"logps/rejected": -111.9993667602539,
"loss": 46523.5844,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.0034807869233191013,
"rewards/margins": 0.001533324713818729,
"rewards/rejected": -0.005014111753553152,
"step": 300
},
{
"epoch": 0.992,
"grad_norm": 332780.2074089866,
"learning_rate": 3.571428571428571e-09,
"logits/chosen": -0.6824791431427002,
"logits/rejected": -0.6886446475982666,
"logps/chosen": -121.04481506347656,
"logps/rejected": -121.3174819946289,
"loss": 45077.1906,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.001433422090485692,
"rewards/margins": 0.001681215362623334,
"rewards/rejected": -0.003114637453109026,
"step": 310
},
{
"epoch": 0.9984,
"step": 312,
"total_flos": 0.0,
"train_loss": 46907.07216546474,
"train_runtime": 2770.7347,
"train_samples_per_second": 7.217,
"train_steps_per_second": 0.113
}
],
"logging_steps": 10,
"max_steps": 312,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
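
The log_history entries above carry per-step DPO-style metrics (loss, rewards/chosen, rewards/rejected, rewards/margins, rewards/accuracies), and the final entry holds the run summary (train_loss, train_runtime). A minimal sketch of reading this state file and summarizing those fields follows; the local path "trainer_state.json" is an assumption, and the field names are taken directly from the log above.

import json

# Sketch: load the trainer state (path is an assumption) and summarize
# the logged per-step metrics from log_history.
with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    # The last entry is the run summary and has no per-step "loss" key; skip it here.
    if "loss" not in entry:
        continue
    # Sanity check: rewards/margins should equal rewards/chosen - rewards/rejected.
    margin_check = entry["rewards/chosen"] - entry["rewards/rejected"]
    print(
        f"step {entry['step']:>3} | loss {entry['loss']:.1f} | "
        f"acc {entry['rewards/accuracies']:.2f} | "
        f"margin {entry['rewards/margins']:.2e} "
        f"(chosen-rejected {margin_check:.2e})"
    )

# Run summary from the final log_history entry.
print("train_loss:", state["log_history"][-1]["train_loss"])

For example, at step 10 the logged margin 4.86e-05 matches rewards/chosen minus rewards/rejected (3.48e-06 - (-4.52e-05)) up to floating-point rounding.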