gemma-7b-borpo-shuffled-6e-5 / trainer_state.json
silviasapora's picture
Model save
2a513b4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9952,
"eval_steps": 500,
"global_step": 351,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.042666666666666665,
"grad_norm": 23808.0,
"learning_rate": 8.333333333333334e-06,
"log_odds_chosen": 4.913786888122559,
"log_odds_ratio": -7.455605983734131,
"logits/chosen": 104.62542724609375,
"logits/rejected": 103.22361755371094,
"logps/chosen": -24.285247802734375,
"logps/rejected": -29.19942283630371,
"loss": 999.1373,
"nll_loss": 9.384310722351074,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -12.142623901367188,
"rewards/margins": 2.4570868015289307,
"rewards/rejected": -14.599711418151855,
"step": 5
},
{
"epoch": 0.08533333333333333,
"grad_norm": 20480.0,
"learning_rate": 1.6666666666666667e-05,
"log_odds_chosen": 3.161984443664551,
"log_odds_ratio": -4.3201093673706055,
"logits/chosen": 116.70096588134766,
"logits/rejected": 107.84611511230469,
"logps/chosen": -22.356525421142578,
"logps/rejected": -25.517377853393555,
"loss": 1471.3339,
"nll_loss": 7.478154182434082,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -11.178262710571289,
"rewards/margins": 1.580425500869751,
"rewards/rejected": -12.758688926696777,
"step": 10
},
{
"epoch": 0.128,
"grad_norm": 9792.0,
"learning_rate": 2.5e-05,
"log_odds_chosen": 5.405481338500977,
"log_odds_ratio": -3.742039442062378,
"logits/chosen": 101.9460220336914,
"logits/rejected": 144.56015014648438,
"logps/chosen": -16.96074867248535,
"logps/rejected": -22.36530303955078,
"loss": 2500.4584,
"nll_loss": 9.246469497680664,
"rewards/accuracies": 0.5625,
"rewards/chosen": -8.480374336242676,
"rewards/margins": 2.7022786140441895,
"rewards/rejected": -11.18265151977539,
"step": 15
},
{
"epoch": 0.17066666666666666,
"grad_norm": 23424.0,
"learning_rate": 3.3333333333333335e-05,
"log_odds_chosen": 1.755802869796753,
"log_odds_ratio": -8.513590812683105,
"logits/chosen": 138.0966033935547,
"logits/rejected": 115.8319320678711,
"logps/chosen": -21.58835220336914,
"logps/rejected": -23.347524642944336,
"loss": -909.4568,
"nll_loss": 7.946342468261719,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -10.79417610168457,
"rewards/margins": 0.879586398601532,
"rewards/rejected": -11.673762321472168,
"step": 20
},
{
"epoch": 0.21333333333333335,
"grad_norm": 81920.0,
"learning_rate": 4.1666666666666665e-05,
"log_odds_chosen": -3.714871883392334,
"log_odds_ratio": -11.706196784973145,
"logits/chosen": 131.1918182373047,
"logits/rejected": 112.025146484375,
"logps/chosen": -27.263925552368164,
"logps/rejected": -23.55154037475586,
"loss": -1289.5892,
"nll_loss": 11.121248245239258,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -13.631962776184082,
"rewards/margins": -1.8561919927597046,
"rewards/rejected": -11.77577018737793,
"step": 25
},
{
"epoch": 0.256,
"grad_norm": 23424.0,
"learning_rate": 5e-05,
"log_odds_chosen": 5.615313529968262,
"log_odds_ratio": -6.0788164138793945,
"logits/chosen": 120.47991943359375,
"logits/rejected": 131.72543334960938,
"logps/chosen": -20.34619903564453,
"logps/rejected": -25.962305068969727,
"loss": 1989.467,
"nll_loss": 11.813726425170898,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -10.173099517822266,
"rewards/margins": 2.808053731918335,
"rewards/rejected": -12.981152534484863,
"step": 30
},
{
"epoch": 0.2986666666666667,
"grad_norm": 12288.0,
"learning_rate": 5.833333333333333e-05,
"log_odds_chosen": 6.8634352684021,
"log_odds_ratio": -4.409341335296631,
"logits/chosen": 106.82928466796875,
"logits/rejected": 135.06765747070312,
"logps/chosen": -17.419185638427734,
"logps/rejected": -24.284481048583984,
"loss": 2134.8127,
"nll_loss": 8.908151626586914,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -8.709592819213867,
"rewards/margins": 3.4326491355895996,
"rewards/rejected": -12.142240524291992,
"step": 35
},
{
"epoch": 0.3413333333333333,
"grad_norm": 20096.0,
"learning_rate": 5.997613110678538e-05,
"log_odds_chosen": 10.421220779418945,
"log_odds_ratio": -4.025184154510498,
"logits/chosen": 97.60896301269531,
"logits/rejected": 131.75054931640625,
"logps/chosen": -20.480510711669922,
"logps/rejected": -30.901927947998047,
"loss": 2004.0334,
"nll_loss": 10.660150527954102,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -10.240255355834961,
"rewards/margins": 5.2107110023498535,
"rewards/rejected": -15.450963973999023,
"step": 40
},
{
"epoch": 0.384,
"grad_norm": 29952.0,
"learning_rate": 5.987922881985718e-05,
"log_odds_chosen": 2.5370476245880127,
"log_odds_ratio": -9.996942520141602,
"logits/chosen": 125.96684265136719,
"logits/rejected": 126.08040618896484,
"logps/chosen": -22.960010528564453,
"logps/rejected": -25.498239517211914,
"loss": 504.6511,
"nll_loss": 9.340021133422852,
"rewards/accuracies": 0.5,
"rewards/chosen": -11.480005264282227,
"rewards/margins": 1.2691147327423096,
"rewards/rejected": -12.749119758605957,
"step": 45
},
{
"epoch": 0.4266666666666667,
"grad_norm": 13312.0,
"learning_rate": 5.970804206224711e-05,
"log_odds_chosen": 9.48165512084961,
"log_odds_ratio": -6.348289489746094,
"logits/chosen": 115.5280532836914,
"logits/rejected": 133.51206970214844,
"logps/chosen": -18.29220962524414,
"logps/rejected": -27.774459838867188,
"loss": 357.9646,
"nll_loss": 8.295930862426758,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -9.14610481262207,
"rewards/margins": 4.74112606048584,
"rewards/rejected": -13.887229919433594,
"step": 50
},
{
"epoch": 0.4693333333333333,
"grad_norm": 36864.0,
"learning_rate": 5.9462996431207166e-05,
"log_odds_chosen": -1.4241477251052856,
"log_odds_ratio": -7.040617942810059,
"logits/chosen": 90.20933532714844,
"logits/rejected": 77.6080322265625,
"logps/chosen": -21.791763305664062,
"logps/rejected": -20.36836051940918,
"loss": 710.7211,
"nll_loss": 8.630704879760742,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -10.895881652832031,
"rewards/margins": -0.7117019891738892,
"rewards/rejected": -10.18418025970459,
"step": 55
},
{
"epoch": 0.512,
"grad_norm": 22912.0,
"learning_rate": 5.914470114878602e-05,
"log_odds_chosen": 0.059395600110292435,
"log_odds_ratio": -7.545324802398682,
"logits/chosen": 74.50141906738281,
"logits/rejected": 72.20657348632812,
"logps/chosen": -27.5406551361084,
"logps/rejected": -27.600433349609375,
"loss": 835.8969,
"nll_loss": 11.118535995483398,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -13.7703275680542,
"rewards/margins": 0.029887771233916283,
"rewards/rejected": -13.800216674804688,
"step": 60
},
{
"epoch": 0.5546666666666666,
"grad_norm": 59136.0,
"learning_rate": 5.875394754720707e-05,
"log_odds_chosen": 0.004063797183334827,
"log_odds_ratio": -8.829879760742188,
"logits/chosen": 107.4288101196289,
"logits/rejected": 100.03871154785156,
"logps/chosen": -26.154687881469727,
"logps/rejected": -26.159423828125,
"loss": 1129.1766,
"nll_loss": 8.619396209716797,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -13.077343940734863,
"rewards/margins": 0.0023682594764977694,
"rewards/rejected": -13.0797119140625,
"step": 65
},
{
"epoch": 0.5973333333333334,
"grad_norm": 84992.0,
"learning_rate": 5.8291707101491815e-05,
"log_odds_chosen": -9.859933853149414,
"log_odds_ratio": -13.291154861450195,
"logits/chosen": 130.4120635986328,
"logits/rejected": 107.79060363769531,
"logps/chosen": -33.194881439208984,
"logps/rejected": -23.33577537536621,
"loss": -1070.0952,
"nll_loss": 9.995885848999023,
"rewards/accuracies": 0.4375,
"rewards/chosen": -16.597440719604492,
"rewards/margins": -4.929553031921387,
"rewards/rejected": -11.667887687683105,
"step": 70
},
{
"epoch": 0.64,
"grad_norm": 27264.0,
"learning_rate": 5.77591290142199e-05,
"log_odds_chosen": -1.0986392498016357,
"log_odds_ratio": -6.415988922119141,
"logits/chosen": 202.5902862548828,
"logits/rejected": 170.36766052246094,
"logps/chosen": -28.743408203125,
"logps/rejected": -27.644739151000977,
"loss": 3134.2316,
"nll_loss": 16.13515853881836,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -14.3717041015625,
"rewards/margins": -0.5493333339691162,
"rewards/rejected": -13.822369575500488,
"step": 75
},
{
"epoch": 0.6826666666666666,
"grad_norm": 5920.0,
"learning_rate": 5.7157537358430446e-05,
"log_odds_chosen": 5.658118724822998,
"log_odds_ratio": -7.236645698547363,
"logits/chosen": 79.78996276855469,
"logits/rejected": 120.5929946899414,
"logps/chosen": -39.893455505371094,
"logps/rejected": -45.551578521728516,
"loss": 1430.3527,
"nll_loss": 26.075185775756836,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -19.946727752685547,
"rewards/margins": 2.829059362411499,
"rewards/rejected": -22.775789260864258,
"step": 80
},
{
"epoch": 0.7253333333333334,
"grad_norm": 7264.0,
"learning_rate": 5.648842778576781e-05,
"log_odds_chosen": -2.8542323112487793,
"log_odds_ratio": -11.537806510925293,
"logits/chosen": 43.76961135864258,
"logits/rejected": 40.082550048828125,
"logps/chosen": -41.100486755371094,
"logps/rejected": -38.24618911743164,
"loss": 27.785,
"nll_loss": 18.119293212890625,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -20.550243377685547,
"rewards/margins": -1.4271516799926758,
"rewards/rejected": -19.12309455871582,
"step": 85
},
{
"epoch": 0.768,
"grad_norm": 8160.0,
"learning_rate": 5.575346380805599e-05,
"log_odds_chosen": 6.750527858734131,
"log_odds_ratio": -4.975089073181152,
"logits/chosen": 142.46315002441406,
"logits/rejected": 194.29443359375,
"logps/chosen": -27.23212242126465,
"logps/rejected": -33.98273849487305,
"loss": 410.7923,
"nll_loss": 13.293853759765625,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -13.616061210632324,
"rewards/margins": 3.3753085136413574,
"rewards/rejected": -16.991369247436523,
"step": 90
},
{
"epoch": 0.8106666666666666,
"grad_norm": 6752.0,
"learning_rate": 5.4954472661546075e-05,
"log_odds_chosen": -1.6101436614990234,
"log_odds_ratio": -5.586986064910889,
"logits/chosen": 265.1175537109375,
"logits/rejected": 220.3394012451172,
"logps/chosen": -16.311241149902344,
"logps/rejected": -14.702871322631836,
"loss": 697.2691,
"nll_loss": 8.886590957641602,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -8.155620574951172,
"rewards/margins": -0.8041850924491882,
"rewards/rejected": -7.351435661315918,
"step": 95
},
{
"epoch": 0.8533333333333334,
"grad_norm": 1976.0,
"learning_rate": 5.4093440764119056e-05,
"log_odds_chosen": 0.8467995524406433,
"log_odds_ratio": -3.7493503093719482,
"logits/chosen": 214.7117156982422,
"logits/rejected": 216.2617950439453,
"logps/chosen": -10.674482345581055,
"logps/rejected": -11.517151832580566,
"loss": 671.6329,
"nll_loss": 5.953970909118652,
"rewards/accuracies": 0.5,
"rewards/chosen": -5.337241172790527,
"rewards/margins": 0.4213342070579529,
"rewards/rejected": -5.758575916290283,
"step": 100
},
{
"epoch": 0.896,
"grad_norm": 233.0,
"learning_rate": 5.317250877673799e-05,
"log_odds_chosen": 0.0025218098890036345,
"log_odds_ratio": -1.1279939413070679,
"logits/chosen": 274.94171142578125,
"logits/rejected": 293.26910400390625,
"logps/chosen": -3.1382219791412354,
"logps/rejected": -3.117029905319214,
"loss": 59.8813,
"nll_loss": 2.759488105773926,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -1.5691109895706177,
"rewards/margins": -0.010595941916108131,
"rewards/rejected": -1.558514952659607,
"step": 105
},
{
"epoch": 0.9386666666666666,
"grad_norm": 93.0,
"learning_rate": 5.219396628142752e-05,
"log_odds_chosen": 0.3583167493343353,
"log_odds_ratio": -0.7731421589851379,
"logits/chosen": 287.4047546386719,
"logits/rejected": 319.8270263671875,
"logps/chosen": -2.0119025707244873,
"logps/rejected": -2.3544743061065674,
"loss": 41.0939,
"nll_loss": 2.1219072341918945,
"rewards/accuracies": 0.5625,
"rewards/chosen": -1.0059512853622437,
"rewards/margins": 0.17128589749336243,
"rewards/rejected": -1.1772371530532837,
"step": 110
},
{
"epoch": 0.9813333333333333,
"grad_norm": 75.5,
"learning_rate": 5.1160246089012264e-05,
"log_odds_chosen": -0.07454674690961838,
"log_odds_ratio": -0.8472925424575806,
"logits/chosen": 304.05816650390625,
"logits/rejected": 292.1544494628906,
"logps/chosen": -1.64755117893219,
"logps/rejected": -1.558643102645874,
"loss": 35.4153,
"nll_loss": 1.8526198863983154,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.823775589466095,
"rewards/margins": -0.04445408657193184,
"rewards/rejected": -0.779321551322937,
"step": 115
},
{
"epoch": 1.024,
"grad_norm": 106.5,
"learning_rate": 5.007391819076575e-05,
"log_odds_chosen": 0.20053406059741974,
"log_odds_ratio": -0.6979594826698303,
"logits/chosen": 296.3975524902344,
"logits/rejected": 293.24871826171875,
"logps/chosen": -1.3861749172210693,
"logps/rejected": -1.5457828044891357,
"loss": 32.7872,
"nll_loss": 1.7506237030029297,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.6930874586105347,
"rewards/margins": 0.07980402559041977,
"rewards/rejected": -0.7728914022445679,
"step": 120
},
{
"epoch": 1.0666666666666667,
"grad_norm": 41.0,
"learning_rate": 4.893768336900717e-05,
"log_odds_chosen": 0.20815667510032654,
"log_odds_ratio": -0.654870331287384,
"logits/chosen": 285.3860168457031,
"logits/rejected": 291.6961975097656,
"logps/chosen": -1.3074676990509033,
"logps/rejected": -1.4680944681167603,
"loss": 30.4078,
"nll_loss": 1.648654580116272,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.6537338495254517,
"rewards/margins": 0.08031338453292847,
"rewards/rejected": -0.7340472340583801,
"step": 125
},
{
"epoch": 1.1093333333333333,
"grad_norm": 83.5,
"learning_rate": 4.775436648253103e-05,
"log_odds_chosen": 0.010318088345229626,
"log_odds_ratio": -0.7326194643974304,
"logits/chosen": 272.5470886230469,
"logits/rejected": 290.8238830566406,
"logps/chosen": -1.2763969898223877,
"logps/rejected": -1.282832384109497,
"loss": 29.9423,
"nll_loss": 1.5382884740829468,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.6381984949111938,
"rewards/margins": 0.0032175942324101925,
"rewards/rejected": -0.6414161920547485,
"step": 130
},
{
"epoch": 1.152,
"grad_norm": 52.25,
"learning_rate": 4.6526909443563075e-05,
"log_odds_chosen": 0.05014984682202339,
"log_odds_ratio": -0.7415339350700378,
"logits/chosen": 283.76141357421875,
"logits/rejected": 269.16754150390625,
"logps/chosen": -1.1821494102478027,
"logps/rejected": -1.2269926071166992,
"loss": 29.0019,
"nll_loss": 1.5523165464401245,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.5910747051239014,
"rewards/margins": 0.02242158353328705,
"rewards/rejected": -0.6134963035583496,
"step": 135
},
{
"epoch": 1.1946666666666665,
"grad_norm": 55.75,
"learning_rate": 4.5258363903702954e-05,
"log_odds_chosen": 0.25334566831588745,
"log_odds_ratio": -0.6387948989868164,
"logits/chosen": 279.69866943359375,
"logits/rejected": 307.73309326171875,
"logps/chosen": -1.0961264371871948,
"logps/rejected": -1.2705694437026978,
"loss": 27.7407,
"nll_loss": 1.4683058261871338,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.5480632185935974,
"rewards/margins": 0.08722147345542908,
"rewards/rejected": -0.6352847218513489,
"step": 140
},
{
"epoch": 1.2373333333333334,
"grad_norm": 63.75,
"learning_rate": 4.395188366703752e-05,
"log_odds_chosen": 0.27662745118141174,
"log_odds_ratio": -0.6522295475006104,
"logits/chosen": 272.47137451171875,
"logits/rejected": 291.1870422363281,
"logps/chosen": -1.1764074563980103,
"logps/rejected": -1.3605537414550781,
"loss": 27.2205,
"nll_loss": 1.449241042137146,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.5882037281990051,
"rewards/margins": 0.09207318723201752,
"rewards/rejected": -0.6802768707275391,
"step": 145
},
{
"epoch": 1.28,
"grad_norm": 84.5,
"learning_rate": 4.261071684928697e-05,
"log_odds_chosen": 0.08433417975902557,
"log_odds_ratio": -0.7295799255371094,
"logits/chosen": 283.64739990234375,
"logits/rejected": 284.56048583984375,
"logps/chosen": -1.1405603885650635,
"logps/rejected": -1.2061361074447632,
"loss": 27.0297,
"nll_loss": 1.4862051010131836,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.5702801942825317,
"rewards/margins": 0.03278781846165657,
"rewards/rejected": -0.6030680537223816,
"step": 150
},
{
"epoch": 1.3226666666666667,
"grad_norm": 45.75,
"learning_rate": 4.123819780247737e-05,
"log_odds_chosen": 0.19811879098415375,
"log_odds_ratio": -0.6645184755325317,
"logits/chosen": 271.0818176269531,
"logits/rejected": 282.7620544433594,
"logps/chosen": -1.0194677114486694,
"logps/rejected": -1.1592345237731934,
"loss": 26.6033,
"nll_loss": 1.4380306005477905,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.5097338557243347,
"rewards/margins": 0.06988338381052017,
"rewards/rejected": -0.5796172618865967,
"step": 155
},
{
"epoch": 1.3653333333333333,
"grad_norm": 49.25,
"learning_rate": 3.9837738825216133e-05,
"log_odds_chosen": 0.20502634346485138,
"log_odds_ratio": -0.6395789384841919,
"logits/chosen": 265.66180419921875,
"logits/rejected": 298.80450439453125,
"logps/chosen": -1.0500866174697876,
"logps/rejected": -1.1844158172607422,
"loss": 26.1919,
"nll_loss": 1.4191492795944214,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.5250433087348938,
"rewards/margins": 0.06716466695070267,
"rewards/rejected": -0.5922079086303711,
"step": 160
},
{
"epoch": 1.408,
"grad_norm": 26.25,
"learning_rate": 3.8412821679180084e-05,
"log_odds_chosen": 0.17818713188171387,
"log_odds_ratio": -0.6783817410469055,
"logits/chosen": 276.02899169921875,
"logits/rejected": 289.51385498046875,
"logps/chosen": -1.0516808032989502,
"logps/rejected": -1.1585873365402222,
"loss": 26.0691,
"nll_loss": 1.353134274482727,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.5258404016494751,
"rewards/margins": 0.053453266620635986,
"rewards/rejected": -0.5792936682701111,
"step": 165
},
{
"epoch": 1.4506666666666668,
"grad_norm": 23.125,
"learning_rate": 3.6966988932907276e-05,
"log_odds_chosen": 0.16093948483467102,
"log_odds_ratio": -0.6871160268783569,
"logits/chosen": 278.6529541015625,
"logits/rejected": 298.80657958984375,
"logps/chosen": -1.080251693725586,
"logps/rejected": -1.1996749639511108,
"loss": 26.6724,
"nll_loss": 1.4616249799728394,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.540125846862793,
"rewards/margins": 0.059711672365665436,
"rewards/rejected": -0.5998374819755554,
"step": 170
},
{
"epoch": 1.4933333333333334,
"grad_norm": 27.875,
"learning_rate": 3.5503835154413476e-05,
"log_odds_chosen": 0.286944180727005,
"log_odds_ratio": -0.6341909766197205,
"logits/chosen": 274.6024169921875,
"logits/rejected": 295.36651611328125,
"logps/chosen": -1.0407021045684814,
"logps/rejected": -1.2441030740737915,
"loss": 25.7035,
"nll_loss": 1.411714792251587,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.5203510522842407,
"rewards/margins": 0.10170049965381622,
"rewards/rejected": -0.6220515370368958,
"step": 175
},
{
"epoch": 1.536,
"grad_norm": 47.0,
"learning_rate": 3.4026997974529664e-05,
"log_odds_chosen": 0.16404980421066284,
"log_odds_ratio": -0.6638838052749634,
"logits/chosen": 290.327880859375,
"logits/rejected": 284.926513671875,
"logps/chosen": -1.093461275100708,
"logps/rejected": -1.1879903078079224,
"loss": 25.5271,
"nll_loss": 1.4601901769638062,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.546730637550354,
"rewards/margins": 0.047264464199543,
"rewards/rejected": -0.5939951539039612,
"step": 180
},
{
"epoch": 1.5786666666666667,
"grad_norm": 40.75,
"learning_rate": 3.25401490431787e-05,
"log_odds_chosen": 0.28145521879196167,
"log_odds_ratio": -0.6211186647415161,
"logits/chosen": 297.9057922363281,
"logits/rejected": 286.4951477050781,
"logps/chosen": -1.0374724864959717,
"logps/rejected": -1.2180078029632568,
"loss": 25.5928,
"nll_loss": 1.3627592325210571,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.5187362432479858,
"rewards/margins": 0.09026758372783661,
"rewards/rejected": -0.6090039014816284,
"step": 185
},
{
"epoch": 1.6213333333333333,
"grad_norm": 56.0,
"learning_rate": 3.104698490107504e-05,
"log_odds_chosen": 0.09670724719762802,
"log_odds_ratio": -0.6962383985519409,
"logits/chosen": 296.83575439453125,
"logits/rejected": 268.6614685058594,
"logps/chosen": -1.0726783275604248,
"logps/rejected": -1.1235979795455933,
"loss": 25.6578,
"nll_loss": 1.3929274082183838,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.5363391637802124,
"rewards/margins": 0.025459837168455124,
"rewards/rejected": -0.5617989897727966,
"step": 190
},
{
"epoch": 1.6640000000000001,
"grad_norm": 27.125,
"learning_rate": 2.9551217789542096e-05,
"log_odds_chosen": 0.08583483099937439,
"log_odds_ratio": -0.7124528884887695,
"logits/chosen": 291.0882568359375,
"logits/rejected": 282.97711181640625,
"logps/chosen": -1.0238406658172607,
"logps/rejected": -1.0693080425262451,
"loss": 25.3697,
"nll_loss": 1.4079334735870361,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.5119203329086304,
"rewards/margins": 0.02273363620042801,
"rewards/rejected": -0.5346540212631226,
"step": 195
},
{
"epoch": 1.7066666666666666,
"grad_norm": 29.875,
"learning_rate": 2.8056566421295443e-05,
"log_odds_chosen": 0.027378028258681297,
"log_odds_ratio": -0.7563061714172363,
"logits/chosen": 280.4498291015625,
"logits/rejected": 268.0575256347656,
"logps/chosen": -1.0595781803131104,
"logps/rejected": -1.0435364246368408,
"loss": 24.6268,
"nll_loss": 1.3725634813308716,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.5297890901565552,
"rewards/margins": -0.008020809851586819,
"rewards/rejected": -0.5217682123184204,
"step": 200
},
{
"epoch": 1.7493333333333334,
"grad_norm": 40.75,
"learning_rate": 2.656674673513705e-05,
"log_odds_chosen": 0.09508597105741501,
"log_odds_ratio": -0.7279762625694275,
"logits/chosen": 284.59503173828125,
"logits/rejected": 292.72509765625,
"logps/chosen": -1.1167399883270264,
"logps/rejected": -1.2022297382354736,
"loss": 25.7565,
"nll_loss": 1.4108952283859253,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.5583699941635132,
"rewards/margins": 0.04274484142661095,
"rewards/rejected": -0.6011148691177368,
"step": 205
},
{
"epoch": 1.792,
"grad_norm": 68.5,
"learning_rate": 2.508546265754587e-05,
"log_odds_chosen": 0.14177300035953522,
"log_odds_ratio": -0.6866236925125122,
"logits/chosen": 271.2067565917969,
"logits/rejected": 289.2135009765625,
"logps/chosen": -0.9864645004272461,
"logps/rejected": -1.0824763774871826,
"loss": 24.89,
"nll_loss": 1.4051529169082642,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.49323225021362305,
"rewards/margins": 0.048005927354097366,
"rewards/rejected": -0.5412381887435913,
"step": 210
},
{
"epoch": 1.8346666666666667,
"grad_norm": 29.5,
"learning_rate": 2.3616396894133145e-05,
"log_odds_chosen": 0.07325839251279831,
"log_odds_ratio": -0.7426969408988953,
"logits/chosen": 277.15673828125,
"logits/rejected": 272.313232421875,
"logps/chosen": -1.044854760169983,
"logps/rejected": -1.0880095958709717,
"loss": 24.9372,
"nll_loss": 1.420508623123169,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": -0.5224273800849915,
"rewards/margins": 0.02157733403146267,
"rewards/rejected": -0.5440047979354858,
"step": 215
},
{
"epoch": 1.8773333333333333,
"grad_norm": 18.75,
"learning_rate": 2.216320177385585e-05,
"log_odds_chosen": 0.16561657190322876,
"log_odds_ratio": -0.6900728940963745,
"logits/chosen": 269.1635437011719,
"logits/rejected": 290.73931884765625,
"logps/chosen": -0.9672033190727234,
"logps/rejected": -1.0741941928863525,
"loss": 24.5158,
"nll_loss": 1.3439892530441284,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.4836016595363617,
"rewards/margins": 0.05349547788500786,
"rewards/rejected": -0.5370970964431763,
"step": 220
},
{
"epoch": 1.92,
"grad_norm": 51.25,
"learning_rate": 2.072949016875158e-05,
"log_odds_chosen": 0.254954069852829,
"log_odds_ratio": -0.6858216524124146,
"logits/chosen": 277.99786376953125,
"logits/rejected": 272.8140563964844,
"logps/chosen": -1.023193597793579,
"logps/rejected": -1.2254831790924072,
"loss": 25.0004,
"nll_loss": 1.3852344751358032,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.5115967988967896,
"rewards/margins": 0.10114479064941406,
"rewards/rejected": -0.6127415895462036,
"step": 225
},
{
"epoch": 1.9626666666666668,
"grad_norm": 42.0,
"learning_rate": 1.9318826511769297e-05,
"log_odds_chosen": -0.024613792076706886,
"log_odds_ratio": -0.7803007364273071,
"logits/chosen": 278.9836730957031,
"logits/rejected": 275.0539855957031,
"logps/chosen": -1.0930712223052979,
"logps/rejected": -1.0882163047790527,
"loss": 25.3575,
"nll_loss": 1.3715641498565674,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.5465356111526489,
"rewards/margins": -0.0024274878669530153,
"rewards/rejected": -0.5441081523895264,
"step": 230
},
{
"epoch": 2.005333333333333,
"grad_norm": 27.875,
"learning_rate": 1.793471793502748e-05,
"log_odds_chosen": 0.18588228523731232,
"log_odds_ratio": -0.6733505129814148,
"logits/chosen": 266.9411315917969,
"logits/rejected": 268.85430908203125,
"logps/chosen": -0.966964066028595,
"logps/rejected": -1.075714349746704,
"loss": 24.4463,
"nll_loss": 1.2817761898040771,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.4834820330142975,
"rewards/margins": 0.05437516048550606,
"rewards/rejected": -0.537857174873352,
"step": 235
},
{
"epoch": 2.048,
"grad_norm": 26.625,
"learning_rate": 1.6580605550531018e-05,
"log_odds_chosen": 0.12490881979465485,
"log_odds_ratio": -0.6833258271217346,
"logits/chosen": 267.01580810546875,
"logits/rejected": 280.65118408203125,
"logps/chosen": -0.866047739982605,
"logps/rejected": -0.9292898178100586,
"loss": 21.5112,
"nll_loss": 1.1620063781738281,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.4330238699913025,
"rewards/margins": 0.03162097930908203,
"rewards/rejected": -0.4646449089050293,
"step": 240
},
{
"epoch": 2.0906666666666665,
"grad_norm": 25.0,
"learning_rate": 1.525985589502466e-05,
"log_odds_chosen": 0.490588515996933,
"log_odds_ratio": -0.5364745259284973,
"logits/chosen": 271.6618957519531,
"logits/rejected": 270.92242431640625,
"logps/chosen": -0.8149029016494751,
"logps/rejected": -1.1035759449005127,
"loss": 21.2697,
"nll_loss": 1.204815149307251,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.40745145082473755,
"rewards/margins": 0.1443365067243576,
"rewards/rejected": -0.5517879724502563,
"step": 245
},
{
"epoch": 2.1333333333333333,
"grad_norm": 25.75,
"learning_rate": 1.3975752560252138e-05,
"log_odds_chosen": 0.43111294507980347,
"log_odds_ratio": -0.610099196434021,
"logits/chosen": 256.387939453125,
"logits/rejected": 280.3132629394531,
"logps/chosen": -0.8138604164123535,
"logps/rejected": -1.0904266834259033,
"loss": 20.7732,
"nll_loss": 1.0765711069107056,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.40693020820617676,
"rewards/margins": 0.13828308880329132,
"rewards/rejected": -0.5452133417129517,
"step": 250
},
{
"epoch": 2.176,
"grad_norm": 24.125,
"learning_rate": 1.27314880294298e-05,
"log_odds_chosen": 0.3809678852558136,
"log_odds_ratio": -0.6075100898742676,
"logits/chosen": 266.83233642578125,
"logits/rejected": 259.447265625,
"logps/chosen": -0.8320444822311401,
"logps/rejected": -1.043336033821106,
"loss": 20.9562,
"nll_loss": 1.1581926345825195,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.41602224111557007,
"rewards/margins": 0.1056457981467247,
"rewards/rejected": -0.521668016910553,
"step": 255
},
{
"epoch": 2.2186666666666666,
"grad_norm": 23.5,
"learning_rate": 1.1530155740230252e-05,
"log_odds_chosen": 0.4367187023162842,
"log_odds_ratio": -0.5616321563720703,
"logits/chosen": 255.9156036376953,
"logits/rejected": 277.2770080566406,
"logps/chosen": -0.8188761472702026,
"logps/rejected": -1.048285722732544,
"loss": 20.5943,
"nll_loss": 1.0858075618743896,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.4094380736351013,
"rewards/margins": 0.11470470577478409,
"rewards/rejected": -0.524142861366272,
"step": 260
},
{
"epoch": 2.2613333333333334,
"grad_norm": 19.0,
"learning_rate": 1.0374742394008972e-05,
"log_odds_chosen": 0.2701203525066376,
"log_odds_ratio": -0.6517602205276489,
"logits/chosen": 260.30401611328125,
"logits/rejected": 264.9652404785156,
"logps/chosen": -0.8311630487442017,
"logps/rejected": -0.991308867931366,
"loss": 20.3899,
"nll_loss": 1.069040298461914,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.41558152437210083,
"rewards/margins": 0.08007291704416275,
"rewards/rejected": -0.495654433965683,
"step": 265
},
{
"epoch": 2.304,
"grad_norm": 26.125,
"learning_rate": 9.268120530394061e-06,
"log_odds_chosen": 0.31922250986099243,
"log_odds_ratio": -0.6070750951766968,
"logits/chosen": 260.91009521484375,
"logits/rejected": 261.7254333496094,
"logps/chosen": -0.7734104990959167,
"logps/rejected": -0.9511035084724426,
"loss": 20.48,
"nll_loss": 1.0357019901275635,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.3867052495479584,
"rewards/margins": 0.08884649723768234,
"rewards/rejected": -0.4755517542362213,
"step": 270
},
{
"epoch": 2.3466666666666667,
"grad_norm": 30.625,
"learning_rate": 8.213041385700211e-06,
"log_odds_chosen": 0.3881288170814514,
"log_odds_ratio": -0.5812792181968689,
"logits/chosen": 275.2894287109375,
"logits/rejected": 252.8758087158203,
"logps/chosen": -0.8068667650222778,
"logps/rejected": -1.0178911685943604,
"loss": 20.1315,
"nll_loss": 1.0528508424758911,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.4034333825111389,
"rewards/margins": 0.10551220178604126,
"rewards/rejected": -0.5089455842971802,
"step": 275
},
{
"epoch": 2.389333333333333,
"grad_norm": 20.625,
"learning_rate": 7.212128052921661e-06,
"log_odds_chosen": 0.43442073464393616,
"log_odds_ratio": -0.5687755346298218,
"logits/chosen": 260.5086975097656,
"logits/rejected": 262.8999328613281,
"logps/chosen": -0.7444295287132263,
"logps/rejected": -0.9539780616760254,
"loss": 19.7724,
"nll_loss": 1.065710425376892,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.37221476435661316,
"rewards/margins": 0.10477427393198013,
"rewards/rejected": -0.4769890308380127,
"step": 280
},
{
"epoch": 2.432,
"grad_norm": 18.875,
"learning_rate": 6.267868960309771e-06,
"log_odds_chosen": 0.3951905071735382,
"log_odds_ratio": -0.5774310231208801,
"logits/chosen": 264.29644775390625,
"logits/rejected": 254.9677276611328,
"logps/chosen": -0.787185788154602,
"logps/rejected": -1.0126456022262573,
"loss": 19.8597,
"nll_loss": 1.018532633781433,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.393592894077301,
"rewards/margins": 0.11272994428873062,
"rewards/rejected": -0.5063228011131287,
"step": 285
},
{
"epoch": 2.474666666666667,
"grad_norm": 21.875,
"learning_rate": 5.382611684748257e-06,
"log_odds_chosen": 0.35993748903274536,
"log_odds_ratio": -0.5955245494842529,
"logits/chosen": 249.52297973632812,
"logits/rejected": 276.64947509765625,
"logps/chosen": -0.7756280303001404,
"logps/rejected": -0.9831158518791199,
"loss": 20.2687,
"nll_loss": 1.0755739212036133,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.3878140151500702,
"rewards/margins": 0.10374389588832855,
"rewards/rejected": -0.49155792593955994,
"step": 290
},
{
"epoch": 2.517333333333333,
"grad_norm": 18.375,
"learning_rate": 4.558557115307222e-06,
"log_odds_chosen": 0.3779729902744293,
"log_odds_ratio": -0.6153554320335388,
"logits/chosen": 262.4172668457031,
"logits/rejected": 273.03375244140625,
"logps/chosen": -0.7413235902786255,
"logps/rejected": -0.9600993990898132,
"loss": 19.9948,
"nll_loss": 1.108370304107666,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.37066179513931274,
"rewards/margins": 0.10938791930675507,
"rewards/rejected": -0.4800496995449066,
"step": 295
},
{
"epoch": 2.56,
"grad_norm": 23.0,
"learning_rate": 3.7977539814861106e-06,
"log_odds_chosen": 0.326369047164917,
"log_odds_ratio": -0.6079571843147278,
"logits/chosen": 257.0772705078125,
"logits/rejected": 261.3754577636719,
"logps/chosen": -0.792614221572876,
"logps/rejected": -0.960332989692688,
"loss": 20.7289,
"nll_loss": 1.0533356666564941,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.396307110786438,
"rewards/margins": 0.0838593915104866,
"rewards/rejected": -0.480166494846344,
"step": 300
},
{
"epoch": 2.602666666666667,
"grad_norm": 20.375,
"learning_rate": 3.102093759749376e-06,
"log_odds_chosen": 0.260172963142395,
"log_odds_ratio": -0.6579862833023071,
"logits/chosen": 261.3594055175781,
"logits/rejected": 266.4259033203125,
"logps/chosen": -0.7964383959770203,
"logps/rejected": -0.9274827241897583,
"loss": 20.2045,
"nll_loss": 1.1274524927139282,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.39821919798851013,
"rewards/margins": 0.06552214920520782,
"rewards/rejected": -0.46374136209487915,
"step": 305
},
{
"epoch": 2.6453333333333333,
"grad_norm": 20.5,
"learning_rate": 2.4733059710179828e-06,
"log_odds_chosen": 0.4331514239311218,
"log_odds_ratio": -0.5700831413269043,
"logits/chosen": 265.8288879394531,
"logits/rejected": 274.1119689941406,
"logps/chosen": -0.7710822820663452,
"logps/rejected": -0.9825912714004517,
"loss": 20.6386,
"nll_loss": 1.094036340713501,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.3855411410331726,
"rewards/margins": 0.10575449466705322,
"rewards/rejected": -0.49129563570022583,
"step": 310
},
{
"epoch": 2.6879999999999997,
"grad_norm": 19.875,
"learning_rate": 1.912953880807884e-06,
"log_odds_chosen": 0.3509272634983063,
"log_odds_ratio": -0.6254650354385376,
"logits/chosen": 269.66375732421875,
"logits/rejected": 276.72247314453125,
"logps/chosen": -0.8052287101745605,
"logps/rejected": -1.0146431922912598,
"loss": 19.5416,
"nll_loss": 1.0689141750335693,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.4026143550872803,
"rewards/margins": 0.10470722615718842,
"rewards/rejected": -0.5073215961456299,
"step": 315
},
{
"epoch": 2.7306666666666666,
"grad_norm": 22.375,
"learning_rate": 1.422430612705613e-06,
"log_odds_chosen": 0.2932564318180084,
"log_odds_ratio": -0.6279724836349487,
"logits/chosen": 263.7853088378906,
"logits/rejected": 254.1911163330078,
"logps/chosen": -0.8304440379142761,
"logps/rejected": -0.9680387377738953,
"loss": 20.6751,
"nll_loss": 1.1476246118545532,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.41522201895713806,
"rewards/margins": 0.06879737973213196,
"rewards/rejected": -0.48401936888694763,
"step": 320
},
{
"epoch": 2.7733333333333334,
"grad_norm": 20.375,
"learning_rate": 1.002955684843585e-06,
"log_odds_chosen": 0.4094099998474121,
"log_odds_ratio": -0.6279257535934448,
"logits/chosen": 263.61419677734375,
"logits/rejected": 278.83197021484375,
"logps/chosen": -0.7544084787368774,
"logps/rejected": -0.9667471051216125,
"loss": 20.3327,
"nll_loss": 1.0606472492218018,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.3772042393684387,
"rewards/margins": 0.10616934299468994,
"rewards/rejected": -0.4833735525608063,
"step": 325
},
{
"epoch": 2.816,
"grad_norm": 22.25,
"learning_rate": 6.555719779858294e-07,
"log_odds_chosen": 0.20543567836284637,
"log_odds_ratio": -0.6976035833358765,
"logits/chosen": 265.85736083984375,
"logits/rejected": 258.0235900878906,
"logps/chosen": -0.8123346567153931,
"logps/rejected": -0.9113311767578125,
"loss": 20.2874,
"nll_loss": 1.0404599905014038,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.40616732835769653,
"rewards/margins": 0.04949823394417763,
"rewards/rejected": -0.45566558837890625,
"step": 330
},
{
"epoch": 2.8586666666666667,
"grad_norm": 23.5,
"learning_rate": 3.8114314276213145e-07,
"log_odds_chosen": 0.2348608523607254,
"log_odds_ratio": -0.6606994867324829,
"logits/chosen": 264.12615966796875,
"logits/rejected": 273.71734619140625,
"logps/chosen": -0.7940512895584106,
"logps/rejected": -0.925014317035675,
"loss": 20.1626,
"nll_loss": 1.1582380533218384,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.3970256447792053,
"rewards/margins": 0.0654815211892128,
"rewards/rejected": -0.4625071585178375,
"step": 335
},
{
"epoch": 2.9013333333333335,
"grad_norm": 22.125,
"learning_rate": 1.8035145249644225e-07,
"log_odds_chosen": 0.23013488948345184,
"log_odds_ratio": -0.6557679772377014,
"logits/chosen": 261.1979064941406,
"logits/rejected": 262.1890563964844,
"logps/chosen": -0.8094332814216614,
"logps/rejected": -0.9246999621391296,
"loss": 19.9063,
"nll_loss": 1.1390663385391235,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.4047166407108307,
"rewards/margins": 0.05763337016105652,
"rewards/rejected": -0.4623499810695648,
"step": 340
},
{
"epoch": 2.944,
"grad_norm": 21.875,
"learning_rate": 5.369610696794536e-08,
"log_odds_chosen": 0.22427129745483398,
"log_odds_ratio": -0.6425634622573853,
"logits/chosen": 272.1688232421875,
"logits/rejected": 257.414306640625,
"logps/chosen": -0.8942912817001343,
"logps/rejected": -1.0066581964492798,
"loss": 19.9408,
"nll_loss": 1.096421241760254,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.44714564085006714,
"rewards/margins": 0.056183360517024994,
"rewards/rejected": -0.5033290982246399,
"step": 345
},
{
"epoch": 2.986666666666667,
"grad_norm": 26.5,
"learning_rate": 1.4919913217092962e-09,
"log_odds_chosen": 0.5936909914016724,
"log_odds_ratio": -0.538439154624939,
"logits/chosen": 274.3494567871094,
"logits/rejected": 245.5052947998047,
"logps/chosen": -0.7209577560424805,
"logps/rejected": -1.0420339107513428,
"loss": 19.7242,
"nll_loss": 1.0608009099960327,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.36047887802124023,
"rewards/margins": 0.16053801774978638,
"rewards/rejected": -0.5210169553756714,
"step": 350
},
{
"epoch": 2.9952,
"step": 351,
"total_flos": 0.0,
"train_loss": 270.4568550620663,
"train_runtime": 4053.6602,
"train_samples_per_second": 5.551,
"train_steps_per_second": 0.087
}
],
"logging_steps": 5,
"max_steps": 351,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}