{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9952, "eval_steps": 500, "global_step": 351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.042666666666666665, "grad_norm": 23808.0, "learning_rate": 8.333333333333334e-06, "log_odds_chosen": 4.913786888122559, "log_odds_ratio": -7.455605983734131, "logits/chosen": 104.62542724609375, "logits/rejected": 103.22361755371094, "logps/chosen": -24.285247802734375, "logps/rejected": -29.19942283630371, "loss": 999.1373, "nll_loss": 9.384310722351074, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -12.142623901367188, "rewards/margins": 2.4570868015289307, "rewards/rejected": -14.599711418151855, "step": 5 }, { "epoch": 0.08533333333333333, "grad_norm": 20480.0, "learning_rate": 1.6666666666666667e-05, "log_odds_chosen": 3.161984443664551, "log_odds_ratio": -4.3201093673706055, "logits/chosen": 116.70096588134766, "logits/rejected": 107.84611511230469, "logps/chosen": -22.356525421142578, "logps/rejected": -25.517377853393555, "loss": 1471.3339, "nll_loss": 7.478154182434082, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -11.178262710571289, "rewards/margins": 1.580425500869751, "rewards/rejected": -12.758688926696777, "step": 10 }, { "epoch": 0.128, "grad_norm": 9792.0, "learning_rate": 2.5e-05, "log_odds_chosen": 5.405481338500977, "log_odds_ratio": -3.742039442062378, "logits/chosen": 101.9460220336914, "logits/rejected": 144.56015014648438, "logps/chosen": -16.96074867248535, "logps/rejected": -22.36530303955078, "loss": 2500.4584, "nll_loss": 9.246469497680664, "rewards/accuracies": 0.5625, "rewards/chosen": -8.480374336242676, "rewards/margins": 2.7022786140441895, "rewards/rejected": -11.18265151977539, "step": 15 }, { "epoch": 0.17066666666666666, "grad_norm": 23424.0, "learning_rate": 3.3333333333333335e-05, "log_odds_chosen": 1.755802869796753, "log_odds_ratio": -8.513590812683105, "logits/chosen": 138.0966033935547, "logits/rejected": 115.8319320678711, "logps/chosen": -21.58835220336914, "logps/rejected": -23.347524642944336, "loss": -909.4568, "nll_loss": 7.946342468261719, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -10.79417610168457, "rewards/margins": 0.879586398601532, "rewards/rejected": -11.673762321472168, "step": 20 }, { "epoch": 0.21333333333333335, "grad_norm": 81920.0, "learning_rate": 4.1666666666666665e-05, "log_odds_chosen": -3.714871883392334, "log_odds_ratio": -11.706196784973145, "logits/chosen": 131.1918182373047, "logits/rejected": 112.025146484375, "logps/chosen": -27.263925552368164, "logps/rejected": -23.55154037475586, "loss": -1289.5892, "nll_loss": 11.121248245239258, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -13.631962776184082, "rewards/margins": -1.8561919927597046, "rewards/rejected": -11.77577018737793, "step": 25 }, { "epoch": 0.256, "grad_norm": 23424.0, "learning_rate": 5e-05, "log_odds_chosen": 5.615313529968262, "log_odds_ratio": -6.0788164138793945, "logits/chosen": 120.47991943359375, "logits/rejected": 131.72543334960938, "logps/chosen": -20.34619903564453, "logps/rejected": -25.962305068969727, "loss": 1989.467, "nll_loss": 11.813726425170898, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -10.173099517822266, "rewards/margins": 2.808053731918335, "rewards/rejected": -12.981152534484863, "step": 30 }, { "epoch": 0.2986666666666667, "grad_norm": 12288.0, "learning_rate": 5.833333333333333e-05, "log_odds_chosen": 6.8634352684021, "log_odds_ratio": -4.409341335296631, "logits/chosen": 106.82928466796875, "logits/rejected": 135.06765747070312, "logps/chosen": -17.419185638427734, "logps/rejected": -24.284481048583984, "loss": 2134.8127, "nll_loss": 8.908151626586914, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -8.709592819213867, "rewards/margins": 3.4326491355895996, "rewards/rejected": -12.142240524291992, "step": 35 }, { "epoch": 0.3413333333333333, "grad_norm": 20096.0, "learning_rate": 5.997613110678538e-05, "log_odds_chosen": 10.421220779418945, "log_odds_ratio": -4.025184154510498, "logits/chosen": 97.60896301269531, "logits/rejected": 131.75054931640625, "logps/chosen": -20.480510711669922, "logps/rejected": -30.901927947998047, "loss": 2004.0334, "nll_loss": 10.660150527954102, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -10.240255355834961, "rewards/margins": 5.2107110023498535, "rewards/rejected": -15.450963973999023, "step": 40 }, { "epoch": 0.384, "grad_norm": 29952.0, "learning_rate": 5.987922881985718e-05, "log_odds_chosen": 2.5370476245880127, "log_odds_ratio": -9.996942520141602, "logits/chosen": 125.96684265136719, "logits/rejected": 126.08040618896484, "logps/chosen": -22.960010528564453, "logps/rejected": -25.498239517211914, "loss": 504.6511, "nll_loss": 9.340021133422852, "rewards/accuracies": 0.5, "rewards/chosen": -11.480005264282227, "rewards/margins": 1.2691147327423096, "rewards/rejected": -12.749119758605957, "step": 45 }, { "epoch": 0.4266666666666667, "grad_norm": 13312.0, "learning_rate": 5.970804206224711e-05, "log_odds_chosen": 9.48165512084961, "log_odds_ratio": -6.348289489746094, "logits/chosen": 115.5280532836914, "logits/rejected": 133.51206970214844, "logps/chosen": -18.29220962524414, "logps/rejected": -27.774459838867188, "loss": 357.9646, "nll_loss": 8.295930862426758, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -9.14610481262207, "rewards/margins": 4.74112606048584, "rewards/rejected": -13.887229919433594, "step": 50 }, { "epoch": 0.4693333333333333, "grad_norm": 36864.0, "learning_rate": 5.9462996431207166e-05, "log_odds_chosen": -1.4241477251052856, "log_odds_ratio": -7.040617942810059, "logits/chosen": 90.20933532714844, "logits/rejected": 77.6080322265625, "logps/chosen": -21.791763305664062, "logps/rejected": -20.36836051940918, "loss": 710.7211, "nll_loss": 8.630704879760742, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -10.895881652832031, "rewards/margins": -0.7117019891738892, "rewards/rejected": -10.18418025970459, "step": 55 }, { "epoch": 0.512, "grad_norm": 22912.0, "learning_rate": 5.914470114878602e-05, "log_odds_chosen": 0.059395600110292435, "log_odds_ratio": -7.545324802398682, "logits/chosen": 74.50141906738281, "logits/rejected": 72.20657348632812, "logps/chosen": -27.5406551361084, "logps/rejected": -27.600433349609375, "loss": 835.8969, "nll_loss": 11.118535995483398, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -13.7703275680542, "rewards/margins": 0.029887771233916283, "rewards/rejected": -13.800216674804688, "step": 60 }, { "epoch": 0.5546666666666666, "grad_norm": 59136.0, "learning_rate": 5.875394754720707e-05, "log_odds_chosen": 0.004063797183334827, "log_odds_ratio": -8.829879760742188, "logits/chosen": 107.4288101196289, "logits/rejected": 100.03871154785156, "logps/chosen": -26.154687881469727, "logps/rejected": -26.159423828125, "loss": 1129.1766, "nll_loss": 8.619396209716797, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -13.077343940734863, "rewards/margins": 0.0023682594764977694, "rewards/rejected": -13.0797119140625, "step": 65 }, { "epoch": 0.5973333333333334, "grad_norm": 84992.0, "learning_rate": 5.8291707101491815e-05, "log_odds_chosen": -9.859933853149414, "log_odds_ratio": -13.291154861450195, "logits/chosen": 130.4120635986328, "logits/rejected": 107.79060363769531, "logps/chosen": -33.194881439208984, "logps/rejected": -23.33577537536621, "loss": -1070.0952, "nll_loss": 9.995885848999023, "rewards/accuracies": 0.4375, "rewards/chosen": -16.597440719604492, "rewards/margins": -4.929553031921387, "rewards/rejected": -11.667887687683105, "step": 70 }, { "epoch": 0.64, "grad_norm": 27264.0, "learning_rate": 5.77591290142199e-05, "log_odds_chosen": -1.0986392498016357, "log_odds_ratio": -6.415988922119141, "logits/chosen": 202.5902862548828, "logits/rejected": 170.36766052246094, "logps/chosen": -28.743408203125, "logps/rejected": -27.644739151000977, "loss": 3134.2316, "nll_loss": 16.13515853881836, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -14.3717041015625, "rewards/margins": -0.5493333339691162, "rewards/rejected": -13.822369575500488, "step": 75 }, { "epoch": 0.6826666666666666, "grad_norm": 5920.0, "learning_rate": 5.7157537358430446e-05, "log_odds_chosen": 5.658118724822998, "log_odds_ratio": -7.236645698547363, "logits/chosen": 79.78996276855469, "logits/rejected": 120.5929946899414, "logps/chosen": -39.893455505371094, "logps/rejected": -45.551578521728516, "loss": 1430.3527, "nll_loss": 26.075185775756836, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -19.946727752685547, "rewards/margins": 2.829059362411499, "rewards/rejected": -22.775789260864258, "step": 80 }, { "epoch": 0.7253333333333334, "grad_norm": 7264.0, "learning_rate": 5.648842778576781e-05, "log_odds_chosen": -2.8542323112487793, "log_odds_ratio": -11.537806510925293, "logits/chosen": 43.76961135864258, "logits/rejected": 40.082550048828125, "logps/chosen": -41.100486755371094, "logps/rejected": -38.24618911743164, "loss": 27.785, "nll_loss": 18.119293212890625, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -20.550243377685547, "rewards/margins": -1.4271516799926758, "rewards/rejected": -19.12309455871582, "step": 85 }, { "epoch": 0.768, "grad_norm": 8160.0, "learning_rate": 5.575346380805599e-05, "log_odds_chosen": 6.750527858734131, "log_odds_ratio": -4.975089073181152, "logits/chosen": 142.46315002441406, "logits/rejected": 194.29443359375, "logps/chosen": -27.23212242126465, "logps/rejected": -33.98273849487305, "loss": 410.7923, "nll_loss": 13.293853759765625, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -13.616061210632324, "rewards/margins": 3.3753085136413574, "rewards/rejected": -16.991369247436523, "step": 90 }, { "epoch": 0.8106666666666666, "grad_norm": 6752.0, "learning_rate": 5.4954472661546075e-05, "log_odds_chosen": -1.6101436614990234, "log_odds_ratio": -5.586986064910889, "logits/chosen": 265.1175537109375, "logits/rejected": 220.3394012451172, "logps/chosen": -16.311241149902344, "logps/rejected": -14.702871322631836, "loss": 697.2691, "nll_loss": 8.886590957641602, "rewards/accuracies": 0.38749998807907104, "rewards/chosen": -8.155620574951172, "rewards/margins": -0.8041850924491882, "rewards/rejected": -7.351435661315918, "step": 95 }, { "epoch": 0.8533333333333334, "grad_norm": 1976.0, "learning_rate": 5.4093440764119056e-05, "log_odds_chosen": 0.8467995524406433, "log_odds_ratio": -3.7493503093719482, "logits/chosen": 214.7117156982422, "logits/rejected": 216.2617950439453, "logps/chosen": -10.674482345581055, "logps/rejected": -11.517151832580566, "loss": 671.6329, "nll_loss": 5.953970909118652, "rewards/accuracies": 0.5, "rewards/chosen": -5.337241172790527, "rewards/margins": 0.4213342070579529, "rewards/rejected": -5.758575916290283, "step": 100 }, { "epoch": 0.896, "grad_norm": 233.0, "learning_rate": 5.317250877673799e-05, "log_odds_chosen": 0.0025218098890036345, "log_odds_ratio": -1.1279939413070679, "logits/chosen": 274.94171142578125, "logits/rejected": 293.26910400390625, "logps/chosen": -3.1382219791412354, "logps/rejected": -3.117029905319214, "loss": 59.8813, "nll_loss": 2.759488105773926, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -1.5691109895706177, "rewards/margins": -0.010595941916108131, "rewards/rejected": -1.558514952659607, "step": 105 }, { "epoch": 0.9386666666666666, "grad_norm": 93.0, "learning_rate": 5.219396628142752e-05, "log_odds_chosen": 0.3583167493343353, "log_odds_ratio": -0.7731421589851379, "logits/chosen": 287.4047546386719, "logits/rejected": 319.8270263671875, "logps/chosen": -2.0119025707244873, "logps/rejected": -2.3544743061065674, "loss": 41.0939, "nll_loss": 2.1219072341918945, "rewards/accuracies": 0.5625, "rewards/chosen": -1.0059512853622437, "rewards/margins": 0.17128589749336243, "rewards/rejected": -1.1772371530532837, "step": 110 }, { "epoch": 0.9813333333333333, "grad_norm": 75.5, "learning_rate": 5.1160246089012264e-05, "log_odds_chosen": -0.07454674690961838, "log_odds_ratio": -0.8472925424575806, "logits/chosen": 304.05816650390625, "logits/rejected": 292.1544494628906, "logps/chosen": -1.64755117893219, "logps/rejected": -1.558643102645874, "loss": 35.4153, "nll_loss": 1.8526198863983154, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.823775589466095, "rewards/margins": -0.04445408657193184, "rewards/rejected": -0.779321551322937, "step": 115 }, { "epoch": 1.024, "grad_norm": 106.5, "learning_rate": 5.007391819076575e-05, "log_odds_chosen": 0.20053406059741974, "log_odds_ratio": -0.6979594826698303, "logits/chosen": 296.3975524902344, "logits/rejected": 293.24871826171875, "logps/chosen": -1.3861749172210693, "logps/rejected": -1.5457828044891357, "loss": 32.7872, "nll_loss": 1.7506237030029297, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.6930874586105347, "rewards/margins": 0.07980402559041977, "rewards/rejected": -0.7728914022445679, "step": 120 }, { "epoch": 1.0666666666666667, "grad_norm": 41.0, "learning_rate": 4.893768336900717e-05, "log_odds_chosen": 0.20815667510032654, "log_odds_ratio": -0.654870331287384, "logits/chosen": 285.3860168457031, "logits/rejected": 291.6961975097656, "logps/chosen": -1.3074676990509033, "logps/rejected": -1.4680944681167603, "loss": 30.4078, "nll_loss": 1.648654580116272, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.6537338495254517, "rewards/margins": 0.08031338453292847, "rewards/rejected": -0.7340472340583801, "step": 125 }, { "epoch": 1.1093333333333333, "grad_norm": 83.5, "learning_rate": 4.775436648253103e-05, "log_odds_chosen": 0.010318088345229626, "log_odds_ratio": -0.7326194643974304, "logits/chosen": 272.5470886230469, "logits/rejected": 290.8238830566406, "logps/chosen": -1.2763969898223877, "logps/rejected": -1.282832384109497, "loss": 29.9423, "nll_loss": 1.5382884740829468, "rewards/accuracies": 0.5, "rewards/chosen": -0.6381984949111938, "rewards/margins": 0.0032175942324101925, "rewards/rejected": -0.6414161920547485, "step": 130 }, { "epoch": 1.152, "grad_norm": 52.25, "learning_rate": 4.6526909443563075e-05, "log_odds_chosen": 0.05014984682202339, "log_odds_ratio": -0.7415339350700378, "logits/chosen": 283.76141357421875, "logits/rejected": 269.16754150390625, "logps/chosen": -1.1821494102478027, "logps/rejected": -1.2269926071166992, "loss": 29.0019, "nll_loss": 1.5523165464401245, "rewards/accuracies": 0.5, "rewards/chosen": -0.5910747051239014, "rewards/margins": 0.02242158353328705, "rewards/rejected": -0.6134963035583496, "step": 135 }, { "epoch": 1.1946666666666665, "grad_norm": 55.75, "learning_rate": 4.5258363903702954e-05, "log_odds_chosen": 0.25334566831588745, "log_odds_ratio": -0.6387948989868164, "logits/chosen": 279.69866943359375, "logits/rejected": 307.73309326171875, "logps/chosen": -1.0961264371871948, "logps/rejected": -1.2705694437026978, "loss": 27.7407, "nll_loss": 1.4683058261871338, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.5480632185935974, "rewards/margins": 0.08722147345542908, "rewards/rejected": -0.6352847218513489, "step": 140 }, { "epoch": 1.2373333333333334, "grad_norm": 63.75, "learning_rate": 4.395188366703752e-05, "log_odds_chosen": 0.27662745118141174, "log_odds_ratio": -0.6522295475006104, "logits/chosen": 272.47137451171875, "logits/rejected": 291.1870422363281, "logps/chosen": -1.1764074563980103, "logps/rejected": -1.3605537414550781, "loss": 27.2205, "nll_loss": 1.449241042137146, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5882037281990051, "rewards/margins": 0.09207318723201752, "rewards/rejected": -0.6802768707275391, "step": 145 }, { "epoch": 1.28, "grad_norm": 84.5, "learning_rate": 4.261071684928697e-05, "log_odds_chosen": 0.08433417975902557, "log_odds_ratio": -0.7295799255371094, "logits/chosen": 283.64739990234375, "logits/rejected": 284.56048583984375, "logps/chosen": -1.1405603885650635, "logps/rejected": -1.2061361074447632, "loss": 27.0297, "nll_loss": 1.4862051010131836, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5702801942825317, "rewards/margins": 0.03278781846165657, "rewards/rejected": -0.6030680537223816, "step": 150 }, { "epoch": 1.3226666666666667, "grad_norm": 45.75, "learning_rate": 4.123819780247737e-05, "log_odds_chosen": 0.19811879098415375, "log_odds_ratio": -0.6645184755325317, "logits/chosen": 271.0818176269531, "logits/rejected": 282.7620544433594, "logps/chosen": -1.0194677114486694, "logps/rejected": -1.1592345237731934, "loss": 26.6033, "nll_loss": 1.4380306005477905, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5097338557243347, "rewards/margins": 0.06988338381052017, "rewards/rejected": -0.5796172618865967, "step": 155 }, { "epoch": 1.3653333333333333, "grad_norm": 49.25, "learning_rate": 3.9837738825216133e-05, "log_odds_chosen": 0.20502634346485138, "log_odds_ratio": -0.6395789384841919, "logits/chosen": 265.66180419921875, "logits/rejected": 298.80450439453125, "logps/chosen": -1.0500866174697876, "logps/rejected": -1.1844158172607422, "loss": 26.1919, "nll_loss": 1.4191492795944214, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5250433087348938, "rewards/margins": 0.06716466695070267, "rewards/rejected": -0.5922079086303711, "step": 160 }, { "epoch": 1.408, "grad_norm": 26.25, "learning_rate": 3.8412821679180084e-05, "log_odds_chosen": 0.17818713188171387, "log_odds_ratio": -0.6783817410469055, "logits/chosen": 276.02899169921875, "logits/rejected": 289.51385498046875, "logps/chosen": -1.0516808032989502, "logps/rejected": -1.1585873365402222, "loss": 26.0691, "nll_loss": 1.353134274482727, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.5258404016494751, "rewards/margins": 0.053453266620635986, "rewards/rejected": -0.5792936682701111, "step": 165 }, { "epoch": 1.4506666666666668, "grad_norm": 23.125, "learning_rate": 3.6966988932907276e-05, "log_odds_chosen": 0.16093948483467102, "log_odds_ratio": -0.6871160268783569, "logits/chosen": 278.6529541015625, "logits/rejected": 298.80657958984375, "logps/chosen": -1.080251693725586, "logps/rejected": -1.1996749639511108, "loss": 26.6724, "nll_loss": 1.4616249799728394, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.540125846862793, "rewards/margins": 0.059711672365665436, "rewards/rejected": -0.5998374819755554, "step": 170 }, { "epoch": 1.4933333333333334, "grad_norm": 27.875, "learning_rate": 3.5503835154413476e-05, "log_odds_chosen": 0.286944180727005, "log_odds_ratio": -0.6341909766197205, "logits/chosen": 274.6024169921875, "logits/rejected": 295.36651611328125, "logps/chosen": -1.0407021045684814, "logps/rejected": -1.2441030740737915, "loss": 25.7035, "nll_loss": 1.411714792251587, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.5203510522842407, "rewards/margins": 0.10170049965381622, "rewards/rejected": -0.6220515370368958, "step": 175 }, { "epoch": 1.536, "grad_norm": 47.0, "learning_rate": 3.4026997974529664e-05, "log_odds_chosen": 0.16404980421066284, "log_odds_ratio": -0.6638838052749634, "logits/chosen": 290.327880859375, "logits/rejected": 284.926513671875, "logps/chosen": -1.093461275100708, "logps/rejected": -1.1879903078079224, "loss": 25.5271, "nll_loss": 1.4601901769638062, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.546730637550354, "rewards/margins": 0.047264464199543, "rewards/rejected": -0.5939951539039612, "step": 180 }, { "epoch": 1.5786666666666667, "grad_norm": 40.75, "learning_rate": 3.25401490431787e-05, "log_odds_chosen": 0.28145521879196167, "log_odds_ratio": -0.6211186647415161, "logits/chosen": 297.9057922363281, "logits/rejected": 286.4951477050781, "logps/chosen": -1.0374724864959717, "logps/rejected": -1.2180078029632568, "loss": 25.5928, "nll_loss": 1.3627592325210571, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5187362432479858, "rewards/margins": 0.09026758372783661, "rewards/rejected": -0.6090039014816284, "step": 185 }, { "epoch": 1.6213333333333333, "grad_norm": 56.0, "learning_rate": 3.104698490107504e-05, "log_odds_chosen": 0.09670724719762802, "log_odds_ratio": -0.6962383985519409, "logits/chosen": 296.83575439453125, "logits/rejected": 268.6614685058594, "logps/chosen": -1.0726783275604248, "logps/rejected": -1.1235979795455933, "loss": 25.6578, "nll_loss": 1.3929274082183838, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5363391637802124, "rewards/margins": 0.025459837168455124, "rewards/rejected": -0.5617989897727966, "step": 190 }, { "epoch": 1.6640000000000001, "grad_norm": 27.125, "learning_rate": 2.9551217789542096e-05, "log_odds_chosen": 0.08583483099937439, "log_odds_ratio": -0.7124528884887695, "logits/chosen": 291.0882568359375, "logits/rejected": 282.97711181640625, "logps/chosen": -1.0238406658172607, "logps/rejected": -1.0693080425262451, "loss": 25.3697, "nll_loss": 1.4079334735870361, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.5119203329086304, "rewards/margins": 0.02273363620042801, "rewards/rejected": -0.5346540212631226, "step": 195 }, { "epoch": 1.7066666666666666, "grad_norm": 29.875, "learning_rate": 2.8056566421295443e-05, "log_odds_chosen": 0.027378028258681297, "log_odds_ratio": -0.7563061714172363, "logits/chosen": 280.4498291015625, "logits/rejected": 268.0575256347656, "logps/chosen": -1.0595781803131104, "logps/rejected": -1.0435364246368408, "loss": 24.6268, "nll_loss": 1.3725634813308716, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.5297890901565552, "rewards/margins": -0.008020809851586819, "rewards/rejected": -0.5217682123184204, "step": 200 }, { "epoch": 1.7493333333333334, "grad_norm": 40.75, "learning_rate": 2.656674673513705e-05, "log_odds_chosen": 0.09508597105741501, "log_odds_ratio": -0.7279762625694275, "logits/chosen": 284.59503173828125, "logits/rejected": 292.72509765625, "logps/chosen": -1.1167399883270264, "logps/rejected": -1.2022297382354736, "loss": 25.7565, "nll_loss": 1.4108952283859253, "rewards/accuracies": 0.5, "rewards/chosen": -0.5583699941635132, "rewards/margins": 0.04274484142661095, "rewards/rejected": -0.6011148691177368, "step": 205 }, { "epoch": 1.792, "grad_norm": 68.5, "learning_rate": 2.508546265754587e-05, "log_odds_chosen": 0.14177300035953522, "log_odds_ratio": -0.6866236925125122, "logits/chosen": 271.2067565917969, "logits/rejected": 289.2135009765625, "logps/chosen": -0.9864645004272461, "logps/rejected": -1.0824763774871826, "loss": 24.89, "nll_loss": 1.4051529169082642, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.49323225021362305, "rewards/margins": 0.048005927354097366, "rewards/rejected": -0.5412381887435913, "step": 210 }, { "epoch": 1.8346666666666667, "grad_norm": 29.5, "learning_rate": 2.3616396894133145e-05, "log_odds_chosen": 0.07325839251279831, "log_odds_ratio": -0.7426969408988953, "logits/chosen": 277.15673828125, "logits/rejected": 272.313232421875, "logps/chosen": -1.044854760169983, "logps/rejected": -1.0880095958709717, "loss": 24.9372, "nll_loss": 1.420508623123169, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.5224273800849915, "rewards/margins": 0.02157733403146267, "rewards/rejected": -0.5440047979354858, "step": 215 }, { "epoch": 1.8773333333333333, "grad_norm": 18.75, "learning_rate": 2.216320177385585e-05, "log_odds_chosen": 0.16561657190322876, "log_odds_ratio": -0.6900728940963745, "logits/chosen": 269.1635437011719, "logits/rejected": 290.73931884765625, "logps/chosen": -0.9672033190727234, "logps/rejected": -1.0741941928863525, "loss": 24.5158, "nll_loss": 1.3439892530441284, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4836016595363617, "rewards/margins": 0.05349547788500786, "rewards/rejected": -0.5370970964431763, "step": 220 }, { "epoch": 1.92, "grad_norm": 51.25, "learning_rate": 2.072949016875158e-05, "log_odds_chosen": 0.254954069852829, "log_odds_ratio": -0.6858216524124146, "logits/chosen": 277.99786376953125, "logits/rejected": 272.8140563964844, "logps/chosen": -1.023193597793579, "logps/rejected": -1.2254831790924072, "loss": 25.0004, "nll_loss": 1.3852344751358032, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5115967988967896, "rewards/margins": 0.10114479064941406, "rewards/rejected": -0.6127415895462036, "step": 225 }, { "epoch": 1.9626666666666668, "grad_norm": 42.0, "learning_rate": 1.9318826511769297e-05, "log_odds_chosen": -0.024613792076706886, "log_odds_ratio": -0.7803007364273071, "logits/chosen": 278.9836730957031, "logits/rejected": 275.0539855957031, "logps/chosen": -1.0930712223052979, "logps/rejected": -1.0882163047790527, "loss": 25.3575, "nll_loss": 1.3715641498565674, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.5465356111526489, "rewards/margins": -0.0024274878669530153, "rewards/rejected": -0.5441081523895264, "step": 230 }, { "epoch": 2.005333333333333, "grad_norm": 27.875, "learning_rate": 1.793471793502748e-05, "log_odds_chosen": 0.18588228523731232, "log_odds_ratio": -0.6733505129814148, "logits/chosen": 266.9411315917969, "logits/rejected": 268.85430908203125, "logps/chosen": -0.966964066028595, "logps/rejected": -1.075714349746704, "loss": 24.4463, "nll_loss": 1.2817761898040771, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4834820330142975, "rewards/margins": 0.05437516048550606, "rewards/rejected": -0.537857174873352, "step": 235 }, { "epoch": 2.048, "grad_norm": 26.625, "learning_rate": 1.6580605550531018e-05, "log_odds_chosen": 0.12490881979465485, "log_odds_ratio": -0.6833258271217346, "logits/chosen": 267.01580810546875, "logits/rejected": 280.65118408203125, "logps/chosen": -0.866047739982605, "logps/rejected": -0.9292898178100586, "loss": 21.5112, "nll_loss": 1.1620063781738281, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4330238699913025, "rewards/margins": 0.03162097930908203, "rewards/rejected": -0.4646449089050293, "step": 240 }, { "epoch": 2.0906666666666665, "grad_norm": 25.0, "learning_rate": 1.525985589502466e-05, "log_odds_chosen": 0.490588515996933, "log_odds_ratio": -0.5364745259284973, "logits/chosen": 271.6618957519531, "logits/rejected": 270.92242431640625, "logps/chosen": -0.8149029016494751, "logps/rejected": -1.1035759449005127, "loss": 21.2697, "nll_loss": 1.204815149307251, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.40745145082473755, "rewards/margins": 0.1443365067243576, "rewards/rejected": -0.5517879724502563, "step": 245 }, { "epoch": 2.1333333333333333, "grad_norm": 25.75, "learning_rate": 1.3975752560252138e-05, "log_odds_chosen": 0.43111294507980347, "log_odds_ratio": -0.610099196434021, "logits/chosen": 256.387939453125, "logits/rejected": 280.3132629394531, "logps/chosen": -0.8138604164123535, "logps/rejected": -1.0904266834259033, "loss": 20.7732, "nll_loss": 1.0765711069107056, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.40693020820617676, "rewards/margins": 0.13828308880329132, "rewards/rejected": -0.5452133417129517, "step": 250 }, { "epoch": 2.176, "grad_norm": 24.125, "learning_rate": 1.27314880294298e-05, "log_odds_chosen": 0.3809678852558136, "log_odds_ratio": -0.6075100898742676, "logits/chosen": 266.83233642578125, "logits/rejected": 259.447265625, "logps/chosen": -0.8320444822311401, "logps/rejected": -1.043336033821106, "loss": 20.9562, "nll_loss": 1.1581926345825195, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.41602224111557007, "rewards/margins": 0.1056457981467247, "rewards/rejected": -0.521668016910553, "step": 255 }, { "epoch": 2.2186666666666666, "grad_norm": 23.5, "learning_rate": 1.1530155740230252e-05, "log_odds_chosen": 0.4367187023162842, "log_odds_ratio": -0.5616321563720703, "logits/chosen": 255.9156036376953, "logits/rejected": 277.2770080566406, "logps/chosen": -0.8188761472702026, "logps/rejected": -1.048285722732544, "loss": 20.5943, "nll_loss": 1.0858075618743896, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4094380736351013, "rewards/margins": 0.11470470577478409, "rewards/rejected": -0.524142861366272, "step": 260 }, { "epoch": 2.2613333333333334, "grad_norm": 19.0, "learning_rate": 1.0374742394008972e-05, "log_odds_chosen": 0.2701203525066376, "log_odds_ratio": -0.6517602205276489, "logits/chosen": 260.30401611328125, "logits/rejected": 264.9652404785156, "logps/chosen": -0.8311630487442017, "logps/rejected": -0.991308867931366, "loss": 20.3899, "nll_loss": 1.069040298461914, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.41558152437210083, "rewards/margins": 0.08007291704416275, "rewards/rejected": -0.495654433965683, "step": 265 }, { "epoch": 2.304, "grad_norm": 26.125, "learning_rate": 9.268120530394061e-06, "log_odds_chosen": 0.31922250986099243, "log_odds_ratio": -0.6070750951766968, "logits/chosen": 260.91009521484375, "logits/rejected": 261.7254333496094, "logps/chosen": -0.7734104990959167, "logps/rejected": -0.9511035084724426, "loss": 20.48, "nll_loss": 1.0357019901275635, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3867052495479584, "rewards/margins": 0.08884649723768234, "rewards/rejected": -0.4755517542362213, "step": 270 }, { "epoch": 2.3466666666666667, "grad_norm": 30.625, "learning_rate": 8.213041385700211e-06, "log_odds_chosen": 0.3881288170814514, "log_odds_ratio": -0.5812792181968689, "logits/chosen": 275.2894287109375, "logits/rejected": 252.8758087158203, "logps/chosen": -0.8068667650222778, "logps/rejected": -1.0178911685943604, "loss": 20.1315, "nll_loss": 1.0528508424758911, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4034333825111389, "rewards/margins": 0.10551220178604126, "rewards/rejected": -0.5089455842971802, "step": 275 }, { "epoch": 2.389333333333333, "grad_norm": 20.625, "learning_rate": 7.212128052921661e-06, "log_odds_chosen": 0.43442073464393616, "log_odds_ratio": -0.5687755346298218, "logits/chosen": 260.5086975097656, "logits/rejected": 262.8999328613281, "logps/chosen": -0.7444295287132263, "logps/rejected": -0.9539780616760254, "loss": 19.7724, "nll_loss": 1.065710425376892, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.37221476435661316, "rewards/margins": 0.10477427393198013, "rewards/rejected": -0.4769890308380127, "step": 280 }, { "epoch": 2.432, "grad_norm": 18.875, "learning_rate": 6.267868960309771e-06, "log_odds_chosen": 0.3951905071735382, "log_odds_ratio": -0.5774310231208801, "logits/chosen": 264.29644775390625, "logits/rejected": 254.9677276611328, "logps/chosen": -0.787185788154602, "logps/rejected": -1.0126456022262573, "loss": 19.8597, "nll_loss": 1.018532633781433, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.393592894077301, "rewards/margins": 0.11272994428873062, "rewards/rejected": -0.5063228011131287, "step": 285 }, { "epoch": 2.474666666666667, "grad_norm": 21.875, "learning_rate": 5.382611684748257e-06, "log_odds_chosen": 0.35993748903274536, "log_odds_ratio": -0.5955245494842529, "logits/chosen": 249.52297973632812, "logits/rejected": 276.64947509765625, "logps/chosen": -0.7756280303001404, "logps/rejected": -0.9831158518791199, "loss": 20.2687, "nll_loss": 1.0755739212036133, "rewards/accuracies": 0.6875, "rewards/chosen": -0.3878140151500702, "rewards/margins": 0.10374389588832855, "rewards/rejected": -0.49155792593955994, "step": 290 }, { "epoch": 2.517333333333333, "grad_norm": 18.375, "learning_rate": 4.558557115307222e-06, "log_odds_chosen": 0.3779729902744293, "log_odds_ratio": -0.6153554320335388, "logits/chosen": 262.4172668457031, "logits/rejected": 273.03375244140625, "logps/chosen": -0.7413235902786255, "logps/rejected": -0.9600993990898132, "loss": 19.9948, "nll_loss": 1.108370304107666, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37066179513931274, "rewards/margins": 0.10938791930675507, "rewards/rejected": -0.4800496995449066, "step": 295 }, { "epoch": 2.56, "grad_norm": 23.0, "learning_rate": 3.7977539814861106e-06, "log_odds_chosen": 0.326369047164917, "log_odds_ratio": -0.6079571843147278, "logits/chosen": 257.0772705078125, "logits/rejected": 261.3754577636719, "logps/chosen": -0.792614221572876, "logps/rejected": -0.960332989692688, "loss": 20.7289, "nll_loss": 1.0533356666564941, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.396307110786438, "rewards/margins": 0.0838593915104866, "rewards/rejected": -0.480166494846344, "step": 300 }, { "epoch": 2.602666666666667, "grad_norm": 20.375, "learning_rate": 3.102093759749376e-06, "log_odds_chosen": 0.260172963142395, "log_odds_ratio": -0.6579862833023071, "logits/chosen": 261.3594055175781, "logits/rejected": 266.4259033203125, "logps/chosen": -0.7964383959770203, "logps/rejected": -0.9274827241897583, "loss": 20.2045, "nll_loss": 1.1274524927139282, "rewards/accuracies": 0.625, "rewards/chosen": -0.39821919798851013, "rewards/margins": 0.06552214920520782, "rewards/rejected": -0.46374136209487915, "step": 305 }, { "epoch": 2.6453333333333333, "grad_norm": 20.5, "learning_rate": 2.4733059710179828e-06, "log_odds_chosen": 0.4331514239311218, "log_odds_ratio": -0.5700831413269043, "logits/chosen": 265.8288879394531, "logits/rejected": 274.1119689941406, "logps/chosen": -0.7710822820663452, "logps/rejected": -0.9825912714004517, "loss": 20.6386, "nll_loss": 1.094036340713501, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.3855411410331726, "rewards/margins": 0.10575449466705322, "rewards/rejected": -0.49129563570022583, "step": 310 }, { "epoch": 2.6879999999999997, "grad_norm": 19.875, "learning_rate": 1.912953880807884e-06, "log_odds_chosen": 0.3509272634983063, "log_odds_ratio": -0.6254650354385376, "logits/chosen": 269.66375732421875, "logits/rejected": 276.72247314453125, "logps/chosen": -0.8052287101745605, "logps/rejected": -1.0146431922912598, "loss": 19.5416, "nll_loss": 1.0689141750335693, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4026143550872803, "rewards/margins": 0.10470722615718842, "rewards/rejected": -0.5073215961456299, "step": 315 }, { "epoch": 2.7306666666666666, "grad_norm": 22.375, "learning_rate": 1.422430612705613e-06, "log_odds_chosen": 0.2932564318180084, "log_odds_ratio": -0.6279724836349487, "logits/chosen": 263.7853088378906, "logits/rejected": 254.1911163330078, "logps/chosen": -0.8304440379142761, "logps/rejected": -0.9680387377738953, "loss": 20.6751, "nll_loss": 1.1476246118545532, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.41522201895713806, "rewards/margins": 0.06879737973213196, "rewards/rejected": -0.48401936888694763, "step": 320 }, { "epoch": 2.7733333333333334, "grad_norm": 20.375, "learning_rate": 1.002955684843585e-06, "log_odds_chosen": 0.4094099998474121, "log_odds_ratio": -0.6279257535934448, "logits/chosen": 263.61419677734375, "logits/rejected": 278.83197021484375, "logps/chosen": -0.7544084787368774, "logps/rejected": -0.9667471051216125, "loss": 20.3327, "nll_loss": 1.0606472492218018, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3772042393684387, "rewards/margins": 0.10616934299468994, "rewards/rejected": -0.4833735525608063, "step": 325 }, { "epoch": 2.816, "grad_norm": 22.25, "learning_rate": 6.555719779858294e-07, "log_odds_chosen": 0.20543567836284637, "log_odds_ratio": -0.6976035833358765, "logits/chosen": 265.85736083984375, "logits/rejected": 258.0235900878906, "logps/chosen": -0.8123346567153931, "logps/rejected": -0.9113311767578125, "loss": 20.2874, "nll_loss": 1.0404599905014038, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.40616732835769653, "rewards/margins": 0.04949823394417763, "rewards/rejected": -0.45566558837890625, "step": 330 }, { "epoch": 2.8586666666666667, "grad_norm": 23.5, "learning_rate": 3.8114314276213145e-07, "log_odds_chosen": 0.2348608523607254, "log_odds_ratio": -0.6606994867324829, "logits/chosen": 264.12615966796875, "logits/rejected": 273.71734619140625, "logps/chosen": -0.7940512895584106, "logps/rejected": -0.925014317035675, "loss": 20.1626, "nll_loss": 1.1582380533218384, "rewards/accuracies": 0.625, "rewards/chosen": -0.3970256447792053, "rewards/margins": 0.0654815211892128, "rewards/rejected": -0.4625071585178375, "step": 335 }, { "epoch": 2.9013333333333335, "grad_norm": 22.125, "learning_rate": 1.8035145249644225e-07, "log_odds_chosen": 0.23013488948345184, "log_odds_ratio": -0.6557679772377014, "logits/chosen": 261.1979064941406, "logits/rejected": 262.1890563964844, "logps/chosen": -0.8094332814216614, "logps/rejected": -0.9246999621391296, "loss": 19.9063, "nll_loss": 1.1390663385391235, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4047166407108307, "rewards/margins": 0.05763337016105652, "rewards/rejected": -0.4623499810695648, "step": 340 }, { "epoch": 2.944, "grad_norm": 21.875, "learning_rate": 5.369610696794536e-08, "log_odds_chosen": 0.22427129745483398, "log_odds_ratio": -0.6425634622573853, "logits/chosen": 272.1688232421875, "logits/rejected": 257.414306640625, "logps/chosen": -0.8942912817001343, "logps/rejected": -1.0066581964492798, "loss": 19.9408, "nll_loss": 1.096421241760254, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.44714564085006714, "rewards/margins": 0.056183360517024994, "rewards/rejected": -0.5033290982246399, "step": 345 }, { "epoch": 2.986666666666667, "grad_norm": 26.5, "learning_rate": 1.4919913217092962e-09, "log_odds_chosen": 0.5936909914016724, "log_odds_ratio": -0.538439154624939, "logits/chosen": 274.3494567871094, "logits/rejected": 245.5052947998047, "logps/chosen": -0.7209577560424805, "logps/rejected": -1.0420339107513428, "loss": 19.7242, "nll_loss": 1.0608009099960327, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36047887802124023, "rewards/margins": 0.16053801774978638, "rewards/rejected": -0.5210169553756714, "step": 350 }, { "epoch": 2.9952, "step": 351, "total_flos": 0.0, "train_loss": 270.4568550620663, "train_runtime": 4053.6602, "train_samples_per_second": 5.551, "train_steps_per_second": 0.087 } ], "logging_steps": 5, "max_steps": 351, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }