diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,27033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.932551319648094, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0019550342130987292, + "grad_norm": Infinity, + "learning_rate": 5e-05, + "log_odds_chosen": -0.015795767307281494, + "log_odds_ratio": -0.9426531791687012, + "logits/chosen": 4.509999752044678, + "logits/rejected": 4.036651134490967, + "logps/chosen": -2.4494705200195312, + "logps/rejected": -2.521124839782715, + "loss": 2.7527, + "nll_loss": 2.510831832885742, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.24494706094264984, + "rewards/margins": 0.0071654487401247025, + "rewards/rejected": -0.252112478017807, + "step": 1 + }, + { + "epoch": 0.0039100684261974585, + "grad_norm": Infinity, + "learning_rate": 5e-05, + "log_odds_chosen": -0.8478785157203674, + "log_odds_ratio": -1.368590235710144, + "logits/chosen": 4.757806301116943, + "logits/rejected": 4.029632568359375, + "logps/chosen": -2.2490885257720947, + "logps/rejected": -1.665450930595398, + "loss": 2.7074, + "nll_loss": 2.4760875701904297, + "rewards/accuracies": 0.10000000149011612, + "rewards/chosen": -0.22490885853767395, + "rewards/margins": -0.058363769203424454, + "rewards/rejected": -0.1665450930595398, + "step": 2 + }, + { + "epoch": 0.005865102639296188, + "grad_norm": 5.289555072784424, + "learning_rate": 4.9967384213959555e-05, + "log_odds_chosen": -0.8217504620552063, + "log_odds_ratio": -1.3360508680343628, + "logits/chosen": 4.989155292510986, + "logits/rejected": 4.370809078216553, + "logps/chosen": -2.220688819885254, + "logps/rejected": -1.6879653930664062, + "loss": 2.7197, + "nll_loss": 2.4985923767089844, + "rewards/accuracies": 0.30000001192092896, + "rewards/chosen": -0.2220688760280609, + "rewards/margins": -0.05327234044671059, + "rewards/rejected": -0.16879653930664062, + "step": 3 + }, + { + "epoch": 0.007820136852394917, + "grad_norm": Infinity, + "learning_rate": 4.9967384213959555e-05, + "log_odds_chosen": -0.262212336063385, + "log_odds_ratio": -1.0405582189559937, + "logits/chosen": 1.6093991994857788, + "logits/rejected": 1.5807156562805176, + "logps/chosen": -7.319422721862793, + "logps/rejected": -7.057631492614746, + "loss": 7.7594, + "nll_loss": 7.3431196212768555, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.7319422960281372, + "rewards/margins": -0.026179134845733643, + "rewards/rejected": -0.7057631611824036, + "step": 4 + }, + { + "epoch": 0.009775171065493646, + "grad_norm": Infinity, + "learning_rate": 4.9967384213959555e-05, + "log_odds_chosen": 0.3653239905834198, + "log_odds_ratio": -0.6956540942192078, + "logits/chosen": 1.7297883033752441, + "logits/rejected": 1.8507649898529053, + "logps/chosen": -8.115639686584473, + "logps/rejected": -8.480916976928711, + "loss": 7.772, + "nll_loss": 8.032361030578613, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8115639686584473, + "rewards/margins": 0.03652782365679741, + "rewards/rejected": -0.848091721534729, + "step": 5 + }, + { + "epoch": 0.011730205278592375, + "grad_norm": Infinity, + "learning_rate": 4.9967384213959555e-05, + "log_odds_chosen": 0.5331651568412781, + "log_odds_ratio": -0.6491658687591553, + "logits/chosen": 1.4535019397735596, + "logits/rejected": 1.4423842430114746, + "logps/chosen": -7.6941237449646, + "logps/rejected": -8.22753620147705, + "loss": 7.6892, + "nll_loss": 7.484104156494141, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.7694123983383179, + "rewards/margins": 0.05334123969078064, + "rewards/rejected": -0.8227536678314209, + "step": 6 + }, + { + "epoch": 0.013685239491691105, + "grad_norm": 86.77243041992188, + "learning_rate": 4.9934768427919115e-05, + "log_odds_chosen": 0.014515519142150879, + "log_odds_ratio": -1.1149556636810303, + "logits/chosen": 1.816298007965088, + "logits/rejected": 1.6502597332000732, + "logps/chosen": -7.889425754547119, + "logps/rejected": -7.906527519226074, + "loss": 7.7864, + "nll_loss": 7.659824848175049, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7889425754547119, + "rewards/margins": 0.0017101634293794632, + "rewards/rejected": -0.7906526923179626, + "step": 7 + }, + { + "epoch": 0.015640273704789834, + "grad_norm": 10.747298240661621, + "learning_rate": 4.990215264187867e-05, + "log_odds_chosen": -0.35515540838241577, + "log_odds_ratio": -1.0484645366668701, + "logits/chosen": 5.855646133422852, + "logits/rejected": 5.524779796600342, + "logps/chosen": -3.059504508972168, + "logps/rejected": -2.7580294609069824, + "loss": 3.4599, + "nll_loss": 3.464139461517334, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.30595046281814575, + "rewards/margins": -0.0301474891602993, + "rewards/rejected": -0.27580296993255615, + "step": 8 + }, + { + "epoch": 0.017595307917888565, + "grad_norm": 18.993223190307617, + "learning_rate": 4.986953685583823e-05, + "log_odds_chosen": 0.1849060207605362, + "log_odds_ratio": -0.6256051063537598, + "logits/chosen": 3.321678638458252, + "logits/rejected": 3.460010051727295, + "logps/chosen": -2.7108137607574463, + "logps/rejected": -2.888458013534546, + "loss": 2.8039, + "nll_loss": 2.897648811340332, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.2710813879966736, + "rewards/margins": 0.017764439806342125, + "rewards/rejected": -0.28884583711624146, + "step": 9 + }, + { + "epoch": 0.019550342130987292, + "grad_norm": 2.582282543182373, + "learning_rate": 4.983692106979778e-05, + "log_odds_chosen": -0.05771147459745407, + "log_odds_ratio": -0.8145978450775146, + "logits/chosen": 3.9804024696350098, + "logits/rejected": 3.851738929748535, + "logps/chosen": -2.3440346717834473, + "logps/rejected": -2.3212854862213135, + "loss": 2.5043, + "nll_loss": 2.472588539123535, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.23440346121788025, + "rewards/margins": -0.0022749179042875767, + "rewards/rejected": -0.2321285605430603, + "step": 10 + }, + { + "epoch": 0.021505376344086023, + "grad_norm": 2.2410075664520264, + "learning_rate": 4.980430528375734e-05, + "log_odds_chosen": 0.27020424604415894, + "log_odds_ratio": -0.6314987540245056, + "logits/chosen": 3.751436233520508, + "logits/rejected": 3.456789016723633, + "logps/chosen": -2.1009750366210938, + "logps/rejected": -2.362942934036255, + "loss": 2.3282, + "nll_loss": 2.2502100467681885, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.2100975215435028, + "rewards/margins": 0.026196792721748352, + "rewards/rejected": -0.23629431426525116, + "step": 11 + }, + { + "epoch": 0.02346041055718475, + "grad_norm": 1.916319489479065, + "learning_rate": 4.977168949771689e-05, + "log_odds_chosen": 1.4035148620605469, + "log_odds_ratio": -0.4143460988998413, + "logits/chosen": 3.662899971008301, + "logits/rejected": 3.434567928314209, + "logps/chosen": -2.010244846343994, + "logps/rejected": -3.360196590423584, + "loss": 2.2287, + "nll_loss": 2.277205467224121, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.20102447271347046, + "rewards/margins": 0.13499517738819122, + "rewards/rejected": -0.3360196650028229, + "step": 12 + }, + { + "epoch": 0.02541544477028348, + "grad_norm": 11.726612091064453, + "learning_rate": 4.973907371167645e-05, + "log_odds_chosen": 0.5809874534606934, + "log_odds_ratio": -0.521969199180603, + "logits/chosen": 3.145890474319458, + "logits/rejected": 2.917055606842041, + "logps/chosen": -1.9293737411499023, + "logps/rejected": -2.456306219100952, + "loss": 2.158, + "nll_loss": 2.0984244346618652, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.19293737411499023, + "rewards/margins": 0.052693240344524384, + "rewards/rejected": -0.24563060700893402, + "step": 13 + }, + { + "epoch": 0.02737047898338221, + "grad_norm": 2.1595611572265625, + "learning_rate": 4.970645792563601e-05, + "log_odds_chosen": 0.3715955913066864, + "log_odds_ratio": -0.7007839679718018, + "logits/chosen": 2.5651016235351562, + "logits/rejected": 2.3899197578430176, + "logps/chosen": -2.1322546005249023, + "logps/rejected": -2.4914326667785645, + "loss": 2.1844, + "nll_loss": 2.300642490386963, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.21322545409202576, + "rewards/margins": 0.03591781482100487, + "rewards/rejected": -0.24914324283599854, + "step": 14 + }, + { + "epoch": 0.02932551319648094, + "grad_norm": 1.4395924806594849, + "learning_rate": 4.967384213959557e-05, + "log_odds_chosen": 1.902299165725708, + "log_odds_ratio": -0.4035990834236145, + "logits/chosen": 3.0067198276519775, + "logits/rejected": 2.35882568359375, + "logps/chosen": -1.6744657754898071, + "logps/rejected": -3.4663596153259277, + "loss": 2.0726, + "nll_loss": 1.8043333292007446, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1674465835094452, + "rewards/margins": 0.17918938398361206, + "rewards/rejected": -0.34663593769073486, + "step": 15 + }, + { + "epoch": 0.03128054740957967, + "grad_norm": 1.7133126258850098, + "learning_rate": 4.964122635355512e-05, + "log_odds_chosen": 2.042612314224243, + "log_odds_ratio": -0.42081552743911743, + "logits/chosen": 3.1265344619750977, + "logits/rejected": 1.9012497663497925, + "logps/chosen": -1.8470971584320068, + "logps/rejected": -3.8250365257263184, + "loss": 2.0164, + "nll_loss": 1.9392006397247314, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.18470972776412964, + "rewards/margins": 0.19779396057128906, + "rewards/rejected": -0.3825036883354187, + "step": 16 + }, + { + "epoch": 0.033235581622678395, + "grad_norm": 1.987940788269043, + "learning_rate": 4.960861056751468e-05, + "log_odds_chosen": 1.7238825559616089, + "log_odds_ratio": -0.3945392072200775, + "logits/chosen": 2.298821210861206, + "logits/rejected": 2.1295201778411865, + "logps/chosen": -1.656211018562317, + "logps/rejected": -3.2878236770629883, + "loss": 1.9799, + "nll_loss": 1.6572669744491577, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1656211018562317, + "rewards/margins": 0.1631612777709961, + "rewards/rejected": -0.3287823796272278, + "step": 17 + }, + { + "epoch": 0.03519061583577713, + "grad_norm": 1.1870821714401245, + "learning_rate": 4.9575994781474235e-05, + "log_odds_chosen": 2.3520071506500244, + "log_odds_ratio": -0.4231254458427429, + "logits/chosen": 2.779679536819458, + "logits/rejected": 2.738187551498413, + "logps/chosen": -1.9245288372039795, + "logps/rejected": -4.202671051025391, + "loss": 1.943, + "nll_loss": 1.9871594905853271, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.19245287775993347, + "rewards/margins": 0.2278142124414444, + "rewards/rejected": -0.42026710510253906, + "step": 18 + }, + { + "epoch": 0.03714565004887586, + "grad_norm": 0.8726271986961365, + "learning_rate": 4.9543378995433794e-05, + "log_odds_chosen": 4.4119086265563965, + "log_odds_ratio": -0.09288666397333145, + "logits/chosen": 2.9857940673828125, + "logits/rejected": 2.4204530715942383, + "logps/chosen": -1.7135571241378784, + "logps/rejected": -5.934418678283691, + "loss": 1.885, + "nll_loss": 1.939065933227539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1713557094335556, + "rewards/margins": 0.4220861792564392, + "rewards/rejected": -0.593441903591156, + "step": 19 + }, + { + "epoch": 0.039100684261974585, + "grad_norm": 1.0302271842956543, + "learning_rate": 4.951076320939335e-05, + "log_odds_chosen": 2.3574986457824707, + "log_odds_ratio": -0.2766231894493103, + "logits/chosen": 3.2681617736816406, + "logits/rejected": 2.802415609359741, + "logps/chosen": -1.6375116109848022, + "logps/rejected": -3.854844093322754, + "loss": 1.8801, + "nll_loss": 1.6658718585968018, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16375115513801575, + "rewards/margins": 0.22173325717449188, + "rewards/rejected": -0.38548439741134644, + "step": 20 + }, + { + "epoch": 0.04105571847507331, + "grad_norm": 0.9916279315948486, + "learning_rate": 4.9478147423352907e-05, + "log_odds_chosen": 2.248506546020508, + "log_odds_ratio": -0.2879258394241333, + "logits/chosen": 3.302178144454956, + "logits/rejected": 2.478320598602295, + "logps/chosen": -1.7906379699707031, + "logps/rejected": -3.928553342819214, + "loss": 1.8432, + "nll_loss": 1.9126579761505127, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1790637969970703, + "rewards/margins": 0.21379154920578003, + "rewards/rejected": -0.3928554058074951, + "step": 21 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 0.8581139445304871, + "learning_rate": 4.944553163731246e-05, + "log_odds_chosen": 4.131821632385254, + "log_odds_ratio": -0.17351174354553223, + "logits/chosen": 2.910461187362671, + "logits/rejected": 2.3424339294433594, + "logps/chosen": -1.6405582427978516, + "logps/rejected": -5.594732284545898, + "loss": 1.8164, + "nll_loss": 1.810897707939148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16405582427978516, + "rewards/margins": 0.3954174518585205, + "rewards/rejected": -0.5594732761383057, + "step": 22 + }, + { + "epoch": 0.044965786901270774, + "grad_norm": 0.7575121521949768, + "learning_rate": 4.941291585127202e-05, + "log_odds_chosen": 1.7131013870239258, + "log_odds_ratio": -0.44207483530044556, + "logits/chosen": 3.097622871398926, + "logits/rejected": 2.6684460639953613, + "logps/chosen": -1.5862209796905518, + "logps/rejected": -3.2317705154418945, + "loss": 1.8122, + "nll_loss": 1.6682943105697632, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.15862208604812622, + "rewards/margins": 0.16455495357513428, + "rewards/rejected": -0.3231770694255829, + "step": 23 + }, + { + "epoch": 0.0469208211143695, + "grad_norm": 0.8802225589752197, + "learning_rate": 4.938030006523157e-05, + "log_odds_chosen": 5.059432029724121, + "log_odds_ratio": -0.13222631812095642, + "logits/chosen": 2.924544095993042, + "logits/rejected": 2.3114912509918213, + "logps/chosen": -1.5928711891174316, + "logps/rejected": -6.4368896484375, + "loss": 1.7634, + "nll_loss": 1.5913580656051636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15928712487220764, + "rewards/margins": 0.4844018220901489, + "rewards/rejected": -0.643688976764679, + "step": 24 + }, + { + "epoch": 0.04887585532746823, + "grad_norm": 0.878475546836853, + "learning_rate": 4.934768427919113e-05, + "log_odds_chosen": 4.486309051513672, + "log_odds_ratio": -0.26535820960998535, + "logits/chosen": 2.714141368865967, + "logits/rejected": 2.3120827674865723, + "logps/chosen": -1.4966808557510376, + "logps/rejected": -5.815276145935059, + "loss": 1.7472, + "nll_loss": 1.6685757637023926, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14966809749603271, + "rewards/margins": 0.4318596124649048, + "rewards/rejected": -0.5815277099609375, + "step": 25 + }, + { + "epoch": 0.05083088954056696, + "grad_norm": 0.7596239447593689, + "learning_rate": 4.9315068493150684e-05, + "log_odds_chosen": 3.3921988010406494, + "log_odds_ratio": -0.26721328496932983, + "logits/chosen": 2.6133570671081543, + "logits/rejected": 2.067133903503418, + "logps/chosen": -1.4509804248809814, + "logps/rejected": -4.670901298522949, + "loss": 1.7303, + "nll_loss": 1.524125099182129, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14509804546833038, + "rewards/margins": 0.32199206948280334, + "rewards/rejected": -0.4670901298522949, + "step": 26 + }, + { + "epoch": 0.05278592375366569, + "grad_norm": 0.9542296528816223, + "learning_rate": 4.928245270711024e-05, + "log_odds_chosen": 1.1886552572250366, + "log_odds_ratio": -0.5350046157836914, + "logits/chosen": 2.7397265434265137, + "logits/rejected": 2.6278796195983887, + "logps/chosen": -1.535649061203003, + "logps/rejected": -2.6682233810424805, + "loss": 1.7308, + "nll_loss": 1.6171307563781738, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.153564915060997, + "rewards/margins": 0.11325742304325104, + "rewards/rejected": -0.26682233810424805, + "step": 27 + }, + { + "epoch": 0.05474095796676442, + "grad_norm": 1.0910342931747437, + "learning_rate": 4.9249836921069796e-05, + "log_odds_chosen": 2.6057987213134766, + "log_odds_ratio": -0.5113380551338196, + "logits/chosen": 2.376919984817505, + "logits/rejected": 1.8373279571533203, + "logps/chosen": -1.6364576816558838, + "logps/rejected": -4.149959087371826, + "loss": 1.7184, + "nll_loss": 1.7846497297286987, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.16364578902721405, + "rewards/margins": 0.2513501048088074, + "rewards/rejected": -0.4149959087371826, + "step": 28 + }, + { + "epoch": 0.056695992179863146, + "grad_norm": 0.6902698278427124, + "learning_rate": 4.9217221135029355e-05, + "log_odds_chosen": 1.4629368782043457, + "log_odds_ratio": -0.4645235240459442, + "logits/chosen": 2.602464437484741, + "logits/rejected": 2.449523687362671, + "logps/chosen": -1.5652334690093994, + "logps/rejected": -2.9268691539764404, + "loss": 1.697, + "nll_loss": 1.744439959526062, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.15652336180210114, + "rewards/margins": 0.13616356253623962, + "rewards/rejected": -0.29268693923950195, + "step": 29 + }, + { + "epoch": 0.05865102639296188, + "grad_norm": 0.8389008045196533, + "learning_rate": 4.918460534898891e-05, + "log_odds_chosen": 2.6029791831970215, + "log_odds_ratio": -0.2978704869747162, + "logits/chosen": 2.1768362522125244, + "logits/rejected": 1.9997977018356323, + "logps/chosen": -1.4946825504302979, + "logps/rejected": -3.9405903816223145, + "loss": 1.7, + "nll_loss": 1.518242597579956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14946825802326202, + "rewards/margins": 0.24459081888198853, + "rewards/rejected": -0.39405906200408936, + "step": 30 + }, + { + "epoch": 0.06060606060606061, + "grad_norm": 0.8585636019706726, + "learning_rate": 4.915198956294847e-05, + "log_odds_chosen": 2.868819236755371, + "log_odds_ratio": -0.16824978590011597, + "logits/chosen": 2.5519042015075684, + "logits/rejected": 1.8910496234893799, + "logps/chosen": -1.5883302688598633, + "logps/rejected": -4.259197235107422, + "loss": 1.6756, + "nll_loss": 1.7135711908340454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15883302688598633, + "rewards/margins": 0.2670866847038269, + "rewards/rejected": -0.42591971158981323, + "step": 31 + }, + { + "epoch": 0.06256109481915934, + "grad_norm": 0.6597340703010559, + "learning_rate": 4.911937377690802e-05, + "log_odds_chosen": 3.4033660888671875, + "log_odds_ratio": -0.16694095730781555, + "logits/chosen": 2.3673579692840576, + "logits/rejected": 1.9860485792160034, + "logps/chosen": -1.439906120300293, + "logps/rejected": -4.620086669921875, + "loss": 1.6575, + "nll_loss": 1.6416456699371338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14399060606956482, + "rewards/margins": 0.3180180788040161, + "rewards/rejected": -0.46200865507125854, + "step": 32 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 0.8160446882247925, + "learning_rate": 4.908675799086758e-05, + "log_odds_chosen": 5.740808486938477, + "log_odds_ratio": -0.1134074330329895, + "logits/chosen": 2.3188819885253906, + "logits/rejected": 1.3463646173477173, + "logps/chosen": -1.3991882801055908, + "logps/rejected": -6.868556022644043, + "loss": 1.6676, + "nll_loss": 1.7056375741958618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13991881906986237, + "rewards/margins": 0.5469368696212769, + "rewards/rejected": -0.686855673789978, + "step": 33 + }, + { + "epoch": 0.06647116324535679, + "grad_norm": 0.8093536496162415, + "learning_rate": 4.905414220482714e-05, + "log_odds_chosen": 4.943930625915527, + "log_odds_ratio": -0.1472458690404892, + "logits/chosen": 2.3418426513671875, + "logits/rejected": 1.6748509407043457, + "logps/chosen": -1.6414716243743896, + "logps/rejected": -6.404474258422852, + "loss": 1.6675, + "nll_loss": 1.7226728200912476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16414716839790344, + "rewards/margins": 0.4763002395629883, + "rewards/rejected": -0.6404473781585693, + "step": 34 + }, + { + "epoch": 0.06842619745845552, + "grad_norm": 0.7675250172615051, + "learning_rate": 4.90215264187867e-05, + "log_odds_chosen": 5.24585485458374, + "log_odds_ratio": -0.18474672734737396, + "logits/chosen": 2.5179383754730225, + "logits/rejected": 1.5410356521606445, + "logps/chosen": -1.6690616607666016, + "logps/rejected": -6.711256980895996, + "loss": 1.657, + "nll_loss": 1.6759426593780518, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.16690616309642792, + "rewards/margins": 0.5042194724082947, + "rewards/rejected": -0.6711256504058838, + "step": 35 + }, + { + "epoch": 0.07038123167155426, + "grad_norm": 0.9768145084381104, + "learning_rate": 4.898891063274625e-05, + "log_odds_chosen": 3.0060033798217773, + "log_odds_ratio": -0.3217284679412842, + "logits/chosen": 2.511127471923828, + "logits/rejected": 1.7896931171417236, + "logps/chosen": -1.4878113269805908, + "logps/rejected": -4.343317031860352, + "loss": 1.6488, + "nll_loss": 1.5318455696105957, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14878112077713013, + "rewards/margins": 0.2855505645275116, + "rewards/rejected": -0.4343317449092865, + "step": 36 + }, + { + "epoch": 0.07233626588465299, + "grad_norm": 0.839377224445343, + "learning_rate": 4.895629484670581e-05, + "log_odds_chosen": 5.9764323234558105, + "log_odds_ratio": -0.18684130907058716, + "logits/chosen": 2.335430383682251, + "logits/rejected": 1.2173219919204712, + "logps/chosen": -1.4760345220565796, + "logps/rejected": -7.246689796447754, + "loss": 1.6307, + "nll_loss": 1.509698510169983, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14760345220565796, + "rewards/margins": 0.5770654678344727, + "rewards/rejected": -0.7246689200401306, + "step": 37 + }, + { + "epoch": 0.07429130009775171, + "grad_norm": 0.832291841506958, + "learning_rate": 4.892367906066536e-05, + "log_odds_chosen": 4.389082908630371, + "log_odds_ratio": -0.1778152883052826, + "logits/chosen": 2.2295243740081787, + "logits/rejected": 1.503605842590332, + "logps/chosen": -1.461230993270874, + "logps/rejected": -5.647950172424316, + "loss": 1.6422, + "nll_loss": 1.5762319564819336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14612311124801636, + "rewards/margins": 0.4186719059944153, + "rewards/rejected": -0.5647950172424316, + "step": 38 + }, + { + "epoch": 0.07624633431085044, + "grad_norm": 0.9901284575462341, + "learning_rate": 4.889106327462492e-05, + "log_odds_chosen": 8.850934982299805, + "log_odds_ratio": -0.05441835895180702, + "logits/chosen": 2.035140037536621, + "logits/rejected": 0.7119467258453369, + "logps/chosen": -1.4808456897735596, + "logps/rejected": -10.07605266571045, + "loss": 1.6096, + "nll_loss": 1.5579369068145752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14808455109596252, + "rewards/margins": 0.859520673751831, + "rewards/rejected": -1.0076053142547607, + "step": 39 + }, + { + "epoch": 0.07820136852394917, + "grad_norm": 0.9069317579269409, + "learning_rate": 4.8858447488584476e-05, + "log_odds_chosen": 3.5290029048919678, + "log_odds_ratio": -0.27841129899024963, + "logits/chosen": 2.0968003273010254, + "logits/rejected": 1.6632616519927979, + "logps/chosen": -1.5251446962356567, + "logps/rejected": -4.89771842956543, + "loss": 1.5964, + "nll_loss": 1.6536409854888916, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1525144875049591, + "rewards/margins": 0.3372573256492615, + "rewards/rejected": -0.48977184295654297, + "step": 40 + }, + { + "epoch": 0.0801564027370479, + "grad_norm": 0.789936900138855, + "learning_rate": 4.8825831702544035e-05, + "log_odds_chosen": 5.215309143066406, + "log_odds_ratio": -0.13427776098251343, + "logits/chosen": 2.241924285888672, + "logits/rejected": 1.3134957551956177, + "logps/chosen": -1.4403858184814453, + "logps/rejected": -6.407920837402344, + "loss": 1.5997, + "nll_loss": 1.5195701122283936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.144038587808609, + "rewards/margins": 0.4967535138130188, + "rewards/rejected": -0.6407920718193054, + "step": 41 + }, + { + "epoch": 0.08211143695014662, + "grad_norm": 0.7240133881568909, + "learning_rate": 4.879321591650359e-05, + "log_odds_chosen": 2.000676393508911, + "log_odds_ratio": -0.40548175573349, + "logits/chosen": 2.0519556999206543, + "logits/rejected": 1.7917907238006592, + "logps/chosen": -1.5050177574157715, + "logps/rejected": -3.3697314262390137, + "loss": 1.5977, + "nll_loss": 1.545334815979004, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1505017876625061, + "rewards/margins": 0.1864713579416275, + "rewards/rejected": -0.3369731307029724, + "step": 42 + }, + { + "epoch": 0.08406647116324535, + "grad_norm": 0.7609788775444031, + "learning_rate": 4.876060013046315e-05, + "log_odds_chosen": 5.020174980163574, + "log_odds_ratio": -0.26298946142196655, + "logits/chosen": 2.0027074813842773, + "logits/rejected": 1.1811378002166748, + "logps/chosen": -1.464072823524475, + "logps/rejected": -6.310466766357422, + "loss": 1.5995, + "nll_loss": 1.698756456375122, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14640729129314423, + "rewards/margins": 0.4846394658088684, + "rewards/rejected": -0.631046712398529, + "step": 43 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7828589677810669, + "learning_rate": 4.87279843444227e-05, + "log_odds_chosen": 5.2603607177734375, + "log_odds_ratio": -0.294029176235199, + "logits/chosen": 2.3689918518066406, + "logits/rejected": 1.3344029188156128, + "logps/chosen": -1.463855266571045, + "logps/rejected": -6.567535400390625, + "loss": 1.5693, + "nll_loss": 1.5522754192352295, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1463855355978012, + "rewards/margins": 0.5103679895401001, + "rewards/rejected": -0.6567535400390625, + "step": 44 + }, + { + "epoch": 0.08797653958944282, + "grad_norm": 0.7198808789253235, + "learning_rate": 4.869536855838226e-05, + "log_odds_chosen": 3.7486956119537354, + "log_odds_ratio": -0.3370180130004883, + "logits/chosen": 2.406686305999756, + "logits/rejected": 1.4770073890686035, + "logps/chosen": -1.5168871879577637, + "logps/rejected": -5.131182670593262, + "loss": 1.5605, + "nll_loss": 1.5872851610183716, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.15168870985507965, + "rewards/margins": 0.36142951250076294, + "rewards/rejected": -0.5131182074546814, + "step": 45 + }, + { + "epoch": 0.08993157380254155, + "grad_norm": 0.7528467178344727, + "learning_rate": 4.866275277234181e-05, + "log_odds_chosen": 2.2795753479003906, + "log_odds_ratio": -0.35739123821258545, + "logits/chosen": 2.281710624694824, + "logits/rejected": 1.6200214624404907, + "logps/chosen": -1.5357706546783447, + "logps/rejected": -3.6957197189331055, + "loss": 1.5711, + "nll_loss": 1.589789628982544, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1535770744085312, + "rewards/margins": 0.21599489450454712, + "rewards/rejected": -0.3695719838142395, + "step": 46 + }, + { + "epoch": 0.09188660801564028, + "grad_norm": 0.7308492064476013, + "learning_rate": 4.863013698630137e-05, + "log_odds_chosen": 2.4094467163085938, + "log_odds_ratio": -0.4753958582878113, + "logits/chosen": 2.350855827331543, + "logits/rejected": 1.7652349472045898, + "logps/chosen": -1.462003469467163, + "logps/rejected": -3.7666940689086914, + "loss": 1.5665, + "nll_loss": 1.4719218015670776, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.14620035886764526, + "rewards/margins": 0.23046903312206268, + "rewards/rejected": -0.37666940689086914, + "step": 47 + }, + { + "epoch": 0.093841642228739, + "grad_norm": 0.6873332858085632, + "learning_rate": 4.8597521200260924e-05, + "log_odds_chosen": 2.5447299480438232, + "log_odds_ratio": -0.30011117458343506, + "logits/chosen": 2.320936679840088, + "logits/rejected": 1.7293076515197754, + "logps/chosen": -1.4400086402893066, + "logps/rejected": -3.8254730701446533, + "loss": 1.554, + "nll_loss": 1.5110268592834473, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1440008580684662, + "rewards/margins": 0.2385464459657669, + "rewards/rejected": -0.3825473189353943, + "step": 48 + }, + { + "epoch": 0.09579667644183773, + "grad_norm": 0.6862441897392273, + "learning_rate": 4.8564905414220484e-05, + "log_odds_chosen": 2.7722930908203125, + "log_odds_ratio": -0.267081618309021, + "logits/chosen": 2.37742018699646, + "logits/rejected": 1.7560045719146729, + "logps/chosen": -1.4152823686599731, + "logps/rejected": -4.02066707611084, + "loss": 1.5595, + "nll_loss": 1.592606782913208, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14152824878692627, + "rewards/margins": 0.2605384588241577, + "rewards/rejected": -0.402066707611084, + "step": 49 + }, + { + "epoch": 0.09775171065493646, + "grad_norm": 0.6836202144622803, + "learning_rate": 4.8532289628180036e-05, + "log_odds_chosen": 2.9254186153411865, + "log_odds_ratio": -0.36321234703063965, + "logits/chosen": 1.8667048215866089, + "logits/rejected": 1.5011208057403564, + "logps/chosen": -1.4486148357391357, + "logps/rejected": -4.253735542297363, + "loss": 1.5483, + "nll_loss": 1.5208964347839355, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14486147463321686, + "rewards/margins": 0.28051209449768066, + "rewards/rejected": -0.42537355422973633, + "step": 50 + }, + { + "epoch": 0.09970674486803519, + "grad_norm": 0.7661028504371643, + "learning_rate": 4.8499673842139596e-05, + "log_odds_chosen": 3.9525113105773926, + "log_odds_ratio": -0.37830275297164917, + "logits/chosen": 2.3178677558898926, + "logits/rejected": 1.3411366939544678, + "logps/chosen": -1.488782286643982, + "logps/rejected": -5.319664478302002, + "loss": 1.547, + "nll_loss": 1.4902622699737549, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14887823164463043, + "rewards/margins": 0.38308823108673096, + "rewards/rejected": -0.5319664478302002, + "step": 51 + }, + { + "epoch": 0.10166177908113393, + "grad_norm": 0.7380790114402771, + "learning_rate": 4.846705805609915e-05, + "log_odds_chosen": 2.694859504699707, + "log_odds_ratio": -0.36442720890045166, + "logits/chosen": 2.319237232208252, + "logits/rejected": 1.7108639478683472, + "logps/chosen": -1.3975046873092651, + "logps/rejected": -3.9307045936584473, + "loss": 1.5597, + "nll_loss": 1.5374995470046997, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13975048065185547, + "rewards/margins": 0.25331994891166687, + "rewards/rejected": -0.39307039976119995, + "step": 52 + }, + { + "epoch": 0.10361681329423265, + "grad_norm": 0.8142716884613037, + "learning_rate": 4.843444227005871e-05, + "log_odds_chosen": 4.384263515472412, + "log_odds_ratio": -0.3738389015197754, + "logits/chosen": 1.979762077331543, + "logits/rejected": 1.4949979782104492, + "logps/chosen": -1.5693120956420898, + "logps/rejected": -5.811679840087891, + "loss": 1.5439, + "nll_loss": 1.6757783889770508, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.15693120658397675, + "rewards/margins": 0.4242367744445801, + "rewards/rejected": -0.581167995929718, + "step": 53 + }, + { + "epoch": 0.10557184750733138, + "grad_norm": 0.840408205986023, + "learning_rate": 4.840182648401827e-05, + "log_odds_chosen": 3.9247570037841797, + "log_odds_ratio": -0.2544275224208832, + "logits/chosen": 2.275968074798584, + "logits/rejected": 1.2878494262695312, + "logps/chosen": -1.4923079013824463, + "logps/rejected": -5.243251323699951, + "loss": 1.5223, + "nll_loss": 1.5857570171356201, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14923080801963806, + "rewards/margins": 0.37509429454803467, + "rewards/rejected": -0.5243251323699951, + "step": 54 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.017526626586914, + "learning_rate": 4.836921069797783e-05, + "log_odds_chosen": 5.521985054016113, + "log_odds_ratio": -0.11633212864398956, + "logits/chosen": 1.9883339405059814, + "logits/rejected": 1.3329412937164307, + "logps/chosen": -1.593027114868164, + "logps/rejected": -6.894577980041504, + "loss": 1.5371, + "nll_loss": 1.5787748098373413, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1593027263879776, + "rewards/margins": 0.5301550626754761, + "rewards/rejected": -0.6894578337669373, + "step": 55 + }, + { + "epoch": 0.10948191593352884, + "grad_norm": 1.26445472240448, + "learning_rate": 4.833659491193738e-05, + "log_odds_chosen": 4.304198265075684, + "log_odds_ratio": -0.18000125885009766, + "logits/chosen": 2.160170793533325, + "logits/rejected": 1.3573284149169922, + "logps/chosen": -1.3694514036178589, + "logps/rejected": -5.438887596130371, + "loss": 1.5269, + "nll_loss": 1.5634503364562988, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13694512844085693, + "rewards/margins": 0.4069436490535736, + "rewards/rejected": -0.5438887476921082, + "step": 56 + }, + { + "epoch": 0.11143695014662756, + "grad_norm": 0.8018607497215271, + "learning_rate": 4.830397912589694e-05, + "log_odds_chosen": 5.030666828155518, + "log_odds_ratio": -0.09847380220890045, + "logits/chosen": 2.0073935985565186, + "logits/rejected": 1.3732943534851074, + "logps/chosen": -1.175767183303833, + "logps/rejected": -5.77321720123291, + "loss": 1.5382, + "nll_loss": 1.2334356307983398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1175767034292221, + "rewards/margins": 0.45974504947662354, + "rewards/rejected": -0.5773217678070068, + "step": 57 + }, + { + "epoch": 0.11339198435972629, + "grad_norm": 0.7250831723213196, + "learning_rate": 4.827136333985649e-05, + "log_odds_chosen": 3.4144814014434814, + "log_odds_ratio": -0.3091236352920532, + "logits/chosen": 1.9553923606872559, + "logits/rejected": 1.1691893339157104, + "logps/chosen": -1.4741464853286743, + "logps/rejected": -4.746518135070801, + "loss": 1.5293, + "nll_loss": 1.4538118839263916, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1474146544933319, + "rewards/margins": 0.3272371292114258, + "rewards/rejected": -0.4746517539024353, + "step": 58 + }, + { + "epoch": 0.11534701857282502, + "grad_norm": 0.7433968782424927, + "learning_rate": 4.823874755381605e-05, + "log_odds_chosen": 2.9475607872009277, + "log_odds_ratio": -0.2465718686580658, + "logits/chosen": 2.221162796020508, + "logits/rejected": 1.4861279726028442, + "logps/chosen": -1.4574463367462158, + "logps/rejected": -4.221869945526123, + "loss": 1.4995, + "nll_loss": 1.4145461320877075, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14574463665485382, + "rewards/margins": 0.27644237875938416, + "rewards/rejected": -0.4221870005130768, + "step": 59 + }, + { + "epoch": 0.11730205278592376, + "grad_norm": 0.9494369626045227, + "learning_rate": 4.8206131767775604e-05, + "log_odds_chosen": 3.010460615158081, + "log_odds_ratio": -0.2736998200416565, + "logits/chosen": 2.0606894493103027, + "logits/rejected": 1.330474615097046, + "logps/chosen": -1.4091761112213135, + "logps/rejected": -4.218234062194824, + "loss": 1.5341, + "nll_loss": 1.368818759918213, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1409175992012024, + "rewards/margins": 0.2809058129787445, + "rewards/rejected": -0.4218234419822693, + "step": 60 + }, + { + "epoch": 0.11925708699902249, + "grad_norm": 0.9923098087310791, + "learning_rate": 4.8173515981735164e-05, + "log_odds_chosen": 2.004603862762451, + "log_odds_ratio": -0.38542288541793823, + "logits/chosen": 1.9823591709136963, + "logits/rejected": 1.7739986181259155, + "logps/chosen": -1.1246858835220337, + "logps/rejected": -2.9013586044311523, + "loss": 1.5051, + "nll_loss": 1.2354991436004639, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11246858537197113, + "rewards/margins": 0.1776672601699829, + "rewards/rejected": -0.29013586044311523, + "step": 61 + }, + { + "epoch": 0.12121212121212122, + "grad_norm": 0.9536778330802917, + "learning_rate": 4.8140900195694716e-05, + "log_odds_chosen": 5.0203471183776855, + "log_odds_ratio": -0.2382582277059555, + "logits/chosen": 1.8761231899261475, + "logits/rejected": 1.4335169792175293, + "logps/chosen": -1.4001063108444214, + "logps/rejected": -6.215633869171143, + "loss": 1.4685, + "nll_loss": 1.4567911624908447, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14001062512397766, + "rewards/margins": 0.48155272006988525, + "rewards/rejected": -0.6215634346008301, + "step": 62 + }, + { + "epoch": 0.12316715542521994, + "grad_norm": 0.9013022780418396, + "learning_rate": 4.8108284409654276e-05, + "log_odds_chosen": 6.613487720489502, + "log_odds_ratio": -0.4061037003993988, + "logits/chosen": 2.2971887588500977, + "logits/rejected": 1.0140882730484009, + "logps/chosen": -1.4577893018722534, + "logps/rejected": -7.933663368225098, + "loss": 1.5288, + "nll_loss": 1.485929012298584, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.14577893912792206, + "rewards/margins": 0.6475874185562134, + "rewards/rejected": -0.7933663725852966, + "step": 63 + }, + { + "epoch": 0.12512218963831867, + "grad_norm": 0.7330491542816162, + "learning_rate": 4.807566862361383e-05, + "log_odds_chosen": 4.182305812835693, + "log_odds_ratio": -0.3244001269340515, + "logits/chosen": 2.1461777687072754, + "logits/rejected": 1.4816935062408447, + "logps/chosen": -1.4328134059906006, + "logps/rejected": -5.460397720336914, + "loss": 1.5004, + "nll_loss": 1.481743574142456, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14328134059906006, + "rewards/margins": 0.4027584195137024, + "rewards/rejected": -0.5460397601127625, + "step": 64 + }, + { + "epoch": 0.1270772238514174, + "grad_norm": 0.8243466019630432, + "learning_rate": 4.804305283757339e-05, + "log_odds_chosen": 12.073083877563477, + "log_odds_ratio": -0.22937898337841034, + "logits/chosen": 1.778444528579712, + "logits/rejected": 0.878467321395874, + "logps/chosen": -1.4269479513168335, + "logps/rejected": -13.31179428100586, + "loss": 1.5055, + "nll_loss": 1.584397792816162, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14269480109214783, + "rewards/margins": 1.1884846687316895, + "rewards/rejected": -1.3311794996261597, + "step": 65 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 0.8397243618965149, + "learning_rate": 4.801043705153294e-05, + "log_odds_chosen": 14.66856575012207, + "log_odds_ratio": -0.3019821345806122, + "logits/chosen": 2.0395774841308594, + "logits/rejected": 1.170179843902588, + "logps/chosen": -1.299126148223877, + "logps/rejected": -15.74128532409668, + "loss": 1.4974, + "nll_loss": 1.560849666595459, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1299126148223877, + "rewards/margins": 1.4442158937454224, + "rewards/rejected": -1.57412850856781, + "step": 66 + }, + { + "epoch": 0.13098729227761485, + "grad_norm": 0.7264294028282166, + "learning_rate": 4.79778212654925e-05, + "log_odds_chosen": 15.498464584350586, + "log_odds_ratio": -0.26210176944732666, + "logits/chosen": 1.9104585647583008, + "logits/rejected": 0.5509365797042847, + "logps/chosen": -1.4173688888549805, + "logps/rejected": -16.754253387451172, + "loss": 1.5092, + "nll_loss": 1.4415007829666138, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14173689484596252, + "rewards/margins": 1.5336886644363403, + "rewards/rejected": -1.675425410270691, + "step": 67 + }, + { + "epoch": 0.13294232649071358, + "grad_norm": 0.7218042016029358, + "learning_rate": 4.794520547945205e-05, + "log_odds_chosen": 10.435873031616211, + "log_odds_ratio": -0.2493719458580017, + "logits/chosen": 1.9492502212524414, + "logits/rejected": 1.4149287939071655, + "logps/chosen": -1.3215556144714355, + "logps/rejected": -11.526473999023438, + "loss": 1.4961, + "nll_loss": 1.3997879028320312, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13215556740760803, + "rewards/margins": 1.0204918384552002, + "rewards/rejected": -1.1526474952697754, + "step": 68 + }, + { + "epoch": 0.1348973607038123, + "grad_norm": 0.6872608065605164, + "learning_rate": 4.791258969341161e-05, + "log_odds_chosen": 12.460031509399414, + "log_odds_ratio": -0.22742164134979248, + "logits/chosen": 2.083967685699463, + "logits/rejected": 0.9661794900894165, + "logps/chosen": -1.2399303913116455, + "logps/rejected": -13.450027465820312, + "loss": 1.505, + "nll_loss": 1.385688066482544, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12399302423000336, + "rewards/margins": 1.2210097312927246, + "rewards/rejected": -1.3450026512145996, + "step": 69 + }, + { + "epoch": 0.13685239491691104, + "grad_norm": 0.7720362544059753, + "learning_rate": 4.7879973907371165e-05, + "log_odds_chosen": 11.863508224487305, + "log_odds_ratio": -0.21888263523578644, + "logits/chosen": 1.5903079509735107, + "logits/rejected": 0.674506664276123, + "logps/chosen": -1.098848581314087, + "logps/rejected": -12.617745399475098, + "loss": 1.4719, + "nll_loss": 1.2162249088287354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10988485813140869, + "rewards/margins": 1.151889681816101, + "rewards/rejected": -1.2617745399475098, + "step": 70 + }, + { + "epoch": 0.13880742913000976, + "grad_norm": 0.7485812306404114, + "learning_rate": 4.7847358121330724e-05, + "log_odds_chosen": 6.161189079284668, + "log_odds_ratio": -0.3760022521018982, + "logits/chosen": 1.8400450944900513, + "logits/rejected": 1.5099108219146729, + "logps/chosen": -1.4739573001861572, + "logps/rejected": -7.510321617126465, + "loss": 1.4761, + "nll_loss": 1.498463749885559, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14739573001861572, + "rewards/margins": 0.6036365032196045, + "rewards/rejected": -0.751032292842865, + "step": 71 + }, + { + "epoch": 0.14076246334310852, + "grad_norm": 0.6332464814186096, + "learning_rate": 4.781474233529028e-05, + "log_odds_chosen": 10.497509002685547, + "log_odds_ratio": -0.39624279737472534, + "logits/chosen": 1.928511381149292, + "logits/rejected": 0.9187361598014832, + "logps/chosen": -1.4734458923339844, + "logps/rejected": -11.853676795959473, + "loss": 1.4646, + "nll_loss": 1.5352208614349365, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14734458923339844, + "rewards/margins": 1.0380231142044067, + "rewards/rejected": -1.1853678226470947, + "step": 72 + }, + { + "epoch": 0.14271749755620725, + "grad_norm": 0.6771290898323059, + "learning_rate": 4.7782126549249837e-05, + "log_odds_chosen": 14.738556861877441, + "log_odds_ratio": -0.162057563662529, + "logits/chosen": 1.9255168437957764, + "logits/rejected": 0.48746389150619507, + "logps/chosen": -1.2297669649124146, + "logps/rejected": -15.679567337036133, + "loss": 1.4685, + "nll_loss": 1.3367674350738525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12297669053077698, + "rewards/margins": 1.4449801445007324, + "rewards/rejected": -1.5679569244384766, + "step": 73 + }, + { + "epoch": 0.14467253176930597, + "grad_norm": 0.6709555387496948, + "learning_rate": 4.7749510763209396e-05, + "log_odds_chosen": 11.17204475402832, + "log_odds_ratio": -0.2278939187526703, + "logits/chosen": 1.8491456508636475, + "logits/rejected": 0.9629364013671875, + "logps/chosen": -1.5831336975097656, + "logps/rejected": -12.59073257446289, + "loss": 1.4563, + "nll_loss": 1.6740729808807373, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.15831336379051208, + "rewards/margins": 1.1007598638534546, + "rewards/rejected": -1.259073257446289, + "step": 74 + }, + { + "epoch": 0.1466275659824047, + "grad_norm": 0.7191247344017029, + "learning_rate": 4.7716894977168955e-05, + "log_odds_chosen": 11.021308898925781, + "log_odds_ratio": -0.2472204566001892, + "logits/chosen": 2.0912296772003174, + "logits/rejected": 0.8645464181900024, + "logps/chosen": -1.3500739336013794, + "logps/rejected": -12.163490295410156, + "loss": 1.4899, + "nll_loss": 1.4305788278579712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13500738143920898, + "rewards/margins": 1.0813416242599487, + "rewards/rejected": -1.2163490056991577, + "step": 75 + }, + { + "epoch": 0.14858260019550343, + "grad_norm": 0.6664873361587524, + "learning_rate": 4.768427919112851e-05, + "log_odds_chosen": 8.067474365234375, + "log_odds_ratio": -0.2534986436367035, + "logits/chosen": 1.8878326416015625, + "logits/rejected": 0.9413186311721802, + "logps/chosen": -1.3017253875732422, + "logps/rejected": -9.16075325012207, + "loss": 1.4538, + "nll_loss": 1.5370495319366455, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13017253577709198, + "rewards/margins": 0.7859028577804565, + "rewards/rejected": -0.9160754680633545, + "step": 76 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 0.7199352979660034, + "learning_rate": 4.765166340508807e-05, + "log_odds_chosen": 2.442326068878174, + "log_odds_ratio": -0.25186392664909363, + "logits/chosen": 1.6102423667907715, + "logits/rejected": 1.2504682540893555, + "logps/chosen": -1.2253031730651855, + "logps/rejected": -3.400601387023926, + "loss": 1.438, + "nll_loss": 1.3434683084487915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12253031879663467, + "rewards/margins": 0.21752983331680298, + "rewards/rejected": -0.34006017446517944, + "step": 77 + }, + { + "epoch": 0.15249266862170088, + "grad_norm": 0.6622952222824097, + "learning_rate": 4.761904761904762e-05, + "log_odds_chosen": 2.464942216873169, + "log_odds_ratio": -0.29986244440078735, + "logits/chosen": 1.667536973953247, + "logits/rejected": 1.2541221380233765, + "logps/chosen": -1.1019446849822998, + "logps/rejected": -3.323368787765503, + "loss": 1.4594, + "nll_loss": 1.1758722066879272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11019445955753326, + "rewards/margins": 0.22214239835739136, + "rewards/rejected": -0.3323369026184082, + "step": 78 + }, + { + "epoch": 0.1544477028347996, + "grad_norm": 0.7748003602027893, + "learning_rate": 4.758643183300718e-05, + "log_odds_chosen": 4.682073593139648, + "log_odds_ratio": -0.26584818959236145, + "logits/chosen": 2.061000347137451, + "logits/rejected": 0.8829006552696228, + "logps/chosen": -1.444542407989502, + "logps/rejected": -5.954538345336914, + "loss": 1.479, + "nll_loss": 1.5050190687179565, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1444542407989502, + "rewards/margins": 0.45099955797195435, + "rewards/rejected": -0.5954537987709045, + "step": 79 + }, + { + "epoch": 0.15640273704789834, + "grad_norm": 0.6847163438796997, + "learning_rate": 4.755381604696673e-05, + "log_odds_chosen": 4.763134002685547, + "log_odds_ratio": -0.41811633110046387, + "logits/chosen": 1.8771353960037231, + "logits/rejected": 1.3843082189559937, + "logps/chosen": -1.3026714324951172, + "logps/rejected": -5.944716930389404, + "loss": 1.4683, + "nll_loss": 1.3627656698226929, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.13026714324951172, + "rewards/margins": 0.4642045497894287, + "rewards/rejected": -0.5944716930389404, + "step": 80 + }, + { + "epoch": 0.15835777126099707, + "grad_norm": 0.6807910799980164, + "learning_rate": 4.752120026092629e-05, + "log_odds_chosen": 5.0939555168151855, + "log_odds_ratio": -0.2415585219860077, + "logits/chosen": 1.4218227863311768, + "logits/rejected": 0.5273487567901611, + "logps/chosen": -1.403899073600769, + "logps/rejected": -6.291587829589844, + "loss": 1.4597, + "nll_loss": 1.3402941226959229, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14038991928100586, + "rewards/margins": 0.48876893520355225, + "rewards/rejected": -0.6291588544845581, + "step": 81 + }, + { + "epoch": 0.1603128054740958, + "grad_norm": 0.7087857127189636, + "learning_rate": 4.7488584474885845e-05, + "log_odds_chosen": 4.8105292320251465, + "log_odds_ratio": -0.2760387361049652, + "logits/chosen": 1.7885057926177979, + "logits/rejected": 1.0022543668746948, + "logps/chosen": -1.5799968242645264, + "logps/rejected": -6.254765033721924, + "loss": 1.4413, + "nll_loss": 1.646227478981018, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1579996943473816, + "rewards/margins": 0.4674767851829529, + "rewards/rejected": -0.6254765391349792, + "step": 82 + }, + { + "epoch": 0.16226783968719452, + "grad_norm": 0.7642877697944641, + "learning_rate": 4.7455968688845404e-05, + "log_odds_chosen": 4.9856719970703125, + "log_odds_ratio": -0.249114990234375, + "logits/chosen": 1.659773588180542, + "logits/rejected": 1.0140830278396606, + "logps/chosen": -1.3218655586242676, + "logps/rejected": -6.091880798339844, + "loss": 1.4293, + "nll_loss": 1.407090187072754, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.13218656182289124, + "rewards/margins": 0.47700151801109314, + "rewards/rejected": -0.6091880798339844, + "step": 83 + }, + { + "epoch": 0.16422287390029325, + "grad_norm": 0.7001745700836182, + "learning_rate": 4.742335290280496e-05, + "log_odds_chosen": 5.576223373413086, + "log_odds_ratio": -0.35578426718711853, + "logits/chosen": 2.198936700820923, + "logits/rejected": 1.3473594188690186, + "logps/chosen": -1.1810741424560547, + "logps/rejected": -6.5163750648498535, + "loss": 1.4338, + "nll_loss": 1.356499195098877, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11810740828514099, + "rewards/margins": 0.5335301160812378, + "rewards/rejected": -0.6516374945640564, + "step": 84 + }, + { + "epoch": 0.16617790811339198, + "grad_norm": 0.7311692237854004, + "learning_rate": 4.7390737116764516e-05, + "log_odds_chosen": 3.681995153427124, + "log_odds_ratio": -0.3203997313976288, + "logits/chosen": 1.579182505607605, + "logits/rejected": 0.9454560279846191, + "logps/chosen": -1.3752734661102295, + "logps/rejected": -4.871384620666504, + "loss": 1.4385, + "nll_loss": 1.557340383529663, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.13752734661102295, + "rewards/margins": 0.3496111035346985, + "rewards/rejected": -0.4871384799480438, + "step": 85 + }, + { + "epoch": 0.1681329423264907, + "grad_norm": 0.6891183257102966, + "learning_rate": 4.735812133072407e-05, + "log_odds_chosen": 4.8709001541137695, + "log_odds_ratio": -0.211325004696846, + "logits/chosen": 1.7538448572158813, + "logits/rejected": 0.6851677894592285, + "logps/chosen": -1.2515896558761597, + "logps/rejected": -5.870602130889893, + "loss": 1.4461, + "nll_loss": 1.4442150592803955, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12515896558761597, + "rewards/margins": 0.4619012475013733, + "rewards/rejected": -0.5870602130889893, + "step": 86 + }, + { + "epoch": 0.17008797653958943, + "grad_norm": 0.7087100148200989, + "learning_rate": 4.732550554468363e-05, + "log_odds_chosen": 3.396533489227295, + "log_odds_ratio": -0.36137598752975464, + "logits/chosen": 1.3023903369903564, + "logits/rejected": 0.7235583066940308, + "logps/chosen": -1.3552250862121582, + "logps/rejected": -4.555652618408203, + "loss": 1.4444, + "nll_loss": 1.2757389545440674, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1355225145816803, + "rewards/margins": 0.32004275918006897, + "rewards/rejected": -0.45556527376174927, + "step": 87 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 0.702618420124054, + "learning_rate": 4.729288975864318e-05, + "log_odds_chosen": 4.88123893737793, + "log_odds_ratio": -0.2372969537973404, + "logits/chosen": 1.5274001359939575, + "logits/rejected": 0.8063890933990479, + "logps/chosen": -1.1897449493408203, + "logps/rejected": -5.82098913192749, + "loss": 1.4489, + "nll_loss": 1.2077107429504395, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11897449195384979, + "rewards/margins": 0.46312445402145386, + "rewards/rejected": -0.5820989608764648, + "step": 88 + }, + { + "epoch": 0.1739980449657869, + "grad_norm": 0.7499403953552246, + "learning_rate": 4.726027397260274e-05, + "log_odds_chosen": 6.817405700683594, + "log_odds_ratio": -0.2099139541387558, + "logits/chosen": 1.8558385372161865, + "logits/rejected": 0.8090221881866455, + "logps/chosen": -1.4715217351913452, + "logps/rejected": -7.964572906494141, + "loss": 1.4301, + "nll_loss": 1.7194527387619019, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14715218544006348, + "rewards/margins": 0.6493051648139954, + "rewards/rejected": -0.7964573502540588, + "step": 89 + }, + { + "epoch": 0.17595307917888564, + "grad_norm": 0.7166664004325867, + "learning_rate": 4.7227658186562293e-05, + "log_odds_chosen": 4.252359390258789, + "log_odds_ratio": -0.348575234413147, + "logits/chosen": 1.7106884717941284, + "logits/rejected": 1.4048469066619873, + "logps/chosen": -1.4604880809783936, + "logps/rejected": -5.5299482345581055, + "loss": 1.4437, + "nll_loss": 1.5575542449951172, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14604881405830383, + "rewards/margins": 0.406946063041687, + "rewards/rejected": -0.5529949069023132, + "step": 90 + }, + { + "epoch": 0.17790811339198437, + "grad_norm": 0.6951318383216858, + "learning_rate": 4.719504240052185e-05, + "log_odds_chosen": 10.769355773925781, + "log_odds_ratio": -0.12847542762756348, + "logits/chosen": 1.5253201723098755, + "logits/rejected": -0.2897353172302246, + "logps/chosen": -1.365741491317749, + "logps/rejected": -11.881928443908691, + "loss": 1.4145, + "nll_loss": 1.4364502429962158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1365741491317749, + "rewards/margins": 1.0516188144683838, + "rewards/rejected": -1.1881929636001587, + "step": 91 + }, + { + "epoch": 0.1798631476050831, + "grad_norm": 0.729890763759613, + "learning_rate": 4.7162426614481406e-05, + "log_odds_chosen": 6.1698737144470215, + "log_odds_ratio": -0.35637378692626953, + "logits/chosen": 1.422513484954834, + "logits/rejected": 0.68843674659729, + "logps/chosen": -1.449270486831665, + "logps/rejected": -7.45712423324585, + "loss": 1.4327, + "nll_loss": 1.5447942018508911, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1449270397424698, + "rewards/margins": 0.6007854342460632, + "rewards/rejected": -0.7457124590873718, + "step": 92 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 0.8014387488365173, + "learning_rate": 4.7129810828440965e-05, + "log_odds_chosen": 4.473212718963623, + "log_odds_ratio": -0.18472816050052643, + "logits/chosen": 1.249009609222412, + "logits/rejected": 0.9486333131790161, + "logps/chosen": -1.0901079177856445, + "logps/rejected": -5.213332176208496, + "loss": 1.4149, + "nll_loss": 1.3752704858779907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10901078581809998, + "rewards/margins": 0.41232237219810486, + "rewards/rejected": -0.5213332176208496, + "step": 93 + }, + { + "epoch": 0.18377321603128055, + "grad_norm": 0.6825114488601685, + "learning_rate": 4.7097195042400525e-05, + "log_odds_chosen": 4.463861465454102, + "log_odds_ratio": -0.2902586758136749, + "logits/chosen": 1.529774785041809, + "logits/rejected": 0.8114626407623291, + "logps/chosen": -1.4061813354492188, + "logps/rejected": -5.679861068725586, + "loss": 1.4404, + "nll_loss": 1.3988925218582153, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14061813056468964, + "rewards/margins": 0.4273679852485657, + "rewards/rejected": -0.5679861307144165, + "step": 94 + }, + { + "epoch": 0.18572825024437928, + "grad_norm": 0.7042595744132996, + "learning_rate": 4.7064579256360084e-05, + "log_odds_chosen": 2.9708564281463623, + "log_odds_ratio": -0.30499106645584106, + "logits/chosen": 1.5208230018615723, + "logits/rejected": 0.9097532033920288, + "logps/chosen": -1.4423526525497437, + "logps/rejected": -4.22061014175415, + "loss": 1.4105, + "nll_loss": 1.426192045211792, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1442352831363678, + "rewards/margins": 0.2778257429599762, + "rewards/rejected": -0.422061026096344, + "step": 95 + }, + { + "epoch": 0.187683284457478, + "grad_norm": 0.6857805848121643, + "learning_rate": 4.703196347031964e-05, + "log_odds_chosen": 2.9753165245056152, + "log_odds_ratio": -0.22793763875961304, + "logits/chosen": 1.5549793243408203, + "logits/rejected": 0.8418679237365723, + "logps/chosen": -1.1317572593688965, + "logps/rejected": -3.8076236248016357, + "loss": 1.4183, + "nll_loss": 1.291404366493225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11317572742700577, + "rewards/margins": 0.2675866186618805, + "rewards/rejected": -0.38076236844062805, + "step": 96 + }, + { + "epoch": 0.18963831867057673, + "grad_norm": 0.7101030349731445, + "learning_rate": 4.6999347684279196e-05, + "log_odds_chosen": 1.2280504703521729, + "log_odds_ratio": -0.5340121984481812, + "logits/chosen": 1.6823488473892212, + "logits/rejected": 1.2653664350509644, + "logps/chosen": -1.3156702518463135, + "logps/rejected": -2.4593636989593506, + "loss": 1.3908, + "nll_loss": 1.4018261432647705, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13156703114509583, + "rewards/margins": 0.11436935514211655, + "rewards/rejected": -0.24593636393547058, + "step": 97 + }, + { + "epoch": 0.19159335288367546, + "grad_norm": 0.7391109466552734, + "learning_rate": 4.696673189823875e-05, + "log_odds_chosen": 2.8894524574279785, + "log_odds_ratio": -0.32055455446243286, + "logits/chosen": 1.4241766929626465, + "logits/rejected": 1.2140928506851196, + "logps/chosen": -0.9421634078025818, + "logps/rejected": -3.482051372528076, + "loss": 1.405, + "nll_loss": 1.1341726779937744, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09421634674072266, + "rewards/margins": 0.2539888024330139, + "rewards/rejected": -0.3482051491737366, + "step": 98 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 0.7316187024116516, + "learning_rate": 4.693411611219831e-05, + "log_odds_chosen": 2.3490664958953857, + "log_odds_ratio": -0.4217137098312378, + "logits/chosen": 1.3027396202087402, + "logits/rejected": 0.8528461456298828, + "logps/chosen": -1.2963712215423584, + "logps/rejected": -3.482072353363037, + "loss": 1.4193, + "nll_loss": 1.423575758934021, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12963712215423584, + "rewards/margins": 0.21857008337974548, + "rewards/rejected": -0.3482072353363037, + "step": 99 + }, + { + "epoch": 0.19550342130987292, + "grad_norm": 0.7553929686546326, + "learning_rate": 4.690150032615786e-05, + "log_odds_chosen": 3.215608835220337, + "log_odds_ratio": -0.36612915992736816, + "logits/chosen": 1.358292579650879, + "logits/rejected": 0.5799618363380432, + "logps/chosen": -1.5400903224945068, + "logps/rejected": -4.627594947814941, + "loss": 1.3923, + "nll_loss": 1.4651737213134766, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.15400904417037964, + "rewards/margins": 0.3087504506111145, + "rewards/rejected": -0.46275943517684937, + "step": 100 + }, + { + "epoch": 0.19745845552297164, + "grad_norm": 0.7635395526885986, + "learning_rate": 4.686888454011742e-05, + "log_odds_chosen": 3.3309683799743652, + "log_odds_ratio": -0.35581111907958984, + "logits/chosen": 1.5950534343719482, + "logits/rejected": 0.8835946321487427, + "logps/chosen": -1.383009672164917, + "logps/rejected": -4.544572353363037, + "loss": 1.3957, + "nll_loss": 1.4722793102264404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13830098509788513, + "rewards/margins": 0.316156268119812, + "rewards/rejected": -0.45445728302001953, + "step": 101 + }, + { + "epoch": 0.19941348973607037, + "grad_norm": 0.6573813557624817, + "learning_rate": 4.683626875407697e-05, + "log_odds_chosen": 2.744372606277466, + "log_odds_ratio": -0.3343849778175354, + "logits/chosen": 1.2537287473678589, + "logits/rejected": 0.9394323825836182, + "logps/chosen": -1.3290894031524658, + "logps/rejected": -3.8715310096740723, + "loss": 1.3824, + "nll_loss": 1.3573670387268066, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13290894031524658, + "rewards/margins": 0.2542441487312317, + "rewards/rejected": -0.38715308904647827, + "step": 102 + }, + { + "epoch": 0.2013685239491691, + "grad_norm": 0.7030101418495178, + "learning_rate": 4.680365296803653e-05, + "log_odds_chosen": 9.536344528198242, + "log_odds_ratio": -0.13810645043849945, + "logits/chosen": 1.187511682510376, + "logits/rejected": 0.03287088871002197, + "logps/chosen": -1.1744999885559082, + "logps/rejected": -10.399774551391602, + "loss": 1.3975, + "nll_loss": 1.257333755493164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11744999885559082, + "rewards/margins": 0.9225274324417114, + "rewards/rejected": -1.0399774312973022, + "step": 103 + }, + { + "epoch": 0.20332355816226785, + "grad_norm": 0.6749060153961182, + "learning_rate": 4.6771037181996085e-05, + "log_odds_chosen": 4.229156970977783, + "log_odds_ratio": -0.3048085868358612, + "logits/chosen": 1.0694631338119507, + "logits/rejected": 0.4076783359050751, + "logps/chosen": -1.1094199419021606, + "logps/rejected": -4.969640731811523, + "loss": 1.4126, + "nll_loss": 1.219916582107544, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11094199120998383, + "rewards/margins": 0.3860221207141876, + "rewards/rejected": -0.49696412682533264, + "step": 104 + }, + { + "epoch": 0.20527859237536658, + "grad_norm": 0.7391946315765381, + "learning_rate": 4.6738421395955645e-05, + "log_odds_chosen": 6.526311874389648, + "log_odds_ratio": -0.16212622821331024, + "logits/chosen": 1.3286025524139404, + "logits/rejected": 0.05422012507915497, + "logps/chosen": -1.2869739532470703, + "logps/rejected": -7.513417720794678, + "loss": 1.3974, + "nll_loss": 1.428157925605774, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12869738042354584, + "rewards/margins": 0.6226444244384766, + "rewards/rejected": -0.7513418197631836, + "step": 105 + }, + { + "epoch": 0.2072336265884653, + "grad_norm": 0.703712522983551, + "learning_rate": 4.67058056099152e-05, + "log_odds_chosen": 3.306849479675293, + "log_odds_ratio": -0.24968525767326355, + "logits/chosen": 1.2632780075073242, + "logits/rejected": 0.618010401725769, + "logps/chosen": -1.3509830236434937, + "logps/rejected": -4.441869735717773, + "loss": 1.385, + "nll_loss": 1.5050835609436035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13509830832481384, + "rewards/margins": 0.30908864736557007, + "rewards/rejected": -0.4441869556903839, + "step": 106 + }, + { + "epoch": 0.20918866080156404, + "grad_norm": 0.6770440340042114, + "learning_rate": 4.667318982387476e-05, + "log_odds_chosen": 1.9592475891113281, + "log_odds_ratio": -0.5034722089767456, + "logits/chosen": 1.4675874710083008, + "logits/rejected": 1.1629838943481445, + "logps/chosen": -1.337562084197998, + "logps/rejected": -3.1920816898345947, + "loss": 1.3781, + "nll_loss": 1.403862476348877, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.13375622034072876, + "rewards/margins": 0.1854519546031952, + "rewards/rejected": -0.31920817494392395, + "step": 107 + }, + { + "epoch": 0.21114369501466276, + "grad_norm": 0.6591935157775879, + "learning_rate": 4.664057403783431e-05, + "log_odds_chosen": 3.188936710357666, + "log_odds_ratio": -0.28340762853622437, + "logits/chosen": 1.2385058403015137, + "logits/rejected": 0.6677268147468567, + "logps/chosen": -1.141720175743103, + "logps/rejected": -4.073037624359131, + "loss": 1.3881, + "nll_loss": 1.2023448944091797, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11417201906442642, + "rewards/margins": 0.2931317687034607, + "rewards/rejected": -0.40730375051498413, + "step": 108 + }, + { + "epoch": 0.2130987292277615, + "grad_norm": 0.681292712688446, + "learning_rate": 4.660795825179387e-05, + "log_odds_chosen": 3.646908760070801, + "log_odds_ratio": -0.1680804193019867, + "logits/chosen": 1.5224292278289795, + "logits/rejected": 0.5011821389198303, + "logps/chosen": -1.4252612590789795, + "logps/rejected": -4.860801696777344, + "loss": 1.385, + "nll_loss": 1.538453459739685, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14252613484859467, + "rewards/margins": 0.3435540497303009, + "rewards/rejected": -0.4860801696777344, + "step": 109 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 0.8594942688941956, + "learning_rate": 4.657534246575342e-05, + "log_odds_chosen": 3.062958002090454, + "log_odds_ratio": -0.2900886535644531, + "logits/chosen": 1.655545949935913, + "logits/rejected": 0.8270969986915588, + "logps/chosen": -1.2381882667541504, + "logps/rejected": -4.073758602142334, + "loss": 1.3813, + "nll_loss": 1.2890970706939697, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12381882965564728, + "rewards/margins": 0.2835569977760315, + "rewards/rejected": -0.4073758125305176, + "step": 110 + }, + { + "epoch": 0.21700879765395895, + "grad_norm": 0.8542609214782715, + "learning_rate": 4.654272667971298e-05, + "log_odds_chosen": 6.591673851013184, + "log_odds_ratio": -0.15612998604774475, + "logits/chosen": 1.3222336769104004, + "logits/rejected": 0.4622771739959717, + "logps/chosen": -1.281661033630371, + "logps/rejected": -7.552131652832031, + "loss": 1.3796, + "nll_loss": 1.535856008529663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12816612422466278, + "rewards/margins": 0.6270470023155212, + "rewards/rejected": -0.7552131414413452, + "step": 111 + }, + { + "epoch": 0.21896383186705767, + "grad_norm": 0.7308524250984192, + "learning_rate": 4.6510110893672534e-05, + "log_odds_chosen": 5.792438507080078, + "log_odds_ratio": -0.1366799920797348, + "logits/chosen": 1.082383632659912, + "logits/rejected": 0.4269208312034607, + "logps/chosen": -1.0550119876861572, + "logps/rejected": -6.451422214508057, + "loss": 1.3671, + "nll_loss": 1.1802783012390137, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1055012047290802, + "rewards/margins": 0.5396410226821899, + "rewards/rejected": -0.6451422572135925, + "step": 112 + }, + { + "epoch": 0.2209188660801564, + "grad_norm": 0.6633080840110779, + "learning_rate": 4.6477495107632094e-05, + "log_odds_chosen": 3.4108800888061523, + "log_odds_ratio": -0.2886888086795807, + "logits/chosen": 1.3004437685012817, + "logits/rejected": 0.639487624168396, + "logps/chosen": -1.1085131168365479, + "logps/rejected": -4.270345687866211, + "loss": 1.3802, + "nll_loss": 1.2177863121032715, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11085130274295807, + "rewards/margins": 0.31618326902389526, + "rewards/rejected": -0.42703455686569214, + "step": 113 + }, + { + "epoch": 0.22287390029325513, + "grad_norm": 0.7017490267753601, + "learning_rate": 4.644487932159165e-05, + "log_odds_chosen": 2.923975944519043, + "log_odds_ratio": -0.30726999044418335, + "logits/chosen": 0.8171277642250061, + "logits/rejected": 0.6966362595558167, + "logps/chosen": -1.3131732940673828, + "logps/rejected": -4.064748764038086, + "loss": 1.3802, + "nll_loss": 1.433335781097412, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13131731748580933, + "rewards/margins": 0.2751576006412506, + "rewards/rejected": -0.40647488832473755, + "step": 114 + }, + { + "epoch": 0.22482893450635386, + "grad_norm": 0.7356051206588745, + "learning_rate": 4.641226353555121e-05, + "log_odds_chosen": 6.167659282684326, + "log_odds_ratio": -0.11911168694496155, + "logits/chosen": 1.3091119527816772, + "logits/rejected": -0.042340561747550964, + "logps/chosen": -1.198789358139038, + "logps/rejected": -7.046017646789551, + "loss": 1.3659, + "nll_loss": 1.2870309352874756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11987894773483276, + "rewards/margins": 0.5847228169441223, + "rewards/rejected": -0.7046017646789551, + "step": 115 + }, + { + "epoch": 0.22678396871945258, + "grad_norm": 0.8499444723129272, + "learning_rate": 4.6379647749510765e-05, + "log_odds_chosen": 8.591011047363281, + "log_odds_ratio": -0.19548369944095612, + "logits/chosen": 1.43937349319458, + "logits/rejected": -0.21641704440116882, + "logps/chosen": -1.253410816192627, + "logps/rejected": -9.59046745300293, + "loss": 1.3813, + "nll_loss": 1.3738594055175781, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12534108757972717, + "rewards/margins": 0.8337056636810303, + "rewards/rejected": -0.9590467214584351, + "step": 116 + }, + { + "epoch": 0.2287390029325513, + "grad_norm": 0.7445962429046631, + "learning_rate": 4.6347031963470325e-05, + "log_odds_chosen": 8.240190505981445, + "log_odds_ratio": -0.1408553272485733, + "logits/chosen": 1.1423077583312988, + "logits/rejected": 0.12755347788333893, + "logps/chosen": -1.1600462198257446, + "logps/rejected": -9.047393798828125, + "loss": 1.3813, + "nll_loss": 1.4043207168579102, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11600461602210999, + "rewards/margins": 0.7887347340583801, + "rewards/rejected": -0.9047393798828125, + "step": 117 + }, + { + "epoch": 0.23069403714565004, + "grad_norm": 0.6804081201553345, + "learning_rate": 4.631441617742988e-05, + "log_odds_chosen": 3.19157075881958, + "log_odds_ratio": -0.3856373131275177, + "logits/chosen": 1.1696404218673706, + "logits/rejected": 0.7217961549758911, + "logps/chosen": -1.5237408876419067, + "logps/rejected": -4.58286714553833, + "loss": 1.3767, + "nll_loss": 1.4274024963378906, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.15237408876419067, + "rewards/margins": 0.30591264367103577, + "rewards/rejected": -0.45828676223754883, + "step": 118 + }, + { + "epoch": 0.23264907135874877, + "grad_norm": 0.8376771211624146, + "learning_rate": 4.628180039138944e-05, + "log_odds_chosen": 5.947774410247803, + "log_odds_ratio": -0.19070857763290405, + "logits/chosen": 1.208172082901001, + "logits/rejected": 0.493600070476532, + "logps/chosen": -1.3619062900543213, + "logps/rejected": -7.071653842926025, + "loss": 1.3913, + "nll_loss": 1.4531872272491455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13619063794612885, + "rewards/margins": 0.5709747076034546, + "rewards/rejected": -0.7071653604507446, + "step": 119 + }, + { + "epoch": 0.23460410557184752, + "grad_norm": 0.941291868686676, + "learning_rate": 4.624918460534899e-05, + "log_odds_chosen": 3.3939456939697266, + "log_odds_ratio": -0.3366093635559082, + "logits/chosen": 1.5587563514709473, + "logits/rejected": 0.8702186942100525, + "logps/chosen": -1.240950584411621, + "logps/rejected": -4.4199137687683105, + "loss": 1.406, + "nll_loss": 1.4428961277008057, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12409505248069763, + "rewards/margins": 0.3178963363170624, + "rewards/rejected": -0.4419914186000824, + "step": 120 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 0.8417291641235352, + "learning_rate": 4.621656881930855e-05, + "log_odds_chosen": 7.617626190185547, + "log_odds_ratio": -0.30877989530563354, + "logits/chosen": 1.2773478031158447, + "logits/rejected": 0.12731142342090607, + "logps/chosen": -1.3664320707321167, + "logps/rejected": -8.73999309539795, + "loss": 1.3771, + "nll_loss": 1.5289790630340576, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1366432011127472, + "rewards/margins": 0.7373560667037964, + "rewards/rejected": -0.8739993572235107, + "step": 121 + }, + { + "epoch": 0.23851417399804498, + "grad_norm": 0.8367831707000732, + "learning_rate": 4.61839530332681e-05, + "log_odds_chosen": 6.716846942901611, + "log_odds_ratio": -0.23841995000839233, + "logits/chosen": 1.4331316947937012, + "logits/rejected": 0.07170508801937103, + "logps/chosen": -1.250941276550293, + "logps/rejected": -7.7253618240356445, + "loss": 1.3701, + "nll_loss": 1.3259763717651367, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12509413063526154, + "rewards/margins": 0.6474420428276062, + "rewards/rejected": -0.7725361585617065, + "step": 122 + }, + { + "epoch": 0.2404692082111437, + "grad_norm": 0.775296151638031, + "learning_rate": 4.615133724722766e-05, + "log_odds_chosen": 4.496191024780273, + "log_odds_ratio": -0.47254377603530884, + "logits/chosen": 1.6649961471557617, + "logits/rejected": 0.5631541013717651, + "logps/chosen": -1.3218774795532227, + "logps/rejected": -5.701331615447998, + "loss": 1.3856, + "nll_loss": 1.4578332901000977, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.13218775391578674, + "rewards/margins": 0.4379453659057617, + "rewards/rejected": -0.5701331496238708, + "step": 123 + }, + { + "epoch": 0.24242424242424243, + "grad_norm": 0.7473137974739075, + "learning_rate": 4.6118721461187214e-05, + "log_odds_chosen": 7.064208984375, + "log_odds_ratio": -0.08943454176187515, + "logits/chosen": 1.3410427570343018, + "logits/rejected": 0.2058267444372177, + "logps/chosen": -1.3738852739334106, + "logps/rejected": -8.164815902709961, + "loss": 1.3605, + "nll_loss": 1.4776184558868408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13738852739334106, + "rewards/margins": 0.6790930032730103, + "rewards/rejected": -0.8164815902709961, + "step": 124 + }, + { + "epoch": 0.24437927663734116, + "grad_norm": 1.0111122131347656, + "learning_rate": 4.608610567514677e-05, + "log_odds_chosen": 3.490861177444458, + "log_odds_ratio": -0.2653139531612396, + "logits/chosen": 1.509355902671814, + "logits/rejected": 0.682778537273407, + "logps/chosen": -1.3564157485961914, + "logps/rejected": -4.659384727478027, + "loss": 1.3754, + "nll_loss": 1.4030632972717285, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13564157485961914, + "rewards/margins": 0.3302968740463257, + "rewards/rejected": -0.4659384489059448, + "step": 125 + }, + { + "epoch": 0.24633431085043989, + "grad_norm": 0.9135308265686035, + "learning_rate": 4.6053489889106326e-05, + "log_odds_chosen": 6.929924011230469, + "log_odds_ratio": -0.21844923496246338, + "logits/chosen": 1.2736010551452637, + "logits/rejected": 0.45289134979248047, + "logps/chosen": -1.3937616348266602, + "logps/rejected": -8.079181671142578, + "loss": 1.3644, + "nll_loss": 1.548231601715088, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13937616348266602, + "rewards/margins": 0.6685420274734497, + "rewards/rejected": -0.8079182505607605, + "step": 126 + }, + { + "epoch": 0.2482893450635386, + "grad_norm": 0.6578118801116943, + "learning_rate": 4.6020874103065885e-05, + "log_odds_chosen": 6.317380428314209, + "log_odds_ratio": -0.20841491222381592, + "logits/chosen": 1.010016679763794, + "logits/rejected": 0.1282709538936615, + "logps/chosen": -1.2062023878097534, + "logps/rejected": -7.190776824951172, + "loss": 1.3476, + "nll_loss": 1.2709475755691528, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1206202432513237, + "rewards/margins": 0.5984574556350708, + "rewards/rejected": -0.7190777063369751, + "step": 127 + }, + { + "epoch": 0.25024437927663734, + "grad_norm": 0.9637512564659119, + "learning_rate": 4.598825831702544e-05, + "log_odds_chosen": 3.552827835083008, + "log_odds_ratio": -0.287852942943573, + "logits/chosen": 1.243557095527649, + "logits/rejected": 0.48955318331718445, + "logps/chosen": -1.1572327613830566, + "logps/rejected": -4.4538140296936035, + "loss": 1.3534, + "nll_loss": 1.2624359130859375, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11572328209877014, + "rewards/margins": 0.3296581506729126, + "rewards/rejected": -0.4453814625740051, + "step": 128 + }, + { + "epoch": 0.25219941348973607, + "grad_norm": 0.928742527961731, + "learning_rate": 4.5955642530985e-05, + "log_odds_chosen": 6.844118118286133, + "log_odds_ratio": -0.07751336693763733, + "logits/chosen": 1.5881102085113525, + "logits/rejected": 0.24238908290863037, + "logps/chosen": -1.399533987045288, + "logps/rejected": -7.995414733886719, + "loss": 1.3563, + "nll_loss": 1.51485013961792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1399534046649933, + "rewards/margins": 0.659588098526001, + "rewards/rejected": -0.7995414733886719, + "step": 129 + }, + { + "epoch": 0.2541544477028348, + "grad_norm": 0.834551215171814, + "learning_rate": 4.592302674494455e-05, + "log_odds_chosen": 7.607309341430664, + "log_odds_ratio": -0.18511465191841125, + "logits/chosen": 1.202840805053711, + "logits/rejected": -0.022417571395635605, + "logps/chosen": -1.3398054838180542, + "logps/rejected": -8.723930358886719, + "loss": 1.3622, + "nll_loss": 1.4916731119155884, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13398054242134094, + "rewards/margins": 0.7384124994277954, + "rewards/rejected": -0.8723931312561035, + "step": 130 + }, + { + "epoch": 0.2561094819159335, + "grad_norm": 0.8256898522377014, + "learning_rate": 4.589041095890411e-05, + "log_odds_chosen": 2.890697717666626, + "log_odds_ratio": -0.24942034482955933, + "logits/chosen": 1.3113644123077393, + "logits/rejected": 0.4996740520000458, + "logps/chosen": -1.1963205337524414, + "logps/rejected": -3.798494577407837, + "loss": 1.3579, + "nll_loss": 1.2446264028549194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1196320503950119, + "rewards/margins": 0.26021742820739746, + "rewards/rejected": -0.37984949350357056, + "step": 131 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.7764825224876404, + "learning_rate": 4.585779517286366e-05, + "log_odds_chosen": 3.5723087787628174, + "log_odds_ratio": -0.38138964772224426, + "logits/chosen": 1.0032000541687012, + "logits/rejected": 0.5669847726821899, + "logps/chosen": -1.2294442653656006, + "logps/rejected": -4.652216911315918, + "loss": 1.3484, + "nll_loss": 1.329392910003662, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1229444295167923, + "rewards/margins": 0.34227728843688965, + "rewards/rejected": -0.46522170305252075, + "step": 132 + }, + { + "epoch": 0.260019550342131, + "grad_norm": 0.8847340941429138, + "learning_rate": 4.582517938682322e-05, + "log_odds_chosen": 5.380949974060059, + "log_odds_ratio": -0.17023128271102905, + "logits/chosen": 1.3217450380325317, + "logits/rejected": 0.54442298412323, + "logps/chosen": -1.147449016571045, + "logps/rejected": -6.226014137268066, + "loss": 1.3626, + "nll_loss": 1.217560052871704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11474491655826569, + "rewards/margins": 0.5078564882278442, + "rewards/rejected": -0.6226014494895935, + "step": 133 + }, + { + "epoch": 0.2619745845552297, + "grad_norm": 0.7047170996665955, + "learning_rate": 4.579256360078278e-05, + "log_odds_chosen": 6.7440056800842285, + "log_odds_ratio": -0.13314080238342285, + "logits/chosen": 1.0640201568603516, + "logits/rejected": 0.2338232696056366, + "logps/chosen": -1.0820674896240234, + "logps/rejected": -7.431894302368164, + "loss": 1.3426, + "nll_loss": 1.1964459419250488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10820674896240234, + "rewards/margins": 0.634982705116272, + "rewards/rejected": -0.7431894540786743, + "step": 134 + }, + { + "epoch": 0.26392961876832843, + "grad_norm": 0.8741002678871155, + "learning_rate": 4.575994781474234e-05, + "log_odds_chosen": 6.763589859008789, + "log_odds_ratio": -0.2075534462928772, + "logits/chosen": 1.1034576892852783, + "logits/rejected": 0.02155705913901329, + "logps/chosen": -1.2813842296600342, + "logps/rejected": -7.808471202850342, + "loss": 1.3387, + "nll_loss": 1.2994283437728882, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12813842296600342, + "rewards/margins": 0.6527087688446045, + "rewards/rejected": -0.7808471918106079, + "step": 135 + }, + { + "epoch": 0.26588465298142716, + "grad_norm": 0.685775876045227, + "learning_rate": 4.5727332028701894e-05, + "log_odds_chosen": 8.01341438293457, + "log_odds_ratio": -0.06948719173669815, + "logits/chosen": 1.1701428890228271, + "logits/rejected": 0.0885528177022934, + "logps/chosen": -1.1079585552215576, + "logps/rejected": -8.763409614562988, + "loss": 1.3522, + "nll_loss": 1.1962952613830566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11079585552215576, + "rewards/margins": 0.7655450105667114, + "rewards/rejected": -0.8763408660888672, + "step": 136 + }, + { + "epoch": 0.2678396871945259, + "grad_norm": 0.8080558180809021, + "learning_rate": 4.569471624266145e-05, + "log_odds_chosen": 10.438997268676758, + "log_odds_ratio": -0.06861001998186111, + "logits/chosen": 1.129025936126709, + "logits/rejected": 0.06703589856624603, + "logps/chosen": -1.2248647212982178, + "logps/rejected": -11.306135177612305, + "loss": 1.3553, + "nll_loss": 1.3655122518539429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12248647212982178, + "rewards/margins": 1.0081270933151245, + "rewards/rejected": -1.1306135654449463, + "step": 137 + }, + { + "epoch": 0.2697947214076246, + "grad_norm": 0.837350070476532, + "learning_rate": 4.5662100456621006e-05, + "log_odds_chosen": 6.88880729675293, + "log_odds_ratio": -0.17550794780254364, + "logits/chosen": 1.2030504941940308, + "logits/rejected": 0.2826595902442932, + "logps/chosen": -1.2054929733276367, + "logps/rejected": -7.797959804534912, + "loss": 1.3275, + "nll_loss": 1.2336819171905518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1205492913722992, + "rewards/margins": 0.6592466831207275, + "rewards/rejected": -0.7797960042953491, + "step": 138 + }, + { + "epoch": 0.27174975562072334, + "grad_norm": 0.7500559687614441, + "learning_rate": 4.5629484670580565e-05, + "log_odds_chosen": 3.7631964683532715, + "log_odds_ratio": -0.3830460011959076, + "logits/chosen": 1.4691882133483887, + "logits/rejected": 0.5620991587638855, + "logps/chosen": -1.4934468269348145, + "logps/rejected": -5.090549945831299, + "loss": 1.3494, + "nll_loss": 1.4751865863800049, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14934468269348145, + "rewards/margins": 0.35971033573150635, + "rewards/rejected": -0.5090550184249878, + "step": 139 + }, + { + "epoch": 0.27370478983382207, + "grad_norm": 0.7565537691116333, + "learning_rate": 4.559686888454012e-05, + "log_odds_chosen": 5.911233901977539, + "log_odds_ratio": -0.2114151120185852, + "logits/chosen": 1.1663533449172974, + "logits/rejected": 0.32799261808395386, + "logps/chosen": -1.1303192377090454, + "logps/rejected": -6.709089279174805, + "loss": 1.3111, + "nll_loss": 1.1939606666564941, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11303191632032394, + "rewards/margins": 0.5578770041465759, + "rewards/rejected": -0.6709089279174805, + "step": 140 + }, + { + "epoch": 0.2756598240469208, + "grad_norm": 0.7639319896697998, + "learning_rate": 4.556425309849968e-05, + "log_odds_chosen": 4.193432331085205, + "log_odds_ratio": -0.2800212502479553, + "logits/chosen": 0.8668926954269409, + "logits/rejected": 0.36053431034088135, + "logps/chosen": -1.4837689399719238, + "logps/rejected": -5.480234622955322, + "loss": 1.3411, + "nll_loss": 1.5312228202819824, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14837688207626343, + "rewards/margins": 0.3996465504169464, + "rewards/rejected": -0.5480234622955322, + "step": 141 + }, + { + "epoch": 0.2776148582600195, + "grad_norm": 0.8129464387893677, + "learning_rate": 4.553163731245923e-05, + "log_odds_chosen": 7.251906394958496, + "log_odds_ratio": -0.2060183882713318, + "logits/chosen": 1.380979299545288, + "logits/rejected": 0.17799563705921173, + "logps/chosen": -1.2492822408676147, + "logps/rejected": -8.255239486694336, + "loss": 1.3252, + "nll_loss": 1.326695442199707, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12492823600769043, + "rewards/margins": 0.7005957365036011, + "rewards/rejected": -0.8255239725112915, + "step": 142 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 0.7189015746116638, + "learning_rate": 4.549902152641879e-05, + "log_odds_chosen": 3.299474000930786, + "log_odds_ratio": -0.48445332050323486, + "logits/chosen": 0.98194420337677, + "logits/rejected": 0.5005861520767212, + "logps/chosen": -1.2597136497497559, + "logps/rejected": -4.495794773101807, + "loss": 1.3469, + "nll_loss": 1.3174867630004883, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.12597137689590454, + "rewards/margins": 0.3236081302165985, + "rewards/rejected": -0.44957950711250305, + "step": 143 + }, + { + "epoch": 0.28152492668621704, + "grad_norm": 0.6578760147094727, + "learning_rate": 4.546640574037834e-05, + "log_odds_chosen": 10.804450035095215, + "log_odds_ratio": -0.2453923523426056, + "logits/chosen": 1.273078203201294, + "logits/rejected": 0.09464335441589355, + "logps/chosen": -1.2187954187393188, + "logps/rejected": -11.746915817260742, + "loss": 1.3253, + "nll_loss": 1.286625862121582, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12187954783439636, + "rewards/margins": 1.0528119802474976, + "rewards/rejected": -1.1746915578842163, + "step": 144 + }, + { + "epoch": 0.28347996089931576, + "grad_norm": 0.6698490381240845, + "learning_rate": 4.54337899543379e-05, + "log_odds_chosen": 10.561488151550293, + "log_odds_ratio": -0.08453737944364548, + "logits/chosen": 1.2613013982772827, + "logits/rejected": -0.3837907314300537, + "logps/chosen": -1.1582547426223755, + "logps/rejected": -11.355853080749512, + "loss": 1.303, + "nll_loss": 1.1345750093460083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11582547426223755, + "rewards/margins": 1.0197597742080688, + "rewards/rejected": -1.1355853080749512, + "step": 145 + }, + { + "epoch": 0.2854349951124145, + "grad_norm": 0.7142356038093567, + "learning_rate": 4.5401174168297455e-05, + "log_odds_chosen": 9.374436378479004, + "log_odds_ratio": -0.2919524013996124, + "logits/chosen": 1.0752935409545898, + "logits/rejected": 0.07957415282726288, + "logps/chosen": -1.1352649927139282, + "logps/rejected": -10.226885795593262, + "loss": 1.3368, + "nll_loss": 1.3018205165863037, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11352649331092834, + "rewards/margins": 0.9091621041297913, + "rewards/rejected": -1.022688627243042, + "step": 146 + }, + { + "epoch": 0.2873900293255132, + "grad_norm": 0.6986067891120911, + "learning_rate": 4.5368558382257014e-05, + "log_odds_chosen": 6.8174357414245605, + "log_odds_ratio": -0.38542747497558594, + "logits/chosen": 1.2543253898620605, + "logits/rejected": 0.3490453362464905, + "logps/chosen": -1.0824544429779053, + "logps/rejected": -7.673934459686279, + "loss": 1.3128, + "nll_loss": 1.1624630689620972, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10824544727802277, + "rewards/margins": 0.659148097038269, + "rewards/rejected": -0.7673934698104858, + "step": 147 + }, + { + "epoch": 0.28934506353861195, + "grad_norm": 0.6737425923347473, + "learning_rate": 4.533594259621657e-05, + "log_odds_chosen": 7.005639553070068, + "log_odds_ratio": -0.16293057799339294, + "logits/chosen": 1.1431665420532227, + "logits/rejected": 0.11543533951044083, + "logps/chosen": -1.1079386472702026, + "logps/rejected": -7.691783428192139, + "loss": 1.3086, + "nll_loss": 1.2645106315612793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11079386621713638, + "rewards/margins": 0.6583845019340515, + "rewards/rejected": -0.7691783905029297, + "step": 148 + }, + { + "epoch": 0.2913000977517107, + "grad_norm": 0.7016736268997192, + "learning_rate": 4.5303326810176126e-05, + "log_odds_chosen": 6.777936935424805, + "log_odds_ratio": -0.339383065700531, + "logits/chosen": 1.1777927875518799, + "logits/rejected": 0.37596338987350464, + "logps/chosen": -1.1733481884002686, + "logps/rejected": -7.750881195068359, + "loss": 1.3311, + "nll_loss": 1.1365373134613037, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11733481287956238, + "rewards/margins": 0.6577532291412354, + "rewards/rejected": -0.7750881314277649, + "step": 149 + }, + { + "epoch": 0.2932551319648094, + "grad_norm": 0.6708948612213135, + "learning_rate": 4.527071102413568e-05, + "log_odds_chosen": 9.599726676940918, + "log_odds_ratio": -0.22605453431606293, + "logits/chosen": 0.9147375226020813, + "logits/rejected": -0.13359951972961426, + "logps/chosen": -1.1798818111419678, + "logps/rejected": -10.534637451171875, + "loss": 1.3236, + "nll_loss": 1.12567138671875, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11798818409442902, + "rewards/margins": 0.9354755878448486, + "rewards/rejected": -1.0534636974334717, + "step": 150 + }, + { + "epoch": 0.29521016617790813, + "grad_norm": 0.7134934663772583, + "learning_rate": 4.523809523809524e-05, + "log_odds_chosen": 4.593421936035156, + "log_odds_ratio": -0.291809618473053, + "logits/chosen": 0.9548784494400024, + "logits/rejected": 0.3291575014591217, + "logps/chosen": -1.332450032234192, + "logps/rejected": -5.7139434814453125, + "loss": 1.314, + "nll_loss": 1.463289499282837, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13324500620365143, + "rewards/margins": 0.4381493031978607, + "rewards/rejected": -0.5713943243026733, + "step": 151 + }, + { + "epoch": 0.29716520039100686, + "grad_norm": 0.6319467425346375, + "learning_rate": 4.520547945205479e-05, + "log_odds_chosen": 6.728297233581543, + "log_odds_ratio": -0.3091104030609131, + "logits/chosen": 1.1306414604187012, + "logits/rejected": 0.08857578039169312, + "logps/chosen": -1.2623155117034912, + "logps/rejected": -7.777181625366211, + "loss": 1.2933, + "nll_loss": 1.266621708869934, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12623155117034912, + "rewards/margins": 0.6514866352081299, + "rewards/rejected": -0.7777181267738342, + "step": 152 + }, + { + "epoch": 0.2991202346041056, + "grad_norm": 0.7018750905990601, + "learning_rate": 4.517286366601435e-05, + "log_odds_chosen": 7.7447052001953125, + "log_odds_ratio": -0.2908017039299011, + "logits/chosen": 1.3486583232879639, + "logits/rejected": 0.23236782848834991, + "logps/chosen": -1.4751003980636597, + "logps/rejected": -9.058426856994629, + "loss": 1.3249, + "nll_loss": 1.560368299484253, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.14751005172729492, + "rewards/margins": 0.7583326697349548, + "rewards/rejected": -0.905842661857605, + "step": 153 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.6442519426345825, + "learning_rate": 4.514024787997391e-05, + "log_odds_chosen": 5.587681293487549, + "log_odds_ratio": -0.210578054189682, + "logits/chosen": 1.0321848392486572, + "logits/rejected": 0.45235908031463623, + "logps/chosen": -1.176702857017517, + "logps/rejected": -6.475557327270508, + "loss": 1.286, + "nll_loss": 1.2135465145111084, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11767027527093887, + "rewards/margins": 0.529885470867157, + "rewards/rejected": -0.6475557684898376, + "step": 154 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 0.6347115635871887, + "learning_rate": 4.510763209393347e-05, + "log_odds_chosen": 5.344383239746094, + "log_odds_ratio": -0.31450244784355164, + "logits/chosen": 0.9138493537902832, + "logits/rejected": 0.15367649495601654, + "logps/chosen": -1.2387139797210693, + "logps/rejected": -6.373169898986816, + "loss": 1.3183, + "nll_loss": 1.3146356344223022, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12387140095233917, + "rewards/margins": 0.5134456157684326, + "rewards/rejected": -0.6373169422149658, + "step": 155 + }, + { + "epoch": 0.30498533724340177, + "grad_norm": 0.6473566889762878, + "learning_rate": 4.507501630789302e-05, + "log_odds_chosen": 12.419679641723633, + "log_odds_ratio": -0.24708393216133118, + "logits/chosen": 0.8284097909927368, + "logits/rejected": -0.25541654229164124, + "logps/chosen": -1.1512467861175537, + "logps/rejected": -13.324753761291504, + "loss": 1.2967, + "nll_loss": 1.2504405975341797, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11512468010187149, + "rewards/margins": 1.217350721359253, + "rewards/rejected": -1.3324753046035767, + "step": 156 + }, + { + "epoch": 0.3069403714565005, + "grad_norm": 0.6593970060348511, + "learning_rate": 4.504240052185258e-05, + "log_odds_chosen": 6.7596845626831055, + "log_odds_ratio": -0.16296730935573578, + "logits/chosen": 1.2335219383239746, + "logits/rejected": -0.30017346143722534, + "logps/chosen": -1.232032299041748, + "logps/rejected": -7.697624206542969, + "loss": 1.2887, + "nll_loss": 1.3438351154327393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12320324778556824, + "rewards/margins": 0.6465592384338379, + "rewards/rejected": -0.7697624564170837, + "step": 157 + }, + { + "epoch": 0.3088954056695992, + "grad_norm": 0.7469980716705322, + "learning_rate": 4.5009784735812134e-05, + "log_odds_chosen": 7.449018955230713, + "log_odds_ratio": -0.21003535389900208, + "logits/chosen": 1.2491610050201416, + "logits/rejected": -0.33202117681503296, + "logps/chosen": -1.2422571182250977, + "logps/rejected": -8.443574905395508, + "loss": 1.3301, + "nll_loss": 1.279083013534546, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12422572076320648, + "rewards/margins": 0.7201318740844727, + "rewards/rejected": -0.8443576097488403, + "step": 158 + }, + { + "epoch": 0.31085043988269795, + "grad_norm": 0.6834777593612671, + "learning_rate": 4.4977168949771694e-05, + "log_odds_chosen": 2.2118043899536133, + "log_odds_ratio": -0.28867554664611816, + "logits/chosen": 1.0162633657455444, + "logits/rejected": 0.57472825050354, + "logps/chosen": -0.9805067777633667, + "logps/rejected": -2.8771982192993164, + "loss": 1.2963, + "nll_loss": 0.9965966939926147, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09805068373680115, + "rewards/margins": 0.18966913223266602, + "rewards/rejected": -0.28771981596946716, + "step": 159 + }, + { + "epoch": 0.3128054740957967, + "grad_norm": 0.6686550378799438, + "learning_rate": 4.4944553163731246e-05, + "log_odds_chosen": 9.388495445251465, + "log_odds_ratio": -0.18994244933128357, + "logits/chosen": 1.1412005424499512, + "logits/rejected": -0.5078994035720825, + "logps/chosen": -1.126765251159668, + "logps/rejected": -10.204002380371094, + "loss": 1.3129, + "nll_loss": 1.2409747838974, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11267653107643127, + "rewards/margins": 0.9077237844467163, + "rewards/rejected": -1.0204001665115356, + "step": 160 + }, + { + "epoch": 0.3147605083088954, + "grad_norm": 0.6696754097938538, + "learning_rate": 4.4911937377690806e-05, + "log_odds_chosen": 7.901925563812256, + "log_odds_ratio": -0.25689083337783813, + "logits/chosen": 0.8174062967300415, + "logits/rejected": 0.19935843348503113, + "logps/chosen": -1.4657707214355469, + "logps/rejected": -9.185319900512695, + "loss": 1.3027, + "nll_loss": 1.4969499111175537, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.14657707512378693, + "rewards/margins": 0.7719548940658569, + "rewards/rejected": -0.9185319542884827, + "step": 161 + }, + { + "epoch": 0.31671554252199413, + "grad_norm": 0.657768726348877, + "learning_rate": 4.487932159165036e-05, + "log_odds_chosen": 7.604282379150391, + "log_odds_ratio": -0.3189427852630615, + "logits/chosen": 1.1892476081848145, + "logits/rejected": 0.2148885279893875, + "logps/chosen": -1.1731646060943604, + "logps/rejected": -8.537515640258789, + "loss": 1.2806, + "nll_loss": 1.2350025177001953, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11731645464897156, + "rewards/margins": 0.7364351153373718, + "rewards/rejected": -0.853751540184021, + "step": 162 + }, + { + "epoch": 0.31867057673509286, + "grad_norm": 0.6995986104011536, + "learning_rate": 4.484670580560992e-05, + "log_odds_chosen": 10.648557662963867, + "log_odds_ratio": -0.1492287665605545, + "logits/chosen": 0.7388175129890442, + "logits/rejected": -0.45485547184944153, + "logps/chosen": -1.0501000881195068, + "logps/rejected": -11.312532424926758, + "loss": 1.2972, + "nll_loss": 1.2048826217651367, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1050100103020668, + "rewards/margins": 1.0262432098388672, + "rewards/rejected": -1.1312532424926758, + "step": 163 + }, + { + "epoch": 0.3206256109481916, + "grad_norm": 0.6905623078346252, + "learning_rate": 4.481409001956947e-05, + "log_odds_chosen": 8.147631645202637, + "log_odds_ratio": -0.1537928432226181, + "logits/chosen": 1.2463170289993286, + "logits/rejected": -0.008779309689998627, + "logps/chosen": -1.129638433456421, + "logps/rejected": -8.92544174194336, + "loss": 1.274, + "nll_loss": 1.2035000324249268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11296384036540985, + "rewards/margins": 0.779580295085907, + "rewards/rejected": -0.892544150352478, + "step": 164 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.6554829478263855, + "learning_rate": 4.478147423352903e-05, + "log_odds_chosen": 9.498098373413086, + "log_odds_ratio": -0.17203450202941895, + "logits/chosen": 0.7376835346221924, + "logits/rejected": 0.35444575548171997, + "logps/chosen": -1.369474172592163, + "logps/rejected": -10.63492202758789, + "loss": 1.2974, + "nll_loss": 1.6062145233154297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1369474232196808, + "rewards/margins": 0.9265449047088623, + "rewards/rejected": -1.0634922981262207, + "step": 165 + }, + { + "epoch": 0.32453567937438904, + "grad_norm": 0.6752935647964478, + "learning_rate": 4.474885844748858e-05, + "log_odds_chosen": 5.583610534667969, + "log_odds_ratio": -0.3088548183441162, + "logits/chosen": 1.134658694267273, + "logits/rejected": 0.3457709848880768, + "logps/chosen": -1.114492654800415, + "logps/rejected": -6.420201778411865, + "loss": 1.2781, + "nll_loss": 1.228858232498169, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11144927144050598, + "rewards/margins": 0.5305709838867188, + "rewards/rejected": -0.6420202255249023, + "step": 166 + }, + { + "epoch": 0.32649071358748777, + "grad_norm": 0.6546787023544312, + "learning_rate": 4.471624266144814e-05, + "log_odds_chosen": 5.695281028747559, + "log_odds_ratio": -0.26528289914131165, + "logits/chosen": 0.764804482460022, + "logits/rejected": 0.45924830436706543, + "logps/chosen": -1.1505049467086792, + "logps/rejected": -6.587752342224121, + "loss": 1.2807, + "nll_loss": 1.2051739692687988, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11505050957202911, + "rewards/margins": 0.5437247157096863, + "rewards/rejected": -0.6587752103805542, + "step": 167 + }, + { + "epoch": 0.3284457478005865, + "grad_norm": 0.684111475944519, + "learning_rate": 4.4683626875407695e-05, + "log_odds_chosen": 2.636181116104126, + "log_odds_ratio": -0.4040669798851013, + "logits/chosen": 1.1404447555541992, + "logits/rejected": 0.6625574827194214, + "logps/chosen": -1.0098929405212402, + "logps/rejected": -3.4254016876220703, + "loss": 1.2888, + "nll_loss": 1.1069860458374023, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.10098929703235626, + "rewards/margins": 0.24155089259147644, + "rewards/rejected": -0.3425401747226715, + "step": 168 + }, + { + "epoch": 0.3304007820136852, + "grad_norm": 0.6739201545715332, + "learning_rate": 4.4651011089367255e-05, + "log_odds_chosen": 5.879755020141602, + "log_odds_ratio": -0.23205450177192688, + "logits/chosen": 1.2117037773132324, + "logits/rejected": 0.29385611414909363, + "logps/chosen": -1.300632119178772, + "logps/rejected": -6.930560111999512, + "loss": 1.2909, + "nll_loss": 1.362748146057129, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13006320595741272, + "rewards/margins": 0.5629928112030029, + "rewards/rejected": -0.6930561065673828, + "step": 169 + }, + { + "epoch": 0.33235581622678395, + "grad_norm": 0.6655233502388, + "learning_rate": 4.461839530332681e-05, + "log_odds_chosen": 8.32658576965332, + "log_odds_ratio": -0.281083881855011, + "logits/chosen": 1.1393965482711792, + "logits/rejected": 0.21437595784664154, + "logps/chosen": -1.2904281616210938, + "logps/rejected": -9.400596618652344, + "loss": 1.2779, + "nll_loss": 1.4751179218292236, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1290428191423416, + "rewards/margins": 0.8110169172286987, + "rewards/rejected": -0.9400597214698792, + "step": 170 + }, + { + "epoch": 0.3343108504398827, + "grad_norm": 0.6560928225517273, + "learning_rate": 4.458577951728637e-05, + "log_odds_chosen": 3.9552299976348877, + "log_odds_ratio": -0.42670008540153503, + "logits/chosen": 0.9724131226539612, + "logits/rejected": 0.27413076162338257, + "logps/chosen": -1.300279140472412, + "logps/rejected": -5.070854663848877, + "loss": 1.2841, + "nll_loss": 1.3250243663787842, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.1300279051065445, + "rewards/margins": 0.3770574927330017, + "rewards/rejected": -0.5070854425430298, + "step": 171 + }, + { + "epoch": 0.3362658846529814, + "grad_norm": 0.642482578754425, + "learning_rate": 4.455316373124592e-05, + "log_odds_chosen": 8.281599044799805, + "log_odds_ratio": -0.10941663384437561, + "logits/chosen": 1.2929105758666992, + "logits/rejected": -0.027250394225120544, + "logps/chosen": -1.322333574295044, + "logps/rejected": -9.331554412841797, + "loss": 1.2793, + "nll_loss": 1.340207815170288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1322333663702011, + "rewards/margins": 0.8009219765663147, + "rewards/rejected": -0.9331554174423218, + "step": 172 + }, + { + "epoch": 0.33822091886608013, + "grad_norm": 0.666562020778656, + "learning_rate": 4.452054794520548e-05, + "log_odds_chosen": 5.579797267913818, + "log_odds_ratio": -0.3486769199371338, + "logits/chosen": 1.2528607845306396, + "logits/rejected": 0.35565102100372314, + "logps/chosen": -1.1642118692398071, + "logps/rejected": -6.502310752868652, + "loss": 1.2928, + "nll_loss": 1.23642098903656, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11642119288444519, + "rewards/margins": 0.5338099002838135, + "rewards/rejected": -0.650231122970581, + "step": 173 + }, + { + "epoch": 0.34017595307917886, + "grad_norm": 0.6391333341598511, + "learning_rate": 4.448793215916504e-05, + "log_odds_chosen": 7.693410873413086, + "log_odds_ratio": -0.1888713836669922, + "logits/chosen": 0.9610038995742798, + "logits/rejected": 0.32492655515670776, + "logps/chosen": -1.0983843803405762, + "logps/rejected": -8.478047370910645, + "loss": 1.2949, + "nll_loss": 1.2646734714508057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10983843356370926, + "rewards/margins": 0.7379662990570068, + "rewards/rejected": -0.8478046655654907, + "step": 174 + }, + { + "epoch": 0.3421309872922776, + "grad_norm": 0.6464778184890747, + "learning_rate": 4.44553163731246e-05, + "log_odds_chosen": 7.1548895835876465, + "log_odds_ratio": -0.20468023419380188, + "logits/chosen": 0.9334843158721924, + "logits/rejected": 0.19493302702903748, + "logps/chosen": -1.4115179777145386, + "logps/rejected": -8.367647171020508, + "loss": 1.2608, + "nll_loss": 1.4891103506088257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1411518007516861, + "rewards/margins": 0.695612907409668, + "rewards/rejected": -0.8367647528648376, + "step": 175 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.6812382340431213, + "learning_rate": 4.442270058708415e-05, + "log_odds_chosen": 5.387096405029297, + "log_odds_ratio": -0.41102975606918335, + "logits/chosen": 1.1839473247528076, + "logits/rejected": 0.44740748405456543, + "logps/chosen": -1.100735068321228, + "logps/rejected": -6.272075176239014, + "loss": 1.2708, + "nll_loss": 1.193727970123291, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.1100735142827034, + "rewards/margins": 0.5171339511871338, + "rewards/rejected": -0.6272075176239014, + "step": 176 + }, + { + "epoch": 0.3460410557184751, + "grad_norm": 0.6404303908348083, + "learning_rate": 4.439008480104371e-05, + "log_odds_chosen": 3.8172059059143066, + "log_odds_ratio": -0.17974162101745605, + "logits/chosen": 1.1884533166885376, + "logits/rejected": -0.09031975269317627, + "logps/chosen": -1.2223758697509766, + "logps/rejected": -4.746357440948486, + "loss": 1.2623, + "nll_loss": 1.298691987991333, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12223759293556213, + "rewards/margins": 0.352398157119751, + "rewards/rejected": -0.4746357202529907, + "step": 177 + }, + { + "epoch": 0.3479960899315738, + "grad_norm": 0.6905381679534912, + "learning_rate": 4.435746901500326e-05, + "log_odds_chosen": 3.811098098754883, + "log_odds_ratio": -0.3175540864467621, + "logits/chosen": 0.7951512932777405, + "logits/rejected": -0.15313050150871277, + "logps/chosen": -1.1517698764801025, + "logps/rejected": -4.757976531982422, + "loss": 1.2752, + "nll_loss": 1.1512203216552734, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.1151769831776619, + "rewards/margins": 0.3606206476688385, + "rewards/rejected": -0.4757976531982422, + "step": 178 + }, + { + "epoch": 0.34995112414467255, + "grad_norm": 0.6388888955116272, + "learning_rate": 4.432485322896282e-05, + "log_odds_chosen": 8.502408981323242, + "log_odds_ratio": -0.16159912943840027, + "logits/chosen": 0.910706639289856, + "logits/rejected": -0.2576729953289032, + "logps/chosen": -1.008789300918579, + "logps/rejected": -9.130575180053711, + "loss": 1.2691, + "nll_loss": 1.062515139579773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10087893158197403, + "rewards/margins": 0.8121786117553711, + "rewards/rejected": -0.9130575656890869, + "step": 179 + }, + { + "epoch": 0.3519061583577713, + "grad_norm": 0.7146406769752502, + "learning_rate": 4.4292237442922375e-05, + "log_odds_chosen": 4.958927631378174, + "log_odds_ratio": -0.3167142868041992, + "logits/chosen": 1.2740206718444824, + "logits/rejected": 0.33013471961021423, + "logps/chosen": -1.2704756259918213, + "logps/rejected": -6.026884078979492, + "loss": 1.2684, + "nll_loss": 1.4416685104370117, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1270475685596466, + "rewards/margins": 0.4756408929824829, + "rewards/rejected": -0.6026885509490967, + "step": 180 + }, + { + "epoch": 0.35386119257087, + "grad_norm": 0.6456431746482849, + "learning_rate": 4.4259621656881934e-05, + "log_odds_chosen": 8.702266693115234, + "log_odds_ratio": -0.17736217379570007, + "logits/chosen": 1.1357327699661255, + "logits/rejected": 0.08993685245513916, + "logps/chosen": -1.2087366580963135, + "logps/rejected": -9.61585807800293, + "loss": 1.2733, + "nll_loss": 1.3588387966156006, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12087367475032806, + "rewards/margins": 0.8407120704650879, + "rewards/rejected": -0.9615857601165771, + "step": 181 + }, + { + "epoch": 0.35581622678396874, + "grad_norm": 0.7535463571548462, + "learning_rate": 4.422700587084149e-05, + "log_odds_chosen": 4.588898181915283, + "log_odds_ratio": -0.3852817118167877, + "logits/chosen": 0.968330979347229, + "logits/rejected": 0.5130836367607117, + "logps/chosen": -1.1459612846374512, + "logps/rejected": -5.511468887329102, + "loss": 1.2741, + "nll_loss": 1.2873775959014893, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11459614336490631, + "rewards/margins": 0.43655070662498474, + "rewards/rejected": -0.5511468648910522, + "step": 182 + }, + { + "epoch": 0.35777126099706746, + "grad_norm": 0.7022663354873657, + "learning_rate": 4.419439008480105e-05, + "log_odds_chosen": 6.309603214263916, + "log_odds_ratio": -0.19436508417129517, + "logits/chosen": 1.2918877601623535, + "logits/rejected": 0.4777633845806122, + "logps/chosen": -1.1524808406829834, + "logps/rejected": -7.137856483459473, + "loss": 1.2697, + "nll_loss": 1.35286283493042, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11524808406829834, + "rewards/margins": 0.5985375642776489, + "rewards/rejected": -0.7137856483459473, + "step": 183 + }, + { + "epoch": 0.3597262952101662, + "grad_norm": 0.6209842562675476, + "learning_rate": 4.41617742987606e-05, + "log_odds_chosen": 8.28361988067627, + "log_odds_ratio": -0.15384000539779663, + "logits/chosen": 0.7374481558799744, + "logits/rejected": -0.34608176350593567, + "logps/chosen": -1.036502718925476, + "logps/rejected": -8.967209815979004, + "loss": 1.2736, + "nll_loss": 1.2251183986663818, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10365027189254761, + "rewards/margins": 0.7930706739425659, + "rewards/rejected": -0.8967210054397583, + "step": 184 + }, + { + "epoch": 0.3616813294232649, + "grad_norm": 0.714485764503479, + "learning_rate": 4.412915851272016e-05, + "log_odds_chosen": 8.26357650756836, + "log_odds_ratio": -0.16277046501636505, + "logits/chosen": 0.7380566596984863, + "logits/rejected": -0.05640576779842377, + "logps/chosen": -1.1000443696975708, + "logps/rejected": -8.991644859313965, + "loss": 1.2525, + "nll_loss": 1.168377161026001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11000443994998932, + "rewards/margins": 0.7891600131988525, + "rewards/rejected": -0.8991645574569702, + "step": 185 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.7311909198760986, + "learning_rate": 4.409654272667971e-05, + "log_odds_chosen": 6.542537689208984, + "log_odds_ratio": -0.28538012504577637, + "logits/chosen": 0.851951003074646, + "logits/rejected": 0.32337623834609985, + "logps/chosen": -1.2731025218963623, + "logps/rejected": -7.581394195556641, + "loss": 1.2662, + "nll_loss": 1.3675457239151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12731026113033295, + "rewards/margins": 0.6308291554450989, + "rewards/rejected": -0.758139431476593, + "step": 186 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 0.6749897599220276, + "learning_rate": 4.406392694063927e-05, + "log_odds_chosen": 8.416498184204102, + "log_odds_ratio": -0.24198018014431, + "logits/chosen": 0.8684228658676147, + "logits/rejected": 0.3889032006263733, + "logps/chosen": -1.0915963649749756, + "logps/rejected": -9.21990966796875, + "loss": 1.2405, + "nll_loss": 1.1655617952346802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10915964841842651, + "rewards/margins": 0.8128314018249512, + "rewards/rejected": -0.9219910502433777, + "step": 187 + }, + { + "epoch": 0.3675464320625611, + "grad_norm": 0.6515994668006897, + "learning_rate": 4.4031311154598824e-05, + "log_odds_chosen": 8.13419246673584, + "log_odds_ratio": -0.08258984237909317, + "logits/chosen": 0.42945629358291626, + "logits/rejected": -0.29234930872917175, + "logps/chosen": -0.8776967525482178, + "logps/rejected": -8.50039291381836, + "loss": 1.2341, + "nll_loss": 1.0900585651397705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08776967227458954, + "rewards/margins": 0.7622696161270142, + "rewards/rejected": -0.8500393629074097, + "step": 188 + }, + { + "epoch": 0.36950146627565983, + "grad_norm": 0.7019903659820557, + "learning_rate": 4.399869536855838e-05, + "log_odds_chosen": 9.48984146118164, + "log_odds_ratio": -0.08697722852230072, + "logits/chosen": 0.7598472833633423, + "logits/rejected": -0.14043010771274567, + "logps/chosen": -1.2110486030578613, + "logps/rejected": -10.350791931152344, + "loss": 1.261, + "nll_loss": 1.4121822118759155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12110485136508942, + "rewards/margins": 0.9139742851257324, + "rewards/rejected": -1.0350791215896606, + "step": 189 + }, + { + "epoch": 0.37145650048875856, + "grad_norm": 0.8576006293296814, + "learning_rate": 4.3966079582517936e-05, + "log_odds_chosen": 7.56829833984375, + "log_odds_ratio": -0.20392444729804993, + "logits/chosen": 0.7384766936302185, + "logits/rejected": 0.08765817433595657, + "logps/chosen": -1.0137724876403809, + "logps/rejected": -8.203524589538574, + "loss": 1.2849, + "nll_loss": 1.0438096523284912, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10137724876403809, + "rewards/margins": 0.718975305557251, + "rewards/rejected": -0.8203525543212891, + "step": 190 + }, + { + "epoch": 0.3734115347018573, + "grad_norm": 0.83159339427948, + "learning_rate": 4.3933463796477495e-05, + "log_odds_chosen": 5.915223121643066, + "log_odds_ratio": -0.3392825722694397, + "logits/chosen": 1.0115458965301514, + "logits/rejected": -0.00037151575088500977, + "logps/chosen": -1.2160964012145996, + "logps/rejected": -6.884629726409912, + "loss": 1.2615, + "nll_loss": 1.3563966751098633, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.1216096431016922, + "rewards/margins": 0.5668532848358154, + "rewards/rejected": -0.6884629726409912, + "step": 191 + }, + { + "epoch": 0.375366568914956, + "grad_norm": 0.6756464838981628, + "learning_rate": 4.390084801043705e-05, + "log_odds_chosen": 6.000771522521973, + "log_odds_ratio": -0.21287649869918823, + "logits/chosen": 0.9071030616760254, + "logits/rejected": 0.23000824451446533, + "logps/chosen": -1.191312551498413, + "logps/rejected": -6.918125152587891, + "loss": 1.2687, + "nll_loss": 1.4924635887145996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11913125216960907, + "rewards/margins": 0.572681188583374, + "rewards/rejected": -0.6918125152587891, + "step": 192 + }, + { + "epoch": 0.37732160312805474, + "grad_norm": 0.7067583799362183, + "learning_rate": 4.386823222439661e-05, + "log_odds_chosen": 9.000679016113281, + "log_odds_ratio": -0.1076178178191185, + "logits/chosen": 0.8936985731124878, + "logits/rejected": 0.01938284933567047, + "logps/chosen": -1.5172154903411865, + "logps/rejected": -10.2769775390625, + "loss": 1.2731, + "nll_loss": 1.4694663286209106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1517215371131897, + "rewards/margins": 0.8759763240814209, + "rewards/rejected": -1.0276979207992554, + "step": 193 + }, + { + "epoch": 0.37927663734115347, + "grad_norm": 0.6716545820236206, + "learning_rate": 4.383561643835617e-05, + "log_odds_chosen": 6.475771903991699, + "log_odds_ratio": -0.2723192572593689, + "logits/chosen": 1.2504796981811523, + "logits/rejected": 0.5459403395652771, + "logps/chosen": -1.1261268854141235, + "logps/rejected": -7.287970066070557, + "loss": 1.2499, + "nll_loss": 1.2451543807983398, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11261269450187683, + "rewards/margins": 0.6161843538284302, + "rewards/rejected": -0.7287970781326294, + "step": 194 + }, + { + "epoch": 0.3812316715542522, + "grad_norm": 0.6783389449119568, + "learning_rate": 4.3803000652315726e-05, + "log_odds_chosen": 4.35908317565918, + "log_odds_ratio": -0.5006662011146545, + "logits/chosen": 1.0490483045578003, + "logits/rejected": 0.5552818775177002, + "logps/chosen": -1.1501168012619019, + "logps/rejected": -5.391838073730469, + "loss": 1.2427, + "nll_loss": 1.2162498235702515, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.11501167714595795, + "rewards/margins": 0.4241721034049988, + "rewards/rejected": -0.5391838550567627, + "step": 195 + }, + { + "epoch": 0.3831867057673509, + "grad_norm": 0.7135428786277771, + "learning_rate": 4.377038486627528e-05, + "log_odds_chosen": 5.809591770172119, + "log_odds_ratio": -0.13523194193840027, + "logits/chosen": 1.0520741939544678, + "logits/rejected": 0.028407230973243713, + "logps/chosen": -1.121673583984375, + "logps/rejected": -6.561716556549072, + "loss": 1.2488, + "nll_loss": 1.2242628335952759, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1121673658490181, + "rewards/margins": 0.5440043210983276, + "rewards/rejected": -0.6561716794967651, + "step": 196 + }, + { + "epoch": 0.38514173998044965, + "grad_norm": 0.6597371697425842, + "learning_rate": 4.373776908023484e-05, + "log_odds_chosen": 6.982980728149414, + "log_odds_ratio": -0.1742490828037262, + "logits/chosen": 1.1000220775604248, + "logits/rejected": 0.19672027230262756, + "logps/chosen": -1.0908890962600708, + "logps/rejected": -7.7393927574157715, + "loss": 1.2344, + "nll_loss": 1.1876468658447266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10908892005681992, + "rewards/margins": 0.6648504137992859, + "rewards/rejected": -0.7739393711090088, + "step": 197 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 0.6811966896057129, + "learning_rate": 4.370515329419439e-05, + "log_odds_chosen": 5.015325546264648, + "log_odds_ratio": -0.24114495515823364, + "logits/chosen": 0.8852458000183105, + "logits/rejected": 0.13992834091186523, + "logps/chosen": -1.079222559928894, + "logps/rejected": -5.793858528137207, + "loss": 1.2325, + "nll_loss": 1.1869739294052124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1079222559928894, + "rewards/margins": 0.47146353125572205, + "rewards/rejected": -0.5793858170509338, + "step": 198 + }, + { + "epoch": 0.3890518084066471, + "grad_norm": 0.6927857398986816, + "learning_rate": 4.367253750815395e-05, + "log_odds_chosen": 7.5125322341918945, + "log_odds_ratio": -0.062332864850759506, + "logits/chosen": 0.9993805885314941, + "logits/rejected": -0.31965458393096924, + "logps/chosen": -1.1625535488128662, + "logps/rejected": -8.290902137756348, + "loss": 1.2496, + "nll_loss": 1.2660536766052246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11625534296035767, + "rewards/margins": 0.7128348350524902, + "rewards/rejected": -0.8290901184082031, + "step": 199 + }, + { + "epoch": 0.39100684261974583, + "grad_norm": 0.6757230758666992, + "learning_rate": 4.3639921722113503e-05, + "log_odds_chosen": 7.870077133178711, + "log_odds_ratio": -0.17845183610916138, + "logits/chosen": 0.9083282947540283, + "logits/rejected": -0.2592724859714508, + "logps/chosen": -1.365842580795288, + "logps/rejected": -9.030019760131836, + "loss": 1.2466, + "nll_loss": 1.4411852359771729, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.13658425211906433, + "rewards/margins": 0.7664177417755127, + "rewards/rejected": -0.9030020236968994, + "step": 200 + }, + { + "epoch": 0.39296187683284456, + "grad_norm": 0.6517112255096436, + "learning_rate": 4.360730593607306e-05, + "log_odds_chosen": 8.384554862976074, + "log_odds_ratio": -0.16314105689525604, + "logits/chosen": 1.1210591793060303, + "logits/rejected": 0.05085504800081253, + "logps/chosen": -1.1674306392669678, + "logps/rejected": -9.253350257873535, + "loss": 1.2225, + "nll_loss": 1.3042041063308716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1167430654168129, + "rewards/margins": 0.8085919618606567, + "rewards/rejected": -0.9253350496292114, + "step": 201 + }, + { + "epoch": 0.3949169110459433, + "grad_norm": 0.6781591773033142, + "learning_rate": 4.3574690150032616e-05, + "log_odds_chosen": 12.345592498779297, + "log_odds_ratio": -0.2425260841846466, + "logits/chosen": 1.046008586883545, + "logits/rejected": -0.13235768675804138, + "logps/chosen": -1.1176838874816895, + "logps/rejected": -13.161320686340332, + "loss": 1.2459, + "nll_loss": 1.1487846374511719, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11176839470863342, + "rewards/margins": 1.2043638229370117, + "rewards/rejected": -1.3161320686340332, + "step": 202 + }, + { + "epoch": 0.396871945259042, + "grad_norm": 0.65924471616745, + "learning_rate": 4.3542074363992175e-05, + "log_odds_chosen": 8.368257522583008, + "log_odds_ratio": -0.2621564567089081, + "logits/chosen": 0.8440848588943481, + "logits/rejected": -0.27263954281806946, + "logps/chosen": -0.8806042075157166, + "logps/rejected": -8.893491744995117, + "loss": 1.2244, + "nll_loss": 0.9431186318397522, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0880604237318039, + "rewards/margins": 0.8012887239456177, + "rewards/rejected": -0.8893491625785828, + "step": 203 + }, + { + "epoch": 0.39882697947214074, + "grad_norm": 0.6636806130409241, + "learning_rate": 4.350945857795173e-05, + "log_odds_chosen": 5.735206604003906, + "log_odds_ratio": -0.21453680098056793, + "logits/chosen": 0.8560812473297119, + "logits/rejected": -0.007798045873641968, + "logps/chosen": -1.0207531452178955, + "logps/rejected": -6.4365234375, + "loss": 1.2127, + "nll_loss": 1.1727707386016846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10207531601190567, + "rewards/margins": 0.5415769815444946, + "rewards/rejected": -0.6436523199081421, + "step": 204 + }, + { + "epoch": 0.40078201368523947, + "grad_norm": 0.6741806864738464, + "learning_rate": 4.347684279191129e-05, + "log_odds_chosen": 9.543339729309082, + "log_odds_ratio": -0.12185163795948029, + "logits/chosen": 0.7724578380584717, + "logits/rejected": -0.11952032148838043, + "logps/chosen": -0.9540544748306274, + "logps/rejected": -10.0386962890625, + "loss": 1.2328, + "nll_loss": 1.1469743251800537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0954054445028305, + "rewards/margins": 0.9084641933441162, + "rewards/rejected": -1.0038697719573975, + "step": 205 + }, + { + "epoch": 0.4027370478983382, + "grad_norm": 0.6721197962760925, + "learning_rate": 4.344422700587084e-05, + "log_odds_chosen": 7.096440315246582, + "log_odds_ratio": -0.26034170389175415, + "logits/chosen": 0.6792910695075989, + "logits/rejected": 0.49212726950645447, + "logps/chosen": -1.2235617637634277, + "logps/rejected": -8.069514274597168, + "loss": 1.2019, + "nll_loss": 1.3205127716064453, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.12235617637634277, + "rewards/margins": 0.6845952272415161, + "rewards/rejected": -0.8069513440132141, + "step": 206 + }, + { + "epoch": 0.4046920821114369, + "grad_norm": 0.7571945190429688, + "learning_rate": 4.34116112198304e-05, + "log_odds_chosen": 5.162712574005127, + "log_odds_ratio": -0.3031338155269623, + "logits/chosen": 0.6827737092971802, + "logits/rejected": 0.018964119255542755, + "logps/chosen": -1.1238532066345215, + "logps/rejected": -6.056515693664551, + "loss": 1.229, + "nll_loss": 1.2711267471313477, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11238531768321991, + "rewards/margins": 0.4932662844657898, + "rewards/rejected": -0.6056516170501709, + "step": 207 + }, + { + "epoch": 0.4066471163245357, + "grad_norm": 0.7138019800186157, + "learning_rate": 4.337899543378995e-05, + "log_odds_chosen": 0.7840014696121216, + "log_odds_ratio": -0.5457270741462708, + "logits/chosen": 0.7141859531402588, + "logits/rejected": 0.6460504531860352, + "logps/chosen": -1.039139986038208, + "logps/rejected": -1.7113968133926392, + "loss": 1.2297, + "nll_loss": 1.2488605976104736, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10391400009393692, + "rewards/margins": 0.06722568720579147, + "rewards/rejected": -0.1711396872997284, + "step": 208 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 0.6752780675888062, + "learning_rate": 4.334637964774951e-05, + "log_odds_chosen": 4.193634986877441, + "log_odds_ratio": -0.3454432785511017, + "logits/chosen": 0.6512546539306641, + "logits/rejected": -0.13835130631923676, + "logps/chosen": -1.3411760330200195, + "logps/rejected": -5.293696880340576, + "loss": 1.2278, + "nll_loss": 1.3451123237609863, + "rewards/accuracies": 0.7000000476837158, + "rewards/chosen": -0.13411760330200195, + "rewards/margins": 0.3952520787715912, + "rewards/rejected": -0.5293697118759155, + "step": 209 + }, + { + "epoch": 0.41055718475073316, + "grad_norm": 0.6336066126823425, + "learning_rate": 4.3313763861709064e-05, + "log_odds_chosen": 8.296728134155273, + "log_odds_ratio": -0.19171170890331268, + "logits/chosen": 0.9909137487411499, + "logits/rejected": 0.2594684362411499, + "logps/chosen": -0.9867726564407349, + "logps/rejected": -8.926240921020508, + "loss": 1.2199, + "nll_loss": 1.1117079257965088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09867726266384125, + "rewards/margins": 0.7939467430114746, + "rewards/rejected": -0.892624020576477, + "step": 210 + }, + { + "epoch": 0.4125122189638319, + "grad_norm": 0.6607248783111572, + "learning_rate": 4.3281148075668624e-05, + "log_odds_chosen": 6.106511116027832, + "log_odds_ratio": -0.2823370397090912, + "logits/chosen": 0.8894782066345215, + "logits/rejected": 0.4358742833137512, + "logps/chosen": -1.0710291862487793, + "logps/rejected": -6.909181118011475, + "loss": 1.2336, + "nll_loss": 1.2205599546432495, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10710293054580688, + "rewards/margins": 0.5838152170181274, + "rewards/rejected": -0.6909180879592896, + "step": 211 + }, + { + "epoch": 0.4144672531769306, + "grad_norm": 0.6726201176643372, + "learning_rate": 4.3248532289628176e-05, + "log_odds_chosen": 6.995101451873779, + "log_odds_ratio": -0.2252292037010193, + "logits/chosen": 0.664546549320221, + "logits/rejected": 0.19357503950595856, + "logps/chosen": -1.2649931907653809, + "logps/rejected": -7.965364456176758, + "loss": 1.2312, + "nll_loss": 1.2234934568405151, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12649932503700256, + "rewards/margins": 0.6700371503829956, + "rewards/rejected": -0.7965365052223206, + "step": 212 + }, + { + "epoch": 0.41642228739002934, + "grad_norm": 0.671800434589386, + "learning_rate": 4.3215916503587736e-05, + "log_odds_chosen": 5.4564948081970215, + "log_odds_ratio": -0.29519909620285034, + "logits/chosen": 0.9595391750335693, + "logits/rejected": -0.092902772128582, + "logps/chosen": -1.2039215564727783, + "logps/rejected": -6.405703544616699, + "loss": 1.2256, + "nll_loss": 1.3281253576278687, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12039215862751007, + "rewards/margins": 0.5201782584190369, + "rewards/rejected": -0.6405704021453857, + "step": 213 + }, + { + "epoch": 0.41837732160312807, + "grad_norm": 0.6855944395065308, + "learning_rate": 4.3183300717547295e-05, + "log_odds_chosen": 8.11988639831543, + "log_odds_ratio": -0.04494572430849075, + "logits/chosen": 1.070630431175232, + "logits/rejected": -0.13403280079364777, + "logps/chosen": -1.523493766784668, + "logps/rejected": -9.397867202758789, + "loss": 1.22, + "nll_loss": 1.4459385871887207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15234938263893127, + "rewards/margins": 0.7874373197555542, + "rewards/rejected": -0.9397867321968079, + "step": 214 + }, + { + "epoch": 0.4203323558162268, + "grad_norm": 0.6870822906494141, + "learning_rate": 4.3150684931506855e-05, + "log_odds_chosen": 8.803707122802734, + "log_odds_ratio": -0.1364431381225586, + "logits/chosen": 0.9367923140525818, + "logits/rejected": -0.23593908548355103, + "logps/chosen": -1.1683111190795898, + "logps/rejected": -9.633816719055176, + "loss": 1.2055, + "nll_loss": 1.1734676361083984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11683111637830734, + "rewards/margins": 0.8465505838394165, + "rewards/rejected": -0.9633817076683044, + "step": 215 + }, + { + "epoch": 0.4222873900293255, + "grad_norm": 0.6506274342536926, + "learning_rate": 4.311806914546641e-05, + "log_odds_chosen": 6.000734329223633, + "log_odds_ratio": -0.2173043191432953, + "logits/chosen": 0.6508830785751343, + "logits/rejected": -0.08820539712905884, + "logps/chosen": -1.0022213459014893, + "logps/rejected": -6.632278919219971, + "loss": 1.2369, + "nll_loss": 1.091955304145813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10022212564945221, + "rewards/margins": 0.5630057454109192, + "rewards/rejected": -0.663227915763855, + "step": 216 + }, + { + "epoch": 0.42424242424242425, + "grad_norm": 0.6581596732139587, + "learning_rate": 4.308545335942597e-05, + "log_odds_chosen": 4.746521472930908, + "log_odds_ratio": -0.14330801367759705, + "logits/chosen": 0.4921647012233734, + "logits/rejected": 0.05417359620332718, + "logps/chosen": -0.9231715202331543, + "logps/rejected": -5.255744934082031, + "loss": 1.2358, + "nll_loss": 0.9638171195983887, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09231714904308319, + "rewards/margins": 0.4332573413848877, + "rewards/rejected": -0.5255745053291321, + "step": 217 + }, + { + "epoch": 0.426197458455523, + "grad_norm": 0.6661440134048462, + "learning_rate": 4.305283757338552e-05, + "log_odds_chosen": 8.647785186767578, + "log_odds_ratio": -0.11387351155281067, + "logits/chosen": 0.7650039792060852, + "logits/rejected": -0.07973608374595642, + "logps/chosen": -0.9575352668762207, + "logps/rejected": -9.154767990112305, + "loss": 1.1998, + "nll_loss": 1.0774734020233154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09575352817773819, + "rewards/margins": 0.8197232484817505, + "rewards/rejected": -0.9154767990112305, + "step": 218 + }, + { + "epoch": 0.4281524926686217, + "grad_norm": 0.6587656140327454, + "learning_rate": 4.302022178734508e-05, + "log_odds_chosen": 6.157992362976074, + "log_odds_ratio": -0.09475427865982056, + "logits/chosen": 0.6384172439575195, + "logits/rejected": -0.2501693665981293, + "logps/chosen": -1.0714644193649292, + "logps/rejected": -6.852480888366699, + "loss": 1.2157, + "nll_loss": 1.2294048070907593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10714644938707352, + "rewards/margins": 0.5781017541885376, + "rewards/rejected": -0.6852481961250305, + "step": 219 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 0.6804084777832031, + "learning_rate": 4.298760600130463e-05, + "log_odds_chosen": 6.047308921813965, + "log_odds_ratio": -0.14391186833381653, + "logits/chosen": 0.5739718079566956, + "logits/rejected": -0.19499072432518005, + "logps/chosen": -0.9985291957855225, + "logps/rejected": -6.628899574279785, + "loss": 1.2292, + "nll_loss": 1.0836694240570068, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09985293447971344, + "rewards/margins": 0.563037097454071, + "rewards/rejected": -0.6628900170326233, + "step": 220 + }, + { + "epoch": 0.43206256109481916, + "grad_norm": 0.6979094743728638, + "learning_rate": 4.295499021526419e-05, + "log_odds_chosen": 7.949481964111328, + "log_odds_ratio": -0.20190131664276123, + "logits/chosen": 0.9569327235221863, + "logits/rejected": 0.168582484126091, + "logps/chosen": -1.0329644680023193, + "logps/rejected": -8.649258613586426, + "loss": 1.2148, + "nll_loss": 1.2265366315841675, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10329645872116089, + "rewards/margins": 0.7616294622421265, + "rewards/rejected": -0.8649259805679321, + "step": 221 + }, + { + "epoch": 0.4340175953079179, + "grad_norm": 0.6426916718482971, + "learning_rate": 4.2922374429223744e-05, + "log_odds_chosen": 6.56869649887085, + "log_odds_ratio": -0.1335625946521759, + "logits/chosen": 0.7141002416610718, + "logits/rejected": -0.12777739763259888, + "logps/chosen": -1.0506397485733032, + "logps/rejected": -7.190646171569824, + "loss": 1.2188, + "nll_loss": 1.3099658489227295, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10506397485733032, + "rewards/margins": 0.6140007376670837, + "rewards/rejected": -0.7190647125244141, + "step": 222 + }, + { + "epoch": 0.4359726295210166, + "grad_norm": 0.6816736459732056, + "learning_rate": 4.2889758643183304e-05, + "log_odds_chosen": 7.325562477111816, + "log_odds_ratio": -0.1608986109495163, + "logits/chosen": 0.7606161832809448, + "logits/rejected": -0.2304556518793106, + "logps/chosen": -1.0264461040496826, + "logps/rejected": -7.986577987670898, + "loss": 1.196, + "nll_loss": 1.0577342510223389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10264462232589722, + "rewards/margins": 0.6960131525993347, + "rewards/rejected": -0.7986577749252319, + "step": 223 + }, + { + "epoch": 0.43792766373411535, + "grad_norm": 0.6567084789276123, + "learning_rate": 4.2857142857142856e-05, + "log_odds_chosen": 9.767776489257812, + "log_odds_ratio": -0.08839066326618195, + "logits/chosen": 0.6860675811767578, + "logits/rejected": -0.3855365216732025, + "logps/chosen": -1.0109754800796509, + "logps/rejected": -10.351072311401367, + "loss": 1.1798, + "nll_loss": 1.1156764030456543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10109755396842957, + "rewards/margins": 0.9340097904205322, + "rewards/rejected": -1.0351073741912842, + "step": 224 + }, + { + "epoch": 0.4398826979472141, + "grad_norm": 0.6495134234428406, + "learning_rate": 4.2824527071102416e-05, + "log_odds_chosen": 5.872086524963379, + "log_odds_ratio": -0.28713279962539673, + "logits/chosen": 0.4358323812484741, + "logits/rejected": -0.03965809941291809, + "logps/chosen": -1.225232481956482, + "logps/rejected": -6.867452621459961, + "loss": 1.2072, + "nll_loss": 1.3078467845916748, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1225232407450676, + "rewards/margins": 0.564221978187561, + "rewards/rejected": -0.6867452263832092, + "step": 225 + }, + { + "epoch": 0.4418377321603128, + "grad_norm": 0.6899813413619995, + "learning_rate": 4.279191128506197e-05, + "log_odds_chosen": 4.9206156730651855, + "log_odds_ratio": -0.428790807723999, + "logits/chosen": 0.6208153963088989, + "logits/rejected": 0.5111707448959351, + "logps/chosen": -1.0005133152008057, + "logps/rejected": -5.71583366394043, + "loss": 1.197, + "nll_loss": 1.1248276233673096, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10005134344100952, + "rewards/margins": 0.47153207659721375, + "rewards/rejected": -0.5715833902359009, + "step": 226 + }, + { + "epoch": 0.44379276637341153, + "grad_norm": 0.6532382369041443, + "learning_rate": 4.275929549902153e-05, + "log_odds_chosen": 6.682397365570068, + "log_odds_ratio": -0.194398432970047, + "logits/chosen": 1.1597890853881836, + "logits/rejected": 0.0615229494869709, + "logps/chosen": -1.1530109643936157, + "logps/rejected": -7.521717548370361, + "loss": 1.1958, + "nll_loss": 1.2212765216827393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11530108749866486, + "rewards/margins": 0.6368706822395325, + "rewards/rejected": -0.7521717548370361, + "step": 227 + }, + { + "epoch": 0.44574780058651026, + "grad_norm": 0.6540907621383667, + "learning_rate": 4.272667971298108e-05, + "log_odds_chosen": 3.8019838333129883, + "log_odds_ratio": -0.2724740505218506, + "logits/chosen": 0.37123382091522217, + "logits/rejected": 0.42815202474594116, + "logps/chosen": -0.923300564289093, + "logps/rejected": -4.4166154861450195, + "loss": 1.2038, + "nll_loss": 0.879158616065979, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09233006089925766, + "rewards/margins": 0.34933149814605713, + "rewards/rejected": -0.4416615664958954, + "step": 228 + }, + { + "epoch": 0.447702834799609, + "grad_norm": 0.6847851276397705, + "learning_rate": 4.269406392694064e-05, + "log_odds_chosen": 4.9119439125061035, + "log_odds_ratio": -0.20367969572544098, + "logits/chosen": 0.7524558305740356, + "logits/rejected": 0.013638362288475037, + "logps/chosen": -1.0148441791534424, + "logps/rejected": -5.564971923828125, + "loss": 1.1927, + "nll_loss": 1.1214625835418701, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10148441046476364, + "rewards/margins": 0.45501279830932617, + "rewards/rejected": -0.5564972162246704, + "step": 229 + }, + { + "epoch": 0.4496578690127077, + "grad_norm": 1.0283137559890747, + "learning_rate": 4.266144814090019e-05, + "log_odds_chosen": 8.668474197387695, + "log_odds_ratio": -0.18627549707889557, + "logits/chosen": 0.9192749261856079, + "logits/rejected": -0.10028047114610672, + "logps/chosen": -1.1191141605377197, + "logps/rejected": -9.467999458312988, + "loss": 1.2035, + "nll_loss": 1.295276165008545, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11191141605377197, + "rewards/margins": 0.8348884582519531, + "rewards/rejected": -0.9467998743057251, + "step": 230 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.7171432971954346, + "learning_rate": 4.262883235485975e-05, + "log_odds_chosen": 3.8169403076171875, + "log_odds_ratio": -0.2807808816432953, + "logits/chosen": 0.3789661228656769, + "logits/rejected": 0.0947137176990509, + "logps/chosen": -0.9711625576019287, + "logps/rejected": -4.464138031005859, + "loss": 1.2187, + "nll_loss": 1.1208417415618896, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09711625427007675, + "rewards/margins": 0.34929758310317993, + "rewards/rejected": -0.4464138150215149, + "step": 231 + }, + { + "epoch": 0.45356793743890517, + "grad_norm": 0.6943047642707825, + "learning_rate": 4.2596216568819305e-05, + "log_odds_chosen": 4.010525703430176, + "log_odds_ratio": -0.1984591782093048, + "logits/chosen": 1.113783597946167, + "logits/rejected": 0.08750103414058685, + "logps/chosen": -1.0992376804351807, + "logps/rejected": -4.799307823181152, + "loss": 1.2157, + "nll_loss": 1.1840382814407349, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10992376506328583, + "rewards/margins": 0.3700070083141327, + "rewards/rejected": -0.4799307882785797, + "step": 232 + }, + { + "epoch": 0.4555229716520039, + "grad_norm": 0.7388827800750732, + "learning_rate": 4.2563600782778864e-05, + "log_odds_chosen": 7.228670120239258, + "log_odds_ratio": -0.1879369616508484, + "logits/chosen": 0.8897843360900879, + "logits/rejected": -0.20654325187206268, + "logps/chosen": -1.0459961891174316, + "logps/rejected": -7.920650005340576, + "loss": 1.2065, + "nll_loss": 1.0449442863464355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10459960997104645, + "rewards/margins": 0.6874654293060303, + "rewards/rejected": -0.7920650243759155, + "step": 233 + }, + { + "epoch": 0.4574780058651026, + "grad_norm": 0.7748013138771057, + "learning_rate": 4.2530984996738424e-05, + "log_odds_chosen": 7.61234188079834, + "log_odds_ratio": -0.17971932888031006, + "logits/chosen": 0.6865963339805603, + "logits/rejected": 0.35063689947128296, + "logps/chosen": -0.9994885325431824, + "logps/rejected": -8.25189208984375, + "loss": 1.1892, + "nll_loss": 1.063398003578186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09994885325431824, + "rewards/margins": 0.7252404093742371, + "rewards/rejected": -0.8251892328262329, + "step": 234 + }, + { + "epoch": 0.45943304007820135, + "grad_norm": 0.7356454730033875, + "learning_rate": 4.2498369210697983e-05, + "log_odds_chosen": 4.202281951904297, + "log_odds_ratio": -0.21115785837173462, + "logits/chosen": 0.9331599473953247, + "logits/rejected": 0.14213812351226807, + "logps/chosen": -1.1207890510559082, + "logps/rejected": -4.946667671203613, + "loss": 1.1864, + "nll_loss": 1.2237921953201294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11207890510559082, + "rewards/margins": 0.38258790969848633, + "rewards/rejected": -0.49466681480407715, + "step": 235 + }, + { + "epoch": 0.4613880742913001, + "grad_norm": 0.6947811245918274, + "learning_rate": 4.2465753424657536e-05, + "log_odds_chosen": 3.8454484939575195, + "log_odds_ratio": -0.32736310362815857, + "logits/chosen": 0.2921525835990906, + "logits/rejected": 0.001339353621006012, + "logps/chosen": -1.2222959995269775, + "logps/rejected": -4.850205421447754, + "loss": 1.1902, + "nll_loss": 1.1556880474090576, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12222960591316223, + "rewards/margins": 0.36279094219207764, + "rewards/rejected": -0.4850205183029175, + "step": 236 + }, + { + "epoch": 0.4633431085043988, + "grad_norm": 0.711220920085907, + "learning_rate": 4.2433137638617096e-05, + "log_odds_chosen": 6.012630462646484, + "log_odds_ratio": -0.1405373215675354, + "logits/chosen": 0.5068355798721313, + "logits/rejected": -0.2745785713195801, + "logps/chosen": -0.9872889518737793, + "logps/rejected": -6.606304168701172, + "loss": 1.1931, + "nll_loss": 1.0911933183670044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09872891008853912, + "rewards/margins": 0.5619015693664551, + "rewards/rejected": -0.660630464553833, + "step": 237 + }, + { + "epoch": 0.46529814271749753, + "grad_norm": 0.7407872676849365, + "learning_rate": 4.240052185257665e-05, + "log_odds_chosen": 9.217241287231445, + "log_odds_ratio": -0.25504663586616516, + "logits/chosen": 0.5312206745147705, + "logits/rejected": -0.01243487000465393, + "logps/chosen": -1.013113021850586, + "logps/rejected": -9.859275817871094, + "loss": 1.1816, + "nll_loss": 1.101323127746582, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10131130367517471, + "rewards/margins": 0.8846161961555481, + "rewards/rejected": -0.9859275817871094, + "step": 238 + }, + { + "epoch": 0.46725317693059626, + "grad_norm": 0.6919553279876709, + "learning_rate": 4.236790606653621e-05, + "log_odds_chosen": 7.788331031799316, + "log_odds_ratio": -0.20482546091079712, + "logits/chosen": 0.7134689092636108, + "logits/rejected": -0.23549507558345795, + "logps/chosen": -1.2671983242034912, + "logps/rejected": -8.770662307739258, + "loss": 1.2132, + "nll_loss": 1.306382656097412, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12671983242034912, + "rewards/margins": 0.7503464221954346, + "rewards/rejected": -0.8770662546157837, + "step": 239 + }, + { + "epoch": 0.46920821114369504, + "grad_norm": 0.7030436396598816, + "learning_rate": 4.233529028049576e-05, + "log_odds_chosen": 5.3028717041015625, + "log_odds_ratio": -0.20051038265228271, + "logits/chosen": 0.3247440457344055, + "logits/rejected": 0.2519289553165436, + "logps/chosen": -0.9654523134231567, + "logps/rejected": -5.901013374328613, + "loss": 1.2024, + "nll_loss": 1.0522440671920776, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09654524177312851, + "rewards/margins": 0.49355608224868774, + "rewards/rejected": -0.5901013612747192, + "step": 240 + }, + { + "epoch": 0.47116324535679377, + "grad_norm": 0.6615464091300964, + "learning_rate": 4.230267449445532e-05, + "log_odds_chosen": 5.632296085357666, + "log_odds_ratio": -0.14506156742572784, + "logits/chosen": 0.8729724287986755, + "logits/rejected": 0.3847310543060303, + "logps/chosen": -1.1047452688217163, + "logps/rejected": -6.369307518005371, + "loss": 1.2126, + "nll_loss": 1.1969361305236816, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11047452688217163, + "rewards/margins": 0.5264562368392944, + "rewards/rejected": -0.6369307041168213, + "step": 241 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.7105023860931396, + "learning_rate": 4.227005870841487e-05, + "log_odds_chosen": 7.858578205108643, + "log_odds_ratio": -0.13598795235157013, + "logits/chosen": 0.8104360699653625, + "logits/rejected": 0.07514157146215439, + "logps/chosen": -0.8657926917076111, + "logps/rejected": -8.231510162353516, + "loss": 1.1969, + "nll_loss": 1.0706291198730469, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08657927066087723, + "rewards/margins": 0.7365717887878418, + "rewards/rejected": -0.8231511116027832, + "step": 242 + }, + { + "epoch": 0.4750733137829912, + "grad_norm": 0.6636717319488525, + "learning_rate": 4.223744292237443e-05, + "log_odds_chosen": 10.534120559692383, + "log_odds_ratio": -0.08634812384843826, + "logits/chosen": 0.6517191529273987, + "logits/rejected": -0.2083858847618103, + "logps/chosen": -1.2069272994995117, + "logps/rejected": -11.399520874023438, + "loss": 1.1692, + "nll_loss": 1.189385175704956, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12069272994995117, + "rewards/margins": 1.0192592144012451, + "rewards/rejected": -1.1399519443511963, + "step": 243 + }, + { + "epoch": 0.47702834799608995, + "grad_norm": 0.6680395603179932, + "learning_rate": 4.2204827136333985e-05, + "log_odds_chosen": 7.376863479614258, + "log_odds_ratio": -0.13099458813667297, + "logits/chosen": 0.5916165709495544, + "logits/rejected": -0.03986227884888649, + "logps/chosen": -1.173282265663147, + "logps/rejected": -8.227132797241211, + "loss": 1.1909, + "nll_loss": 1.2833709716796875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11732824146747589, + "rewards/margins": 0.7053850293159485, + "rewards/rejected": -0.822713315486908, + "step": 244 + }, + { + "epoch": 0.4789833822091887, + "grad_norm": 0.6842270493507385, + "learning_rate": 4.2172211350293544e-05, + "log_odds_chosen": 6.389212131500244, + "log_odds_ratio": -0.2222258746623993, + "logits/chosen": 0.8799620270729065, + "logits/rejected": -0.012478888034820557, + "logps/chosen": -1.1711639165878296, + "logps/rejected": -7.294344902038574, + "loss": 1.1843, + "nll_loss": 1.1545705795288086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11711637675762177, + "rewards/margins": 0.6123180985450745, + "rewards/rejected": -0.7294344902038574, + "step": 245 + }, + { + "epoch": 0.4809384164222874, + "grad_norm": 0.7436397075653076, + "learning_rate": 4.21395955642531e-05, + "log_odds_chosen": 7.733835697174072, + "log_odds_ratio": -0.12518292665481567, + "logits/chosen": 0.672578752040863, + "logits/rejected": 0.14552362263202667, + "logps/chosen": -1.0736228227615356, + "logps/rejected": -8.449789047241211, + "loss": 1.1853, + "nll_loss": 1.142848014831543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.107362300157547, + "rewards/margins": 0.7376166582107544, + "rewards/rejected": -0.8449789881706238, + "step": 246 + }, + { + "epoch": 0.48289345063538613, + "grad_norm": 0.6975862383842468, + "learning_rate": 4.2106979778212656e-05, + "log_odds_chosen": 4.370568752288818, + "log_odds_ratio": -0.21793211996555328, + "logits/chosen": 0.5878639221191406, + "logits/rejected": 0.07971948385238647, + "logps/chosen": -0.9643622040748596, + "logps/rejected": -4.96099328994751, + "loss": 1.1907, + "nll_loss": 1.1241059303283691, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09643621742725372, + "rewards/margins": 0.3996630609035492, + "rewards/rejected": -0.4960993230342865, + "step": 247 + }, + { + "epoch": 0.48484848484848486, + "grad_norm": 0.6782337427139282, + "learning_rate": 4.207436399217221e-05, + "log_odds_chosen": 8.316946029663086, + "log_odds_ratio": -0.15419572591781616, + "logits/chosen": 0.7761902213096619, + "logits/rejected": -0.001148030161857605, + "logps/chosen": -1.1806130409240723, + "logps/rejected": -9.134148597717285, + "loss": 1.1882, + "nll_loss": 1.0713238716125488, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11806130409240723, + "rewards/margins": 0.7953535914421082, + "rewards/rejected": -0.9134148359298706, + "step": 248 + }, + { + "epoch": 0.4868035190615836, + "grad_norm": 0.6475673913955688, + "learning_rate": 4.204174820613177e-05, + "log_odds_chosen": 5.598581314086914, + "log_odds_ratio": -0.2090078592300415, + "logits/chosen": 0.5819118022918701, + "logits/rejected": -0.00944153219461441, + "logps/chosen": -1.2180044651031494, + "logps/rejected": -6.5464186668396, + "loss": 1.1643, + "nll_loss": 1.274623990058899, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.12180045247077942, + "rewards/margins": 0.5328414440155029, + "rewards/rejected": -0.6546419262886047, + "step": 249 + }, + { + "epoch": 0.4887585532746823, + "grad_norm": 0.6903215646743774, + "learning_rate": 4.200913242009132e-05, + "log_odds_chosen": 7.891858100891113, + "log_odds_ratio": -0.17421749234199524, + "logits/chosen": 0.4029393196105957, + "logits/rejected": 0.17868672311306, + "logps/chosen": -0.9536838531494141, + "logps/rejected": -8.404196739196777, + "loss": 1.179, + "nll_loss": 1.2948195934295654, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0953683853149414, + "rewards/margins": 0.7450513243675232, + "rewards/rejected": -0.8404196500778198, + "step": 250 + }, + { + "epoch": 0.49071358748778104, + "grad_norm": 0.6497746109962463, + "learning_rate": 4.197651663405088e-05, + "log_odds_chosen": 10.266134262084961, + "log_odds_ratio": -0.18467208743095398, + "logits/chosen": 0.8247162103652954, + "logits/rejected": 0.19323435425758362, + "logps/chosen": -0.8944298028945923, + "logps/rejected": -10.646110534667969, + "loss": 1.1584, + "nll_loss": 0.9788614511489868, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08944297581911087, + "rewards/margins": 0.975167989730835, + "rewards/rejected": -1.0646109580993652, + "step": 251 + }, + { + "epoch": 0.49266862170087977, + "grad_norm": 0.649543285369873, + "learning_rate": 4.1943900848010433e-05, + "log_odds_chosen": 7.690011024475098, + "log_odds_ratio": -0.28732824325561523, + "logits/chosen": 0.5696521997451782, + "logits/rejected": 0.0071504563093185425, + "logps/chosen": -1.0807639360427856, + "logps/rejected": -8.520203590393066, + "loss": 1.1704, + "nll_loss": 1.183617353439331, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10807639360427856, + "rewards/margins": 0.7439439296722412, + "rewards/rejected": -0.8520203232765198, + "step": 252 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.6525819897651672, + "learning_rate": 4.191128506196999e-05, + "log_odds_chosen": 6.02238130569458, + "log_odds_ratio": -0.11982538551092148, + "logits/chosen": 0.751982569694519, + "logits/rejected": 0.0021363645792007446, + "logps/chosen": -0.9845625162124634, + "logps/rejected": -6.563854217529297, + "loss": 1.1506, + "nll_loss": 0.9785577058792114, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0984562411904335, + "rewards/margins": 0.5579291582107544, + "rewards/rejected": -0.6563854217529297, + "step": 253 + }, + { + "epoch": 0.4965786901270772, + "grad_norm": 0.6790549755096436, + "learning_rate": 4.187866927592955e-05, + "log_odds_chosen": 4.089137077331543, + "log_odds_ratio": -0.2235877811908722, + "logits/chosen": 0.5788465738296509, + "logits/rejected": 0.2246243804693222, + "logps/chosen": -1.0780720710754395, + "logps/rejected": -4.894374847412109, + "loss": 1.1819, + "nll_loss": 1.1401374340057373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1078072041273117, + "rewards/margins": 0.3816302418708801, + "rewards/rejected": -0.4894375205039978, + "step": 254 + }, + { + "epoch": 0.49853372434017595, + "grad_norm": 0.6653516292572021, + "learning_rate": 4.184605348988911e-05, + "log_odds_chosen": 5.269237518310547, + "log_odds_ratio": -0.2847875952720642, + "logits/chosen": 0.7024892568588257, + "logits/rejected": -0.19342902302742004, + "logps/chosen": -1.0166038274765015, + "logps/rejected": -5.922469139099121, + "loss": 1.1777, + "nll_loss": 1.1827728748321533, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10166039317846298, + "rewards/margins": 0.4905865788459778, + "rewards/rejected": -0.592246949672699, + "step": 255 + }, + { + "epoch": 0.5004887585532747, + "grad_norm": 0.7071744799613953, + "learning_rate": 4.1813437703848665e-05, + "log_odds_chosen": 2.956843852996826, + "log_odds_ratio": -0.23219028115272522, + "logits/chosen": 0.5941203832626343, + "logits/rejected": 0.40791797637939453, + "logps/chosen": -0.9979085922241211, + "logps/rejected": -3.649352550506592, + "loss": 1.1366, + "nll_loss": 1.0456674098968506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09979085624217987, + "rewards/margins": 0.2651444375514984, + "rewards/rejected": -0.3649352788925171, + "step": 256 + }, + { + "epoch": 0.5024437927663734, + "grad_norm": 0.7152044177055359, + "learning_rate": 4.1780821917808224e-05, + "log_odds_chosen": 6.681696891784668, + "log_odds_ratio": -0.22332042455673218, + "logits/chosen": 0.6421303749084473, + "logits/rejected": 0.08190090209245682, + "logps/chosen": -1.0854679346084595, + "logps/rejected": -7.470207214355469, + "loss": 1.1652, + "nll_loss": 1.049300193786621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10854679346084595, + "rewards/margins": 0.6384739875793457, + "rewards/rejected": -0.7470207214355469, + "step": 257 + }, + { + "epoch": 0.5043988269794721, + "grad_norm": 0.7050417065620422, + "learning_rate": 4.174820613176778e-05, + "log_odds_chosen": 4.766329765319824, + "log_odds_ratio": -0.30606013536453247, + "logits/chosen": 0.8683248162269592, + "logits/rejected": 0.348154753446579, + "logps/chosen": -1.132232427597046, + "logps/rejected": -5.674560546875, + "loss": 1.1681, + "nll_loss": 1.2417287826538086, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11322325468063354, + "rewards/margins": 0.454232782125473, + "rewards/rejected": -0.5674560070037842, + "step": 258 + }, + { + "epoch": 0.5063538611925709, + "grad_norm": 0.648814857006073, + "learning_rate": 4.1715590345727336e-05, + "log_odds_chosen": 15.47890853881836, + "log_odds_ratio": -0.1544625610113144, + "logits/chosen": 0.6894258856773376, + "logits/rejected": 0.12671008706092834, + "logps/chosen": -1.083014965057373, + "logps/rejected": -16.228403091430664, + "loss": 1.1528, + "nll_loss": 1.2059355974197388, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10830150544643402, + "rewards/margins": 1.5145387649536133, + "rewards/rejected": -1.622840166091919, + "step": 259 + }, + { + "epoch": 0.5083088954056696, + "grad_norm": 0.6845105290412903, + "learning_rate": 4.168297455968689e-05, + "log_odds_chosen": 5.149576187133789, + "log_odds_ratio": -0.24421626329421997, + "logits/chosen": 0.7844775915145874, + "logits/rejected": 0.4183514714241028, + "logps/chosen": -0.9286330342292786, + "logps/rejected": -5.722538948059082, + "loss": 1.1469, + "nll_loss": 1.0586717128753662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0928633064031601, + "rewards/margins": 0.47939062118530273, + "rewards/rejected": -0.572253942489624, + "step": 260 + }, + { + "epoch": 0.5102639296187683, + "grad_norm": 0.6626594066619873, + "learning_rate": 4.165035877364645e-05, + "log_odds_chosen": 5.647026062011719, + "log_odds_ratio": -0.13057807087898254, + "logits/chosen": 0.8761078119277954, + "logits/rejected": -0.018826564773917198, + "logps/chosen": -0.9782587885856628, + "logps/rejected": -6.239114761352539, + "loss": 1.1572, + "nll_loss": 1.1121537685394287, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09782588481903076, + "rewards/margins": 0.526085615158081, + "rewards/rejected": -0.6239114999771118, + "step": 261 + }, + { + "epoch": 0.512218963831867, + "grad_norm": 0.6961473822593689, + "learning_rate": 4.1617742987606e-05, + "log_odds_chosen": 3.657623529434204, + "log_odds_ratio": -0.26499229669570923, + "logits/chosen": 0.5242988467216492, + "logits/rejected": -0.23591238260269165, + "logps/chosen": -1.1027683019638062, + "logps/rejected": -4.492673873901367, + "loss": 1.1509, + "nll_loss": 1.1959213018417358, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11027683317661285, + "rewards/margins": 0.33899056911468506, + "rewards/rejected": -0.4492674469947815, + "step": 262 + }, + { + "epoch": 0.5141739980449658, + "grad_norm": 0.6429718732833862, + "learning_rate": 4.158512720156556e-05, + "log_odds_chosen": 9.997663497924805, + "log_odds_ratio": -0.2087637335062027, + "logits/chosen": 0.7451266050338745, + "logits/rejected": 0.003947455435991287, + "logps/chosen": -0.9906499981880188, + "logps/rejected": -10.56393814086914, + "loss": 1.1607, + "nll_loss": 1.1195344924926758, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09906499087810516, + "rewards/margins": 0.9573289155960083, + "rewards/rejected": -1.0563938617706299, + "step": 263 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.6714103817939758, + "learning_rate": 4.155251141552511e-05, + "log_odds_chosen": 7.216554641723633, + "log_odds_ratio": -0.15240159630775452, + "logits/chosen": 0.7232346534729004, + "logits/rejected": -0.16476884484291077, + "logps/chosen": -0.8200346231460571, + "logps/rejected": -7.5122504234313965, + "loss": 1.1651, + "nll_loss": 0.8759939670562744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08200345933437347, + "rewards/margins": 0.6692215800285339, + "rewards/rejected": -0.7512251138687134, + "step": 264 + }, + { + "epoch": 0.5180840664711632, + "grad_norm": 0.6581036448478699, + "learning_rate": 4.151989562948467e-05, + "log_odds_chosen": 5.405378818511963, + "log_odds_ratio": -0.17860428988933563, + "logits/chosen": 0.5968877077102661, + "logits/rejected": -0.33335089683532715, + "logps/chosen": -0.9661722183227539, + "logps/rejected": -6.011872291564941, + "loss": 1.1456, + "nll_loss": 1.034532904624939, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09661722928285599, + "rewards/margins": 0.5045700073242188, + "rewards/rejected": -0.6011872887611389, + "step": 265 + }, + { + "epoch": 0.520039100684262, + "grad_norm": 0.6885536313056946, + "learning_rate": 4.1487279843444225e-05, + "log_odds_chosen": 9.362210273742676, + "log_odds_ratio": -0.19060276448726654, + "logits/chosen": 0.5062807202339172, + "logits/rejected": -0.21584820747375488, + "logps/chosen": -1.1191682815551758, + "logps/rejected": -10.150638580322266, + "loss": 1.1626, + "nll_loss": 1.1266376972198486, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11191682517528534, + "rewards/margins": 0.9031471014022827, + "rewards/rejected": -1.0150638818740845, + "step": 266 + }, + { + "epoch": 0.5219941348973607, + "grad_norm": 0.7146323323249817, + "learning_rate": 4.1454664057403785e-05, + "log_odds_chosen": 4.2370147705078125, + "log_odds_ratio": -0.18396827578544617, + "logits/chosen": 0.7095978260040283, + "logits/rejected": 0.2848646640777588, + "logps/chosen": -0.9776026606559753, + "logps/rejected": -4.843325614929199, + "loss": 1.1327, + "nll_loss": 1.0867592096328735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09776026755571365, + "rewards/margins": 0.38657230138778687, + "rewards/rejected": -0.4843325912952423, + "step": 267 + }, + { + "epoch": 0.5239491691104594, + "grad_norm": 0.6869406700134277, + "learning_rate": 4.142204827136334e-05, + "log_odds_chosen": 11.420750617980957, + "log_odds_ratio": -0.007301103323698044, + "logits/chosen": 0.34075480699539185, + "logits/rejected": -0.5899679660797119, + "logps/chosen": -0.9919652342796326, + "logps/rejected": -11.932402610778809, + "loss": 1.1409, + "nll_loss": 0.999819278717041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09919653832912445, + "rewards/margins": 1.0940437316894531, + "rewards/rejected": -1.1932402849197388, + "step": 268 + }, + { + "epoch": 0.5259042033235581, + "grad_norm": 0.6642701625823975, + "learning_rate": 4.13894324853229e-05, + "log_odds_chosen": 9.44560718536377, + "log_odds_ratio": -0.3056187033653259, + "logits/chosen": 0.19630679488182068, + "logits/rejected": -0.3897777795791626, + "logps/chosen": -0.9722408652305603, + "logps/rejected": -10.111209869384766, + "loss": 1.1436, + "nll_loss": 1.1632628440856934, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09722408652305603, + "rewards/margins": 0.9138969779014587, + "rewards/rejected": -1.0111210346221924, + "step": 269 + }, + { + "epoch": 0.5278592375366569, + "grad_norm": 0.6657781004905701, + "learning_rate": 4.135681669928245e-05, + "log_odds_chosen": 7.3104400634765625, + "log_odds_ratio": -0.12702669203281403, + "logits/chosen": 0.7866991758346558, + "logits/rejected": -0.3408326506614685, + "logps/chosen": -1.080026388168335, + "logps/rejected": -8.02646255493164, + "loss": 1.1772, + "nll_loss": 1.2404909133911133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10800263285636902, + "rewards/margins": 0.6946436166763306, + "rewards/rejected": -0.8026462197303772, + "step": 270 + }, + { + "epoch": 0.5298142717497556, + "grad_norm": 0.6965118050575256, + "learning_rate": 4.132420091324201e-05, + "log_odds_chosen": 9.900495529174805, + "log_odds_ratio": -0.09691554307937622, + "logits/chosen": 0.6056500673294067, + "logits/rejected": -0.3448256850242615, + "logps/chosen": -1.1234078407287598, + "logps/rejected": -10.540274620056152, + "loss": 1.1447, + "nll_loss": 1.260823369026184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1123407855629921, + "rewards/margins": 0.9416866898536682, + "rewards/rejected": -1.0540275573730469, + "step": 271 + }, + { + "epoch": 0.5317693059628543, + "grad_norm": 0.6609433889389038, + "learning_rate": 4.129158512720156e-05, + "log_odds_chosen": 5.432811737060547, + "log_odds_ratio": -0.25237345695495605, + "logits/chosen": 0.2997015118598938, + "logits/rejected": -0.45080670714378357, + "logps/chosen": -1.046942949295044, + "logps/rejected": -6.153581619262695, + "loss": 1.1134, + "nll_loss": 1.0671082735061646, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10469429194927216, + "rewards/margins": 0.5106639266014099, + "rewards/rejected": -0.6153581738471985, + "step": 272 + }, + { + "epoch": 0.533724340175953, + "grad_norm": 0.7542477250099182, + "learning_rate": 4.125896934116112e-05, + "log_odds_chosen": 12.081981658935547, + "log_odds_ratio": -0.09762982279062271, + "logits/chosen": 0.014615421183407307, + "logits/rejected": -0.41017603874206543, + "logps/chosen": -0.9079731106758118, + "logps/rejected": -12.520423889160156, + "loss": 1.1312, + "nll_loss": 0.9873826503753662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0907973125576973, + "rewards/margins": 1.1612452268600464, + "rewards/rejected": -1.252042531967163, + "step": 273 + }, + { + "epoch": 0.5356793743890518, + "grad_norm": 0.7542407512664795, + "learning_rate": 4.122635355512068e-05, + "log_odds_chosen": 6.395196914672852, + "log_odds_ratio": -0.18070031702518463, + "logits/chosen": 0.26587778329849243, + "logits/rejected": -0.5923341512680054, + "logps/chosen": -1.1171767711639404, + "logps/rejected": -7.168094635009766, + "loss": 1.1379, + "nll_loss": 1.2345396280288696, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11171767860651016, + "rewards/margins": 0.6050918102264404, + "rewards/rejected": -0.7168095111846924, + "step": 274 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.680167555809021, + "learning_rate": 4.119373776908024e-05, + "log_odds_chosen": 9.145588874816895, + "log_odds_ratio": -0.1658388376235962, + "logits/chosen": 0.8427155017852783, + "logits/rejected": -0.7521920204162598, + "logps/chosen": -1.0660748481750488, + "logps/rejected": -9.89027214050293, + "loss": 1.1229, + "nll_loss": 1.1842740774154663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10660747438669205, + "rewards/margins": 0.8824198246002197, + "rewards/rejected": -0.9890272617340088, + "step": 275 + }, + { + "epoch": 0.5395894428152492, + "grad_norm": 0.7076801657676697, + "learning_rate": 4.116112198303979e-05, + "log_odds_chosen": 4.392437934875488, + "log_odds_ratio": -0.27273038029670715, + "logits/chosen": 0.4899173974990845, + "logits/rejected": 0.13369080424308777, + "logps/chosen": -1.0607868432998657, + "logps/rejected": -5.133925437927246, + "loss": 1.1364, + "nll_loss": 1.1494295597076416, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10607868432998657, + "rewards/margins": 0.40731385350227356, + "rewards/rejected": -0.513392448425293, + "step": 276 + }, + { + "epoch": 0.541544477028348, + "grad_norm": 0.6715219020843506, + "learning_rate": 4.112850619699935e-05, + "log_odds_chosen": 10.86029052734375, + "log_odds_ratio": -0.24884098768234253, + "logits/chosen": 0.2763820290565491, + "logits/rejected": -0.5413069128990173, + "logps/chosen": -1.0161396265029907, + "logps/rejected": -11.510680198669434, + "loss": 1.1281, + "nll_loss": 0.9742406010627747, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10161396861076355, + "rewards/margins": 1.0494540929794312, + "rewards/rejected": -1.151068091392517, + "step": 277 + }, + { + "epoch": 0.5434995112414467, + "grad_norm": 0.6684945821762085, + "learning_rate": 4.1095890410958905e-05, + "log_odds_chosen": 7.329801559448242, + "log_odds_ratio": -0.08567076921463013, + "logits/chosen": 0.4225273132324219, + "logits/rejected": -0.4423757791519165, + "logps/chosen": -0.9871346950531006, + "logps/rejected": -7.83686637878418, + "loss": 1.1545, + "nll_loss": 1.1451261043548584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0987134724855423, + "rewards/margins": 0.6849731802940369, + "rewards/rejected": -0.783686637878418, + "step": 278 + }, + { + "epoch": 0.5454545454545454, + "grad_norm": 0.6549646258354187, + "learning_rate": 4.1063274624918465e-05, + "log_odds_chosen": 7.392395973205566, + "log_odds_ratio": -0.10891471803188324, + "logits/chosen": 0.7277379035949707, + "logits/rejected": -0.6470577716827393, + "logps/chosen": -0.9376391172409058, + "logps/rejected": -7.80961275100708, + "loss": 1.1434, + "nll_loss": 1.0594056844711304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09376391768455505, + "rewards/margins": 0.6871973872184753, + "rewards/rejected": -0.780961275100708, + "step": 279 + }, + { + "epoch": 0.5474095796676441, + "grad_norm": 0.7244571447372437, + "learning_rate": 4.103065883887802e-05, + "log_odds_chosen": 7.83552360534668, + "log_odds_ratio": -0.13160735368728638, + "logits/chosen": 0.5056818127632141, + "logits/rejected": -0.07429533451795578, + "logps/chosen": -0.971825122833252, + "logps/rejected": -8.298418045043945, + "loss": 1.0957, + "nll_loss": 1.1087538003921509, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0971825122833252, + "rewards/margins": 0.7326593995094299, + "rewards/rejected": -0.8298418521881104, + "step": 280 + }, + { + "epoch": 0.5493646138807429, + "grad_norm": 0.690964937210083, + "learning_rate": 4.099804305283758e-05, + "log_odds_chosen": 9.236759185791016, + "log_odds_ratio": -0.28039202094078064, + "logits/chosen": 0.6274337768554688, + "logits/rejected": -0.4794255793094635, + "logps/chosen": -1.0684432983398438, + "logps/rejected": -10.006853103637695, + "loss": 1.1234, + "nll_loss": 1.1236571073532104, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.10684433579444885, + "rewards/margins": 0.893841028213501, + "rewards/rejected": -1.0006853342056274, + "step": 281 + }, + { + "epoch": 0.5513196480938416, + "grad_norm": 0.6793696880340576, + "learning_rate": 4.096542726679713e-05, + "log_odds_chosen": 5.69762659072876, + "log_odds_ratio": -0.21575284004211426, + "logits/chosen": 0.37799543142318726, + "logits/rejected": -0.3916747570037842, + "logps/chosen": -0.9225404262542725, + "logps/rejected": -6.279555797576904, + "loss": 1.1395, + "nll_loss": 1.0313210487365723, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09225404262542725, + "rewards/margins": 0.5357015132904053, + "rewards/rejected": -0.6279555559158325, + "step": 282 + }, + { + "epoch": 0.5532746823069403, + "grad_norm": 0.6548845767974854, + "learning_rate": 4.093281148075669e-05, + "log_odds_chosen": 6.338593482971191, + "log_odds_ratio": -0.18454338610172272, + "logits/chosen": 0.10643066465854645, + "logits/rejected": -0.17373044788837433, + "logps/chosen": -0.9662367105484009, + "logps/rejected": -6.916711330413818, + "loss": 1.1123, + "nll_loss": 1.0164183378219604, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09662368148565292, + "rewards/margins": 0.5950474739074707, + "rewards/rejected": -0.6916711330413818, + "step": 283 + }, + { + "epoch": 0.555229716520039, + "grad_norm": 0.6832579970359802, + "learning_rate": 4.090019569471624e-05, + "log_odds_chosen": 6.864666938781738, + "log_odds_ratio": -0.17679129540920258, + "logits/chosen": 0.6277916431427002, + "logits/rejected": -0.6125322580337524, + "logps/chosen": -1.075322151184082, + "logps/rejected": -7.586731910705566, + "loss": 1.108, + "nll_loss": 1.1886825561523438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10753223299980164, + "rewards/margins": 0.6511410474777222, + "rewards/rejected": -0.7586732506752014, + "step": 284 + }, + { + "epoch": 0.5571847507331378, + "grad_norm": 0.7047138810157776, + "learning_rate": 4.08675799086758e-05, + "log_odds_chosen": 8.511234283447266, + "log_odds_ratio": -0.08311718702316284, + "logits/chosen": 0.35328567028045654, + "logits/rejected": -0.25347214937210083, + "logps/chosen": -0.8512539863586426, + "logps/rejected": -8.833724021911621, + "loss": 1.1265, + "nll_loss": 0.975980818271637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0851254016160965, + "rewards/margins": 0.7982469201087952, + "rewards/rejected": -0.8833723068237305, + "step": 285 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.6836577653884888, + "learning_rate": 4.0834964122635354e-05, + "log_odds_chosen": 11.158236503601074, + "log_odds_ratio": -0.0724794864654541, + "logits/chosen": 0.38961926102638245, + "logits/rejected": -0.5365880131721497, + "logps/chosen": -0.8943246006965637, + "logps/rejected": -11.541574478149414, + "loss": 1.1055, + "nll_loss": 0.9907675981521606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08943246304988861, + "rewards/margins": 1.0647250413894653, + "rewards/rejected": -1.1541576385498047, + "step": 286 + }, + { + "epoch": 0.5610948191593352, + "grad_norm": 0.6702540516853333, + "learning_rate": 4.0802348336594913e-05, + "log_odds_chosen": 8.107760429382324, + "log_odds_ratio": -0.13655710220336914, + "logits/chosen": 0.792991042137146, + "logits/rejected": -0.4938603341579437, + "logps/chosen": -1.0284419059753418, + "logps/rejected": -8.760270118713379, + "loss": 1.1196, + "nll_loss": 1.1688802242279053, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10284419357776642, + "rewards/margins": 0.7731828093528748, + "rewards/rejected": -0.87602698802948, + "step": 287 + }, + { + "epoch": 0.5630498533724341, + "grad_norm": 0.7200450897216797, + "learning_rate": 4.0769732550554466e-05, + "log_odds_chosen": 3.840339183807373, + "log_odds_ratio": -0.23622727394104004, + "logits/chosen": 0.5775584578514099, + "logits/rejected": 0.35635268688201904, + "logps/chosen": -0.7905005216598511, + "logps/rejected": -4.210387229919434, + "loss": 1.117, + "nll_loss": 0.9706283211708069, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07905005663633347, + "rewards/margins": 0.3419886827468872, + "rewards/rejected": -0.42103874683380127, + "step": 288 + }, + { + "epoch": 0.5650048875855328, + "grad_norm": 0.6484196186065674, + "learning_rate": 4.0737116764514026e-05, + "log_odds_chosen": 12.12307357788086, + "log_odds_ratio": -0.08841225504875183, + "logits/chosen": 0.37197405099868774, + "logits/rejected": -0.3429291844367981, + "logps/chosen": -1.030677318572998, + "logps/rejected": -12.666439056396484, + "loss": 1.0981, + "nll_loss": 1.1639087200164795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10306773334741592, + "rewards/margins": 1.1635762453079224, + "rewards/rejected": -1.26664400100708, + "step": 289 + }, + { + "epoch": 0.5669599217986315, + "grad_norm": 0.7452549934387207, + "learning_rate": 4.070450097847358e-05, + "log_odds_chosen": 2.879051923751831, + "log_odds_ratio": -0.354413777589798, + "logits/chosen": 0.19132810831069946, + "logits/rejected": -0.04564804583787918, + "logps/chosen": -0.9507859945297241, + "logps/rejected": -3.592869758605957, + "loss": 1.1217, + "nll_loss": 1.069838523864746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09507860243320465, + "rewards/margins": 0.2642083466053009, + "rewards/rejected": -0.35928696393966675, + "step": 290 + }, + { + "epoch": 0.5689149560117303, + "grad_norm": 0.65251624584198, + "learning_rate": 4.067188519243314e-05, + "log_odds_chosen": 8.82230281829834, + "log_odds_ratio": -0.13964956998825073, + "logits/chosen": 0.7677267789840698, + "logits/rejected": -0.38636061549186707, + "logps/chosen": -0.9465181827545166, + "logps/rejected": -9.324983596801758, + "loss": 1.1142, + "nll_loss": 1.1983228921890259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09465181827545166, + "rewards/margins": 0.8378466367721558, + "rewards/rejected": -0.9324983358383179, + "step": 291 + }, + { + "epoch": 0.570869990224829, + "grad_norm": 0.6657481789588928, + "learning_rate": 4.063926940639269e-05, + "log_odds_chosen": 6.81377649307251, + "log_odds_ratio": -0.303103506565094, + "logits/chosen": 0.4104228913784027, + "logits/rejected": 0.17728020250797272, + "logps/chosen": -0.9230424165725708, + "logps/rejected": -7.425897121429443, + "loss": 1.1188, + "nll_loss": 1.070756196975708, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09230424463748932, + "rewards/margins": 0.6502854824066162, + "rewards/rejected": -0.7425897121429443, + "step": 292 + }, + { + "epoch": 0.5728250244379277, + "grad_norm": 0.6689323782920837, + "learning_rate": 4.060665362035225e-05, + "log_odds_chosen": 9.313121795654297, + "log_odds_ratio": -0.14007017016410828, + "logits/chosen": 0.5725845098495483, + "logits/rejected": -0.5665950179100037, + "logps/chosen": -1.194811224937439, + "logps/rejected": -10.182653427124023, + "loss": 1.1273, + "nll_loss": 1.2345573902130127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11948111653327942, + "rewards/margins": 0.8987842798233032, + "rewards/rejected": -1.0182653665542603, + "step": 293 + }, + { + "epoch": 0.5747800586510264, + "grad_norm": 0.6670846939086914, + "learning_rate": 4.057403783431181e-05, + "log_odds_chosen": 11.757686614990234, + "log_odds_ratio": -0.11874425411224365, + "logits/chosen": 0.39290493726730347, + "logits/rejected": -0.444050133228302, + "logps/chosen": -1.076517939567566, + "logps/rejected": -12.423477172851562, + "loss": 1.1122, + "nll_loss": 1.0322303771972656, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10765179991722107, + "rewards/margins": 1.1346958875656128, + "rewards/rejected": -1.2423477172851562, + "step": 294 + }, + { + "epoch": 0.5767350928641252, + "grad_norm": 0.703316330909729, + "learning_rate": 4.054142204827137e-05, + "log_odds_chosen": 12.541744232177734, + "log_odds_ratio": -0.045996230095624924, + "logits/chosen": 0.6211004257202148, + "logits/rejected": -0.834053635597229, + "logps/chosen": -0.9886387586593628, + "logps/rejected": -13.053762435913086, + "loss": 1.1142, + "nll_loss": 1.1105003356933594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0988638699054718, + "rewards/margins": 1.206512212753296, + "rewards/rejected": -1.3053761720657349, + "step": 295 + }, + { + "epoch": 0.5786901270772239, + "grad_norm": 0.679226279258728, + "learning_rate": 4.050880626223092e-05, + "log_odds_chosen": 6.655791282653809, + "log_odds_ratio": -0.3161381483078003, + "logits/chosen": 0.6385577917098999, + "logits/rejected": -0.011883988976478577, + "logps/chosen": -1.0419230461120605, + "logps/rejected": -7.443033695220947, + "loss": 1.1018, + "nll_loss": 1.097179651260376, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10419230163097382, + "rewards/margins": 0.6401110887527466, + "rewards/rejected": -0.7443033456802368, + "step": 296 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.6663801670074463, + "learning_rate": 4.047619047619048e-05, + "log_odds_chosen": 13.766864776611328, + "log_odds_ratio": -0.09677732735872269, + "logits/chosen": 0.403751015663147, + "logits/rejected": -0.597703218460083, + "logps/chosen": -0.9485516548156738, + "logps/rejected": -14.225860595703125, + "loss": 1.1056, + "nll_loss": 1.0454223155975342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0948551595211029, + "rewards/margins": 1.3277308940887451, + "rewards/rejected": -1.42258620262146, + "step": 297 + }, + { + "epoch": 0.5826001955034213, + "grad_norm": 0.6710541248321533, + "learning_rate": 4.0443574690150034e-05, + "log_odds_chosen": 7.351007461547852, + "log_odds_ratio": -0.27199745178222656, + "logits/chosen": 0.9510916471481323, + "logits/rejected": -0.2689743638038635, + "logps/chosen": -1.1624168157577515, + "logps/rejected": -8.182243347167969, + "loss": 1.1225, + "nll_loss": 1.2934702634811401, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.11624167859554291, + "rewards/margins": 0.7019827365875244, + "rewards/rejected": -0.8182244300842285, + "step": 298 + }, + { + "epoch": 0.5845552297165201, + "grad_norm": 0.6998826861381531, + "learning_rate": 4.041095890410959e-05, + "log_odds_chosen": 8.999236106872559, + "log_odds_ratio": -0.14469751715660095, + "logits/chosen": 0.5208371877670288, + "logits/rejected": -0.2569431960582733, + "logps/chosen": -0.9534580111503601, + "logps/rejected": -9.55595874786377, + "loss": 1.0983, + "nll_loss": 1.0441091060638428, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09534580260515213, + "rewards/margins": 0.8602501749992371, + "rewards/rejected": -0.9555959105491638, + "step": 299 + }, + { + "epoch": 0.5865102639296188, + "grad_norm": 0.6698459386825562, + "learning_rate": 4.0378343118069146e-05, + "log_odds_chosen": 10.34005355834961, + "log_odds_ratio": -0.12956996262073517, + "logits/chosen": 0.5058138370513916, + "logits/rejected": -0.329831600189209, + "logps/chosen": -0.8900048732757568, + "logps/rejected": -10.776302337646484, + "loss": 1.1022, + "nll_loss": 1.068361759185791, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08900049328804016, + "rewards/margins": 0.9886297583580017, + "rewards/rejected": -1.0776302814483643, + "step": 300 + }, + { + "epoch": 0.5884652981427175, + "grad_norm": 0.7136221528053284, + "learning_rate": 4.0345727332028705e-05, + "log_odds_chosen": 10.195194244384766, + "log_odds_ratio": -0.14599014818668365, + "logits/chosen": 0.17233490943908691, + "logits/rejected": -0.14460690319538116, + "logps/chosen": -1.1019034385681152, + "logps/rejected": -10.908931732177734, + "loss": 1.0879, + "nll_loss": 1.119685411453247, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11019034683704376, + "rewards/margins": 0.9807029366493225, + "rewards/rejected": -1.090893268585205, + "step": 301 + }, + { + "epoch": 0.5904203323558163, + "grad_norm": 0.7383285760879517, + "learning_rate": 4.031311154598826e-05, + "log_odds_chosen": 10.317426681518555, + "log_odds_ratio": -0.029196877032518387, + "logits/chosen": 0.09929897636175156, + "logits/rejected": -1.105665922164917, + "logps/chosen": -0.9038373231887817, + "logps/rejected": -10.683902740478516, + "loss": 1.0648, + "nll_loss": 0.9239006042480469, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09038373082876205, + "rewards/margins": 0.9780064821243286, + "rewards/rejected": -1.068390130996704, + "step": 302 + }, + { + "epoch": 0.592375366568915, + "grad_norm": 0.7328550219535828, + "learning_rate": 4.028049575994782e-05, + "log_odds_chosen": 9.558090209960938, + "log_odds_ratio": -0.39697495102882385, + "logits/chosen": 0.02921976149082184, + "logits/rejected": -0.3084670305252075, + "logps/chosen": -1.1725804805755615, + "logps/rejected": -10.371448516845703, + "loss": 1.0783, + "nll_loss": 1.1658848524093628, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.11725804209709167, + "rewards/margins": 0.9198868274688721, + "rewards/rejected": -1.0371448993682861, + "step": 303 + }, + { + "epoch": 0.5943304007820137, + "grad_norm": 0.7914910316467285, + "learning_rate": 4.024787997390737e-05, + "log_odds_chosen": 8.540351867675781, + "log_odds_ratio": -0.25227341055870056, + "logits/chosen": 0.4468066096305847, + "logits/rejected": -0.3455967307090759, + "logps/chosen": -1.0221272706985474, + "logps/rejected": -9.206241607666016, + "loss": 1.1141, + "nll_loss": 1.2625291347503662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10221272706985474, + "rewards/margins": 0.8184114098548889, + "rewards/rejected": -0.9206241369247437, + "step": 304 + }, + { + "epoch": 0.5962854349951124, + "grad_norm": 0.8036676049232483, + "learning_rate": 4.021526418786693e-05, + "log_odds_chosen": 9.224700927734375, + "log_odds_ratio": -0.16359944641590118, + "logits/chosen": 0.5215994715690613, + "logits/rejected": -0.16730424761772156, + "logps/chosen": -1.042446255683899, + "logps/rejected": -9.869378089904785, + "loss": 1.1022, + "nll_loss": 1.1048529148101807, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10424462705850601, + "rewards/margins": 0.8826931715011597, + "rewards/rejected": -0.9869378805160522, + "step": 305 + }, + { + "epoch": 0.5982404692082112, + "grad_norm": 0.7671235799789429, + "learning_rate": 4.018264840182648e-05, + "log_odds_chosen": 6.190712928771973, + "log_odds_ratio": -0.23258596658706665, + "logits/chosen": 0.5369517803192139, + "logits/rejected": -0.14751233160495758, + "logps/chosen": -0.9647350311279297, + "logps/rejected": -6.7982378005981445, + "loss": 1.0774, + "nll_loss": 1.142849326133728, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.09647350013256073, + "rewards/margins": 0.5833503603935242, + "rewards/rejected": -0.6798238158226013, + "step": 306 + }, + { + "epoch": 0.6001955034213099, + "grad_norm": 0.6952720880508423, + "learning_rate": 4.015003261578604e-05, + "log_odds_chosen": 10.163156509399414, + "log_odds_ratio": -0.0944875180721283, + "logits/chosen": 0.46014150977134705, + "logits/rejected": -0.23878949880599976, + "logps/chosen": -0.8695891499519348, + "logps/rejected": -10.512669563293457, + "loss": 1.0873, + "nll_loss": 0.9365761280059814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08695891499519348, + "rewards/margins": 0.9643080234527588, + "rewards/rejected": -1.0512670278549194, + "step": 307 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.8226056098937988, + "learning_rate": 4.0117416829745595e-05, + "log_odds_chosen": 7.230032920837402, + "log_odds_ratio": -0.1841113567352295, + "logits/chosen": 0.5554065108299255, + "logits/rejected": -0.689578115940094, + "logps/chosen": -0.88451087474823, + "logps/rejected": -7.66387939453125, + "loss": 1.0803, + "nll_loss": 1.01332688331604, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.088451087474823, + "rewards/margins": 0.6779369115829468, + "rewards/rejected": -0.7663879990577698, + "step": 308 + }, + { + "epoch": 0.6041055718475073, + "grad_norm": 0.7932422161102295, + "learning_rate": 4.0084801043705154e-05, + "log_odds_chosen": 4.731194019317627, + "log_odds_ratio": -0.3452082574367523, + "logits/chosen": 0.17039242386817932, + "logits/rejected": -0.16583223640918732, + "logps/chosen": -0.9843783378601074, + "logps/rejected": -5.463403701782227, + "loss": 1.07, + "nll_loss": 1.037376880645752, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0984378308057785, + "rewards/margins": 0.4479025602340698, + "rewards/rejected": -0.5463404059410095, + "step": 309 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 0.7270837426185608, + "learning_rate": 4.005218525766471e-05, + "log_odds_chosen": 10.361936569213867, + "log_odds_ratio": -0.10116403549909592, + "logits/chosen": 0.25156038999557495, + "logits/rejected": -0.3337465226650238, + "logps/chosen": -0.82117760181427, + "logps/rejected": -10.636209487915039, + "loss": 1.0922, + "nll_loss": 0.9243340492248535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08211776614189148, + "rewards/margins": 0.9815031290054321, + "rewards/rejected": -1.063620924949646, + "step": 310 + }, + { + "epoch": 0.6080156402737048, + "grad_norm": 0.8723970651626587, + "learning_rate": 4.0019569471624266e-05, + "log_odds_chosen": 8.078058242797852, + "log_odds_ratio": -0.17071497440338135, + "logits/chosen": 0.38699233531951904, + "logits/rejected": -0.32072311639785767, + "logps/chosen": -0.8848750591278076, + "logps/rejected": -8.53613567352295, + "loss": 1.0778, + "nll_loss": 0.9410775899887085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08848750591278076, + "rewards/margins": 0.7651259899139404, + "rewards/rejected": -0.8536134958267212, + "step": 311 + }, + { + "epoch": 0.6099706744868035, + "grad_norm": 0.7769557237625122, + "learning_rate": 3.998695368558382e-05, + "log_odds_chosen": 5.422914028167725, + "log_odds_ratio": -0.2640592157840729, + "logits/chosen": 0.1735839545726776, + "logits/rejected": 0.01326579600572586, + "logps/chosen": -0.8096966743469238, + "logps/rejected": -5.808189868927002, + "loss": 1.0868, + "nll_loss": 0.9764860272407532, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0809696614742279, + "rewards/margins": 0.4998493194580078, + "rewards/rejected": -0.5808190107345581, + "step": 312 + }, + { + "epoch": 0.6119257086999023, + "grad_norm": 0.6750428676605225, + "learning_rate": 3.995433789954338e-05, + "log_odds_chosen": 12.730113983154297, + "log_odds_ratio": -0.055161990225315094, + "logits/chosen": 0.05814070999622345, + "logits/rejected": -0.45035886764526367, + "logps/chosen": -0.7340595126152039, + "logps/rejected": -12.839445114135742, + "loss": 1.0724, + "nll_loss": 0.8614511489868164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07340595871210098, + "rewards/margins": 1.210538625717163, + "rewards/rejected": -1.2839446067810059, + "step": 313 + }, + { + "epoch": 0.613880742913001, + "grad_norm": 0.7885208129882812, + "learning_rate": 3.992172211350294e-05, + "log_odds_chosen": 8.373957633972168, + "log_odds_ratio": -0.23639734089374542, + "logits/chosen": 0.31785500049591064, + "logits/rejected": -0.24729840457439423, + "logps/chosen": -0.8974629640579224, + "logps/rejected": -8.869861602783203, + "loss": 1.0856, + "nll_loss": 0.9262354373931885, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08974629640579224, + "rewards/margins": 0.7972398996353149, + "rewards/rejected": -0.8869861960411072, + "step": 314 + }, + { + "epoch": 0.6158357771260997, + "grad_norm": 0.7365303039550781, + "learning_rate": 3.98891063274625e-05, + "log_odds_chosen": 8.574464797973633, + "log_odds_ratio": -0.24707037210464478, + "logits/chosen": 0.41473373770713806, + "logits/rejected": -0.3191220462322235, + "logps/chosen": -0.9568653106689453, + "logps/rejected": -9.14932918548584, + "loss": 1.0728, + "nll_loss": 1.0297964811325073, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09568653255701065, + "rewards/margins": 0.8192465305328369, + "rewards/rejected": -0.9149330258369446, + "step": 315 + }, + { + "epoch": 0.6177908113391984, + "grad_norm": 0.7034230828285217, + "learning_rate": 3.985649054142205e-05, + "log_odds_chosen": 12.835803985595703, + "log_odds_ratio": -0.06799305230379105, + "logits/chosen": 0.6137794852256775, + "logits/rejected": -0.455719530582428, + "logps/chosen": -0.7932102680206299, + "logps/rejected": -12.94871711730957, + "loss": 1.0709, + "nll_loss": 0.9232203960418701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07932102680206299, + "rewards/margins": 1.2155506610870361, + "rewards/rejected": -1.2948716878890991, + "step": 316 + }, + { + "epoch": 0.6197458455522972, + "grad_norm": 0.7686121463775635, + "learning_rate": 3.982387475538161e-05, + "log_odds_chosen": 17.21440315246582, + "log_odds_ratio": -0.09399690479040146, + "logits/chosen": 0.33708620071411133, + "logits/rejected": -0.4784221649169922, + "logps/chosen": -1.0282928943634033, + "logps/rejected": -17.766386032104492, + "loss": 1.0655, + "nll_loss": 1.1351099014282227, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10282929241657257, + "rewards/margins": 1.6738091707229614, + "rewards/rejected": -1.7766385078430176, + "step": 317 + }, + { + "epoch": 0.6217008797653959, + "grad_norm": 0.7012072205543518, + "learning_rate": 3.979125896934116e-05, + "log_odds_chosen": 15.905498504638672, + "log_odds_ratio": -0.12007281184196472, + "logits/chosen": 0.47083336114883423, + "logits/rejected": -0.3959487974643707, + "logps/chosen": -1.0968141555786133, + "logps/rejected": -16.573169708251953, + "loss": 1.0863, + "nll_loss": 1.0323851108551025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10968142747879028, + "rewards/margins": 1.547635555267334, + "rewards/rejected": -1.6573169231414795, + "step": 318 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.7731661200523376, + "learning_rate": 3.975864318330072e-05, + "log_odds_chosen": 10.821012496948242, + "log_odds_ratio": -0.1517227292060852, + "logits/chosen": 0.07868921756744385, + "logits/rejected": -0.5221620798110962, + "logps/chosen": -0.8663922548294067, + "logps/rejected": -11.227571487426758, + "loss": 1.0795, + "nll_loss": 0.970252275466919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08663922548294067, + "rewards/margins": 1.0361180305480957, + "rewards/rejected": -1.1227571964263916, + "step": 319 + }, + { + "epoch": 0.6256109481915934, + "grad_norm": 0.7407070994377136, + "learning_rate": 3.9726027397260274e-05, + "log_odds_chosen": 4.234835147857666, + "log_odds_ratio": -0.1405094712972641, + "logits/chosen": 0.6778172254562378, + "logits/rejected": 0.06807506084442139, + "logps/chosen": -0.9321144819259644, + "logps/rejected": -4.6896467208862305, + "loss": 1.0792, + "nll_loss": 1.1379458904266357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09321145713329315, + "rewards/margins": 0.375753253698349, + "rewards/rejected": -0.46896469593048096, + "step": 320 + }, + { + "epoch": 0.6275659824046921, + "grad_norm": 0.7266780138015747, + "learning_rate": 3.9693411611219834e-05, + "log_odds_chosen": 10.93179702758789, + "log_odds_ratio": -0.08779972046613693, + "logits/chosen": 0.19358541071414948, + "logits/rejected": -0.32651054859161377, + "logps/chosen": -0.9166792035102844, + "logps/rejected": -11.350723266601562, + "loss": 1.0815, + "nll_loss": 1.267322301864624, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09166792035102844, + "rewards/margins": 1.043404459953308, + "rewards/rejected": -1.1350723505020142, + "step": 321 + }, + { + "epoch": 0.6295210166177908, + "grad_norm": 0.7632256150245667, + "learning_rate": 3.9660795825179387e-05, + "log_odds_chosen": 11.016931533813477, + "log_odds_ratio": -0.2577012777328491, + "logits/chosen": 0.3908025324344635, + "logits/rejected": -0.19670870900154114, + "logps/chosen": -0.8040949106216431, + "logps/rejected": -11.341644287109375, + "loss": 1.0567, + "nll_loss": 0.920543909072876, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.08040949702262878, + "rewards/margins": 1.0537549257278442, + "rewards/rejected": -1.1341644525527954, + "step": 322 + }, + { + "epoch": 0.6314760508308895, + "grad_norm": 0.7243574261665344, + "learning_rate": 3.9628180039138946e-05, + "log_odds_chosen": 9.99502182006836, + "log_odds_ratio": -0.12459034472703934, + "logits/chosen": 0.13155673444271088, + "logits/rejected": -0.4217144548892975, + "logps/chosen": -1.0472030639648438, + "logps/rejected": -10.516366004943848, + "loss": 1.0778, + "nll_loss": 1.1131829023361206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10472030937671661, + "rewards/margins": 0.9469163417816162, + "rewards/rejected": -1.0516365766525269, + "step": 323 + }, + { + "epoch": 0.6334310850439883, + "grad_norm": 0.7381526827812195, + "learning_rate": 3.95955642530985e-05, + "log_odds_chosen": 7.909669876098633, + "log_odds_ratio": -0.2541644275188446, + "logits/chosen": 0.1258295774459839, + "logits/rejected": -0.561514139175415, + "logps/chosen": -1.0173382759094238, + "logps/rejected": -8.593768119812012, + "loss": 1.0562, + "nll_loss": 1.1411817073822021, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.10173381865024567, + "rewards/margins": 0.7576429843902588, + "rewards/rejected": -0.8593767881393433, + "step": 324 + }, + { + "epoch": 0.635386119257087, + "grad_norm": 0.8003792762756348, + "learning_rate": 3.956294846705806e-05, + "log_odds_chosen": 5.782504558563232, + "log_odds_ratio": -0.1888050138950348, + "logits/chosen": 0.240608811378479, + "logits/rejected": -0.3871909976005554, + "logps/chosen": -0.872035562992096, + "logps/rejected": -6.202639579772949, + "loss": 1.0366, + "nll_loss": 0.8874043822288513, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08720355480909348, + "rewards/margins": 0.5330604314804077, + "rewards/rejected": -0.620263934135437, + "step": 325 + }, + { + "epoch": 0.6373411534701857, + "grad_norm": 0.6927700638771057, + "learning_rate": 3.953033268101761e-05, + "log_odds_chosen": 9.303312301635742, + "log_odds_ratio": -0.13850325345993042, + "logits/chosen": 0.22262492775917053, + "logits/rejected": -0.6064327955245972, + "logps/chosen": -1.002050757408142, + "logps/rejected": -9.937296867370605, + "loss": 1.0627, + "nll_loss": 1.1262929439544678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10020507872104645, + "rewards/margins": 0.893524706363678, + "rewards/rejected": -0.993729829788208, + "step": 326 + }, + { + "epoch": 0.6392961876832844, + "grad_norm": 0.7787310481071472, + "learning_rate": 3.949771689497717e-05, + "log_odds_chosen": 7.6388840675354, + "log_odds_ratio": -0.13889126479625702, + "logits/chosen": 0.23221158981323242, + "logits/rejected": -0.34398218989372253, + "logps/chosen": -0.7601031064987183, + "logps/rejected": -7.821802616119385, + "loss": 1.0561, + "nll_loss": 0.7691296339035034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0760103166103363, + "rewards/margins": 0.7061699628829956, + "rewards/rejected": -0.7821803092956543, + "step": 327 + }, + { + "epoch": 0.6412512218963832, + "grad_norm": 0.7744753360748291, + "learning_rate": 3.946510110893672e-05, + "log_odds_chosen": 12.500802993774414, + "log_odds_ratio": -0.08407304435968399, + "logits/chosen": 0.678739607334137, + "logits/rejected": -0.29979613423347473, + "logps/chosen": -1.2237313985824585, + "logps/rejected": -13.38144302368164, + "loss": 1.0677, + "nll_loss": 1.2589553594589233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12237313389778137, + "rewards/margins": 1.215771198272705, + "rewards/rejected": -1.3381444215774536, + "step": 328 + }, + { + "epoch": 0.6432062561094819, + "grad_norm": 0.717902421951294, + "learning_rate": 3.943248532289628e-05, + "log_odds_chosen": 8.733448028564453, + "log_odds_ratio": -0.15875515341758728, + "logits/chosen": 0.49599015712738037, + "logits/rejected": -0.17015522718429565, + "logps/chosen": -1.10453462600708, + "logps/rejected": -9.501535415649414, + "loss": 1.0399, + "nll_loss": 1.2026195526123047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11045344918966293, + "rewards/margins": 0.8397000432014465, + "rewards/rejected": -0.9501534700393677, + "step": 329 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.7144838571548462, + "learning_rate": 3.9399869536855835e-05, + "log_odds_chosen": 6.543728828430176, + "log_odds_ratio": -0.22681686282157898, + "logits/chosen": 0.47245052456855774, + "logits/rejected": -0.13368874788284302, + "logps/chosen": -0.9019752144813538, + "logps/rejected": -7.093357086181641, + "loss": 1.0755, + "nll_loss": 0.9365350008010864, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09019751846790314, + "rewards/margins": 0.619138240814209, + "rewards/rejected": -0.7093356847763062, + "step": 330 + }, + { + "epoch": 0.6471163245356794, + "grad_norm": 0.7785714864730835, + "learning_rate": 3.9367253750815395e-05, + "log_odds_chosen": 6.965949058532715, + "log_odds_ratio": -0.20529989898204803, + "logits/chosen": 0.5823709964752197, + "logits/rejected": -0.1545196771621704, + "logps/chosen": -1.1105486154556274, + "logps/rejected": -7.766041278839111, + "loss": 1.0335, + "nll_loss": 1.1586744785308838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11105486005544662, + "rewards/margins": 0.6655492782592773, + "rewards/rejected": -0.776604175567627, + "step": 331 + }, + { + "epoch": 0.6490713587487781, + "grad_norm": 0.6896499395370483, + "learning_rate": 3.933463796477495e-05, + "log_odds_chosen": 13.441879272460938, + "log_odds_ratio": -0.058730751276016235, + "logits/chosen": 0.13887718319892883, + "logits/rejected": -0.3765685558319092, + "logps/chosen": -0.8157219886779785, + "logps/rejected": -13.661527633666992, + "loss": 1.0596, + "nll_loss": 0.8660918474197388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08157219737768173, + "rewards/margins": 1.2845804691314697, + "rewards/rejected": -1.3661527633666992, + "step": 332 + }, + { + "epoch": 0.6510263929618768, + "grad_norm": 0.748981237411499, + "learning_rate": 3.930202217873451e-05, + "log_odds_chosen": 6.3784589767456055, + "log_odds_ratio": -0.19948123395442963, + "logits/chosen": 0.1781880110502243, + "logits/rejected": -0.1886785328388214, + "logps/chosen": -0.8999835252761841, + "logps/rejected": -6.889002323150635, + "loss": 1.0604, + "nll_loss": 0.8814743757247925, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08999834954738617, + "rewards/margins": 0.5989018678665161, + "rewards/rejected": -0.6889001727104187, + "step": 333 + }, + { + "epoch": 0.6529814271749755, + "grad_norm": 0.6799917817115784, + "learning_rate": 3.9269406392694066e-05, + "log_odds_chosen": 15.255110740661621, + "log_odds_ratio": -0.10017555207014084, + "logits/chosen": 0.09061459451913834, + "logits/rejected": -0.4881461262702942, + "logps/chosen": -0.8869200944900513, + "logps/rejected": -15.665924072265625, + "loss": 1.0184, + "nll_loss": 1.0154869556427002, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08869201689958572, + "rewards/margins": 1.4779003858566284, + "rewards/rejected": -1.5665923357009888, + "step": 334 + }, + { + "epoch": 0.6549364613880743, + "grad_norm": 0.7516436576843262, + "learning_rate": 3.9236790606653626e-05, + "log_odds_chosen": 11.601221084594727, + "log_odds_ratio": -0.16160064935684204, + "logits/chosen": 0.3190007209777832, + "logits/rejected": -0.2581862807273865, + "logps/chosen": -1.077967643737793, + "logps/rejected": -12.30025577545166, + "loss": 1.0555, + "nll_loss": 1.162546157836914, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10779675841331482, + "rewards/margins": 1.1222288608551025, + "rewards/rejected": -1.2300255298614502, + "step": 335 + }, + { + "epoch": 0.656891495601173, + "grad_norm": 0.7321993708610535, + "learning_rate": 3.920417482061318e-05, + "log_odds_chosen": 12.694446563720703, + "log_odds_ratio": -0.125194251537323, + "logits/chosen": 0.2762424051761627, + "logits/rejected": -0.763642430305481, + "logps/chosen": -0.8949229717254639, + "logps/rejected": -13.035565376281738, + "loss": 1.0606, + "nll_loss": 1.0746886730194092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08949229121208191, + "rewards/margins": 1.214064359664917, + "rewards/rejected": -1.3035566806793213, + "step": 336 + }, + { + "epoch": 0.6588465298142717, + "grad_norm": 0.7147474884986877, + "learning_rate": 3.917155903457274e-05, + "log_odds_chosen": 11.74331283569336, + "log_odds_ratio": -0.060027409344911575, + "logits/chosen": 0.2531675398349762, + "logits/rejected": -0.7064575552940369, + "logps/chosen": -1.1645171642303467, + "logps/rejected": -12.554037094116211, + "loss": 1.0424, + "nll_loss": 1.1856651306152344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11645171791315079, + "rewards/margins": 1.1389520168304443, + "rewards/rejected": -1.255403757095337, + "step": 337 + }, + { + "epoch": 0.6608015640273704, + "grad_norm": 0.6948788166046143, + "learning_rate": 3.913894324853229e-05, + "log_odds_chosen": 8.368000030517578, + "log_odds_ratio": -0.2165239155292511, + "logits/chosen": 0.32574349641799927, + "logits/rejected": -0.09269766509532928, + "logps/chosen": -0.8508930206298828, + "logps/rejected": -8.803024291992188, + "loss": 1.0347, + "nll_loss": 0.958396315574646, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.085089311003685, + "rewards/margins": 0.7952131032943726, + "rewards/rejected": -0.8803024291992188, + "step": 338 + }, + { + "epoch": 0.6627565982404692, + "grad_norm": 0.7339446544647217, + "learning_rate": 3.910632746249185e-05, + "log_odds_chosen": 18.221195220947266, + "log_odds_ratio": -0.14651599526405334, + "logits/chosen": 0.4897107481956482, + "logits/rejected": 0.09629464894533157, + "logps/chosen": -0.9759562015533447, + "logps/rejected": -18.756235122680664, + "loss": 1.04, + "nll_loss": 1.1120108366012573, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09759561717510223, + "rewards/margins": 1.7780280113220215, + "rewards/rejected": -1.8756234645843506, + "step": 339 + }, + { + "epoch": 0.6647116324535679, + "grad_norm": 0.6701048016548157, + "learning_rate": 3.90737116764514e-05, + "log_odds_chosen": 4.644395351409912, + "log_odds_ratio": -0.17388154566287994, + "logits/chosen": -0.0977468341588974, + "logits/rejected": -0.2073955535888672, + "logps/chosen": -0.815130352973938, + "logps/rejected": -4.963609218597412, + "loss": 1.0396, + "nll_loss": 0.8992843627929688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08151303976774216, + "rewards/margins": 0.4148479104042053, + "rewards/rejected": -0.4963609576225281, + "step": 340 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.6887212991714478, + "learning_rate": 3.904109589041096e-05, + "log_odds_chosen": 10.678484916687012, + "log_odds_ratio": -0.05018872395157814, + "logits/chosen": -0.1201663389801979, + "logits/rejected": -0.5024425387382507, + "logps/chosen": -0.8753475546836853, + "logps/rejected": -11.021692276000977, + "loss": 1.0432, + "nll_loss": 0.9933708906173706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08753475546836853, + "rewards/margins": 1.0146344900131226, + "rewards/rejected": -1.102169156074524, + "step": 341 + }, + { + "epoch": 0.6686217008797654, + "grad_norm": 0.6745936870574951, + "learning_rate": 3.9008480104370515e-05, + "log_odds_chosen": 8.617015838623047, + "log_odds_ratio": -0.2717978358268738, + "logits/chosen": 0.26834067702293396, + "logits/rejected": -0.45241889357566833, + "logps/chosen": -0.9437215328216553, + "logps/rejected": -9.170695304870605, + "loss": 1.0415, + "nll_loss": 0.9688966870307922, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09437215328216553, + "rewards/margins": 0.8226973414421082, + "rewards/rejected": -0.9170694947242737, + "step": 342 + }, + { + "epoch": 0.6705767350928641, + "grad_norm": 0.6959183216094971, + "learning_rate": 3.8975864318330075e-05, + "log_odds_chosen": 5.81561803817749, + "log_odds_ratio": -0.266409307718277, + "logits/chosen": 0.33520522713661194, + "logits/rejected": -0.3628959655761719, + "logps/chosen": -0.7865683436393738, + "logps/rejected": -6.10355281829834, + "loss": 1.0585, + "nll_loss": 1.0611634254455566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07865683734416962, + "rewards/margins": 0.53169846534729, + "rewards/rejected": -0.6103552579879761, + "step": 343 + }, + { + "epoch": 0.6725317693059628, + "grad_norm": 0.7316209077835083, + "learning_rate": 3.894324853228963e-05, + "log_odds_chosen": 9.970869064331055, + "log_odds_ratio": -0.08135147392749786, + "logits/chosen": 0.07419527322053909, + "logits/rejected": -0.5581535696983337, + "logps/chosen": -0.9069391489028931, + "logps/rejected": -10.351160049438477, + "loss": 1.0302, + "nll_loss": 1.1042723655700684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09069392085075378, + "rewards/margins": 0.9444221258163452, + "rewards/rejected": -1.0351159572601318, + "step": 344 + }, + { + "epoch": 0.6744868035190615, + "grad_norm": 0.6625117659568787, + "learning_rate": 3.891063274624919e-05, + "log_odds_chosen": 12.793309211730957, + "log_odds_ratio": -0.14852942526340485, + "logits/chosen": 0.08311387151479721, + "logits/rejected": -0.3381527066230774, + "logps/chosen": -0.908025324344635, + "logps/rejected": -13.20089054107666, + "loss": 1.0303, + "nll_loss": 0.9933123588562012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09080253541469574, + "rewards/margins": 1.2292864322662354, + "rewards/rejected": -1.3200891017913818, + "step": 345 + }, + { + "epoch": 0.6764418377321603, + "grad_norm": 0.7676037549972534, + "learning_rate": 3.887801696020874e-05, + "log_odds_chosen": 10.327384948730469, + "log_odds_ratio": -0.13479046523571014, + "logits/chosen": 0.23591884970664978, + "logits/rejected": -0.4031290113925934, + "logps/chosen": -0.8475979566574097, + "logps/rejected": -10.663883209228516, + "loss": 1.0518, + "nll_loss": 1.015711784362793, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08475980162620544, + "rewards/margins": 0.9816286563873291, + "rewards/rejected": -1.0663883686065674, + "step": 346 + }, + { + "epoch": 0.678396871945259, + "grad_norm": 0.7725668549537659, + "learning_rate": 3.88454011741683e-05, + "log_odds_chosen": 9.089563369750977, + "log_odds_ratio": -0.21110276877880096, + "logits/chosen": 0.2855651378631592, + "logits/rejected": -0.44043099880218506, + "logps/chosen": -0.944257378578186, + "logps/rejected": -9.521669387817383, + "loss": 1.0306, + "nll_loss": 0.849261999130249, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0944257378578186, + "rewards/margins": 0.8577412366867065, + "rewards/rejected": -0.9521669149398804, + "step": 347 + }, + { + "epoch": 0.6803519061583577, + "grad_norm": 0.7096676230430603, + "learning_rate": 3.881278538812785e-05, + "log_odds_chosen": 11.44461441040039, + "log_odds_ratio": -0.09115945547819138, + "logits/chosen": 0.09488950669765472, + "logits/rejected": -0.48650944232940674, + "logps/chosen": -0.9295996427536011, + "logps/rejected": -11.836697578430176, + "loss": 1.0356, + "nll_loss": 1.1095199584960938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09295996278524399, + "rewards/margins": 1.0907098054885864, + "rewards/rejected": -1.1836698055267334, + "step": 348 + }, + { + "epoch": 0.6823069403714564, + "grad_norm": 0.766991913318634, + "learning_rate": 3.878016960208741e-05, + "log_odds_chosen": 11.501043319702148, + "log_odds_ratio": -0.17507722973823547, + "logits/chosen": 0.1667974293231964, + "logits/rejected": -0.27350759506225586, + "logps/chosen": -1.0163013935089111, + "logps/rejected": -12.096433639526367, + "loss": 1.0205, + "nll_loss": 1.1349672079086304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10163013637065887, + "rewards/margins": 1.1080131530761719, + "rewards/rejected": -1.2096433639526367, + "step": 349 + }, + { + "epoch": 0.6842619745845552, + "grad_norm": 0.8496200442314148, + "learning_rate": 3.8747553816046964e-05, + "log_odds_chosen": 11.644378662109375, + "log_odds_ratio": -0.17683683335781097, + "logits/chosen": 0.06550633907318115, + "logits/rejected": -0.6864033937454224, + "logps/chosen": -0.9214028120040894, + "logps/rejected": -12.071671485900879, + "loss": 1.0431, + "nll_loss": 1.1473430395126343, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09214027225971222, + "rewards/margins": 1.1150269508361816, + "rewards/rejected": -1.207167148590088, + "step": 350 + }, + { + "epoch": 0.6862170087976539, + "grad_norm": 0.7468739151954651, + "learning_rate": 3.871493803000652e-05, + "log_odds_chosen": 7.945107460021973, + "log_odds_ratio": -0.1591452956199646, + "logits/chosen": 0.18449145555496216, + "logits/rejected": -0.28225627541542053, + "logps/chosen": -0.8886194825172424, + "logps/rejected": -8.387579917907715, + "loss": 1.034, + "nll_loss": 0.994304358959198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08886194974184036, + "rewards/margins": 0.7498960494995117, + "rewards/rejected": -0.8387580513954163, + "step": 351 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.805486798286438, + "learning_rate": 3.8682322243966076e-05, + "log_odds_chosen": 12.043450355529785, + "log_odds_ratio": -0.1347082406282425, + "logits/chosen": 0.35523688793182373, + "logits/rejected": -0.8511843681335449, + "logps/chosen": -0.7686980962753296, + "logps/rejected": -12.260720252990723, + "loss": 1.0257, + "nll_loss": 0.8864758014678955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07686981558799744, + "rewards/margins": 1.1492022275924683, + "rewards/rejected": -1.2260719537734985, + "step": 352 + }, + { + "epoch": 0.6901270772238515, + "grad_norm": 0.6882284283638, + "learning_rate": 3.8649706457925635e-05, + "log_odds_chosen": 7.849414825439453, + "log_odds_ratio": -0.128037691116333, + "logits/chosen": 0.15065065026283264, + "logits/rejected": -0.36872148513793945, + "logps/chosen": -0.8545006513595581, + "logps/rejected": -8.150678634643555, + "loss": 1.0183, + "nll_loss": 1.0311806201934814, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08545006811618805, + "rewards/margins": 0.729617714881897, + "rewards/rejected": -0.8150678277015686, + "step": 353 + }, + { + "epoch": 0.6920821114369502, + "grad_norm": 0.8190439939498901, + "learning_rate": 3.8617090671885195e-05, + "log_odds_chosen": 15.342952728271484, + "log_odds_ratio": -0.025760412216186523, + "logits/chosen": 0.1952979564666748, + "logits/rejected": -0.8877828121185303, + "logps/chosen": -1.0986453294754028, + "logps/rejected": -16.03041648864746, + "loss": 1.045, + "nll_loss": 1.131406545639038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10986454784870148, + "rewards/margins": 1.4931771755218506, + "rewards/rejected": -1.603041648864746, + "step": 354 + }, + { + "epoch": 0.6940371456500489, + "grad_norm": 0.7974002361297607, + "learning_rate": 3.8584474885844754e-05, + "log_odds_chosen": 14.3634614944458, + "log_odds_ratio": -0.11456994712352753, + "logits/chosen": 0.25168362259864807, + "logits/rejected": -0.8306794166564941, + "logps/chosen": -0.8732409477233887, + "logps/rejected": -14.741388320922852, + "loss": 1.0228, + "nll_loss": 1.0142360925674438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0873240977525711, + "rewards/margins": 1.386814832687378, + "rewards/rejected": -1.4741389751434326, + "step": 355 + }, + { + "epoch": 0.6959921798631477, + "grad_norm": 0.6946945190429688, + "learning_rate": 3.855185909980431e-05, + "log_odds_chosen": 5.131886959075928, + "log_odds_ratio": -0.19223366677761078, + "logits/chosen": 0.054076291620731354, + "logits/rejected": -0.5773534178733826, + "logps/chosen": -0.8671945929527283, + "logps/rejected": -5.585619926452637, + "loss": 1.0018, + "nll_loss": 1.0617656707763672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08671946823596954, + "rewards/margins": 0.47184255719184875, + "rewards/rejected": -0.5585620403289795, + "step": 356 + }, + { + "epoch": 0.6979472140762464, + "grad_norm": 0.7711277604103088, + "learning_rate": 3.8519243313763866e-05, + "log_odds_chosen": 9.13167953491211, + "log_odds_ratio": -0.19184571504592896, + "logits/chosen": 0.057080090045928955, + "logits/rejected": -0.9647346138954163, + "logps/chosen": -0.8964655995368958, + "logps/rejected": -9.584610939025879, + "loss": 1.0223, + "nll_loss": 1.0033024549484253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08964655548334122, + "rewards/margins": 0.8688145279884338, + "rewards/rejected": -0.9584610462188721, + "step": 357 + }, + { + "epoch": 0.6999022482893451, + "grad_norm": 0.7761881947517395, + "learning_rate": 3.848662752772342e-05, + "log_odds_chosen": 11.574317932128906, + "log_odds_ratio": -0.2194259762763977, + "logits/chosen": 0.51539146900177, + "logits/rejected": -0.37585365772247314, + "logps/chosen": -0.8689804077148438, + "logps/rejected": -12.059110641479492, + "loss": 0.9994, + "nll_loss": 0.976043164730072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08689804375171661, + "rewards/margins": 1.1190131902694702, + "rewards/rejected": -1.2059111595153809, + "step": 358 + }, + { + "epoch": 0.7018572825024438, + "grad_norm": 0.7076807618141174, + "learning_rate": 3.845401174168298e-05, + "log_odds_chosen": 10.502439498901367, + "log_odds_ratio": -0.19250565767288208, + "logits/chosen": 0.3410624861717224, + "logits/rejected": -0.6308845281600952, + "logps/chosen": -0.8223678469657898, + "logps/rejected": -10.868289947509766, + "loss": 1.0278, + "nll_loss": 0.8545849919319153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08223678171634674, + "rewards/margins": 1.0045922994613647, + "rewards/rejected": -1.0868290662765503, + "step": 359 + }, + { + "epoch": 0.7038123167155426, + "grad_norm": 0.7671579122543335, + "learning_rate": 3.842139595564253e-05, + "log_odds_chosen": 11.65248966217041, + "log_odds_ratio": -0.18191000819206238, + "logits/chosen": 0.20354531705379486, + "logits/rejected": -0.4328272342681885, + "logps/chosen": -0.9795019030570984, + "logps/rejected": -12.240646362304688, + "loss": 0.9901, + "nll_loss": 1.1117463111877441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09795019775629044, + "rewards/margins": 1.1261144876480103, + "rewards/rejected": -1.224064588546753, + "step": 360 + }, + { + "epoch": 0.7057673509286413, + "grad_norm": 0.7423350811004639, + "learning_rate": 3.838878016960209e-05, + "log_odds_chosen": 17.463226318359375, + "log_odds_ratio": -0.05390213802456856, + "logits/chosen": 0.33416157960891724, + "logits/rejected": -0.48588812351226807, + "logps/chosen": -0.9454622864723206, + "logps/rejected": -17.945091247558594, + "loss": 1.0078, + "nll_loss": 1.0092724561691284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09454622864723206, + "rewards/margins": 1.6999627351760864, + "rewards/rejected": -1.7945090532302856, + "step": 361 + }, + { + "epoch": 0.70772238514174, + "grad_norm": 0.701998770236969, + "learning_rate": 3.8356164383561644e-05, + "log_odds_chosen": 8.451641082763672, + "log_odds_ratio": -0.16865402460098267, + "logits/chosen": 0.3216341733932495, + "logits/rejected": -0.5299345254898071, + "logps/chosen": -0.9088441133499146, + "logps/rejected": -8.929816246032715, + "loss": 1.0051, + "nll_loss": 0.9684736728668213, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09088440984487534, + "rewards/margins": 0.8020973205566406, + "rewards/rejected": -0.8929816484451294, + "step": 362 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.7387005686759949, + "learning_rate": 3.83235485975212e-05, + "log_odds_chosen": 11.070967674255371, + "log_odds_ratio": -0.08538828790187836, + "logits/chosen": -0.29611748456954956, + "logits/rejected": -0.332248330116272, + "logps/chosen": -0.8491048812866211, + "logps/rejected": -11.340608596801758, + "loss": 1.0001, + "nll_loss": 1.0384745597839355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08491048961877823, + "rewards/margins": 1.0491503477096558, + "rewards/rejected": -1.1340608596801758, + "step": 363 + }, + { + "epoch": 0.7116324535679375, + "grad_norm": 0.6946907639503479, + "learning_rate": 3.8290932811480756e-05, + "log_odds_chosen": 8.444194793701172, + "log_odds_ratio": -0.10476413369178772, + "logits/chosen": 0.24226176738739014, + "logits/rejected": -0.6046956777572632, + "logps/chosen": -0.9894341230392456, + "logps/rejected": -8.971856117248535, + "loss": 0.9791, + "nll_loss": 1.1748840808868408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09894341230392456, + "rewards/margins": 0.7982422113418579, + "rewards/rejected": -0.8971856832504272, + "step": 364 + }, + { + "epoch": 0.7135874877810362, + "grad_norm": 0.790361762046814, + "learning_rate": 3.8258317025440315e-05, + "log_odds_chosen": 8.572097778320312, + "log_odds_ratio": -0.04738086089491844, + "logits/chosen": -0.03046603500843048, + "logits/rejected": -0.6051952242851257, + "logps/chosen": -0.6269464492797852, + "logps/rejected": -8.419326782226562, + "loss": 0.9977, + "nll_loss": 0.7672558426856995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06269465386867523, + "rewards/margins": 0.7792381048202515, + "rewards/rejected": -0.8419326543807983, + "step": 365 + }, + { + "epoch": 0.7155425219941349, + "grad_norm": 0.7280831933021545, + "learning_rate": 3.822570123939987e-05, + "log_odds_chosen": 4.218668460845947, + "log_odds_ratio": -0.2686604857444763, + "logits/chosen": 0.04562797397375107, + "logits/rejected": -0.39909446239471436, + "logps/chosen": -0.8781400918960571, + "logps/rejected": -4.7203192710876465, + "loss": 1.0075, + "nll_loss": 1.0075616836547852, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08781401067972183, + "rewards/margins": 0.3842179477214813, + "rewards/rejected": -0.47203195095062256, + "step": 366 + }, + { + "epoch": 0.7174975562072337, + "grad_norm": 0.7305868268013, + "learning_rate": 3.819308545335943e-05, + "log_odds_chosen": 11.391950607299805, + "log_odds_ratio": -0.1607973724603653, + "logits/chosen": 0.44365817308425903, + "logits/rejected": -1.1335105895996094, + "logps/chosen": -0.7879927158355713, + "logps/rejected": -11.656452178955078, + "loss": 0.9967, + "nll_loss": 0.9892336130142212, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07879926264286041, + "rewards/margins": 1.0868459939956665, + "rewards/rejected": -1.1656452417373657, + "step": 367 + }, + { + "epoch": 0.7194525904203324, + "grad_norm": 0.7596710324287415, + "learning_rate": 3.816046966731898e-05, + "log_odds_chosen": 9.231220245361328, + "log_odds_ratio": -0.13329756259918213, + "logits/chosen": 0.3684564232826233, + "logits/rejected": -0.8130645155906677, + "logps/chosen": -0.9306896328926086, + "logps/rejected": -9.711837768554688, + "loss": 0.9888, + "nll_loss": 0.9679763317108154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09306895732879639, + "rewards/margins": 0.8781147599220276, + "rewards/rejected": -0.9711837768554688, + "step": 368 + }, + { + "epoch": 0.7214076246334311, + "grad_norm": 0.7181580662727356, + "learning_rate": 3.812785388127854e-05, + "log_odds_chosen": 8.044187545776367, + "log_odds_ratio": -0.3013090491294861, + "logits/chosen": 0.1439676135778427, + "logits/rejected": -0.4118828773498535, + "logps/chosen": -0.9366845488548279, + "logps/rejected": -8.610621452331543, + "loss": 1.0087, + "nll_loss": 1.0378894805908203, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09366845339536667, + "rewards/margins": 0.7673937082290649, + "rewards/rejected": -0.8610621690750122, + "step": 369 + }, + { + "epoch": 0.7233626588465298, + "grad_norm": 0.7404477000236511, + "learning_rate": 3.809523809523809e-05, + "log_odds_chosen": 11.04481315612793, + "log_odds_ratio": -0.0827026292681694, + "logits/chosen": -0.07292544841766357, + "logits/rejected": -0.8677270412445068, + "logps/chosen": -0.9372554421424866, + "logps/rejected": -11.504232406616211, + "loss": 1.0011, + "nll_loss": 1.0897555351257324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0937255471944809, + "rewards/margins": 1.0566977262496948, + "rewards/rejected": -1.150423288345337, + "step": 370 + }, + { + "epoch": 0.7253176930596286, + "grad_norm": 0.7061487436294556, + "learning_rate": 3.806262230919765e-05, + "log_odds_chosen": 11.273612976074219, + "log_odds_ratio": -0.09757309406995773, + "logits/chosen": 0.38232332468032837, + "logits/rejected": -0.22247633337974548, + "logps/chosen": -0.8605624437332153, + "logps/rejected": -11.599395751953125, + "loss": 0.9812, + "nll_loss": 1.0010488033294678, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08605623245239258, + "rewards/margins": 1.073883295059204, + "rewards/rejected": -1.1599395275115967, + "step": 371 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.6867861747741699, + "learning_rate": 3.803000652315721e-05, + "log_odds_chosen": 11.958574295043945, + "log_odds_ratio": -0.09586191922426224, + "logits/chosen": 0.0601084902882576, + "logits/rejected": -0.6321113109588623, + "logps/chosen": -0.7997138500213623, + "logps/rejected": -12.187679290771484, + "loss": 0.9915, + "nll_loss": 0.9236552119255066, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07997138798236847, + "rewards/margins": 1.1387965679168701, + "rewards/rejected": -1.2187680006027222, + "step": 372 + }, + { + "epoch": 0.729227761485826, + "grad_norm": 0.7022337317466736, + "learning_rate": 3.7997390737116764e-05, + "log_odds_chosen": 10.221096992492676, + "log_odds_ratio": -0.09761947393417358, + "logits/chosen": 0.1916242390871048, + "logits/rejected": -0.49622631072998047, + "logps/chosen": -0.9327988624572754, + "logps/rejected": -10.620943069458008, + "loss": 1.0055, + "nll_loss": 1.2033979892730713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0932798981666565, + "rewards/margins": 0.9688143134117126, + "rewards/rejected": -1.0620942115783691, + "step": 373 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.7623032331466675, + "learning_rate": 3.796477495107632e-05, + "log_odds_chosen": 12.798507690429688, + "log_odds_ratio": -0.08354681730270386, + "logits/chosen": 0.1493166834115982, + "logits/rejected": -0.4794970452785492, + "logps/chosen": -0.8585419654846191, + "logps/rejected": -13.083864212036133, + "loss": 0.9754, + "nll_loss": 1.1187913417816162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08585420250892639, + "rewards/margins": 1.2225322723388672, + "rewards/rejected": -1.3083864450454712, + "step": 374 + }, + { + "epoch": 0.7331378299120235, + "grad_norm": 0.7296212911605835, + "learning_rate": 3.793215916503588e-05, + "log_odds_chosen": 14.629247665405273, + "log_odds_ratio": -0.08428078889846802, + "logits/chosen": 0.09346985816955566, + "logits/rejected": -1.4061946868896484, + "logps/chosen": -0.916100025177002, + "logps/rejected": -15.009824752807617, + "loss": 0.9859, + "nll_loss": 0.938214898109436, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09160999953746796, + "rewards/margins": 1.409372329711914, + "rewards/rejected": -1.500982403755188, + "step": 375 + }, + { + "epoch": 0.7350928641251222, + "grad_norm": 0.7655154466629028, + "learning_rate": 3.7899543378995436e-05, + "log_odds_chosen": 14.498310089111328, + "log_odds_ratio": -0.035350970923900604, + "logits/chosen": 0.06340785324573517, + "logits/rejected": -1.2351852655410767, + "logps/chosen": -0.8242172002792358, + "logps/rejected": -14.748828887939453, + "loss": 0.9826, + "nll_loss": 0.8994652032852173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08242172747850418, + "rewards/margins": 1.3924612998962402, + "rewards/rejected": -1.4748828411102295, + "step": 376 + }, + { + "epoch": 0.7370478983382209, + "grad_norm": 0.7490354776382446, + "learning_rate": 3.7866927592954995e-05, + "log_odds_chosen": 8.29649543762207, + "log_odds_ratio": -0.10904494673013687, + "logits/chosen": -0.15713316202163696, + "logits/rejected": -0.6823784708976746, + "logps/chosen": -0.7694954872131348, + "logps/rejected": -8.508760452270508, + "loss": 0.9834, + "nll_loss": 0.908592700958252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07694955170154572, + "rewards/margins": 0.7739264965057373, + "rewards/rejected": -0.8508760333061218, + "step": 377 + }, + { + "epoch": 0.7390029325513197, + "grad_norm": 0.7220534682273865, + "learning_rate": 3.783431180691455e-05, + "log_odds_chosen": 9.238248825073242, + "log_odds_ratio": -0.07950688153505325, + "logits/chosen": 0.11624934524297714, + "logits/rejected": 0.08427603542804718, + "logps/chosen": -0.6899752020835876, + "logps/rejected": -9.275847434997559, + "loss": 0.981, + "nll_loss": 0.7919901013374329, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06899751722812653, + "rewards/margins": 0.8585872650146484, + "rewards/rejected": -0.9275847673416138, + "step": 378 + }, + { + "epoch": 0.7409579667644184, + "grad_norm": 0.7809976935386658, + "learning_rate": 3.780169602087411e-05, + "log_odds_chosen": 9.082656860351562, + "log_odds_ratio": -0.20125941932201385, + "logits/chosen": 0.26850372552871704, + "logits/rejected": -0.5089722275733948, + "logps/chosen": -0.8387951850891113, + "logps/rejected": -9.386519432067871, + "loss": 0.9798, + "nll_loss": 1.0118404626846313, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0838795155286789, + "rewards/margins": 0.8547724485397339, + "rewards/rejected": -0.9386520385742188, + "step": 379 + }, + { + "epoch": 0.7429130009775171, + "grad_norm": 0.682223379611969, + "learning_rate": 3.776908023483366e-05, + "log_odds_chosen": 11.968207359313965, + "log_odds_ratio": -0.13575102388858795, + "logits/chosen": -0.006240922957658768, + "logits/rejected": -0.6960878968238831, + "logps/chosen": -0.8067638874053955, + "logps/rejected": -12.236161231994629, + "loss": 0.9793, + "nll_loss": 0.902459442615509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08067639172077179, + "rewards/margins": 1.142939805984497, + "rewards/rejected": -1.223616123199463, + "step": 380 + }, + { + "epoch": 0.7448680351906158, + "grad_norm": 0.824830949306488, + "learning_rate": 3.773646444879322e-05, + "log_odds_chosen": 8.080909729003906, + "log_odds_ratio": -0.08029701560735703, + "logits/chosen": -0.097439706325531, + "logits/rejected": -0.63784259557724, + "logps/chosen": -0.8125027418136597, + "logps/rejected": -8.342215538024902, + "loss": 0.9874, + "nll_loss": 0.9208087921142578, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08125027269124985, + "rewards/margins": 0.7529712915420532, + "rewards/rejected": -0.834221601486206, + "step": 381 + }, + { + "epoch": 0.7468230694037146, + "grad_norm": 0.7192775011062622, + "learning_rate": 3.770384866275277e-05, + "log_odds_chosen": 12.81875228881836, + "log_odds_ratio": -0.057437360286712646, + "logits/chosen": 0.012379590421915054, + "logits/rejected": -0.8704553842544556, + "logps/chosen": -0.9294430017471313, + "logps/rejected": -13.268436431884766, + "loss": 1.004, + "nll_loss": 0.9943996667861938, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09294430911540985, + "rewards/margins": 1.2338993549346924, + "rewards/rejected": -1.3268437385559082, + "step": 382 + }, + { + "epoch": 0.7487781036168133, + "grad_norm": 0.7293890118598938, + "learning_rate": 3.767123287671233e-05, + "log_odds_chosen": 8.946050643920898, + "log_odds_ratio": -0.055775947868824005, + "logits/chosen": 0.3932822346687317, + "logits/rejected": -0.7310247421264648, + "logps/chosen": -0.8668081760406494, + "logps/rejected": -9.239944458007812, + "loss": 0.9935, + "nll_loss": 1.0638165473937988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0866808146238327, + "rewards/margins": 0.8373136520385742, + "rewards/rejected": -0.9239944815635681, + "step": 383 + }, + { + "epoch": 0.750733137829912, + "grad_norm": 0.7680302262306213, + "learning_rate": 3.7638617090671884e-05, + "log_odds_chosen": 9.918699264526367, + "log_odds_ratio": -0.1319858729839325, + "logits/chosen": 0.07628665119409561, + "logits/rejected": -0.693605899810791, + "logps/chosen": -0.8481377363204956, + "logps/rejected": -10.27682876586914, + "loss": 0.9857, + "nll_loss": 0.9667797088623047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08481377363204956, + "rewards/margins": 0.9428690075874329, + "rewards/rejected": -1.0276827812194824, + "step": 384 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.6940392851829529, + "learning_rate": 3.7606001304631444e-05, + "log_odds_chosen": 9.281539916992188, + "log_odds_ratio": -0.20543111860752106, + "logits/chosen": 0.2313881516456604, + "logits/rejected": -0.3630730211734772, + "logps/chosen": -0.7262087464332581, + "logps/rejected": -9.507791519165039, + "loss": 0.9942, + "nll_loss": 0.8527743816375732, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07262088358402252, + "rewards/margins": 0.8781583309173584, + "rewards/rejected": -0.9507791996002197, + "step": 385 + }, + { + "epoch": 0.7546432062561095, + "grad_norm": 0.7254694700241089, + "learning_rate": 3.7573385518590996e-05, + "log_odds_chosen": 10.07780647277832, + "log_odds_ratio": -0.16741147637367249, + "logits/chosen": -0.14313778281211853, + "logits/rejected": -0.27957794070243835, + "logps/chosen": -1.0049073696136475, + "logps/rejected": -10.685223579406738, + "loss": 0.9775, + "nll_loss": 1.2079637050628662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10049073398113251, + "rewards/margins": 0.968031644821167, + "rewards/rejected": -1.068522334098816, + "step": 386 + }, + { + "epoch": 0.7565982404692082, + "grad_norm": 0.713530421257019, + "learning_rate": 3.7540769732550556e-05, + "log_odds_chosen": 11.938545227050781, + "log_odds_ratio": -0.137315034866333, + "logits/chosen": -0.004934739321470261, + "logits/rejected": -0.8259605765342712, + "logps/chosen": -0.9552385807037354, + "logps/rejected": -12.482855796813965, + "loss": 0.985, + "nll_loss": 1.003657579421997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09552385658025742, + "rewards/margins": 1.1527615785598755, + "rewards/rejected": -1.2482855319976807, + "step": 387 + }, + { + "epoch": 0.7585532746823069, + "grad_norm": 0.7128574252128601, + "learning_rate": 3.750815394651011e-05, + "log_odds_chosen": 4.576414108276367, + "log_odds_ratio": -0.30138903856277466, + "logits/chosen": 0.41276049613952637, + "logits/rejected": -0.29098159074783325, + "logps/chosen": -0.9424372911453247, + "logps/rejected": -5.16379451751709, + "loss": 0.9688, + "nll_loss": 1.044988751411438, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.09424373507499695, + "rewards/margins": 0.42213571071624756, + "rewards/rejected": -0.5163794755935669, + "step": 388 + }, + { + "epoch": 0.7605083088954057, + "grad_norm": 0.7075881958007812, + "learning_rate": 3.747553816046967e-05, + "log_odds_chosen": 9.23231029510498, + "log_odds_ratio": -0.23022279143333435, + "logits/chosen": 0.12064582109451294, + "logits/rejected": -0.31286561489105225, + "logps/chosen": -0.8681063652038574, + "logps/rejected": -9.65234661102295, + "loss": 0.9696, + "nll_loss": 1.026764154434204, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0868106335401535, + "rewards/margins": 0.8784240484237671, + "rewards/rejected": -0.965234637260437, + "step": 389 + }, + { + "epoch": 0.7624633431085044, + "grad_norm": 0.6999031901359558, + "learning_rate": 3.744292237442922e-05, + "log_odds_chosen": 9.95547866821289, + "log_odds_ratio": -0.13065746426582336, + "logits/chosen": 0.14888551831245422, + "logits/rejected": -0.35529324412345886, + "logps/chosen": -0.9348762035369873, + "logps/rejected": -10.394761085510254, + "loss": 0.9708, + "nll_loss": 0.9877551794052124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09348763525485992, + "rewards/margins": 0.9459885358810425, + "rewards/rejected": -1.0394761562347412, + "step": 390 + }, + { + "epoch": 0.7644183773216031, + "grad_norm": 0.732151210308075, + "learning_rate": 3.741030658838878e-05, + "log_odds_chosen": 9.936786651611328, + "log_odds_ratio": -0.057395752519369125, + "logits/chosen": -0.29100465774536133, + "logits/rejected": -0.7752015590667725, + "logps/chosen": -0.7721365094184875, + "logps/rejected": -10.123495101928711, + "loss": 0.969, + "nll_loss": 0.8190904855728149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07721364498138428, + "rewards/margins": 0.9351359009742737, + "rewards/rejected": -1.0123496055603027, + "step": 391 + }, + { + "epoch": 0.7663734115347018, + "grad_norm": 0.7296123504638672, + "learning_rate": 3.737769080234834e-05, + "log_odds_chosen": 9.311635971069336, + "log_odds_ratio": -0.17455336451530457, + "logits/chosen": -0.06040741503238678, + "logits/rejected": -0.5958414673805237, + "logps/chosen": -0.6681124567985535, + "logps/rejected": -9.385723114013672, + "loss": 0.97, + "nll_loss": 0.8069876432418823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06681124866008759, + "rewards/margins": 0.8717610836029053, + "rewards/rejected": -0.9385722875595093, + "step": 392 + }, + { + "epoch": 0.7683284457478006, + "grad_norm": 0.7181831002235413, + "learning_rate": 3.734507501630789e-05, + "log_odds_chosen": 17.37663459777832, + "log_odds_ratio": -0.10680252313613892, + "logits/chosen": -0.03178231790661812, + "logits/rejected": -0.5430458784103394, + "logps/chosen": -0.916388750076294, + "logps/rejected": -17.802888870239258, + "loss": 0.9852, + "nll_loss": 1.032230257987976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09163887053728104, + "rewards/margins": 1.6886500120162964, + "rewards/rejected": -1.780288815498352, + "step": 393 + }, + { + "epoch": 0.7702834799608993, + "grad_norm": 0.7128579020500183, + "learning_rate": 3.731245923026745e-05, + "log_odds_chosen": 13.96545124053955, + "log_odds_ratio": -0.07324190437793732, + "logits/chosen": 0.3262014389038086, + "logits/rejected": -0.9833789467811584, + "logps/chosen": -0.9703899621963501, + "logps/rejected": -14.482536315917969, + "loss": 0.9424, + "nll_loss": 1.0242919921875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09703899919986725, + "rewards/margins": 1.3512146472930908, + "rewards/rejected": -1.4482536315917969, + "step": 394 + }, + { + "epoch": 0.772238514173998, + "grad_norm": 0.6800099015235901, + "learning_rate": 3.727984344422701e-05, + "log_odds_chosen": 9.781783103942871, + "log_odds_ratio": -0.0949823409318924, + "logits/chosen": 0.10972454398870468, + "logits/rejected": -0.6611499786376953, + "logps/chosen": -0.8334658741950989, + "logps/rejected": -10.046257019042969, + "loss": 0.9702, + "nll_loss": 0.9965488910675049, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08334659039974213, + "rewards/margins": 0.9212791919708252, + "rewards/rejected": -1.0046257972717285, + "step": 395 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.728235125541687, + "learning_rate": 3.7247227658186564e-05, + "log_odds_chosen": 8.489664077758789, + "log_odds_ratio": -0.018632439896464348, + "logits/chosen": -0.046620212495326996, + "logits/rejected": -1.0682244300842285, + "logps/chosen": -0.8535736203193665, + "logps/rejected": -8.783258438110352, + "loss": 0.96, + "nll_loss": 0.960117518901825, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08535736054182053, + "rewards/margins": 0.7929684519767761, + "rewards/rejected": -0.8783258199691772, + "step": 396 + }, + { + "epoch": 0.7761485826001955, + "grad_norm": 0.6935809850692749, + "learning_rate": 3.7214611872146123e-05, + "log_odds_chosen": 11.199606895446777, + "log_odds_ratio": -0.14401519298553467, + "logits/chosen": -0.09235890209674835, + "logits/rejected": -0.5257691740989685, + "logps/chosen": -0.7293604016304016, + "logps/rejected": -11.206130981445312, + "loss": 0.9464, + "nll_loss": 0.85493004322052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0729360431432724, + "rewards/margins": 1.0476771593093872, + "rewards/rejected": -1.1206130981445312, + "step": 397 + }, + { + "epoch": 0.7781036168132942, + "grad_norm": 0.7179955244064331, + "learning_rate": 3.7181996086105676e-05, + "log_odds_chosen": 13.58524227142334, + "log_odds_ratio": -0.12053030729293823, + "logits/chosen": 0.1389959305524826, + "logits/rejected": -0.5279268026351929, + "logps/chosen": -0.8968260288238525, + "logps/rejected": -13.928459167480469, + "loss": 0.9609, + "nll_loss": 1.0347819328308105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08968260884284973, + "rewards/margins": 1.3031632900238037, + "rewards/rejected": -1.392845869064331, + "step": 398 + }, + { + "epoch": 0.7800586510263929, + "grad_norm": 0.7031475901603699, + "learning_rate": 3.7149380300065236e-05, + "log_odds_chosen": 4.417687892913818, + "log_odds_ratio": -0.2656514048576355, + "logits/chosen": 0.17618048191070557, + "logits/rejected": -0.024746179580688477, + "logps/chosen": -0.8936065435409546, + "logps/rejected": -4.931901454925537, + "loss": 0.9688, + "nll_loss": 1.040271282196045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08936066180467606, + "rewards/margins": 0.4038294851779938, + "rewards/rejected": -0.49319013953208923, + "step": 399 + }, + { + "epoch": 0.7820136852394917, + "grad_norm": 0.7219538688659668, + "learning_rate": 3.711676451402479e-05, + "log_odds_chosen": 11.651009559631348, + "log_odds_ratio": -0.24780671298503876, + "logits/chosen": 0.33698537945747375, + "logits/rejected": -0.29026147723197937, + "logps/chosen": -0.7608500719070435, + "logps/rejected": -11.86223030090332, + "loss": 0.9545, + "nll_loss": 0.9708322286605835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07608500123023987, + "rewards/margins": 1.110137939453125, + "rewards/rejected": -1.1862229108810425, + "step": 400 + }, + { + "epoch": 0.7839687194525904, + "grad_norm": 0.6956151723861694, + "learning_rate": 3.708414872798435e-05, + "log_odds_chosen": 5.330575942993164, + "log_odds_ratio": -0.1776837408542633, + "logits/chosen": -0.12100641429424286, + "logits/rejected": -0.2606464624404907, + "logps/chosen": -0.5721072554588318, + "logps/rejected": -5.194371223449707, + "loss": 0.9244, + "nll_loss": 0.7497704029083252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05721072107553482, + "rewards/margins": 0.46222633123397827, + "rewards/rejected": -0.5194370746612549, + "step": 401 + }, + { + "epoch": 0.7859237536656891, + "grad_norm": 0.7119638919830322, + "learning_rate": 3.70515329419439e-05, + "log_odds_chosen": 4.908454418182373, + "log_odds_ratio": -0.2338227927684784, + "logits/chosen": -0.20322871208190918, + "logits/rejected": -0.8796697854995728, + "logps/chosen": -0.7103285789489746, + "logps/rejected": -5.069460868835449, + "loss": 0.9521, + "nll_loss": 0.8730916976928711, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07103285938501358, + "rewards/margins": 0.43591323494911194, + "rewards/rejected": -0.5069460868835449, + "step": 402 + }, + { + "epoch": 0.7878787878787878, + "grad_norm": 0.7108336687088013, + "learning_rate": 3.701891715590346e-05, + "log_odds_chosen": 9.179397583007812, + "log_odds_ratio": -0.222529798746109, + "logits/chosen": 0.06143427640199661, + "logits/rejected": -0.8053635358810425, + "logps/chosen": -0.8298342227935791, + "logps/rejected": -9.607382774353027, + "loss": 0.9355, + "nll_loss": 0.8745653629302979, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08298342674970627, + "rewards/margins": 0.877754807472229, + "rewards/rejected": -0.9607383012771606, + "step": 403 + }, + { + "epoch": 0.7898338220918866, + "grad_norm": 0.6808122396469116, + "learning_rate": 3.698630136986301e-05, + "log_odds_chosen": 12.320064544677734, + "log_odds_ratio": -0.1123555600643158, + "logits/chosen": -0.06645575910806656, + "logits/rejected": -0.4812483489513397, + "logps/chosen": -0.8151518106460571, + "logps/rejected": -12.53872299194336, + "loss": 0.9547, + "nll_loss": 0.9204083681106567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08151518553495407, + "rewards/margins": 1.172357201576233, + "rewards/rejected": -1.253872275352478, + "step": 404 + }, + { + "epoch": 0.7917888563049853, + "grad_norm": 0.7029263973236084, + "learning_rate": 3.695368558382257e-05, + "log_odds_chosen": 9.115050315856934, + "log_odds_ratio": -0.19123002886772156, + "logits/chosen": -0.1276378035545349, + "logits/rejected": -0.7267742156982422, + "logps/chosen": -0.815380334854126, + "logps/rejected": -9.396652221679688, + "loss": 0.9413, + "nll_loss": 0.8235123157501221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08153803646564484, + "rewards/margins": 0.8581271171569824, + "rewards/rejected": -0.9396651983261108, + "step": 405 + }, + { + "epoch": 0.793743890518084, + "grad_norm": 0.7324498295783997, + "learning_rate": 3.6921069797782125e-05, + "log_odds_chosen": 9.922319412231445, + "log_odds_ratio": -0.07928106188774109, + "logits/chosen": -0.11836598813533783, + "logits/rejected": -0.6962651610374451, + "logps/chosen": -0.7085702419281006, + "logps/rejected": -9.896364212036133, + "loss": 0.9574, + "nll_loss": 0.8026823997497559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07085703313350677, + "rewards/margins": 0.9187793731689453, + "rewards/rejected": -0.9896364808082581, + "step": 406 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.7040823101997375, + "learning_rate": 3.6888454011741684e-05, + "log_odds_chosen": 9.717578887939453, + "log_odds_ratio": -0.24910318851470947, + "logits/chosen": -0.18292179703712463, + "logits/rejected": -0.6108570694923401, + "logps/chosen": -0.8522086143493652, + "logps/rejected": -10.086240768432617, + "loss": 0.9561, + "nll_loss": 1.0434678792953491, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08522085845470428, + "rewards/margins": 0.923403263092041, + "rewards/rejected": -1.0086241960525513, + "step": 407 + }, + { + "epoch": 0.7976539589442815, + "grad_norm": 0.7133339643478394, + "learning_rate": 3.685583822570124e-05, + "log_odds_chosen": 6.212283611297607, + "log_odds_ratio": -0.14624260365962982, + "logits/chosen": -0.18388886749744415, + "logits/rejected": -0.6520110964775085, + "logps/chosen": -0.6623320579528809, + "logps/rejected": -6.205554008483887, + "loss": 0.9416, + "nll_loss": 0.7706358432769775, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06623321026563644, + "rewards/margins": 0.5543222427368164, + "rewards/rejected": -0.6205554008483887, + "step": 408 + }, + { + "epoch": 0.7996089931573802, + "grad_norm": 0.7315388321876526, + "learning_rate": 3.6823222439660797e-05, + "log_odds_chosen": 19.032791137695312, + "log_odds_ratio": -0.0576394684612751, + "logits/chosen": 0.14042295515537262, + "logits/rejected": -0.6530076265335083, + "logps/chosen": -0.9308563470840454, + "logps/rejected": -19.477354049682617, + "loss": 0.9372, + "nll_loss": 1.0244742631912231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0930856317281723, + "rewards/margins": 1.8546497821807861, + "rewards/rejected": -1.94773530960083, + "step": 409 + }, + { + "epoch": 0.8015640273704789, + "grad_norm": 0.7157960534095764, + "learning_rate": 3.679060665362035e-05, + "log_odds_chosen": 10.15210247039795, + "log_odds_ratio": -0.10466822236776352, + "logits/chosen": -0.28133392333984375, + "logits/rejected": -0.9403045177459717, + "logps/chosen": -0.6314886212348938, + "logps/rejected": -10.11031436920166, + "loss": 0.943, + "nll_loss": 0.7043455243110657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0631488710641861, + "rewards/margins": 0.9478824734687805, + "rewards/rejected": -1.0110313892364502, + "step": 410 + }, + { + "epoch": 0.8035190615835777, + "grad_norm": 0.7512738108634949, + "learning_rate": 3.675799086757991e-05, + "log_odds_chosen": 7.917848587036133, + "log_odds_ratio": -0.18805450201034546, + "logits/chosen": 0.3859931230545044, + "logits/rejected": -0.0800468921661377, + "logps/chosen": -0.68839430809021, + "logps/rejected": -8.015522003173828, + "loss": 0.9282, + "nll_loss": 0.8064141273498535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06883943825960159, + "rewards/margins": 0.7327128052711487, + "rewards/rejected": -0.8015521764755249, + "step": 411 + }, + { + "epoch": 0.8054740957966764, + "grad_norm": 0.6800876259803772, + "learning_rate": 3.672537508153947e-05, + "log_odds_chosen": 16.494403839111328, + "log_odds_ratio": -0.1085062175989151, + "logits/chosen": 0.003081154078245163, + "logits/rejected": -0.6027607321739197, + "logps/chosen": -0.7384375929832458, + "logps/rejected": -16.62291717529297, + "loss": 0.9288, + "nll_loss": 0.9021713733673096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07384376227855682, + "rewards/margins": 1.5884478092193604, + "rewards/rejected": -1.6622915267944336, + "step": 412 + }, + { + "epoch": 0.8074291300097751, + "grad_norm": 0.7698684930801392, + "learning_rate": 3.669275929549902e-05, + "log_odds_chosen": 9.68372917175293, + "log_odds_ratio": -0.23854684829711914, + "logits/chosen": -0.3084426522254944, + "logits/rejected": -0.47476568818092346, + "logps/chosen": -0.7006561160087585, + "logps/rejected": -9.866061210632324, + "loss": 0.9336, + "nll_loss": 0.7540038824081421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07006561756134033, + "rewards/margins": 0.9165405035018921, + "rewards/rejected": -0.9866061210632324, + "step": 413 + }, + { + "epoch": 0.8093841642228738, + "grad_norm": 0.698698878288269, + "learning_rate": 3.666014350945858e-05, + "log_odds_chosen": 9.869476318359375, + "log_odds_ratio": -0.22986558079719543, + "logits/chosen": -0.06835287064313889, + "logits/rejected": -0.8372170925140381, + "logps/chosen": -0.8229342699050903, + "logps/rejected": -10.173078536987305, + "loss": 0.9223, + "nll_loss": 0.9506775736808777, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08229342103004456, + "rewards/margins": 0.9350144863128662, + "rewards/rejected": -1.0173078775405884, + "step": 414 + }, + { + "epoch": 0.8113391984359726, + "grad_norm": 0.7197661995887756, + "learning_rate": 3.662752772341814e-05, + "log_odds_chosen": 16.289806365966797, + "log_odds_ratio": -0.09915497899055481, + "logits/chosen": 0.04806363582611084, + "logits/rejected": -0.6158959865570068, + "logps/chosen": -0.8461745381355286, + "logps/rejected": -16.6120548248291, + "loss": 0.9256, + "nll_loss": 0.9372244477272034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08461745083332062, + "rewards/margins": 1.5765880346298218, + "rewards/rejected": -1.661205530166626, + "step": 415 + }, + { + "epoch": 0.8132942326490714, + "grad_norm": 0.695586085319519, + "learning_rate": 3.659491193737769e-05, + "log_odds_chosen": 10.186290740966797, + "log_odds_ratio": -0.10521069914102554, + "logits/chosen": 0.28012603521347046, + "logits/rejected": -0.16905447840690613, + "logps/chosen": -0.7868204116821289, + "logps/rejected": -10.293625831604004, + "loss": 0.9171, + "nll_loss": 0.9973369240760803, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07868204265832901, + "rewards/margins": 0.9506806135177612, + "rewards/rejected": -1.029362678527832, + "step": 416 + }, + { + "epoch": 0.8152492668621701, + "grad_norm": 0.7755530476570129, + "learning_rate": 3.656229615133725e-05, + "log_odds_chosen": 13.115825653076172, + "log_odds_ratio": -0.22699470818042755, + "logits/chosen": -0.17421719431877136, + "logits/rejected": -0.7151620388031006, + "logps/chosen": -0.8124500513076782, + "logps/rejected": -13.49860954284668, + "loss": 0.9441, + "nll_loss": 0.9922413229942322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08124500513076782, + "rewards/margins": 1.268615961074829, + "rewards/rejected": -1.3498609066009521, + "step": 417 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.7182744741439819, + "learning_rate": 3.6529680365296805e-05, + "log_odds_chosen": 17.721294403076172, + "log_odds_ratio": -0.13585937023162842, + "logits/chosen": -0.09724123030900955, + "logits/rejected": -0.8768575191497803, + "logps/chosen": -0.7608200311660767, + "logps/rejected": -17.921138763427734, + "loss": 0.929, + "nll_loss": 0.8440706729888916, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.0760820060968399, + "rewards/margins": 1.7160320281982422, + "rewards/rejected": -1.7921139001846313, + "step": 418 + }, + { + "epoch": 0.8191593352883676, + "grad_norm": 0.7082046866416931, + "learning_rate": 3.6497064579256364e-05, + "log_odds_chosen": 11.10428237915039, + "log_odds_ratio": -0.08491268008947372, + "logits/chosen": -0.4519732594490051, + "logits/rejected": -0.7711098194122314, + "logps/chosen": -0.6765220165252686, + "logps/rejected": -11.113286018371582, + "loss": 0.934, + "nll_loss": 0.8559308052062988, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06765220314264297, + "rewards/margins": 1.043676495552063, + "rewards/rejected": -1.1113286018371582, + "step": 419 + }, + { + "epoch": 0.8211143695014663, + "grad_norm": 0.7318930625915527, + "learning_rate": 3.646444879321592e-05, + "log_odds_chosen": 12.927820205688477, + "log_odds_ratio": -0.08697669953107834, + "logits/chosen": -0.19322456419467926, + "logits/rejected": -1.0678845643997192, + "logps/chosen": -0.8941976428031921, + "logps/rejected": -13.260482788085938, + "loss": 0.9313, + "nll_loss": 1.008849859237671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08941976726055145, + "rewards/margins": 1.236628532409668, + "rewards/rejected": -1.3260483741760254, + "step": 420 + }, + { + "epoch": 0.823069403714565, + "grad_norm": 0.7095892429351807, + "learning_rate": 3.6431833007175476e-05, + "log_odds_chosen": 10.07003116607666, + "log_odds_ratio": -0.1181793287396431, + "logits/chosen": 0.13283099234104156, + "logits/rejected": -0.7405471205711365, + "logps/chosen": -0.6782287955284119, + "logps/rejected": -9.947266578674316, + "loss": 0.9267, + "nll_loss": 0.8230759501457214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0678228810429573, + "rewards/margins": 0.9269038438796997, + "rewards/rejected": -0.9947267770767212, + "step": 421 + }, + { + "epoch": 0.8250244379276638, + "grad_norm": 0.7635107040405273, + "learning_rate": 3.639921722113503e-05, + "log_odds_chosen": 9.861254692077637, + "log_odds_ratio": -0.10810279846191406, + "logits/chosen": 0.013675197958946228, + "logits/rejected": -0.9678953289985657, + "logps/chosen": -0.6092125177383423, + "logps/rejected": -9.7800874710083, + "loss": 0.9205, + "nll_loss": 0.7524574995040894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06092125177383423, + "rewards/margins": 0.9170875549316406, + "rewards/rejected": -0.9780088663101196, + "step": 422 + }, + { + "epoch": 0.8269794721407625, + "grad_norm": 0.7182417511940002, + "learning_rate": 3.636660143509459e-05, + "log_odds_chosen": 8.329140663146973, + "log_odds_ratio": -0.1808062046766281, + "logits/chosen": -0.09774290025234222, + "logits/rejected": -0.5379586815834045, + "logps/chosen": -0.8195507526397705, + "logps/rejected": -8.696789741516113, + "loss": 0.9377, + "nll_loss": 0.8576422929763794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08195507526397705, + "rewards/margins": 0.7877238988876343, + "rewards/rejected": -0.8696789145469666, + "step": 423 + }, + { + "epoch": 0.8289345063538612, + "grad_norm": 0.7239990830421448, + "learning_rate": 3.633398564905414e-05, + "log_odds_chosen": 8.702159881591797, + "log_odds_ratio": -0.1765444278717041, + "logits/chosen": -0.1564980447292328, + "logits/rejected": -1.0123512744903564, + "logps/chosen": -1.0421487092971802, + "logps/rejected": -9.344560623168945, + "loss": 0.895, + "nll_loss": 1.2173563241958618, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.1042148768901825, + "rewards/margins": 0.8302412033081055, + "rewards/rejected": -0.9344559907913208, + "step": 424 + }, + { + "epoch": 0.83088954056696, + "grad_norm": 0.7354182004928589, + "learning_rate": 3.63013698630137e-05, + "log_odds_chosen": 11.746320724487305, + "log_odds_ratio": -0.03195307403802872, + "logits/chosen": -0.19711638987064362, + "logits/rejected": -1.1518752574920654, + "logps/chosen": -0.6855113506317139, + "logps/rejected": -11.648176193237305, + "loss": 0.9137, + "nll_loss": 0.8236199617385864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06855113059282303, + "rewards/margins": 1.096266508102417, + "rewards/rejected": -1.1648175716400146, + "step": 425 + }, + { + "epoch": 0.8328445747800587, + "grad_norm": 0.7247713804244995, + "learning_rate": 3.626875407697325e-05, + "log_odds_chosen": 15.207576751708984, + "log_odds_ratio": -0.06784433126449585, + "logits/chosen": -0.2950440049171448, + "logits/rejected": -0.8685745000839233, + "logps/chosen": -0.6130086183547974, + "logps/rejected": -15.020705223083496, + "loss": 0.9394, + "nll_loss": 0.802249550819397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.061300866305828094, + "rewards/margins": 1.4407696723937988, + "rewards/rejected": -1.502070426940918, + "step": 426 + }, + { + "epoch": 0.8347996089931574, + "grad_norm": 0.7359989285469055, + "learning_rate": 3.623613829093281e-05, + "log_odds_chosen": 6.02622127532959, + "log_odds_ratio": -0.24611103534698486, + "logits/chosen": -0.227216899394989, + "logits/rejected": -0.7890604138374329, + "logps/chosen": -0.8232349157333374, + "logps/rejected": -6.429255485534668, + "loss": 0.9136, + "nll_loss": 1.0194889307022095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08232349157333374, + "rewards/margins": 0.5606020092964172, + "rewards/rejected": -0.642925500869751, + "step": 427 + }, + { + "epoch": 0.8367546432062561, + "grad_norm": 0.7218251824378967, + "learning_rate": 3.6203522504892366e-05, + "log_odds_chosen": 13.325040817260742, + "log_odds_ratio": -0.15034101903438568, + "logits/chosen": -0.20973829925060272, + "logits/rejected": -0.9050062894821167, + "logps/chosen": -0.8274120688438416, + "logps/rejected": -13.675254821777344, + "loss": 0.9114, + "nll_loss": 0.8920317888259888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08274120837450027, + "rewards/margins": 1.2847843170166016, + "rewards/rejected": -1.367525577545166, + "step": 428 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.7044532299041748, + "learning_rate": 3.6170906718851925e-05, + "log_odds_chosen": 9.438350677490234, + "log_odds_ratio": -0.19210179150104523, + "logits/chosen": -0.15168677270412445, + "logits/rejected": -0.8686566352844238, + "logps/chosen": -0.7721205949783325, + "logps/rejected": -9.608070373535156, + "loss": 0.9039, + "nll_loss": 0.8910237550735474, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07721206545829773, + "rewards/margins": 0.8835949897766113, + "rewards/rejected": -0.9608070850372314, + "step": 429 + }, + { + "epoch": 0.8406647116324536, + "grad_norm": 0.7609062194824219, + "learning_rate": 3.613829093281148e-05, + "log_odds_chosen": 12.620323181152344, + "log_odds_ratio": -0.09096536785364151, + "logits/chosen": -0.054793234914541245, + "logits/rejected": -0.9757675528526306, + "logps/chosen": -0.7772599458694458, + "logps/rejected": -12.736908912658691, + "loss": 0.9155, + "nll_loss": 0.8603876829147339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07772599160671234, + "rewards/margins": 1.1959648132324219, + "rewards/rejected": -1.2736908197402954, + "step": 430 + }, + { + "epoch": 0.8426197458455523, + "grad_norm": 0.7192644476890564, + "learning_rate": 3.610567514677104e-05, + "log_odds_chosen": 13.913612365722656, + "log_odds_ratio": -0.12782810628414154, + "logits/chosen": -0.04727506637573242, + "logits/rejected": -0.5795392394065857, + "logps/chosen": -0.7953932881355286, + "logps/rejected": -14.056380271911621, + "loss": 0.9312, + "nll_loss": 0.9965416193008423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07953932881355286, + "rewards/margins": 1.3260986804962158, + "rewards/rejected": -1.4056380987167358, + "step": 431 + }, + { + "epoch": 0.844574780058651, + "grad_norm": 0.788785457611084, + "learning_rate": 3.60730593607306e-05, + "log_odds_chosen": 14.194786071777344, + "log_odds_ratio": -0.09528584033250809, + "logits/chosen": -0.0019254740327596664, + "logits/rejected": -0.7975226640701294, + "logps/chosen": -0.8374001979827881, + "logps/rejected": -14.527616500854492, + "loss": 0.903, + "nll_loss": 0.9108747839927673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08374002575874329, + "rewards/margins": 1.3690217733383179, + "rewards/rejected": -1.4527617692947388, + "step": 432 + }, + { + "epoch": 0.8465298142717498, + "grad_norm": 0.7149589657783508, + "learning_rate": 3.604044357469015e-05, + "log_odds_chosen": 10.568818092346191, + "log_odds_ratio": -0.28777337074279785, + "logits/chosen": -0.08111928403377533, + "logits/rejected": -0.8126868009567261, + "logps/chosen": -0.8283179998397827, + "logps/rejected": -10.946288108825684, + "loss": 0.8881, + "nll_loss": 0.9585270881652832, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.08283179998397827, + "rewards/margins": 1.0117969512939453, + "rewards/rejected": -1.0946288108825684, + "step": 433 + }, + { + "epoch": 0.8484848484848485, + "grad_norm": 0.7605778574943542, + "learning_rate": 3.600782778864971e-05, + "log_odds_chosen": 18.206186294555664, + "log_odds_ratio": -0.00022330955835059285, + "logits/chosen": -0.2844318449497223, + "logits/rejected": -1.3889963626861572, + "logps/chosen": -0.6617085933685303, + "logps/rejected": -18.097139358520508, + "loss": 0.8903, + "nll_loss": 0.7545496225357056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06617085635662079, + "rewards/margins": 1.7435431480407715, + "rewards/rejected": -1.8097138404846191, + "step": 434 + }, + { + "epoch": 0.8504398826979472, + "grad_norm": 0.758668839931488, + "learning_rate": 3.597521200260927e-05, + "log_odds_chosen": 9.829566955566406, + "log_odds_ratio": -0.07518977671861649, + "logits/chosen": -0.4791215658187866, + "logits/rejected": -1.066717505455017, + "logps/chosen": -0.769946813583374, + "logps/rejected": -10.022579193115234, + "loss": 0.9085, + "nll_loss": 0.9086700081825256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07699467986822128, + "rewards/margins": 0.9252631664276123, + "rewards/rejected": -1.0022578239440918, + "step": 435 + }, + { + "epoch": 0.852394916911046, + "grad_norm": 0.7165745496749878, + "learning_rate": 3.594259621656882e-05, + "log_odds_chosen": 9.801006317138672, + "log_odds_ratio": -0.06888943165540695, + "logits/chosen": -0.42896008491516113, + "logits/rejected": -1.0979437828063965, + "logps/chosen": -0.7325782775878906, + "logps/rejected": -9.923664093017578, + "loss": 0.9098, + "nll_loss": 0.8418450355529785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07325783371925354, + "rewards/margins": 0.9191085696220398, + "rewards/rejected": -0.9923664331436157, + "step": 436 + }, + { + "epoch": 0.8543499511241447, + "grad_norm": 0.7132607102394104, + "learning_rate": 3.590998043052838e-05, + "log_odds_chosen": 9.248269081115723, + "log_odds_ratio": -0.1548292189836502, + "logits/chosen": -0.17941075563430786, + "logits/rejected": -0.9208800792694092, + "logps/chosen": -0.8080654740333557, + "logps/rejected": -9.462888717651367, + "loss": 0.9041, + "nll_loss": 0.9293208718299866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08080655336380005, + "rewards/margins": 0.8654823899269104, + "rewards/rejected": -0.9462888836860657, + "step": 437 + }, + { + "epoch": 0.8563049853372434, + "grad_norm": 0.7064453959465027, + "learning_rate": 3.587736464448793e-05, + "log_odds_chosen": 10.356266021728516, + "log_odds_ratio": -0.04626818001270294, + "logits/chosen": -0.16261473298072815, + "logits/rejected": -1.113995909690857, + "logps/chosen": -0.6466849446296692, + "logps/rejected": -10.295011520385742, + "loss": 0.8784, + "nll_loss": 0.8092962503433228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06466849893331528, + "rewards/margins": 0.9648327827453613, + "rewards/rejected": -1.02950119972229, + "step": 438 + }, + { + "epoch": 0.8582600195503421, + "grad_norm": 0.7169517278671265, + "learning_rate": 3.584474885844749e-05, + "log_odds_chosen": 7.7720794677734375, + "log_odds_ratio": -0.1227082908153534, + "logits/chosen": -0.19039122760295868, + "logits/rejected": -0.7135578989982605, + "logps/chosen": -0.7527152299880981, + "logps/rejected": -7.833100318908691, + "loss": 0.8997, + "nll_loss": 0.8195376992225647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07527153193950653, + "rewards/margins": 0.7080384492874146, + "rewards/rejected": -0.7833099961280823, + "step": 439 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.688936710357666, + "learning_rate": 3.5812133072407045e-05, + "log_odds_chosen": 15.049463272094727, + "log_odds_ratio": -0.0022743656300008297, + "logits/chosen": -0.31437572836875916, + "logits/rejected": -1.334895372390747, + "logps/chosen": -0.7085176110267639, + "logps/rejected": -15.04987621307373, + "loss": 0.89, + "nll_loss": 0.8246854543685913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07085175812244415, + "rewards/margins": 1.434135913848877, + "rewards/rejected": -1.5049875974655151, + "step": 440 + }, + { + "epoch": 0.8621700879765396, + "grad_norm": 0.7397514581680298, + "learning_rate": 3.5779517286366605e-05, + "log_odds_chosen": 12.135644912719727, + "log_odds_ratio": -0.14318615198135376, + "logits/chosen": 0.03256801515817642, + "logits/rejected": -0.8212164044380188, + "logps/chosen": -0.6814695596694946, + "logps/rejected": -12.245549201965332, + "loss": 0.8964, + "nll_loss": 0.8109290599822998, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0681469589471817, + "rewards/margins": 1.1564080715179443, + "rewards/rejected": -1.2245550155639648, + "step": 441 + }, + { + "epoch": 0.8641251221896383, + "grad_norm": 0.6647982597351074, + "learning_rate": 3.574690150032616e-05, + "log_odds_chosen": 17.647132873535156, + "log_odds_ratio": -0.043836385011672974, + "logits/chosen": -0.22171002626419067, + "logits/rejected": -1.320967435836792, + "logps/chosen": -0.8832035660743713, + "logps/rejected": -17.930465698242188, + "loss": 0.8708, + "nll_loss": 0.964152455329895, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08832035958766937, + "rewards/margins": 1.7047263383865356, + "rewards/rejected": -1.7930467128753662, + "step": 442 + }, + { + "epoch": 0.8660801564027371, + "grad_norm": 0.7141931056976318, + "learning_rate": 3.571428571428572e-05, + "log_odds_chosen": 15.101737976074219, + "log_odds_ratio": -0.10681568831205368, + "logits/chosen": -0.08886892348527908, + "logits/rejected": -0.9091113209724426, + "logps/chosen": -0.7546743750572205, + "logps/rejected": -15.266040802001953, + "loss": 0.9013, + "nll_loss": 0.866584062576294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07546743750572205, + "rewards/margins": 1.4511367082595825, + "rewards/rejected": -1.5266042947769165, + "step": 443 + }, + { + "epoch": 0.8680351906158358, + "grad_norm": 0.6997552514076233, + "learning_rate": 3.568166992824527e-05, + "log_odds_chosen": 18.63430404663086, + "log_odds_ratio": -0.05064854770898819, + "logits/chosen": -0.06190396845340729, + "logits/rejected": -0.439775675535202, + "logps/chosen": -0.8205047845840454, + "logps/rejected": -18.87588882446289, + "loss": 0.8942, + "nll_loss": 0.9922086596488953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08205047994852066, + "rewards/margins": 1.8055384159088135, + "rewards/rejected": -1.8875887393951416, + "step": 444 + }, + { + "epoch": 0.8699902248289345, + "grad_norm": 0.7164373993873596, + "learning_rate": 3.564905414220483e-05, + "log_odds_chosen": 9.682291984558105, + "log_odds_ratio": -0.15525877475738525, + "logits/chosen": -0.17284902930259705, + "logits/rejected": -0.9023337364196777, + "logps/chosen": -0.7416632175445557, + "logps/rejected": -9.887492179870605, + "loss": 0.8903, + "nll_loss": 0.8808637261390686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07416632026433945, + "rewards/margins": 0.9145828485488892, + "rewards/rejected": -0.9887491464614868, + "step": 445 + }, + { + "epoch": 0.8719452590420332, + "grad_norm": 0.7395002841949463, + "learning_rate": 3.561643835616438e-05, + "log_odds_chosen": 14.76528549194336, + "log_odds_ratio": -0.08107682317495346, + "logits/chosen": -0.6067786812782288, + "logits/rejected": -0.8276552557945251, + "logps/chosen": -0.6389215588569641, + "logps/rejected": -14.655365943908691, + "loss": 0.8868, + "nll_loss": 0.7857397198677063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06389215588569641, + "rewards/margins": 1.4016444683074951, + "rewards/rejected": -1.4655365943908691, + "step": 446 + }, + { + "epoch": 0.873900293255132, + "grad_norm": 0.7005184292793274, + "learning_rate": 3.558382257012394e-05, + "log_odds_chosen": 15.515069961547852, + "log_odds_ratio": -0.0451698936522007, + "logits/chosen": -0.017736099660396576, + "logits/rejected": -1.3567578792572021, + "logps/chosen": -0.6064046621322632, + "logps/rejected": -15.287075996398926, + "loss": 0.8979, + "nll_loss": 0.7556054592132568, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06064046174287796, + "rewards/margins": 1.4680671691894531, + "rewards/rejected": -1.52870774269104, + "step": 447 + }, + { + "epoch": 0.8758553274682307, + "grad_norm": 0.7428403496742249, + "learning_rate": 3.5551206784083494e-05, + "log_odds_chosen": 6.886754989624023, + "log_odds_ratio": -0.1382514089345932, + "logits/chosen": -0.03035527467727661, + "logits/rejected": -0.8675273060798645, + "logps/chosen": -0.7930521965026855, + "logps/rejected": -7.157093048095703, + "loss": 0.8621, + "nll_loss": 0.8689541816711426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07930521667003632, + "rewards/margins": 0.6364040970802307, + "rewards/rejected": -0.7157093286514282, + "step": 448 + }, + { + "epoch": 0.8778103616813294, + "grad_norm": 0.6989544034004211, + "learning_rate": 3.5518590998043053e-05, + "log_odds_chosen": 8.24744987487793, + "log_odds_ratio": -0.15700191259384155, + "logits/chosen": -0.30489492416381836, + "logits/rejected": -1.2104082107543945, + "logps/chosen": -0.6994487643241882, + "logps/rejected": -8.382566452026367, + "loss": 0.8799, + "nll_loss": 0.839654266834259, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.06994487345218658, + "rewards/margins": 0.7683118581771851, + "rewards/rejected": -0.8382567167282104, + "step": 449 + }, + { + "epoch": 0.8797653958944281, + "grad_norm": 0.7120003700256348, + "learning_rate": 3.5485975212002606e-05, + "log_odds_chosen": 8.48165512084961, + "log_odds_ratio": -0.2182646542787552, + "logits/chosen": -0.5328896045684814, + "logits/rejected": -0.7594773769378662, + "logps/chosen": -0.7250353693962097, + "logps/rejected": -8.744487762451172, + "loss": 0.8712, + "nll_loss": 0.8096299171447754, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07250353693962097, + "rewards/margins": 0.801945149898529, + "rewards/rejected": -0.8744487762451172, + "step": 450 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.7625570893287659, + "learning_rate": 3.5453359425962166e-05, + "log_odds_chosen": 9.66550064086914, + "log_odds_ratio": -0.06940103322267532, + "logits/chosen": -0.48477956652641296, + "logits/rejected": -1.2708193063735962, + "logps/chosen": -0.8146083354949951, + "logps/rejected": -9.935094833374023, + "loss": 0.8938, + "nll_loss": 0.9338250160217285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08146083354949951, + "rewards/margins": 0.9120486378669739, + "rewards/rejected": -0.9935095310211182, + "step": 451 + }, + { + "epoch": 0.8836754643206256, + "grad_norm": 0.7093210220336914, + "learning_rate": 3.5420743639921725e-05, + "log_odds_chosen": 13.092355728149414, + "log_odds_ratio": -0.14477023482322693, + "logits/chosen": -0.23998016119003296, + "logits/rejected": -0.9732732176780701, + "logps/chosen": -0.7821435332298279, + "logps/rejected": -13.350418090820312, + "loss": 0.8764, + "nll_loss": 0.895736575126648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07821434736251831, + "rewards/margins": 1.2568275928497314, + "rewards/rejected": -1.335041880607605, + "step": 452 + }, + { + "epoch": 0.8856304985337243, + "grad_norm": 0.7128586769104004, + "learning_rate": 3.538812785388128e-05, + "log_odds_chosen": 10.153064727783203, + "log_odds_ratio": -0.060406677424907684, + "logits/chosen": -0.09772032499313354, + "logits/rejected": -1.1836352348327637, + "logps/chosen": -0.7144232988357544, + "logps/rejected": -10.172429084777832, + "loss": 0.8646, + "nll_loss": 0.7922492623329163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07144233584403992, + "rewards/margins": 0.9458005428314209, + "rewards/rejected": -1.0172429084777832, + "step": 453 + }, + { + "epoch": 0.8875855327468231, + "grad_norm": 0.7241632342338562, + "learning_rate": 3.535551206784084e-05, + "log_odds_chosen": 16.00912857055664, + "log_odds_ratio": -0.06322925537824631, + "logits/chosen": -0.10064490139484406, + "logits/rejected": -0.6088881492614746, + "logps/chosen": -0.7494255900382996, + "logps/rejected": -16.112594604492188, + "loss": 0.8806, + "nll_loss": 0.8593502044677734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07494255900382996, + "rewards/margins": 1.5363168716430664, + "rewards/rejected": -1.6112594604492188, + "step": 454 + }, + { + "epoch": 0.8895405669599218, + "grad_norm": 0.7113070487976074, + "learning_rate": 3.53228962818004e-05, + "log_odds_chosen": 5.911761283874512, + "log_odds_ratio": -0.12517820298671722, + "logits/chosen": -0.3931048810482025, + "logits/rejected": -0.524001955986023, + "logps/chosen": -0.8599909543991089, + "logps/rejected": -6.302038192749023, + "loss": 0.8759, + "nll_loss": 0.8478162288665771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08599910140037537, + "rewards/margins": 0.5442047715187073, + "rewards/rejected": -0.6302038431167603, + "step": 455 + }, + { + "epoch": 0.8914956011730205, + "grad_norm": 0.721919059753418, + "learning_rate": 3.529028049575995e-05, + "log_odds_chosen": 11.025558471679688, + "log_odds_ratio": -0.061446335166692734, + "logits/chosen": -0.5202599167823792, + "logits/rejected": -1.0115721225738525, + "logps/chosen": -0.66499924659729, + "logps/rejected": -10.962458610534668, + "loss": 0.8733, + "nll_loss": 0.8419156074523926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06649992614984512, + "rewards/margins": 1.0297460556030273, + "rewards/rejected": -1.0962460041046143, + "step": 456 + }, + { + "epoch": 0.8934506353861192, + "grad_norm": 0.6911339163780212, + "learning_rate": 3.525766470971951e-05, + "log_odds_chosen": 15.296323776245117, + "log_odds_ratio": -0.061890147626399994, + "logits/chosen": -0.16502471268177032, + "logits/rejected": -1.096909999847412, + "logps/chosen": -0.6355814933776855, + "logps/rejected": -15.201183319091797, + "loss": 0.8458, + "nll_loss": 0.7962952256202698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06355814635753632, + "rewards/margins": 1.4565601348876953, + "rewards/rejected": -1.5201184749603271, + "step": 457 + }, + { + "epoch": 0.895405669599218, + "grad_norm": 0.7331794500350952, + "learning_rate": 3.522504892367906e-05, + "log_odds_chosen": 10.41217041015625, + "log_odds_ratio": -0.046379752457141876, + "logits/chosen": -0.41449815034866333, + "logits/rejected": -1.341786503791809, + "logps/chosen": -0.7185384035110474, + "logps/rejected": -10.469520568847656, + "loss": 0.8713, + "nll_loss": 0.7943539619445801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07185383141040802, + "rewards/margins": 0.975098192691803, + "rewards/rejected": -1.0469520092010498, + "step": 458 + }, + { + "epoch": 0.8973607038123167, + "grad_norm": 0.6970353722572327, + "learning_rate": 3.519243313763862e-05, + "log_odds_chosen": 4.478856086730957, + "log_odds_ratio": -0.21837690472602844, + "logits/chosen": -0.6223350763320923, + "logits/rejected": -1.2025282382965088, + "logps/chosen": -0.6317367553710938, + "logps/rejected": -4.456264495849609, + "loss": 0.8606, + "nll_loss": 0.7300387024879456, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.06317367404699326, + "rewards/margins": 0.3824527859687805, + "rewards/rejected": -0.445626437664032, + "step": 459 + }, + { + "epoch": 0.8993157380254154, + "grad_norm": 0.7435722351074219, + "learning_rate": 3.5159817351598174e-05, + "log_odds_chosen": 14.126375198364258, + "log_odds_ratio": -0.09217776358127594, + "logits/chosen": -0.21449591219425201, + "logits/rejected": -0.6762105226516724, + "logps/chosen": -0.8626919984817505, + "logps/rejected": -14.48434829711914, + "loss": 0.8903, + "nll_loss": 1.046924352645874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08626919984817505, + "rewards/margins": 1.3621655702590942, + "rewards/rejected": -1.4484347105026245, + "step": 460 + }, + { + "epoch": 0.9012707722385142, + "grad_norm": 0.6975011229515076, + "learning_rate": 3.512720156555773e-05, + "log_odds_chosen": 8.348469734191895, + "log_odds_ratio": -0.07748879492282867, + "logits/chosen": -0.4870525002479553, + "logits/rejected": -0.8957189321517944, + "logps/chosen": -0.7435719966888428, + "logps/rejected": -8.401260375976562, + "loss": 0.8786, + "nll_loss": 0.9897006750106812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07435719668865204, + "rewards/margins": 0.7657688856124878, + "rewards/rejected": -0.8401260375976562, + "step": 461 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.7096757888793945, + "learning_rate": 3.5094585779517286e-05, + "log_odds_chosen": 9.955499649047852, + "log_odds_ratio": -0.05054632946848869, + "logits/chosen": -0.4714685082435608, + "logits/rejected": -1.0872044563293457, + "logps/chosen": -0.5980658531188965, + "logps/rejected": -9.726789474487305, + "loss": 0.8522, + "nll_loss": 0.7993582487106323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05980658158659935, + "rewards/margins": 0.9128724336624146, + "rewards/rejected": -0.9726790189743042, + "step": 462 + }, + { + "epoch": 0.9051808406647116, + "grad_norm": 0.7141382694244385, + "learning_rate": 3.5061969993476845e-05, + "log_odds_chosen": 11.24806022644043, + "log_odds_ratio": -0.02993447333574295, + "logits/chosen": -0.1413189172744751, + "logits/rejected": -1.207427740097046, + "logps/chosen": -0.7675151824951172, + "logps/rejected": -11.311752319335938, + "loss": 0.8564, + "nll_loss": 0.7966090440750122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07675151526927948, + "rewards/margins": 1.0544238090515137, + "rewards/rejected": -1.13117516040802, + "step": 463 + }, + { + "epoch": 0.9071358748778103, + "grad_norm": 0.6870387196540833, + "learning_rate": 3.50293542074364e-05, + "log_odds_chosen": 9.526460647583008, + "log_odds_ratio": -0.10392825305461884, + "logits/chosen": -0.41983985900878906, + "logits/rejected": -1.096123218536377, + "logps/chosen": -0.6518410444259644, + "logps/rejected": -9.460358619689941, + "loss": 0.8421, + "nll_loss": 0.718944787979126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0651841014623642, + "rewards/margins": 0.8808517456054688, + "rewards/rejected": -0.9460358619689941, + "step": 464 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 0.7461258769035339, + "learning_rate": 3.499673842139596e-05, + "log_odds_chosen": 12.587100982666016, + "log_odds_ratio": -0.08396513015031815, + "logits/chosen": -0.3904385566711426, + "logits/rejected": -0.9808741807937622, + "logps/chosen": -0.652385950088501, + "logps/rejected": -12.545037269592285, + "loss": 0.8541, + "nll_loss": 0.726431131362915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0652385950088501, + "rewards/margins": 1.1892651319503784, + "rewards/rejected": -1.2545037269592285, + "step": 465 + }, + { + "epoch": 0.9110459433040078, + "grad_norm": 0.7339531779289246, + "learning_rate": 3.496412263535551e-05, + "log_odds_chosen": 7.908977031707764, + "log_odds_ratio": -0.21185654401779175, + "logits/chosen": -0.40577036142349243, + "logits/rejected": -0.9297428131103516, + "logps/chosen": -0.619253396987915, + "logps/rejected": -7.988880157470703, + "loss": 0.847, + "nll_loss": 0.6806700825691223, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.061925336718559265, + "rewards/margins": 0.7369626760482788, + "rewards/rejected": -0.7988879680633545, + "step": 466 + }, + { + "epoch": 0.9130009775171065, + "grad_norm": 0.7265191078186035, + "learning_rate": 3.493150684931507e-05, + "log_odds_chosen": 19.068798065185547, + "log_odds_ratio": -0.02430247887969017, + "logits/chosen": -0.17711594700813293, + "logits/rejected": -0.7280517816543579, + "logps/chosen": -0.5702927708625793, + "logps/rejected": -18.680044174194336, + "loss": 0.848, + "nll_loss": 0.8171983957290649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05702928453683853, + "rewards/margins": 1.8109750747680664, + "rewards/rejected": -1.868004560470581, + "step": 467 + }, + { + "epoch": 0.9149560117302052, + "grad_norm": 0.7326372861862183, + "learning_rate": 3.489889106327462e-05, + "log_odds_chosen": 2.8317949771881104, + "log_odds_ratio": -0.28007176518440247, + "logits/chosen": -0.005377039313316345, + "logits/rejected": 0.07841678708791733, + "logps/chosen": -0.6443730592727661, + "logps/rejected": -2.8514244556427, + "loss": 0.8474, + "nll_loss": 0.8128778338432312, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.06443730741739273, + "rewards/margins": 0.22070513665676117, + "rewards/rejected": -0.2851424515247345, + "step": 468 + }, + { + "epoch": 0.916911045943304, + "grad_norm": 0.7200118899345398, + "learning_rate": 3.486627527723418e-05, + "log_odds_chosen": 7.6430840492248535, + "log_odds_ratio": -0.10847952961921692, + "logits/chosen": -0.4067313075065613, + "logits/rejected": -0.8866600394248962, + "logps/chosen": -0.7086043953895569, + "logps/rejected": -7.713209629058838, + "loss": 0.8486, + "nll_loss": 0.8199364542961121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07086044549942017, + "rewards/margins": 0.7004604935646057, + "rewards/rejected": -0.7713209390640259, + "step": 469 + }, + { + "epoch": 0.9188660801564027, + "grad_norm": 0.7374091744422913, + "learning_rate": 3.4833659491193735e-05, + "log_odds_chosen": 13.17422103881836, + "log_odds_ratio": -0.025462673977017403, + "logits/chosen": -0.4287252724170685, + "logits/rejected": -1.2008187770843506, + "logps/chosen": -0.5842455625534058, + "logps/rejected": -12.903280258178711, + "loss": 0.8405, + "nll_loss": 0.7378931641578674, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05842455476522446, + "rewards/margins": 1.2319035530090332, + "rewards/rejected": -1.290328025817871, + "step": 470 + }, + { + "epoch": 0.9208211143695014, + "grad_norm": 0.7117858529090881, + "learning_rate": 3.4801043705153294e-05, + "log_odds_chosen": 11.888151168823242, + "log_odds_ratio": -0.14098721742630005, + "logits/chosen": -0.17643685638904572, + "logits/rejected": -0.773415744304657, + "logps/chosen": -0.6946250200271606, + "logps/rejected": -11.921512603759766, + "loss": 0.8378, + "nll_loss": 0.861427366733551, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.06946250796318054, + "rewards/margins": 1.1226887702941895, + "rewards/rejected": -1.1921511888504028, + "step": 471 + }, + { + "epoch": 0.9227761485826002, + "grad_norm": 0.696627140045166, + "learning_rate": 3.4768427919112854e-05, + "log_odds_chosen": 10.602128982543945, + "log_odds_ratio": -0.13013282418251038, + "logits/chosen": -0.3072212040424347, + "logits/rejected": -0.6334589719772339, + "logps/chosen": -0.6773164868354797, + "logps/rejected": -10.682096481323242, + "loss": 0.8111, + "nll_loss": 0.7214632034301758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06773164868354797, + "rewards/margins": 1.0004781484603882, + "rewards/rejected": -1.0682097673416138, + "step": 472 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7619100213050842, + "learning_rate": 3.4735812133072406e-05, + "log_odds_chosen": 7.021310806274414, + "log_odds_ratio": -0.13185983896255493, + "logits/chosen": -0.19587989151477814, + "logits/rejected": -1.1060678958892822, + "logps/chosen": -0.559809684753418, + "logps/rejected": -6.820035457611084, + "loss": 0.8471, + "nll_loss": 0.6444576382637024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.055980972945690155, + "rewards/margins": 0.6260225772857666, + "rewards/rejected": -0.6820034980773926, + "step": 473 + }, + { + "epoch": 0.9266862170087976, + "grad_norm": 0.7238292098045349, + "learning_rate": 3.4703196347031966e-05, + "log_odds_chosen": 14.825262069702148, + "log_odds_ratio": -0.03477742522954941, + "logits/chosen": -0.7019953727722168, + "logits/rejected": -1.160001277923584, + "logps/chosen": -0.7207116484642029, + "logps/rejected": -14.738922119140625, + "loss": 0.8463, + "nll_loss": 0.8261364102363586, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07207117229700089, + "rewards/margins": 1.4018211364746094, + "rewards/rejected": -1.4738922119140625, + "step": 474 + }, + { + "epoch": 0.9286412512218963, + "grad_norm": 0.7292147278785706, + "learning_rate": 3.4670580560991525e-05, + "log_odds_chosen": 12.763543128967285, + "log_odds_ratio": -0.10479736328125, + "logits/chosen": -0.5066919326782227, + "logits/rejected": -0.8886002898216248, + "logps/chosen": -0.8946337699890137, + "logps/rejected": -13.113706588745117, + "loss": 0.8538, + "nll_loss": 1.0095727443695068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08946339040994644, + "rewards/margins": 1.221907377243042, + "rewards/rejected": -1.3113707304000854, + "step": 475 + }, + { + "epoch": 0.9305962854349951, + "grad_norm": 0.7279580235481262, + "learning_rate": 3.463796477495108e-05, + "log_odds_chosen": 11.059494972229004, + "log_odds_ratio": -0.12546879053115845, + "logits/chosen": -0.378292977809906, + "logits/rejected": -0.5621368885040283, + "logps/chosen": -0.6589778661727905, + "logps/rejected": -11.06345272064209, + "loss": 0.8257, + "nll_loss": 0.7984383702278137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06589778512716293, + "rewards/margins": 1.0404475927352905, + "rewards/rejected": -1.106345295906067, + "step": 476 + }, + { + "epoch": 0.9325513196480938, + "grad_norm": 0.720504879951477, + "learning_rate": 3.460534898891064e-05, + "log_odds_chosen": 8.757610321044922, + "log_odds_ratio": -0.08460487425327301, + "logits/chosen": -0.20284324884414673, + "logits/rejected": -0.9795554876327515, + "logps/chosen": -0.6565445065498352, + "logps/rejected": -8.713459014892578, + "loss": 0.8411, + "nll_loss": 0.7478543519973755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0656544491648674, + "rewards/margins": 0.8056914806365967, + "rewards/rejected": -0.8713459968566895, + "step": 477 + }, + { + "epoch": 0.9345063538611925, + "grad_norm": 0.7420869469642639, + "learning_rate": 3.457273320287019e-05, + "log_odds_chosen": 7.1289753913879395, + "log_odds_ratio": -0.15799152851104736, + "logits/chosen": -0.09817808121442795, + "logits/rejected": -0.4351709485054016, + "logps/chosen": -0.8648152947425842, + "logps/rejected": -7.511728286743164, + "loss": 0.8494, + "nll_loss": 0.9157633781433105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08648152649402618, + "rewards/margins": 0.6646912097930908, + "rewards/rejected": -0.7511727809906006, + "step": 478 + }, + { + "epoch": 0.9364613880742912, + "grad_norm": 0.7439941763877869, + "learning_rate": 3.454011741682975e-05, + "log_odds_chosen": 6.002360820770264, + "log_odds_ratio": -0.056951552629470825, + "logits/chosen": -0.5579625368118286, + "logits/rejected": -0.8487610220909119, + "logps/chosen": -0.5668483972549438, + "logps/rejected": -5.756075859069824, + "loss": 0.8363, + "nll_loss": 0.6989344954490662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05668484792113304, + "rewards/margins": 0.518922746181488, + "rewards/rejected": -0.5756075978279114, + "step": 479 + }, + { + "epoch": 0.9384164222873901, + "grad_norm": 0.7352519631385803, + "learning_rate": 3.45075016307893e-05, + "log_odds_chosen": 14.7349853515625, + "log_odds_ratio": -0.07653481513261795, + "logits/chosen": -0.42828404903411865, + "logits/rejected": -0.8830748796463013, + "logps/chosen": -0.7405681610107422, + "logps/rejected": -14.848365783691406, + "loss": 0.8245, + "nll_loss": 0.8958524465560913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07405681908130646, + "rewards/margins": 1.4107797145843506, + "rewards/rejected": -1.4848365783691406, + "step": 480 + }, + { + "epoch": 0.9403714565004888, + "grad_norm": 0.7201321721076965, + "learning_rate": 3.447488584474886e-05, + "log_odds_chosen": 9.292166709899902, + "log_odds_ratio": -0.0866202861070633, + "logits/chosen": -0.7205287218093872, + "logits/rejected": -0.8703268766403198, + "logps/chosen": -0.7218480706214905, + "logps/rejected": -9.375861167907715, + "loss": 0.8336, + "nll_loss": 0.7126502990722656, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07218480855226517, + "rewards/margins": 0.8654012680053711, + "rewards/rejected": -0.9375861883163452, + "step": 481 + }, + { + "epoch": 0.9423264907135875, + "grad_norm": 0.7357725501060486, + "learning_rate": 3.4442270058708414e-05, + "log_odds_chosen": 12.90532112121582, + "log_odds_ratio": -0.043923795223236084, + "logits/chosen": -0.4581657946109772, + "logits/rejected": -0.9700899124145508, + "logps/chosen": -0.6556092500686646, + "logps/rejected": -12.795511245727539, + "loss": 0.8222, + "nll_loss": 0.8483753800392151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06556092202663422, + "rewards/margins": 1.2139902114868164, + "rewards/rejected": -1.2795511484146118, + "step": 482 + }, + { + "epoch": 0.9442815249266863, + "grad_norm": 0.7202299237251282, + "learning_rate": 3.4409654272667974e-05, + "log_odds_chosen": 11.828601837158203, + "log_odds_ratio": -0.16260598599910736, + "logits/chosen": -0.3643549978733063, + "logits/rejected": -0.9568381309509277, + "logps/chosen": -0.7335261106491089, + "logps/rejected": -12.00793170928955, + "loss": 0.8255, + "nll_loss": 0.8334125280380249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07335261255502701, + "rewards/margins": 1.1274404525756836, + "rewards/rejected": -1.2007932662963867, + "step": 483 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.7146207690238953, + "learning_rate": 3.437703848662753e-05, + "log_odds_chosen": 7.360755920410156, + "log_odds_ratio": -0.10498297214508057, + "logits/chosen": -0.5009814500808716, + "logits/rejected": -1.0357322692871094, + "logps/chosen": -0.6614243984222412, + "logps/rejected": -7.339273452758789, + "loss": 0.8126, + "nll_loss": 0.8051553964614868, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06614243984222412, + "rewards/margins": 0.6677849292755127, + "rewards/rejected": -0.7339273691177368, + "step": 484 + }, + { + "epoch": 0.9481915933528837, + "grad_norm": 0.711856484413147, + "learning_rate": 3.4344422700587086e-05, + "log_odds_chosen": 9.520809173583984, + "log_odds_ratio": -0.09422419965267181, + "logits/chosen": -0.40922433137893677, + "logits/rejected": -0.994839608669281, + "logps/chosen": -0.6094300746917725, + "logps/rejected": -9.384113311767578, + "loss": 0.8137, + "nll_loss": 0.7501894235610962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.060943011194467545, + "rewards/margins": 0.8774683475494385, + "rewards/rejected": -0.9384112358093262, + "step": 485 + }, + { + "epoch": 0.9501466275659824, + "grad_norm": 0.701828122138977, + "learning_rate": 3.431180691454664e-05, + "log_odds_chosen": 12.505343437194824, + "log_odds_ratio": -0.07498367130756378, + "logits/chosen": -0.4145153760910034, + "logits/rejected": -1.1242501735687256, + "logps/chosen": -0.7516512870788574, + "logps/rejected": -12.652158737182617, + "loss": 0.794, + "nll_loss": 0.8512237071990967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07516512274742126, + "rewards/margins": 1.1900508403778076, + "rewards/rejected": -1.2652158737182617, + "step": 486 + }, + { + "epoch": 0.9521016617790812, + "grad_norm": 0.7373877763748169, + "learning_rate": 3.42791911285062e-05, + "log_odds_chosen": 2.641418695449829, + "log_odds_ratio": -0.14878106117248535, + "logits/chosen": -0.508453369140625, + "logits/rejected": -0.5146299600601196, + "logps/chosen": -0.5854432582855225, + "logps/rejected": -2.5585575103759766, + "loss": 0.8144, + "nll_loss": 0.7347421646118164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05854432284832001, + "rewards/margins": 0.1973114311695099, + "rewards/rejected": -0.2558557391166687, + "step": 487 + }, + { + "epoch": 0.9540566959921799, + "grad_norm": 0.7326816320419312, + "learning_rate": 3.424657534246575e-05, + "log_odds_chosen": 17.091407775878906, + "log_odds_ratio": -0.0879184901714325, + "logits/chosen": -0.21322648227214813, + "logits/rejected": -0.6958395838737488, + "logps/chosen": -0.6829094886779785, + "logps/rejected": -17.118728637695312, + "loss": 0.8269, + "nll_loss": 0.8366402387619019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06829095631837845, + "rewards/margins": 1.6435818672180176, + "rewards/rejected": -1.7118728160858154, + "step": 488 + }, + { + "epoch": 0.9560117302052786, + "grad_norm": 0.766679048538208, + "learning_rate": 3.421395955642531e-05, + "log_odds_chosen": 11.951904296875, + "log_odds_ratio": -0.031982433050870895, + "logits/chosen": -0.5650243759155273, + "logits/rejected": -0.7343953847885132, + "logps/chosen": -0.7253568172454834, + "logps/rejected": -11.92192554473877, + "loss": 0.8226, + "nll_loss": 0.8932933807373047, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0725356787443161, + "rewards/margins": 1.1196568012237549, + "rewards/rejected": -1.1921924352645874, + "step": 489 + }, + { + "epoch": 0.9579667644183774, + "grad_norm": 0.7166140675544739, + "learning_rate": 3.418134377038486e-05, + "log_odds_chosen": 7.619058609008789, + "log_odds_ratio": -0.16216491162776947, + "logits/chosen": -0.5250576734542847, + "logits/rejected": -1.0873773097991943, + "logps/chosen": -0.6466493606567383, + "logps/rejected": -7.618906021118164, + "loss": 0.8067, + "nll_loss": 0.7695545554161072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06466494500637054, + "rewards/margins": 0.6972257494926453, + "rewards/rejected": -0.7618906497955322, + "step": 490 + }, + { + "epoch": 0.9599217986314761, + "grad_norm": 0.746809720993042, + "learning_rate": 3.414872798434442e-05, + "log_odds_chosen": 11.903421401977539, + "log_odds_ratio": -0.04873340576887131, + "logits/chosen": -0.47647738456726074, + "logits/rejected": -1.1140730381011963, + "logps/chosen": -0.7108423709869385, + "logps/rejected": -11.940874099731445, + "loss": 0.8197, + "nll_loss": 0.7953153848648071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07108423858880997, + "rewards/margins": 1.1230031251907349, + "rewards/rejected": -1.1940875053405762, + "step": 491 + }, + { + "epoch": 0.9618768328445748, + "grad_norm": 0.7258880138397217, + "learning_rate": 3.411611219830398e-05, + "log_odds_chosen": 12.394794464111328, + "log_odds_ratio": -0.14135849475860596, + "logits/chosen": -0.4352658987045288, + "logits/rejected": -0.8955838680267334, + "logps/chosen": -0.6796140670776367, + "logps/rejected": -12.41160774230957, + "loss": 0.805, + "nll_loss": 0.8859663009643555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06796140968799591, + "rewards/margins": 1.1731994152069092, + "rewards/rejected": -1.2411608695983887, + "step": 492 + }, + { + "epoch": 0.9638318670576735, + "grad_norm": 0.7632006406784058, + "learning_rate": 3.4083496412263535e-05, + "log_odds_chosen": 15.991569519042969, + "log_odds_ratio": -0.056680161505937576, + "logits/chosen": -0.4926494359970093, + "logits/rejected": -1.0453853607177734, + "logps/chosen": -0.5206949710845947, + "logps/rejected": -15.580713272094727, + "loss": 0.8112, + "nll_loss": 0.6273183822631836, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.052069492638111115, + "rewards/margins": 1.5060019493103027, + "rewards/rejected": -1.5580713748931885, + "step": 493 + }, + { + "epoch": 0.9657869012707723, + "grad_norm": 0.768322229385376, + "learning_rate": 3.4050880626223094e-05, + "log_odds_chosen": 10.08560848236084, + "log_odds_ratio": -0.045677702873945236, + "logits/chosen": -0.3032284379005432, + "logits/rejected": -1.0136380195617676, + "logps/chosen": -0.7190263271331787, + "logps/rejected": -10.108743667602539, + "loss": 0.8097, + "nll_loss": 0.8245089054107666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07190263271331787, + "rewards/margins": 0.9389716982841492, + "rewards/rejected": -1.0108743906021118, + "step": 494 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.7579919099807739, + "learning_rate": 3.4018264840182654e-05, + "log_odds_chosen": 11.574173927307129, + "log_odds_ratio": -0.053065478801727295, + "logits/chosen": -0.455281525850296, + "logits/rejected": -1.1741468906402588, + "logps/chosen": -0.8210651874542236, + "logps/rejected": -11.8480224609375, + "loss": 0.8144, + "nll_loss": 0.8926892876625061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08210651576519012, + "rewards/margins": 1.1026958227157593, + "rewards/rejected": -1.1848022937774658, + "step": 495 + }, + { + "epoch": 0.9696969696969697, + "grad_norm": 0.7679611444473267, + "learning_rate": 3.3985649054142206e-05, + "log_odds_chosen": 16.93597412109375, + "log_odds_ratio": -0.015713617205619812, + "logits/chosen": -0.20484018325805664, + "logits/rejected": -1.0005052089691162, + "logps/chosen": -0.7569926977157593, + "logps/rejected": -17.014873504638672, + "loss": 0.798, + "nll_loss": 0.7935050129890442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07569926977157593, + "rewards/margins": 1.6257879734039307, + "rewards/rejected": -1.7014873027801514, + "step": 496 + }, + { + "epoch": 0.9716520039100685, + "grad_norm": 0.7490580677986145, + "learning_rate": 3.3953033268101766e-05, + "log_odds_chosen": 11.401053428649902, + "log_odds_ratio": -0.09302864968776703, + "logits/chosen": -0.6684495210647583, + "logits/rejected": -1.1510887145996094, + "logps/chosen": -0.6616054177284241, + "logps/rejected": -11.4022798538208, + "loss": 0.82, + "nll_loss": 0.7348425984382629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06616055220365524, + "rewards/margins": 1.0740675926208496, + "rewards/rejected": -1.140228033065796, + "step": 497 + }, + { + "epoch": 0.9736070381231672, + "grad_norm": 0.7899497747421265, + "learning_rate": 3.392041748206132e-05, + "log_odds_chosen": 12.026823043823242, + "log_odds_ratio": -0.06841376423835754, + "logits/chosen": -0.5349042415618896, + "logits/rejected": -1.0209227800369263, + "logps/chosen": -0.47439563274383545, + "logps/rejected": -11.498780250549316, + "loss": 0.7834, + "nll_loss": 0.6074992418289185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.047439564019441605, + "rewards/margins": 1.1024384498596191, + "rewards/rejected": -1.1498780250549316, + "step": 498 + }, + { + "epoch": 0.9755620723362659, + "grad_norm": 0.7721085548400879, + "learning_rate": 3.388780169602088e-05, + "log_odds_chosen": 10.073436737060547, + "log_odds_ratio": -0.10651960968971252, + "logits/chosen": -0.6111949682235718, + "logits/rejected": -0.7432936429977417, + "logps/chosen": -0.5340858101844788, + "logps/rejected": -9.798685073852539, + "loss": 0.7692, + "nll_loss": 0.6167590618133545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.053408581763505936, + "rewards/margins": 0.9264599084854126, + "rewards/rejected": -0.9798685312271118, + "step": 499 + }, + { + "epoch": 0.9775171065493646, + "grad_norm": 0.7571791410446167, + "learning_rate": 3.385518590998043e-05, + "log_odds_chosen": 9.280817985534668, + "log_odds_ratio": -0.06714154034852982, + "logits/chosen": -0.5660990476608276, + "logits/rejected": -1.1199361085891724, + "logps/chosen": -0.582938551902771, + "logps/rejected": -9.061441421508789, + "loss": 0.7878, + "nll_loss": 0.7151268720626831, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05829384922981262, + "rewards/margins": 0.8478503227233887, + "rewards/rejected": -0.9061441421508789, + "step": 500 + }, + { + "epoch": 0.9794721407624634, + "grad_norm": 0.7867841124534607, + "learning_rate": 3.382257012393999e-05, + "log_odds_chosen": 15.29836654663086, + "log_odds_ratio": -0.05394447222352028, + "logits/chosen": -0.7067490816116333, + "logits/rejected": -1.0544185638427734, + "logps/chosen": -0.5722078084945679, + "logps/rejected": -14.983619689941406, + "loss": 0.7858, + "nll_loss": 0.6625403165817261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.057220783084630966, + "rewards/margins": 1.4411413669586182, + "rewards/rejected": -1.4983621835708618, + "step": 501 + }, + { + "epoch": 0.9814271749755621, + "grad_norm": 0.7703598141670227, + "learning_rate": 3.378995433789954e-05, + "log_odds_chosen": 3.9276111125946045, + "log_odds_ratio": -0.26475730538368225, + "logits/chosen": -0.5809782147407532, + "logits/rejected": -0.6110835075378418, + "logps/chosen": -0.7099093198776245, + "logps/rejected": -4.159364700317383, + "loss": 0.8193, + "nll_loss": 0.8324306011199951, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07099093496799469, + "rewards/margins": 0.3449455201625824, + "rewards/rejected": -0.4159364700317383, + "step": 502 + }, + { + "epoch": 0.9833822091886608, + "grad_norm": 0.7634415030479431, + "learning_rate": 3.37573385518591e-05, + "log_odds_chosen": 11.10832405090332, + "log_odds_ratio": -0.024046115577220917, + "logits/chosen": -0.35057204961776733, + "logits/rejected": -0.9610720872879028, + "logps/chosen": -0.6043937802314758, + "logps/rejected": -10.80783462524414, + "loss": 0.7896, + "nll_loss": 0.7488991022109985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06043937802314758, + "rewards/margins": 1.0203440189361572, + "rewards/rejected": -1.0807833671569824, + "step": 503 + }, + { + "epoch": 0.9853372434017595, + "grad_norm": 0.7627769112586975, + "learning_rate": 3.3724722765818655e-05, + "log_odds_chosen": 9.326521873474121, + "log_odds_ratio": -0.08203571289777756, + "logits/chosen": -0.11595489084720612, + "logits/rejected": -0.8560320138931274, + "logps/chosen": -0.6601013541221619, + "logps/rejected": -9.267707824707031, + "loss": 0.7939, + "nll_loss": 0.7958817481994629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06601013988256454, + "rewards/margins": 0.8607606887817383, + "rewards/rejected": -0.926770806312561, + "step": 504 + }, + { + "epoch": 0.9872922776148583, + "grad_norm": 0.7691999673843384, + "learning_rate": 3.3692106979778215e-05, + "log_odds_chosen": 5.90548038482666, + "log_odds_ratio": -0.21737748384475708, + "logits/chosen": -0.6727055311203003, + "logits/rejected": -0.732673168182373, + "logps/chosen": -0.7953156232833862, + "logps/rejected": -6.205281734466553, + "loss": 0.7879, + "nll_loss": 1.1683647632598877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07953156530857086, + "rewards/margins": 0.5409966707229614, + "rewards/rejected": -0.6205282211303711, + "step": 505 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.7580311298370361, + "learning_rate": 3.365949119373777e-05, + "log_odds_chosen": 13.99635124206543, + "log_odds_ratio": -0.03765573352575302, + "logits/chosen": -0.5319039821624756, + "logits/rejected": -1.3452463150024414, + "logps/chosen": -0.6958300471305847, + "logps/rejected": -13.9765625, + "loss": 0.7802, + "nll_loss": 0.8330222368240356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.069582998752594, + "rewards/margins": 1.328073263168335, + "rewards/rejected": -1.3976564407348633, + "step": 506 + }, + { + "epoch": 0.9912023460410557, + "grad_norm": 0.7848649621009827, + "learning_rate": 3.362687540769733e-05, + "log_odds_chosen": 8.996940612792969, + "log_odds_ratio": -0.09575815498828888, + "logits/chosen": -0.4752542972564697, + "logits/rejected": -1.0080690383911133, + "logps/chosen": -0.7552341222763062, + "logps/rejected": -9.133678436279297, + "loss": 0.7914, + "nll_loss": 0.7892815470695496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07552342116832733, + "rewards/margins": 0.8378444910049438, + "rewards/rejected": -0.9133678674697876, + "step": 507 + }, + { + "epoch": 0.9931573802541545, + "grad_norm": 0.7976130843162537, + "learning_rate": 3.359425962165688e-05, + "log_odds_chosen": 11.156414031982422, + "log_odds_ratio": -0.09736481308937073, + "logits/chosen": -0.680529773235321, + "logits/rejected": -0.6529916524887085, + "logps/chosen": -0.5454006195068359, + "logps/rejected": -10.811391830444336, + "loss": 0.7945, + "nll_loss": 0.7753928303718567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.054540060460567474, + "rewards/margins": 1.0265990495681763, + "rewards/rejected": -1.081139087677002, + "step": 508 + }, + { + "epoch": 0.9951124144672532, + "grad_norm": 0.7548176050186157, + "learning_rate": 3.356164383561644e-05, + "log_odds_chosen": 7.633919715881348, + "log_odds_ratio": -0.2779882848262787, + "logits/chosen": -0.6533724069595337, + "logits/rejected": -0.8000415563583374, + "logps/chosen": -0.7176302671432495, + "logps/rejected": -7.813236236572266, + "loss": 0.8009, + "nll_loss": 0.9474443197250366, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.07176302373409271, + "rewards/margins": 0.709560751914978, + "rewards/rejected": -0.7813236713409424, + "step": 509 + }, + { + "epoch": 0.9970674486803519, + "grad_norm": 0.7344701886177063, + "learning_rate": 3.352902804957599e-05, + "log_odds_chosen": 7.730894088745117, + "log_odds_ratio": -0.14041510224342346, + "logits/chosen": -0.6775522232055664, + "logits/rejected": -1.2709062099456787, + "logps/chosen": -0.6365092992782593, + "logps/rejected": -7.743663311004639, + "loss": 0.7734, + "nll_loss": 0.7173987627029419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06365092843770981, + "rewards/margins": 0.7107154130935669, + "rewards/rejected": -0.7743663787841797, + "step": 510 + }, + { + "epoch": 0.9990224828934506, + "grad_norm": 0.769092321395874, + "learning_rate": 3.349641226353555e-05, + "log_odds_chosen": 11.108455657958984, + "log_odds_ratio": -0.13901421427726746, + "logits/chosen": -0.7440725564956665, + "logits/rejected": -0.7437351942062378, + "logps/chosen": -0.7434514760971069, + "logps/rejected": -11.272302627563477, + "loss": 0.7927, + "nll_loss": 0.8617143630981445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07434515655040741, + "rewards/margins": 1.0528851747512817, + "rewards/rejected": -1.1272304058074951, + "step": 511 + }, + { + "epoch": 1.0009775171065494, + "grad_norm": 0.7336360216140747, + "learning_rate": 3.346379647749511e-05, + "log_odds_chosen": 6.67110538482666, + "log_odds_ratio": -0.08117252588272095, + "logits/chosen": -0.7495433688163757, + "logits/rejected": -1.22011399269104, + "logps/chosen": -0.5145964622497559, + "logps/rejected": -6.1763458251953125, + "loss": 0.703, + "nll_loss": 0.7111921310424805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.051459647715091705, + "rewards/margins": 0.5661749243736267, + "rewards/rejected": -0.6176345944404602, + "step": 512 + }, + { + "epoch": 1.002932551319648, + "grad_norm": 0.7075099945068359, + "learning_rate": 3.343118069145466e-05, + "log_odds_chosen": 10.655590057373047, + "log_odds_ratio": -0.019836103543639183, + "logits/chosen": -0.3072991669178009, + "logits/rejected": -0.8194465637207031, + "logps/chosen": -0.46711403131484985, + "logps/rejected": -9.894790649414062, + "loss": 0.5835, + "nll_loss": 0.6942476034164429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.046711407601833344, + "rewards/margins": 0.9427676200866699, + "rewards/rejected": -0.9894790649414062, + "step": 513 + }, + { + "epoch": 1.0048875855327468, + "grad_norm": 0.7451090812683105, + "learning_rate": 3.339856490541422e-05, + "log_odds_chosen": 11.804349899291992, + "log_odds_ratio": -0.04596191272139549, + "logits/chosen": -1.3810123205184937, + "logits/rejected": -1.207172155380249, + "logps/chosen": -0.4618269205093384, + "logps/rejected": -11.261030197143555, + "loss": 0.5823, + "nll_loss": 0.6228824257850647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04618269205093384, + "rewards/margins": 1.0799204111099243, + "rewards/rejected": -1.1261030435562134, + "step": 514 + }, + { + "epoch": 1.0068426197458455, + "grad_norm": 0.7638962864875793, + "learning_rate": 3.336594911937378e-05, + "log_odds_chosen": 8.307703971862793, + "log_odds_ratio": -0.052655719220638275, + "logits/chosen": -1.2783527374267578, + "logits/rejected": -1.4867238998413086, + "logps/chosen": -0.4644492268562317, + "logps/rejected": -7.7574143409729, + "loss": 0.559, + "nll_loss": 0.5813341736793518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04644492268562317, + "rewards/margins": 0.7292965650558472, + "rewards/rejected": -0.775741457939148, + "step": 515 + }, + { + "epoch": 1.0087976539589443, + "grad_norm": 0.7778139710426331, + "learning_rate": 3.3333333333333335e-05, + "log_odds_chosen": 8.699782371520996, + "log_odds_ratio": -0.05186303332448006, + "logits/chosen": -1.323776125907898, + "logits/rejected": -1.35886549949646, + "logps/chosen": -0.48509901762008667, + "logps/rejected": -8.294282913208008, + "loss": 0.5759, + "nll_loss": 0.5863348245620728, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048509903252124786, + "rewards/margins": 0.7809184789657593, + "rewards/rejected": -0.8294284343719482, + "step": 516 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.7348308563232422, + "learning_rate": 3.3300717547292894e-05, + "log_odds_chosen": 20.575923919677734, + "log_odds_ratio": -0.000858886749483645, + "logits/chosen": -1.2454833984375, + "logits/rejected": -1.2867419719696045, + "logps/chosen": -0.4811432957649231, + "logps/rejected": -19.83279037475586, + "loss": 0.5651, + "nll_loss": 0.6272632479667664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04811432957649231, + "rewards/margins": 1.9351649284362793, + "rewards/rejected": -1.9832792282104492, + "step": 517 + }, + { + "epoch": 1.0127077223851417, + "grad_norm": 0.7073251605033875, + "learning_rate": 3.326810176125245e-05, + "log_odds_chosen": 18.70823860168457, + "log_odds_ratio": -0.020798271521925926, + "logits/chosen": -0.9065128564834595, + "logits/rejected": -1.2388525009155273, + "logps/chosen": -0.43951791524887085, + "logps/rejected": -18.140579223632812, + "loss": 0.5808, + "nll_loss": 0.5790886878967285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.043951794505119324, + "rewards/margins": 1.7701060771942139, + "rewards/rejected": -1.8140578269958496, + "step": 518 + }, + { + "epoch": 1.0146627565982405, + "grad_norm": 0.7032327055931091, + "learning_rate": 3.3235485975212007e-05, + "log_odds_chosen": 11.775863647460938, + "log_odds_ratio": -0.04854842275381088, + "logits/chosen": -1.203606367111206, + "logits/rejected": -1.0736299753189087, + "logps/chosen": -0.5100117921829224, + "logps/rejected": -11.253028869628906, + "loss": 0.564, + "nll_loss": 0.5354430079460144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.051001183688640594, + "rewards/margins": 1.074301838874817, + "rewards/rejected": -1.125303030014038, + "step": 519 + }, + { + "epoch": 1.0166177908113392, + "grad_norm": 0.6877638697624207, + "learning_rate": 3.320287018917156e-05, + "log_odds_chosen": 9.50920581817627, + "log_odds_ratio": -0.037817809730768204, + "logits/chosen": -1.320874810218811, + "logits/rejected": -1.1124024391174316, + "logps/chosen": -0.3075897991657257, + "logps/rejected": -8.544535636901855, + "loss": 0.5455, + "nll_loss": 0.4236251711845398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030758976936340332, + "rewards/margins": 0.8236945867538452, + "rewards/rejected": -0.8544536232948303, + "step": 520 + }, + { + "epoch": 1.018572825024438, + "grad_norm": 0.7034885287284851, + "learning_rate": 3.317025440313112e-05, + "log_odds_chosen": 7.76276159286499, + "log_odds_ratio": -0.05532576143741608, + "logits/chosen": -1.4354479312896729, + "logits/rejected": -1.428919792175293, + "logps/chosen": -0.2917661964893341, + "logps/rejected": -6.724789142608643, + "loss": 0.5515, + "nll_loss": 0.36107736825942993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02917661890387535, + "rewards/margins": 0.6433022022247314, + "rewards/rejected": -0.6724788546562195, + "step": 521 + }, + { + "epoch": 1.0205278592375366, + "grad_norm": 0.7081824541091919, + "learning_rate": 3.313763861709067e-05, + "log_odds_chosen": 15.96968936920166, + "log_odds_ratio": -0.013215781189501286, + "logits/chosen": -1.3948438167572021, + "logits/rejected": -1.3838223218917847, + "logps/chosen": -0.3814734220504761, + "logps/rejected": -15.205560684204102, + "loss": 0.5584, + "nll_loss": 0.47212955355644226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03814734145998955, + "rewards/margins": 1.4824087619781494, + "rewards/rejected": -1.5205562114715576, + "step": 522 + }, + { + "epoch": 1.0224828934506354, + "grad_norm": 0.7011891603469849, + "learning_rate": 3.310502283105023e-05, + "log_odds_chosen": 6.012911796569824, + "log_odds_ratio": -0.04920900985598564, + "logits/chosen": -1.2566728591918945, + "logits/rejected": -1.2730729579925537, + "logps/chosen": -0.5416103005409241, + "logps/rejected": -5.669743537902832, + "loss": 0.565, + "nll_loss": 0.7161548137664795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05416102707386017, + "rewards/margins": 0.5128133296966553, + "rewards/rejected": -0.566974401473999, + "step": 523 + }, + { + "epoch": 1.024437927663734, + "grad_norm": 0.693427324295044, + "learning_rate": 3.3072407045009784e-05, + "log_odds_chosen": 13.073247909545898, + "log_odds_ratio": -0.027852583676576614, + "logits/chosen": -1.179961919784546, + "logits/rejected": -1.2102091312408447, + "logps/chosen": -0.3468817472457886, + "logps/rejected": -12.148797035217285, + "loss": 0.5572, + "nll_loss": 0.5047758221626282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03468817472457886, + "rewards/margins": 1.1801915168762207, + "rewards/rejected": -1.2148797512054443, + "step": 524 + }, + { + "epoch": 1.0263929618768328, + "grad_norm": 0.6814071536064148, + "learning_rate": 3.303979125896934e-05, + "log_odds_chosen": 11.168566703796387, + "log_odds_ratio": -0.05023694410920143, + "logits/chosen": -1.1009427309036255, + "logits/rejected": -1.1901663541793823, + "logps/chosen": -0.5312334299087524, + "logps/rejected": -10.788091659545898, + "loss": 0.5573, + "nll_loss": 0.7145440578460693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.053123343735933304, + "rewards/margins": 1.0256859064102173, + "rewards/rejected": -1.0788092613220215, + "step": 525 + }, + { + "epoch": 1.0283479960899315, + "grad_norm": 0.7105069160461426, + "learning_rate": 3.3007175472928896e-05, + "log_odds_chosen": 13.668149948120117, + "log_odds_ratio": -0.0683068037033081, + "logits/chosen": -1.032808780670166, + "logits/rejected": -1.06974196434021, + "logps/chosen": -0.4386330246925354, + "logps/rejected": -13.06321907043457, + "loss": 0.5696, + "nll_loss": 0.5837178826332092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04386330395936966, + "rewards/margins": 1.2624585628509521, + "rewards/rejected": -1.3063217401504517, + "step": 526 + }, + { + "epoch": 1.0303030303030303, + "grad_norm": 0.6746382117271423, + "learning_rate": 3.2974559686888455e-05, + "log_odds_chosen": 12.642032623291016, + "log_odds_ratio": -0.016004018485546112, + "logits/chosen": -0.909950315952301, + "logits/rejected": -1.2550323009490967, + "logps/chosen": -0.4172792136669159, + "logps/rejected": -11.9619722366333, + "loss": 0.5544, + "nll_loss": 0.5954374074935913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04172792285680771, + "rewards/margins": 1.15446937084198, + "rewards/rejected": -1.196197271347046, + "step": 527 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.6670281887054443, + "learning_rate": 3.294194390084801e-05, + "log_odds_chosen": 11.795117378234863, + "log_odds_ratio": -0.040910035371780396, + "logits/chosen": -1.0046261548995972, + "logits/rejected": -1.1516849994659424, + "logps/chosen": -0.4177377223968506, + "logps/rejected": -11.142871856689453, + "loss": 0.544, + "nll_loss": 0.5146663188934326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04177376627922058, + "rewards/margins": 1.0725135803222656, + "rewards/rejected": -1.114287257194519, + "step": 528 + }, + { + "epoch": 1.0342130987292277, + "grad_norm": 0.6901669502258301, + "learning_rate": 3.290932811480757e-05, + "log_odds_chosen": 13.088350296020508, + "log_odds_ratio": -0.03518122434616089, + "logits/chosen": -1.2674661874771118, + "logits/rejected": -1.1873856782913208, + "logps/chosen": -0.3623931407928467, + "logps/rejected": -12.16550064086914, + "loss": 0.5458, + "nll_loss": 0.4658421277999878, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03623931109905243, + "rewards/margins": 1.1803107261657715, + "rewards/rejected": -1.2165501117706299, + "step": 529 + }, + { + "epoch": 1.0361681329423265, + "grad_norm": 0.7040908336639404, + "learning_rate": 3.287671232876712e-05, + "log_odds_chosen": 15.031644821166992, + "log_odds_ratio": -0.003988980781286955, + "logits/chosen": -1.2011340856552124, + "logits/rejected": -1.3059028387069702, + "logps/chosen": -0.469357967376709, + "logps/rejected": -14.443739891052246, + "loss": 0.5734, + "nll_loss": 0.6003106832504272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0469357967376709, + "rewards/margins": 1.3974381685256958, + "rewards/rejected": -1.4443739652633667, + "step": 530 + }, + { + "epoch": 1.0381231671554252, + "grad_norm": 0.7119730114936829, + "learning_rate": 3.284409654272668e-05, + "log_odds_chosen": 9.672428131103516, + "log_odds_ratio": -0.0682624876499176, + "logits/chosen": -1.1971259117126465, + "logits/rejected": -1.043594479560852, + "logps/chosen": -0.3947277367115021, + "logps/rejected": -9.012441635131836, + "loss": 0.5655, + "nll_loss": 0.5557081699371338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03947277367115021, + "rewards/margins": 0.8617714047431946, + "rewards/rejected": -0.9012442231178284, + "step": 531 + }, + { + "epoch": 1.040078201368524, + "grad_norm": 0.6694727540016174, + "learning_rate": 3.281148075668624e-05, + "log_odds_chosen": 8.300237655639648, + "log_odds_ratio": -0.046774499118328094, + "logits/chosen": -1.2003533840179443, + "logits/rejected": -1.4628247022628784, + "logps/chosen": -0.32168692350387573, + "logps/rejected": -7.231645584106445, + "loss": 0.5299, + "nll_loss": 0.5299124121665955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03216869384050369, + "rewards/margins": 0.6909958720207214, + "rewards/rejected": -0.7231645584106445, + "step": 532 + }, + { + "epoch": 1.0420332355816226, + "grad_norm": 0.7117211222648621, + "learning_rate": 3.277886497064579e-05, + "log_odds_chosen": 10.447582244873047, + "log_odds_ratio": -0.07918490469455719, + "logits/chosen": -1.4007325172424316, + "logits/rejected": -1.2994530200958252, + "logps/chosen": -0.3994891047477722, + "logps/rejected": -9.806540489196777, + "loss": 0.5587, + "nll_loss": 0.4758418798446655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03994891047477722, + "rewards/margins": 0.9407051801681519, + "rewards/rejected": -0.9806540012359619, + "step": 533 + }, + { + "epoch": 1.0439882697947214, + "grad_norm": 0.6755963563919067, + "learning_rate": 3.274624918460535e-05, + "log_odds_chosen": 8.697891235351562, + "log_odds_ratio": -0.03937075287103653, + "logits/chosen": -1.1918225288391113, + "logits/rejected": -1.4038922786712646, + "logps/chosen": -0.31803053617477417, + "logps/rejected": -7.749574184417725, + "loss": 0.5493, + "nll_loss": 0.45318442583084106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031803056597709656, + "rewards/margins": 0.7431544065475464, + "rewards/rejected": -0.7749574184417725, + "step": 534 + }, + { + "epoch": 1.04594330400782, + "grad_norm": 0.7110870480537415, + "learning_rate": 3.271363339856491e-05, + "log_odds_chosen": 11.876801490783691, + "log_odds_ratio": -0.02911151945590973, + "logits/chosen": -1.126831293106079, + "logits/rejected": -1.193347692489624, + "logps/chosen": -0.36315515637397766, + "logps/rejected": -11.044620513916016, + "loss": 0.5379, + "nll_loss": 0.4794536828994751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036315519362688065, + "rewards/margins": 1.0681464672088623, + "rewards/rejected": -1.104461908340454, + "step": 535 + }, + { + "epoch": 1.0478983382209188, + "grad_norm": 0.7023019790649414, + "learning_rate": 3.2681017612524463e-05, + "log_odds_chosen": 11.305734634399414, + "log_odds_ratio": -0.06529423594474792, + "logits/chosen": -1.2145642042160034, + "logits/rejected": -1.2164461612701416, + "logps/chosen": -0.44840502738952637, + "logps/rejected": -10.639402389526367, + "loss": 0.5544, + "nll_loss": 0.6084418296813965, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044840507209300995, + "rewards/margins": 1.0190997123718262, + "rewards/rejected": -1.0639402866363525, + "step": 536 + }, + { + "epoch": 1.0498533724340176, + "grad_norm": 0.7144843339920044, + "learning_rate": 3.264840182648402e-05, + "log_odds_chosen": 14.783637046813965, + "log_odds_ratio": -0.03448185324668884, + "logits/chosen": -1.0144182443618774, + "logits/rejected": -1.412582278251648, + "logps/chosen": -0.41798675060272217, + "logps/rejected": -14.021878242492676, + "loss": 0.5454, + "nll_loss": 0.6054961681365967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0417986735701561, + "rewards/margins": 1.3603891134262085, + "rewards/rejected": -1.402187705039978, + "step": 537 + }, + { + "epoch": 1.0518084066471163, + "grad_norm": 0.6829630732536316, + "learning_rate": 3.2615786040443576e-05, + "log_odds_chosen": 11.430675506591797, + "log_odds_ratio": -0.00955939944833517, + "logits/chosen": -1.3890390396118164, + "logits/rejected": -1.45833420753479, + "logps/chosen": -0.2958887219429016, + "logps/rejected": -10.261358261108398, + "loss": 0.546, + "nll_loss": 0.46748554706573486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02958887442946434, + "rewards/margins": 0.9965469837188721, + "rewards/rejected": -1.0261359214782715, + "step": 538 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.683269202709198, + "learning_rate": 3.2583170254403135e-05, + "log_odds_chosen": 12.248570442199707, + "log_odds_ratio": -0.031476087868213654, + "logits/chosen": -0.9693518877029419, + "logits/rejected": -1.303145408630371, + "logps/chosen": -0.4789624810218811, + "logps/rejected": -11.763038635253906, + "loss": 0.547, + "nll_loss": 0.5584403276443481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04789625108242035, + "rewards/margins": 1.1284077167510986, + "rewards/rejected": -1.1763038635253906, + "step": 539 + }, + { + "epoch": 1.0557184750733137, + "grad_norm": 0.6921140551567078, + "learning_rate": 3.255055446836269e-05, + "log_odds_chosen": 9.154491424560547, + "log_odds_ratio": -0.07354243099689484, + "logits/chosen": -1.275766372680664, + "logits/rejected": -1.3661515712738037, + "logps/chosen": -0.4288392663002014, + "logps/rejected": -8.616432189941406, + "loss": 0.5427, + "nll_loss": 0.5374979972839355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04288392886519432, + "rewards/margins": 0.8187593221664429, + "rewards/rejected": -0.8616431951522827, + "step": 540 + }, + { + "epoch": 1.0576735092864125, + "grad_norm": 0.6709725856781006, + "learning_rate": 3.251793868232225e-05, + "log_odds_chosen": 13.948375701904297, + "log_odds_ratio": -0.07739664614200592, + "logits/chosen": -1.175313949584961, + "logits/rejected": -1.3859549760818481, + "logps/chosen": -0.44751498103141785, + "logps/rejected": -13.345067977905273, + "loss": 0.5338, + "nll_loss": 0.5970073938369751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044751495122909546, + "rewards/margins": 1.2897553443908691, + "rewards/rejected": -1.3345067501068115, + "step": 541 + }, + { + "epoch": 1.0596285434995112, + "grad_norm": 0.6923962831497192, + "learning_rate": 3.24853228962818e-05, + "log_odds_chosen": 7.340867042541504, + "log_odds_ratio": -0.06560420989990234, + "logits/chosen": -1.2170090675354004, + "logits/rejected": -1.5940337181091309, + "logps/chosen": -0.34116846323013306, + "logps/rejected": -6.367558479309082, + "loss": 0.539, + "nll_loss": 0.4937642812728882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034116849303245544, + "rewards/margins": 0.6026390194892883, + "rewards/rejected": -0.6367558240890503, + "step": 542 + }, + { + "epoch": 1.06158357771261, + "grad_norm": 0.6840120553970337, + "learning_rate": 3.245270711024136e-05, + "log_odds_chosen": 11.792753219604492, + "log_odds_ratio": -0.032515451312065125, + "logits/chosen": -1.095882773399353, + "logits/rejected": -1.1006028652191162, + "logps/chosen": -0.2835473418235779, + "logps/rejected": -10.707277297973633, + "loss": 0.5458, + "nll_loss": 0.451333612203598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028354734182357788, + "rewards/margins": 1.0423729419708252, + "rewards/rejected": -1.0707277059555054, + "step": 543 + }, + { + "epoch": 1.0635386119257086, + "grad_norm": 0.6700659394264221, + "learning_rate": 3.242009132420091e-05, + "log_odds_chosen": 15.382213592529297, + "log_odds_ratio": -0.013288706541061401, + "logits/chosen": -1.2952792644500732, + "logits/rejected": -1.1046547889709473, + "logps/chosen": -0.34569448232650757, + "logps/rejected": -14.40819263458252, + "loss": 0.5423, + "nll_loss": 0.49983417987823486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034569449722766876, + "rewards/margins": 1.406249761581421, + "rewards/rejected": -1.440819263458252, + "step": 544 + }, + { + "epoch": 1.0654936461388074, + "grad_norm": 0.7043254375457764, + "learning_rate": 3.238747553816047e-05, + "log_odds_chosen": 13.0176362991333, + "log_odds_ratio": -0.04289277642965317, + "logits/chosen": -1.3542534112930298, + "logits/rejected": -1.229292869567871, + "logps/chosen": -0.32851940393447876, + "logps/rejected": -12.03873062133789, + "loss": 0.5486, + "nll_loss": 0.4382115304470062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032851941883563995, + "rewards/margins": 1.1710212230682373, + "rewards/rejected": -1.2038731575012207, + "step": 545 + }, + { + "epoch": 1.067448680351906, + "grad_norm": 0.6877513527870178, + "learning_rate": 3.2354859752120024e-05, + "log_odds_chosen": 11.220157623291016, + "log_odds_ratio": -0.12777918577194214, + "logits/chosen": -1.1558358669281006, + "logits/rejected": -1.2094560861587524, + "logps/chosen": -0.5233843326568604, + "logps/rejected": -10.81214714050293, + "loss": 0.5295, + "nll_loss": 0.6049879789352417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.052338436245918274, + "rewards/margins": 1.0288761854171753, + "rewards/rejected": -1.0812147855758667, + "step": 546 + }, + { + "epoch": 1.0694037145650048, + "grad_norm": 0.7113047242164612, + "learning_rate": 3.2322243966079584e-05, + "log_odds_chosen": 16.577796936035156, + "log_odds_ratio": -0.018015379086136818, + "logits/chosen": -1.2426466941833496, + "logits/rejected": -1.2918283939361572, + "logps/chosen": -0.37603893876075745, + "logps/rejected": -15.564404487609863, + "loss": 0.5318, + "nll_loss": 0.41790562868118286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037603892385959625, + "rewards/margins": 1.518836498260498, + "rewards/rejected": -1.5564404726028442, + "step": 547 + }, + { + "epoch": 1.0713587487781036, + "grad_norm": 0.6977596282958984, + "learning_rate": 3.2289628180039136e-05, + "log_odds_chosen": 5.39865255355835, + "log_odds_ratio": -0.07124398648738861, + "logits/chosen": -1.144812822341919, + "logits/rejected": -1.2033029794692993, + "logps/chosen": -0.45391327142715454, + "logps/rejected": -4.846343994140625, + "loss": 0.5344, + "nll_loss": 0.5399352312088013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045391328632831573, + "rewards/margins": 0.4392430782318115, + "rewards/rejected": -0.4846343994140625, + "step": 548 + }, + { + "epoch": 1.0733137829912023, + "grad_norm": 0.7320883274078369, + "learning_rate": 3.2257012393998696e-05, + "log_odds_chosen": 12.314353942871094, + "log_odds_ratio": -0.05897014960646629, + "logits/chosen": -1.2041431665420532, + "logits/rejected": -1.2022807598114014, + "logps/chosen": -0.4238230288028717, + "logps/rejected": -11.640092849731445, + "loss": 0.5502, + "nll_loss": 0.4795885384082794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04238230362534523, + "rewards/margins": 1.1216270923614502, + "rewards/rejected": -1.1640092134475708, + "step": 549 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.6893203854560852, + "learning_rate": 3.222439660795825e-05, + "log_odds_chosen": 14.289182662963867, + "log_odds_ratio": -0.05256461352109909, + "logits/chosen": -1.2857224941253662, + "logits/rejected": -1.4202388525009155, + "logps/chosen": -0.44474607706069946, + "logps/rejected": -13.696512222290039, + "loss": 0.529, + "nll_loss": 0.5077416896820068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044474609196186066, + "rewards/margins": 1.32517671585083, + "rewards/rejected": -1.3696513175964355, + "step": 550 + }, + { + "epoch": 1.0772238514173997, + "grad_norm": 0.72184818983078, + "learning_rate": 3.219178082191781e-05, + "log_odds_chosen": 13.797950744628906, + "log_odds_ratio": -0.10283517092466354, + "logits/chosen": -0.9902523756027222, + "logits/rejected": -1.2583602666854858, + "logps/chosen": -0.34698641300201416, + "logps/rejected": -12.802715301513672, + "loss": 0.5413, + "nll_loss": 0.5302708148956299, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.03469863906502724, + "rewards/margins": 1.2455730438232422, + "rewards/rejected": -1.2802716493606567, + "step": 551 + }, + { + "epoch": 1.0791788856304985, + "grad_norm": 0.6994834542274475, + "learning_rate": 3.215916503587737e-05, + "log_odds_chosen": 14.94234848022461, + "log_odds_ratio": -0.03796517103910446, + "logits/chosen": -0.9767404198646545, + "logits/rejected": -1.014824628829956, + "logps/chosen": -0.32797184586524963, + "logps/rejected": -13.924799919128418, + "loss": 0.5263, + "nll_loss": 0.5124338269233704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032797183841466904, + "rewards/margins": 1.3596829175949097, + "rewards/rejected": -1.3924800157546997, + "step": 552 + }, + { + "epoch": 1.0811339198435972, + "grad_norm": 0.6952031850814819, + "learning_rate": 3.212654924983692e-05, + "log_odds_chosen": 8.958312034606934, + "log_odds_ratio": -0.045773111283779144, + "logits/chosen": -1.3227370977401733, + "logits/rejected": -1.3874489068984985, + "logps/chosen": -0.35068944096565247, + "logps/rejected": -8.036859512329102, + "loss": 0.5301, + "nll_loss": 0.5018061399459839, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03506894409656525, + "rewards/margins": 0.7686170339584351, + "rewards/rejected": -0.8036860227584839, + "step": 553 + }, + { + "epoch": 1.083088954056696, + "grad_norm": 0.7202411890029907, + "learning_rate": 3.209393346379648e-05, + "log_odds_chosen": 7.0508317947387695, + "log_odds_ratio": -0.0827040821313858, + "logits/chosen": -1.098850131034851, + "logits/rejected": -1.2289674282073975, + "logps/chosen": -0.49654215574264526, + "logps/rejected": -6.623836517333984, + "loss": 0.5357, + "nll_loss": 0.7478981018066406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.049654215574264526, + "rewards/margins": 0.6127294301986694, + "rewards/rejected": -0.6623836755752563, + "step": 554 + }, + { + "epoch": 1.0850439882697946, + "grad_norm": 0.6967370510101318, + "learning_rate": 3.206131767775604e-05, + "log_odds_chosen": 16.032421112060547, + "log_odds_ratio": -0.024062249809503555, + "logits/chosen": -1.271820068359375, + "logits/rejected": -1.3724161386489868, + "logps/chosen": -0.35015469789505005, + "logps/rejected": -15.136449813842773, + "loss": 0.5363, + "nll_loss": 0.4483514428138733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03501546382904053, + "rewards/margins": 1.4786297082901, + "rewards/rejected": -1.513645052909851, + "step": 555 + }, + { + "epoch": 1.0869990224828934, + "grad_norm": 0.7119446992874146, + "learning_rate": 3.202870189171559e-05, + "log_odds_chosen": 18.018627166748047, + "log_odds_ratio": -0.03067036345601082, + "logits/chosen": -1.2066770792007446, + "logits/rejected": -1.3428841829299927, + "logps/chosen": -0.34607210755348206, + "logps/rejected": -17.067707061767578, + "loss": 0.5284, + "nll_loss": 0.5990719199180603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034607209265232086, + "rewards/margins": 1.672163486480713, + "rewards/rejected": -1.706770658493042, + "step": 556 + }, + { + "epoch": 1.088954056695992, + "grad_norm": 0.6883905529975891, + "learning_rate": 3.199608610567515e-05, + "log_odds_chosen": 18.016281127929688, + "log_odds_ratio": -0.03948044404387474, + "logits/chosen": -1.3215306997299194, + "logits/rejected": -1.2406947612762451, + "logps/chosen": -0.3850085735321045, + "logps/rejected": -17.198715209960938, + "loss": 0.5282, + "nll_loss": 0.5578354001045227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03850086033344269, + "rewards/margins": 1.6813706159591675, + "rewards/rejected": -1.7198715209960938, + "step": 557 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.6733434200286865, + "learning_rate": 3.1963470319634704e-05, + "log_odds_chosen": 13.153863906860352, + "log_odds_ratio": -0.0506766140460968, + "logits/chosen": -1.1983219385147095, + "logits/rejected": -1.2215434312820435, + "logps/chosen": -0.2388143390417099, + "logps/rejected": -11.66224479675293, + "loss": 0.5237, + "nll_loss": 0.37424132227897644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02388143539428711, + "rewards/margins": 1.1423431634902954, + "rewards/rejected": -1.166224479675293, + "step": 558 + }, + { + "epoch": 1.0928641251221896, + "grad_norm": 0.7105990648269653, + "learning_rate": 3.1930854533594264e-05, + "log_odds_chosen": 13.562946319580078, + "log_odds_ratio": -0.016283497214317322, + "logits/chosen": -1.2434886693954468, + "logits/rejected": -1.3991384506225586, + "logps/chosen": -0.46901100873947144, + "logps/rejected": -12.866233825683594, + "loss": 0.5462, + "nll_loss": 0.6225215196609497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.046901099383831024, + "rewards/margins": 1.2397222518920898, + "rewards/rejected": -1.286623477935791, + "step": 559 + }, + { + "epoch": 1.0948191593352883, + "grad_norm": 0.7036073207855225, + "learning_rate": 3.1898238747553816e-05, + "log_odds_chosen": 16.432575225830078, + "log_odds_ratio": -0.033672165125608444, + "logits/chosen": -1.0355465412139893, + "logits/rejected": -1.065449595451355, + "logps/chosen": -0.4885258674621582, + "logps/rejected": -15.91482162475586, + "loss": 0.5063, + "nll_loss": 0.6431280374526978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04885258898139, + "rewards/margins": 1.5426297187805176, + "rewards/rejected": -1.591482162475586, + "step": 560 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.7107883095741272, + "learning_rate": 3.1865622961513376e-05, + "log_odds_chosen": 7.7636213302612305, + "log_odds_ratio": -0.05903831869363785, + "logits/chosen": -1.3951995372772217, + "logits/rejected": -1.1549842357635498, + "logps/chosen": -0.44055449962615967, + "logps/rejected": -7.213000297546387, + "loss": 0.5376, + "nll_loss": 0.6647042036056519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.044055450707674026, + "rewards/margins": 0.6772446036338806, + "rewards/rejected": -0.7213000059127808, + "step": 561 + }, + { + "epoch": 1.0987292277614857, + "grad_norm": 0.6856012940406799, + "learning_rate": 3.183300717547293e-05, + "log_odds_chosen": 7.57395076751709, + "log_odds_ratio": -0.08745992183685303, + "logits/chosen": -0.9368453025817871, + "logits/rejected": -0.9056733846664429, + "logps/chosen": -0.31217092275619507, + "logps/rejected": -6.750736236572266, + "loss": 0.5163, + "nll_loss": 0.4838142693042755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031217096373438835, + "rewards/margins": 0.6438566446304321, + "rewards/rejected": -0.6750736236572266, + "step": 562 + }, + { + "epoch": 1.1006842619745845, + "grad_norm": 0.7043616771697998, + "learning_rate": 3.180039138943249e-05, + "log_odds_chosen": 10.431768417358398, + "log_odds_ratio": -0.07067019492387772, + "logits/chosen": -1.1926424503326416, + "logits/rejected": -1.447959542274475, + "logps/chosen": -0.5118004679679871, + "logps/rejected": -9.993358612060547, + "loss": 0.5417, + "nll_loss": 0.6071051359176636, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.051180049777030945, + "rewards/margins": 0.9481556415557861, + "rewards/rejected": -0.999335765838623, + "step": 563 + }, + { + "epoch": 1.1026392961876832, + "grad_norm": 0.7084854245185852, + "learning_rate": 3.176777560339204e-05, + "log_odds_chosen": 8.293876647949219, + "log_odds_ratio": -0.04211021214723587, + "logits/chosen": -1.1423194408416748, + "logits/rejected": -1.2194592952728271, + "logps/chosen": -0.22431209683418274, + "logps/rejected": -6.925491809844971, + "loss": 0.5336, + "nll_loss": 0.347864031791687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022431211546063423, + "rewards/margins": 0.670117974281311, + "rewards/rejected": -0.6925491094589233, + "step": 564 + }, + { + "epoch": 1.104594330400782, + "grad_norm": 0.7264560461044312, + "learning_rate": 3.17351598173516e-05, + "log_odds_chosen": 6.5627031326293945, + "log_odds_ratio": -0.0426606684923172, + "logits/chosen": -1.2226468324661255, + "logits/rejected": -1.1318455934524536, + "logps/chosen": -0.3488934636116028, + "logps/rejected": -5.52884578704834, + "loss": 0.5407, + "nll_loss": 0.4795345664024353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034889351576566696, + "rewards/margins": 0.5179951786994934, + "rewards/rejected": -0.552884578704834, + "step": 565 + }, + { + "epoch": 1.1065493646138806, + "grad_norm": 0.7028964757919312, + "learning_rate": 3.170254403131115e-05, + "log_odds_chosen": 9.364984512329102, + "log_odds_ratio": -0.024864906445145607, + "logits/chosen": -1.287439227104187, + "logits/rejected": -1.2594280242919922, + "logps/chosen": -0.38659512996673584, + "logps/rejected": -8.47496223449707, + "loss": 0.5207, + "nll_loss": 0.5063124299049377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03865951672196388, + "rewards/margins": 0.8088367581367493, + "rewards/rejected": -0.8474962115287781, + "step": 566 + }, + { + "epoch": 1.1085043988269794, + "grad_norm": 0.6735405921936035, + "learning_rate": 3.166992824527071e-05, + "log_odds_chosen": 8.940417289733887, + "log_odds_ratio": -0.0367865227162838, + "logits/chosen": -1.288403034210205, + "logits/rejected": -1.3272640705108643, + "logps/chosen": -0.4214792549610138, + "logps/rejected": -8.323944091796875, + "loss": 0.5133, + "nll_loss": 0.5173627138137817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0421479269862175, + "rewards/margins": 0.7902464866638184, + "rewards/rejected": -0.8323944211006165, + "step": 567 + }, + { + "epoch": 1.110459433040078, + "grad_norm": 0.6842292547225952, + "learning_rate": 3.1637312459230265e-05, + "log_odds_chosen": 14.393380165100098, + "log_odds_ratio": -0.027549002319574356, + "logits/chosen": -1.1086671352386475, + "logits/rejected": -1.2912236452102661, + "logps/chosen": -0.3229108452796936, + "logps/rejected": -13.089828491210938, + "loss": 0.5173, + "nll_loss": 0.4975380301475525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03229108452796936, + "rewards/margins": 1.2766917943954468, + "rewards/rejected": -1.3089828491210938, + "step": 568 + }, + { + "epoch": 1.1124144672531768, + "grad_norm": 0.7527894973754883, + "learning_rate": 3.1604696673189824e-05, + "log_odds_chosen": 13.0013427734375, + "log_odds_ratio": -0.03551366925239563, + "logits/chosen": -1.2478044033050537, + "logits/rejected": -1.1147425174713135, + "logps/chosen": -0.361067533493042, + "logps/rejected": -12.117504119873047, + "loss": 0.5261, + "nll_loss": 0.5435159206390381, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03610675036907196, + "rewards/margins": 1.1756436824798584, + "rewards/rejected": -1.2117505073547363, + "step": 569 + }, + { + "epoch": 1.1143695014662756, + "grad_norm": 0.6705291271209717, + "learning_rate": 3.157208088714938e-05, + "log_odds_chosen": 6.44515323638916, + "log_odds_ratio": -0.05377066880464554, + "logits/chosen": -1.3091009855270386, + "logits/rejected": -1.3556318283081055, + "logps/chosen": -0.36859700083732605, + "logps/rejected": -5.556127548217773, + "loss": 0.5187, + "nll_loss": 0.4482402801513672, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036859698593616486, + "rewards/margins": 0.5187529921531677, + "rewards/rejected": -0.5556127429008484, + "step": 570 + }, + { + "epoch": 1.1163245356793743, + "grad_norm": 0.7180348038673401, + "learning_rate": 3.1539465101108937e-05, + "log_odds_chosen": 11.129253387451172, + "log_odds_ratio": -0.02178550511598587, + "logits/chosen": -1.3172909021377563, + "logits/rejected": -1.3577715158462524, + "logps/chosen": -0.42735034227371216, + "logps/rejected": -10.265910148620605, + "loss": 0.5237, + "nll_loss": 0.59023118019104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.042735032737255096, + "rewards/margins": 0.9838559031486511, + "rewards/rejected": -1.0265910625457764, + "step": 571 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.6943552494049072, + "learning_rate": 3.1506849315068496e-05, + "log_odds_chosen": 15.396892547607422, + "log_odds_ratio": -0.0442228838801384, + "logits/chosen": -1.4774632453918457, + "logits/rejected": -1.355116844177246, + "logps/chosen": -0.2749546468257904, + "logps/rejected": -14.10643196105957, + "loss": 0.512, + "nll_loss": 0.3962975740432739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02749546617269516, + "rewards/margins": 1.3831477165222168, + "rewards/rejected": -1.4106431007385254, + "step": 572 + }, + { + "epoch": 1.1202346041055717, + "grad_norm": 0.7143755555152893, + "learning_rate": 3.147423352902805e-05, + "log_odds_chosen": 15.499427795410156, + "log_odds_ratio": -0.0361870601773262, + "logits/chosen": -1.3851063251495361, + "logits/rejected": -1.3862545490264893, + "logps/chosen": -0.3763030767440796, + "logps/rejected": -14.70671272277832, + "loss": 0.5251, + "nll_loss": 0.5630367994308472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03763031214475632, + "rewards/margins": 1.4330408573150635, + "rewards/rejected": -1.47067129611969, + "step": 573 + }, + { + "epoch": 1.1221896383186705, + "grad_norm": 0.7046847939491272, + "learning_rate": 3.144161774298761e-05, + "log_odds_chosen": 10.53813362121582, + "log_odds_ratio": -0.013507082127034664, + "logits/chosen": -1.1490368843078613, + "logits/rejected": -1.3944499492645264, + "logps/chosen": -0.37122902274131775, + "logps/rejected": -9.593764305114746, + "loss": 0.516, + "nll_loss": 0.5370656251907349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.037122905254364014, + "rewards/margins": 0.9222535490989685, + "rewards/rejected": -0.9593764543533325, + "step": 574 + }, + { + "epoch": 1.1241446725317692, + "grad_norm": 0.7065625190734863, + "learning_rate": 3.140900195694717e-05, + "log_odds_chosen": 16.429847717285156, + "log_odds_ratio": -0.022794222459197044, + "logits/chosen": -1.2918541431427002, + "logits/rejected": -1.1332154273986816, + "logps/chosen": -0.33507153391838074, + "logps/rejected": -15.255571365356445, + "loss": 0.5241, + "nll_loss": 0.441519558429718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033507153391838074, + "rewards/margins": 1.4920499324798584, + "rewards/rejected": -1.525557279586792, + "step": 575 + }, + { + "epoch": 1.126099706744868, + "grad_norm": 0.6642009615898132, + "learning_rate": 3.137638617090672e-05, + "log_odds_chosen": 9.914203643798828, + "log_odds_ratio": -0.01752467080950737, + "logits/chosen": -1.2082018852233887, + "logits/rejected": -1.458463430404663, + "logps/chosen": -0.26531893014907837, + "logps/rejected": -8.478126525878906, + "loss": 0.4978, + "nll_loss": 0.3345639705657959, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026531893759965897, + "rewards/margins": 0.8212807178497314, + "rewards/rejected": -0.8478125929832458, + "step": 576 + }, + { + "epoch": 1.1280547409579667, + "grad_norm": 0.7119777798652649, + "learning_rate": 3.134377038486628e-05, + "log_odds_chosen": 13.766066551208496, + "log_odds_ratio": -0.0703095942735672, + "logits/chosen": -1.2892460823059082, + "logits/rejected": -1.3397712707519531, + "logps/chosen": -0.3707049489021301, + "logps/rejected": -12.795903205871582, + "loss": 0.5377, + "nll_loss": 0.4772912263870239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03707049414515495, + "rewards/margins": 1.2425198554992676, + "rewards/rejected": -1.279590368270874, + "step": 577 + }, + { + "epoch": 1.1300097751710654, + "grad_norm": 0.6699366569519043, + "learning_rate": 3.131115459882583e-05, + "log_odds_chosen": 13.515551567077637, + "log_odds_ratio": -0.02435864694416523, + "logits/chosen": -1.1462757587432861, + "logits/rejected": -1.1960759162902832, + "logps/chosen": -0.3546561598777771, + "logps/rejected": -12.646808624267578, + "loss": 0.515, + "nll_loss": 0.47618603706359863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03546561673283577, + "rewards/margins": 1.229215383529663, + "rewards/rejected": -1.2646808624267578, + "step": 578 + }, + { + "epoch": 1.131964809384164, + "grad_norm": 0.669596791267395, + "learning_rate": 3.127853881278539e-05, + "log_odds_chosen": 10.534242630004883, + "log_odds_ratio": -0.04878669232130051, + "logits/chosen": -1.1447434425354004, + "logits/rejected": -1.3214621543884277, + "logps/chosen": -0.5099708437919617, + "logps/rejected": -10.030635833740234, + "loss": 0.5224, + "nll_loss": 0.6599740982055664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.050997085869312286, + "rewards/margins": 0.9520665407180786, + "rewards/rejected": -1.003063678741455, + "step": 579 + }, + { + "epoch": 1.1339198435972628, + "grad_norm": 0.6706910133361816, + "learning_rate": 3.1245923026744945e-05, + "log_odds_chosen": 10.958101272583008, + "log_odds_ratio": -0.02519639953970909, + "logits/chosen": -1.0306525230407715, + "logits/rejected": -1.1941007375717163, + "logps/chosen": -0.3885287940502167, + "logps/rejected": -10.05328369140625, + "loss": 0.5052, + "nll_loss": 0.4976595938205719, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03885287791490555, + "rewards/margins": 0.9664755463600159, + "rewards/rejected": -1.0053284168243408, + "step": 580 + }, + { + "epoch": 1.1358748778103616, + "grad_norm": 0.682305097579956, + "learning_rate": 3.1213307240704504e-05, + "log_odds_chosen": 15.715177536010742, + "log_odds_ratio": -0.03887879103422165, + "logits/chosen": -1.1585731506347656, + "logits/rejected": -0.9497904777526855, + "logps/chosen": -0.45146214962005615, + "logps/rejected": -14.902458190917969, + "loss": 0.5203, + "nll_loss": 0.6459730863571167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.045146211981773376, + "rewards/margins": 1.4450995922088623, + "rewards/rejected": -1.4902458190917969, + "step": 581 + }, + { + "epoch": 1.1378299120234603, + "grad_norm": 0.6825368404388428, + "learning_rate": 3.118069145466406e-05, + "log_odds_chosen": 12.741348266601562, + "log_odds_ratio": -0.029333284124732018, + "logits/chosen": -1.1049169301986694, + "logits/rejected": -1.0866111516952515, + "logps/chosen": -0.31738120317459106, + "logps/rejected": -11.639058113098145, + "loss": 0.5051, + "nll_loss": 0.5536911487579346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031738121062517166, + "rewards/margins": 1.1321678161621094, + "rewards/rejected": -1.1639058589935303, + "step": 582 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.6886694431304932, + "learning_rate": 3.1148075668623616e-05, + "log_odds_chosen": 15.84097671508789, + "log_odds_ratio": -0.045729588717222214, + "logits/chosen": -1.3036367893218994, + "logits/rejected": -1.3578777313232422, + "logps/chosen": -0.38074544072151184, + "logps/rejected": -14.98077392578125, + "loss": 0.5214, + "nll_loss": 0.4224063754081726, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0380745455622673, + "rewards/margins": 1.4600028991699219, + "rewards/rejected": -1.498077392578125, + "step": 583 + }, + { + "epoch": 1.141739980449658, + "grad_norm": 0.6858547925949097, + "learning_rate": 3.111545988258317e-05, + "log_odds_chosen": 8.48341178894043, + "log_odds_ratio": -0.023123137652873993, + "logits/chosen": -1.2775874137878418, + "logits/rejected": -1.2759900093078613, + "logps/chosen": -0.31076350808143616, + "logps/rejected": -7.445518493652344, + "loss": 0.5239, + "nll_loss": 0.35960423946380615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031076349318027496, + "rewards/margins": 0.7134755253791809, + "rewards/rejected": -0.7445518374443054, + "step": 584 + }, + { + "epoch": 1.1436950146627567, + "grad_norm": 0.6715458631515503, + "learning_rate": 3.108284409654273e-05, + "log_odds_chosen": 11.067577362060547, + "log_odds_ratio": -0.03562592715024948, + "logits/chosen": -1.3752801418304443, + "logits/rejected": -1.2634303569793701, + "logps/chosen": -0.22153347730636597, + "logps/rejected": -9.608011245727539, + "loss": 0.512, + "nll_loss": 0.34159091114997864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022153349593281746, + "rewards/margins": 0.9386476278305054, + "rewards/rejected": -0.9608010053634644, + "step": 585 + }, + { + "epoch": 1.1456500488758554, + "grad_norm": 0.6973502039909363, + "learning_rate": 3.105022831050228e-05, + "log_odds_chosen": 14.120711326599121, + "log_odds_ratio": -0.05424219369888306, + "logits/chosen": -1.131150722503662, + "logits/rejected": -1.1628100872039795, + "logps/chosen": -0.379143625497818, + "logps/rejected": -13.239175796508789, + "loss": 0.522, + "nll_loss": 0.5332534313201904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03791436180472374, + "rewards/margins": 1.2860032320022583, + "rewards/rejected": -1.3239176273345947, + "step": 586 + }, + { + "epoch": 1.1476050830889541, + "grad_norm": 0.6694691181182861, + "learning_rate": 3.101761252446184e-05, + "log_odds_chosen": 12.131752014160156, + "log_odds_ratio": -0.03699393942952156, + "logits/chosen": -1.185821533203125, + "logits/rejected": -1.435551643371582, + "logps/chosen": -0.42869752645492554, + "logps/rejected": -11.360295295715332, + "loss": 0.5074, + "nll_loss": 0.47389888763427734, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04286975413560867, + "rewards/margins": 1.0931599140167236, + "rewards/rejected": -1.1360294818878174, + "step": 587 + }, + { + "epoch": 1.1495601173020529, + "grad_norm": 0.6840605139732361, + "learning_rate": 3.0984996738421393e-05, + "log_odds_chosen": 13.850370407104492, + "log_odds_ratio": -0.031042661517858505, + "logits/chosen": -1.2748734951019287, + "logits/rejected": -1.232447624206543, + "logps/chosen": -0.2862948775291443, + "logps/rejected": -12.741326332092285, + "loss": 0.521, + "nll_loss": 0.42648833990097046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028629489243030548, + "rewards/margins": 1.2455031871795654, + "rewards/rejected": -1.2741327285766602, + "step": 588 + }, + { + "epoch": 1.1515151515151516, + "grad_norm": 0.687702476978302, + "learning_rate": 3.095238095238095e-05, + "log_odds_chosen": 14.121519088745117, + "log_odds_ratio": -0.028046593070030212, + "logits/chosen": -1.3080593347549438, + "logits/rejected": -1.0954368114471436, + "logps/chosen": -0.3057152330875397, + "logps/rejected": -12.948552131652832, + "loss": 0.5121, + "nll_loss": 0.4001719355583191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03057152032852173, + "rewards/margins": 1.2642838954925537, + "rewards/rejected": -1.2948553562164307, + "step": 589 + }, + { + "epoch": 1.1534701857282503, + "grad_norm": 0.6754181385040283, + "learning_rate": 3.0919765166340506e-05, + "log_odds_chosen": 17.586528778076172, + "log_odds_ratio": -0.06126530095934868, + "logits/chosen": -1.1863524913787842, + "logits/rejected": -1.1510984897613525, + "logps/chosen": -0.48948776721954346, + "logps/rejected": -16.953960418701172, + "loss": 0.5056, + "nll_loss": 0.6423881649971008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048948779702186584, + "rewards/margins": 1.6464471817016602, + "rewards/rejected": -1.6953959465026855, + "step": 590 + }, + { + "epoch": 1.155425219941349, + "grad_norm": 0.6703155040740967, + "learning_rate": 3.0887149380300065e-05, + "log_odds_chosen": 11.499078750610352, + "log_odds_ratio": -0.01628708839416504, + "logits/chosen": -1.2606127262115479, + "logits/rejected": -1.520026445388794, + "logps/chosen": -0.41044285893440247, + "logps/rejected": -10.705459594726562, + "loss": 0.5136, + "nll_loss": 0.5148156881332397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.041044287383556366, + "rewards/margins": 1.0295016765594482, + "rewards/rejected": -1.0705459117889404, + "step": 591 + }, + { + "epoch": 1.1573802541544478, + "grad_norm": 0.6608515977859497, + "learning_rate": 3.0854533594259625e-05, + "log_odds_chosen": 15.525111198425293, + "log_odds_ratio": -0.0068309069611132145, + "logits/chosen": -1.2245395183563232, + "logits/rejected": -1.2850282192230225, + "logps/chosen": -0.2970283031463623, + "logps/rejected": -14.311182022094727, + "loss": 0.4858, + "nll_loss": 0.41126614809036255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02970283105969429, + "rewards/margins": 1.4014153480529785, + "rewards/rejected": -1.4311182498931885, + "step": 592 + }, + { + "epoch": 1.1593352883675465, + "grad_norm": 0.6854323744773865, + "learning_rate": 3.082191780821918e-05, + "log_odds_chosen": 15.689228057861328, + "log_odds_ratio": -0.028646137565374374, + "logits/chosen": -1.2376590967178345, + "logits/rejected": -1.1331348419189453, + "logps/chosen": -0.5346788167953491, + "logps/rejected": -15.194520950317383, + "loss": 0.5099, + "nll_loss": 0.5814070701599121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05346788093447685, + "rewards/margins": 1.4659843444824219, + "rewards/rejected": -1.5194520950317383, + "step": 593 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.7067267298698425, + "learning_rate": 3.078930202217874e-05, + "log_odds_chosen": 10.812814712524414, + "log_odds_ratio": -0.036329273134469986, + "logits/chosen": -1.237723708152771, + "logits/rejected": -1.1733622550964355, + "logps/chosen": -0.3312470316886902, + "logps/rejected": -9.708669662475586, + "loss": 0.5275, + "nll_loss": 0.4923359751701355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03312470018863678, + "rewards/margins": 0.9377422332763672, + "rewards/rejected": -0.9708669781684875, + "step": 594 + }, + { + "epoch": 1.163245356793744, + "grad_norm": 0.6914932727813721, + "learning_rate": 3.0756686236138296e-05, + "log_odds_chosen": 9.041374206542969, + "log_odds_ratio": -0.08107767254114151, + "logits/chosen": -1.2793008089065552, + "logits/rejected": -1.244995355606079, + "logps/chosen": -0.4273708164691925, + "logps/rejected": -8.25271987915039, + "loss": 0.5243, + "nll_loss": 0.4971688389778137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04273707792162895, + "rewards/margins": 0.7825350165367126, + "rewards/rejected": -0.8252719640731812, + "step": 595 + }, + { + "epoch": 1.1652003910068427, + "grad_norm": 0.6935763955116272, + "learning_rate": 3.072407045009785e-05, + "log_odds_chosen": 15.705638885498047, + "log_odds_ratio": -0.033864688128232956, + "logits/chosen": -1.3252320289611816, + "logits/rejected": -1.1299914121627808, + "logps/chosen": -0.4026493430137634, + "logps/rejected": -14.950026512145996, + "loss": 0.5228, + "nll_loss": 0.43867504596710205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04026493430137634, + "rewards/margins": 1.454737663269043, + "rewards/rejected": -1.4950025081634521, + "step": 596 + }, + { + "epoch": 1.1671554252199414, + "grad_norm": 0.6657503843307495, + "learning_rate": 3.069145466405741e-05, + "log_odds_chosen": 17.28795623779297, + "log_odds_ratio": -0.024968577548861504, + "logits/chosen": -1.2172918319702148, + "logits/rejected": -1.0929619073867798, + "logps/chosen": -0.4135543704032898, + "logps/rejected": -16.645137786865234, + "loss": 0.5002, + "nll_loss": 0.5116530060768127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0413554385304451, + "rewards/margins": 1.6231582164764404, + "rewards/rejected": -1.6645138263702393, + "step": 597 + }, + { + "epoch": 1.1691104594330402, + "grad_norm": 0.670772135257721, + "learning_rate": 3.065883887801696e-05, + "log_odds_chosen": 11.530031204223633, + "log_odds_ratio": -0.057504408061504364, + "logits/chosen": -1.338998794555664, + "logits/rejected": -1.4112985134124756, + "logps/chosen": -0.3730660676956177, + "logps/rejected": -10.691730499267578, + "loss": 0.477, + "nll_loss": 0.4674760103225708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03730660676956177, + "rewards/margins": 1.031866431236267, + "rewards/rejected": -1.069172978401184, + "step": 598 + }, + { + "epoch": 1.1710654936461389, + "grad_norm": 0.7249093651771545, + "learning_rate": 3.062622309197652e-05, + "log_odds_chosen": 12.81834602355957, + "log_odds_ratio": -0.02733590453863144, + "logits/chosen": -1.3342599868774414, + "logits/rejected": -1.342078447341919, + "logps/chosen": -0.27637308835983276, + "logps/rejected": -11.635597229003906, + "loss": 0.5082, + "nll_loss": 0.3931330442428589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027637306600809097, + "rewards/margins": 1.1359224319458008, + "rewards/rejected": -1.1635597944259644, + "step": 599 + }, + { + "epoch": 1.1730205278592376, + "grad_norm": 0.6766085028648376, + "learning_rate": 3.059360730593607e-05, + "log_odds_chosen": 16.905973434448242, + "log_odds_ratio": -0.007352971937507391, + "logits/chosen": -1.1833713054656982, + "logits/rejected": -1.1633350849151611, + "logps/chosen": -0.32637882232666016, + "logps/rejected": -15.859077453613281, + "loss": 0.4854, + "nll_loss": 0.4452570080757141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032637882977724075, + "rewards/margins": 1.553269863128662, + "rewards/rejected": -1.5859076976776123, + "step": 600 + }, + { + "epoch": 1.1749755620723363, + "grad_norm": 0.7193933725357056, + "learning_rate": 3.056099151989563e-05, + "log_odds_chosen": 10.232939720153809, + "log_odds_ratio": -0.08106856048107147, + "logits/chosen": -1.3249483108520508, + "logits/rejected": -1.4669016599655151, + "logps/chosen": -0.49335503578186035, + "logps/rejected": -9.74928092956543, + "loss": 0.5121, + "nll_loss": 0.5962625741958618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04933550953865051, + "rewards/margins": 0.9255925416946411, + "rewards/rejected": -0.9749280214309692, + "step": 601 + }, + { + "epoch": 1.176930596285435, + "grad_norm": 0.6756731867790222, + "learning_rate": 3.0528375733855185e-05, + "log_odds_chosen": 12.018394470214844, + "log_odds_ratio": -0.02412068098783493, + "logits/chosen": -1.3392541408538818, + "logits/rejected": -1.3443951606750488, + "logps/chosen": -0.36708617210388184, + "logps/rejected": -11.070291519165039, + "loss": 0.4935, + "nll_loss": 0.5126000046730042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036708615720272064, + "rewards/margins": 1.070320725440979, + "rewards/rejected": -1.1070291996002197, + "step": 602 + }, + { + "epoch": 1.1788856304985338, + "grad_norm": 0.686643660068512, + "learning_rate": 3.0495759947814745e-05, + "log_odds_chosen": 6.989504337310791, + "log_odds_ratio": -0.11388899385929108, + "logits/chosen": -1.2013182640075684, + "logits/rejected": -1.0866663455963135, + "logps/chosen": -0.3404088020324707, + "logps/rejected": -6.080774307250977, + "loss": 0.5025, + "nll_loss": 0.47317418456077576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03404087573289871, + "rewards/margins": 0.5740365386009216, + "rewards/rejected": -0.6080774664878845, + "step": 603 + }, + { + "epoch": 1.1808406647116325, + "grad_norm": 0.6724377274513245, + "learning_rate": 3.0463144161774298e-05, + "log_odds_chosen": 9.481629371643066, + "log_odds_ratio": -0.036036405712366104, + "logits/chosen": -1.269179105758667, + "logits/rejected": -1.3989903926849365, + "logps/chosen": -0.29628556966781616, + "logps/rejected": -8.261026382446289, + "loss": 0.4827, + "nll_loss": 0.3536837697029114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029628558084368706, + "rewards/margins": 0.796474039554596, + "rewards/rejected": -0.826102614402771, + "step": 604 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.6539981365203857, + "learning_rate": 3.0430528375733857e-05, + "log_odds_chosen": 9.623355865478516, + "log_odds_ratio": -0.006067922338843346, + "logits/chosen": -1.1708321571350098, + "logits/rejected": -1.357384443283081, + "logps/chosen": -0.21218755841255188, + "logps/rejected": -7.909640312194824, + "loss": 0.479, + "nll_loss": 0.4021824300289154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021218756213784218, + "rewards/margins": 0.7697452306747437, + "rewards/rejected": -0.7909640073776245, + "step": 605 + }, + { + "epoch": 1.18475073313783, + "grad_norm": 0.706014096736908, + "learning_rate": 3.039791258969341e-05, + "log_odds_chosen": 9.286947250366211, + "log_odds_ratio": -0.0416153222322464, + "logits/chosen": -1.2122690677642822, + "logits/rejected": -1.391201138496399, + "logps/chosen": -0.33020246028900146, + "logps/rejected": -8.245321273803711, + "loss": 0.5035, + "nll_loss": 0.5563122034072876, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033020250499248505, + "rewards/margins": 0.7915118932723999, + "rewards/rejected": -0.824532151222229, + "step": 606 + }, + { + "epoch": 1.1867057673509287, + "grad_norm": 0.682781457901001, + "learning_rate": 3.036529680365297e-05, + "log_odds_chosen": 11.931504249572754, + "log_odds_ratio": -0.009104878641664982, + "logits/chosen": -1.3080742359161377, + "logits/rejected": -1.4904873371124268, + "logps/chosen": -0.4480544924736023, + "logps/rejected": -11.088150024414062, + "loss": 0.5006, + "nll_loss": 0.48746761679649353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04480545222759247, + "rewards/margins": 1.0640095472335815, + "rewards/rejected": -1.1088149547576904, + "step": 607 + }, + { + "epoch": 1.1886608015640274, + "grad_norm": 0.6781203746795654, + "learning_rate": 3.0332681017612525e-05, + "log_odds_chosen": 7.457338333129883, + "log_odds_ratio": -0.03877297788858414, + "logits/chosen": -1.1961653232574463, + "logits/rejected": -1.2879431247711182, + "logps/chosen": -0.5392236113548279, + "logps/rejected": -6.833890914916992, + "loss": 0.4988, + "nll_loss": 0.7534139752388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05392236262559891, + "rewards/margins": 0.6294667720794678, + "rewards/rejected": -0.6833891272544861, + "step": 608 + }, + { + "epoch": 1.1906158357771262, + "grad_norm": 0.6766337156295776, + "learning_rate": 3.0300065231572085e-05, + "log_odds_chosen": 9.583624839782715, + "log_odds_ratio": -0.10929644107818604, + "logits/chosen": -1.0764710903167725, + "logits/rejected": -0.9956393241882324, + "logps/chosen": -0.34713828563690186, + "logps/rejected": -8.586764335632324, + "loss": 0.4757, + "nll_loss": 0.5409311652183533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034713830798864365, + "rewards/margins": 0.8239625692367554, + "rewards/rejected": -0.8586764335632324, + "step": 609 + }, + { + "epoch": 1.1925708699902249, + "grad_norm": 0.6756924986839294, + "learning_rate": 3.0267449445531637e-05, + "log_odds_chosen": 11.016217231750488, + "log_odds_ratio": -0.04428723454475403, + "logits/chosen": -1.3474490642547607, + "logits/rejected": -1.4402854442596436, + "logps/chosen": -0.31826648116111755, + "logps/rejected": -9.96047306060791, + "loss": 0.4889, + "nll_loss": 0.4377318322658539, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031826648861169815, + "rewards/margins": 0.9642205834388733, + "rewards/rejected": -0.9960472583770752, + "step": 610 + }, + { + "epoch": 1.1945259042033236, + "grad_norm": 0.7104191780090332, + "learning_rate": 3.0234833659491197e-05, + "log_odds_chosen": 6.503749847412109, + "log_odds_ratio": -0.03614040091633797, + "logits/chosen": -1.4282678365707397, + "logits/rejected": -1.4176368713378906, + "logps/chosen": -0.33994317054748535, + "logps/rejected": -5.621044158935547, + "loss": 0.4846, + "nll_loss": 0.4174997806549072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033994320780038834, + "rewards/margins": 0.5281100869178772, + "rewards/rejected": -0.5621044039726257, + "step": 611 + }, + { + "epoch": 1.1964809384164223, + "grad_norm": 0.7001104354858398, + "learning_rate": 3.020221787345075e-05, + "log_odds_chosen": 12.581290245056152, + "log_odds_ratio": -0.05753454193472862, + "logits/chosen": -1.2868602275848389, + "logits/rejected": -1.3619754314422607, + "logps/chosen": -0.3667372167110443, + "logps/rejected": -11.558197021484375, + "loss": 0.4929, + "nll_loss": 0.49347567558288574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03667372465133667, + "rewards/margins": 1.1191458702087402, + "rewards/rejected": -1.1558196544647217, + "step": 612 + }, + { + "epoch": 1.198435972629521, + "grad_norm": 0.6810599565505981, + "learning_rate": 3.016960208741031e-05, + "log_odds_chosen": 16.42742347717285, + "log_odds_ratio": -0.015045564621686935, + "logits/chosen": -1.2534525394439697, + "logits/rejected": -1.307205080986023, + "logps/chosen": -0.3434114456176758, + "logps/rejected": -15.441001892089844, + "loss": 0.4899, + "nll_loss": 0.45075052976608276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03434114530682564, + "rewards/margins": 1.5097589492797852, + "rewards/rejected": -1.5441001653671265, + "step": 613 + }, + { + "epoch": 1.2003910068426198, + "grad_norm": 0.6872751116752625, + "learning_rate": 3.0136986301369862e-05, + "log_odds_chosen": 14.65786361694336, + "log_odds_ratio": -0.058811116963624954, + "logits/chosen": -1.4515619277954102, + "logits/rejected": -1.247408390045166, + "logps/chosen": -0.3632899820804596, + "logps/rejected": -13.656795501708984, + "loss": 0.4853, + "nll_loss": 0.49331343173980713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0363290011882782, + "rewards/margins": 1.3293505907058716, + "rewards/rejected": -1.3656796216964722, + "step": 614 + }, + { + "epoch": 1.2023460410557185, + "grad_norm": 0.6651281118392944, + "learning_rate": 3.010437051532942e-05, + "log_odds_chosen": 14.946439743041992, + "log_odds_ratio": -0.03502390906214714, + "logits/chosen": -1.2261791229248047, + "logits/rejected": -1.191378116607666, + "logps/chosen": -0.5103687047958374, + "logps/rejected": -14.411428451538086, + "loss": 0.4748, + "nll_loss": 0.5766055583953857, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05103687196969986, + "rewards/margins": 1.390105962753296, + "rewards/rejected": -1.4411427974700928, + "step": 615 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.6944769024848938, + "learning_rate": 3.0071754729288974e-05, + "log_odds_chosen": 13.08912181854248, + "log_odds_ratio": -0.011264370754361153, + "logits/chosen": -1.1927950382232666, + "logits/rejected": -1.252568006515503, + "logps/chosen": -0.3867856562137604, + "logps/rejected": -12.184761047363281, + "loss": 0.4887, + "nll_loss": 0.528286337852478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03867856785655022, + "rewards/margins": 1.1797975301742554, + "rewards/rejected": -1.2184761762619019, + "step": 616 + }, + { + "epoch": 1.206256109481916, + "grad_norm": 0.6944798231124878, + "learning_rate": 3.0039138943248533e-05, + "log_odds_chosen": 9.453985214233398, + "log_odds_ratio": -0.031254664063453674, + "logits/chosen": -1.236262321472168, + "logits/rejected": -1.347424030303955, + "logps/chosen": -0.27717769145965576, + "logps/rejected": -8.275997161865234, + "loss": 0.4873, + "nll_loss": 0.36599820852279663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027717769145965576, + "rewards/margins": 0.7998819351196289, + "rewards/rejected": -0.8275996446609497, + "step": 617 + }, + { + "epoch": 1.2082111436950147, + "grad_norm": 0.7089403867721558, + "learning_rate": 3.000652315720809e-05, + "log_odds_chosen": 9.645415306091309, + "log_odds_ratio": -0.048951778560876846, + "logits/chosen": -1.239474892616272, + "logits/rejected": -1.1851897239685059, + "logps/chosen": -0.4285425841808319, + "logps/rejected": -8.710281372070312, + "loss": 0.4777, + "nll_loss": 0.549185037612915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04285426437854767, + "rewards/margins": 0.8281738758087158, + "rewards/rejected": -0.8710281848907471, + "step": 618 + }, + { + "epoch": 1.2101661779081134, + "grad_norm": 0.6846951246261597, + "learning_rate": 2.997390737116765e-05, + "log_odds_chosen": 17.653854370117188, + "log_odds_ratio": -0.0026721772737801075, + "logits/chosen": -1.2430682182312012, + "logits/rejected": -1.3348220586776733, + "logps/chosen": -0.2767616808414459, + "logps/rejected": -16.36576271057129, + "loss": 0.4756, + "nll_loss": 0.35761502385139465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027676168829202652, + "rewards/margins": 1.6089003086090088, + "rewards/rejected": -1.6365762948989868, + "step": 619 + }, + { + "epoch": 1.2121212121212122, + "grad_norm": 0.7194717526435852, + "learning_rate": 2.9941291585127202e-05, + "log_odds_chosen": 6.551730632781982, + "log_odds_ratio": -0.06476765871047974, + "logits/chosen": -1.3311944007873535, + "logits/rejected": -1.2889297008514404, + "logps/chosen": -0.4268190264701843, + "logps/rejected": -5.913201332092285, + "loss": 0.4936, + "nll_loss": 0.5732928514480591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04268190264701843, + "rewards/margins": 0.5486382842063904, + "rewards/rejected": -0.5913201570510864, + "step": 620 + }, + { + "epoch": 1.2140762463343109, + "grad_norm": 0.6692578792572021, + "learning_rate": 2.990867579908676e-05, + "log_odds_chosen": 17.76357078552246, + "log_odds_ratio": -0.03090531937777996, + "logits/chosen": -1.2033178806304932, + "logits/rejected": -1.1330718994140625, + "logps/chosen": -0.4167267084121704, + "logps/rejected": -16.938222885131836, + "loss": 0.4947, + "nll_loss": 0.5938361883163452, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04167266935110092, + "rewards/margins": 1.6521499156951904, + "rewards/rejected": -1.6938225030899048, + "step": 621 + }, + { + "epoch": 1.2160312805474096, + "grad_norm": 0.6951360702514648, + "learning_rate": 2.9876060013046314e-05, + "log_odds_chosen": 11.669878959655762, + "log_odds_ratio": -0.0318027026951313, + "logits/chosen": -1.3981075286865234, + "logits/rejected": -1.1393872499465942, + "logps/chosen": -0.25424784421920776, + "logps/rejected": -10.239509582519531, + "loss": 0.4857, + "nll_loss": 0.4178755581378937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025424785912036896, + "rewards/margins": 0.9985262155532837, + "rewards/rejected": -1.0239510536193848, + "step": 622 + }, + { + "epoch": 1.2179863147605083, + "grad_norm": 0.6717251539230347, + "learning_rate": 2.9843444227005873e-05, + "log_odds_chosen": 12.462961196899414, + "log_odds_ratio": -0.02752752974629402, + "logits/chosen": -1.4185445308685303, + "logits/rejected": -1.2847777605056763, + "logps/chosen": -0.3059079051017761, + "logps/rejected": -11.300549507141113, + "loss": 0.4842, + "nll_loss": 0.42613232135772705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030590791255235672, + "rewards/margins": 1.0994641780853271, + "rewards/rejected": -1.1300549507141113, + "step": 623 + }, + { + "epoch": 1.219941348973607, + "grad_norm": 0.6920968890190125, + "learning_rate": 2.9810828440965426e-05, + "log_odds_chosen": 10.293636322021484, + "log_odds_ratio": -0.08401717245578766, + "logits/chosen": -1.3201717138290405, + "logits/rejected": -1.1733555793762207, + "logps/chosen": -0.4774496555328369, + "logps/rejected": -9.750580787658691, + "loss": 0.4835, + "nll_loss": 0.6712399125099182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04774496704339981, + "rewards/margins": 0.9273131489753723, + "rewards/rejected": -0.9750580787658691, + "step": 624 + }, + { + "epoch": 1.2218963831867058, + "grad_norm": 0.6943438053131104, + "learning_rate": 2.9778212654924986e-05, + "log_odds_chosen": 12.611335754394531, + "log_odds_ratio": -0.005705300252884626, + "logits/chosen": -1.335741639137268, + "logits/rejected": -1.3353112936019897, + "logps/chosen": -0.4190712869167328, + "logps/rejected": -11.751867294311523, + "loss": 0.4927, + "nll_loss": 0.635893702507019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04190713167190552, + "rewards/margins": 1.13327956199646, + "rewards/rejected": -1.1751866340637207, + "step": 625 + }, + { + "epoch": 1.2238514173998045, + "grad_norm": 0.6885552406311035, + "learning_rate": 2.9745596868884538e-05, + "log_odds_chosen": 14.069778442382812, + "log_odds_ratio": -0.04266669228672981, + "logits/chosen": -1.362778902053833, + "logits/rejected": -1.2936749458312988, + "logps/chosen": -0.4220017194747925, + "logps/rejected": -13.477133750915527, + "loss": 0.4634, + "nll_loss": 0.5133793354034424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04220017045736313, + "rewards/margins": 1.3055131435394287, + "rewards/rejected": -1.3477134704589844, + "step": 626 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.6755397319793701, + "learning_rate": 2.9712981082844098e-05, + "log_odds_chosen": 14.083576202392578, + "log_odds_ratio": -0.03461562469601631, + "logits/chosen": -1.481644868850708, + "logits/rejected": -1.3326897621154785, + "logps/chosen": -0.3833985924720764, + "logps/rejected": -13.224565505981445, + "loss": 0.4817, + "nll_loss": 0.4278715252876282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03833985701203346, + "rewards/margins": 1.2841168642044067, + "rewards/rejected": -1.3224565982818604, + "step": 627 + }, + { + "epoch": 1.227761485826002, + "grad_norm": 0.6885781288146973, + "learning_rate": 2.9680365296803654e-05, + "log_odds_chosen": 8.714710235595703, + "log_odds_ratio": -0.04532979428768158, + "logits/chosen": -1.2138689756393433, + "logits/rejected": -1.3395596742630005, + "logps/chosen": -0.25163131952285767, + "logps/rejected": -7.472148418426514, + "loss": 0.4867, + "nll_loss": 0.40133947134017944, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025163132697343826, + "rewards/margins": 0.722051739692688, + "rewards/rejected": -0.7472148537635803, + "step": 628 + }, + { + "epoch": 1.2297165200391007, + "grad_norm": 0.6761809587478638, + "learning_rate": 2.9647749510763213e-05, + "log_odds_chosen": 12.84821891784668, + "log_odds_ratio": -0.031668100506067276, + "logits/chosen": -1.2949095964431763, + "logits/rejected": -1.2184085845947266, + "logps/chosen": -0.3211953639984131, + "logps/rejected": -11.758410453796387, + "loss": 0.4712, + "nll_loss": 0.45129746198654175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03211953490972519, + "rewards/margins": 1.143721580505371, + "rewards/rejected": -1.1758410930633545, + "step": 629 + }, + { + "epoch": 1.2316715542521994, + "grad_norm": 0.6686386466026306, + "learning_rate": 2.9615133724722766e-05, + "log_odds_chosen": 8.991713523864746, + "log_odds_ratio": -0.03891807794570923, + "logits/chosen": -1.3104655742645264, + "logits/rejected": -1.3480138778686523, + "logps/chosen": -0.36090052127838135, + "logps/rejected": -7.989012718200684, + "loss": 0.4674, + "nll_loss": 0.4880182147026062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036090049892663956, + "rewards/margins": 0.7628113031387329, + "rewards/rejected": -0.7989013195037842, + "step": 630 + }, + { + "epoch": 1.2336265884652982, + "grad_norm": 0.691981315612793, + "learning_rate": 2.9582517938682325e-05, + "log_odds_chosen": 11.358559608459473, + "log_odds_ratio": -0.05037148296833038, + "logits/chosen": -1.450415849685669, + "logits/rejected": -1.339939832687378, + "logps/chosen": -0.2984842360019684, + "logps/rejected": -10.19227123260498, + "loss": 0.48, + "nll_loss": 0.49573832750320435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029848424717783928, + "rewards/margins": 0.9893786311149597, + "rewards/rejected": -1.019227147102356, + "step": 631 + }, + { + "epoch": 1.2355816226783969, + "grad_norm": 0.6988846659660339, + "learning_rate": 2.9549902152641878e-05, + "log_odds_chosen": 8.01124095916748, + "log_odds_ratio": -0.0940786674618721, + "logits/chosen": -1.3196498155593872, + "logits/rejected": -1.1292251348495483, + "logps/chosen": -0.26000139117240906, + "logps/rejected": -6.873319625854492, + "loss": 0.4759, + "nll_loss": 0.39491504430770874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026000138372182846, + "rewards/margins": 0.6613317728042603, + "rewards/rejected": -0.6873319149017334, + "step": 632 + }, + { + "epoch": 1.2375366568914956, + "grad_norm": 0.7175016403198242, + "learning_rate": 2.9517286366601438e-05, + "log_odds_chosen": 10.71984577178955, + "log_odds_ratio": -0.008622369728982449, + "logits/chosen": -1.4615199565887451, + "logits/rejected": -1.3165082931518555, + "logps/chosen": -0.264209121465683, + "logps/rejected": -9.438597679138184, + "loss": 0.4914, + "nll_loss": 0.42680463194847107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026420913636684418, + "rewards/margins": 0.9174389243125916, + "rewards/rejected": -0.9438598155975342, + "step": 633 + }, + { + "epoch": 1.2394916911045943, + "grad_norm": 0.6816957592964172, + "learning_rate": 2.948467058056099e-05, + "log_odds_chosen": 5.951194763183594, + "log_odds_ratio": -0.12382426112890244, + "logits/chosen": -1.2714701890945435, + "logits/rejected": -1.0995817184448242, + "logps/chosen": -0.29421064257621765, + "logps/rejected": -5.059694766998291, + "loss": 0.4758, + "nll_loss": 0.4204973578453064, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.029421065002679825, + "rewards/margins": 0.4765484631061554, + "rewards/rejected": -0.5059695243835449, + "step": 634 + }, + { + "epoch": 1.241446725317693, + "grad_norm": 0.7087035179138184, + "learning_rate": 2.945205479452055e-05, + "log_odds_chosen": 12.513680458068848, + "log_odds_ratio": -0.03483537957072258, + "logits/chosen": -1.1820404529571533, + "logits/rejected": -1.1919770240783691, + "logps/chosen": -0.40605250000953674, + "logps/rejected": -11.790498733520508, + "loss": 0.4726, + "nll_loss": 0.4733390212059021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04060525447130203, + "rewards/margins": 1.1384446620941162, + "rewards/rejected": -1.1790497303009033, + "step": 635 + }, + { + "epoch": 1.2434017595307918, + "grad_norm": 0.696841299533844, + "learning_rate": 2.9419439008480102e-05, + "log_odds_chosen": 11.135854721069336, + "log_odds_ratio": -0.06789536774158478, + "logits/chosen": -1.133817434310913, + "logits/rejected": -1.1056270599365234, + "logps/chosen": -0.40968525409698486, + "logps/rejected": -10.447938919067383, + "loss": 0.485, + "nll_loss": 0.6372969150543213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04096852242946625, + "rewards/margins": 1.0038254261016846, + "rewards/rejected": -1.044793963432312, + "step": 636 + }, + { + "epoch": 1.2453567937438905, + "grad_norm": 0.6969045996665955, + "learning_rate": 2.9386823222439662e-05, + "log_odds_chosen": 19.806442260742188, + "log_odds_ratio": -0.010339247062802315, + "logits/chosen": -1.197753667831421, + "logits/rejected": -1.1522630453109741, + "logps/chosen": -0.284285306930542, + "logps/rejected": -18.556640625, + "loss": 0.4774, + "nll_loss": 0.4124361574649811, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02842853218317032, + "rewards/margins": 1.8272355794906616, + "rewards/rejected": -1.8556640148162842, + "step": 637 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.6917294263839722, + "learning_rate": 2.9354207436399218e-05, + "log_odds_chosen": 12.11132526397705, + "log_odds_ratio": -0.04090193659067154, + "logits/chosen": -1.147544026374817, + "logits/rejected": -1.1810929775238037, + "logps/chosen": -0.3413219153881073, + "logps/rejected": -11.158011436462402, + "loss": 0.4617, + "nll_loss": 0.5432823300361633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03413219004869461, + "rewards/margins": 1.0816689729690552, + "rewards/rejected": -1.1158010959625244, + "step": 638 + }, + { + "epoch": 1.249266862170088, + "grad_norm": 0.674623429775238, + "learning_rate": 2.9321591650358778e-05, + "log_odds_chosen": 9.725897789001465, + "log_odds_ratio": -0.025320829823613167, + "logits/chosen": -1.3424623012542725, + "logits/rejected": -1.201694130897522, + "logps/chosen": -0.25895652174949646, + "logps/rejected": -8.414819717407227, + "loss": 0.4656, + "nll_loss": 0.35587871074676514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025895653292536736, + "rewards/margins": 0.8155863285064697, + "rewards/rejected": -0.8414820432662964, + "step": 639 + }, + { + "epoch": 1.2512218963831867, + "grad_norm": 0.7191535234451294, + "learning_rate": 2.928897586431833e-05, + "log_odds_chosen": 9.943578720092773, + "log_odds_ratio": -0.035860575735569, + "logits/chosen": -1.32865571975708, + "logits/rejected": -1.417319655418396, + "logps/chosen": -0.32248926162719727, + "logps/rejected": -8.830098152160645, + "loss": 0.4765, + "nll_loss": 0.42321494221687317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032248929142951965, + "rewards/margins": 0.8507610559463501, + "rewards/rejected": -0.8830099105834961, + "step": 640 + }, + { + "epoch": 1.2531769305962854, + "grad_norm": 0.7051622867584229, + "learning_rate": 2.925636007827789e-05, + "log_odds_chosen": 9.774035453796387, + "log_odds_ratio": -0.02657504752278328, + "logits/chosen": -1.2485740184783936, + "logits/rejected": -1.3046910762786865, + "logps/chosen": -0.25320953130722046, + "logps/rejected": -8.355206489562988, + "loss": 0.4739, + "nll_loss": 0.41132888197898865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025320954620838165, + "rewards/margins": 0.8101997375488281, + "rewards/rejected": -0.8355206847190857, + "step": 641 + }, + { + "epoch": 1.2551319648093842, + "grad_norm": 0.7108646631240845, + "learning_rate": 2.9223744292237442e-05, + "log_odds_chosen": 13.357117652893066, + "log_odds_ratio": -0.029244892299175262, + "logits/chosen": -1.1505727767944336, + "logits/rejected": -1.1312223672866821, + "logps/chosen": -0.2881747782230377, + "logps/rejected": -11.815256118774414, + "loss": 0.4632, + "nll_loss": 0.4658285975456238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028817474842071533, + "rewards/margins": 1.1527081727981567, + "rewards/rejected": -1.1815255880355835, + "step": 642 + }, + { + "epoch": 1.2570869990224829, + "grad_norm": 0.6850515007972717, + "learning_rate": 2.9191128506197002e-05, + "log_odds_chosen": 11.16917610168457, + "log_odds_ratio": -0.03880474716424942, + "logits/chosen": -1.3768970966339111, + "logits/rejected": -1.3146092891693115, + "logps/chosen": -0.4389021098613739, + "logps/rejected": -10.273286819458008, + "loss": 0.4692, + "nll_loss": 0.46013885736465454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04389021173119545, + "rewards/margins": 0.9834383726119995, + "rewards/rejected": -1.0273287296295166, + "step": 643 + }, + { + "epoch": 1.2590420332355816, + "grad_norm": 0.6936293244361877, + "learning_rate": 2.9158512720156555e-05, + "log_odds_chosen": 8.063065528869629, + "log_odds_ratio": -0.05729047954082489, + "logits/chosen": -1.17995285987854, + "logits/rejected": -1.4341195821762085, + "logps/chosen": -0.4050953984260559, + "logps/rejected": -7.266781806945801, + "loss": 0.4597, + "nll_loss": 0.47900867462158203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04050953686237335, + "rewards/margins": 0.6861686110496521, + "rewards/rejected": -0.726678192615509, + "step": 644 + }, + { + "epoch": 1.2609970674486803, + "grad_norm": 0.6953266263008118, + "learning_rate": 2.9125896934116114e-05, + "log_odds_chosen": 14.536052703857422, + "log_odds_ratio": -0.0425211638212204, + "logits/chosen": -1.4794453382492065, + "logits/rejected": -1.4088058471679688, + "logps/chosen": -0.29542356729507446, + "logps/rejected": -13.310650825500488, + "loss": 0.4694, + "nll_loss": 0.39285945892333984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029542358592152596, + "rewards/margins": 1.3015227317810059, + "rewards/rejected": -1.331065058708191, + "step": 645 + }, + { + "epoch": 1.262952101661779, + "grad_norm": 0.7129997611045837, + "learning_rate": 2.9093281148075667e-05, + "log_odds_chosen": 15.032266616821289, + "log_odds_ratio": -0.008173049427568913, + "logits/chosen": -1.4382915496826172, + "logits/rejected": -1.247588872909546, + "logps/chosen": -0.27556508779525757, + "logps/rejected": -13.661211013793945, + "loss": 0.47, + "nll_loss": 0.354035347700119, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027556512504816055, + "rewards/margins": 1.3385646343231201, + "rewards/rejected": -1.3661212921142578, + "step": 646 + }, + { + "epoch": 1.2649071358748778, + "grad_norm": 0.681011438369751, + "learning_rate": 2.9060665362035226e-05, + "log_odds_chosen": 16.281776428222656, + "log_odds_ratio": -0.004902908578515053, + "logits/chosen": -1.3517885208129883, + "logits/rejected": -1.2592746019363403, + "logps/chosen": -0.3735232949256897, + "logps/rejected": -15.233057022094727, + "loss": 0.4598, + "nll_loss": 0.5573760271072388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03735232725739479, + "rewards/margins": 1.485953450202942, + "rewards/rejected": -1.5233056545257568, + "step": 647 + }, + { + "epoch": 1.2668621700879765, + "grad_norm": 0.6982327699661255, + "learning_rate": 2.9028049575994782e-05, + "log_odds_chosen": 13.85255241394043, + "log_odds_ratio": -0.0044286069460213184, + "logits/chosen": -1.2552355527877808, + "logits/rejected": -1.2449619770050049, + "logps/chosen": -0.2921157479286194, + "logps/rejected": -12.522171020507812, + "loss": 0.4624, + "nll_loss": 0.40862539410591125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029211577028036118, + "rewards/margins": 1.2230055332183838, + "rewards/rejected": -1.252217173576355, + "step": 648 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.663921594619751, + "learning_rate": 2.8995433789954342e-05, + "log_odds_chosen": 13.745746612548828, + "log_odds_ratio": -0.01603000983595848, + "logits/chosen": -1.4906558990478516, + "logits/rejected": -1.3257198333740234, + "logps/chosen": -0.2238606959581375, + "logps/rejected": -12.276678085327148, + "loss": 0.4457, + "nll_loss": 0.35765892267227173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02238607220351696, + "rewards/margins": 1.2052818536758423, + "rewards/rejected": -1.2276678085327148, + "step": 649 + }, + { + "epoch": 1.270772238514174, + "grad_norm": 0.6663202047348022, + "learning_rate": 2.8962818003913894e-05, + "log_odds_chosen": 10.896696090698242, + "log_odds_ratio": -0.046670135110616684, + "logits/chosen": -1.2423620223999023, + "logits/rejected": -1.0544989109039307, + "logps/chosen": -0.3554762005805969, + "logps/rejected": -9.846628189086914, + "loss": 0.4526, + "nll_loss": 0.5162559151649475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03554762154817581, + "rewards/margins": 0.9491152167320251, + "rewards/rejected": -0.9846627712249756, + "step": 650 + }, + { + "epoch": 1.2727272727272727, + "grad_norm": 0.7168102860450745, + "learning_rate": 2.8930202217873454e-05, + "log_odds_chosen": 11.216144561767578, + "log_odds_ratio": -0.046391814947128296, + "logits/chosen": -1.3638190031051636, + "logits/rejected": -1.2868831157684326, + "logps/chosen": -0.28246593475341797, + "logps/rejected": -9.781414031982422, + "loss": 0.4634, + "nll_loss": 0.37078189849853516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028246592730283737, + "rewards/margins": 0.9498947858810425, + "rewards/rejected": -0.9781414270401001, + "step": 651 + }, + { + "epoch": 1.2746823069403714, + "grad_norm": 0.6971044540405273, + "learning_rate": 2.8897586431833007e-05, + "log_odds_chosen": 12.095852851867676, + "log_odds_ratio": -0.04096267372369766, + "logits/chosen": -1.3573212623596191, + "logits/rejected": -1.5912344455718994, + "logps/chosen": -0.4925471544265747, + "logps/rejected": -11.598838806152344, + "loss": 0.4625, + "nll_loss": 0.550658106803894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04925471544265747, + "rewards/margins": 1.1106290817260742, + "rewards/rejected": -1.1598838567733765, + "step": 652 + }, + { + "epoch": 1.2766373411534702, + "grad_norm": 0.6941003799438477, + "learning_rate": 2.8864970645792566e-05, + "log_odds_chosen": 9.300117492675781, + "log_odds_ratio": -0.0434623658657074, + "logits/chosen": -1.4293335676193237, + "logits/rejected": -1.3046329021453857, + "logps/chosen": -0.26449137926101685, + "logps/rejected": -8.136804580688477, + "loss": 0.4717, + "nll_loss": 0.39164865016937256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026449136435985565, + "rewards/margins": 0.7872313857078552, + "rewards/rejected": -0.8136805295944214, + "step": 653 + }, + { + "epoch": 1.278592375366569, + "grad_norm": 0.6656367182731628, + "learning_rate": 2.883235485975212e-05, + "log_odds_chosen": 16.3642578125, + "log_odds_ratio": -0.031436532735824585, + "logits/chosen": -1.4555974006652832, + "logits/rejected": -1.4293665885925293, + "logps/chosen": -0.3501516580581665, + "logps/rejected": -15.344696044921875, + "loss": 0.46, + "nll_loss": 0.43604904413223267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03501516580581665, + "rewards/margins": 1.4994544982910156, + "rewards/rejected": -1.5344696044921875, + "step": 654 + }, + { + "epoch": 1.2805474095796676, + "grad_norm": 0.6554001569747925, + "learning_rate": 2.8799739073711678e-05, + "log_odds_chosen": 14.107046127319336, + "log_odds_ratio": -0.04490998387336731, + "logits/chosen": -1.3327441215515137, + "logits/rejected": -1.2524676322937012, + "logps/chosen": -0.25259852409362793, + "logps/rejected": -12.856697082519531, + "loss": 0.4504, + "nll_loss": 0.45484989881515503, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025259852409362793, + "rewards/margins": 1.2604098320007324, + "rewards/rejected": -1.2856696844100952, + "step": 655 + }, + { + "epoch": 1.2825024437927663, + "grad_norm": 0.6803367137908936, + "learning_rate": 2.8767123287671234e-05, + "log_odds_chosen": 11.162376403808594, + "log_odds_ratio": -0.09028522670269012, + "logits/chosen": -1.3709113597869873, + "logits/rejected": -1.2762020826339722, + "logps/chosen": -0.4070039689540863, + "logps/rejected": -10.310009002685547, + "loss": 0.4579, + "nll_loss": 0.4829605519771576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04070039838552475, + "rewards/margins": 0.9903004169464111, + "rewards/rejected": -1.0310008525848389, + "step": 656 + }, + { + "epoch": 1.284457478005865, + "grad_norm": 0.6612158417701721, + "learning_rate": 2.873450750163079e-05, + "log_odds_chosen": 8.099418640136719, + "log_odds_ratio": -0.06240250915288925, + "logits/chosen": -1.4552009105682373, + "logits/rejected": -1.416378140449524, + "logps/chosen": -0.2275254726409912, + "logps/rejected": -6.747317314147949, + "loss": 0.4497, + "nll_loss": 0.27954840660095215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022752545773983, + "rewards/margins": 0.6519792079925537, + "rewards/rejected": -0.6747317314147949, + "step": 657 + }, + { + "epoch": 1.2864125122189638, + "grad_norm": 0.7059373259544373, + "learning_rate": 2.8701891715590347e-05, + "log_odds_chosen": 22.107519149780273, + "log_odds_ratio": -0.00015205472300294787, + "logits/chosen": -1.2416419982910156, + "logits/rejected": -0.985344409942627, + "logps/chosen": -0.2244822084903717, + "logps/rejected": -20.567794799804688, + "loss": 0.4575, + "nll_loss": 0.41667285561561584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02244822308421135, + "rewards/margins": 2.0343313217163086, + "rewards/rejected": -2.056779623031616, + "step": 658 + }, + { + "epoch": 1.2883675464320625, + "grad_norm": 0.6965703964233398, + "learning_rate": 2.8669275929549906e-05, + "log_odds_chosen": 15.650067329406738, + "log_odds_ratio": -0.017963936552405357, + "logits/chosen": -1.4215092658996582, + "logits/rejected": -1.0932588577270508, + "logps/chosen": -0.23767483234405518, + "logps/rejected": -14.212404251098633, + "loss": 0.4636, + "nll_loss": 0.42050373554229736, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023767484351992607, + "rewards/margins": 1.397472858428955, + "rewards/rejected": -1.4212404489517212, + "step": 659 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.6889711022377014, + "learning_rate": 2.863666014350946e-05, + "log_odds_chosen": 10.576282501220703, + "log_odds_ratio": -0.02785879746079445, + "logits/chosen": -1.3638919591903687, + "logits/rejected": -1.336370587348938, + "logps/chosen": -0.3548089265823364, + "logps/rejected": -9.632658958435059, + "loss": 0.4517, + "nll_loss": 0.48341110348701477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03548089414834976, + "rewards/margins": 0.9277849197387695, + "rewards/rejected": -0.9632658362388611, + "step": 660 + }, + { + "epoch": 1.29227761485826, + "grad_norm": 0.6701059937477112, + "learning_rate": 2.8604044357469018e-05, + "log_odds_chosen": 9.051522254943848, + "log_odds_ratio": -0.0739707499742508, + "logits/chosen": -1.4490935802459717, + "logits/rejected": -1.3543627262115479, + "logps/chosen": -0.3694194555282593, + "logps/rejected": -7.973056793212891, + "loss": 0.4493, + "nll_loss": 0.43298017978668213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03694194555282593, + "rewards/margins": 0.7603638172149658, + "rewards/rejected": -0.797305703163147, + "step": 661 + }, + { + "epoch": 1.2942326490713587, + "grad_norm": 0.6888431310653687, + "learning_rate": 2.857142857142857e-05, + "log_odds_chosen": 7.311617374420166, + "log_odds_ratio": -0.04778500646352768, + "logits/chosen": -1.433006763458252, + "logits/rejected": -1.4050796031951904, + "logps/chosen": -0.28415337204933167, + "logps/rejected": -6.239870071411133, + "loss": 0.4634, + "nll_loss": 0.5137395858764648, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028415337204933167, + "rewards/margins": 0.5955716371536255, + "rewards/rejected": -0.6239869594573975, + "step": 662 + }, + { + "epoch": 1.2961876832844574, + "grad_norm": 0.6752138733863831, + "learning_rate": 2.853881278538813e-05, + "log_odds_chosen": 15.962257385253906, + "log_odds_ratio": -0.034671589732170105, + "logits/chosen": -1.1380228996276855, + "logits/rejected": -1.3663415908813477, + "logps/chosen": -0.38590505719184875, + "logps/rejected": -15.061128616333008, + "loss": 0.4617, + "nll_loss": 0.5973658561706543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.038590505719184875, + "rewards/margins": 1.467522382736206, + "rewards/rejected": -1.506112813949585, + "step": 663 + }, + { + "epoch": 1.2981427174975562, + "grad_norm": 0.6632657647132874, + "learning_rate": 2.8506196999347683e-05, + "log_odds_chosen": 13.94062328338623, + "log_odds_ratio": -0.02562783658504486, + "logits/chosen": -1.3676890134811401, + "logits/rejected": -1.3013091087341309, + "logps/chosen": -0.36565279960632324, + "logps/rejected": -12.93145751953125, + "loss": 0.4519, + "nll_loss": 0.47567880153656006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.036565277725458145, + "rewards/margins": 1.2565805912017822, + "rewards/rejected": -1.293145775794983, + "step": 664 + }, + { + "epoch": 1.300097751710655, + "grad_norm": 0.6943866610527039, + "learning_rate": 2.8473581213307243e-05, + "log_odds_chosen": 10.733720779418945, + "log_odds_ratio": -0.040177732706069946, + "logits/chosen": -1.4092988967895508, + "logits/rejected": -1.1964191198349, + "logps/chosen": -0.3087679147720337, + "logps/rejected": -9.483551025390625, + "loss": 0.4677, + "nll_loss": 0.5438419580459595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03087678924202919, + "rewards/margins": 0.9174782037734985, + "rewards/rejected": -0.9483550786972046, + "step": 665 + }, + { + "epoch": 1.3020527859237536, + "grad_norm": 0.6566576361656189, + "learning_rate": 2.84409654272668e-05, + "log_odds_chosen": 15.971263885498047, + "log_odds_ratio": -0.004497944843024015, + "logits/chosen": -1.3063322305679321, + "logits/rejected": -1.2055439949035645, + "logps/chosen": -0.27331972122192383, + "logps/rejected": -14.608938217163086, + "loss": 0.4411, + "nll_loss": 0.4292348027229309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027331974357366562, + "rewards/margins": 1.4335618019104004, + "rewards/rejected": -1.4608938694000244, + "step": 666 + }, + { + "epoch": 1.3040078201368523, + "grad_norm": 0.6909797787666321, + "learning_rate": 2.8408349641226355e-05, + "log_odds_chosen": 17.993717193603516, + "log_odds_ratio": -0.00275968830101192, + "logits/chosen": -1.3942333459854126, + "logits/rejected": -1.3599858283996582, + "logps/chosen": -0.39797425270080566, + "logps/rejected": -16.946975708007812, + "loss": 0.4457, + "nll_loss": 0.6086827516555786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.039797428995370865, + "rewards/margins": 1.654900074005127, + "rewards/rejected": -1.694697618484497, + "step": 667 + }, + { + "epoch": 1.305962854349951, + "grad_norm": 0.6776682734489441, + "learning_rate": 2.837573385518591e-05, + "log_odds_chosen": 13.85862922668457, + "log_odds_ratio": -0.04227109253406525, + "logits/chosen": -1.3009560108184814, + "logits/rejected": -1.3240375518798828, + "logps/chosen": -0.4622621536254883, + "logps/rejected": -13.199660301208496, + "loss": 0.4465, + "nll_loss": 0.5514085292816162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04622621834278107, + "rewards/margins": 1.2737398147583008, + "rewards/rejected": -1.3199659585952759, + "step": 668 + }, + { + "epoch": 1.3079178885630498, + "grad_norm": 0.6837126016616821, + "learning_rate": 2.834311806914547e-05, + "log_odds_chosen": 19.20944595336914, + "log_odds_ratio": -1.0657777238520794e-05, + "logits/chosen": -1.4685053825378418, + "logits/rejected": -1.2955538034439087, + "logps/chosen": -0.3692167401313782, + "logps/rejected": -18.229869842529297, + "loss": 0.4514, + "nll_loss": 0.4700937867164612, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0369216725230217, + "rewards/margins": 1.7860653400421143, + "rewards/rejected": -1.8229870796203613, + "step": 669 + }, + { + "epoch": 1.3098729227761485, + "grad_norm": 0.6768202781677246, + "learning_rate": 2.8310502283105023e-05, + "log_odds_chosen": 13.478382110595703, + "log_odds_ratio": -0.03241470456123352, + "logits/chosen": -1.3503410816192627, + "logits/rejected": -1.4251132011413574, + "logps/chosen": -0.3030715584754944, + "logps/rejected": -12.428572654724121, + "loss": 0.4435, + "nll_loss": 0.47822919487953186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030307158827781677, + "rewards/margins": 1.2125500440597534, + "rewards/rejected": -1.2428572177886963, + "step": 670 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.6966361403465271, + "learning_rate": 2.8277886497064582e-05, + "log_odds_chosen": 10.531070709228516, + "log_odds_ratio": -0.042165011167526245, + "logits/chosen": -1.31465744972229, + "logits/rejected": -1.3605504035949707, + "logps/chosen": -0.32917165756225586, + "logps/rejected": -9.294025421142578, + "loss": 0.4587, + "nll_loss": 0.44068169593811035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032917167991399765, + "rewards/margins": 0.8964853882789612, + "rewards/rejected": -0.9294024705886841, + "step": 671 + }, + { + "epoch": 1.313782991202346, + "grad_norm": 0.6595525741577148, + "learning_rate": 2.8245270711024135e-05, + "log_odds_chosen": 11.48315715789795, + "log_odds_ratio": -0.0790652260184288, + "logits/chosen": -1.3123440742492676, + "logits/rejected": -1.3211885690689087, + "logps/chosen": -0.419420063495636, + "logps/rejected": -10.685153007507324, + "loss": 0.4284, + "nll_loss": 0.4986911416053772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04194200783967972, + "rewards/margins": 1.0265734195709229, + "rewards/rejected": -1.068515419960022, + "step": 672 + }, + { + "epoch": 1.3157380254154447, + "grad_norm": 0.6623773574829102, + "learning_rate": 2.8212654924983695e-05, + "log_odds_chosen": 9.25027084350586, + "log_odds_ratio": -0.0292662363499403, + "logits/chosen": -1.3858143091201782, + "logits/rejected": -1.210413932800293, + "logps/chosen": -0.34465670585632324, + "logps/rejected": -8.221890449523926, + "loss": 0.4337, + "nll_loss": 0.45546776056289673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.034465670585632324, + "rewards/margins": 0.7877233028411865, + "rewards/rejected": -0.8221890926361084, + "step": 673 + }, + { + "epoch": 1.3176930596285434, + "grad_norm": 0.6790416836738586, + "learning_rate": 2.8180039138943247e-05, + "log_odds_chosen": 18.08144760131836, + "log_odds_ratio": -0.015057490207254887, + "logits/chosen": -1.314862608909607, + "logits/rejected": -1.3059788942337036, + "logps/chosen": -0.3798813819885254, + "logps/rejected": -17.0919189453125, + "loss": 0.4425, + "nll_loss": 0.604043185710907, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03798814117908478, + "rewards/margins": 1.67120361328125, + "rewards/rejected": -1.7091917991638184, + "step": 674 + }, + { + "epoch": 1.3196480938416422, + "grad_norm": 0.656650960445404, + "learning_rate": 2.8147423352902807e-05, + "log_odds_chosen": 13.67027759552002, + "log_odds_ratio": -0.08489196002483368, + "logits/chosen": -1.5986757278442383, + "logits/rejected": -1.3745474815368652, + "logps/chosen": -0.37375980615615845, + "logps/rejected": -12.766581535339355, + "loss": 0.4343, + "nll_loss": 0.4377150237560272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03737598657608032, + "rewards/margins": 1.2392821311950684, + "rewards/rejected": -1.276658296585083, + "step": 675 + }, + { + "epoch": 1.321603128054741, + "grad_norm": 0.7023094892501831, + "learning_rate": 2.8114807566862363e-05, + "log_odds_chosen": 14.260215759277344, + "log_odds_ratio": -0.009923448786139488, + "logits/chosen": -1.5990420579910278, + "logits/rejected": -1.2384703159332275, + "logps/chosen": -0.34574630856513977, + "logps/rejected": -13.052417755126953, + "loss": 0.455, + "nll_loss": 0.4603259563446045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03457462787628174, + "rewards/margins": 1.2706670761108398, + "rewards/rejected": -1.3052417039871216, + "step": 676 + }, + { + "epoch": 1.3235581622678396, + "grad_norm": 0.681706428527832, + "learning_rate": 2.808219178082192e-05, + "log_odds_chosen": 10.811444282531738, + "log_odds_ratio": -0.03407329320907593, + "logits/chosen": -1.3990345001220703, + "logits/rejected": -1.3545174598693848, + "logps/chosen": -0.31973665952682495, + "logps/rejected": -9.575150489807129, + "loss": 0.4386, + "nll_loss": 0.4856855571269989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031973667442798615, + "rewards/margins": 0.925541341304779, + "rewards/rejected": -0.9575150012969971, + "step": 677 + }, + { + "epoch": 1.3255131964809383, + "grad_norm": 0.6745299696922302, + "learning_rate": 2.8049575994781475e-05, + "log_odds_chosen": 10.264098167419434, + "log_odds_ratio": -0.033337876200675964, + "logits/chosen": -1.4621281623840332, + "logits/rejected": -1.2632859945297241, + "logps/chosen": -0.21555006504058838, + "logps/rejected": -8.806193351745605, + "loss": 0.4345, + "nll_loss": 0.3759111762046814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021555010229349136, + "rewards/margins": 0.8590643405914307, + "rewards/rejected": -0.8806194067001343, + "step": 678 + }, + { + "epoch": 1.327468230694037, + "grad_norm": 0.663748562335968, + "learning_rate": 2.8016960208741034e-05, + "log_odds_chosen": 12.279447555541992, + "log_odds_ratio": -0.023058228194713593, + "logits/chosen": -1.394614338874817, + "logits/rejected": -1.3734469413757324, + "logps/chosen": -0.2254663109779358, + "logps/rejected": -10.687788009643555, + "loss": 0.4268, + "nll_loss": 0.45340150594711304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02254663035273552, + "rewards/margins": 1.0462322235107422, + "rewards/rejected": -1.0687787532806396, + "step": 679 + }, + { + "epoch": 1.3294232649071358, + "grad_norm": 0.6617835164070129, + "learning_rate": 2.7984344422700587e-05, + "log_odds_chosen": 12.919355392456055, + "log_odds_ratio": -0.028566716238856316, + "logits/chosen": -1.3626270294189453, + "logits/rejected": -1.4054687023162842, + "logps/chosen": -0.30542266368865967, + "logps/rejected": -11.74847412109375, + "loss": 0.4499, + "nll_loss": 0.49918076395988464, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030542265623807907, + "rewards/margins": 1.1443052291870117, + "rewards/rejected": -1.1748474836349487, + "step": 680 + }, + { + "epoch": 1.3313782991202345, + "grad_norm": 0.6519189476966858, + "learning_rate": 2.7951728636660147e-05, + "log_odds_chosen": 9.578415870666504, + "log_odds_ratio": -0.07163581997156143, + "logits/chosen": -1.5932186841964722, + "logits/rejected": -1.5274804830551147, + "logps/chosen": -0.3257193863391876, + "logps/rejected": -8.380537033081055, + "loss": 0.4247, + "nll_loss": 0.4355069398880005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0325719378888607, + "rewards/margins": 0.8054817318916321, + "rewards/rejected": -0.8380537033081055, + "step": 681 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.6870196461677551, + "learning_rate": 2.79191128506197e-05, + "log_odds_chosen": 18.986557006835938, + "log_odds_ratio": -0.018086863681674004, + "logits/chosen": -1.4446392059326172, + "logits/rejected": -1.387089490890503, + "logps/chosen": -0.3695237636566162, + "logps/rejected": -17.94007110595703, + "loss": 0.4342, + "nll_loss": 0.4669676423072815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03695237636566162, + "rewards/margins": 1.7570546865463257, + "rewards/rejected": -1.7940073013305664, + "step": 682 + }, + { + "epoch": 1.335288367546432, + "grad_norm": 0.6744781732559204, + "learning_rate": 2.788649706457926e-05, + "log_odds_chosen": 16.512056350708008, + "log_odds_ratio": -0.0064162639901041985, + "logits/chosen": -1.3426151275634766, + "logits/rejected": -1.3282243013381958, + "logps/chosen": -0.19082871079444885, + "logps/rejected": -14.923271179199219, + "loss": 0.4395, + "nll_loss": 0.3552820682525635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019082874059677124, + "rewards/margins": 1.473244071006775, + "rewards/rejected": -1.4923269748687744, + "step": 683 + }, + { + "epoch": 1.3372434017595307, + "grad_norm": 0.694257915019989, + "learning_rate": 2.785388127853881e-05, + "log_odds_chosen": 7.068902969360352, + "log_odds_ratio": -0.07516761124134064, + "logits/chosen": -1.357407569885254, + "logits/rejected": -1.2171175479888916, + "logps/chosen": -0.2863836884498596, + "logps/rejected": -5.83249568939209, + "loss": 0.4471, + "nll_loss": 0.46345099806785583, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028638366609811783, + "rewards/margins": 0.5546112060546875, + "rewards/rejected": -0.583249568939209, + "step": 684 + }, + { + "epoch": 1.3391984359726294, + "grad_norm": 0.6702181696891785, + "learning_rate": 2.782126549249837e-05, + "log_odds_chosen": 10.66441535949707, + "log_odds_ratio": -0.0389629602432251, + "logits/chosen": -1.5486652851104736, + "logits/rejected": -1.3034348487854004, + "logps/chosen": -0.24610793590545654, + "logps/rejected": -9.267446517944336, + "loss": 0.4361, + "nll_loss": 0.35901790857315063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024610795080661774, + "rewards/margins": 0.9021338820457458, + "rewards/rejected": -0.9267446994781494, + "step": 685 + }, + { + "epoch": 1.3411534701857282, + "grad_norm": 0.6873118877410889, + "learning_rate": 2.7788649706457927e-05, + "log_odds_chosen": 5.294398307800293, + "log_odds_ratio": -0.06894001364707947, + "logits/chosen": -1.4093248844146729, + "logits/rejected": -1.2989306449890137, + "logps/chosen": -0.33735015988349915, + "logps/rejected": -4.402853965759277, + "loss": 0.4545, + "nll_loss": 0.4277664124965668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033735014498233795, + "rewards/margins": 0.4065503478050232, + "rewards/rejected": -0.4402853846549988, + "step": 686 + }, + { + "epoch": 1.343108504398827, + "grad_norm": 0.6663971543312073, + "learning_rate": 2.7756033920417483e-05, + "log_odds_chosen": 12.65275764465332, + "log_odds_ratio": -0.07603709399700165, + "logits/chosen": -1.256025791168213, + "logits/rejected": -1.0244063138961792, + "logps/chosen": -0.26410678029060364, + "logps/rejected": -11.480548858642578, + "loss": 0.4283, + "nll_loss": 0.3741031289100647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026410680264234543, + "rewards/margins": 1.1216442584991455, + "rewards/rejected": -1.1480549573898315, + "step": 687 + }, + { + "epoch": 1.3450635386119258, + "grad_norm": 0.6527528166770935, + "learning_rate": 2.772341813437704e-05, + "log_odds_chosen": 11.395517349243164, + "log_odds_ratio": -0.011718858033418655, + "logits/chosen": -1.3974684476852417, + "logits/rejected": -1.2133808135986328, + "logps/chosen": -0.21240857243537903, + "logps/rejected": -9.795578956604004, + "loss": 0.4226, + "nll_loss": 0.3603901267051697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02124086022377014, + "rewards/margins": 0.9583169221878052, + "rewards/rejected": -0.9795578718185425, + "step": 688 + }, + { + "epoch": 1.3470185728250246, + "grad_norm": 0.6886448264122009, + "learning_rate": 2.76908023483366e-05, + "log_odds_chosen": 14.29765796661377, + "log_odds_ratio": -0.009905068203806877, + "logits/chosen": -1.4202063083648682, + "logits/rejected": -1.272413730621338, + "logps/chosen": -0.19441872835159302, + "logps/rejected": -12.539636611938477, + "loss": 0.437, + "nll_loss": 0.29549217224121094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019441872835159302, + "rewards/margins": 1.2345216274261475, + "rewards/rejected": -1.253963589668274, + "step": 689 + }, + { + "epoch": 1.3489736070381233, + "grad_norm": 0.6611674427986145, + "learning_rate": 2.765818656229615e-05, + "log_odds_chosen": 16.080368041992188, + "log_odds_ratio": -0.000671834743116051, + "logits/chosen": -1.4878534078598022, + "logits/rejected": -1.3022570610046387, + "logps/chosen": -0.22564846277236938, + "logps/rejected": -14.498679161071777, + "loss": 0.4204, + "nll_loss": 0.35549861192703247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022564847022294998, + "rewards/margins": 1.4273029565811157, + "rewards/rejected": -1.4498677253723145, + "step": 690 + }, + { + "epoch": 1.350928641251222, + "grad_norm": 0.7026146054267883, + "learning_rate": 2.762557077625571e-05, + "log_odds_chosen": 7.488453388214111, + "log_odds_ratio": -0.05505525320768356, + "logits/chosen": -1.4582844972610474, + "logits/rejected": -1.3286621570587158, + "logps/chosen": -0.28034618496894836, + "logps/rejected": -6.495767116546631, + "loss": 0.4267, + "nll_loss": 0.37360233068466187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028034619987010956, + "rewards/margins": 0.6215420961380005, + "rewards/rejected": -0.649576723575592, + "step": 691 + }, + { + "epoch": 1.3528836754643208, + "grad_norm": 0.6747104525566101, + "learning_rate": 2.7592954990215264e-05, + "log_odds_chosen": 12.505614280700684, + "log_odds_ratio": -0.025640316307544708, + "logits/chosen": -1.5109752416610718, + "logits/rejected": -1.4672577381134033, + "logps/chosen": -0.17423053085803986, + "logps/rejected": -10.829279899597168, + "loss": 0.4308, + "nll_loss": 0.31921133399009705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017423052340745926, + "rewards/margins": 1.0655049085617065, + "rewards/rejected": -1.0829278230667114, + "step": 692 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.6674101948738098, + "learning_rate": 2.7560339204174823e-05, + "log_odds_chosen": 5.096267223358154, + "log_odds_ratio": -0.059145666658878326, + "logits/chosen": -1.4894006252288818, + "logits/rejected": -1.3692646026611328, + "logps/chosen": -0.48714980483055115, + "logps/rejected": -4.327075481414795, + "loss": 0.4222, + "nll_loss": 0.5842530131340027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.048714980483055115, + "rewards/margins": 0.3839925229549408, + "rewards/rejected": -0.4327075481414795, + "step": 693 + }, + { + "epoch": 1.3567937438905182, + "grad_norm": 0.6585062146186829, + "learning_rate": 2.7527723418134376e-05, + "log_odds_chosen": 15.407514572143555, + "log_odds_ratio": -0.006759217008948326, + "logits/chosen": -1.5894148349761963, + "logits/rejected": -1.4650204181671143, + "logps/chosen": -0.2422313690185547, + "logps/rejected": -14.08316421508789, + "loss": 0.4231, + "nll_loss": 0.3461146354675293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02422313578426838, + "rewards/margins": 1.3840932846069336, + "rewards/rejected": -1.4083163738250732, + "step": 694 + }, + { + "epoch": 1.358748778103617, + "grad_norm": 0.6618332862854004, + "learning_rate": 2.7495107632093935e-05, + "log_odds_chosen": 11.969976425170898, + "log_odds_ratio": -0.040374141186475754, + "logits/chosen": -1.3846194744110107, + "logits/rejected": -1.2889351844787598, + "logps/chosen": -0.3123231530189514, + "logps/rejected": -10.795225143432617, + "loss": 0.4198, + "nll_loss": 0.39309558272361755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0312323160469532, + "rewards/margins": 1.0482902526855469, + "rewards/rejected": -1.0795226097106934, + "step": 695 + }, + { + "epoch": 1.3607038123167157, + "grad_norm": 0.6682750582695007, + "learning_rate": 2.746249184605349e-05, + "log_odds_chosen": 6.993173122406006, + "log_odds_ratio": -0.09535076469182968, + "logits/chosen": -1.2524001598358154, + "logits/rejected": -1.3649201393127441, + "logps/chosen": -0.39942243695259094, + "logps/rejected": -6.218092918395996, + "loss": 0.4292, + "nll_loss": 0.4255730211734772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03994224593043327, + "rewards/margins": 0.5818670392036438, + "rewards/rejected": -0.6218092441558838, + "step": 696 + }, + { + "epoch": 1.3626588465298144, + "grad_norm": 0.6518443822860718, + "learning_rate": 2.7429876060013047e-05, + "log_odds_chosen": 15.511184692382812, + "log_odds_ratio": -0.01496611163020134, + "logits/chosen": -1.4751474857330322, + "logits/rejected": -1.2971646785736084, + "logps/chosen": -0.4015483260154724, + "logps/rejected": -14.744335174560547, + "loss": 0.4113, + "nll_loss": 0.5270360112190247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0401548370718956, + "rewards/margins": 1.4342787265777588, + "rewards/rejected": -1.4744335412979126, + "step": 697 + }, + { + "epoch": 1.3646138807429131, + "grad_norm": 0.6774429678916931, + "learning_rate": 2.7397260273972603e-05, + "log_odds_chosen": 10.681903839111328, + "log_odds_ratio": -0.044580813497304916, + "logits/chosen": -1.2360990047454834, + "logits/rejected": -1.3883620500564575, + "logps/chosen": -0.2672516107559204, + "logps/rejected": -9.471036911010742, + "loss": 0.4294, + "nll_loss": 0.4051187336444855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026725158095359802, + "rewards/margins": 0.920378565788269, + "rewards/rejected": -0.94710373878479, + "step": 698 + }, + { + "epoch": 1.3665689149560118, + "grad_norm": 0.6580828428268433, + "learning_rate": 2.7364644487932163e-05, + "log_odds_chosen": 12.718484878540039, + "log_odds_ratio": -0.01244400069117546, + "logits/chosen": -1.4545363187789917, + "logits/rejected": -1.408891201019287, + "logps/chosen": -0.2791890501976013, + "logps/rejected": -11.381448745727539, + "loss": 0.4163, + "nll_loss": 0.447986364364624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027918905019760132, + "rewards/margins": 1.110226035118103, + "rewards/rejected": -1.1381449699401855, + "step": 699 + }, + { + "epoch": 1.3685239491691106, + "grad_norm": 0.6709342002868652, + "learning_rate": 2.7332028701891716e-05, + "log_odds_chosen": 15.697826385498047, + "log_odds_ratio": -0.005489474628120661, + "logits/chosen": -1.4017925262451172, + "logits/rejected": -1.4381277561187744, + "logps/chosen": -0.23887094855308533, + "logps/rejected": -13.95638656616211, + "loss": 0.4077, + "nll_loss": 0.45361781120300293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023887092247605324, + "rewards/margins": 1.3717515468597412, + "rewards/rejected": -1.3956387042999268, + "step": 700 + }, + { + "epoch": 1.3704789833822093, + "grad_norm": 0.6854177713394165, + "learning_rate": 2.7299412915851275e-05, + "log_odds_chosen": 12.843753814697266, + "log_odds_ratio": -0.0075004370883107185, + "logits/chosen": -1.5823485851287842, + "logits/rejected": -1.4007279872894287, + "logps/chosen": -0.27097493410110474, + "logps/rejected": -11.540414810180664, + "loss": 0.4325, + "nll_loss": 0.39892590045928955, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027097491547465324, + "rewards/margins": 1.1269439458847046, + "rewards/rejected": -1.1540414094924927, + "step": 701 + }, + { + "epoch": 1.372434017595308, + "grad_norm": 0.6760475635528564, + "learning_rate": 2.7266797129810828e-05, + "log_odds_chosen": 10.769508361816406, + "log_odds_ratio": -0.07391811907291412, + "logits/chosen": -1.5592124462127686, + "logits/rejected": -1.4099953174591064, + "logps/chosen": -0.35248202085494995, + "logps/rejected": -9.673836708068848, + "loss": 0.4261, + "nll_loss": 0.5188721418380737, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.035248201340436935, + "rewards/margins": 0.9321354627609253, + "rewards/rejected": -0.967383623123169, + "step": 702 + }, + { + "epoch": 1.3743890518084068, + "grad_norm": 0.6696889996528625, + "learning_rate": 2.7234181343770387e-05, + "log_odds_chosen": 11.858776092529297, + "log_odds_ratio": -0.06383504718542099, + "logits/chosen": -1.4593126773834229, + "logits/rejected": -1.2865175008773804, + "logps/chosen": -0.3967357873916626, + "logps/rejected": -10.893924713134766, + "loss": 0.4293, + "nll_loss": 0.5606618523597717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0396735817193985, + "rewards/margins": 1.049718976020813, + "rewards/rejected": -1.0893925428390503, + "step": 703 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.6415077447891235, + "learning_rate": 2.720156555772994e-05, + "log_odds_chosen": 9.556548118591309, + "log_odds_ratio": -0.02748730406165123, + "logits/chosen": -1.4516390562057495, + "logits/rejected": -1.39517343044281, + "logps/chosen": -0.3253428339958191, + "logps/rejected": -8.50472640991211, + "loss": 0.4164, + "nll_loss": 0.4128497838973999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03253428637981415, + "rewards/margins": 0.8179383277893066, + "rewards/rejected": -0.8504726886749268, + "step": 704 + }, + { + "epoch": 1.3782991202346042, + "grad_norm": 0.6984980702400208, + "learning_rate": 2.71689497716895e-05, + "log_odds_chosen": 16.44754409790039, + "log_odds_ratio": -0.04724876210093498, + "logits/chosen": -1.3228323459625244, + "logits/rejected": -1.056627631187439, + "logps/chosen": -0.30402037501335144, + "logps/rejected": -14.98538589477539, + "loss": 0.4398, + "nll_loss": 0.5017848610877991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030402038246393204, + "rewards/margins": 1.4681366682052612, + "rewards/rejected": -1.498538613319397, + "step": 705 + }, + { + "epoch": 1.380254154447703, + "grad_norm": 0.6729361414909363, + "learning_rate": 2.7136333985649056e-05, + "log_odds_chosen": 9.365677833557129, + "log_odds_ratio": -0.014535047113895416, + "logits/chosen": -1.6160796880722046, + "logits/rejected": -1.383267879486084, + "logps/chosen": -0.16868622601032257, + "logps/rejected": -7.641977310180664, + "loss": 0.428, + "nll_loss": 0.28883418440818787, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016868622973561287, + "rewards/margins": 0.7473291158676147, + "rewards/rejected": -0.764197826385498, + "step": 706 + }, + { + "epoch": 1.3822091886608017, + "grad_norm": 0.6656391024589539, + "learning_rate": 2.710371819960861e-05, + "log_odds_chosen": 8.931496620178223, + "log_odds_ratio": -0.08069512993097305, + "logits/chosen": -1.4752089977264404, + "logits/rejected": -1.4239885807037354, + "logps/chosen": -0.3118746876716614, + "logps/rejected": -7.898514747619629, + "loss": 0.4368, + "nll_loss": 0.4258486032485962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.031187471002340317, + "rewards/margins": 0.7586641311645508, + "rewards/rejected": -0.7898515462875366, + "step": 707 + }, + { + "epoch": 1.3841642228739004, + "grad_norm": 0.6596536040306091, + "learning_rate": 2.7071102413568168e-05, + "log_odds_chosen": 20.44851303100586, + "log_odds_ratio": -0.00332234101369977, + "logits/chosen": -1.4473923444747925, + "logits/rejected": -1.3107094764709473, + "logps/chosen": -0.22023680806159973, + "logps/rejected": -18.917823791503906, + "loss": 0.4046, + "nll_loss": 0.36181640625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022023681551218033, + "rewards/margins": 1.8697588443756104, + "rewards/rejected": -1.8917824029922485, + "step": 708 + }, + { + "epoch": 1.3861192570869991, + "grad_norm": 0.6756699085235596, + "learning_rate": 2.7038486627527727e-05, + "log_odds_chosen": 11.004413604736328, + "log_odds_ratio": -0.006352486088871956, + "logits/chosen": -1.4312260150909424, + "logits/rejected": -1.0047346353530884, + "logps/chosen": -0.2955555021762848, + "logps/rejected": -9.616233825683594, + "loss": 0.4074, + "nll_loss": 0.46882325410842896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02955554984509945, + "rewards/margins": 0.9320678114891052, + "rewards/rejected": -0.9616233706474304, + "step": 709 + }, + { + "epoch": 1.3880742913000979, + "grad_norm": 0.6960408091545105, + "learning_rate": 2.700587084148728e-05, + "log_odds_chosen": 5.1050872802734375, + "log_odds_ratio": -0.04350782930850983, + "logits/chosen": -1.3898942470550537, + "logits/rejected": -1.2877986431121826, + "logps/chosen": -0.3821786046028137, + "logps/rejected": -4.129449844360352, + "loss": 0.4247, + "nll_loss": 0.48261332511901855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03821785748004913, + "rewards/margins": 0.37472715973854065, + "rewards/rejected": -0.412945032119751, + "step": 710 + }, + { + "epoch": 1.3900293255131966, + "grad_norm": 0.6515598893165588, + "learning_rate": 2.697325505544684e-05, + "log_odds_chosen": 13.982303619384766, + "log_odds_ratio": -0.026797838509082794, + "logits/chosen": -1.39878511428833, + "logits/rejected": -1.270552635192871, + "logps/chosen": -0.20139355957508087, + "logps/rejected": -12.314067840576172, + "loss": 0.4042, + "nll_loss": 0.3448660373687744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020139355212450027, + "rewards/margins": 1.211267352104187, + "rewards/rejected": -1.231406807899475, + "step": 711 + }, + { + "epoch": 1.3919843597262953, + "grad_norm": 0.648862898349762, + "learning_rate": 2.6940639269406392e-05, + "log_odds_chosen": 13.119208335876465, + "log_odds_ratio": -0.044078778475522995, + "logits/chosen": -1.3157758712768555, + "logits/rejected": -1.3881819248199463, + "logps/chosen": -0.3232520818710327, + "logps/rejected": -11.684898376464844, + "loss": 0.4029, + "nll_loss": 0.45129460096359253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03232521191239357, + "rewards/margins": 1.136164665222168, + "rewards/rejected": -1.168489933013916, + "step": 712 + }, + { + "epoch": 1.393939393939394, + "grad_norm": 0.7013861536979675, + "learning_rate": 2.690802348336595e-05, + "log_odds_chosen": 12.048341751098633, + "log_odds_ratio": -0.1334342062473297, + "logits/chosen": -1.2884176969528198, + "logits/rejected": -1.3403651714324951, + "logps/chosen": -0.41112345457077026, + "logps/rejected": -11.364938735961914, + "loss": 0.4319, + "nll_loss": 0.5088917016983032, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.041112348437309265, + "rewards/margins": 1.0953816175460815, + "rewards/rejected": -1.1364939212799072, + "step": 713 + }, + { + "epoch": 1.3958944281524928, + "grad_norm": 0.6420502066612244, + "learning_rate": 2.6875407697325504e-05, + "log_odds_chosen": 10.156495094299316, + "log_odds_ratio": -0.022149693220853806, + "logits/chosen": -1.2217895984649658, + "logits/rejected": -1.318338394165039, + "logps/chosen": -0.3237217962741852, + "logps/rejected": -9.093066215515137, + "loss": 0.3962, + "nll_loss": 0.46374180912971497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03237218037247658, + "rewards/margins": 0.8769344091415405, + "rewards/rejected": -0.9093065857887268, + "step": 714 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.6857893466949463, + "learning_rate": 2.6842791911285064e-05, + "log_odds_chosen": 12.649747848510742, + "log_odds_ratio": -0.006878402084112167, + "logits/chosen": -1.4584126472473145, + "logits/rejected": -1.2068724632263184, + "logps/chosen": -0.2758999466896057, + "logps/rejected": -11.280922889709473, + "loss": 0.4228, + "nll_loss": 0.3139870762825012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02758999727666378, + "rewards/margins": 1.1005022525787354, + "rewards/rejected": -1.1280922889709473, + "step": 715 + }, + { + "epoch": 1.3998044965786902, + "grad_norm": 0.6602794528007507, + "learning_rate": 2.681017612524462e-05, + "log_odds_chosen": 13.12955093383789, + "log_odds_ratio": -0.017247024923563004, + "logits/chosen": -1.4618098735809326, + "logits/rejected": -1.450056552886963, + "logps/chosen": -0.26292192935943604, + "logps/rejected": -11.741659164428711, + "loss": 0.4058, + "nll_loss": 0.3542994260787964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026292193681001663, + "rewards/margins": 1.147873878479004, + "rewards/rejected": -1.174165964126587, + "step": 716 + }, + { + "epoch": 1.401759530791789, + "grad_norm": 0.6595775485038757, + "learning_rate": 2.6777560339204176e-05, + "log_odds_chosen": 8.528743743896484, + "log_odds_ratio": -0.007926532998681068, + "logits/chosen": -1.3078500032424927, + "logits/rejected": -1.404052495956421, + "logps/chosen": -0.22414562106132507, + "logps/rejected": -7.023338317871094, + "loss": 0.3963, + "nll_loss": 0.32029637694358826, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022414563223719597, + "rewards/margins": 0.6799192428588867, + "rewards/rejected": -0.7023338079452515, + "step": 717 + }, + { + "epoch": 1.4037145650048877, + "grad_norm": 0.6664738059043884, + "learning_rate": 2.6744944553163732e-05, + "log_odds_chosen": 14.888526916503906, + "log_odds_ratio": -0.004297342617064714, + "logits/chosen": -1.3916778564453125, + "logits/rejected": -1.25740385055542, + "logps/chosen": -0.23038409650325775, + "logps/rejected": -13.203348159790039, + "loss": 0.4065, + "nll_loss": 0.28869226574897766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023038407787680626, + "rewards/margins": 1.2972962856292725, + "rewards/rejected": -1.3203346729278564, + "step": 718 + }, + { + "epoch": 1.4056695992179864, + "grad_norm": 0.6668436527252197, + "learning_rate": 2.671232876712329e-05, + "log_odds_chosen": 7.46367073059082, + "log_odds_ratio": -0.019457202404737473, + "logits/chosen": -1.5860686302185059, + "logits/rejected": -1.430010199546814, + "logps/chosen": -0.2730500102043152, + "logps/rejected": -5.887295722961426, + "loss": 0.3999, + "nll_loss": 0.41093119978904724, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027305003255605698, + "rewards/margins": 0.5614246129989624, + "rewards/rejected": -0.5887295603752136, + "step": 719 + }, + { + "epoch": 1.4076246334310851, + "grad_norm": 0.6541679501533508, + "learning_rate": 2.6679712981082844e-05, + "log_odds_chosen": 11.220428466796875, + "log_odds_ratio": -0.021069061011075974, + "logits/chosen": -1.3651490211486816, + "logits/rejected": -1.4073548316955566, + "logps/chosen": -0.23950812220573425, + "logps/rejected": -9.867033004760742, + "loss": 0.4043, + "nll_loss": 0.3133358955383301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023950813338160515, + "rewards/margins": 0.9627525806427002, + "rewards/rejected": -0.9867033958435059, + "step": 720 + }, + { + "epoch": 1.4095796676441839, + "grad_norm": 0.6586343050003052, + "learning_rate": 2.6647097195042404e-05, + "log_odds_chosen": 12.55937671661377, + "log_odds_ratio": -0.00413292832672596, + "logits/chosen": -1.515415906906128, + "logits/rejected": -1.4591214656829834, + "logps/chosen": -0.3628729283809662, + "logps/rejected": -11.578876495361328, + "loss": 0.3925, + "nll_loss": 0.41781461238861084, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03628729283809662, + "rewards/margins": 1.1216003894805908, + "rewards/rejected": -1.1578876972198486, + "step": 721 + }, + { + "epoch": 1.4115347018572826, + "grad_norm": 0.6805813312530518, + "learning_rate": 2.6614481409001956e-05, + "log_odds_chosen": 14.965612411499023, + "log_odds_ratio": -0.025822311639785767, + "logits/chosen": -1.3913781642913818, + "logits/rejected": -1.0364186763763428, + "logps/chosen": -0.2889409065246582, + "logps/rejected": -13.694211959838867, + "loss": 0.3997, + "nll_loss": 0.43830570578575134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02889409102499485, + "rewards/margins": 1.3405272960662842, + "rewards/rejected": -1.3694212436676025, + "step": 722 + }, + { + "epoch": 1.4134897360703813, + "grad_norm": 0.6543363928794861, + "learning_rate": 2.6581865622961516e-05, + "log_odds_chosen": 11.205548286437988, + "log_odds_ratio": -0.04601755738258362, + "logits/chosen": -1.4629496335983276, + "logits/rejected": -1.4843928813934326, + "logps/chosen": -0.25596654415130615, + "logps/rejected": -9.747806549072266, + "loss": 0.4106, + "nll_loss": 0.2804097831249237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025596655905246735, + "rewards/margins": 0.9491840600967407, + "rewards/rejected": -0.9747807383537292, + "step": 723 + }, + { + "epoch": 1.41544477028348, + "grad_norm": 0.6865048408508301, + "learning_rate": 2.654924983692107e-05, + "log_odds_chosen": 8.973470687866211, + "log_odds_ratio": -0.019337646663188934, + "logits/chosen": -1.5224616527557373, + "logits/rejected": -1.360398530960083, + "logps/chosen": -0.20301198959350586, + "logps/rejected": -7.346080780029297, + "loss": 0.4075, + "nll_loss": 0.2983621656894684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020301200449466705, + "rewards/margins": 0.7143069505691528, + "rewards/rejected": -0.7346081137657166, + "step": 724 + }, + { + "epoch": 1.4173998044965788, + "grad_norm": 0.6501922011375427, + "learning_rate": 2.6516634050880628e-05, + "log_odds_chosen": 16.380048751831055, + "log_odds_ratio": -0.01895288936793804, + "logits/chosen": -1.4148800373077393, + "logits/rejected": -1.2424978017807007, + "logps/chosen": -0.30378201603889465, + "logps/rejected": -15.248468399047852, + "loss": 0.401, + "nll_loss": 0.4076079726219177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030378200113773346, + "rewards/margins": 1.4944686889648438, + "rewards/rejected": -1.5248467922210693, + "step": 725 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.6855003833770752, + "learning_rate": 2.6484018264840184e-05, + "log_odds_chosen": 14.466773986816406, + "log_odds_ratio": -0.0060893576592206955, + "logits/chosen": -1.4612178802490234, + "logits/rejected": -1.363004207611084, + "logps/chosen": -0.1671542525291443, + "logps/rejected": -12.67106819152832, + "loss": 0.4006, + "nll_loss": 0.3044384717941284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01671542599797249, + "rewards/margins": 1.2503912448883057, + "rewards/rejected": -1.2671067714691162, + "step": 726 + }, + { + "epoch": 1.4213098729227762, + "grad_norm": 0.6550787091255188, + "learning_rate": 2.645140247879974e-05, + "log_odds_chosen": 21.18186378479004, + "log_odds_ratio": -0.008703046478331089, + "logits/chosen": -1.4812935590744019, + "logits/rejected": -1.3872811794281006, + "logps/chosen": -0.28850120306015015, + "logps/rejected": -19.79560089111328, + "loss": 0.3948, + "nll_loss": 0.47601014375686646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028850121423602104, + "rewards/margins": 1.9507102966308594, + "rewards/rejected": -1.9795602560043335, + "step": 727 + }, + { + "epoch": 1.423264907135875, + "grad_norm": 0.6744993925094604, + "learning_rate": 2.6418786692759296e-05, + "log_odds_chosen": 10.061454772949219, + "log_odds_ratio": -0.04901958256959915, + "logits/chosen": -1.2453829050064087, + "logits/rejected": -1.2485431432724, + "logps/chosen": -0.1941477209329605, + "logps/rejected": -8.442383766174316, + "loss": 0.4094, + "nll_loss": 0.34227627515792847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01941477134823799, + "rewards/margins": 0.8248236179351807, + "rewards/rejected": -0.84423828125, + "step": 728 + }, + { + "epoch": 1.4252199413489737, + "grad_norm": 0.6840391159057617, + "learning_rate": 2.6386170906718856e-05, + "log_odds_chosen": 16.193164825439453, + "log_odds_ratio": -0.012441557832062244, + "logits/chosen": -1.4371274709701538, + "logits/rejected": -1.3134400844573975, + "logps/chosen": -0.3441266417503357, + "logps/rejected": -15.18587875366211, + "loss": 0.4072, + "nll_loss": 0.41449862718582153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03441266715526581, + "rewards/margins": 1.484175205230713, + "rewards/rejected": -1.5185878276824951, + "step": 729 + }, + { + "epoch": 1.4271749755620724, + "grad_norm": 0.6361057162284851, + "learning_rate": 2.635355512067841e-05, + "log_odds_chosen": 17.452287673950195, + "log_odds_ratio": -0.008170317858457565, + "logits/chosen": -1.3686085939407349, + "logits/rejected": -1.2453747987747192, + "logps/chosen": -0.2291656881570816, + "logps/rejected": -15.824840545654297, + "loss": 0.3837, + "nll_loss": 0.40765485167503357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02291657030582428, + "rewards/margins": 1.5595675706863403, + "rewards/rejected": -1.582484245300293, + "step": 730 + }, + { + "epoch": 1.4291300097751711, + "grad_norm": 0.6565587520599365, + "learning_rate": 2.6320939334637968e-05, + "log_odds_chosen": 17.293222427368164, + "log_odds_ratio": -0.02130892314016819, + "logits/chosen": -1.3035705089569092, + "logits/rejected": -1.0822275876998901, + "logps/chosen": -0.15369278192520142, + "logps/rejected": -15.465274810791016, + "loss": 0.3954, + "nll_loss": 0.35997942090034485, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015369279310107231, + "rewards/margins": 1.531158208847046, + "rewards/rejected": -1.546527624130249, + "step": 731 + }, + { + "epoch": 1.4310850439882699, + "grad_norm": 0.6397748589515686, + "learning_rate": 2.628832354859752e-05, + "log_odds_chosen": 10.426054954528809, + "log_odds_ratio": -0.055807407945394516, + "logits/chosen": -1.508867859840393, + "logits/rejected": -1.1464265584945679, + "logps/chosen": -0.2648329734802246, + "logps/rejected": -8.865592956542969, + "loss": 0.3825, + "nll_loss": 0.37505245208740234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02648329921066761, + "rewards/margins": 0.8600760698318481, + "rewards/rejected": -0.8865593671798706, + "step": 732 + }, + { + "epoch": 1.4330400782013686, + "grad_norm": 0.6588864922523499, + "learning_rate": 2.625570776255708e-05, + "log_odds_chosen": 10.966197967529297, + "log_odds_ratio": -0.04850185662508011, + "logits/chosen": -1.486305832862854, + "logits/rejected": -1.4407005310058594, + "logps/chosen": -0.2774222493171692, + "logps/rejected": -9.701894760131836, + "loss": 0.384, + "nll_loss": 0.4140610694885254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02774222567677498, + "rewards/margins": 0.942447304725647, + "rewards/rejected": -0.9701895713806152, + "step": 733 + }, + { + "epoch": 1.4349951124144673, + "grad_norm": 0.6421037316322327, + "learning_rate": 2.6223091976516633e-05, + "log_odds_chosen": 9.194669723510742, + "log_odds_ratio": -0.011342995800077915, + "logits/chosen": -1.514816403388977, + "logits/rejected": -1.3481892347335815, + "logps/chosen": -0.4145638644695282, + "logps/rejected": -8.186787605285645, + "loss": 0.3767, + "nll_loss": 0.5002407431602478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04145638644695282, + "rewards/margins": 0.7772223949432373, + "rewards/rejected": -0.8186788558959961, + "step": 734 + }, + { + "epoch": 1.436950146627566, + "grad_norm": 0.6810351014137268, + "learning_rate": 2.6190476190476192e-05, + "log_odds_chosen": 13.883463859558105, + "log_odds_ratio": -0.09505952894687653, + "logits/chosen": -1.4874213933944702, + "logits/rejected": -1.28812575340271, + "logps/chosen": -0.4452674984931946, + "logps/rejected": -13.070365905761719, + "loss": 0.3877, + "nll_loss": 0.5967094898223877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04452675208449364, + "rewards/margins": 1.262509822845459, + "rewards/rejected": -1.3070366382598877, + "step": 735 + }, + { + "epoch": 1.4389051808406648, + "grad_norm": 0.6723158359527588, + "learning_rate": 2.6157860404435748e-05, + "log_odds_chosen": 9.92475700378418, + "log_odds_ratio": -0.1088147759437561, + "logits/chosen": -1.5832757949829102, + "logits/rejected": -1.3337936401367188, + "logps/chosen": -0.28233104944229126, + "logps/rejected": -8.552684783935547, + "loss": 0.3991, + "nll_loss": 0.40794724225997925, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.028233105316758156, + "rewards/margins": 0.8270353674888611, + "rewards/rejected": -0.8552683591842651, + "step": 736 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.6545373201370239, + "learning_rate": 2.6125244618395304e-05, + "log_odds_chosen": 14.975168228149414, + "log_odds_ratio": -0.009427893906831741, + "logits/chosen": -1.4926118850708008, + "logits/rejected": -1.2814573049545288, + "logps/chosen": -0.1824880838394165, + "logps/rejected": -13.227649688720703, + "loss": 0.402, + "nll_loss": 0.40562254190444946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01824880763888359, + "rewards/margins": 1.3045161962509155, + "rewards/rejected": -1.3227651119232178, + "step": 737 + }, + { + "epoch": 1.4428152492668622, + "grad_norm": 0.6330305337905884, + "learning_rate": 2.609262883235486e-05, + "log_odds_chosen": 13.854325294494629, + "log_odds_ratio": -0.02046654373407364, + "logits/chosen": -1.4788191318511963, + "logits/rejected": -1.2084414958953857, + "logps/chosen": -0.21273744106292725, + "logps/rejected": -12.306577682495117, + "loss": 0.3926, + "nll_loss": 0.3686641454696655, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021273743361234665, + "rewards/margins": 1.2093840837478638, + "rewards/rejected": -1.2306578159332275, + "step": 738 + }, + { + "epoch": 1.444770283479961, + "grad_norm": 0.6458708047866821, + "learning_rate": 2.606001304631442e-05, + "log_odds_chosen": 10.975409507751465, + "log_odds_ratio": -0.015492696315050125, + "logits/chosen": -1.467191219329834, + "logits/rejected": -1.3122684955596924, + "logps/chosen": -0.26031994819641113, + "logps/rejected": -9.76888370513916, + "loss": 0.3975, + "nll_loss": 0.4023493230342865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026031995192170143, + "rewards/margins": 0.9508564472198486, + "rewards/rejected": -0.9768884181976318, + "step": 739 + }, + { + "epoch": 1.4467253176930597, + "grad_norm": 0.6575332283973694, + "learning_rate": 2.6027397260273973e-05, + "log_odds_chosen": 10.43898868560791, + "log_odds_ratio": -0.0470145158469677, + "logits/chosen": -1.4558351039886475, + "logits/rejected": -1.2459180355072021, + "logps/chosen": -0.2603527307510376, + "logps/rejected": -8.986703872680664, + "loss": 0.3876, + "nll_loss": 0.36534756422042847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02603527344763279, + "rewards/margins": 0.8726351261138916, + "rewards/rejected": -0.8986704349517822, + "step": 740 + }, + { + "epoch": 1.4486803519061584, + "grad_norm": 0.676115095615387, + "learning_rate": 2.5994781474233532e-05, + "log_odds_chosen": 10.530704498291016, + "log_odds_ratio": -0.01770772784948349, + "logits/chosen": -1.412257432937622, + "logits/rejected": -1.4112614393234253, + "logps/chosen": -0.2665649652481079, + "logps/rejected": -8.879875183105469, + "loss": 0.4029, + "nll_loss": 0.4408552944660187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0266564954072237, + "rewards/margins": 0.8613311052322388, + "rewards/rejected": -0.8879876136779785, + "step": 741 + }, + { + "epoch": 1.4506353861192571, + "grad_norm": 0.6448396444320679, + "learning_rate": 2.5962165688193085e-05, + "log_odds_chosen": 14.58224868774414, + "log_odds_ratio": -0.01017422042787075, + "logits/chosen": -1.3816986083984375, + "logits/rejected": -1.2078639268875122, + "logps/chosen": -0.1516660451889038, + "logps/rejected": -12.746112823486328, + "loss": 0.3852, + "nll_loss": 0.24510356783866882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015166603028774261, + "rewards/margins": 1.2594447135925293, + "rewards/rejected": -1.274611234664917, + "step": 742 + }, + { + "epoch": 1.4525904203323559, + "grad_norm": 0.6564180850982666, + "learning_rate": 2.5929549902152644e-05, + "log_odds_chosen": 12.645145416259766, + "log_odds_ratio": -0.010659023188054562, + "logits/chosen": -1.5312131643295288, + "logits/rejected": -1.2977099418640137, + "logps/chosen": -0.23777884244918823, + "logps/rejected": -10.974531173706055, + "loss": 0.3893, + "nll_loss": 0.3094143867492676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023777883499860764, + "rewards/margins": 1.073675274848938, + "rewards/rejected": -1.0974531173706055, + "step": 743 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.6409600377082825, + "learning_rate": 2.5896934116112197e-05, + "log_odds_chosen": 14.322210311889648, + "log_odds_ratio": -0.02782336249947548, + "logits/chosen": -1.5266692638397217, + "logits/rejected": -1.2152607440948486, + "logps/chosen": -0.20309492945671082, + "logps/rejected": -12.641568183898926, + "loss": 0.3697, + "nll_loss": 0.3516111373901367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02030949294567108, + "rewards/margins": 1.243847370147705, + "rewards/rejected": -1.2641568183898926, + "step": 744 + }, + { + "epoch": 1.4565004887585533, + "grad_norm": 0.6514245867729187, + "learning_rate": 2.5864318330071756e-05, + "log_odds_chosen": 18.443622589111328, + "log_odds_ratio": -0.0015672399895265698, + "logits/chosen": -1.240975022315979, + "logits/rejected": -1.0365056991577148, + "logps/chosen": -0.16589972376823425, + "logps/rejected": -16.580835342407227, + "loss": 0.3813, + "nll_loss": 0.2841712236404419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016589971259236336, + "rewards/margins": 1.641493558883667, + "rewards/rejected": -1.6580836772918701, + "step": 745 + }, + { + "epoch": 1.458455522971652, + "grad_norm": 0.661504864692688, + "learning_rate": 2.5831702544031313e-05, + "log_odds_chosen": 15.059541702270508, + "log_odds_ratio": -0.004806451965123415, + "logits/chosen": -1.6691913604736328, + "logits/rejected": -1.3650178909301758, + "logps/chosen": -0.2742729187011719, + "logps/rejected": -13.527994155883789, + "loss": 0.3742, + "nll_loss": 0.433430016040802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02742728963494301, + "rewards/margins": 1.3253722190856934, + "rewards/rejected": -1.3527995347976685, + "step": 746 + }, + { + "epoch": 1.4604105571847508, + "grad_norm": 0.664226233959198, + "learning_rate": 2.579908675799087e-05, + "log_odds_chosen": 15.453190803527832, + "log_odds_ratio": -0.008288027718663216, + "logits/chosen": -1.6006362438201904, + "logits/rejected": -1.285917043685913, + "logps/chosen": -0.17174005508422852, + "logps/rejected": -13.712028503417969, + "loss": 0.3873, + "nll_loss": 0.2722126245498657, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01717400550842285, + "rewards/margins": 1.3540289402008057, + "rewards/rejected": -1.3712029457092285, + "step": 747 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.6769481897354126, + "learning_rate": 2.5766470971950425e-05, + "log_odds_chosen": 10.284904479980469, + "log_odds_ratio": -0.008120691403746605, + "logits/chosen": -1.4347379207611084, + "logits/rejected": -1.5043025016784668, + "logps/chosen": -0.25033214688301086, + "logps/rejected": -8.89396858215332, + "loss": 0.3868, + "nll_loss": 0.45676684379577637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025033215060830116, + "rewards/margins": 0.8643635511398315, + "rewards/rejected": -0.8893967866897583, + "step": 748 + }, + { + "epoch": 1.4643206256109482, + "grad_norm": 0.6671487092971802, + "learning_rate": 2.5733855185909984e-05, + "log_odds_chosen": 14.378483772277832, + "log_odds_ratio": -0.005752436351031065, + "logits/chosen": -1.4496434926986694, + "logits/rejected": -1.426448106765747, + "logps/chosen": -0.23225772380828857, + "logps/rejected": -12.68328857421875, + "loss": 0.3879, + "nll_loss": 0.3458259105682373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023225773125886917, + "rewards/margins": 1.2451030015945435, + "rewards/rejected": -1.2683287858963013, + "step": 749 + }, + { + "epoch": 1.466275659824047, + "grad_norm": 0.6784003973007202, + "learning_rate": 2.5701239399869537e-05, + "log_odds_chosen": 14.409307479858398, + "log_odds_ratio": -0.022505423054099083, + "logits/chosen": -1.3237930536270142, + "logits/rejected": -1.3750615119934082, + "logps/chosen": -0.32301804423332214, + "logps/rejected": -13.231588363647461, + "loss": 0.3875, + "nll_loss": 0.4927727282047272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032301805913448334, + "rewards/margins": 1.2908570766448975, + "rewards/rejected": -1.323158860206604, + "step": 750 + }, + { + "epoch": 1.4682306940371457, + "grad_norm": 0.6451238393783569, + "learning_rate": 2.5668623613829096e-05, + "log_odds_chosen": 16.94093894958496, + "log_odds_ratio": -0.01360371708869934, + "logits/chosen": -1.509347677230835, + "logits/rejected": -1.2176508903503418, + "logps/chosen": -0.2934649884700775, + "logps/rejected": -15.572099685668945, + "loss": 0.3835, + "nll_loss": 0.4780479669570923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02934649959206581, + "rewards/margins": 1.5278635025024414, + "rewards/rejected": -1.5572099685668945, + "step": 751 + }, + { + "epoch": 1.4701857282502444, + "grad_norm": 0.6524550318717957, + "learning_rate": 2.563600782778865e-05, + "log_odds_chosen": 14.028526306152344, + "log_odds_ratio": -0.00857525784522295, + "logits/chosen": -1.487825870513916, + "logits/rejected": -1.3311148881912231, + "logps/chosen": -0.29711583256721497, + "logps/rejected": -12.568521499633789, + "loss": 0.375, + "nll_loss": 0.45325708389282227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029711587354540825, + "rewards/margins": 1.2271404266357422, + "rewards/rejected": -1.256852149963379, + "step": 752 + }, + { + "epoch": 1.4721407624633431, + "grad_norm": 0.6336098313331604, + "learning_rate": 2.560339204174821e-05, + "log_odds_chosen": 12.863231658935547, + "log_odds_ratio": -0.017221884801983833, + "logits/chosen": -1.2323092222213745, + "logits/rejected": -0.9776533246040344, + "logps/chosen": -0.2548539638519287, + "logps/rejected": -11.401025772094727, + "loss": 0.3711, + "nll_loss": 0.4460296034812927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02548539638519287, + "rewards/margins": 1.1146172285079956, + "rewards/rejected": -1.1401026248931885, + "step": 753 + }, + { + "epoch": 1.4740957966764419, + "grad_norm": 0.6444098353385925, + "learning_rate": 2.557077625570776e-05, + "log_odds_chosen": 15.058599472045898, + "log_odds_ratio": -0.0031126912217587233, + "logits/chosen": -1.5390503406524658, + "logits/rejected": -1.3417043685913086, + "logps/chosen": -0.24383783340454102, + "logps/rejected": -13.324308395385742, + "loss": 0.3582, + "nll_loss": 0.34940505027770996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0243837870657444, + "rewards/margins": 1.3080470561981201, + "rewards/rejected": -1.3324308395385742, + "step": 754 + }, + { + "epoch": 1.4760508308895406, + "grad_norm": 0.6884974837303162, + "learning_rate": 2.553816046966732e-05, + "log_odds_chosen": 16.801651000976562, + "log_odds_ratio": -0.011719176545739174, + "logits/chosen": -1.5801098346710205, + "logits/rejected": -1.36586332321167, + "logps/chosen": -0.18851402401924133, + "logps/rejected": -15.0740966796875, + "loss": 0.3863, + "nll_loss": 0.2897107005119324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018851403146982193, + "rewards/margins": 1.488558292388916, + "rewards/rejected": -1.507409691810608, + "step": 755 + }, + { + "epoch": 1.4780058651026393, + "grad_norm": 0.6829601526260376, + "learning_rate": 2.5505544683626877e-05, + "log_odds_chosen": 14.96235466003418, + "log_odds_ratio": -0.01657315157353878, + "logits/chosen": -1.425804615020752, + "logits/rejected": -1.3903353214263916, + "logps/chosen": -0.32883572578430176, + "logps/rejected": -13.628070831298828, + "loss": 0.3886, + "nll_loss": 0.48378807306289673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.032883573323488235, + "rewards/margins": 1.3299236297607422, + "rewards/rejected": -1.362807035446167, + "step": 756 + }, + { + "epoch": 1.479960899315738, + "grad_norm": 0.6389460563659668, + "learning_rate": 2.5472928897586433e-05, + "log_odds_chosen": 12.146697998046875, + "log_odds_ratio": -0.011370046064257622, + "logits/chosen": -1.413656234741211, + "logits/rejected": -1.5250768661499023, + "logps/chosen": -0.24205484986305237, + "logps/rejected": -10.7411470413208, + "loss": 0.3657, + "nll_loss": 0.35933271050453186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024205485358834267, + "rewards/margins": 1.0499093532562256, + "rewards/rejected": -1.0741147994995117, + "step": 757 + }, + { + "epoch": 1.4819159335288368, + "grad_norm": 0.6370611190795898, + "learning_rate": 2.544031311154599e-05, + "log_odds_chosen": 14.862663269042969, + "log_odds_ratio": -0.012587877921760082, + "logits/chosen": -1.4860255718231201, + "logits/rejected": -1.305882215499878, + "logps/chosen": -0.28285425901412964, + "logps/rejected": -13.390756607055664, + "loss": 0.3644, + "nll_loss": 0.478368878364563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028285427019000053, + "rewards/margins": 1.3107901811599731, + "rewards/rejected": -1.3390755653381348, + "step": 758 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.6306945085525513, + "learning_rate": 2.540769732550555e-05, + "log_odds_chosen": 8.145353317260742, + "log_odds_ratio": -0.01602342538535595, + "logits/chosen": -1.4735127687454224, + "logits/rejected": -1.247586727142334, + "logps/chosen": -0.2561395764350891, + "logps/rejected": -6.800796031951904, + "loss": 0.3684, + "nll_loss": 0.41042229533195496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02561395987868309, + "rewards/margins": 0.6544656157493591, + "rewards/rejected": -0.6800795793533325, + "step": 759 + }, + { + "epoch": 1.4858260019550342, + "grad_norm": 0.6278574466705322, + "learning_rate": 2.53750815394651e-05, + "log_odds_chosen": 13.112163543701172, + "log_odds_ratio": -0.0002626166387926787, + "logits/chosen": -1.3714121580123901, + "logits/rejected": -1.542726755142212, + "logps/chosen": -0.30443885922431946, + "logps/rejected": -11.470351219177246, + "loss": 0.3599, + "nll_loss": 0.4012797474861145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030443888157606125, + "rewards/margins": 1.116591215133667, + "rewards/rejected": -1.1470351219177246, + "step": 760 + }, + { + "epoch": 1.487781036168133, + "grad_norm": 0.6329840421676636, + "learning_rate": 2.534246575342466e-05, + "log_odds_chosen": 9.038989067077637, + "log_odds_ratio": -0.019079633057117462, + "logits/chosen": -1.646256923675537, + "logits/rejected": -1.470120906829834, + "logps/chosen": -0.2042175531387329, + "logps/rejected": -7.5206828117370605, + "loss": 0.3612, + "nll_loss": 0.3941296339035034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02042175643146038, + "rewards/margins": 0.7316465377807617, + "rewards/rejected": -0.752068281173706, + "step": 761 + }, + { + "epoch": 1.4897360703812317, + "grad_norm": 0.6655564904212952, + "learning_rate": 2.5309849967384213e-05, + "log_odds_chosen": 16.325334548950195, + "log_odds_ratio": -0.013980518095195293, + "logits/chosen": -1.5070483684539795, + "logits/rejected": -1.2935047149658203, + "logps/chosen": -0.19495365023612976, + "logps/rejected": -14.654807090759277, + "loss": 0.3661, + "nll_loss": 0.3188515901565552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019495364278554916, + "rewards/margins": 1.4459854364395142, + "rewards/rejected": -1.4654808044433594, + "step": 762 + }, + { + "epoch": 1.4916911045943304, + "grad_norm": 0.6370005011558533, + "learning_rate": 2.5277234181343773e-05, + "log_odds_chosen": 17.001724243164062, + "log_odds_ratio": -0.007996270433068275, + "logits/chosen": -1.473594069480896, + "logits/rejected": -1.3629283905029297, + "logps/chosen": -0.11198326200246811, + "logps/rejected": -14.876924514770508, + "loss": 0.3624, + "nll_loss": 0.2421141266822815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011198326013982296, + "rewards/margins": 1.4764939546585083, + "rewards/rejected": -1.4876923561096191, + "step": 763 + }, + { + "epoch": 1.4936461388074291, + "grad_norm": 0.6785708069801331, + "learning_rate": 2.5244618395303325e-05, + "log_odds_chosen": 15.297161102294922, + "log_odds_ratio": -0.036455173045396805, + "logits/chosen": -1.485032081604004, + "logits/rejected": -1.4404804706573486, + "logps/chosen": -0.3513592481613159, + "logps/rejected": -14.30722713470459, + "loss": 0.3696, + "nll_loss": 0.4541681408882141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03513592854142189, + "rewards/margins": 1.3955867290496826, + "rewards/rejected": -1.430722713470459, + "step": 764 + }, + { + "epoch": 1.4956011730205279, + "grad_norm": 0.654188871383667, + "learning_rate": 2.5212002609262885e-05, + "log_odds_chosen": 15.435491561889648, + "log_odds_ratio": -0.0028955989982932806, + "logits/chosen": -1.4145777225494385, + "logits/rejected": -1.5018970966339111, + "logps/chosen": -0.23029837012290955, + "logps/rejected": -13.77541732788086, + "loss": 0.371, + "nll_loss": 0.3340996205806732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023029837757349014, + "rewards/margins": 1.3545119762420654, + "rewards/rejected": -1.3775417804718018, + "step": 765 + }, + { + "epoch": 1.4975562072336266, + "grad_norm": 0.6503623127937317, + "learning_rate": 2.517938682322244e-05, + "log_odds_chosen": 10.316645622253418, + "log_odds_ratio": -0.024731164798140526, + "logits/chosen": -1.608980655670166, + "logits/rejected": -1.498016119003296, + "logps/chosen": -0.3205469846725464, + "logps/rejected": -9.145040512084961, + "loss": 0.3712, + "nll_loss": 0.46240854263305664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03205469623208046, + "rewards/margins": 0.8824493885040283, + "rewards/rejected": -0.9145041108131409, + "step": 766 + }, + { + "epoch": 1.4995112414467253, + "grad_norm": 0.6442086100578308, + "learning_rate": 2.5146771037181997e-05, + "log_odds_chosen": 9.411669731140137, + "log_odds_ratio": -0.02242751605808735, + "logits/chosen": -1.4549416303634644, + "logits/rejected": -1.2701170444488525, + "logps/chosen": -0.26789262890815735, + "logps/rejected": -8.001017570495605, + "loss": 0.3584, + "nll_loss": 0.46468380093574524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026789262890815735, + "rewards/margins": 0.7733124494552612, + "rewards/rejected": -0.8001017570495605, + "step": 767 + }, + { + "epoch": 1.501466275659824, + "grad_norm": 0.6298905611038208, + "learning_rate": 2.5114155251141553e-05, + "log_odds_chosen": 8.487652778625488, + "log_odds_ratio": -0.01584544964134693, + "logits/chosen": -1.5017883777618408, + "logits/rejected": -1.4979103803634644, + "logps/chosen": -0.2226615846157074, + "logps/rejected": -6.938543319702148, + "loss": 0.3486, + "nll_loss": 0.31232932209968567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02226615883409977, + "rewards/margins": 0.6715881824493408, + "rewards/rejected": -0.6938543915748596, + "step": 768 + }, + { + "epoch": 1.5034213098729228, + "grad_norm": 0.655976414680481, + "learning_rate": 2.5081539465101113e-05, + "log_odds_chosen": 10.419085502624512, + "log_odds_ratio": -0.04392457753419876, + "logits/chosen": -1.5449113845825195, + "logits/rejected": -1.324507474899292, + "logps/chosen": -0.3603002727031708, + "logps/rejected": -9.040483474731445, + "loss": 0.3737, + "nll_loss": 0.5543684363365173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03603002801537514, + "rewards/margins": 0.8680182695388794, + "rewards/rejected": -0.9040483236312866, + "step": 769 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.646271288394928, + "learning_rate": 2.5048923679060665e-05, + "log_odds_chosen": 14.328849792480469, + "log_odds_ratio": -0.007130487821996212, + "logits/chosen": -1.3656564950942993, + "logits/rejected": -1.1149466037750244, + "logps/chosen": -0.3588442802429199, + "logps/rejected": -12.628063201904297, + "loss": 0.3607, + "nll_loss": 0.5057868361473083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03588443249464035, + "rewards/margins": 1.2269219160079956, + "rewards/rejected": -1.2628062963485718, + "step": 770 + }, + { + "epoch": 1.5073313782991202, + "grad_norm": 0.6461143493652344, + "learning_rate": 2.5016307893020225e-05, + "log_odds_chosen": 11.845163345336914, + "log_odds_ratio": -0.02455233596265316, + "logits/chosen": -1.3061847686767578, + "logits/rejected": -1.3293745517730713, + "logps/chosen": -0.2887045741081238, + "logps/rejected": -10.583145141601562, + "loss": 0.3601, + "nll_loss": 0.43144482374191284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028870457783341408, + "rewards/margins": 1.0294440984725952, + "rewards/rejected": -1.058314561843872, + "step": 771 + }, + { + "epoch": 1.509286412512219, + "grad_norm": 0.6795880794525146, + "learning_rate": 2.4983692106979778e-05, + "log_odds_chosen": 17.041038513183594, + "log_odds_ratio": -0.014792386442422867, + "logits/chosen": -1.55562424659729, + "logits/rejected": -1.3756402730941772, + "logps/chosen": -0.24881696701049805, + "logps/rejected": -15.491861343383789, + "loss": 0.3713, + "nll_loss": 0.4139917492866516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.024881698191165924, + "rewards/margins": 1.5243043899536133, + "rewards/rejected": -1.5491862297058105, + "step": 772 + }, + { + "epoch": 1.5112414467253177, + "grad_norm": 0.6271737217903137, + "learning_rate": 2.4951076320939334e-05, + "log_odds_chosen": 18.618173599243164, + "log_odds_ratio": -0.004635748919099569, + "logits/chosen": -1.625236988067627, + "logits/rejected": -1.1613147258758545, + "logps/chosen": -0.21625353395938873, + "logps/rejected": -16.988142013549805, + "loss": 0.3521, + "nll_loss": 0.34191593527793884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021625353023409843, + "rewards/margins": 1.6771888732910156, + "rewards/rejected": -1.6988143920898438, + "step": 773 + }, + { + "epoch": 1.5131964809384164, + "grad_norm": 0.6466004848480225, + "learning_rate": 2.491846053489889e-05, + "log_odds_chosen": 15.625382423400879, + "log_odds_ratio": -0.01867886446416378, + "logits/chosen": -1.4454361200332642, + "logits/rejected": -1.2656009197235107, + "logps/chosen": -0.2813417315483093, + "logps/rejected": -14.262359619140625, + "loss": 0.3415, + "nll_loss": 0.5349839925765991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028134174644947052, + "rewards/margins": 1.398101806640625, + "rewards/rejected": -1.4262359142303467, + "step": 774 + }, + { + "epoch": 1.5151515151515151, + "grad_norm": 0.6306490302085876, + "learning_rate": 2.4885844748858446e-05, + "log_odds_chosen": 18.23511505126953, + "log_odds_ratio": -0.0069998567923903465, + "logits/chosen": -1.6191725730895996, + "logits/rejected": -1.3695297241210938, + "logps/chosen": -0.20493394136428833, + "logps/rejected": -16.511795043945312, + "loss": 0.3365, + "nll_loss": 0.2759959101676941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020493393763899803, + "rewards/margins": 1.6306861639022827, + "rewards/rejected": -1.651179313659668, + "step": 775 + }, + { + "epoch": 1.5171065493646139, + "grad_norm": 0.6936075687408447, + "learning_rate": 2.4853228962818005e-05, + "log_odds_chosen": 12.353272438049316, + "log_odds_ratio": -0.02898303046822548, + "logits/chosen": -1.4498212337493896, + "logits/rejected": -1.5267882347106934, + "logps/chosen": -0.1782311499118805, + "logps/rejected": -10.338140487670898, + "loss": 0.3692, + "nll_loss": 0.3406756520271301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01782311499118805, + "rewards/margins": 1.0159908533096313, + "rewards/rejected": -1.0338139533996582, + "step": 776 + }, + { + "epoch": 1.5190615835777126, + "grad_norm": 0.6647539138793945, + "learning_rate": 2.482061317677756e-05, + "log_odds_chosen": 13.241720199584961, + "log_odds_ratio": -0.002739694667980075, + "logits/chosen": -1.5816177129745483, + "logits/rejected": -1.4350988864898682, + "logps/chosen": -0.20203867554664612, + "logps/rejected": -11.378307342529297, + "loss": 0.3501, + "nll_loss": 0.33473387360572815, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02020386978983879, + "rewards/margins": 1.1176269054412842, + "rewards/rejected": -1.1378307342529297, + "step": 777 + }, + { + "epoch": 1.5210166177908113, + "grad_norm": 0.6446375846862793, + "learning_rate": 2.4787997390737117e-05, + "log_odds_chosen": 14.171686172485352, + "log_odds_ratio": -0.011323995888233185, + "logits/chosen": -1.424851894378662, + "logits/rejected": -1.4029185771942139, + "logps/chosen": -0.2448456883430481, + "logps/rejected": -12.465836524963379, + "loss": 0.346, + "nll_loss": 0.38211482763290405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02448457106947899, + "rewards/margins": 1.2220990657806396, + "rewards/rejected": -1.2465837001800537, + "step": 778 + }, + { + "epoch": 1.52297165200391, + "grad_norm": 0.6076839566230774, + "learning_rate": 2.4755381604696674e-05, + "log_odds_chosen": 12.482505798339844, + "log_odds_ratio": -0.029754238203167915, + "logits/chosen": -1.4637641906738281, + "logits/rejected": -1.692347526550293, + "logps/chosen": -0.2460704743862152, + "logps/rejected": -10.835283279418945, + "loss": 0.3299, + "nll_loss": 0.3361165225505829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02460704743862152, + "rewards/margins": 1.0589213371276855, + "rewards/rejected": -1.0835285186767578, + "step": 779 + }, + { + "epoch": 1.5249266862170088, + "grad_norm": 0.6582280993461609, + "learning_rate": 2.472276581865623e-05, + "log_odds_chosen": 12.512533187866211, + "log_odds_ratio": -0.018382010981440544, + "logits/chosen": -1.5543544292449951, + "logits/rejected": -1.4219027757644653, + "logps/chosen": -0.19997254014015198, + "logps/rejected": -10.687376976013184, + "loss": 0.3508, + "nll_loss": 0.40771204233169556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019997255876660347, + "rewards/margins": 1.0487403869628906, + "rewards/rejected": -1.0687377452850342, + "step": 780 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.6607106328010559, + "learning_rate": 2.4690150032615786e-05, + "log_odds_chosen": 12.369226455688477, + "log_odds_ratio": -0.03857988864183426, + "logits/chosen": -1.6794297695159912, + "logits/rejected": -1.5130889415740967, + "logps/chosen": -0.25696226954460144, + "logps/rejected": -10.999906539916992, + "loss": 0.3562, + "nll_loss": 0.3827040195465088, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025696225464344025, + "rewards/margins": 1.0742943286895752, + "rewards/rejected": -1.0999906063079834, + "step": 781 + }, + { + "epoch": 1.5288367546432062, + "grad_norm": 0.6407656669616699, + "learning_rate": 2.4657534246575342e-05, + "log_odds_chosen": 15.932260513305664, + "log_odds_ratio": -0.0035019582137465477, + "logits/chosen": -1.5055348873138428, + "logits/rejected": -1.3411471843719482, + "logps/chosen": -0.1694302260875702, + "logps/rejected": -13.98609733581543, + "loss": 0.3524, + "nll_loss": 0.2669332027435303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01694302260875702, + "rewards/margins": 1.381666660308838, + "rewards/rejected": -1.3986097574234009, + "step": 782 + }, + { + "epoch": 1.530791788856305, + "grad_norm": 0.6461949348449707, + "learning_rate": 2.4624918460534898e-05, + "log_odds_chosen": 17.728368759155273, + "log_odds_ratio": -0.006590545177459717, + "logits/chosen": -1.6441571712493896, + "logits/rejected": -1.4071356058120728, + "logps/chosen": -0.15528073906898499, + "logps/rejected": -15.786825180053711, + "loss": 0.3441, + "nll_loss": 0.2643541097640991, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015528075397014618, + "rewards/margins": 1.5631544589996338, + "rewards/rejected": -1.5786824226379395, + "step": 783 + }, + { + "epoch": 1.5327468230694037, + "grad_norm": 0.6367537975311279, + "learning_rate": 2.4592302674494454e-05, + "log_odds_chosen": 14.047929763793945, + "log_odds_ratio": -0.0027687798719853163, + "logits/chosen": -1.6637932062149048, + "logits/rejected": -1.3909000158309937, + "logps/chosen": -0.16634142398834229, + "logps/rejected": -12.174304962158203, + "loss": 0.3517, + "nll_loss": 0.29810208082199097, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0166341420263052, + "rewards/margins": 1.200796365737915, + "rewards/rejected": -1.2174304723739624, + "step": 784 + }, + { + "epoch": 1.5347018572825024, + "grad_norm": 0.6786255836486816, + "learning_rate": 2.455968688845401e-05, + "log_odds_chosen": 10.802555084228516, + "log_odds_ratio": -0.028718765825033188, + "logits/chosen": -1.3438467979431152, + "logits/rejected": -1.2648730278015137, + "logps/chosen": -0.25947582721710205, + "logps/rejected": -9.39741039276123, + "loss": 0.3585, + "nll_loss": 0.399493008852005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025947581976652145, + "rewards/margins": 0.9137934446334839, + "rewards/rejected": -0.9397411346435547, + "step": 785 + }, + { + "epoch": 1.5366568914956011, + "grad_norm": 0.6772621870040894, + "learning_rate": 2.452707110241357e-05, + "log_odds_chosen": 9.590494155883789, + "log_odds_ratio": -0.02806643582880497, + "logits/chosen": -1.7245360612869263, + "logits/rejected": -1.5014640092849731, + "logps/chosen": -0.20619311928749084, + "logps/rejected": -8.039005279541016, + "loss": 0.3834, + "nll_loss": 0.3823516368865967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020619312301278114, + "rewards/margins": 0.783281147480011, + "rewards/rejected": -0.8039004802703857, + "step": 786 + }, + { + "epoch": 1.5386119257086999, + "grad_norm": 0.6401938796043396, + "learning_rate": 2.4494455316373126e-05, + "log_odds_chosen": 8.253326416015625, + "log_odds_ratio": -0.01680927164852619, + "logits/chosen": -1.5689555406570435, + "logits/rejected": -1.5735857486724854, + "logps/chosen": -0.238957017660141, + "logps/rejected": -6.475642204284668, + "loss": 0.3498, + "nll_loss": 0.41492488980293274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02389570139348507, + "rewards/margins": 0.6236685514450073, + "rewards/rejected": -0.6475642919540405, + "step": 787 + }, + { + "epoch": 1.5405669599217986, + "grad_norm": 0.6257124543190002, + "learning_rate": 2.446183953033268e-05, + "log_odds_chosen": 11.727794647216797, + "log_odds_ratio": -0.014606994576752186, + "logits/chosen": -1.520676612854004, + "logits/rejected": -1.355090856552124, + "logps/chosen": -0.2448488026857376, + "logps/rejected": -10.139500617980957, + "loss": 0.341, + "nll_loss": 0.3066455125808716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02448488026857376, + "rewards/margins": 0.9894651174545288, + "rewards/rejected": -1.0139501094818115, + "step": 788 + }, + { + "epoch": 1.5425219941348973, + "grad_norm": 0.6582112312316895, + "learning_rate": 2.4429223744292238e-05, + "log_odds_chosen": 15.42151165008545, + "log_odds_ratio": -0.006146646104753017, + "logits/chosen": -1.6564935445785522, + "logits/rejected": -1.3553837537765503, + "logps/chosen": -0.21617646515369415, + "logps/rejected": -13.713383674621582, + "loss": 0.3552, + "nll_loss": 0.36836397647857666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021617647260427475, + "rewards/margins": 1.34972083568573, + "rewards/rejected": -1.3713384866714478, + "step": 789 + }, + { + "epoch": 1.544477028347996, + "grad_norm": 0.6313238143920898, + "learning_rate": 2.4396607958251794e-05, + "log_odds_chosen": 13.360014915466309, + "log_odds_ratio": -0.019528048112988472, + "logits/chosen": -1.5610079765319824, + "logits/rejected": -1.4700090885162354, + "logps/chosen": -0.20489341020584106, + "logps/rejected": -11.730490684509277, + "loss": 0.3307, + "nll_loss": 0.2701171040534973, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020489342510700226, + "rewards/margins": 1.1525596380233765, + "rewards/rejected": -1.1730490922927856, + "step": 790 + }, + { + "epoch": 1.5464320625610948, + "grad_norm": 0.6491675972938538, + "learning_rate": 2.436399217221135e-05, + "log_odds_chosen": 14.602370262145996, + "log_odds_ratio": -0.03992389887571335, + "logits/chosen": -1.6335248947143555, + "logits/rejected": -1.3792409896850586, + "logps/chosen": -0.2070770412683487, + "logps/rejected": -13.003156661987305, + "loss": 0.3481, + "nll_loss": 0.314965158700943, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02070770412683487, + "rewards/margins": 1.2796080112457275, + "rewards/rejected": -1.3003156185150146, + "step": 791 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.6808804869651794, + "learning_rate": 2.4331376386170906e-05, + "log_odds_chosen": 9.638371467590332, + "log_odds_ratio": -0.07287915796041489, + "logits/chosen": -1.600858449935913, + "logits/rejected": -1.5951201915740967, + "logps/chosen": -0.21274784207344055, + "logps/rejected": -7.983227252960205, + "loss": 0.3661, + "nll_loss": 0.2991673946380615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021274782717227936, + "rewards/margins": 0.7770479917526245, + "rewards/rejected": -0.7983227968215942, + "step": 792 + }, + { + "epoch": 1.5503421309872922, + "grad_norm": 0.6314836740493774, + "learning_rate": 2.4298760600130462e-05, + "log_odds_chosen": 8.448981285095215, + "log_odds_ratio": -0.016495559364557266, + "logits/chosen": -1.7374095916748047, + "logits/rejected": -1.5267653465270996, + "logps/chosen": -0.21520723402500153, + "logps/rejected": -6.887152194976807, + "loss": 0.3424, + "nll_loss": 0.3153136968612671, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021520724520087242, + "rewards/margins": 0.6671944856643677, + "rewards/rejected": -0.6887152194976807, + "step": 793 + }, + { + "epoch": 1.552297165200391, + "grad_norm": 0.6273159384727478, + "learning_rate": 2.4266144814090018e-05, + "log_odds_chosen": 15.505703926086426, + "log_odds_ratio": -0.00341211911290884, + "logits/chosen": -1.5175175666809082, + "logits/rejected": -1.5890083312988281, + "logps/chosen": -0.148604154586792, + "logps/rejected": -13.557713508605957, + "loss": 0.3375, + "nll_loss": 0.25414466857910156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014860415831208229, + "rewards/margins": 1.3409110307693481, + "rewards/rejected": -1.355771541595459, + "step": 794 + }, + { + "epoch": 1.5542521994134897, + "grad_norm": 0.6145349144935608, + "learning_rate": 2.4233529028049574e-05, + "log_odds_chosen": 9.673782348632812, + "log_odds_ratio": -0.01962556689977646, + "logits/chosen": -1.4941682815551758, + "logits/rejected": -1.3382108211517334, + "logps/chosen": -0.19488871097564697, + "logps/rejected": -7.8854289054870605, + "loss": 0.3367, + "nll_loss": 0.320248544216156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019488872960209846, + "rewards/margins": 0.7690540552139282, + "rewards/rejected": -0.7885428667068481, + "step": 795 + }, + { + "epoch": 1.5562072336265884, + "grad_norm": 0.6436463594436646, + "learning_rate": 2.4200913242009134e-05, + "log_odds_chosen": 12.455611228942871, + "log_odds_ratio": -0.023946383967995644, + "logits/chosen": -1.485806941986084, + "logits/rejected": -1.3517658710479736, + "logps/chosen": -0.16529005765914917, + "logps/rejected": -10.697359085083008, + "loss": 0.3468, + "nll_loss": 0.24400563538074493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016529006883502007, + "rewards/margins": 1.0532068014144897, + "rewards/rejected": -1.0697358846664429, + "step": 796 + }, + { + "epoch": 1.5581622678396871, + "grad_norm": 0.6699291467666626, + "learning_rate": 2.416829745596869e-05, + "log_odds_chosen": 7.553085803985596, + "log_odds_ratio": -0.024005580693483353, + "logits/chosen": -1.2908012866973877, + "logits/rejected": -1.3799583911895752, + "logps/chosen": -0.20717699825763702, + "logps/rejected": -5.863973140716553, + "loss": 0.3655, + "nll_loss": 0.43048298358917236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020717699080705643, + "rewards/margins": 0.5656796097755432, + "rewards/rejected": -0.5863972902297974, + "step": 797 + }, + { + "epoch": 1.5601173020527859, + "grad_norm": 0.6519328951835632, + "learning_rate": 2.4135681669928246e-05, + "log_odds_chosen": 18.227941513061523, + "log_odds_ratio": -0.012793010100722313, + "logits/chosen": -1.4587820768356323, + "logits/rejected": -1.1391925811767578, + "logps/chosen": -0.25920867919921875, + "logps/rejected": -16.777631759643555, + "loss": 0.3502, + "nll_loss": 0.41739606857299805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025920869782567024, + "rewards/margins": 1.6518423557281494, + "rewards/rejected": -1.6777631044387817, + "step": 798 + }, + { + "epoch": 1.5620723362658846, + "grad_norm": 0.6139220595359802, + "learning_rate": 2.4103065883887802e-05, + "log_odds_chosen": 13.013568878173828, + "log_odds_ratio": -0.0690615177154541, + "logits/chosen": -1.5596532821655273, + "logits/rejected": -1.6008620262145996, + "logps/chosen": -0.23273666203022003, + "logps/rejected": -11.430973052978516, + "loss": 0.3254, + "nll_loss": 0.3108055591583252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023273665457963943, + "rewards/margins": 1.1198235750198364, + "rewards/rejected": -1.1430972814559937, + "step": 799 + }, + { + "epoch": 1.5640273704789833, + "grad_norm": 0.648903489112854, + "learning_rate": 2.4070450097847358e-05, + "log_odds_chosen": 18.07754135131836, + "log_odds_ratio": -0.011904461309313774, + "logits/chosen": -1.463768482208252, + "logits/rejected": -1.1367120742797852, + "logps/chosen": -0.1451840102672577, + "logps/rejected": -16.135589599609375, + "loss": 0.3455, + "nll_loss": 0.2870514392852783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014518399722874165, + "rewards/margins": 1.5990405082702637, + "rewards/rejected": -1.6135590076446533, + "step": 800 + }, + { + "epoch": 1.565982404692082, + "grad_norm": 0.6534183025360107, + "learning_rate": 2.4037834311806914e-05, + "log_odds_chosen": 21.39441680908203, + "log_odds_ratio": -0.0019205206772312522, + "logits/chosen": -1.5744091272354126, + "logits/rejected": -1.4598350524902344, + "logps/chosen": -0.12605887651443481, + "logps/rejected": -19.106746673583984, + "loss": 0.3447, + "nll_loss": 0.23001015186309814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012605887837707996, + "rewards/margins": 1.8980686664581299, + "rewards/rejected": -1.9106745719909668, + "step": 801 + }, + { + "epoch": 1.5679374389051808, + "grad_norm": 0.6509966254234314, + "learning_rate": 2.400521852576647e-05, + "log_odds_chosen": 10.580936431884766, + "log_odds_ratio": -0.04427013546228409, + "logits/chosen": -1.3645555973052979, + "logits/rejected": -1.4574313163757324, + "logps/chosen": -0.36144518852233887, + "logps/rejected": -9.440972328186035, + "loss": 0.3385, + "nll_loss": 0.40813004970550537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03614451736211777, + "rewards/margins": 0.9079527854919434, + "rewards/rejected": -0.9440972805023193, + "step": 802 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.6455848217010498, + "learning_rate": 2.3972602739726026e-05, + "log_odds_chosen": 17.88613510131836, + "log_odds_ratio": -0.017419416457414627, + "logits/chosen": -1.5186724662780762, + "logits/rejected": -1.5242904424667358, + "logps/chosen": -0.20645351707935333, + "logps/rejected": -16.28599739074707, + "loss": 0.3431, + "nll_loss": 0.3379721939563751, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020645353943109512, + "rewards/margins": 1.607954502105713, + "rewards/rejected": -1.6285996437072754, + "step": 803 + }, + { + "epoch": 1.5718475073313782, + "grad_norm": 0.6253862977027893, + "learning_rate": 2.3939986953685582e-05, + "log_odds_chosen": 11.841687202453613, + "log_odds_ratio": -0.005307571962475777, + "logits/chosen": -1.5133681297302246, + "logits/rejected": -1.3660204410552979, + "logps/chosen": -0.12318545579910278, + "logps/rejected": -9.510640144348145, + "loss": 0.3442, + "nll_loss": 0.30134204030036926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012318545952439308, + "rewards/margins": 0.9387454986572266, + "rewards/rejected": -0.9510639905929565, + "step": 804 + }, + { + "epoch": 1.573802541544477, + "grad_norm": 0.6196828484535217, + "learning_rate": 2.390737116764514e-05, + "log_odds_chosen": 15.088223457336426, + "log_odds_ratio": -0.0024937670677900314, + "logits/chosen": -1.6118518114089966, + "logits/rejected": -1.3162827491760254, + "logps/chosen": -0.22951363027095795, + "logps/rejected": -13.297386169433594, + "loss": 0.3341, + "nll_loss": 0.3742668330669403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022951364517211914, + "rewards/margins": 1.3067872524261475, + "rewards/rejected": -1.3297386169433594, + "step": 805 + }, + { + "epoch": 1.5757575757575757, + "grad_norm": 0.6180077791213989, + "learning_rate": 2.3874755381604698e-05, + "log_odds_chosen": 14.08885383605957, + "log_odds_ratio": -0.01131533645093441, + "logits/chosen": -1.736650824546814, + "logits/rejected": -1.3489861488342285, + "logps/chosen": -0.16396504640579224, + "logps/rejected": -12.129948616027832, + "loss": 0.3232, + "nll_loss": 0.300262451171875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016396503895521164, + "rewards/margins": 1.1965985298156738, + "rewards/rejected": -1.2129948139190674, + "step": 806 + }, + { + "epoch": 1.5777126099706744, + "grad_norm": 0.6404759883880615, + "learning_rate": 2.3842139595564254e-05, + "log_odds_chosen": 12.742757797241211, + "log_odds_ratio": -0.0107985008507967, + "logits/chosen": -1.4439475536346436, + "logits/rejected": -1.3069300651550293, + "logps/chosen": -0.13302195072174072, + "logps/rejected": -10.622312545776367, + "loss": 0.3467, + "nll_loss": 0.2997775077819824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013302195817232132, + "rewards/margins": 1.04892897605896, + "rewards/rejected": -1.0622310638427734, + "step": 807 + }, + { + "epoch": 1.5796676441837731, + "grad_norm": 0.6663649678230286, + "learning_rate": 2.380952380952381e-05, + "log_odds_chosen": 10.21731185913086, + "log_odds_ratio": -0.014745337888598442, + "logits/chosen": -1.551225185394287, + "logits/rejected": -1.3802858591079712, + "logps/chosen": -0.1859707087278366, + "logps/rejected": -8.408479690551758, + "loss": 0.347, + "nll_loss": 0.29739469289779663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0185970738530159, + "rewards/margins": 0.8222509622573853, + "rewards/rejected": -0.8408479690551758, + "step": 808 + }, + { + "epoch": 1.5816226783968719, + "grad_norm": 0.6095337867736816, + "learning_rate": 2.3776908023483366e-05, + "log_odds_chosen": 11.18100643157959, + "log_odds_ratio": -0.01750890351831913, + "logits/chosen": -1.583784580230713, + "logits/rejected": -1.3995460271835327, + "logps/chosen": -0.2806301414966583, + "logps/rejected": -9.926553726196289, + "loss": 0.3218, + "nll_loss": 0.41545283794403076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.028063016012310982, + "rewards/margins": 0.9645922780036926, + "rewards/rejected": -0.9926553964614868, + "step": 809 + }, + { + "epoch": 1.5835777126099706, + "grad_norm": 0.6262449026107788, + "learning_rate": 2.3744292237442922e-05, + "log_odds_chosen": 13.136646270751953, + "log_odds_ratio": -0.006813552230596542, + "logits/chosen": -1.3956856727600098, + "logits/rejected": -1.3477883338928223, + "logps/chosen": -0.1832582652568817, + "logps/rejected": -11.257617950439453, + "loss": 0.3328, + "nll_loss": 0.319985032081604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01832582801580429, + "rewards/margins": 1.1074360609054565, + "rewards/rejected": -1.1257619857788086, + "step": 810 + }, + { + "epoch": 1.5855327468230693, + "grad_norm": 0.6319876313209534, + "learning_rate": 2.371167645140248e-05, + "log_odds_chosen": 10.559402465820312, + "log_odds_ratio": -0.015161506831645966, + "logits/chosen": -1.5897603034973145, + "logits/rejected": -1.389951229095459, + "logps/chosen": -0.2397930920124054, + "logps/rejected": -9.102799415588379, + "loss": 0.3249, + "nll_loss": 0.40196263790130615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0239793099462986, + "rewards/margins": 0.8863006830215454, + "rewards/rejected": -0.9102799892425537, + "step": 811 + }, + { + "epoch": 1.587487781036168, + "grad_norm": 0.6227273941040039, + "learning_rate": 2.3679060665362035e-05, + "log_odds_chosen": 12.518637657165527, + "log_odds_ratio": -0.01492786593735218, + "logits/chosen": -1.7811179161071777, + "logits/rejected": -1.3635449409484863, + "logps/chosen": -0.23682385683059692, + "logps/rejected": -11.112258911132812, + "loss": 0.3317, + "nll_loss": 0.3351787328720093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023682385683059692, + "rewards/margins": 1.0875434875488281, + "rewards/rejected": -1.1112258434295654, + "step": 812 + }, + { + "epoch": 1.5894428152492668, + "grad_norm": 0.6266920566558838, + "learning_rate": 2.364644487932159e-05, + "log_odds_chosen": 11.228069305419922, + "log_odds_ratio": -0.018999531865119934, + "logits/chosen": -1.6162071228027344, + "logits/rejected": -1.4328984022140503, + "logps/chosen": -0.15189999341964722, + "logps/rejected": -9.272918701171875, + "loss": 0.3285, + "nll_loss": 0.25463977456092834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015189999714493752, + "rewards/margins": 0.9121018648147583, + "rewards/rejected": -0.9272918701171875, + "step": 813 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.6255060434341431, + "learning_rate": 2.3613829093281147e-05, + "log_odds_chosen": 11.868322372436523, + "log_odds_ratio": -0.054105017334222794, + "logits/chosen": -1.7329208850860596, + "logits/rejected": -1.6022906303405762, + "logps/chosen": -0.1897512674331665, + "logps/rejected": -9.950489044189453, + "loss": 0.3213, + "nll_loss": 0.30125030875205994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01897512748837471, + "rewards/margins": 0.9760737419128418, + "rewards/rejected": -0.995048999786377, + "step": 814 + }, + { + "epoch": 1.5933528836754642, + "grad_norm": 0.6101838946342468, + "learning_rate": 2.3581213307240703e-05, + "log_odds_chosen": 11.333024978637695, + "log_odds_ratio": -0.04497556760907173, + "logits/chosen": -1.6604610681533813, + "logits/rejected": -1.2861175537109375, + "logps/chosen": -0.25592663884162903, + "logps/rejected": -9.920008659362793, + "loss": 0.3165, + "nll_loss": 0.38193821907043457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.025592664256691933, + "rewards/margins": 0.9664082527160645, + "rewards/rejected": -0.992000937461853, + "step": 815 + }, + { + "epoch": 1.595307917888563, + "grad_norm": 0.5997951626777649, + "learning_rate": 2.3548597521200262e-05, + "log_odds_chosen": 19.092350006103516, + "log_odds_ratio": -0.00014729345275554806, + "logits/chosen": -1.53122878074646, + "logits/rejected": -1.3963606357574463, + "logps/chosen": -0.3040957450866699, + "logps/rejected": -17.764747619628906, + "loss": 0.3104, + "nll_loss": 0.4373921751976013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.030409574508666992, + "rewards/margins": 1.746065378189087, + "rewards/rejected": -1.776474952697754, + "step": 816 + }, + { + "epoch": 1.5972629521016617, + "grad_norm": 0.618396520614624, + "learning_rate": 2.351598173515982e-05, + "log_odds_chosen": 11.093877792358398, + "log_odds_ratio": -0.013668099418282509, + "logits/chosen": -1.5584518909454346, + "logits/rejected": -1.3106476068496704, + "logps/chosen": -0.1799987554550171, + "logps/rejected": -9.299154281616211, + "loss": 0.3234, + "nll_loss": 0.289469838142395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01799987629055977, + "rewards/margins": 0.9119156002998352, + "rewards/rejected": -0.9299154877662659, + "step": 817 + }, + { + "epoch": 1.5992179863147604, + "grad_norm": 0.5883194208145142, + "learning_rate": 2.3483365949119374e-05, + "log_odds_chosen": 15.846931457519531, + "log_odds_ratio": -0.0040477849543094635, + "logits/chosen": -1.5924524068832397, + "logits/rejected": -1.2660540342330933, + "logps/chosen": -0.1636350452899933, + "logps/rejected": -13.925788879394531, + "loss": 0.3014, + "nll_loss": 0.2573124170303345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01636350527405739, + "rewards/margins": 1.3762155771255493, + "rewards/rejected": -1.3925789594650269, + "step": 818 + }, + { + "epoch": 1.6011730205278591, + "grad_norm": 0.6203172206878662, + "learning_rate": 2.345075016307893e-05, + "log_odds_chosen": 10.769038200378418, + "log_odds_ratio": -0.05336151272058487, + "logits/chosen": -1.6622706651687622, + "logits/rejected": -1.417654037475586, + "logps/chosen": -0.33794113993644714, + "logps/rejected": -9.578767776489258, + "loss": 0.3237, + "nll_loss": 0.48372316360473633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033794112503528595, + "rewards/margins": 0.9240826368331909, + "rewards/rejected": -0.9578767418861389, + "step": 819 + }, + { + "epoch": 1.6031280547409579, + "grad_norm": 0.6108238101005554, + "learning_rate": 2.3418134377038487e-05, + "log_odds_chosen": 10.658021926879883, + "log_odds_ratio": -0.015958378091454506, + "logits/chosen": -1.5668814182281494, + "logits/rejected": -1.476122498512268, + "logps/chosen": -0.3141708970069885, + "logps/rejected": -9.423797607421875, + "loss": 0.3134, + "nll_loss": 0.4293084740638733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03141709417104721, + "rewards/margins": 0.910962700843811, + "rewards/rejected": -0.9423798322677612, + "step": 820 + }, + { + "epoch": 1.6050830889540566, + "grad_norm": 0.6264744400978088, + "learning_rate": 2.3385518590998043e-05, + "log_odds_chosen": 16.494956970214844, + "log_odds_ratio": -0.0007809184025973082, + "logits/chosen": -1.4920196533203125, + "logits/rejected": -1.3410472869873047, + "logps/chosen": -0.1370513141155243, + "logps/rejected": -14.303756713867188, + "loss": 0.3218, + "nll_loss": 0.39672932028770447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01370513066649437, + "rewards/margins": 1.416670560836792, + "rewards/rejected": -1.4303758144378662, + "step": 821 + }, + { + "epoch": 1.6070381231671553, + "grad_norm": 0.6146429181098938, + "learning_rate": 2.33529028049576e-05, + "log_odds_chosen": 8.971610069274902, + "log_odds_ratio": -0.003907974809408188, + "logits/chosen": -1.3835071325302124, + "logits/rejected": -1.493558406829834, + "logps/chosen": -0.2147475630044937, + "logps/rejected": -7.333754062652588, + "loss": 0.3184, + "nll_loss": 0.3382198214530945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02147475630044937, + "rewards/margins": 0.7119007110595703, + "rewards/rejected": -0.7333754897117615, + "step": 822 + }, + { + "epoch": 1.608993157380254, + "grad_norm": 0.6064382791519165, + "learning_rate": 2.3320287018917155e-05, + "log_odds_chosen": 13.731101989746094, + "log_odds_ratio": -0.003446481190621853, + "logits/chosen": -1.530711054801941, + "logits/rejected": -1.4803907871246338, + "logps/chosen": -0.17487803101539612, + "logps/rejected": -11.712807655334473, + "loss": 0.3197, + "nll_loss": 0.25240787863731384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01748780347406864, + "rewards/margins": 1.1537930965423584, + "rewards/rejected": -1.171280860900879, + "step": 823 + }, + { + "epoch": 1.6109481915933528, + "grad_norm": 0.624786913394928, + "learning_rate": 2.328767123287671e-05, + "log_odds_chosen": 16.069509506225586, + "log_odds_ratio": -0.006766228005290031, + "logits/chosen": -1.5261902809143066, + "logits/rejected": -1.3062082529067993, + "logps/chosen": -0.129902645945549, + "logps/rejected": -13.975217819213867, + "loss": 0.3116, + "nll_loss": 0.23605968058109283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01299026608467102, + "rewards/margins": 1.3845314979553223, + "rewards/rejected": -1.39752197265625, + "step": 824 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.6034452319145203, + "learning_rate": 2.3255055446836267e-05, + "log_odds_chosen": 14.575065612792969, + "log_odds_ratio": -0.014632967300713062, + "logits/chosen": -1.5104985237121582, + "logits/rejected": -1.409914255142212, + "logps/chosen": -0.2341783344745636, + "logps/rejected": -12.933868408203125, + "loss": 0.3049, + "nll_loss": 0.35785433650016785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02341783046722412, + "rewards/margins": 1.2699689865112305, + "rewards/rejected": -1.2933868169784546, + "step": 825 + }, + { + "epoch": 1.6148582600195502, + "grad_norm": 0.6113386750221252, + "learning_rate": 2.3222439660795826e-05, + "log_odds_chosen": 12.562272071838379, + "log_odds_ratio": -0.03272683173418045, + "logits/chosen": -1.6768767833709717, + "logits/rejected": -1.3863074779510498, + "logps/chosen": -0.27004772424697876, + "logps/rejected": -11.031567573547363, + "loss": 0.3093, + "nll_loss": 0.39833903312683105, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027004772797226906, + "rewards/margins": 1.0761520862579346, + "rewards/rejected": -1.1031568050384521, + "step": 826 + }, + { + "epoch": 1.616813294232649, + "grad_norm": 0.6250815391540527, + "learning_rate": 2.3189823874755383e-05, + "log_odds_chosen": 16.60581398010254, + "log_odds_ratio": -0.02347889356315136, + "logits/chosen": -1.473537802696228, + "logits/rejected": -1.316845178604126, + "logps/chosen": -0.21050432324409485, + "logps/rejected": -14.661670684814453, + "loss": 0.3202, + "nll_loss": 0.39090222120285034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021050430834293365, + "rewards/margins": 1.445116639137268, + "rewards/rejected": -1.4661670923233032, + "step": 827 + }, + { + "epoch": 1.6187683284457477, + "grad_norm": 0.5722936391830444, + "learning_rate": 2.315720808871494e-05, + "log_odds_chosen": 13.427713394165039, + "log_odds_ratio": -0.004188911989331245, + "logits/chosen": -1.6685709953308105, + "logits/rejected": -1.4580049514770508, + "logps/chosen": -0.18899103999137878, + "logps/rejected": -11.535916328430176, + "loss": 0.291, + "nll_loss": 0.298930823802948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018899105489253998, + "rewards/margins": 1.1346924304962158, + "rewards/rejected": -1.1535916328430176, + "step": 828 + }, + { + "epoch": 1.6207233626588464, + "grad_norm": 0.596215009689331, + "learning_rate": 2.3124592302674495e-05, + "log_odds_chosen": 16.451297760009766, + "log_odds_ratio": -0.01611592434346676, + "logits/chosen": -1.506678819656372, + "logits/rejected": -1.4203855991363525, + "logps/chosen": -0.29949837923049927, + "logps/rejected": -15.09254264831543, + "loss": 0.2987, + "nll_loss": 0.4524744153022766, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.029949838295578957, + "rewards/margins": 1.4793044328689575, + "rewards/rejected": -1.5092542171478271, + "step": 829 + }, + { + "epoch": 1.6226783968719452, + "grad_norm": 0.6530519723892212, + "learning_rate": 2.309197651663405e-05, + "log_odds_chosen": 14.439098358154297, + "log_odds_ratio": -0.007836119271814823, + "logits/chosen": -1.5670411586761475, + "logits/rejected": -1.200214147567749, + "logps/chosen": -0.15435531735420227, + "logps/rejected": -12.338390350341797, + "loss": 0.3291, + "nll_loss": 0.31827443838119507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015435531735420227, + "rewards/margins": 1.2184035778045654, + "rewards/rejected": -1.2338391542434692, + "step": 830 + }, + { + "epoch": 1.6246334310850439, + "grad_norm": 0.611697256565094, + "learning_rate": 2.3059360730593607e-05, + "log_odds_chosen": 13.452825546264648, + "log_odds_ratio": -0.008763410151004791, + "logits/chosen": -1.600196123123169, + "logits/rejected": -1.4298840761184692, + "logps/chosen": -0.15685398876667023, + "logps/rejected": -11.231447219848633, + "loss": 0.3074, + "nll_loss": 0.3088415861129761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015685398131608963, + "rewards/margins": 1.1074591875076294, + "rewards/rejected": -1.123144507408142, + "step": 831 + }, + { + "epoch": 1.6265884652981426, + "grad_norm": 0.6166245341300964, + "learning_rate": 2.3026744944553163e-05, + "log_odds_chosen": 16.177419662475586, + "log_odds_ratio": -0.0007651163032278419, + "logits/chosen": -1.543987512588501, + "logits/rejected": -1.4759690761566162, + "logps/chosen": -0.1936577558517456, + "logps/rejected": -14.487470626831055, + "loss": 0.3144, + "nll_loss": 0.2736108899116516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01936577633023262, + "rewards/margins": 1.429381251335144, + "rewards/rejected": -1.4487470388412476, + "step": 832 + }, + { + "epoch": 1.6285434995112413, + "grad_norm": 0.6086735129356384, + "learning_rate": 2.299412915851272e-05, + "log_odds_chosen": 9.119421005249023, + "log_odds_ratio": -0.019066646695137024, + "logits/chosen": -1.62909734249115, + "logits/rejected": -1.3488184213638306, + "logps/chosen": -0.279875248670578, + "logps/rejected": -7.80269193649292, + "loss": 0.3135, + "nll_loss": 0.39621633291244507, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0279875248670578, + "rewards/margins": 0.752281665802002, + "rewards/rejected": -0.780269205570221, + "step": 833 + }, + { + "epoch": 1.63049853372434, + "grad_norm": 0.6057042479515076, + "learning_rate": 2.2961513372472275e-05, + "log_odds_chosen": 9.306339263916016, + "log_odds_ratio": -0.02815840393304825, + "logits/chosen": -1.4952294826507568, + "logits/rejected": -1.4543557167053223, + "logps/chosen": -0.13139484822750092, + "logps/rejected": -7.198477268218994, + "loss": 0.3122, + "nll_loss": 0.23874346911907196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013139485381543636, + "rewards/margins": 0.7067083120346069, + "rewards/rejected": -0.7198477983474731, + "step": 834 + }, + { + "epoch": 1.6324535679374388, + "grad_norm": 0.6027778387069702, + "learning_rate": 2.292889758643183e-05, + "log_odds_chosen": 14.91845417022705, + "log_odds_ratio": -0.021964289247989655, + "logits/chosen": -1.419800043106079, + "logits/rejected": -1.2608305215835571, + "logps/chosen": -0.19526419043540955, + "logps/rejected": -13.201471328735352, + "loss": 0.3086, + "nll_loss": 0.2723200023174286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019526420161128044, + "rewards/margins": 1.300620675086975, + "rewards/rejected": -1.3201470375061035, + "step": 835 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.6076327562332153, + "learning_rate": 2.289628180039139e-05, + "log_odds_chosen": 13.49659538269043, + "log_odds_ratio": -0.013633392751216888, + "logits/chosen": -1.604211688041687, + "logits/rejected": -1.4792571067810059, + "logps/chosen": -0.23028850555419922, + "logps/rejected": -11.906097412109375, + "loss": 0.3104, + "nll_loss": 0.35764673352241516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.023028850555419922, + "rewards/margins": 1.1675809621810913, + "rewards/rejected": -1.1906096935272217, + "step": 836 + }, + { + "epoch": 1.6363636363636362, + "grad_norm": 0.6162664294242859, + "learning_rate": 2.2863666014350947e-05, + "log_odds_chosen": 12.277507781982422, + "log_odds_ratio": -0.0014905014541000128, + "logits/chosen": -1.7341245412826538, + "logits/rejected": -1.4498708248138428, + "logps/chosen": -0.1687300056219101, + "logps/rejected": -10.171403884887695, + "loss": 0.3108, + "nll_loss": 0.2976795434951782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01687300018966198, + "rewards/margins": 1.0002673864364624, + "rewards/rejected": -1.01714026927948, + "step": 837 + }, + { + "epoch": 1.638318670576735, + "grad_norm": 0.6098231077194214, + "learning_rate": 2.2831050228310503e-05, + "log_odds_chosen": 8.956438064575195, + "log_odds_ratio": -0.02198060229420662, + "logits/chosen": -1.4481498003005981, + "logits/rejected": -1.6626367568969727, + "logps/chosen": -0.17200303077697754, + "logps/rejected": -7.123298168182373, + "loss": 0.2968, + "nll_loss": 0.3224264979362488, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017200304195284843, + "rewards/margins": 0.6951295137405396, + "rewards/rejected": -0.7123298645019531, + "step": 838 + }, + { + "epoch": 1.6402737047898337, + "grad_norm": 0.6110907196998596, + "learning_rate": 2.279843444227006e-05, + "log_odds_chosen": 11.857666969299316, + "log_odds_ratio": -0.019825506955385208, + "logits/chosen": -1.5411734580993652, + "logits/rejected": -1.3328675031661987, + "logps/chosen": -0.21313393115997314, + "logps/rejected": -10.141993522644043, + "loss": 0.2998, + "nll_loss": 0.29482758045196533, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021313393488526344, + "rewards/margins": 0.9928860664367676, + "rewards/rejected": -1.0141994953155518, + "step": 839 + }, + { + "epoch": 1.6422287390029324, + "grad_norm": 0.617862343788147, + "learning_rate": 2.2765818656229615e-05, + "log_odds_chosen": 16.556804656982422, + "log_odds_ratio": -0.0014976884704083204, + "logits/chosen": -1.6036369800567627, + "logits/rejected": -1.4009063243865967, + "logps/chosen": -0.22690153121948242, + "logps/rejected": -14.927436828613281, + "loss": 0.3028, + "nll_loss": 0.30156946182250977, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022690152749419212, + "rewards/margins": 1.4700535535812378, + "rewards/rejected": -1.492743730545044, + "step": 840 + }, + { + "epoch": 1.6441837732160312, + "grad_norm": 0.6145368218421936, + "learning_rate": 2.273320287018917e-05, + "log_odds_chosen": 20.334272384643555, + "log_odds_ratio": -0.0027307397685945034, + "logits/chosen": -1.6907758712768555, + "logits/rejected": -1.4345932006835938, + "logps/chosen": -0.11393465101718903, + "logps/rejected": -18.124401092529297, + "loss": 0.3086, + "nll_loss": 0.21039482951164246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011393466033041477, + "rewards/margins": 1.8010467290878296, + "rewards/rejected": -1.8124401569366455, + "step": 841 + }, + { + "epoch": 1.6461388074291299, + "grad_norm": 0.6234651803970337, + "learning_rate": 2.2700587084148727e-05, + "log_odds_chosen": 10.896885871887207, + "log_odds_ratio": -0.019651083275675774, + "logits/chosen": -1.5614049434661865, + "logits/rejected": -1.5333524942398071, + "logps/chosen": -0.19476784765720367, + "logps/rejected": -9.244889259338379, + "loss": 0.3003, + "nll_loss": 0.2709362804889679, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01947678253054619, + "rewards/margins": 0.9050122499465942, + "rewards/rejected": -0.9244889616966248, + "step": 842 + }, + { + "epoch": 1.6480938416422286, + "grad_norm": 0.6048691868782043, + "learning_rate": 2.2667971298108283e-05, + "log_odds_chosen": 15.295771598815918, + "log_odds_ratio": -0.0019282361026853323, + "logits/chosen": -1.460780382156372, + "logits/rejected": -1.491201639175415, + "logps/chosen": -0.27015894651412964, + "logps/rejected": -13.560197830200195, + "loss": 0.2975, + "nll_loss": 0.32892897725105286, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027015892788767815, + "rewards/margins": 1.3290040493011475, + "rewards/rejected": -1.3560199737548828, + "step": 843 + }, + { + "epoch": 1.6500488758553273, + "grad_norm": 0.6151620745658875, + "learning_rate": 2.263535551206784e-05, + "log_odds_chosen": 17.084186553955078, + "log_odds_ratio": -0.004608933813869953, + "logits/chosen": -1.5779426097869873, + "logits/rejected": -1.2757627964019775, + "logps/chosen": -0.14168445765972137, + "logps/rejected": -15.056461334228516, + "loss": 0.3043, + "nll_loss": 0.26483193039894104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014168445952236652, + "rewards/margins": 1.4914777278900146, + "rewards/rejected": -1.5056462287902832, + "step": 844 + }, + { + "epoch": 1.652003910068426, + "grad_norm": 0.6088802218437195, + "learning_rate": 2.2602739726027396e-05, + "log_odds_chosen": 12.137191772460938, + "log_odds_ratio": -0.004319117870181799, + "logits/chosen": -1.723249912261963, + "logits/rejected": -1.3981679677963257, + "logps/chosen": -0.18943747878074646, + "logps/rejected": -10.384544372558594, + "loss": 0.2961, + "nll_loss": 0.34181374311447144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018943749368190765, + "rewards/margins": 1.0195107460021973, + "rewards/rejected": -1.038454532623291, + "step": 845 + }, + { + "epoch": 1.6539589442815248, + "grad_norm": 0.6465806365013123, + "learning_rate": 2.2570123939986955e-05, + "log_odds_chosen": 12.866445541381836, + "log_odds_ratio": -0.0041405134834349155, + "logits/chosen": -1.613861322402954, + "logits/rejected": -1.384838342666626, + "logps/chosen": -0.2415432333946228, + "logps/rejected": -11.16464614868164, + "loss": 0.3109, + "nll_loss": 0.31257858872413635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02415432408452034, + "rewards/margins": 1.0923104286193848, + "rewards/rejected": -1.116464614868164, + "step": 846 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.6262547969818115, + "learning_rate": 2.253750815394651e-05, + "log_odds_chosen": 11.25482177734375, + "log_odds_ratio": -0.02670002169907093, + "logits/chosen": -1.4659978151321411, + "logits/rejected": -1.2225340604782104, + "logps/chosen": -0.23987135291099548, + "logps/rejected": -9.708686828613281, + "loss": 0.2962, + "nll_loss": 0.3750954866409302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02398713305592537, + "rewards/margins": 0.9468815326690674, + "rewards/rejected": -0.970868706703186, + "step": 847 + }, + { + "epoch": 1.6578690127077222, + "grad_norm": 0.6410757899284363, + "learning_rate": 2.2504892367906067e-05, + "log_odds_chosen": 14.406316757202148, + "log_odds_ratio": -0.018084313720464706, + "logits/chosen": -1.6775099039077759, + "logits/rejected": -1.3182134628295898, + "logps/chosen": -0.20264482498168945, + "logps/rejected": -12.696199417114258, + "loss": 0.3054, + "nll_loss": 0.3377282917499542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020264483988285065, + "rewards/margins": 1.2493553161621094, + "rewards/rejected": -1.2696199417114258, + "step": 848 + }, + { + "epoch": 1.659824046920821, + "grad_norm": 0.6029911041259766, + "learning_rate": 2.2472276581865623e-05, + "log_odds_chosen": 15.613334655761719, + "log_odds_ratio": -0.01627780869603157, + "logits/chosen": -1.4751166105270386, + "logits/rejected": -1.2101380825042725, + "logps/chosen": -0.16040529310703278, + "logps/rejected": -13.784161567687988, + "loss": 0.2965, + "nll_loss": 0.2520313262939453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016040530055761337, + "rewards/margins": 1.3623754978179932, + "rewards/rejected": -1.3784160614013672, + "step": 849 + }, + { + "epoch": 1.6617790811339197, + "grad_norm": 0.6048762798309326, + "learning_rate": 2.243966079582518e-05, + "log_odds_chosen": 6.991404056549072, + "log_odds_ratio": -0.0140980314463377, + "logits/chosen": -1.4950300455093384, + "logits/rejected": -1.4787267446517944, + "logps/chosen": -0.17318877577781677, + "logps/rejected": -5.334920883178711, + "loss": 0.2897, + "nll_loss": 0.31002670526504517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017318876460194588, + "rewards/margins": 0.516173243522644, + "rewards/rejected": -0.5334920883178711, + "step": 850 + }, + { + "epoch": 1.6637341153470184, + "grad_norm": 0.6309613585472107, + "learning_rate": 2.2407045009784735e-05, + "log_odds_chosen": 15.171703338623047, + "log_odds_ratio": -0.010035413317382336, + "logits/chosen": -1.6443887948989868, + "logits/rejected": -1.3153481483459473, + "logps/chosen": -0.19902820885181427, + "logps/rejected": -13.221452713012695, + "loss": 0.2992, + "nll_loss": 0.3997703492641449, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019902821630239487, + "rewards/margins": 1.3022425174713135, + "rewards/rejected": -1.3221452236175537, + "step": 851 + }, + { + "epoch": 1.6656891495601172, + "grad_norm": 0.5962141752243042, + "learning_rate": 2.237442922374429e-05, + "log_odds_chosen": 10.618556022644043, + "log_odds_ratio": -0.01026216521859169, + "logits/chosen": -1.6516616344451904, + "logits/rejected": -1.5020816326141357, + "logps/chosen": -0.19008460640907288, + "logps/rejected": -8.589662551879883, + "loss": 0.295, + "nll_loss": 0.32560575008392334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.019008463248610497, + "rewards/margins": 0.8399578332901001, + "rewards/rejected": -0.8589662909507751, + "step": 852 + }, + { + "epoch": 1.6676441837732159, + "grad_norm": 0.6177801489830017, + "learning_rate": 2.2341813437703848e-05, + "log_odds_chosen": 12.326190948486328, + "log_odds_ratio": -0.00971852894872427, + "logits/chosen": -1.697513461112976, + "logits/rejected": -1.452056646347046, + "logps/chosen": -0.11838740855455399, + "logps/rejected": -10.13878345489502, + "loss": 0.2956, + "nll_loss": 0.27184998989105225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011838741600513458, + "rewards/margins": 1.0020396709442139, + "rewards/rejected": -1.013878345489502, + "step": 853 + }, + { + "epoch": 1.6695992179863146, + "grad_norm": 0.6108191013336182, + "learning_rate": 2.2309197651663404e-05, + "log_odds_chosen": 19.533605575561523, + "log_odds_ratio": -0.0017824710812419653, + "logits/chosen": -1.635897159576416, + "logits/rejected": -1.2040958404541016, + "logps/chosen": -0.07297106087207794, + "logps/rejected": -16.876380920410156, + "loss": 0.2861, + "nll_loss": 0.18932056427001953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007297106087207794, + "rewards/margins": 1.6803408861160278, + "rewards/rejected": -1.6876380443572998, + "step": 854 + }, + { + "epoch": 1.6715542521994133, + "grad_norm": 0.6075525879859924, + "learning_rate": 2.227658186562296e-05, + "log_odds_chosen": 12.234249114990234, + "log_odds_ratio": -0.015133354812860489, + "logits/chosen": -1.5522019863128662, + "logits/rejected": -1.4951956272125244, + "logps/chosen": -0.17598524689674377, + "logps/rejected": -10.578929901123047, + "loss": 0.2869, + "nll_loss": 0.3114855885505676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017598526552319527, + "rewards/margins": 1.0402944087982178, + "rewards/rejected": -1.0578930377960205, + "step": 855 + }, + { + "epoch": 1.673509286412512, + "grad_norm": 0.6090246438980103, + "learning_rate": 2.224396607958252e-05, + "log_odds_chosen": 14.423230171203613, + "log_odds_ratio": -0.006344897672533989, + "logits/chosen": -1.655858039855957, + "logits/rejected": -1.5607144832611084, + "logps/chosen": -0.21107351779937744, + "logps/rejected": -12.721628189086914, + "loss": 0.2876, + "nll_loss": 0.3117351233959198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021107351407408714, + "rewards/margins": 1.2510554790496826, + "rewards/rejected": -1.272162914276123, + "step": 856 + }, + { + "epoch": 1.6754643206256108, + "grad_norm": 0.6034498810768127, + "learning_rate": 2.2211350293542075e-05, + "log_odds_chosen": 14.982501983642578, + "log_odds_ratio": -0.019525278359651566, + "logits/chosen": -1.6407623291015625, + "logits/rejected": -1.4595391750335693, + "logps/chosen": -0.1461537778377533, + "logps/rejected": -12.920125961303711, + "loss": 0.2887, + "nll_loss": 0.22013011574745178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014615376479923725, + "rewards/margins": 1.2773972749710083, + "rewards/rejected": -1.2920126914978027, + "step": 857 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.6090744137763977, + "learning_rate": 2.217873450750163e-05, + "log_odds_chosen": 12.048627853393555, + "log_odds_ratio": -0.011364803649485111, + "logits/chosen": -1.5029475688934326, + "logits/rejected": -1.4928908348083496, + "logps/chosen": -0.27407318353652954, + "logps/rejected": -10.7251558303833, + "loss": 0.2914, + "nll_loss": 0.3660019636154175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027407318353652954, + "rewards/margins": 1.0451083183288574, + "rewards/rejected": -1.0725154876708984, + "step": 858 + }, + { + "epoch": 1.6793743890518082, + "grad_norm": 0.5744964480400085, + "learning_rate": 2.2146118721461187e-05, + "log_odds_chosen": 16.51137351989746, + "log_odds_ratio": -0.0006009330973029137, + "logits/chosen": -1.5849660634994507, + "logits/rejected": -1.3565964698791504, + "logps/chosen": -0.1860659420490265, + "logps/rejected": -14.485471725463867, + "loss": 0.2724, + "nll_loss": 0.2413504272699356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01860659383237362, + "rewards/margins": 1.4299405813217163, + "rewards/rejected": -1.448547124862671, + "step": 859 + }, + { + "epoch": 1.681329423264907, + "grad_norm": 0.5998291969299316, + "learning_rate": 2.2113502935420744e-05, + "log_odds_chosen": 16.41946029663086, + "log_odds_ratio": -0.009826318360865116, + "logits/chosen": -1.6522797346115112, + "logits/rejected": -1.4256740808486938, + "logps/chosen": -0.2383415699005127, + "logps/rejected": -14.95840835571289, + "loss": 0.2843, + "nll_loss": 0.3019595742225647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02383415587246418, + "rewards/margins": 1.4720067977905273, + "rewards/rejected": -1.4958407878875732, + "step": 860 + }, + { + "epoch": 1.6832844574780057, + "grad_norm": 0.6106069087982178, + "learning_rate": 2.20808871493803e-05, + "log_odds_chosen": 14.975397109985352, + "log_odds_ratio": -0.01875372603535652, + "logits/chosen": -1.7265218496322632, + "logits/rejected": -1.4638460874557495, + "logps/chosen": -0.12736257910728455, + "logps/rejected": -12.904085159301758, + "loss": 0.2799, + "nll_loss": 0.24003368616104126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012736259028315544, + "rewards/margins": 1.2776724100112915, + "rewards/rejected": -1.2904086112976074, + "step": 861 + }, + { + "epoch": 1.6852394916911044, + "grad_norm": 0.6057350635528564, + "learning_rate": 2.2048271363339856e-05, + "log_odds_chosen": 7.435217380523682, + "log_odds_ratio": -0.0442759245634079, + "logits/chosen": -1.6618025302886963, + "logits/rejected": -1.3070824146270752, + "logps/chosen": -0.16772404313087463, + "logps/rejected": -5.703051567077637, + "loss": 0.2956, + "nll_loss": 0.28290677070617676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016772404313087463, + "rewards/margins": 0.5535327196121216, + "rewards/rejected": -0.5703051090240479, + "step": 862 + }, + { + "epoch": 1.6871945259042032, + "grad_norm": 0.5764407515525818, + "learning_rate": 2.2015655577299412e-05, + "log_odds_chosen": 8.942172050476074, + "log_odds_ratio": -0.012524496763944626, + "logits/chosen": -1.6201058626174927, + "logits/rejected": -1.5261415243148804, + "logps/chosen": -0.15709254145622253, + "logps/rejected": -7.071645736694336, + "loss": 0.2704, + "nll_loss": 0.23275607824325562, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015709254890680313, + "rewards/margins": 0.6914553046226501, + "rewards/rejected": -0.7071645855903625, + "step": 863 + }, + { + "epoch": 1.689149560117302, + "grad_norm": 0.6076523661613464, + "learning_rate": 2.1983039791258968e-05, + "log_odds_chosen": 9.216392517089844, + "log_odds_ratio": -0.02585381641983986, + "logits/chosen": -1.504335880279541, + "logits/rejected": -1.5276654958724976, + "logps/chosen": -0.1839342564344406, + "logps/rejected": -7.498133659362793, + "loss": 0.2801, + "nll_loss": 0.25085753202438354, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01839342713356018, + "rewards/margins": 0.7314199805259705, + "rewards/rejected": -0.7498133182525635, + "step": 864 + }, + { + "epoch": 1.6911045943304008, + "grad_norm": 0.6076325178146362, + "learning_rate": 2.1950424005218524e-05, + "log_odds_chosen": 9.358734130859375, + "log_odds_ratio": -0.006770077161490917, + "logits/chosen": -1.470247745513916, + "logits/rejected": -1.5776870250701904, + "logps/chosen": -0.16025994718074799, + "logps/rejected": -7.450796127319336, + "loss": 0.2794, + "nll_loss": 0.2737869918346405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016025995835661888, + "rewards/margins": 0.7290536165237427, + "rewards/rejected": -0.7450796365737915, + "step": 865 + }, + { + "epoch": 1.6930596285434996, + "grad_norm": 0.6006945967674255, + "learning_rate": 2.1917808219178083e-05, + "log_odds_chosen": 11.624385833740234, + "log_odds_ratio": -0.01591043919324875, + "logits/chosen": -1.5092933177947998, + "logits/rejected": -1.4576666355133057, + "logps/chosen": -0.1503734141588211, + "logps/rejected": -9.637356758117676, + "loss": 0.281, + "nll_loss": 0.2542268633842468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01503734290599823, + "rewards/margins": 0.9486984014511108, + "rewards/rejected": -0.9637356996536255, + "step": 866 + }, + { + "epoch": 1.6950146627565983, + "grad_norm": 0.6176522374153137, + "learning_rate": 2.188519243313764e-05, + "log_odds_chosen": 12.659637451171875, + "log_odds_ratio": -0.0098464610055089, + "logits/chosen": -1.5197327136993408, + "logits/rejected": -1.336582899093628, + "logps/chosen": -0.14162100851535797, + "logps/rejected": -10.545426368713379, + "loss": 0.2924, + "nll_loss": 0.2835369408130646, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014162102714180946, + "rewards/margins": 1.0403804779052734, + "rewards/rejected": -1.0545425415039062, + "step": 867 + }, + { + "epoch": 1.696969696969697, + "grad_norm": 0.5788339376449585, + "learning_rate": 2.1852576647097196e-05, + "log_odds_chosen": 11.42207145690918, + "log_odds_ratio": -0.014632419683039188, + "logits/chosen": -1.7646629810333252, + "logits/rejected": -1.6007739305496216, + "logps/chosen": -0.2035512924194336, + "logps/rejected": -9.741190910339355, + "loss": 0.2731, + "nll_loss": 0.3331218957901001, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02035513147711754, + "rewards/margins": 0.9537639617919922, + "rewards/rejected": -0.9741190671920776, + "step": 868 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.6148770451545715, + "learning_rate": 2.1819960861056752e-05, + "log_odds_chosen": 16.08672332763672, + "log_odds_ratio": -0.0022573648020625114, + "logits/chosen": -1.4134225845336914, + "logits/rejected": -1.345531940460205, + "logps/chosen": -0.09789349138736725, + "logps/rejected": -13.640209197998047, + "loss": 0.2916, + "nll_loss": 0.20531433820724487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00978935044258833, + "rewards/margins": 1.3542317152023315, + "rewards/rejected": -1.3640210628509521, + "step": 869 + }, + { + "epoch": 1.7008797653958945, + "grad_norm": 0.586615800857544, + "learning_rate": 2.1787345075016308e-05, + "log_odds_chosen": 10.112385749816895, + "log_odds_ratio": -0.010707613080739975, + "logits/chosen": -1.5808337926864624, + "logits/rejected": -1.4903881549835205, + "logps/chosen": -0.15453305840492249, + "logps/rejected": -8.29654598236084, + "loss": 0.2719, + "nll_loss": 0.259713351726532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015453306026756763, + "rewards/margins": 0.8142012357711792, + "rewards/rejected": -0.8296545743942261, + "step": 870 + }, + { + "epoch": 1.7028347996089932, + "grad_norm": 0.6230431795120239, + "learning_rate": 2.1754729288975864e-05, + "log_odds_chosen": 20.17174530029297, + "log_odds_ratio": -0.005364878568798304, + "logits/chosen": -1.5472718477249146, + "logits/rejected": -1.2960267066955566, + "logps/chosen": -0.21737578511238098, + "logps/rejected": -18.515621185302734, + "loss": 0.2951, + "nll_loss": 0.3781585097312927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021737579256296158, + "rewards/margins": 1.829824686050415, + "rewards/rejected": -1.851562261581421, + "step": 871 + }, + { + "epoch": 1.704789833822092, + "grad_norm": 0.6041767001152039, + "learning_rate": 2.172211350293542e-05, + "log_odds_chosen": 19.54475212097168, + "log_odds_ratio": -0.006638507358729839, + "logits/chosen": -1.7618160247802734, + "logits/rejected": -1.518640398979187, + "logps/chosen": -0.14396408200263977, + "logps/rejected": -17.492713928222656, + "loss": 0.2775, + "nll_loss": 0.26342496275901794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014396408572793007, + "rewards/margins": 1.734874963760376, + "rewards/rejected": -1.7492715120315552, + "step": 872 + }, + { + "epoch": 1.7067448680351907, + "grad_norm": 0.5745936632156372, + "learning_rate": 2.1689497716894976e-05, + "log_odds_chosen": 11.768324851989746, + "log_odds_ratio": -0.007713957689702511, + "logits/chosen": -1.7722716331481934, + "logits/rejected": -1.5404983758926392, + "logps/chosen": -0.16259898245334625, + "logps/rejected": -9.508157730102539, + "loss": 0.2664, + "nll_loss": 0.2111862152814865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016259899362921715, + "rewards/margins": 0.9345559477806091, + "rewards/rejected": -0.9508158564567566, + "step": 873 + }, + { + "epoch": 1.7086999022482894, + "grad_norm": 0.6185850501060486, + "learning_rate": 2.1656881930854532e-05, + "log_odds_chosen": 14.426231384277344, + "log_odds_ratio": -0.006821425631642342, + "logits/chosen": -1.652878761291504, + "logits/rejected": -1.509543538093567, + "logps/chosen": -0.08461812138557434, + "logps/rejected": -11.819326400756836, + "loss": 0.2861, + "nll_loss": 0.20457717776298523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00846181157976389, + "rewards/margins": 1.1734708547592163, + "rewards/rejected": -1.1819326877593994, + "step": 874 + }, + { + "epoch": 1.710654936461388, + "grad_norm": 0.6022407412528992, + "learning_rate": 2.1624266144814088e-05, + "log_odds_chosen": 14.307589530944824, + "log_odds_ratio": -0.010418519377708435, + "logits/chosen": -1.4887311458587646, + "logits/rejected": -1.1983273029327393, + "logps/chosen": -0.1673031747341156, + "logps/rejected": -12.183444023132324, + "loss": 0.2788, + "nll_loss": 0.3767797350883484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01673031784594059, + "rewards/margins": 1.2016141414642334, + "rewards/rejected": -1.2183444499969482, + "step": 875 + }, + { + "epoch": 1.7126099706744868, + "grad_norm": 0.5588152408599854, + "learning_rate": 2.1591650358773648e-05, + "log_odds_chosen": 12.752374649047852, + "log_odds_ratio": -0.00350450212135911, + "logits/chosen": -1.660904884338379, + "logits/rejected": -1.6054790019989014, + "logps/chosen": -0.16006998717784882, + "logps/rejected": -10.71613883972168, + "loss": 0.2605, + "nll_loss": 0.2669861912727356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01600699871778488, + "rewards/margins": 1.0556069612503052, + "rewards/rejected": -1.0716139078140259, + "step": 876 + }, + { + "epoch": 1.7145650048875856, + "grad_norm": 0.5979718565940857, + "learning_rate": 2.1559034572733204e-05, + "log_odds_chosen": 10.923490524291992, + "log_odds_ratio": -0.035646192729473114, + "logits/chosen": -1.6792666912078857, + "logits/rejected": -1.5626498460769653, + "logps/chosen": -0.20017804205417633, + "logps/rejected": -9.143239974975586, + "loss": 0.281, + "nll_loss": 0.3211482763290405, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020017804577946663, + "rewards/margins": 0.8943063020706177, + "rewards/rejected": -0.9143239855766296, + "step": 877 + }, + { + "epoch": 1.7165200391006843, + "grad_norm": 0.5879942178726196, + "learning_rate": 2.152641878669276e-05, + "log_odds_chosen": 15.15517807006836, + "log_odds_ratio": -0.0011164565803483129, + "logits/chosen": -1.6150181293487549, + "logits/rejected": -1.422170639038086, + "logps/chosen": -0.1289491057395935, + "logps/rejected": -12.777889251708984, + "loss": 0.2695, + "nll_loss": 0.30085599422454834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012894910760223866, + "rewards/margins": 1.2648941278457642, + "rewards/rejected": -1.2777889966964722, + "step": 878 + }, + { + "epoch": 1.718475073313783, + "grad_norm": 0.5927748680114746, + "learning_rate": 2.1493803000652316e-05, + "log_odds_chosen": 13.555412292480469, + "log_odds_ratio": -0.016909930855035782, + "logits/chosen": -1.5670934915542603, + "logits/rejected": -1.6483173370361328, + "logps/chosen": -0.12696610391139984, + "logps/rejected": -11.512728691101074, + "loss": 0.2725, + "nll_loss": 0.189909428358078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012696610763669014, + "rewards/margins": 1.1385762691497803, + "rewards/rejected": -1.1512727737426758, + "step": 879 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.5789029002189636, + "learning_rate": 2.1461187214611872e-05, + "log_odds_chosen": 17.190872192382812, + "log_odds_ratio": -0.0023901958484202623, + "logits/chosen": -1.6677535772323608, + "logits/rejected": -1.355820655822754, + "logps/chosen": -0.13524408638477325, + "logps/rejected": -15.040617942810059, + "loss": 0.2648, + "nll_loss": 0.24306774139404297, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013524409383535385, + "rewards/margins": 1.4905375242233276, + "rewards/rejected": -1.5040618181228638, + "step": 880 + }, + { + "epoch": 1.7223851417399805, + "grad_norm": 0.6185790300369263, + "learning_rate": 2.1428571428571428e-05, + "log_odds_chosen": 19.587175369262695, + "log_odds_ratio": -4.613444616552442e-06, + "logits/chosen": -1.6451802253723145, + "logits/rejected": -1.27000892162323, + "logps/chosen": -0.11107385158538818, + "logps/rejected": -17.168960571289062, + "loss": 0.2839, + "nll_loss": 0.21654827892780304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011107385158538818, + "rewards/margins": 1.7057886123657227, + "rewards/rejected": -1.7168962955474854, + "step": 881 + }, + { + "epoch": 1.7243401759530792, + "grad_norm": 0.7470810413360596, + "learning_rate": 2.1395955642530984e-05, + "log_odds_chosen": 14.853752136230469, + "log_odds_ratio": -0.009404229931533337, + "logits/chosen": -1.4782228469848633, + "logits/rejected": -1.3019423484802246, + "logps/chosen": -0.158029243350029, + "logps/rejected": -12.814777374267578, + "loss": 0.262, + "nll_loss": 0.2990247309207916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01580292358994484, + "rewards/margins": 1.2656748294830322, + "rewards/rejected": -1.281477689743042, + "step": 882 + }, + { + "epoch": 1.726295210166178, + "grad_norm": 0.5924163460731506, + "learning_rate": 2.136333985649054e-05, + "log_odds_chosen": 15.483648300170898, + "log_odds_ratio": -0.00345861678943038, + "logits/chosen": -1.614540457725525, + "logits/rejected": -1.4212077856063843, + "logps/chosen": -0.13438370823860168, + "logps/rejected": -13.14250373840332, + "loss": 0.2758, + "nll_loss": 0.28449779748916626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013438371941447258, + "rewards/margins": 1.300812005996704, + "rewards/rejected": -1.3142503499984741, + "step": 883 + }, + { + "epoch": 1.7282502443792767, + "grad_norm": 0.5608046054840088, + "learning_rate": 2.1330724070450096e-05, + "log_odds_chosen": 11.59035873413086, + "log_odds_ratio": -0.014511458575725555, + "logits/chosen": -1.5631400346755981, + "logits/rejected": -1.3325377702713013, + "logps/chosen": -0.18498674035072327, + "logps/rejected": -9.85841178894043, + "loss": 0.2654, + "nll_loss": 0.35330063104629517, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018498675897717476, + "rewards/margins": 0.9673425555229187, + "rewards/rejected": -0.9858412146568298, + "step": 884 + }, + { + "epoch": 1.7302052785923754, + "grad_norm": 0.5659024715423584, + "learning_rate": 2.1298108284409652e-05, + "log_odds_chosen": 17.549320220947266, + "log_odds_ratio": -0.0022212681360542774, + "logits/chosen": -1.5316340923309326, + "logits/rejected": -1.3783321380615234, + "logps/chosen": -0.12174608558416367, + "logps/rejected": -15.295266151428223, + "loss": 0.2615, + "nll_loss": 0.2881045937538147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012174610048532486, + "rewards/margins": 1.5173521041870117, + "rewards/rejected": -1.5295265913009644, + "step": 885 + }, + { + "epoch": 1.7321603128054741, + "grad_norm": 0.5535696148872375, + "learning_rate": 2.1265492498369212e-05, + "log_odds_chosen": 9.556693077087402, + "log_odds_ratio": -0.006874157581478357, + "logits/chosen": -1.7323682308197021, + "logits/rejected": -1.4240717887878418, + "logps/chosen": -0.22893129289150238, + "logps/rejected": -7.562557220458984, + "loss": 0.2507, + "nll_loss": 0.4049264192581177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.022893130779266357, + "rewards/margins": 0.73336261510849, + "rewards/rejected": -0.7562557458877563, + "step": 886 + }, + { + "epoch": 1.7341153470185728, + "grad_norm": 0.5986427664756775, + "learning_rate": 2.1232876712328768e-05, + "log_odds_chosen": 14.807144165039062, + "log_odds_ratio": -0.010590963996946812, + "logits/chosen": -1.5003430843353271, + "logits/rejected": -1.317291498184204, + "logps/chosen": -0.1291673481464386, + "logps/rejected": -12.642704010009766, + "loss": 0.2714, + "nll_loss": 0.24573811888694763, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012916735373437405, + "rewards/margins": 1.2513537406921387, + "rewards/rejected": -1.264270544052124, + "step": 887 + }, + { + "epoch": 1.7360703812316716, + "grad_norm": 0.612058162689209, + "learning_rate": 2.1200260926288324e-05, + "log_odds_chosen": 10.338470458984375, + "log_odds_ratio": -0.012615757063031197, + "logits/chosen": -1.6652278900146484, + "logits/rejected": -1.5869152545928955, + "logps/chosen": -0.12240932881832123, + "logps/rejected": -8.218172073364258, + "loss": 0.2713, + "nll_loss": 0.24266500771045685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012240933254361153, + "rewards/margins": 0.8095762729644775, + "rewards/rejected": -0.82181715965271, + "step": 888 + }, + { + "epoch": 1.7380254154447703, + "grad_norm": 0.5711016654968262, + "learning_rate": 2.116764514024788e-05, + "log_odds_chosen": 11.148866653442383, + "log_odds_ratio": -0.009495975449681282, + "logits/chosen": -1.6320388317108154, + "logits/rejected": -1.3461627960205078, + "logps/chosen": -0.15687909722328186, + "logps/rejected": -8.980779647827148, + "loss": 0.2592, + "nll_loss": 0.21806372702121735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015687908977270126, + "rewards/margins": 0.882390022277832, + "rewards/rejected": -0.8980779647827148, + "step": 889 + }, + { + "epoch": 1.739980449657869, + "grad_norm": 0.5836576223373413, + "learning_rate": 2.1135029354207436e-05, + "log_odds_chosen": 11.298870086669922, + "log_odds_ratio": -0.016323678195476532, + "logits/chosen": -1.5576900243759155, + "logits/rejected": -1.2611849308013916, + "logps/chosen": -0.1366139054298401, + "logps/rejected": -9.289764404296875, + "loss": 0.2574, + "nll_loss": 0.24796125292778015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013661390170454979, + "rewards/margins": 0.9153150320053101, + "rewards/rejected": -0.9289764165878296, + "step": 890 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.5726979970932007, + "learning_rate": 2.1102413568166992e-05, + "log_odds_chosen": 13.783609390258789, + "log_odds_ratio": -0.009664734825491905, + "logits/chosen": -1.5668880939483643, + "logits/rejected": -1.3762723207473755, + "logps/chosen": -0.17604362964630127, + "logps/rejected": -11.890863418579102, + "loss": 0.2609, + "nll_loss": 0.3035792410373688, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017604362219572067, + "rewards/margins": 1.1714820861816406, + "rewards/rejected": -1.1890864372253418, + "step": 891 + }, + { + "epoch": 1.7438905180840665, + "grad_norm": 0.5716158747673035, + "learning_rate": 2.106979778212655e-05, + "log_odds_chosen": 12.73627758026123, + "log_odds_ratio": -0.009319066070020199, + "logits/chosen": -1.5309879779815674, + "logits/rejected": -1.5121307373046875, + "logps/chosen": -0.12204430252313614, + "logps/rejected": -10.61105728149414, + "loss": 0.2534, + "nll_loss": 0.22138012945652008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012204429134726524, + "rewards/margins": 1.0489013195037842, + "rewards/rejected": -1.061105728149414, + "step": 892 + }, + { + "epoch": 1.7458455522971652, + "grad_norm": 0.5831878185272217, + "learning_rate": 2.1037181996086105e-05, + "log_odds_chosen": 7.288383483886719, + "log_odds_ratio": -0.03347208723425865, + "logits/chosen": -1.564469575881958, + "logits/rejected": -1.4752169847488403, + "logps/chosen": -0.1578916311264038, + "logps/rejected": -5.407576560974121, + "loss": 0.2605, + "nll_loss": 0.2700563669204712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01578916423022747, + "rewards/margins": 0.5249685049057007, + "rewards/rejected": -0.5407576560974121, + "step": 893 + }, + { + "epoch": 1.747800586510264, + "grad_norm": 0.5697640776634216, + "learning_rate": 2.100456621004566e-05, + "log_odds_chosen": 7.611023902893066, + "log_odds_ratio": -0.009054156020283699, + "logits/chosen": -1.395176887512207, + "logits/rejected": -1.4929730892181396, + "logps/chosen": -0.17575004696846008, + "logps/rejected": -5.5685529708862305, + "loss": 0.2536, + "nll_loss": 0.25048312544822693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017575005069375038, + "rewards/margins": 0.5392802953720093, + "rewards/rejected": -0.556855320930481, + "step": 894 + }, + { + "epoch": 1.7497556207233627, + "grad_norm": 0.5858087539672852, + "learning_rate": 2.0971950424005217e-05, + "log_odds_chosen": 13.06997299194336, + "log_odds_ratio": -0.005054926499724388, + "logits/chosen": -1.6962864398956299, + "logits/rejected": -1.415411114692688, + "logps/chosen": -0.1521148830652237, + "logps/rejected": -11.158365249633789, + "loss": 0.2615, + "nll_loss": 0.27205604314804077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015211489051580429, + "rewards/margins": 1.1006250381469727, + "rewards/rejected": -1.1158366203308105, + "step": 895 + }, + { + "epoch": 1.7517106549364614, + "grad_norm": 0.5782363414764404, + "learning_rate": 2.0939334637964776e-05, + "log_odds_chosen": 9.219330787658691, + "log_odds_ratio": -0.021752770990133286, + "logits/chosen": -1.7938518524169922, + "logits/rejected": -1.6550018787384033, + "logps/chosen": -0.2425970733165741, + "logps/rejected": -7.736455917358398, + "loss": 0.262, + "nll_loss": 0.3696126937866211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02425970695912838, + "rewards/margins": 0.7493858337402344, + "rewards/rejected": -0.7736455202102661, + "step": 896 + }, + { + "epoch": 1.7536656891495601, + "grad_norm": 0.5889632701873779, + "learning_rate": 2.0906718851924332e-05, + "log_odds_chosen": 15.710424423217773, + "log_odds_ratio": -0.0034753475338220596, + "logits/chosen": -1.7788236141204834, + "logits/rejected": -1.489164113998413, + "logps/chosen": -0.07456071674823761, + "logps/rejected": -13.082891464233398, + "loss": 0.2649, + "nll_loss": 0.18808403611183167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007456071674823761, + "rewards/margins": 1.300832986831665, + "rewards/rejected": -1.3082890510559082, + "step": 897 + }, + { + "epoch": 1.7556207233626588, + "grad_norm": 0.58430415391922, + "learning_rate": 2.087410306588389e-05, + "log_odds_chosen": 20.96262550354004, + "log_odds_ratio": -0.004736342933028936, + "logits/chosen": -1.6957740783691406, + "logits/rejected": -1.407779574394226, + "logps/chosen": -0.13052159547805786, + "logps/rejected": -18.71969985961914, + "loss": 0.2638, + "nll_loss": 0.3018331825733185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013052161782979965, + "rewards/margins": 1.8589179515838623, + "rewards/rejected": -1.8719701766967773, + "step": 898 + }, + { + "epoch": 1.7575757575757576, + "grad_norm": 0.5725110769271851, + "learning_rate": 2.0841487279843444e-05, + "log_odds_chosen": 9.993307113647461, + "log_odds_ratio": -0.017755631357431412, + "logits/chosen": -1.6689411401748657, + "logits/rejected": -1.4715301990509033, + "logps/chosen": -0.1303953379392624, + "logps/rejected": -7.801048278808594, + "loss": 0.265, + "nll_loss": 0.2773182690143585, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013039533980190754, + "rewards/margins": 0.7670652866363525, + "rewards/rejected": -0.7801048755645752, + "step": 899 + }, + { + "epoch": 1.7595307917888563, + "grad_norm": 0.5752285718917847, + "learning_rate": 2.0808871493803e-05, + "log_odds_chosen": 8.475903511047363, + "log_odds_ratio": -0.020473940297961235, + "logits/chosen": -1.5056757926940918, + "logits/rejected": -1.454603910446167, + "logps/chosen": -0.17266598343849182, + "logps/rejected": -6.61230993270874, + "loss": 0.2517, + "nll_loss": 0.2934053838253021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01726659946143627, + "rewards/margins": 0.6439643502235413, + "rewards/rejected": -0.6612309813499451, + "step": 900 + }, + { + "epoch": 1.761485826001955, + "grad_norm": 0.5564587116241455, + "learning_rate": 2.0776255707762557e-05, + "log_odds_chosen": 16.322755813598633, + "log_odds_ratio": -0.009563097730278969, + "logits/chosen": -1.518384575843811, + "logits/rejected": -1.2396823167800903, + "logps/chosen": -0.12405097484588623, + "logps/rejected": -14.184717178344727, + "loss": 0.2477, + "nll_loss": 0.23192265629768372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012405096553266048, + "rewards/margins": 1.406066656112671, + "rewards/rejected": -1.4184718132019043, + "step": 901 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.5759814977645874, + "learning_rate": 2.0743639921722113e-05, + "log_odds_chosen": 16.904342651367188, + "log_odds_ratio": -0.0012585959630087018, + "logits/chosen": -1.5144226551055908, + "logits/rejected": -1.3953793048858643, + "logps/chosen": -0.09307500720024109, + "logps/rejected": -14.401715278625488, + "loss": 0.2587, + "nll_loss": 0.25964629650115967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00930749997496605, + "rewards/margins": 1.4308640956878662, + "rewards/rejected": -1.440171480178833, + "step": 902 + }, + { + "epoch": 1.7653958944281525, + "grad_norm": 0.5689054131507874, + "learning_rate": 2.071102413568167e-05, + "log_odds_chosen": 16.2772274017334, + "log_odds_ratio": -0.002732709515839815, + "logits/chosen": -1.6866083145141602, + "logits/rejected": -1.5196216106414795, + "logps/chosen": -0.1057031974196434, + "logps/rejected": -13.94883918762207, + "loss": 0.2543, + "nll_loss": 0.253223717212677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010570320300757885, + "rewards/margins": 1.3843135833740234, + "rewards/rejected": -1.3948838710784912, + "step": 903 + }, + { + "epoch": 1.7673509286412512, + "grad_norm": 0.5658145546913147, + "learning_rate": 2.0678408349641225e-05, + "log_odds_chosen": 13.739230155944824, + "log_odds_ratio": -0.019357753917574883, + "logits/chosen": -1.703791856765747, + "logits/rejected": -1.5562715530395508, + "logps/chosen": -0.11202475428581238, + "logps/rejected": -11.30612564086914, + "loss": 0.2524, + "nll_loss": 0.21716666221618652, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011202475056052208, + "rewards/margins": 1.1194100379943848, + "rewards/rejected": -1.1306126117706299, + "step": 904 + }, + { + "epoch": 1.76930596285435, + "grad_norm": 0.5724008083343506, + "learning_rate": 2.064579256360078e-05, + "log_odds_chosen": 12.7920503616333, + "log_odds_ratio": -0.006406975444406271, + "logits/chosen": -1.4504400491714478, + "logits/rejected": -1.535116195678711, + "logps/chosen": -0.1325298547744751, + "logps/rejected": -10.440775871276855, + "loss": 0.2542, + "nll_loss": 0.2657815217971802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0132529865950346, + "rewards/margins": 1.0308246612548828, + "rewards/rejected": -1.044077754020691, + "step": 905 + }, + { + "epoch": 1.7712609970674487, + "grad_norm": 0.5553867220878601, + "learning_rate": 2.061317677756034e-05, + "log_odds_chosen": 17.933456420898438, + "log_odds_ratio": -0.003525475738570094, + "logits/chosen": -1.6861608028411865, + "logits/rejected": -1.5413535833358765, + "logps/chosen": -0.09744444489479065, + "logps/rejected": -15.519876480102539, + "loss": 0.2433, + "nll_loss": 0.19845622777938843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00974444579333067, + "rewards/margins": 1.542243242263794, + "rewards/rejected": -1.551987648010254, + "step": 906 + }, + { + "epoch": 1.7732160312805474, + "grad_norm": 0.5606549382209778, + "learning_rate": 2.0580560991519897e-05, + "log_odds_chosen": 10.858787536621094, + "log_odds_ratio": -0.003708701580762863, + "logits/chosen": -1.7061833143234253, + "logits/rejected": -1.541084885597229, + "logps/chosen": -0.17293232679367065, + "logps/rejected": -9.005252838134766, + "loss": 0.2483, + "nll_loss": 0.26897966861724854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017293233424425125, + "rewards/margins": 0.8832319974899292, + "rewards/rejected": -0.9005252122879028, + "step": 907 + }, + { + "epoch": 1.7751710654936461, + "grad_norm": 0.5665396451950073, + "learning_rate": 2.0547945205479453e-05, + "log_odds_chosen": 10.951019287109375, + "log_odds_ratio": -0.012331211939454079, + "logits/chosen": -1.6647257804870605, + "logits/rejected": -1.3393669128417969, + "logps/chosen": -0.1482212394475937, + "logps/rejected": -9.071502685546875, + "loss": 0.252, + "nll_loss": 0.2501649856567383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014822125434875488, + "rewards/margins": 0.8923282623291016, + "rewards/rejected": -0.907150387763977, + "step": 908 + }, + { + "epoch": 1.7771260997067448, + "grad_norm": 0.5570266842842102, + "learning_rate": 2.051532941943901e-05, + "log_odds_chosen": 11.733464241027832, + "log_odds_ratio": -0.007406673394143581, + "logits/chosen": -1.6022212505340576, + "logits/rejected": -1.4660956859588623, + "logps/chosen": -0.08904262632131577, + "logps/rejected": -9.125633239746094, + "loss": 0.2412, + "nll_loss": 0.24800783395767212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008904263377189636, + "rewards/margins": 0.9036591053009033, + "rewards/rejected": -0.9125634431838989, + "step": 909 + }, + { + "epoch": 1.7790811339198436, + "grad_norm": 0.5526052713394165, + "learning_rate": 2.0482713633398565e-05, + "log_odds_chosen": 15.354419708251953, + "log_odds_ratio": -0.0077002812176942825, + "logits/chosen": -1.7336170673370361, + "logits/rejected": -1.4046001434326172, + "logps/chosen": -0.13269424438476562, + "logps/rejected": -13.118112564086914, + "loss": 0.2422, + "nll_loss": 0.24607853591442108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013269424438476562, + "rewards/margins": 1.298541784286499, + "rewards/rejected": -1.3118112087249756, + "step": 910 + }, + { + "epoch": 1.7810361681329423, + "grad_norm": 0.5689600706100464, + "learning_rate": 2.045009784735812e-05, + "log_odds_chosen": 16.891983032226562, + "log_odds_ratio": -0.001692835008725524, + "logits/chosen": -1.587671160697937, + "logits/rejected": -1.390427827835083, + "logps/chosen": -0.14455224573612213, + "logps/rejected": -14.486222267150879, + "loss": 0.2444, + "nll_loss": 0.22620372474193573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014455224387347698, + "rewards/margins": 1.4341669082641602, + "rewards/rejected": -1.448622226715088, + "step": 911 + }, + { + "epoch": 1.782991202346041, + "grad_norm": 0.5808539390563965, + "learning_rate": 2.0417482061317677e-05, + "log_odds_chosen": 14.281759262084961, + "log_odds_ratio": -0.0019047169480472803, + "logits/chosen": -1.7255353927612305, + "logits/rejected": -1.4138364791870117, + "logps/chosen": -0.22089499235153198, + "logps/rejected": -11.99342155456543, + "loss": 0.249, + "nll_loss": 0.33330315351486206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02208949811756611, + "rewards/margins": 1.1772527694702148, + "rewards/rejected": -1.1993422508239746, + "step": 912 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.5718557834625244, + "learning_rate": 2.0384866275277233e-05, + "log_odds_chosen": 9.73382568359375, + "log_odds_ratio": -0.024645019322633743, + "logits/chosen": -1.7089290618896484, + "logits/rejected": -1.5939826965332031, + "logps/chosen": -0.116352379322052, + "logps/rejected": -7.669165134429932, + "loss": 0.2476, + "nll_loss": 0.21599875390529633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01163523830473423, + "rewards/margins": 0.7552813291549683, + "rewards/rejected": -0.7669165730476379, + "step": 913 + }, + { + "epoch": 1.7869012707722385, + "grad_norm": 0.5822864174842834, + "learning_rate": 2.035225048923679e-05, + "log_odds_chosen": 13.739198684692383, + "log_odds_ratio": -0.003435317659750581, + "logits/chosen": -1.6032397747039795, + "logits/rejected": -1.3420922756195068, + "logps/chosen": -0.07494784891605377, + "logps/rejected": -11.16431713104248, + "loss": 0.2547, + "nll_loss": 0.1973983645439148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007494784891605377, + "rewards/margins": 1.1089369058609009, + "rewards/rejected": -1.116431713104248, + "step": 914 + }, + { + "epoch": 1.7888563049853372, + "grad_norm": 0.5693533420562744, + "learning_rate": 2.0319634703196345e-05, + "log_odds_chosen": 12.111791610717773, + "log_odds_ratio": -0.0038302033208310604, + "logits/chosen": -1.7642101049423218, + "logits/rejected": -1.5362415313720703, + "logps/chosen": -0.07945239543914795, + "logps/rejected": -9.283926010131836, + "loss": 0.25, + "nll_loss": 0.21490070223808289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007945239543914795, + "rewards/margins": 0.9204474687576294, + "rewards/rejected": -0.9283926486968994, + "step": 915 + }, + { + "epoch": 1.790811339198436, + "grad_norm": 0.5563174486160278, + "learning_rate": 2.0287018917155905e-05, + "log_odds_chosen": 15.157867431640625, + "log_odds_ratio": -0.14847756922245026, + "logits/chosen": -1.713494062423706, + "logits/rejected": -1.4222102165222168, + "logps/chosen": -0.12261784821748734, + "logps/rejected": -13.276507377624512, + "loss": 0.2402, + "nll_loss": 0.19205254316329956, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.012261785566806793, + "rewards/margins": 1.3153890371322632, + "rewards/rejected": -1.327650785446167, + "step": 916 + }, + { + "epoch": 1.7927663734115347, + "grad_norm": 0.5571836233139038, + "learning_rate": 2.025440313111546e-05, + "log_odds_chosen": 12.336240768432617, + "log_odds_ratio": -0.008877198211848736, + "logits/chosen": -1.8055894374847412, + "logits/rejected": -1.4810845851898193, + "logps/chosen": -0.08361870050430298, + "logps/rejected": -9.749814987182617, + "loss": 0.2386, + "nll_loss": 0.20423153042793274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008361870422959328, + "rewards/margins": 0.966619610786438, + "rewards/rejected": -0.974981427192688, + "step": 917 + }, + { + "epoch": 1.7947214076246334, + "grad_norm": 0.5622938275337219, + "learning_rate": 2.0221787345075017e-05, + "log_odds_chosen": 11.997905731201172, + "log_odds_ratio": -0.007735748775303364, + "logits/chosen": -1.6316132545471191, + "logits/rejected": -1.4718191623687744, + "logps/chosen": -0.1042386069893837, + "logps/rejected": -9.73194408416748, + "loss": 0.2365, + "nll_loss": 0.22892752289772034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01042386144399643, + "rewards/margins": 0.9627705216407776, + "rewards/rejected": -0.9731943607330322, + "step": 918 + }, + { + "epoch": 1.7966764418377321, + "grad_norm": 0.5650069117546082, + "learning_rate": 2.0189171559034573e-05, + "log_odds_chosen": 10.462493896484375, + "log_odds_ratio": -0.002964487299323082, + "logits/chosen": -1.7959012985229492, + "logits/rejected": -1.513780117034912, + "logps/chosen": -0.08668620884418488, + "logps/rejected": -7.922987937927246, + "loss": 0.2444, + "nll_loss": 0.18401074409484863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008668621070683002, + "rewards/margins": 0.7836301922798157, + "rewards/rejected": -0.7922987937927246, + "step": 919 + }, + { + "epoch": 1.7986314760508308, + "grad_norm": 0.5365273356437683, + "learning_rate": 2.015655577299413e-05, + "log_odds_chosen": 11.966004371643066, + "log_odds_ratio": -0.0065422337502241135, + "logits/chosen": -1.6128809452056885, + "logits/rejected": -1.5370783805847168, + "logps/chosen": -0.11502901464700699, + "logps/rejected": -9.782718658447266, + "loss": 0.2272, + "nll_loss": 0.23866194486618042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011502902954816818, + "rewards/margins": 0.9667689800262451, + "rewards/rejected": -0.9782718420028687, + "step": 920 + }, + { + "epoch": 1.8005865102639296, + "grad_norm": 0.5594448447227478, + "learning_rate": 2.0123939986953685e-05, + "log_odds_chosen": 13.258781433105469, + "log_odds_ratio": -0.010914504528045654, + "logits/chosen": -1.649066686630249, + "logits/rejected": -1.5574796199798584, + "logps/chosen": -0.1229495257139206, + "logps/rejected": -11.054950714111328, + "loss": 0.2385, + "nll_loss": 0.22595825791358948, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012294952757656574, + "rewards/margins": 1.0932000875473022, + "rewards/rejected": -1.1054950952529907, + "step": 921 + }, + { + "epoch": 1.8025415444770283, + "grad_norm": 0.5635837316513062, + "learning_rate": 2.009132420091324e-05, + "log_odds_chosen": 8.85827350616455, + "log_odds_ratio": -0.012408901937305927, + "logits/chosen": -1.6365325450897217, + "logits/rejected": -1.4746323823928833, + "logps/chosen": -0.1068229079246521, + "logps/rejected": -6.658695220947266, + "loss": 0.239, + "nll_loss": 0.22426730394363403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01068229041993618, + "rewards/margins": 0.6551872491836548, + "rewards/rejected": -0.6658695340156555, + "step": 922 + }, + { + "epoch": 1.804496578690127, + "grad_norm": 0.5673706531524658, + "learning_rate": 2.0058708414872797e-05, + "log_odds_chosen": 15.310626983642578, + "log_odds_ratio": -0.006817838177084923, + "logits/chosen": -1.5657927989959717, + "logits/rejected": -1.394033670425415, + "logps/chosen": -0.09583842009305954, + "logps/rejected": -12.702070236206055, + "loss": 0.2355, + "nll_loss": 0.20484638214111328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009583842940628529, + "rewards/margins": 1.2606232166290283, + "rewards/rejected": -1.2702069282531738, + "step": 923 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.5491198897361755, + "learning_rate": 2.0026092628832353e-05, + "log_odds_chosen": 13.751033782958984, + "log_odds_ratio": -0.0035603768192231655, + "logits/chosen": -1.6447217464447021, + "logits/rejected": -1.3472559452056885, + "logps/chosen": -0.07865103334188461, + "logps/rejected": -11.121286392211914, + "loss": 0.2364, + "nll_loss": 0.17922154068946838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007865102961659431, + "rewards/margins": 1.1042635440826416, + "rewards/rejected": -1.112128734588623, + "step": 924 + }, + { + "epoch": 1.8084066471163245, + "grad_norm": 0.5646079778671265, + "learning_rate": 1.999347684279191e-05, + "log_odds_chosen": 10.420348167419434, + "log_odds_ratio": -0.017918361350893974, + "logits/chosen": -1.602012276649475, + "logits/rejected": -1.5856653451919556, + "logps/chosen": -0.13713058829307556, + "logps/rejected": -8.348542213439941, + "loss": 0.2403, + "nll_loss": 0.2507140636444092, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013713058084249496, + "rewards/margins": 0.8211411237716675, + "rewards/rejected": -0.834854245185852, + "step": 925 + }, + { + "epoch": 1.8103616813294232, + "grad_norm": 0.5720843076705933, + "learning_rate": 1.996086105675147e-05, + "log_odds_chosen": 14.909643173217773, + "log_odds_ratio": -0.0008446058491244912, + "logits/chosen": -1.5889918804168701, + "logits/rejected": -1.6231412887573242, + "logps/chosen": -0.09042977541685104, + "logps/rejected": -12.467759132385254, + "loss": 0.2417, + "nll_loss": 0.2568029463291168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009042978286743164, + "rewards/margins": 1.2377328872680664, + "rewards/rejected": -1.2467758655548096, + "step": 926 + }, + { + "epoch": 1.812316715542522, + "grad_norm": 0.5444841384887695, + "learning_rate": 1.9928245270711025e-05, + "log_odds_chosen": 11.54686450958252, + "log_odds_ratio": -0.015440212562680244, + "logits/chosen": -1.7248668670654297, + "logits/rejected": -1.3556513786315918, + "logps/chosen": -0.15204721689224243, + "logps/rejected": -9.455835342407227, + "loss": 0.2358, + "nll_loss": 0.30607396364212036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015204722061753273, + "rewards/margins": 0.930378794670105, + "rewards/rejected": -0.9455834627151489, + "step": 927 + }, + { + "epoch": 1.8142717497556209, + "grad_norm": 0.5670117735862732, + "learning_rate": 1.989562948467058e-05, + "log_odds_chosen": 12.212747573852539, + "log_odds_ratio": -0.012539353221654892, + "logits/chosen": -1.7069356441497803, + "logits/rejected": -1.5764074325561523, + "logps/chosen": -0.1480804830789566, + "logps/rejected": -9.917861938476562, + "loss": 0.2459, + "nll_loss": 0.31952741742134094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0148080475628376, + "rewards/margins": 0.9769781827926636, + "rewards/rejected": -0.9917862415313721, + "step": 928 + }, + { + "epoch": 1.8162267839687196, + "grad_norm": 0.54472416639328, + "learning_rate": 1.9863013698630137e-05, + "log_odds_chosen": 9.993003845214844, + "log_odds_ratio": -0.012040774337947369, + "logits/chosen": -1.809615969657898, + "logits/rejected": -1.4153119325637817, + "logps/chosen": -0.1112232357263565, + "logps/rejected": -7.636756896972656, + "loss": 0.2337, + "nll_loss": 0.26539772748947144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01112232357263565, + "rewards/margins": 0.7525534629821777, + "rewards/rejected": -0.7636756896972656, + "step": 929 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.5335976481437683, + "learning_rate": 1.9830397912589693e-05, + "log_odds_chosen": 14.607402801513672, + "log_odds_ratio": -0.005546241998672485, + "logits/chosen": -1.5356297492980957, + "logits/rejected": -1.2445118427276611, + "logps/chosen": -0.09573094546794891, + "logps/rejected": -12.138898849487305, + "loss": 0.2193, + "nll_loss": 0.207376629114151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009573094546794891, + "rewards/margins": 1.2043167352676392, + "rewards/rejected": -1.2138898372650146, + "step": 930 + }, + { + "epoch": 1.820136852394917, + "grad_norm": 0.5336527824401855, + "learning_rate": 1.979778212654925e-05, + "log_odds_chosen": 12.763025283813477, + "log_odds_ratio": -0.0018555476563051343, + "logits/chosen": -1.5075695514678955, + "logits/rejected": -1.428417682647705, + "logps/chosen": -0.15284329652786255, + "logps/rejected": -10.510268211364746, + "loss": 0.2269, + "nll_loss": 0.1942993402481079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015284329652786255, + "rewards/margins": 1.0357425212860107, + "rewards/rejected": -1.0510268211364746, + "step": 931 + }, + { + "epoch": 1.8220918866080158, + "grad_norm": 0.563807487487793, + "learning_rate": 1.9765166340508805e-05, + "log_odds_chosen": 13.688657760620117, + "log_odds_ratio": -0.01158154010772705, + "logits/chosen": -1.5522960424423218, + "logits/rejected": -1.4045093059539795, + "logps/chosen": -0.16441521048545837, + "logps/rejected": -11.837555885314941, + "loss": 0.2345, + "nll_loss": 0.24372772872447968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016441522166132927, + "rewards/margins": 1.1673141717910767, + "rewards/rejected": -1.18375563621521, + "step": 932 + }, + { + "epoch": 1.8240469208211145, + "grad_norm": 0.546540379524231, + "learning_rate": 1.973255055446836e-05, + "log_odds_chosen": 13.610328674316406, + "log_odds_ratio": -0.003241550177335739, + "logits/chosen": -1.4815623760223389, + "logits/rejected": -1.5601599216461182, + "logps/chosen": -0.09431243687868118, + "logps/rejected": -11.172037124633789, + "loss": 0.2303, + "nll_loss": 0.18207448720932007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009431243874132633, + "rewards/margins": 1.1077724695205688, + "rewards/rejected": -1.1172038316726685, + "step": 933 + }, + { + "epoch": 1.8260019550342133, + "grad_norm": 0.5437535047531128, + "learning_rate": 1.9699934768427918e-05, + "log_odds_chosen": 12.97563648223877, + "log_odds_ratio": -0.0039017496164888144, + "logits/chosen": -1.808099389076233, + "logits/rejected": -1.6404016017913818, + "logps/chosen": -0.09051952511072159, + "logps/rejected": -10.491317749023438, + "loss": 0.2281, + "nll_loss": 0.1804356426000595, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009051953442394733, + "rewards/margins": 1.0400798320770264, + "rewards/rejected": -1.0491318702697754, + "step": 934 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.5416752099990845, + "learning_rate": 1.9667318982387474e-05, + "log_odds_chosen": 15.748043060302734, + "log_odds_ratio": -0.0001335286651737988, + "logits/chosen": -1.7400639057159424, + "logits/rejected": -1.4350413084030151, + "logps/chosen": -0.1288164258003235, + "logps/rejected": -13.597881317138672, + "loss": 0.2276, + "nll_loss": 0.2167244255542755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012881642207503319, + "rewards/margins": 1.3469066619873047, + "rewards/rejected": -1.3597880601882935, + "step": 935 + }, + { + "epoch": 1.8299120234604107, + "grad_norm": 0.5400493741035461, + "learning_rate": 1.9634703196347033e-05, + "log_odds_chosen": 16.156219482421875, + "log_odds_ratio": -0.005138026550412178, + "logits/chosen": -1.466220498085022, + "logits/rejected": -1.382662057876587, + "logps/chosen": -0.1420457661151886, + "logps/rejected": -13.995553016662598, + "loss": 0.2241, + "nll_loss": 0.2404707670211792, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014204577542841434, + "rewards/margins": 1.3853507041931152, + "rewards/rejected": -1.3995553255081177, + "step": 936 + }, + { + "epoch": 1.8318670576735094, + "grad_norm": 0.5430614352226257, + "learning_rate": 1.960208741030659e-05, + "log_odds_chosen": 11.921902656555176, + "log_odds_ratio": -0.008115297183394432, + "logits/chosen": -1.7584729194641113, + "logits/rejected": -1.4606828689575195, + "logps/chosen": -0.14394713938236237, + "logps/rejected": -9.790695190429688, + "loss": 0.2336, + "nll_loss": 0.2707750201225281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014394713565707207, + "rewards/margins": 0.9646748304367065, + "rewards/rejected": -0.9790695905685425, + "step": 937 + }, + { + "epoch": 1.8338220918866082, + "grad_norm": 0.548323929309845, + "learning_rate": 1.9569471624266145e-05, + "log_odds_chosen": 18.115110397338867, + "log_odds_ratio": -1.9975175746367313e-05, + "logits/chosen": -1.6036345958709717, + "logits/rejected": -1.4979819059371948, + "logps/chosen": -0.12602990865707397, + "logps/rejected": -15.830632209777832, + "loss": 0.2264, + "nll_loss": 0.22314558923244476, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012602990493178368, + "rewards/margins": 1.570460319519043, + "rewards/rejected": -1.5830632448196411, + "step": 938 + }, + { + "epoch": 1.835777126099707, + "grad_norm": 0.5390161871910095, + "learning_rate": 1.95368558382257e-05, + "log_odds_chosen": 14.547281265258789, + "log_odds_ratio": -0.010226178914308548, + "logits/chosen": -1.7775146961212158, + "logits/rejected": -1.409653663635254, + "logps/chosen": -0.11260691285133362, + "logps/rejected": -12.318764686584473, + "loss": 0.2282, + "nll_loss": 0.2057819962501526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011260690167546272, + "rewards/margins": 1.2206157445907593, + "rewards/rejected": -1.2318764925003052, + "step": 939 + }, + { + "epoch": 1.8377321603128056, + "grad_norm": 0.5433930158615112, + "learning_rate": 1.9504240052185258e-05, + "log_odds_chosen": 12.064512252807617, + "log_odds_ratio": -0.005208005663007498, + "logits/chosen": -1.6304426193237305, + "logits/rejected": -1.4707986116409302, + "logps/chosen": -0.11335031688213348, + "logps/rejected": -9.832286834716797, + "loss": 0.2264, + "nll_loss": 0.2437957227230072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011335032060742378, + "rewards/margins": 0.9718937873840332, + "rewards/rejected": -0.9832288026809692, + "step": 940 + }, + { + "epoch": 1.8396871945259043, + "grad_norm": 0.5405575037002563, + "learning_rate": 1.9471624266144814e-05, + "log_odds_chosen": 7.94252872467041, + "log_odds_ratio": -0.019016968086361885, + "logits/chosen": -1.805606484413147, + "logits/rejected": -1.4931025505065918, + "logps/chosen": -0.10093867778778076, + "logps/rejected": -5.538595676422119, + "loss": 0.2166, + "nll_loss": 0.2217838317155838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010093867778778076, + "rewards/margins": 0.5437657237052917, + "rewards/rejected": -0.5538595914840698, + "step": 941 + }, + { + "epoch": 1.841642228739003, + "grad_norm": 0.543535053730011, + "learning_rate": 1.943900848010437e-05, + "log_odds_chosen": 14.90078353881836, + "log_odds_ratio": -0.013720295391976833, + "logits/chosen": -1.6853176355361938, + "logits/rejected": -1.5090949535369873, + "logps/chosen": -0.0714801773428917, + "logps/rejected": -12.238401412963867, + "loss": 0.2273, + "nll_loss": 0.19247476756572723, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007148018106818199, + "rewards/margins": 1.2166922092437744, + "rewards/rejected": -1.2238401174545288, + "step": 942 + }, + { + "epoch": 1.8435972629521018, + "grad_norm": 0.5527139902114868, + "learning_rate": 1.9406392694063926e-05, + "log_odds_chosen": 12.744778633117676, + "log_odds_ratio": -0.013244198635220528, + "logits/chosen": -1.6203480958938599, + "logits/rejected": -1.4400241374969482, + "logps/chosen": -0.18404407799243927, + "logps/rejected": -10.666536331176758, + "loss": 0.2301, + "nll_loss": 0.3898266851902008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018404407426714897, + "rewards/margins": 1.0482492446899414, + "rewards/rejected": -1.0666536092758179, + "step": 943 + }, + { + "epoch": 1.8455522971652005, + "grad_norm": 0.5347129702568054, + "learning_rate": 1.9373776908023482e-05, + "log_odds_chosen": 11.195257186889648, + "log_odds_ratio": -0.0019803668837994337, + "logits/chosen": -1.677020788192749, + "logits/rejected": -1.4641156196594238, + "logps/chosen": -0.06342259049415588, + "logps/rejected": -8.378836631774902, + "loss": 0.2262, + "nll_loss": 0.15820902585983276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006342259235680103, + "rewards/margins": 0.8315414786338806, + "rewards/rejected": -0.837883710861206, + "step": 944 + }, + { + "epoch": 1.8475073313782993, + "grad_norm": 0.5203208327293396, + "learning_rate": 1.9341161121983038e-05, + "log_odds_chosen": 10.57241439819336, + "log_odds_ratio": -0.010753638111054897, + "logits/chosen": -1.704649567604065, + "logits/rejected": -1.4862916469573975, + "logps/chosen": -0.11705774068832397, + "logps/rejected": -8.10746955871582, + "loss": 0.2104, + "nll_loss": 0.26110750436782837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011705773882567883, + "rewards/margins": 0.7990411520004272, + "rewards/rejected": -0.8107469081878662, + "step": 945 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.5615418553352356, + "learning_rate": 1.9308545335942597e-05, + "log_odds_chosen": 14.413156509399414, + "log_odds_ratio": -0.004470356740057468, + "logits/chosen": -1.7537622451782227, + "logits/rejected": -1.4738523960113525, + "logps/chosen": -0.08170484006404877, + "logps/rejected": -11.707545280456543, + "loss": 0.2246, + "nll_loss": 0.18832048773765564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008170483633875847, + "rewards/margins": 1.1625839471817017, + "rewards/rejected": -1.1707544326782227, + "step": 946 + }, + { + "epoch": 1.8514173998044967, + "grad_norm": 0.5235439538955688, + "learning_rate": 1.9275929549902154e-05, + "log_odds_chosen": 11.283646583557129, + "log_odds_ratio": -0.020134519785642624, + "logits/chosen": -1.5765444040298462, + "logits/rejected": -1.4791289567947388, + "logps/chosen": -0.1563999503850937, + "logps/rejected": -8.866528511047363, + "loss": 0.2161, + "nll_loss": 0.3004603385925293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01563999429345131, + "rewards/margins": 0.8710128664970398, + "rewards/rejected": -0.8866528868675232, + "step": 947 + }, + { + "epoch": 1.8533724340175954, + "grad_norm": 0.5295754671096802, + "learning_rate": 1.924331376386171e-05, + "log_odds_chosen": 15.781184196472168, + "log_odds_ratio": -0.010546335019171238, + "logits/chosen": -1.715172290802002, + "logits/rejected": -1.550492286682129, + "logps/chosen": -0.16013334691524506, + "logps/rejected": -13.55683708190918, + "loss": 0.2171, + "nll_loss": 0.3450993299484253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016013335436582565, + "rewards/margins": 1.3396704196929932, + "rewards/rejected": -1.3556838035583496, + "step": 948 + }, + { + "epoch": 1.8553274682306942, + "grad_norm": 0.539354145526886, + "learning_rate": 1.9210697977821266e-05, + "log_odds_chosen": 19.46869659423828, + "log_odds_ratio": -0.003156436374410987, + "logits/chosen": -1.827993392944336, + "logits/rejected": -1.4562337398529053, + "logps/chosen": -0.10688737034797668, + "logps/rejected": -17.022777557373047, + "loss": 0.2149, + "nll_loss": 0.1865134835243225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010688737034797668, + "rewards/margins": 1.691589117050171, + "rewards/rejected": -1.7022778987884521, + "step": 949 + }, + { + "epoch": 1.857282502443793, + "grad_norm": 0.5594937801361084, + "learning_rate": 1.9178082191780822e-05, + "log_odds_chosen": 9.697990417480469, + "log_odds_ratio": -0.0033449744805693626, + "logits/chosen": -1.647180199623108, + "logits/rejected": -1.3925126791000366, + "logps/chosen": -0.07468913495540619, + "logps/rejected": -7.09190559387207, + "loss": 0.226, + "nll_loss": 0.1922895908355713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0074689132161438465, + "rewards/margins": 0.701721727848053, + "rewards/rejected": -0.7091906666755676, + "step": 950 + }, + { + "epoch": 1.8592375366568916, + "grad_norm": 0.5357095003128052, + "learning_rate": 1.9145466405740378e-05, + "log_odds_chosen": 14.458257675170898, + "log_odds_ratio": -0.0032759036403149366, + "logits/chosen": -1.5790374279022217, + "logits/rejected": -1.61802339553833, + "logps/chosen": -0.09949888288974762, + "logps/rejected": -12.152825355529785, + "loss": 0.2179, + "nll_loss": 0.22314327955245972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009949889034032822, + "rewards/margins": 1.2053327560424805, + "rewards/rejected": -1.2152825593948364, + "step": 951 + }, + { + "epoch": 1.8611925708699903, + "grad_norm": 0.5429408550262451, + "learning_rate": 1.9112850619699934e-05, + "log_odds_chosen": 11.18530559539795, + "log_odds_ratio": -0.00493386946618557, + "logits/chosen": -1.8646488189697266, + "logits/rejected": -1.396550178527832, + "logps/chosen": -0.14084330201148987, + "logps/rejected": -9.188051223754883, + "loss": 0.2158, + "nll_loss": 0.3485981225967407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014084331691265106, + "rewards/margins": 0.9047209620475769, + "rewards/rejected": -0.9188052415847778, + "step": 952 + }, + { + "epoch": 1.863147605083089, + "grad_norm": 0.5474455952644348, + "learning_rate": 1.908023483365949e-05, + "log_odds_chosen": 9.94972038269043, + "log_odds_ratio": -0.009965585544705391, + "logits/chosen": -1.794443130493164, + "logits/rejected": -1.5787110328674316, + "logps/chosen": -0.09569792449474335, + "logps/rejected": -7.496306419372559, + "loss": 0.2238, + "nll_loss": 0.24759238958358765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009569792076945305, + "rewards/margins": 0.7400608658790588, + "rewards/rejected": -0.7496306896209717, + "step": 953 + }, + { + "epoch": 1.8651026392961878, + "grad_norm": 0.5411081314086914, + "learning_rate": 1.9047619047619046e-05, + "log_odds_chosen": 12.634045600891113, + "log_odds_ratio": -0.020459303632378578, + "logits/chosen": -1.7321206331253052, + "logits/rejected": -1.4641461372375488, + "logps/chosen": -0.13272792100906372, + "logps/rejected": -10.521013259887695, + "loss": 0.216, + "nll_loss": 0.23140142858028412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013272791169583797, + "rewards/margins": 1.038828730583191, + "rewards/rejected": -1.0521013736724854, + "step": 954 + }, + { + "epoch": 1.8670576735092865, + "grad_norm": 0.5223149657249451, + "learning_rate": 1.9015003261578606e-05, + "log_odds_chosen": 9.792613983154297, + "log_odds_ratio": -0.011967544443905354, + "logits/chosen": -1.6768202781677246, + "logits/rejected": -1.4186099767684937, + "logps/chosen": -0.12682193517684937, + "logps/rejected": -7.684130668640137, + "loss": 0.218, + "nll_loss": 0.2249983549118042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.012682193890213966, + "rewards/margins": 0.755730926990509, + "rewards/rejected": -0.7684131860733032, + "step": 955 + }, + { + "epoch": 1.8690127077223853, + "grad_norm": 0.5185034275054932, + "learning_rate": 1.898238747553816e-05, + "log_odds_chosen": 10.28866958618164, + "log_odds_ratio": -0.002497772453352809, + "logits/chosen": -1.7027013301849365, + "logits/rejected": -1.5472897291183472, + "logps/chosen": -0.08221787214279175, + "logps/rejected": -7.780355930328369, + "loss": 0.2119, + "nll_loss": 0.17725317180156708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00822178740054369, + "rewards/margins": 0.7698138952255249, + "rewards/rejected": -0.7780356407165527, + "step": 956 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 0.5314845442771912, + "learning_rate": 1.8949771689497718e-05, + "log_odds_chosen": 11.416794776916504, + "log_odds_ratio": -0.02071095071732998, + "logits/chosen": -1.7178289890289307, + "logits/rejected": -1.5295381546020508, + "logps/chosen": -0.09605400264263153, + "logps/rejected": -9.062322616577148, + "loss": 0.2139, + "nll_loss": 0.17898564040660858, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009605399332940578, + "rewards/margins": 0.8966268301010132, + "rewards/rejected": -0.9062322974205017, + "step": 957 + }, + { + "epoch": 1.8729227761485827, + "grad_norm": 0.5291386842727661, + "learning_rate": 1.8917155903457274e-05, + "log_odds_chosen": 15.182748794555664, + "log_odds_ratio": -0.0017178925918415189, + "logits/chosen": -1.8185887336730957, + "logits/rejected": -1.5410813093185425, + "logps/chosen": -0.07464256137609482, + "logps/rejected": -12.530855178833008, + "loss": 0.2166, + "nll_loss": 0.19226503372192383, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007464256137609482, + "rewards/margins": 1.2456213235855103, + "rewards/rejected": -1.2530856132507324, + "step": 958 + }, + { + "epoch": 1.8748778103616814, + "grad_norm": 0.5289320945739746, + "learning_rate": 1.888454011741683e-05, + "log_odds_chosen": 14.274322509765625, + "log_odds_ratio": -0.0031090653501451015, + "logits/chosen": -1.8169835805892944, + "logits/rejected": -1.5542118549346924, + "logps/chosen": -0.13138195872306824, + "logps/rejected": -12.206039428710938, + "loss": 0.21, + "nll_loss": 0.24779893457889557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013138197362422943, + "rewards/margins": 1.2074657678604126, + "rewards/rejected": -1.2206039428710938, + "step": 959 + }, + { + "epoch": 1.8768328445747802, + "grad_norm": 0.5218808650970459, + "learning_rate": 1.8851924331376386e-05, + "log_odds_chosen": 11.194610595703125, + "log_odds_ratio": -0.005354664288461208, + "logits/chosen": -1.6467081308364868, + "logits/rejected": -1.4620169401168823, + "logps/chosen": -0.07855609059333801, + "logps/rejected": -8.605729103088379, + "loss": 0.2088, + "nll_loss": 0.16386385262012482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007855609059333801, + "rewards/margins": 0.8527172803878784, + "rewards/rejected": -0.860572874546051, + "step": 960 + }, + { + "epoch": 1.878787878787879, + "grad_norm": 0.5423116683959961, + "learning_rate": 1.8819308545335942e-05, + "log_odds_chosen": 18.995433807373047, + "log_odds_ratio": -0.0009416565299034119, + "logits/chosen": -1.6257317066192627, + "logits/rejected": -1.4771126508712769, + "logps/chosen": -0.11390122771263123, + "logps/rejected": -16.376253128051758, + "loss": 0.2125, + "nll_loss": 0.3057602047920227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011390121653676033, + "rewards/margins": 1.6262352466583252, + "rewards/rejected": -1.6376253366470337, + "step": 961 + }, + { + "epoch": 1.8807429130009776, + "grad_norm": 0.5303785800933838, + "learning_rate": 1.8786692759295498e-05, + "log_odds_chosen": 13.896507263183594, + "log_odds_ratio": -0.010320382192730904, + "logits/chosen": -1.617781639099121, + "logits/rejected": -1.4047456979751587, + "logps/chosen": -0.1247473657131195, + "logps/rejected": -11.88205623626709, + "loss": 0.2142, + "nll_loss": 0.22898900508880615, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01247473806142807, + "rewards/margins": 1.1757309436798096, + "rewards/rejected": -1.1882057189941406, + "step": 962 + }, + { + "epoch": 1.8826979472140764, + "grad_norm": 0.5315988063812256, + "learning_rate": 1.8754076973255054e-05, + "log_odds_chosen": 13.564220428466797, + "log_odds_ratio": -0.0033048405312001705, + "logits/chosen": -1.6110117435455322, + "logits/rejected": -1.5083794593811035, + "logps/chosen": -0.09760623425245285, + "logps/rejected": -11.082769393920898, + "loss": 0.2155, + "nll_loss": 0.1673606038093567, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009760623797774315, + "rewards/margins": 1.0985163450241089, + "rewards/rejected": -1.1082769632339478, + "step": 963 + }, + { + "epoch": 1.884652981427175, + "grad_norm": 0.508407711982727, + "learning_rate": 1.872146118721461e-05, + "log_odds_chosen": 12.734369277954102, + "log_odds_ratio": -0.005565701052546501, + "logits/chosen": -1.6192638874053955, + "logits/rejected": -1.6282272338867188, + "logps/chosen": -0.0939040407538414, + "logps/rejected": -10.281912803649902, + "loss": 0.2052, + "nll_loss": 0.19543887674808502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00939040444791317, + "rewards/margins": 1.0188008546829224, + "rewards/rejected": -1.0281912088394165, + "step": 964 + }, + { + "epoch": 1.8866080156402738, + "grad_norm": 0.5211138129234314, + "learning_rate": 1.868884540117417e-05, + "log_odds_chosen": 12.481328964233398, + "log_odds_ratio": -0.004240207839757204, + "logits/chosen": -1.7398138046264648, + "logits/rejected": -1.6544268131256104, + "logps/chosen": -0.09387345612049103, + "logps/rejected": -10.165155410766602, + "loss": 0.2038, + "nll_loss": 0.2004334181547165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009387345984578133, + "rewards/margins": 1.0071282386779785, + "rewards/rejected": -1.0165156126022339, + "step": 965 + }, + { + "epoch": 1.8885630498533725, + "grad_norm": 0.5433597564697266, + "learning_rate": 1.8656229615133726e-05, + "log_odds_chosen": 10.694588661193848, + "log_odds_ratio": -0.003042093710973859, + "logits/chosen": -1.6780381202697754, + "logits/rejected": -1.639153003692627, + "logps/chosen": -0.10262198746204376, + "logps/rejected": -8.381757736206055, + "loss": 0.2132, + "nll_loss": 0.18871170282363892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010262197814881802, + "rewards/margins": 0.8279136419296265, + "rewards/rejected": -0.8381757736206055, + "step": 966 + }, + { + "epoch": 1.8905180840664713, + "grad_norm": 0.5212023854255676, + "learning_rate": 1.8623613829093282e-05, + "log_odds_chosen": 12.022539138793945, + "log_odds_ratio": -0.006545474287122488, + "logits/chosen": -1.8318220376968384, + "logits/rejected": -1.6401795148849487, + "logps/chosen": -0.10994569957256317, + "logps/rejected": -9.755353927612305, + "loss": 0.2077, + "nll_loss": 0.21007445454597473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010994568467140198, + "rewards/margins": 0.9645407199859619, + "rewards/rejected": -0.9755353331565857, + "step": 967 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.5012785792350769, + "learning_rate": 1.8590998043052838e-05, + "log_odds_chosen": 11.87542724609375, + "log_odds_ratio": -0.009576595388352871, + "logits/chosen": -1.5441622734069824, + "logits/rejected": -1.4290227890014648, + "logps/chosen": -0.13661938905715942, + "logps/rejected": -9.735187530517578, + "loss": 0.2016, + "nll_loss": 0.2499660849571228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.013661938719451427, + "rewards/margins": 0.9598568677902222, + "rewards/rejected": -0.9735187292098999, + "step": 968 + }, + { + "epoch": 1.8944281524926687, + "grad_norm": 0.5350011587142944, + "learning_rate": 1.8558382257012394e-05, + "log_odds_chosen": 14.679075241088867, + "log_odds_ratio": -0.003598837647587061, + "logits/chosen": -1.754746913909912, + "logits/rejected": -1.324758529663086, + "logps/chosen": -0.11385944485664368, + "logps/rejected": -12.35308837890625, + "loss": 0.2093, + "nll_loss": 0.20670071244239807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011385943740606308, + "rewards/margins": 1.2239229679107666, + "rewards/rejected": -1.2353088855743408, + "step": 969 + }, + { + "epoch": 1.8963831867057674, + "grad_norm": 0.520867645740509, + "learning_rate": 1.852576647097195e-05, + "log_odds_chosen": 13.637016296386719, + "log_odds_ratio": -0.002759072696790099, + "logits/chosen": -1.698967695236206, + "logits/rejected": -1.5496937036514282, + "logps/chosen": -0.06275978684425354, + "logps/rejected": -10.798617362976074, + "loss": 0.2023, + "nll_loss": 0.16024792194366455, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006275978870689869, + "rewards/margins": 1.0735857486724854, + "rewards/rejected": -1.0798616409301758, + "step": 970 + }, + { + "epoch": 1.8983382209188662, + "grad_norm": 0.5076127052307129, + "learning_rate": 1.8493150684931506e-05, + "log_odds_chosen": 19.734161376953125, + "log_odds_ratio": -0.0007472769939340651, + "logits/chosen": -1.7369909286499023, + "logits/rejected": -1.2602641582489014, + "logps/chosen": -0.0961739718914032, + "logps/rejected": -17.1567325592041, + "loss": 0.1972, + "nll_loss": 0.26420900225639343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00961739756166935, + "rewards/margins": 1.706055998802185, + "rewards/rejected": -1.7156733274459839, + "step": 971 + }, + { + "epoch": 1.900293255131965, + "grad_norm": 0.5097871422767639, + "learning_rate": 1.8460534898891062e-05, + "log_odds_chosen": 20.06525230407715, + "log_odds_ratio": -0.0006770020117983222, + "logits/chosen": -1.5859507322311401, + "logits/rejected": -1.3776848316192627, + "logps/chosen": -0.08597970008850098, + "logps/rejected": -17.23325538635254, + "loss": 0.1999, + "nll_loss": 0.18370234966278076, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008597970940172672, + "rewards/margins": 1.7147274017333984, + "rewards/rejected": -1.7233253717422485, + "step": 972 + }, + { + "epoch": 1.9022482893450636, + "grad_norm": 0.5162299275398254, + "learning_rate": 1.842791911285062e-05, + "log_odds_chosen": 21.147254943847656, + "log_odds_ratio": -0.0027495415415614843, + "logits/chosen": -1.6880927085876465, + "logits/rejected": -1.3161131143569946, + "logps/chosen": -0.07547241449356079, + "logps/rejected": -18.339529037475586, + "loss": 0.1997, + "nll_loss": 0.1364988535642624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007547242101281881, + "rewards/margins": 1.8264057636260986, + "rewards/rejected": -1.8339529037475586, + "step": 973 + }, + { + "epoch": 1.9042033235581624, + "grad_norm": 0.525593101978302, + "learning_rate": 1.8395303326810175e-05, + "log_odds_chosen": 10.63464069366455, + "log_odds_ratio": -0.00595649890601635, + "logits/chosen": -1.6921672821044922, + "logits/rejected": -1.4376354217529297, + "logps/chosen": -0.07808297127485275, + "logps/rejected": -7.898469924926758, + "loss": 0.2007, + "nll_loss": 0.18767626583576202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007808297872543335, + "rewards/margins": 0.782038688659668, + "rewards/rejected": -0.7898470163345337, + "step": 974 + }, + { + "epoch": 1.906158357771261, + "grad_norm": 0.5065926313400269, + "learning_rate": 1.8362687540769734e-05, + "log_odds_chosen": 11.372153282165527, + "log_odds_ratio": -0.005850179120898247, + "logits/chosen": -1.56984281539917, + "logits/rejected": -1.4184887409210205, + "logps/chosen": -0.08935675024986267, + "logps/rejected": -8.988911628723145, + "loss": 0.2027, + "nll_loss": 0.18849539756774902, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008935675024986267, + "rewards/margins": 0.8899555206298828, + "rewards/rejected": -0.8988910913467407, + "step": 975 + }, + { + "epoch": 1.9081133919843598, + "grad_norm": 0.5153484344482422, + "learning_rate": 1.833007175472929e-05, + "log_odds_chosen": 15.811614990234375, + "log_odds_ratio": -0.0026309506502002478, + "logits/chosen": -1.772322654724121, + "logits/rejected": -1.4128763675689697, + "logps/chosen": -0.08652554452419281, + "logps/rejected": -13.179115295410156, + "loss": 0.2024, + "nll_loss": 0.1815972626209259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00865255482494831, + "rewards/margins": 1.3092589378356934, + "rewards/rejected": -1.3179115056991577, + "step": 976 + }, + { + "epoch": 1.9100684261974585, + "grad_norm": 0.5084092617034912, + "learning_rate": 1.8297455968688846e-05, + "log_odds_chosen": 17.7987117767334, + "log_odds_ratio": -0.0002425488637527451, + "logits/chosen": -1.488189458847046, + "logits/rejected": -1.3071563243865967, + "logps/chosen": -0.08367197215557098, + "logps/rejected": -15.263147354125977, + "loss": 0.1937, + "nll_loss": 0.17510464787483215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008367197588086128, + "rewards/margins": 1.5179476737976074, + "rewards/rejected": -1.5263147354125977, + "step": 977 + }, + { + "epoch": 1.9120234604105573, + "grad_norm": 0.51636803150177, + "learning_rate": 1.8264840182648402e-05, + "log_odds_chosen": 13.810670852661133, + "log_odds_ratio": -0.0029793474823236465, + "logits/chosen": -1.682920217514038, + "logits/rejected": -1.6185922622680664, + "logps/chosen": -0.06452934443950653, + "logps/rejected": -11.013876914978027, + "loss": 0.2021, + "nll_loss": 0.1489814817905426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006452934350818396, + "rewards/margins": 1.0949348211288452, + "rewards/rejected": -1.1013877391815186, + "step": 978 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.5259555578231812, + "learning_rate": 1.823222439660796e-05, + "log_odds_chosen": 10.808871269226074, + "log_odds_ratio": -0.0013677317183464766, + "logits/chosen": -1.7702407836914062, + "logits/rejected": -1.5463579893112183, + "logps/chosen": -0.06742484867572784, + "logps/rejected": -8.04649543762207, + "loss": 0.1967, + "nll_loss": 0.20509254932403564, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006742484867572784, + "rewards/margins": 0.7979071140289307, + "rewards/rejected": -0.8046495914459229, + "step": 979 + }, + { + "epoch": 1.9159335288367547, + "grad_norm": 0.48379892110824585, + "learning_rate": 1.8199608610567515e-05, + "log_odds_chosen": 15.254210472106934, + "log_odds_ratio": -0.007108758669346571, + "logits/chosen": -1.727550745010376, + "logits/rejected": -1.500938057899475, + "logps/chosen": -0.09204863011837006, + "logps/rejected": -12.801860809326172, + "loss": 0.187, + "nll_loss": 0.16221971809864044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009204862639307976, + "rewards/margins": 1.2709813117980957, + "rewards/rejected": -1.2801861763000488, + "step": 980 + }, + { + "epoch": 1.9178885630498534, + "grad_norm": 0.501349687576294, + "learning_rate": 1.816699282452707e-05, + "log_odds_chosen": 11.22997760772705, + "log_odds_ratio": -0.005962572991847992, + "logits/chosen": -1.7543914318084717, + "logits/rejected": -1.5261790752410889, + "logps/chosen": -0.10508587956428528, + "logps/rejected": -8.949856758117676, + "loss": 0.1936, + "nll_loss": 0.22453844547271729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010508587583899498, + "rewards/margins": 0.8844771385192871, + "rewards/rejected": -0.8949857354164124, + "step": 981 + }, + { + "epoch": 1.9198435972629522, + "grad_norm": 0.4884853661060333, + "learning_rate": 1.8134377038486627e-05, + "log_odds_chosen": 14.311039924621582, + "log_odds_ratio": -0.0018215079326182604, + "logits/chosen": -1.5738770961761475, + "logits/rejected": -1.547934889793396, + "logps/chosen": -0.1020810455083847, + "logps/rejected": -12.014036178588867, + "loss": 0.1908, + "nll_loss": 0.17760112881660461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010208104737102985, + "rewards/margins": 1.1911954879760742, + "rewards/rejected": -1.2014036178588867, + "step": 982 + }, + { + "epoch": 1.921798631476051, + "grad_norm": 0.5078781247138977, + "learning_rate": 1.8101761252446183e-05, + "log_odds_chosen": 10.981912612915039, + "log_odds_ratio": -0.08073177188634872, + "logits/chosen": -1.6339105367660522, + "logits/rejected": -1.4005450010299683, + "logps/chosen": -0.12539860606193542, + "logps/rejected": -8.830392837524414, + "loss": 0.1926, + "nll_loss": 0.2083953469991684, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.012539861723780632, + "rewards/margins": 0.8704994916915894, + "rewards/rejected": -0.8830393552780151, + "step": 983 + }, + { + "epoch": 1.9237536656891496, + "grad_norm": 0.5118982791900635, + "learning_rate": 1.806914546640574e-05, + "log_odds_chosen": 10.608386993408203, + "log_odds_ratio": -0.00986986793577671, + "logits/chosen": -1.6540757417678833, + "logits/rejected": -1.712198257446289, + "logps/chosen": -0.0947684496641159, + "logps/rejected": -8.087210655212402, + "loss": 0.1945, + "nll_loss": 0.19767296314239502, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009476845152676105, + "rewards/margins": 0.799244225025177, + "rewards/rejected": -0.8087210655212402, + "step": 984 + }, + { + "epoch": 1.9257086999022484, + "grad_norm": 0.48460832238197327, + "learning_rate": 1.80365296803653e-05, + "log_odds_chosen": 10.47659683227539, + "log_odds_ratio": -0.005395964253693819, + "logits/chosen": -1.726790428161621, + "logits/rejected": -1.770263910293579, + "logps/chosen": -0.07246716320514679, + "logps/rejected": -7.801900863647461, + "loss": 0.186, + "nll_loss": 0.19834424555301666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007246716879308224, + "rewards/margins": 0.772943377494812, + "rewards/rejected": -0.7801901698112488, + "step": 985 + }, + { + "epoch": 1.927663734115347, + "grad_norm": 0.5013875961303711, + "learning_rate": 1.8003913894324854e-05, + "log_odds_chosen": 17.824657440185547, + "log_odds_ratio": -0.0012243939563632011, + "logits/chosen": -1.687028408050537, + "logits/rejected": -1.4915958642959595, + "logps/chosen": -0.08398881554603577, + "logps/rejected": -15.236303329467773, + "loss": 0.1912, + "nll_loss": 0.21258574724197388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008398882113397121, + "rewards/margins": 1.5152316093444824, + "rewards/rejected": -1.5236303806304932, + "step": 986 + }, + { + "epoch": 1.9296187683284458, + "grad_norm": 0.5143495202064514, + "learning_rate": 1.797129810828441e-05, + "log_odds_chosen": 8.193496704101562, + "log_odds_ratio": -0.008338719606399536, + "logits/chosen": -1.6446020603179932, + "logits/rejected": -1.4776092767715454, + "logps/chosen": -0.09046816825866699, + "logps/rejected": -5.7041168212890625, + "loss": 0.1905, + "nll_loss": 0.16259394586086273, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009046817198395729, + "rewards/margins": 0.5613648891448975, + "rewards/rejected": -0.5704116821289062, + "step": 987 + }, + { + "epoch": 1.9315738025415445, + "grad_norm": 0.49478569626808167, + "learning_rate": 1.7938682322243967e-05, + "log_odds_chosen": 10.458242416381836, + "log_odds_ratio": -0.01167108491063118, + "logits/chosen": -1.670329213142395, + "logits/rejected": -1.4333823919296265, + "logps/chosen": -0.09660440683364868, + "logps/rejected": -8.071928024291992, + "loss": 0.1882, + "nll_loss": 0.17400455474853516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009660441428422928, + "rewards/margins": 0.7975324392318726, + "rewards/rejected": -0.8071928024291992, + "step": 988 + }, + { + "epoch": 1.9335288367546433, + "grad_norm": 0.49254390597343445, + "learning_rate": 1.7906066536203523e-05, + "log_odds_chosen": 12.54660701751709, + "log_odds_ratio": -0.008032728917896748, + "logits/chosen": -1.6871862411499023, + "logits/rejected": -1.5788285732269287, + "logps/chosen": -0.10222392529249191, + "logps/rejected": -10.186248779296875, + "loss": 0.1873, + "nll_loss": 0.19412972033023834, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.010222392156720161, + "rewards/margins": 1.0084025859832764, + "rewards/rejected": -1.018625020980835, + "step": 989 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.4858187735080719, + "learning_rate": 1.787345075016308e-05, + "log_odds_chosen": 13.247310638427734, + "log_odds_ratio": -0.0018683091038838029, + "logits/chosen": -1.5636998414993286, + "logits/rejected": -1.241889476776123, + "logps/chosen": -0.06708269566297531, + "logps/rejected": -10.347076416015625, + "loss": 0.1861, + "nll_loss": 0.1722375452518463, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006708269007503986, + "rewards/margins": 1.0279994010925293, + "rewards/rejected": -1.0347076654434204, + "step": 990 + }, + { + "epoch": 1.9374389051808407, + "grad_norm": 0.4883933365345001, + "learning_rate": 1.7840834964122635e-05, + "log_odds_chosen": 12.05422592163086, + "log_odds_ratio": -0.002512829378247261, + "logits/chosen": -1.6841673851013184, + "logits/rejected": -1.550007700920105, + "logps/chosen": -0.0745367780327797, + "logps/rejected": -9.30723762512207, + "loss": 0.1855, + "nll_loss": 0.14559701085090637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007453677710145712, + "rewards/margins": 0.9232701063156128, + "rewards/rejected": -0.9307238459587097, + "step": 991 + }, + { + "epoch": 1.9393939393939394, + "grad_norm": 0.4624250531196594, + "learning_rate": 1.780821917808219e-05, + "log_odds_chosen": 19.58382225036621, + "log_odds_ratio": -0.011095216497778893, + "logits/chosen": -1.5768104791641235, + "logits/rejected": -1.2655513286590576, + "logps/chosen": -0.20935803651809692, + "logps/rejected": -17.606733322143555, + "loss": 0.1822, + "nll_loss": 0.35702621936798096, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.020935803651809692, + "rewards/margins": 1.7397375106811523, + "rewards/rejected": -1.7606732845306396, + "step": 992 + }, + { + "epoch": 1.9413489736070382, + "grad_norm": 0.4919642508029938, + "learning_rate": 1.7775603392041747e-05, + "log_odds_chosen": 17.09747886657715, + "log_odds_ratio": -0.0006131778354756534, + "logits/chosen": -1.8074331283569336, + "logits/rejected": -1.544524908065796, + "logps/chosen": -0.07670239359140396, + "logps/rejected": -14.427042007446289, + "loss": 0.1903, + "nll_loss": 0.17354464530944824, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007670239545404911, + "rewards/margins": 1.4350340366363525, + "rewards/rejected": -1.442704200744629, + "step": 993 + }, + { + "epoch": 1.943304007820137, + "grad_norm": 0.47353386878967285, + "learning_rate": 1.7742987606001303e-05, + "log_odds_chosen": 10.61418628692627, + "log_odds_ratio": -0.005836042109876871, + "logits/chosen": -1.5043628215789795, + "logits/rejected": -1.5706040859222412, + "logps/chosen": -0.08174462616443634, + "logps/rejected": -7.984548568725586, + "loss": 0.1828, + "nll_loss": 0.19292601943016052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008174463175237179, + "rewards/margins": 0.7902804017066956, + "rewards/rejected": -0.7984548807144165, + "step": 994 + }, + { + "epoch": 1.9452590420332356, + "grad_norm": 0.515110969543457, + "learning_rate": 1.7710371819960863e-05, + "log_odds_chosen": 15.401674270629883, + "log_odds_ratio": -0.0032722388859838247, + "logits/chosen": -1.7275961637496948, + "logits/rejected": -1.5735251903533936, + "logps/chosen": -0.05953095853328705, + "logps/rejected": -12.6295166015625, + "loss": 0.194, + "nll_loss": 0.1357138305902481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005953095853328705, + "rewards/margins": 1.2569985389709473, + "rewards/rejected": -1.2629516124725342, + "step": 995 + }, + { + "epoch": 1.9472140762463344, + "grad_norm": 0.5069015622138977, + "learning_rate": 1.767775603392042e-05, + "log_odds_chosen": 15.16097640991211, + "log_odds_ratio": -0.0018458880949765444, + "logits/chosen": -1.710456371307373, + "logits/rejected": -1.2926723957061768, + "logps/chosen": -0.07836496084928513, + "logps/rejected": -12.42801570892334, + "loss": 0.1838, + "nll_loss": 0.14545632898807526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007836495526134968, + "rewards/margins": 1.2349650859832764, + "rewards/rejected": -1.2428016662597656, + "step": 996 + }, + { + "epoch": 1.949169110459433, + "grad_norm": 0.47342154383659363, + "learning_rate": 1.7645140247879975e-05, + "log_odds_chosen": 14.024782180786133, + "log_odds_ratio": -0.004466162528842688, + "logits/chosen": -1.9000165462493896, + "logits/rejected": -1.530597448348999, + "logps/chosen": -0.08273196220397949, + "logps/rejected": -11.385818481445312, + "loss": 0.1806, + "nll_loss": 0.1819624900817871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008273196406662464, + "rewards/margins": 1.1303086280822754, + "rewards/rejected": -1.1385819911956787, + "step": 997 + }, + { + "epoch": 1.9511241446725318, + "grad_norm": 0.4814984202384949, + "learning_rate": 1.761252446183953e-05, + "log_odds_chosen": 17.498266220092773, + "log_odds_ratio": -6.137495074653998e-05, + "logits/chosen": -1.722541332244873, + "logits/rejected": -1.3508663177490234, + "logps/chosen": -0.09511016309261322, + "logps/rejected": -15.04718017578125, + "loss": 0.1813, + "nll_loss": 0.17818129062652588, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.009511017240583897, + "rewards/margins": 1.4952069520950317, + "rewards/rejected": -1.5047180652618408, + "step": 998 + }, + { + "epoch": 1.9530791788856305, + "grad_norm": 0.48567551374435425, + "learning_rate": 1.7579908675799087e-05, + "log_odds_chosen": 17.958662033081055, + "log_odds_ratio": -0.000862654997035861, + "logits/chosen": -1.6591370105743408, + "logits/rejected": -1.3592464923858643, + "logps/chosen": -0.06638650596141815, + "logps/rejected": -15.179159164428711, + "loss": 0.1856, + "nll_loss": 0.15598665177822113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00663865078240633, + "rewards/margins": 1.5112773180007935, + "rewards/rejected": -1.517915964126587, + "step": 999 + }, + { + "epoch": 1.9550342130987293, + "grad_norm": 0.47491875290870667, + "learning_rate": 1.7547292889758643e-05, + "log_odds_chosen": 10.575210571289062, + "log_odds_ratio": -0.0047175083309412, + "logits/chosen": -1.6772103309631348, + "logits/rejected": -1.4901946783065796, + "logps/chosen": -0.16069205105304718, + "logps/rejected": -8.356886863708496, + "loss": 0.1829, + "nll_loss": 0.2033214569091797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016069205477833748, + "rewards/margins": 0.8196194171905518, + "rewards/rejected": -0.8356887102127075, + "step": 1000 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.5007199048995972, + "learning_rate": 1.75146771037182e-05, + "log_odds_chosen": 13.53982925415039, + "log_odds_ratio": -0.0012861751019954681, + "logits/chosen": -1.803462266921997, + "logits/rejected": -1.5211877822875977, + "logps/chosen": -0.07321825623512268, + "logps/rejected": -10.561752319335938, + "loss": 0.1819, + "nll_loss": 0.15853643417358398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007321825250983238, + "rewards/margins": 1.0488535165786743, + "rewards/rejected": -1.0561753511428833, + "step": 1001 + }, + { + "epoch": 1.9589442815249267, + "grad_norm": 0.47284549474716187, + "learning_rate": 1.7482061317677755e-05, + "log_odds_chosen": 11.804466247558594, + "log_odds_ratio": -0.0033986561466008425, + "logits/chosen": -1.7320098876953125, + "logits/rejected": -1.5481688976287842, + "logps/chosen": -0.05814126133918762, + "logps/rejected": -8.718782424926758, + "loss": 0.1794, + "nll_loss": 0.18961867690086365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005814125761389732, + "rewards/margins": 0.866064190864563, + "rewards/rejected": -0.8718782663345337, + "step": 1002 + }, + { + "epoch": 1.9608993157380255, + "grad_norm": 0.4747210443019867, + "learning_rate": 1.744944553163731e-05, + "log_odds_chosen": 18.71762466430664, + "log_odds_ratio": -0.00034941593185067177, + "logits/chosen": -1.6778430938720703, + "logits/rejected": -1.4672201871871948, + "logps/chosen": -0.06283006817102432, + "logps/rejected": -15.802351951599121, + "loss": 0.1793, + "nll_loss": 0.17565126717090607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006283007562160492, + "rewards/margins": 1.573952317237854, + "rewards/rejected": -1.580235242843628, + "step": 1003 + }, + { + "epoch": 1.9628543499511242, + "grad_norm": 0.490570604801178, + "learning_rate": 1.7416829745596867e-05, + "log_odds_chosen": 13.87380599975586, + "log_odds_ratio": -0.0022793675307184458, + "logits/chosen": -1.5163145065307617, + "logits/rejected": -1.5859808921813965, + "logps/chosen": -0.07041376829147339, + "logps/rejected": -10.92829704284668, + "loss": 0.1793, + "nll_loss": 0.154494047164917, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007041377481073141, + "rewards/margins": 1.0857882499694824, + "rewards/rejected": -1.092829704284668, + "step": 1004 + }, + { + "epoch": 1.964809384164223, + "grad_norm": 0.47680002450942993, + "learning_rate": 1.7384213959556427e-05, + "log_odds_chosen": 7.258768081665039, + "log_odds_ratio": -0.00480604125186801, + "logits/chosen": -1.8266403675079346, + "logits/rejected": -1.5872018337249756, + "logps/chosen": -0.05676499009132385, + "logps/rejected": -4.365412712097168, + "loss": 0.1742, + "nll_loss": 0.1361757516860962, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005676499102264643, + "rewards/margins": 0.43086475133895874, + "rewards/rejected": -0.43654125928878784, + "step": 1005 + }, + { + "epoch": 1.9667644183773216, + "grad_norm": 0.4797651469707489, + "learning_rate": 1.7351598173515983e-05, + "log_odds_chosen": 12.452140808105469, + "log_odds_ratio": -0.006218497641384602, + "logits/chosen": -1.8221962451934814, + "logits/rejected": -1.4934077262878418, + "logps/chosen": -0.08319053053855896, + "logps/rejected": -9.870179176330566, + "loss": 0.1779, + "nll_loss": 0.17097346484661102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008319051936268806, + "rewards/margins": 0.9786988496780396, + "rewards/rejected": -0.9870178699493408, + "step": 1006 + }, + { + "epoch": 1.9687194525904204, + "grad_norm": 0.4610869586467743, + "learning_rate": 1.731898238747554e-05, + "log_odds_chosen": 16.667354583740234, + "log_odds_ratio": -0.0011464981362223625, + "logits/chosen": -1.6847001314163208, + "logits/rejected": -1.4607248306274414, + "logps/chosen": -0.052391715347766876, + "logps/rejected": -13.646353721618652, + "loss": 0.1728, + "nll_loss": 0.14865797758102417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00523917144164443, + "rewards/margins": 1.35939621925354, + "rewards/rejected": -1.3646354675292969, + "step": 1007 + }, + { + "epoch": 1.970674486803519, + "grad_norm": 0.47004663944244385, + "learning_rate": 1.7286366601435095e-05, + "log_odds_chosen": 10.20899772644043, + "log_odds_ratio": -0.004660847131162882, + "logits/chosen": -1.6344929933547974, + "logits/rejected": -1.5127387046813965, + "logps/chosen": -0.07543870806694031, + "logps/rejected": -7.622878074645996, + "loss": 0.177, + "nll_loss": 0.17975713312625885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007543870247900486, + "rewards/margins": 0.7547439336776733, + "rewards/rejected": -0.7622878551483154, + "step": 1008 + }, + { + "epoch": 1.9726295210166178, + "grad_norm": 0.4693288505077362, + "learning_rate": 1.725375081539465e-05, + "log_odds_chosen": 14.211265563964844, + "log_odds_ratio": -0.004400248173624277, + "logits/chosen": -1.8530360460281372, + "logits/rejected": -1.5400259494781494, + "logps/chosen": -0.06089823693037033, + "logps/rejected": -11.42688274383545, + "loss": 0.1723, + "nll_loss": 0.1442534625530243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006089824251830578, + "rewards/margins": 1.1365985870361328, + "rewards/rejected": -1.142688274383545, + "step": 1009 + }, + { + "epoch": 1.9745845552297165, + "grad_norm": 0.4765762984752655, + "learning_rate": 1.7221135029354207e-05, + "log_odds_chosen": 16.740489959716797, + "log_odds_ratio": -0.005447946023195982, + "logits/chosen": -1.6116132736206055, + "logits/rejected": -1.4202674627304077, + "logps/chosen": -0.08106441795825958, + "logps/rejected": -14.169790267944336, + "loss": 0.1741, + "nll_loss": 0.18983358144760132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008106442168354988, + "rewards/margins": 1.4088726043701172, + "rewards/rejected": -1.4169789552688599, + "step": 1010 + }, + { + "epoch": 1.9765395894428153, + "grad_norm": 0.4707831144332886, + "learning_rate": 1.7188519243313763e-05, + "log_odds_chosen": 11.813522338867188, + "log_odds_ratio": -0.0010746917687356472, + "logits/chosen": -1.7328612804412842, + "logits/rejected": -1.4170383214950562, + "logps/chosen": -0.07584033906459808, + "logps/rejected": -9.016257286071777, + "loss": 0.1737, + "nll_loss": 0.1521928906440735, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0075840335339307785, + "rewards/margins": 0.8940417766571045, + "rewards/rejected": -0.9016257524490356, + "step": 1011 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.47038188576698303, + "learning_rate": 1.715590345727332e-05, + "log_odds_chosen": 12.677786827087402, + "log_odds_ratio": -0.0033749567810446024, + "logits/chosen": -1.6626577377319336, + "logits/rejected": -1.2649115324020386, + "logps/chosen": -0.06428151577711105, + "logps/rejected": -9.810264587402344, + "loss": 0.1716, + "nll_loss": 0.19011980295181274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00642815139144659, + "rewards/margins": 0.9745982885360718, + "rewards/rejected": -0.9810264110565186, + "step": 1012 + }, + { + "epoch": 1.9804496578690127, + "grad_norm": 0.4701940715312958, + "learning_rate": 1.7123287671232875e-05, + "log_odds_chosen": 14.256345748901367, + "log_odds_ratio": -0.001447357819415629, + "logits/chosen": -1.7321281433105469, + "logits/rejected": -1.548928141593933, + "logps/chosen": -0.056652627885341644, + "logps/rejected": -11.393918991088867, + "loss": 0.173, + "nll_loss": 0.13331666588783264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005665263161063194, + "rewards/margins": 1.1337265968322754, + "rewards/rejected": -1.1393918991088867, + "step": 1013 + }, + { + "epoch": 1.9824046920821115, + "grad_norm": 0.47756630182266235, + "learning_rate": 1.709067188519243e-05, + "log_odds_chosen": 11.961751937866211, + "log_odds_ratio": -0.003919549752026796, + "logits/chosen": -1.6975386142730713, + "logits/rejected": -1.3096014261245728, + "logps/chosen": -0.0880308598279953, + "logps/rejected": -9.416887283325195, + "loss": 0.1741, + "nll_loss": 0.18372856080532074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00880308635532856, + "rewards/margins": 0.9328856468200684, + "rewards/rejected": -0.9416887760162354, + "step": 1014 + }, + { + "epoch": 1.9843597262952102, + "grad_norm": 0.4936063587665558, + "learning_rate": 1.705805609915199e-05, + "log_odds_chosen": 7.070412635803223, + "log_odds_ratio": -0.00505196675658226, + "logits/chosen": -1.5006966590881348, + "logits/rejected": -1.5212724208831787, + "logps/chosen": -0.0843782126903534, + "logps/rejected": -4.423410415649414, + "loss": 0.1809, + "nll_loss": 0.20195096731185913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00843781977891922, + "rewards/margins": 0.4339032769203186, + "rewards/rejected": -0.44234102964401245, + "step": 1015 + }, + { + "epoch": 1.986314760508309, + "grad_norm": 0.47297972440719604, + "learning_rate": 1.7025440313111547e-05, + "log_odds_chosen": 12.93967056274414, + "log_odds_ratio": -0.0066276537254452705, + "logits/chosen": -1.5733680725097656, + "logits/rejected": -1.5305302143096924, + "logps/chosen": -0.08055106550455093, + "logps/rejected": -10.361799240112305, + "loss": 0.1718, + "nll_loss": 0.19018521904945374, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008055107668042183, + "rewards/margins": 1.0281248092651367, + "rewards/rejected": -1.036180019378662, + "step": 1016 + }, + { + "epoch": 1.9882697947214076, + "grad_norm": 0.4715351164340973, + "learning_rate": 1.6992824527071103e-05, + "log_odds_chosen": 16.30682945251465, + "log_odds_ratio": -0.0035198379773646593, + "logits/chosen": -1.549741268157959, + "logits/rejected": -1.298494577407837, + "logps/chosen": -0.054167211055755615, + "logps/rejected": -13.393780708312988, + "loss": 0.1734, + "nll_loss": 0.1464179903268814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0054167211055755615, + "rewards/margins": 1.3339612483978271, + "rewards/rejected": -1.3393781185150146, + "step": 1017 + }, + { + "epoch": 1.9902248289345064, + "grad_norm": 0.4520447850227356, + "learning_rate": 1.696020874103066e-05, + "log_odds_chosen": 16.268028259277344, + "log_odds_ratio": -0.0038313032127916813, + "logits/chosen": -1.827608585357666, + "logits/rejected": -1.5700699090957642, + "logps/chosen": -0.060959771275520325, + "logps/rejected": -13.477943420410156, + "loss": 0.1672, + "nll_loss": 0.142561137676239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006095977500081062, + "rewards/margins": 1.341698408126831, + "rewards/rejected": -1.3477944135665894, + "step": 1018 + }, + { + "epoch": 1.992179863147605, + "grad_norm": 0.4838990867137909, + "learning_rate": 1.6927592954990215e-05, + "log_odds_chosen": 14.4407958984375, + "log_odds_ratio": -0.0021224250085651875, + "logits/chosen": -1.6597658395767212, + "logits/rejected": -1.538135290145874, + "logps/chosen": -0.06333006918430328, + "logps/rejected": -11.641084671020508, + "loss": 0.1733, + "nll_loss": 0.15810123085975647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006333007011562586, + "rewards/margins": 1.1577754020690918, + "rewards/rejected": -1.1641085147857666, + "step": 1019 + }, + { + "epoch": 1.9941348973607038, + "grad_norm": 0.4875689744949341, + "learning_rate": 1.689497716894977e-05, + "log_odds_chosen": 10.93487548828125, + "log_odds_ratio": -0.019431423395872116, + "logits/chosen": -1.7854640483856201, + "logits/rejected": -1.5719900131225586, + "logps/chosen": -0.07857014238834381, + "logps/rejected": -8.3015775680542, + "loss": 0.1715, + "nll_loss": 0.2171262800693512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.007857014425098896, + "rewards/margins": 0.8223007917404175, + "rewards/rejected": -0.8301577568054199, + "step": 1020 + }, + { + "epoch": 1.9960899315738025, + "grad_norm": 0.47365128993988037, + "learning_rate": 1.6862361382909328e-05, + "log_odds_chosen": 15.022473335266113, + "log_odds_ratio": -0.0019368290668353438, + "logits/chosen": -1.71101975440979, + "logits/rejected": -1.5131220817565918, + "logps/chosen": -0.05431981384754181, + "logps/rejected": -12.052108764648438, + "loss": 0.1698, + "nll_loss": 0.17422914505004883, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005431981757283211, + "rewards/margins": 1.1997790336608887, + "rewards/rejected": -1.2052109241485596, + "step": 1021 + }, + { + "epoch": 1.9980449657869013, + "grad_norm": 0.4644717574119568, + "learning_rate": 1.6829745596868884e-05, + "log_odds_chosen": 17.74345588684082, + "log_odds_ratio": -0.0013643424026668072, + "logits/chosen": -1.7256321907043457, + "logits/rejected": -1.2642879486083984, + "logps/chosen": -0.051039502024650574, + "logps/rejected": -14.532419204711914, + "loss": 0.1656, + "nll_loss": 0.1637251079082489, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005103949923068285, + "rewards/margins": 1.4481379985809326, + "rewards/rejected": -1.4532420635223389, + "step": 1022 + }, + { + "epoch": 2.0, + "grad_norm": 0.45869430899620056, + "learning_rate": 1.679712981082844e-05, + "log_odds_chosen": 13.588766098022461, + "log_odds_ratio": -0.006925757043063641, + "logits/chosen": -1.6371350288391113, + "logits/rejected": -1.5500277280807495, + "logps/chosen": -0.0688728541135788, + "logps/rejected": -10.80539608001709, + "loss": 0.1654, + "nll_loss": 0.166140615940094, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.006887285970151424, + "rewards/margins": 1.0736522674560547, + "rewards/rejected": -1.080539584159851, + "step": 1023 + }, + { + "epoch": 2.0019550342130987, + "grad_norm": 0.33533987402915955, + "learning_rate": 1.6764514024787996e-05, + "log_odds_chosen": 11.336206436157227, + "log_odds_ratio": -0.0024281213991343975, + "logits/chosen": -1.5667672157287598, + "logits/rejected": -1.5653495788574219, + "logps/chosen": -0.03323933854699135, + "logps/rejected": -7.898563385009766, + "loss": 0.1219, + "nll_loss": 0.12719321250915527, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033239335753023624, + "rewards/margins": 0.7865324020385742, + "rewards/rejected": -0.7898563742637634, + "step": 1024 + }, + { + "epoch": 2.0039100684261975, + "grad_norm": 0.337337851524353, + "learning_rate": 1.6731898238747555e-05, + "log_odds_chosen": 13.927925109863281, + "log_odds_ratio": -0.0019463921198621392, + "logits/chosen": -1.6855523586273193, + "logits/rejected": -1.4844789505004883, + "logps/chosen": -0.033702753484249115, + "logps/rejected": -10.410638809204102, + "loss": 0.1241, + "nll_loss": 0.12470100820064545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003370275255292654, + "rewards/margins": 1.0376936197280884, + "rewards/rejected": -1.041063904762268, + "step": 1025 + }, + { + "epoch": 2.005865102639296, + "grad_norm": 0.3427545428276062, + "learning_rate": 1.669928245270711e-05, + "log_odds_chosen": 15.914213180541992, + "log_odds_ratio": -0.0011518155224621296, + "logits/chosen": -1.5804736614227295, + "logits/rejected": -1.4996447563171387, + "logps/chosen": -0.028461534529924393, + "logps/rejected": -12.243548393249512, + "loss": 0.123, + "nll_loss": 0.1139058843255043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028461534529924393, + "rewards/margins": 1.2215086221694946, + "rewards/rejected": -1.2243547439575195, + "step": 1026 + }, + { + "epoch": 2.007820136852395, + "grad_norm": 0.357534259557724, + "learning_rate": 1.6666666666666667e-05, + "log_odds_chosen": 12.056427001953125, + "log_odds_ratio": -0.005053720902651548, + "logits/chosen": -1.8092262744903564, + "logits/rejected": -1.6400420665740967, + "logps/chosen": -0.0381663478910923, + "logps/rejected": -8.748214721679688, + "loss": 0.1227, + "nll_loss": 0.12274344265460968, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0038166344165802, + "rewards/margins": 0.8710047602653503, + "rewards/rejected": -0.8748214244842529, + "step": 1027 + }, + { + "epoch": 2.0097751710654936, + "grad_norm": 0.3629938066005707, + "learning_rate": 1.6634050880626224e-05, + "log_odds_chosen": 14.48709487915039, + "log_odds_ratio": -0.00222026533447206, + "logits/chosen": -1.688849925994873, + "logits/rejected": -1.394085168838501, + "logps/chosen": -0.037920866161584854, + "logps/rejected": -11.221081733703613, + "loss": 0.1221, + "nll_loss": 0.12267126142978668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037920866161584854, + "rewards/margins": 1.1183161735534668, + "rewards/rejected": -1.1221082210540771, + "step": 1028 + }, + { + "epoch": 2.0117302052785924, + "grad_norm": 0.36743736267089844, + "learning_rate": 1.660143509458578e-05, + "log_odds_chosen": 12.616044998168945, + "log_odds_ratio": -0.002136478666216135, + "logits/chosen": -1.6842187643051147, + "logits/rejected": -1.4720044136047363, + "logps/chosen": -0.04731526970863342, + "logps/rejected": -9.332539558410645, + "loss": 0.1209, + "nll_loss": 0.15744870901107788, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004731527529656887, + "rewards/margins": 0.9285224676132202, + "rewards/rejected": -0.9332538843154907, + "step": 1029 + }, + { + "epoch": 2.013685239491691, + "grad_norm": 0.3498314619064331, + "learning_rate": 1.6568819308545336e-05, + "log_odds_chosen": 12.292961120605469, + "log_odds_ratio": -0.002887208480387926, + "logits/chosen": -1.830162525177002, + "logits/rejected": -1.5554287433624268, + "logps/chosen": -0.032263875007629395, + "logps/rejected": -8.848503112792969, + "loss": 0.1181, + "nll_loss": 0.10476091504096985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032263873144984245, + "rewards/margins": 0.8816238641738892, + "rewards/rejected": -0.8848502039909363, + "step": 1030 + }, + { + "epoch": 2.01564027370479, + "grad_norm": 0.34879910945892334, + "learning_rate": 1.6536203522504892e-05, + "log_odds_chosen": 19.797271728515625, + "log_odds_ratio": -0.0005493622738867998, + "logits/chosen": -1.7808446884155273, + "logits/rejected": -1.4478907585144043, + "logps/chosen": -0.026238689199090004, + "logps/rejected": -16.1534481048584, + "loss": 0.1197, + "nll_loss": 0.10391734540462494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026238691061735153, + "rewards/margins": 1.6127209663391113, + "rewards/rejected": -1.6153450012207031, + "step": 1031 + }, + { + "epoch": 2.0175953079178885, + "grad_norm": 0.3677908480167389, + "learning_rate": 1.6503587736464448e-05, + "log_odds_chosen": 12.713852882385254, + "log_odds_ratio": -0.001224420964717865, + "logits/chosen": -1.6664257049560547, + "logits/rejected": -1.7417423725128174, + "logps/chosen": -0.039256609976291656, + "logps/rejected": -9.320921897888184, + "loss": 0.1244, + "nll_loss": 0.12551580369472504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003925661090761423, + "rewards/margins": 0.9281665682792664, + "rewards/rejected": -0.9320921897888184, + "step": 1032 + }, + { + "epoch": 2.0195503421309873, + "grad_norm": 0.3412433862686157, + "learning_rate": 1.6470971950424004e-05, + "log_odds_chosen": 14.687932014465332, + "log_odds_ratio": -0.001365888281725347, + "logits/chosen": -1.6259613037109375, + "logits/rejected": -1.642000436782837, + "logps/chosen": -0.05817065387964249, + "logps/rejected": -11.768264770507812, + "loss": 0.1197, + "nll_loss": 0.14406177401542664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005817065481096506, + "rewards/margins": 1.1710095405578613, + "rewards/rejected": -1.1768264770507812, + "step": 1033 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.33611756563186646, + "learning_rate": 1.643835616438356e-05, + "log_odds_chosen": 14.45857048034668, + "log_odds_ratio": -0.0017855982296168804, + "logits/chosen": -1.625054121017456, + "logits/rejected": -1.2853590250015259, + "logps/chosen": -0.025254391133785248, + "logps/rejected": -10.748546600341797, + "loss": 0.1181, + "nll_loss": 0.1055411696434021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025254390202462673, + "rewards/margins": 1.0723292827606201, + "rewards/rejected": -1.0748547315597534, + "step": 1034 + }, + { + "epoch": 2.0234604105571847, + "grad_norm": 0.3383064866065979, + "learning_rate": 1.640574037834312e-05, + "log_odds_chosen": 14.12289810180664, + "log_odds_ratio": -0.0006786070880480111, + "logits/chosen": -1.6783475875854492, + "logits/rejected": -1.5409599542617798, + "logps/chosen": -0.026838157325983047, + "logps/rejected": -10.346410751342773, + "loss": 0.119, + "nll_loss": 0.1127219870686531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00268381554633379, + "rewards/margins": 1.0319571495056152, + "rewards/rejected": -1.0346410274505615, + "step": 1035 + }, + { + "epoch": 2.0254154447702835, + "grad_norm": 0.34170374274253845, + "learning_rate": 1.6373124592302676e-05, + "log_odds_chosen": 14.265939712524414, + "log_odds_ratio": -0.0006233911262825131, + "logits/chosen": -1.8125946521759033, + "logits/rejected": -1.2180986404418945, + "logps/chosen": -0.021749380975961685, + "logps/rejected": -10.384576797485352, + "loss": 0.12, + "nll_loss": 0.09651802480220795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021749380975961685, + "rewards/margins": 1.0362826585769653, + "rewards/rejected": -1.0384576320648193, + "step": 1036 + }, + { + "epoch": 2.027370478983382, + "grad_norm": 0.3453758656978607, + "learning_rate": 1.6340508806262232e-05, + "log_odds_chosen": 14.509671211242676, + "log_odds_ratio": -0.0006090041715651751, + "logits/chosen": -1.5697336196899414, + "logits/rejected": -1.4790533781051636, + "logps/chosen": -0.0369543731212616, + "logps/rejected": -11.083316802978516, + "loss": 0.1224, + "nll_loss": 0.1343960464000702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003695437218993902, + "rewards/margins": 1.1046361923217773, + "rewards/rejected": -1.1083316802978516, + "step": 1037 + }, + { + "epoch": 2.029325513196481, + "grad_norm": 0.32976001501083374, + "learning_rate": 1.6307893020221788e-05, + "log_odds_chosen": 16.012022018432617, + "log_odds_ratio": -0.0013269393239170313, + "logits/chosen": -1.7448830604553223, + "logits/rejected": -1.5270235538482666, + "logps/chosen": -0.03935016691684723, + "logps/rejected": -12.708949089050293, + "loss": 0.115, + "nll_loss": 0.10537561774253845, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003935016691684723, + "rewards/margins": 1.2669599056243896, + "rewards/rejected": -1.270895004272461, + "step": 1038 + }, + { + "epoch": 2.0312805474095796, + "grad_norm": 0.33822858333587646, + "learning_rate": 1.6275277234181344e-05, + "log_odds_chosen": 15.407791137695312, + "log_odds_ratio": -0.00024050036154221743, + "logits/chosen": -1.812911033630371, + "logits/rejected": -1.5146994590759277, + "logps/chosen": -0.030338935554027557, + "logps/rejected": -11.816789627075195, + "loss": 0.1177, + "nll_loss": 0.11291439831256866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003033893648535013, + "rewards/margins": 1.178645133972168, + "rewards/rejected": -1.1816790103912354, + "step": 1039 + }, + { + "epoch": 2.0332355816226784, + "grad_norm": 0.33873891830444336, + "learning_rate": 1.62426614481409e-05, + "log_odds_chosen": 13.700359344482422, + "log_odds_ratio": -0.0007127334247343242, + "logits/chosen": -1.646970272064209, + "logits/rejected": -1.5183926820755005, + "logps/chosen": -0.04763256013393402, + "logps/rejected": -10.425230026245117, + "loss": 0.1163, + "nll_loss": 0.18333211541175842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0047632562927901745, + "rewards/margins": 1.0377599000930786, + "rewards/rejected": -1.0425231456756592, + "step": 1040 + }, + { + "epoch": 2.035190615835777, + "grad_norm": 0.36663320660591125, + "learning_rate": 1.6210045662100456e-05, + "log_odds_chosen": 15.526372909545898, + "log_odds_ratio": -0.0005602582241408527, + "logits/chosen": -1.756584644317627, + "logits/rejected": -1.3869256973266602, + "logps/chosen": -0.04229070246219635, + "logps/rejected": -12.205892562866211, + "loss": 0.1232, + "nll_loss": 0.137793630361557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0042290701530873775, + "rewards/margins": 1.2163602113723755, + "rewards/rejected": -1.2205893993377686, + "step": 1041 + }, + { + "epoch": 2.037145650048876, + "grad_norm": 0.3288079798221588, + "learning_rate": 1.6177429876060012e-05, + "log_odds_chosen": 17.841602325439453, + "log_odds_ratio": -0.0005210249801166356, + "logits/chosen": -1.635957956314087, + "logits/rejected": -1.6155720949172974, + "logps/chosen": -0.03793695569038391, + "logps/rejected": -14.536437034606934, + "loss": 0.1153, + "nll_loss": 0.11515488475561142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003793695941567421, + "rewards/margins": 1.449850082397461, + "rewards/rejected": -1.453643560409546, + "step": 1042 + }, + { + "epoch": 2.0391006842619745, + "grad_norm": 0.33980265259742737, + "learning_rate": 1.6144814090019568e-05, + "log_odds_chosen": 14.393564224243164, + "log_odds_ratio": -0.0005733586149290204, + "logits/chosen": -1.8430328369140625, + "logits/rejected": -1.5842349529266357, + "logps/chosen": -0.02919265627861023, + "logps/rejected": -10.729811668395996, + "loss": 0.118, + "nll_loss": 0.12432640790939331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002919265301898122, + "rewards/margins": 1.0700619220733643, + "rewards/rejected": -1.0729811191558838, + "step": 1043 + }, + { + "epoch": 2.0410557184750733, + "grad_norm": 0.3430071771144867, + "learning_rate": 1.6112198303979124e-05, + "log_odds_chosen": 12.301066398620605, + "log_odds_ratio": -0.0013980371877551079, + "logits/chosen": -1.7202266454696655, + "logits/rejected": -1.5902425050735474, + "logps/chosen": -0.034268952906131744, + "logps/rejected": -8.856252670288086, + "loss": 0.1186, + "nll_loss": 0.1570996195077896, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034268954768776894, + "rewards/margins": 0.8821984529495239, + "rewards/rejected": -0.8856253623962402, + "step": 1044 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.33922719955444336, + "learning_rate": 1.6079582517938684e-05, + "log_odds_chosen": 16.242517471313477, + "log_odds_ratio": -0.000653935014270246, + "logits/chosen": -1.8005815744400024, + "logits/rejected": -1.4145524501800537, + "logps/chosen": -0.026289036497473717, + "logps/rejected": -12.447118759155273, + "loss": 0.1173, + "nll_loss": 0.14279134571552277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002628903603181243, + "rewards/margins": 1.242082953453064, + "rewards/rejected": -1.2447118759155273, + "step": 1045 + }, + { + "epoch": 2.0449657869012707, + "grad_norm": 0.33658885955810547, + "learning_rate": 1.604696673189824e-05, + "log_odds_chosen": 18.248130798339844, + "log_odds_ratio": -0.0006439623539336026, + "logits/chosen": -1.7995634078979492, + "logits/rejected": -1.5159252882003784, + "logps/chosen": -0.02123935893177986, + "logps/rejected": -14.331918716430664, + "loss": 0.1186, + "nll_loss": 0.11975152790546417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021239356137812138, + "rewards/margins": 1.431067943572998, + "rewards/rejected": -1.4331918954849243, + "step": 1046 + }, + { + "epoch": 2.0469208211143695, + "grad_norm": 0.348044216632843, + "learning_rate": 1.6014350945857796e-05, + "log_odds_chosen": 17.194683074951172, + "log_odds_ratio": -0.0008254529675468802, + "logits/chosen": -1.5637538433074951, + "logits/rejected": -1.4607949256896973, + "logps/chosen": -0.03191043436527252, + "logps/rejected": -13.659706115722656, + "loss": 0.1163, + "nll_loss": 0.11762291193008423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003191043622791767, + "rewards/margins": 1.3627796173095703, + "rewards/rejected": -1.3659706115722656, + "step": 1047 + }, + { + "epoch": 2.048875855327468, + "grad_norm": 0.335959792137146, + "learning_rate": 1.5981735159817352e-05, + "log_odds_chosen": 19.606332778930664, + "log_odds_ratio": -0.00033681836794130504, + "logits/chosen": -1.6299192905426025, + "logits/rejected": -1.5056042671203613, + "logps/chosen": -0.03427119553089142, + "logps/rejected": -16.09568214416504, + "loss": 0.1171, + "nll_loss": 0.09795060753822327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034271194599568844, + "rewards/margins": 1.6061413288116455, + "rewards/rejected": -1.6095683574676514, + "step": 1048 + }, + { + "epoch": 2.050830889540567, + "grad_norm": 0.33021610975265503, + "learning_rate": 1.5949119373776908e-05, + "log_odds_chosen": 16.05092430114746, + "log_odds_ratio": -0.0008061312837526202, + "logits/chosen": -1.5663495063781738, + "logits/rejected": -1.3301873207092285, + "logps/chosen": -0.03371976688504219, + "logps/rejected": -12.576943397521973, + "loss": 0.1176, + "nll_loss": 0.11990733444690704, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033719767816364765, + "rewards/margins": 1.2543222904205322, + "rewards/rejected": -1.2576943635940552, + "step": 1049 + }, + { + "epoch": 2.0527859237536656, + "grad_norm": 0.3385836184024811, + "learning_rate": 1.5916503587736464e-05, + "log_odds_chosen": 14.518239974975586, + "log_odds_ratio": -0.00030778360087424517, + "logits/chosen": -1.7959730625152588, + "logits/rejected": -1.4229018688201904, + "logps/chosen": -0.029165182262659073, + "logps/rejected": -10.941658973693848, + "loss": 0.1188, + "nll_loss": 0.08927049487829208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029165183659642935, + "rewards/margins": 1.0912494659423828, + "rewards/rejected": -1.0941660404205322, + "step": 1050 + }, + { + "epoch": 2.0547409579667644, + "grad_norm": 0.34997180104255676, + "learning_rate": 1.588388780169602e-05, + "log_odds_chosen": 16.079906463623047, + "log_odds_ratio": -0.0011211010860279202, + "logits/chosen": -1.6892530918121338, + "logits/rejected": -1.6070427894592285, + "logps/chosen": -0.029738733544945717, + "logps/rejected": -12.518460273742676, + "loss": 0.12, + "nll_loss": 0.0992460548877716, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029738731682300568, + "rewards/margins": 1.24887216091156, + "rewards/rejected": -1.2518460750579834, + "step": 1051 + }, + { + "epoch": 2.056695992179863, + "grad_norm": 0.3422069847583771, + "learning_rate": 1.5851272015655576e-05, + "log_odds_chosen": 9.726630210876465, + "log_odds_ratio": -0.0029128440655767918, + "logits/chosen": -1.7456433773040771, + "logits/rejected": -1.6333792209625244, + "logps/chosen": -0.035444483160972595, + "logps/rejected": -6.3894805908203125, + "loss": 0.1165, + "nll_loss": 0.09733754396438599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035444481763988733, + "rewards/margins": 0.6354036331176758, + "rewards/rejected": -0.6389480829238892, + "step": 1052 + }, + { + "epoch": 2.058651026392962, + "grad_norm": 0.3189685344696045, + "learning_rate": 1.5818656229615132e-05, + "log_odds_chosen": 12.87004280090332, + "log_odds_ratio": -0.0008589779026806355, + "logits/chosen": -1.9002139568328857, + "logits/rejected": -1.569457769393921, + "logps/chosen": -0.03395532816648483, + "logps/rejected": -9.412010192871094, + "loss": 0.1132, + "nll_loss": 0.09735795855522156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003395532723516226, + "rewards/margins": 0.9378055334091187, + "rewards/rejected": -0.9412010908126831, + "step": 1053 + }, + { + "epoch": 2.0606060606060606, + "grad_norm": 0.3197542428970337, + "learning_rate": 1.578604044357469e-05, + "log_odds_chosen": 20.134307861328125, + "log_odds_ratio": -0.0006152144051156938, + "logits/chosen": -1.6665730476379395, + "logits/rejected": -1.5158636569976807, + "logps/chosen": -0.029299519956111908, + "logps/rejected": -16.53022003173828, + "loss": 0.1145, + "nll_loss": 0.10112839937210083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002929951995611191, + "rewards/margins": 1.6500921249389648, + "rewards/rejected": -1.653022050857544, + "step": 1054 + }, + { + "epoch": 2.0625610948191593, + "grad_norm": 0.3136463463306427, + "learning_rate": 1.5753424657534248e-05, + "log_odds_chosen": 16.717947006225586, + "log_odds_ratio": -0.0003941088798455894, + "logits/chosen": -1.753899335861206, + "logits/rejected": -1.4957802295684814, + "logps/chosen": -0.021373068913817406, + "logps/rejected": -12.716117858886719, + "loss": 0.1117, + "nll_loss": 0.10373786091804504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021373070776462555, + "rewards/margins": 1.2694745063781738, + "rewards/rejected": -1.2716116905212402, + "step": 1055 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.3401486277580261, + "learning_rate": 1.5720808871493804e-05, + "log_odds_chosen": 17.890892028808594, + "log_odds_ratio": -0.0001102295791497454, + "logits/chosen": -1.6625443696975708, + "logits/rejected": -1.3718169927597046, + "logps/chosen": -0.07767707854509354, + "logps/rejected": -14.52018928527832, + "loss": 0.1182, + "nll_loss": 0.13433966040611267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0077677080407738686, + "rewards/margins": 1.444251298904419, + "rewards/rejected": -1.4520189762115479, + "step": 1056 + }, + { + "epoch": 2.0664711632453567, + "grad_norm": 0.32191887497901917, + "learning_rate": 1.568819308545336e-05, + "log_odds_chosen": 16.998258590698242, + "log_odds_ratio": -0.0005745643866248429, + "logits/chosen": -1.9298028945922852, + "logits/rejected": -1.5000274181365967, + "logps/chosen": -0.036141734570264816, + "logps/rejected": -13.543954849243164, + "loss": 0.1134, + "nll_loss": 0.10847647488117218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0036141732707619667, + "rewards/margins": 1.3507812023162842, + "rewards/rejected": -1.3543953895568848, + "step": 1057 + }, + { + "epoch": 2.0684261974584555, + "grad_norm": 0.3396334946155548, + "learning_rate": 1.5655577299412916e-05, + "log_odds_chosen": 9.759521484375, + "log_odds_ratio": -0.0015464967582374811, + "logits/chosen": -1.7767987251281738, + "logits/rejected": -1.702822208404541, + "logps/chosen": -0.028376547619700432, + "logps/rejected": -6.115135192871094, + "loss": 0.1188, + "nll_loss": 0.12179520726203918, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002837655134499073, + "rewards/margins": 0.6086758971214294, + "rewards/rejected": -0.6115135550498962, + "step": 1058 + }, + { + "epoch": 2.070381231671554, + "grad_norm": 0.3394601345062256, + "learning_rate": 1.5622961513372472e-05, + "log_odds_chosen": 12.268797874450684, + "log_odds_ratio": -0.0013241752749308944, + "logits/chosen": -1.7261989116668701, + "logits/rejected": -1.6570271253585815, + "logps/chosen": -0.03270871564745903, + "logps/rejected": -8.776420593261719, + "loss": 0.117, + "nll_loss": 0.12579834461212158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003270871937274933, + "rewards/margins": 0.8743712306022644, + "rewards/rejected": -0.8776420950889587, + "step": 1059 + }, + { + "epoch": 2.072336265884653, + "grad_norm": 0.3477969467639923, + "learning_rate": 1.559034572733203e-05, + "log_odds_chosen": 11.974687576293945, + "log_odds_ratio": -0.0031203203834593296, + "logits/chosen": -1.719900369644165, + "logits/rejected": -1.545088529586792, + "logps/chosen": -0.04289389029145241, + "logps/rejected": -8.639632225036621, + "loss": 0.1195, + "nll_loss": 0.11705274879932404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004289389122277498, + "rewards/margins": 0.8596738576889038, + "rewards/rejected": -0.86396324634552, + "step": 1060 + }, + { + "epoch": 2.0742913000977516, + "grad_norm": 0.33417418599128723, + "learning_rate": 1.5557729941291585e-05, + "log_odds_chosen": 18.0930233001709, + "log_odds_ratio": -0.0013917818432673812, + "logits/chosen": -1.6501340866088867, + "logits/rejected": -1.3592506647109985, + "logps/chosen": -0.030058465898036957, + "logps/rejected": -14.372444152832031, + "loss": 0.1178, + "nll_loss": 0.12814673781394958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003005846869200468, + "rewards/margins": 1.4342386722564697, + "rewards/rejected": -1.4372445344924927, + "step": 1061 + }, + { + "epoch": 2.0762463343108504, + "grad_norm": 0.3312622606754303, + "learning_rate": 1.552511415525114e-05, + "log_odds_chosen": 13.522285461425781, + "log_odds_ratio": -0.0010695646051317453, + "logits/chosen": -1.7518789768218994, + "logits/rejected": -1.4259185791015625, + "logps/chosen": -0.02975633554160595, + "logps/rejected": -9.982104301452637, + "loss": 0.116, + "nll_loss": 0.0993228405714035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00297563336789608, + "rewards/margins": 0.9952347278594971, + "rewards/rejected": -0.9982104301452637, + "step": 1062 + }, + { + "epoch": 2.078201368523949, + "grad_norm": 0.32901349663734436, + "learning_rate": 1.5492498369210697e-05, + "log_odds_chosen": 13.713624954223633, + "log_odds_ratio": -0.002659390913322568, + "logits/chosen": -1.620530366897583, + "logits/rejected": -1.6893064975738525, + "logps/chosen": -0.029313405975699425, + "logps/rejected": -10.055849075317383, + "loss": 0.1145, + "nll_loss": 0.11705295741558075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002931340830400586, + "rewards/margins": 1.00265371799469, + "rewards/rejected": -1.005584955215454, + "step": 1063 + }, + { + "epoch": 2.080156402737048, + "grad_norm": 0.3264022767543793, + "learning_rate": 1.5459882583170253e-05, + "log_odds_chosen": 11.994383811950684, + "log_odds_ratio": -0.0017851871671155095, + "logits/chosen": -1.76108717918396, + "logits/rejected": -1.7293450832366943, + "logps/chosen": -0.04741008207201958, + "logps/rejected": -8.663089752197266, + "loss": 0.1146, + "nll_loss": 0.12950050830841064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004741008393466473, + "rewards/margins": 0.8615680932998657, + "rewards/rejected": -0.8663090467453003, + "step": 1064 + }, + { + "epoch": 2.0821114369501466, + "grad_norm": 0.3456830382347107, + "learning_rate": 1.5427266797129812e-05, + "log_odds_chosen": 18.260122299194336, + "log_odds_ratio": -0.0004586844297591597, + "logits/chosen": -1.6433372497558594, + "logits/rejected": -1.6288082599639893, + "logps/chosen": -0.025389401242136955, + "logps/rejected": -14.403964042663574, + "loss": 0.1201, + "nll_loss": 0.12020343542098999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025389399379491806, + "rewards/margins": 1.4378573894500732, + "rewards/rejected": -1.4403964281082153, + "step": 1065 + }, + { + "epoch": 2.0840664711632453, + "grad_norm": 0.31798264384269714, + "learning_rate": 1.539465101108937e-05, + "log_odds_chosen": 16.223121643066406, + "log_odds_ratio": -0.0006520700990222394, + "logits/chosen": -1.7814526557922363, + "logits/rejected": -1.6468020677566528, + "logps/chosen": -0.04594970494508743, + "logps/rejected": -12.833085060119629, + "loss": 0.1137, + "nll_loss": 0.13912545144557953, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004594970494508743, + "rewards/margins": 1.2787134647369385, + "rewards/rejected": -1.2833083868026733, + "step": 1066 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.34220531582832336, + "learning_rate": 1.5362035225048924e-05, + "log_odds_chosen": 14.1796293258667, + "log_odds_ratio": -0.0015654113376513124, + "logits/chosen": -1.6944143772125244, + "logits/rejected": -1.6549540758132935, + "logps/chosen": -0.021458107978105545, + "logps/rejected": -10.211512565612793, + "loss": 0.1173, + "nll_loss": 0.09881120920181274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021458107512444258, + "rewards/margins": 1.0190054178237915, + "rewards/rejected": -1.0211513042449951, + "step": 1067 + }, + { + "epoch": 2.0879765395894427, + "grad_norm": 0.33370307087898254, + "learning_rate": 1.532941943900848e-05, + "log_odds_chosen": 13.918153762817383, + "log_odds_ratio": -0.0030323578976094723, + "logits/chosen": -1.8319553136825562, + "logits/rejected": -1.4338328838348389, + "logps/chosen": -0.030548078939318657, + "logps/rejected": -10.356517791748047, + "loss": 0.1173, + "nll_loss": 0.10709220170974731, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003054807661101222, + "rewards/margins": 1.0325968265533447, + "rewards/rejected": -1.035651683807373, + "step": 1068 + }, + { + "epoch": 2.0899315738025415, + "grad_norm": 0.3268474340438843, + "learning_rate": 1.5296803652968037e-05, + "log_odds_chosen": 13.038372039794922, + "log_odds_ratio": -0.0026573524810373783, + "logits/chosen": -1.6884479522705078, + "logits/rejected": -1.4793235063552856, + "logps/chosen": -0.032610729336738586, + "logps/rejected": -9.469816207885742, + "loss": 0.1128, + "nll_loss": 0.11373158544301987, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032610732596367598, + "rewards/margins": 0.9437204599380493, + "rewards/rejected": -0.9469815492630005, + "step": 1069 + }, + { + "epoch": 2.09188660801564, + "grad_norm": 0.3392849862575531, + "learning_rate": 1.5264187866927593e-05, + "log_odds_chosen": 17.553058624267578, + "log_odds_ratio": -0.0002783384406939149, + "logits/chosen": -1.6805510520935059, + "logits/rejected": -1.5469753742218018, + "logps/chosen": -0.023296784609556198, + "logps/rejected": -13.665031433105469, + "loss": 0.1163, + "nll_loss": 0.15641778707504272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002329678740352392, + "rewards/margins": 1.3641736507415771, + "rewards/rejected": -1.3665032386779785, + "step": 1070 + }, + { + "epoch": 2.093841642228739, + "grad_norm": 0.30869606137275696, + "learning_rate": 1.5231572080887149e-05, + "log_odds_chosen": 10.530012130737305, + "log_odds_ratio": -0.002223176881670952, + "logits/chosen": -1.5162086486816406, + "logits/rejected": -1.5170822143554688, + "logps/chosen": -0.029837608337402344, + "logps/rejected": -6.958893775939941, + "loss": 0.1095, + "nll_loss": 0.12075936794281006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0029837607871741056, + "rewards/margins": 0.6929056644439697, + "rewards/rejected": -0.695889413356781, + "step": 1071 + }, + { + "epoch": 2.0957966764418376, + "grad_norm": 0.3245212733745575, + "learning_rate": 1.5198956294846705e-05, + "log_odds_chosen": 18.825868606567383, + "log_odds_ratio": -0.0012318575754761696, + "logits/chosen": -1.6466543674468994, + "logits/rejected": -1.362807273864746, + "logps/chosen": -0.02570754662156105, + "logps/rejected": -14.887340545654297, + "loss": 0.1144, + "nll_loss": 0.1586054265499115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00257075484842062, + "rewards/margins": 1.4861633777618408, + "rewards/rejected": -1.4887341260910034, + "step": 1072 + }, + { + "epoch": 2.0977517106549364, + "grad_norm": 0.3204919993877411, + "learning_rate": 1.5166340508806263e-05, + "log_odds_chosen": 14.322881698608398, + "log_odds_ratio": -0.0011641208548098803, + "logits/chosen": -1.8302421569824219, + "logits/rejected": -1.7914819717407227, + "logps/chosen": -0.03764183074235916, + "logps/rejected": -10.889884948730469, + "loss": 0.1128, + "nll_loss": 0.1191122978925705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0037641830276697874, + "rewards/margins": 1.0852243900299072, + "rewards/rejected": -1.0889886617660522, + "step": 1073 + }, + { + "epoch": 2.099706744868035, + "grad_norm": 0.3343120217323303, + "learning_rate": 1.5133724722765819e-05, + "log_odds_chosen": 22.293907165527344, + "log_odds_ratio": -0.0012663331581279635, + "logits/chosen": -1.8422858715057373, + "logits/rejected": -1.3732596635818481, + "logps/chosen": -0.03619083762168884, + "logps/rejected": -18.85214614868164, + "loss": 0.1135, + "nll_loss": 0.11095063388347626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003619083669036627, + "rewards/margins": 1.8815956115722656, + "rewards/rejected": -1.8852145671844482, + "step": 1074 + }, + { + "epoch": 2.101661779081134, + "grad_norm": 0.319514662027359, + "learning_rate": 1.5101108936725375e-05, + "log_odds_chosen": 15.609468460083008, + "log_odds_ratio": -0.0011176802217960358, + "logits/chosen": -1.6670646667480469, + "logits/rejected": -1.617210865020752, + "logps/chosen": -0.024465274065732956, + "logps/rejected": -11.779919624328613, + "loss": 0.1136, + "nll_loss": 0.1260300874710083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024465275928378105, + "rewards/margins": 1.1755454540252686, + "rewards/rejected": -1.1779918670654297, + "step": 1075 + }, + { + "epoch": 2.1036168132942326, + "grad_norm": 0.3322795033454895, + "learning_rate": 1.5068493150684931e-05, + "log_odds_chosen": 15.299909591674805, + "log_odds_ratio": -0.001896691625006497, + "logits/chosen": -1.6921896934509277, + "logits/rejected": -1.5207830667495728, + "logps/chosen": -0.03275565057992935, + "logps/rejected": -11.786615371704102, + "loss": 0.1152, + "nll_loss": 0.10493192076683044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032755653373897076, + "rewards/margins": 1.1753859519958496, + "rewards/rejected": -1.178661584854126, + "step": 1076 + }, + { + "epoch": 2.1055718475073313, + "grad_norm": 0.32493841648101807, + "learning_rate": 1.5035877364644487e-05, + "log_odds_chosen": 15.35038948059082, + "log_odds_ratio": -0.0016792698297649622, + "logits/chosen": -1.8704594373703003, + "logits/rejected": -1.5409023761749268, + "logps/chosen": -0.02776341512799263, + "logps/rejected": -11.631768226623535, + "loss": 0.1114, + "nll_loss": 0.11255459487438202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027763419784605503, + "rewards/margins": 1.160400390625, + "rewards/rejected": -1.1631767749786377, + "step": 1077 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.3392033576965332, + "learning_rate": 1.5003261578604045e-05, + "log_odds_chosen": 19.556095123291016, + "log_odds_ratio": -0.0002444364654365927, + "logits/chosen": -1.5854518413543701, + "logits/rejected": -1.3122950792312622, + "logps/chosen": -0.026840317994356155, + "logps/rejected": -15.887866020202637, + "loss": 0.1121, + "nll_loss": 0.10932248830795288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002684032078832388, + "rewards/margins": 1.5861027240753174, + "rewards/rejected": -1.5887866020202637, + "step": 1078 + }, + { + "epoch": 2.1094819159335287, + "grad_norm": 0.33182093501091003, + "learning_rate": 1.4970645792563601e-05, + "log_odds_chosen": 14.207967758178711, + "log_odds_ratio": -0.0010789146181195974, + "logits/chosen": -1.5878233909606934, + "logits/rejected": -1.5317994356155396, + "logps/chosen": -0.02924281358718872, + "logps/rejected": -10.568962097167969, + "loss": 0.116, + "nll_loss": 0.10504008829593658, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002924281405285001, + "rewards/margins": 1.0539720058441162, + "rewards/rejected": -1.0568962097167969, + "step": 1079 + }, + { + "epoch": 2.1114369501466275, + "grad_norm": 0.3251301944255829, + "learning_rate": 1.4938030006523157e-05, + "log_odds_chosen": 16.499156951904297, + "log_odds_ratio": -0.0002313169534318149, + "logits/chosen": -1.6900944709777832, + "logits/rejected": -1.5772802829742432, + "logps/chosen": -0.02754800021648407, + "logps/rejected": -12.855330467224121, + "loss": 0.1132, + "nll_loss": 0.09077514708042145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002754800021648407, + "rewards/margins": 1.282778263092041, + "rewards/rejected": -1.2855329513549805, + "step": 1080 + }, + { + "epoch": 2.113391984359726, + "grad_norm": 0.3231799602508545, + "learning_rate": 1.4905414220482713e-05, + "log_odds_chosen": 10.424283981323242, + "log_odds_ratio": -0.0009156324085779488, + "logits/chosen": -1.6565099954605103, + "logits/rejected": -1.7133100032806396, + "logps/chosen": -0.0209567341953516, + "logps/rejected": -6.495638847351074, + "loss": 0.1135, + "nll_loss": 0.10958410799503326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020956736989319324, + "rewards/margins": 0.6474682092666626, + "rewards/rejected": -0.6495637893676758, + "step": 1081 + }, + { + "epoch": 2.115347018572825, + "grad_norm": 0.32553401589393616, + "learning_rate": 1.4872798434442269e-05, + "log_odds_chosen": 9.845966339111328, + "log_odds_ratio": -0.0014389795251190662, + "logits/chosen": -1.7175402641296387, + "logits/rejected": -1.6352355480194092, + "logps/chosen": -0.023851264268159866, + "logps/rejected": -5.93801212310791, + "loss": 0.1133, + "nll_loss": 0.1124882698059082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002385126193985343, + "rewards/margins": 0.5914161205291748, + "rewards/rejected": -0.5938012599945068, + "step": 1082 + }, + { + "epoch": 2.1173020527859236, + "grad_norm": 0.329665869474411, + "learning_rate": 1.4840182648401827e-05, + "log_odds_chosen": 16.894393920898438, + "log_odds_ratio": -0.0020030115265399218, + "logits/chosen": -1.823560357093811, + "logits/rejected": -1.637610673904419, + "logps/chosen": -0.03247976675629616, + "logps/rejected": -13.422605514526367, + "loss": 0.1125, + "nll_loss": 0.12999996542930603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032479767687618732, + "rewards/margins": 1.339012622833252, + "rewards/rejected": -1.3422605991363525, + "step": 1083 + }, + { + "epoch": 2.1192570869990224, + "grad_norm": 0.3212973475456238, + "learning_rate": 1.4807566862361383e-05, + "log_odds_chosen": 10.691280364990234, + "log_odds_ratio": -0.007760677020996809, + "logits/chosen": -1.7020363807678223, + "logits/rejected": -1.470642328262329, + "logps/chosen": -0.03130607306957245, + "logps/rejected": -7.137424468994141, + "loss": 0.1148, + "nll_loss": 0.13136997818946838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003130607306957245, + "rewards/margins": 0.7106119394302368, + "rewards/rejected": -0.7137424349784851, + "step": 1084 + }, + { + "epoch": 2.121212121212121, + "grad_norm": 0.3251332938671112, + "learning_rate": 1.4774951076320939e-05, + "log_odds_chosen": 10.960143089294434, + "log_odds_ratio": -0.0019635932985693216, + "logits/chosen": -1.660660982131958, + "logits/rejected": -1.4846289157867432, + "logps/chosen": -0.033660005778074265, + "logps/rejected": -7.559311866760254, + "loss": 0.1118, + "nll_loss": 0.11059079319238663, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003366000484675169, + "rewards/margins": 0.7525652647018433, + "rewards/rejected": -0.7559312582015991, + "step": 1085 + }, + { + "epoch": 2.12316715542522, + "grad_norm": 0.3148639500141144, + "learning_rate": 1.4742335290280495e-05, + "log_odds_chosen": 13.820270538330078, + "log_odds_ratio": -0.0008009726880118251, + "logits/chosen": -1.7189792394638062, + "logits/rejected": -1.7863619327545166, + "logps/chosen": -0.022834479808807373, + "logps/rejected": -10.010557174682617, + "loss": 0.1104, + "nll_loss": 0.08908478915691376, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00228344788774848, + "rewards/margins": 0.9987723231315613, + "rewards/rejected": -1.0010557174682617, + "step": 1086 + }, + { + "epoch": 2.1251221896383186, + "grad_norm": 0.33850497007369995, + "learning_rate": 1.4709719504240051e-05, + "log_odds_chosen": 19.249055862426758, + "log_odds_ratio": -3.2305940749211004e-06, + "logits/chosen": -1.6960794925689697, + "logits/rejected": -1.4271386861801147, + "logps/chosen": -0.028505638241767883, + "logps/rejected": -15.600777626037598, + "loss": 0.1144, + "nll_loss": 0.09717587381601334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002850563731044531, + "rewards/margins": 1.5572271347045898, + "rewards/rejected": -1.5600779056549072, + "step": 1087 + }, + { + "epoch": 2.1270772238514173, + "grad_norm": 0.33165860176086426, + "learning_rate": 1.4677103718199609e-05, + "log_odds_chosen": 13.135923385620117, + "log_odds_ratio": -0.001856994815170765, + "logits/chosen": -1.7508440017700195, + "logits/rejected": -1.5810317993164062, + "logps/chosen": -0.027378130704164505, + "logps/rejected": -9.432191848754883, + "loss": 0.1149, + "nll_loss": 0.09752420336008072, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002737813163548708, + "rewards/margins": 0.9404813051223755, + "rewards/rejected": -0.9432191848754883, + "step": 1088 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.3265529274940491, + "learning_rate": 1.4644487932159165e-05, + "log_odds_chosen": 14.662059783935547, + "log_odds_ratio": -0.0012815330410376191, + "logits/chosen": -1.605501651763916, + "logits/rejected": -1.6279170513153076, + "logps/chosen": -0.028766239061951637, + "logps/rejected": -10.954949378967285, + "loss": 0.1148, + "nll_loss": 0.09598137438297272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002876623999327421, + "rewards/margins": 1.0926183462142944, + "rewards/rejected": -1.0954949855804443, + "step": 1089 + }, + { + "epoch": 2.1309872922776147, + "grad_norm": 0.30931180715560913, + "learning_rate": 1.4611872146118721e-05, + "log_odds_chosen": 17.047466278076172, + "log_odds_ratio": -0.0001176233563455753, + "logits/chosen": -1.7217684984207153, + "logits/rejected": -1.5303325653076172, + "logps/chosen": -0.02601093053817749, + "logps/rejected": -13.351128578186035, + "loss": 0.1106, + "nll_loss": 0.11122730374336243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026010931469500065, + "rewards/margins": 1.3325117826461792, + "rewards/rejected": -1.3351128101348877, + "step": 1090 + }, + { + "epoch": 2.1329423264907135, + "grad_norm": 0.32056912779808044, + "learning_rate": 1.4579256360078277e-05, + "log_odds_chosen": 13.970354080200195, + "log_odds_ratio": -0.0012633117148652673, + "logits/chosen": -1.7809133529663086, + "logits/rejected": -1.4292854070663452, + "logps/chosen": -0.031233783811330795, + "logps/rejected": -10.305121421813965, + "loss": 0.1132, + "nll_loss": 0.11448855698108673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003123378613963723, + "rewards/margins": 1.0273888111114502, + "rewards/rejected": -1.0305120944976807, + "step": 1091 + }, + { + "epoch": 2.134897360703812, + "grad_norm": 0.3302634358406067, + "learning_rate": 1.4546640574037833e-05, + "log_odds_chosen": 13.363198280334473, + "log_odds_ratio": -0.0009658024064265192, + "logits/chosen": -1.5713398456573486, + "logits/rejected": -1.3830108642578125, + "logps/chosen": -0.02503567561507225, + "logps/rejected": -9.602993965148926, + "loss": 0.1139, + "nll_loss": 0.11114741861820221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025035676080733538, + "rewards/margins": 0.9577958583831787, + "rewards/rejected": -0.9602994918823242, + "step": 1092 + }, + { + "epoch": 2.136852394916911, + "grad_norm": 0.31564438343048096, + "learning_rate": 1.4514024787997391e-05, + "log_odds_chosen": 12.948343276977539, + "log_odds_ratio": -0.0006571576232090592, + "logits/chosen": -1.5720001459121704, + "logits/rejected": -1.611138105392456, + "logps/chosen": -0.015549423173069954, + "logps/rejected": -8.72246265411377, + "loss": 0.11, + "nll_loss": 0.12911422550678253, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015549424570053816, + "rewards/margins": 0.8706912994384766, + "rewards/rejected": -0.872246265411377, + "step": 1093 + }, + { + "epoch": 2.1388074291300097, + "grad_norm": 0.32747137546539307, + "learning_rate": 1.4481409001956947e-05, + "log_odds_chosen": 14.455265045166016, + "log_odds_ratio": -0.0010807233629748225, + "logits/chosen": -1.652566909790039, + "logits/rejected": -1.532617449760437, + "logps/chosen": -0.026076529175043106, + "logps/rejected": -10.707304954528809, + "loss": 0.1089, + "nll_loss": 0.09212049841880798, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026076529175043106, + "rewards/margins": 1.0681227445602417, + "rewards/rejected": -1.070730447769165, + "step": 1094 + }, + { + "epoch": 2.1407624633431084, + "grad_norm": 0.33023181557655334, + "learning_rate": 1.4448793215916503e-05, + "log_odds_chosen": 13.662521362304688, + "log_odds_ratio": -0.0011431826278567314, + "logits/chosen": -1.8400733470916748, + "logits/rejected": -1.6248741149902344, + "logps/chosen": -0.029087524861097336, + "logps/rejected": -9.90396785736084, + "loss": 0.1112, + "nll_loss": 0.11553007364273071, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002908752765506506, + "rewards/margins": 0.9874879717826843, + "rewards/rejected": -0.9903968572616577, + "step": 1095 + }, + { + "epoch": 2.142717497556207, + "grad_norm": 0.3137787878513336, + "learning_rate": 1.441617742987606e-05, + "log_odds_chosen": 18.0655517578125, + "log_odds_ratio": -0.0005201604799367487, + "logits/chosen": -1.6002628803253174, + "logits/rejected": -1.3583346605300903, + "logps/chosen": -0.023999687284231186, + "logps/rejected": -14.141166687011719, + "loss": 0.1088, + "nll_loss": 0.10259190201759338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00239996868185699, + "rewards/margins": 1.4117166996002197, + "rewards/rejected": -1.4141167402267456, + "step": 1096 + }, + { + "epoch": 2.144672531769306, + "grad_norm": 0.33355215191841125, + "learning_rate": 1.4383561643835617e-05, + "log_odds_chosen": 21.946359634399414, + "log_odds_ratio": -8.242335024988279e-05, + "logits/chosen": -1.80793297290802, + "logits/rejected": -1.4068830013275146, + "logps/chosen": -0.025842243805527687, + "logps/rejected": -18.126049041748047, + "loss": 0.1127, + "nll_loss": 0.09269306063652039, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025842241011559963, + "rewards/margins": 1.8100205659866333, + "rewards/rejected": -1.8126049041748047, + "step": 1097 + }, + { + "epoch": 2.1466275659824046, + "grad_norm": 0.327680766582489, + "learning_rate": 1.4350945857795173e-05, + "log_odds_chosen": 17.082834243774414, + "log_odds_ratio": -0.0011840637307614088, + "logits/chosen": -1.6714999675750732, + "logits/rejected": -1.4528847932815552, + "logps/chosen": -0.031979210674762726, + "logps/rejected": -13.4952974319458, + "loss": 0.1126, + "nll_loss": 0.11516587436199188, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031979207415133715, + "rewards/margins": 1.3463318347930908, + "rewards/rejected": -1.34952974319458, + "step": 1098 + }, + { + "epoch": 2.1485826001955033, + "grad_norm": 0.3160799741744995, + "learning_rate": 1.431833007175473e-05, + "log_odds_chosen": 14.526544570922852, + "log_odds_ratio": -0.0009687986457720399, + "logits/chosen": -1.7187449932098389, + "logits/rejected": -1.632563591003418, + "logps/chosen": -0.034590743482112885, + "logps/rejected": -11.062055587768555, + "loss": 0.1119, + "nll_loss": 0.10054491460323334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0034590745344758034, + "rewards/margins": 1.1027464866638184, + "rewards/rejected": -1.1062055826187134, + "step": 1099 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.31640636920928955, + "learning_rate": 1.4285714285714285e-05, + "log_odds_chosen": 17.659772872924805, + "log_odds_ratio": -0.0010321119334548712, + "logits/chosen": -1.7327983379364014, + "logits/rejected": -1.2112255096435547, + "logps/chosen": -0.021766752004623413, + "logps/rejected": -13.675732612609863, + "loss": 0.1093, + "nll_loss": 0.11651772260665894, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021766754798591137, + "rewards/margins": 1.3653966188430786, + "rewards/rejected": -1.3675732612609863, + "step": 1100 + }, + { + "epoch": 2.1524926686217007, + "grad_norm": 0.30974310636520386, + "learning_rate": 1.4253098499673842e-05, + "log_odds_chosen": 15.179466247558594, + "log_odds_ratio": -0.0011246133362874389, + "logits/chosen": -1.8089650869369507, + "logits/rejected": -1.487609624862671, + "logps/chosen": -0.0299833957105875, + "logps/rejected": -11.522958755493164, + "loss": 0.1106, + "nll_loss": 0.09733612835407257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00299833994358778, + "rewards/margins": 1.1492974758148193, + "rewards/rejected": -1.152295708656311, + "step": 1101 + }, + { + "epoch": 2.1544477028347995, + "grad_norm": 0.31644243001937866, + "learning_rate": 1.42204827136334e-05, + "log_odds_chosen": 13.693805694580078, + "log_odds_ratio": -0.0014316970482468605, + "logits/chosen": -1.6404271125793457, + "logits/rejected": -1.6056805849075317, + "logps/chosen": -0.03049309551715851, + "logps/rejected": -10.134271621704102, + "loss": 0.111, + "nll_loss": 0.08839400112628937, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003049309831112623, + "rewards/margins": 1.0103778839111328, + "rewards/rejected": -1.0134272575378418, + "step": 1102 + }, + { + "epoch": 2.156402737047898, + "grad_norm": 0.3201763927936554, + "learning_rate": 1.4187866927592955e-05, + "log_odds_chosen": 15.791759490966797, + "log_odds_ratio": -0.0006614634767174721, + "logits/chosen": -1.7384018898010254, + "logits/rejected": -1.4563298225402832, + "logps/chosen": -0.033465467393398285, + "logps/rejected": -12.150849342346191, + "loss": 0.1118, + "nll_loss": 0.12004804611206055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0033465465530753136, + "rewards/margins": 1.2117384672164917, + "rewards/rejected": -1.2150850296020508, + "step": 1103 + }, + { + "epoch": 2.158357771260997, + "grad_norm": 0.3259168565273285, + "learning_rate": 1.4155251141552511e-05, + "log_odds_chosen": 14.61630630493164, + "log_odds_ratio": -0.0018773848423734307, + "logits/chosen": -1.5762367248535156, + "logits/rejected": -1.5058197975158691, + "logps/chosen": -0.035964250564575195, + "logps/rejected": -11.306641578674316, + "loss": 0.1128, + "nll_loss": 0.11378102004528046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0035964250564575195, + "rewards/margins": 1.1270676851272583, + "rewards/rejected": -1.1306641101837158, + "step": 1104 + }, + { + "epoch": 2.1603128054740957, + "grad_norm": 0.30674585700035095, + "learning_rate": 1.4122635355512068e-05, + "log_odds_chosen": 8.628162384033203, + "log_odds_ratio": -0.0013820480089634657, + "logits/chosen": -1.779747724533081, + "logits/rejected": -1.7514610290527344, + "logps/chosen": -0.031530026346445084, + "logps/rejected": -4.990603446960449, + "loss": 0.1071, + "nll_loss": 0.12819804251194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003153002355247736, + "rewards/margins": 0.4959073066711426, + "rewards/rejected": -0.49906033277511597, + "step": 1105 + }, + { + "epoch": 2.1622678396871944, + "grad_norm": 0.32001015543937683, + "learning_rate": 1.4090019569471624e-05, + "log_odds_chosen": 12.455217361450195, + "log_odds_ratio": -0.0012224400416016579, + "logits/chosen": -1.7765834331512451, + "logits/rejected": -1.7200467586517334, + "logps/chosen": -0.029355887323617935, + "logps/rejected": -8.740660667419434, + "loss": 0.1122, + "nll_loss": 0.11565033346414566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002935588825494051, + "rewards/margins": 0.8711305856704712, + "rewards/rejected": -0.8740661144256592, + "step": 1106 + }, + { + "epoch": 2.164222873900293, + "grad_norm": 0.3196878433227539, + "learning_rate": 1.4057403783431181e-05, + "log_odds_chosen": 15.97258186340332, + "log_odds_ratio": -0.00032366043888032436, + "logits/chosen": -1.5854949951171875, + "logits/rejected": -1.708310842514038, + "logps/chosen": -0.025404267013072968, + "logps/rejected": -12.230663299560547, + "loss": 0.1106, + "nll_loss": 0.09550228714942932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002540426794439554, + "rewards/margins": 1.220525860786438, + "rewards/rejected": -1.2230663299560547, + "step": 1107 + }, + { + "epoch": 2.166177908113392, + "grad_norm": 0.3266177773475647, + "learning_rate": 1.4024787997390738e-05, + "log_odds_chosen": 11.924718856811523, + "log_odds_ratio": -0.0010334549006074667, + "logits/chosen": -1.8101897239685059, + "logits/rejected": -1.5497817993164062, + "logps/chosen": -0.03145691752433777, + "logps/rejected": -8.385236740112305, + "loss": 0.112, + "nll_loss": 0.09405319392681122, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031456919386982918, + "rewards/margins": 0.8353779315948486, + "rewards/rejected": -0.8385236263275146, + "step": 1108 + }, + { + "epoch": 2.1681329423264906, + "grad_norm": 0.32772329449653625, + "learning_rate": 1.3992172211350294e-05, + "log_odds_chosen": 15.110604286193848, + "log_odds_ratio": -0.001893544802442193, + "logits/chosen": -1.6893399953842163, + "logits/rejected": -1.6317219734191895, + "logps/chosen": -0.029062315821647644, + "logps/rejected": -11.466636657714844, + "loss": 0.1149, + "nll_loss": 0.0953298956155777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002906231442466378, + "rewards/margins": 1.1437574625015259, + "rewards/rejected": -1.1466636657714844, + "step": 1109 + }, + { + "epoch": 2.1700879765395893, + "grad_norm": 0.3034878373146057, + "learning_rate": 1.395955642530985e-05, + "log_odds_chosen": 13.74940299987793, + "log_odds_ratio": -0.0006089513190090656, + "logits/chosen": -1.7884690761566162, + "logits/rejected": -1.4943809509277344, + "logps/chosen": -0.023202620446681976, + "logps/rejected": -9.878677368164062, + "loss": 0.107, + "nll_loss": 0.12265698611736298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002320262137800455, + "rewards/margins": 0.9855474233627319, + "rewards/rejected": -0.9878677129745483, + "step": 1110 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.30563366413116455, + "learning_rate": 1.3926940639269406e-05, + "log_odds_chosen": 16.079242706298828, + "log_odds_ratio": -0.0010958779603242874, + "logits/chosen": -1.7792587280273438, + "logits/rejected": -1.6617246866226196, + "logps/chosen": -0.025452248752117157, + "logps/rejected": -12.343118667602539, + "loss": 0.108, + "nll_loss": 0.09208478033542633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002545224968343973, + "rewards/margins": 1.231766700744629, + "rewards/rejected": -1.2343119382858276, + "step": 1111 + }, + { + "epoch": 2.1739980449657867, + "grad_norm": 0.3228246867656708, + "learning_rate": 1.3894324853228964e-05, + "log_odds_chosen": 17.191511154174805, + "log_odds_ratio": -0.0003263961698394269, + "logits/chosen": -1.7962877750396729, + "logits/rejected": -1.3673759698867798, + "logps/chosen": -0.028632383793592453, + "logps/rejected": -13.480348587036133, + "loss": 0.1109, + "nll_loss": 0.12978193163871765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002863238099962473, + "rewards/margins": 1.345171570777893, + "rewards/rejected": -1.3480348587036133, + "step": 1112 + }, + { + "epoch": 2.1759530791788855, + "grad_norm": 0.32510459423065186, + "learning_rate": 1.386170906718852e-05, + "log_odds_chosen": 17.620155334472656, + "log_odds_ratio": -0.001332098850980401, + "logits/chosen": -1.7399214506149292, + "logits/rejected": -1.5305511951446533, + "logps/chosen": -0.03265148028731346, + "logps/rejected": -14.126086235046387, + "loss": 0.1104, + "nll_loss": 0.12057466804981232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003265148028731346, + "rewards/margins": 1.4093436002731323, + "rewards/rejected": -1.4126086235046387, + "step": 1113 + }, + { + "epoch": 2.177908113391984, + "grad_norm": 0.31796205043792725, + "learning_rate": 1.3829093281148076e-05, + "log_odds_chosen": 13.43435287475586, + "log_odds_ratio": -0.0006726178107783198, + "logits/chosen": -1.5504426956176758, + "logits/rejected": -1.4921879768371582, + "logps/chosen": -0.0260469950735569, + "logps/rejected": -9.686941146850586, + "loss": 0.1104, + "nll_loss": 0.15207746624946594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026046994607895613, + "rewards/margins": 0.9660894870758057, + "rewards/rejected": -0.9686942100524902, + "step": 1114 + }, + { + "epoch": 2.179863147605083, + "grad_norm": 0.3195163607597351, + "learning_rate": 1.3796477495107632e-05, + "log_odds_chosen": 19.836591720581055, + "log_odds_ratio": -8.66771224536933e-05, + "logits/chosen": -1.8182837963104248, + "logits/rejected": -1.6170527935028076, + "logps/chosen": -0.03150629997253418, + "logps/rejected": -16.251224517822266, + "loss": 0.1112, + "nll_loss": 0.09949299693107605, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003150629810988903, + "rewards/margins": 1.6219719648361206, + "rewards/rejected": -1.6251225471496582, + "step": 1115 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.3353526294231415, + "learning_rate": 1.3763861709067188e-05, + "log_odds_chosen": 13.751633644104004, + "log_odds_ratio": -0.00022762801381759346, + "logits/chosen": -1.6512808799743652, + "logits/rejected": -1.4259459972381592, + "logps/chosen": -0.02206231653690338, + "logps/rejected": -9.749410629272461, + "loss": 0.1122, + "nll_loss": 0.09886478632688522, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022062319330871105, + "rewards/margins": 0.972734808921814, + "rewards/rejected": -0.9749411344528198, + "step": 1116 + }, + { + "epoch": 2.1837732160312804, + "grad_norm": 0.31296300888061523, + "learning_rate": 1.3731245923026746e-05, + "log_odds_chosen": 11.940847396850586, + "log_odds_ratio": -0.004949463065713644, + "logits/chosen": -1.5997014045715332, + "logits/rejected": -1.5691821575164795, + "logps/chosen": -0.030441736802458763, + "logps/rejected": -8.415264129638672, + "loss": 0.1086, + "nll_loss": 0.119225412607193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003044173587113619, + "rewards/margins": 0.8384823203086853, + "rewards/rejected": -0.841526448726654, + "step": 1117 + }, + { + "epoch": 2.185728250244379, + "grad_norm": 0.3217872977256775, + "learning_rate": 1.3698630136986302e-05, + "log_odds_chosen": 15.077436447143555, + "log_odds_ratio": -0.0010500159114599228, + "logits/chosen": -1.5979083776474, + "logits/rejected": -1.520395278930664, + "logps/chosen": -0.022051256150007248, + "logps/rejected": -11.15072250366211, + "loss": 0.1132, + "nll_loss": 0.14749178290367126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022051255218684673, + "rewards/margins": 1.1128671169281006, + "rewards/rejected": -1.115072250366211, + "step": 1118 + }, + { + "epoch": 2.187683284457478, + "grad_norm": 0.3035362660884857, + "learning_rate": 1.3666014350945858e-05, + "log_odds_chosen": 15.279193878173828, + "log_odds_ratio": -0.0008794029126875103, + "logits/chosen": -1.7213451862335205, + "logits/rejected": -1.625499963760376, + "logps/chosen": -0.02736222743988037, + "logps/rejected": -11.549948692321777, + "loss": 0.1091, + "nll_loss": 0.0945327952504158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002736222930252552, + "rewards/margins": 1.1522586345672607, + "rewards/rejected": -1.1549947261810303, + "step": 1119 + }, + { + "epoch": 2.1896383186705766, + "grad_norm": 0.3029579818248749, + "learning_rate": 1.3633398564905414e-05, + "log_odds_chosen": 14.529765129089355, + "log_odds_ratio": -0.0006759315147064626, + "logits/chosen": -1.7154654264450073, + "logits/rejected": -1.2069692611694336, + "logps/chosen": -0.026901040226221085, + "logps/rejected": -10.853337287902832, + "loss": 0.1078, + "nll_loss": 0.11818870902061462, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002690104069188237, + "rewards/margins": 1.0826436281204224, + "rewards/rejected": -1.0853337049484253, + "step": 1120 + }, + { + "epoch": 2.1915933528836753, + "grad_norm": 0.3211905360221863, + "learning_rate": 1.360078277886497e-05, + "log_odds_chosen": 13.6465482711792, + "log_odds_ratio": -0.001289220992475748, + "logits/chosen": -1.5990406274795532, + "logits/rejected": -1.4451186656951904, + "logps/chosen": -0.026563061401247978, + "logps/rejected": -9.898841857910156, + "loss": 0.109, + "nll_loss": 0.1339687705039978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026563063729554415, + "rewards/margins": 0.9872279167175293, + "rewards/rejected": -0.9898842573165894, + "step": 1121 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.3243177831172943, + "learning_rate": 1.3568166992824528e-05, + "log_odds_chosen": 11.177212715148926, + "log_odds_ratio": -0.0015608755638822913, + "logits/chosen": -1.7059524059295654, + "logits/rejected": -1.9136972427368164, + "logps/chosen": -0.020282352343201637, + "logps/rejected": -7.259838104248047, + "loss": 0.1093, + "nll_loss": 0.09324497729539871, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020282352343201637, + "rewards/margins": 0.7239555716514587, + "rewards/rejected": -0.7259837985038757, + "step": 1122 + }, + { + "epoch": 2.1955034213098727, + "grad_norm": 0.31085407733917236, + "learning_rate": 1.3535551206784084e-05, + "log_odds_chosen": 16.874242782592773, + "log_odds_ratio": -0.001950153149664402, + "logits/chosen": -1.7278647422790527, + "logits/rejected": -1.5017333030700684, + "logps/chosen": -0.023257652297616005, + "logps/rejected": -13.024882316589355, + "loss": 0.1073, + "nll_loss": 0.0973532497882843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002325765322893858, + "rewards/margins": 1.3001625537872314, + "rewards/rejected": -1.3024883270263672, + "step": 1123 + }, + { + "epoch": 2.1974584555229715, + "grad_norm": 0.30442097783088684, + "learning_rate": 1.350293542074364e-05, + "log_odds_chosen": 14.734503746032715, + "log_odds_ratio": -0.001883713761344552, + "logits/chosen": -1.5316402912139893, + "logits/rejected": -1.5054035186767578, + "logps/chosen": -0.01906617172062397, + "logps/rejected": -10.68724250793457, + "loss": 0.1073, + "nll_loss": 0.11814533174037933, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019066171953454614, + "rewards/margins": 1.0668177604675293, + "rewards/rejected": -1.0687243938446045, + "step": 1124 + }, + { + "epoch": 2.19941348973607, + "grad_norm": 0.3198203146457672, + "learning_rate": 1.3470319634703196e-05, + "log_odds_chosen": 12.492280006408691, + "log_odds_ratio": -0.001319418428465724, + "logits/chosen": -1.7302340269088745, + "logits/rejected": -1.5685750246047974, + "logps/chosen": -0.018722860142588615, + "logps/rejected": -8.426648139953613, + "loss": 0.1083, + "nll_loss": 0.12541541457176208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018722860841080546, + "rewards/margins": 0.8407925963401794, + "rewards/rejected": -0.8426648378372192, + "step": 1125 + }, + { + "epoch": 2.201368523949169, + "grad_norm": 0.29951947927474976, + "learning_rate": 1.3437703848662752e-05, + "log_odds_chosen": 10.939717292785645, + "log_odds_ratio": -0.00207619764842093, + "logits/chosen": -1.8744854927062988, + "logits/rejected": -1.595996618270874, + "logps/chosen": -0.028006181120872498, + "logps/rejected": -7.191454887390137, + "loss": 0.1035, + "nll_loss": 0.11458531767129898, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028006178326904774, + "rewards/margins": 0.7163448929786682, + "rewards/rejected": -0.7191455364227295, + "step": 1126 + }, + { + "epoch": 2.2033235581622677, + "grad_norm": 0.31857064366340637, + "learning_rate": 1.340508806262231e-05, + "log_odds_chosen": 16.705310821533203, + "log_odds_ratio": -0.0009983984054997563, + "logits/chosen": -1.7334930896759033, + "logits/rejected": -1.548539638519287, + "logps/chosen": -0.025855351239442825, + "logps/rejected": -12.955543518066406, + "loss": 0.1091, + "nll_loss": 0.0977497398853302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025855349376797676, + "rewards/margins": 1.29296875, + "rewards/rejected": -1.2955543994903564, + "step": 1127 + }, + { + "epoch": 2.2052785923753664, + "grad_norm": 0.30951982736587524, + "learning_rate": 1.3372472276581866e-05, + "log_odds_chosen": 14.933333396911621, + "log_odds_ratio": -0.0012515278067439795, + "logits/chosen": -1.8728028535842896, + "logits/rejected": -1.7195100784301758, + "logps/chosen": -0.031756043434143066, + "logps/rejected": -11.197364807128906, + "loss": 0.1038, + "nll_loss": 0.17104418575763702, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003175604622811079, + "rewards/margins": 1.116560935974121, + "rewards/rejected": -1.1197364330291748, + "step": 1128 + }, + { + "epoch": 2.207233626588465, + "grad_norm": 0.3332799971103668, + "learning_rate": 1.3339856490541422e-05, + "log_odds_chosen": 21.188838958740234, + "log_odds_ratio": -6.818966994615039e-06, + "logits/chosen": -1.6993675231933594, + "logits/rejected": -1.4063489437103271, + "logps/chosen": -0.023222293704748154, + "logps/rejected": -17.3118896484375, + "loss": 0.112, + "nll_loss": 0.10293726623058319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023222295567393303, + "rewards/margins": 1.728866696357727, + "rewards/rejected": -1.7311888933181763, + "step": 1129 + }, + { + "epoch": 2.209188660801564, + "grad_norm": 0.30145546793937683, + "learning_rate": 1.3307240704500978e-05, + "log_odds_chosen": 13.385891914367676, + "log_odds_ratio": -0.001151224598288536, + "logits/chosen": -1.650691032409668, + "logits/rejected": -1.598999261856079, + "logps/chosen": -0.03497255593538284, + "logps/rejected": -9.994400978088379, + "loss": 0.1059, + "nll_loss": 0.10915939509868622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003497255500406027, + "rewards/margins": 0.9959428310394287, + "rewards/rejected": -0.9994401335716248, + "step": 1130 + }, + { + "epoch": 2.2111436950146626, + "grad_norm": 0.30330222845077515, + "learning_rate": 1.3274624918460534e-05, + "log_odds_chosen": 13.618780136108398, + "log_odds_ratio": -0.0014510011533275247, + "logits/chosen": -1.6648690700531006, + "logits/rejected": -1.7572294473648071, + "logps/chosen": -0.022681646049022675, + "logps/rejected": -9.755377769470215, + "loss": 0.1062, + "nll_loss": 0.09454211592674255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022681644186377525, + "rewards/margins": 0.9732697010040283, + "rewards/rejected": -0.9755377769470215, + "step": 1131 + }, + { + "epoch": 2.2130987292277613, + "grad_norm": 0.29909470677375793, + "learning_rate": 1.3242009132420092e-05, + "log_odds_chosen": 17.872257232666016, + "log_odds_ratio": -0.0018525953637436032, + "logits/chosen": -1.6055909395217896, + "logits/rejected": -1.4668525457382202, + "logps/chosen": -0.031429193913936615, + "logps/rejected": -14.377229690551758, + "loss": 0.1045, + "nll_loss": 0.09587347507476807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003142919857054949, + "rewards/margins": 1.4345803260803223, + "rewards/rejected": -1.437723159790039, + "step": 1132 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.31370460987091064, + "learning_rate": 1.3209393346379648e-05, + "log_odds_chosen": 14.190629959106445, + "log_odds_ratio": -0.0006878445856273174, + "logits/chosen": -1.6413521766662598, + "logits/rejected": -1.6281696557998657, + "logps/chosen": -0.0313488207757473, + "logps/rejected": -10.467077255249023, + "loss": 0.1065, + "nll_loss": 0.09310659766197205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00313488207757473, + "rewards/margins": 1.0435729026794434, + "rewards/rejected": -1.0467077493667603, + "step": 1133 + }, + { + "epoch": 2.2170087976539588, + "grad_norm": 0.3087019622325897, + "learning_rate": 1.3176777560339204e-05, + "log_odds_chosen": 8.834861755371094, + "log_odds_ratio": -0.0014806953258812428, + "logits/chosen": -1.8054190874099731, + "logits/rejected": -1.7758727073669434, + "logps/chosen": -0.02398642711341381, + "logps/rejected": -4.987257480621338, + "loss": 0.1063, + "nll_loss": 0.11906880140304565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002398642711341381, + "rewards/margins": 0.49632713198661804, + "rewards/rejected": -0.4987257719039917, + "step": 1134 + }, + { + "epoch": 2.2189638318670575, + "grad_norm": 0.3200054168701172, + "learning_rate": 1.314416177429876e-05, + "log_odds_chosen": 12.325084686279297, + "log_odds_ratio": -0.0011202681344002485, + "logits/chosen": -1.7121777534484863, + "logits/rejected": -1.520967960357666, + "logps/chosen": -0.02480660192668438, + "logps/rejected": -8.527969360351562, + "loss": 0.1101, + "nll_loss": 0.10880665481090546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024806600995361805, + "rewards/margins": 0.8503162860870361, + "rewards/rejected": -0.8527969121932983, + "step": 1135 + }, + { + "epoch": 2.220918866080156, + "grad_norm": 0.31368350982666016, + "learning_rate": 1.3111545988258316e-05, + "log_odds_chosen": 22.296951293945312, + "log_odds_ratio": -0.00045871539623476565, + "logits/chosen": -1.6352462768554688, + "logits/rejected": -1.2725281715393066, + "logps/chosen": -0.020957784727215767, + "logps/rejected": -18.233659744262695, + "loss": 0.1073, + "nll_loss": 0.08221536874771118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020957787055522203, + "rewards/margins": 1.821270227432251, + "rewards/rejected": -1.8233659267425537, + "step": 1136 + }, + { + "epoch": 2.222873900293255, + "grad_norm": 0.31506651639938354, + "learning_rate": 1.3078930202217874e-05, + "log_odds_chosen": 14.317384719848633, + "log_odds_ratio": -0.002463732613250613, + "logits/chosen": -1.8307991027832031, + "logits/rejected": -1.6675159931182861, + "logps/chosen": -0.03981529921293259, + "logps/rejected": -10.927982330322266, + "loss": 0.1068, + "nll_loss": 0.156110018491745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003981529735028744, + "rewards/margins": 1.0888166427612305, + "rewards/rejected": -1.0927982330322266, + "step": 1137 + }, + { + "epoch": 2.2248289345063537, + "grad_norm": 0.30617862939834595, + "learning_rate": 1.304631441617743e-05, + "log_odds_chosen": 16.528793334960938, + "log_odds_ratio": -0.000713432440534234, + "logits/chosen": -1.6915268898010254, + "logits/rejected": -1.561146855354309, + "logps/chosen": -0.03990110009908676, + "logps/rejected": -12.82801628112793, + "loss": 0.1063, + "nll_loss": 0.0880490392446518, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003990110009908676, + "rewards/margins": 1.2788114547729492, + "rewards/rejected": -1.282801628112793, + "step": 1138 + }, + { + "epoch": 2.2267839687194524, + "grad_norm": 0.3185191750526428, + "learning_rate": 1.3013698630136986e-05, + "log_odds_chosen": 15.556495666503906, + "log_odds_ratio": -0.0006459623691625893, + "logits/chosen": -2.0002894401550293, + "logits/rejected": -1.7357373237609863, + "logps/chosen": -0.014608722180128098, + "logps/rejected": -11.292891502380371, + "loss": 0.1083, + "nll_loss": 0.11690311133861542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014608721248805523, + "rewards/margins": 1.1278282403945923, + "rewards/rejected": -1.129289150238037, + "step": 1139 + }, + { + "epoch": 2.228739002932551, + "grad_norm": 0.30373451113700867, + "learning_rate": 1.2981082844096542e-05, + "log_odds_chosen": 14.68218994140625, + "log_odds_ratio": -0.0007264434243552387, + "logits/chosen": -1.6636942625045776, + "logits/rejected": -1.866323709487915, + "logps/chosen": -0.039408475160598755, + "logps/rejected": -11.129993438720703, + "loss": 0.1062, + "nll_loss": 0.08248621970415115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00394084770232439, + "rewards/margins": 1.1090586185455322, + "rewards/rejected": -1.112999439239502, + "step": 1140 + }, + { + "epoch": 2.23069403714565, + "grad_norm": 0.3096325397491455, + "learning_rate": 1.2948467058056098e-05, + "log_odds_chosen": 15.303837776184082, + "log_odds_ratio": -0.0007589914603158832, + "logits/chosen": -1.7408888339996338, + "logits/rejected": -1.6452219486236572, + "logps/chosen": -0.023703480139374733, + "logps/rejected": -11.473710060119629, + "loss": 0.1062, + "nll_loss": 0.0925830751657486, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023703482002019882, + "rewards/margins": 1.145000696182251, + "rewards/rejected": -1.1473710536956787, + "step": 1141 + }, + { + "epoch": 2.2326490713587486, + "grad_norm": 0.30853161215782166, + "learning_rate": 1.2915851272015656e-05, + "log_odds_chosen": 19.41388702392578, + "log_odds_ratio": -6.592484169232193e-06, + "logits/chosen": -1.7259957790374756, + "logits/rejected": -1.4860804080963135, + "logps/chosen": -0.02724284864962101, + "logps/rejected": -15.573436737060547, + "loss": 0.103, + "nll_loss": 0.09279416501522064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027242847718298435, + "rewards/margins": 1.554619312286377, + "rewards/rejected": -1.5573437213897705, + "step": 1142 + }, + { + "epoch": 2.2346041055718473, + "grad_norm": 0.30812570452690125, + "learning_rate": 1.2883235485975212e-05, + "log_odds_chosen": 13.276664733886719, + "log_odds_ratio": -0.0016881832852959633, + "logits/chosen": -1.606605052947998, + "logits/rejected": -1.4446388483047485, + "logps/chosen": -0.0337722972035408, + "logps/rejected": -9.763077735900879, + "loss": 0.1068, + "nll_loss": 0.1075817197561264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003377229906618595, + "rewards/margins": 0.9729305505752563, + "rewards/rejected": -0.9763078093528748, + "step": 1143 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.3045262396335602, + "learning_rate": 1.2850619699934768e-05, + "log_odds_chosen": 16.62299346923828, + "log_odds_ratio": -0.0005180421285331249, + "logits/chosen": -1.643305778503418, + "logits/rejected": -1.6350741386413574, + "logps/chosen": -0.02344709075987339, + "logps/rejected": -12.71142578125, + "loss": 0.1048, + "nll_loss": 0.08889302611351013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023447091225534678, + "rewards/margins": 1.2687978744506836, + "rewards/rejected": -1.2711424827575684, + "step": 1144 + }, + { + "epoch": 2.2385141739980448, + "grad_norm": 0.31203630566596985, + "learning_rate": 1.2818003913894325e-05, + "log_odds_chosen": 10.651779174804688, + "log_odds_ratio": -0.003193750511854887, + "logits/chosen": -1.6986634731292725, + "logits/rejected": -1.718296766281128, + "logps/chosen": -0.0340692438185215, + "logps/rejected": -7.1313652992248535, + "loss": 0.1057, + "nll_loss": 0.1365877389907837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003406924195587635, + "rewards/margins": 0.7097296714782715, + "rewards/rejected": -0.7131365537643433, + "step": 1145 + }, + { + "epoch": 2.2404692082111435, + "grad_norm": 0.29579490423202515, + "learning_rate": 1.278538812785388e-05, + "log_odds_chosen": 15.311534881591797, + "log_odds_ratio": -0.0008957284735515714, + "logits/chosen": -1.7503972053527832, + "logits/rejected": -1.49969482421875, + "logps/chosen": -0.02049863710999489, + "logps/rejected": -11.404952049255371, + "loss": 0.1071, + "nll_loss": 0.10752406716346741, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020498638041317463, + "rewards/margins": 1.138445496559143, + "rewards/rejected": -1.1404953002929688, + "step": 1146 + }, + { + "epoch": 2.242424242424242, + "grad_norm": 0.3065767288208008, + "learning_rate": 1.2752772341813438e-05, + "log_odds_chosen": 14.54326057434082, + "log_odds_ratio": -0.0010366676142439246, + "logits/chosen": -1.6934449672698975, + "logits/rejected": -1.6819870471954346, + "logps/chosen": -0.019080426543951035, + "logps/rejected": -10.497136116027832, + "loss": 0.1064, + "nll_loss": 0.08715134859085083, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019080427009612322, + "rewards/margins": 1.0478055477142334, + "rewards/rejected": -1.0497136116027832, + "step": 1147 + }, + { + "epoch": 2.244379276637341, + "grad_norm": 0.3020588755607605, + "learning_rate": 1.2720156555772994e-05, + "log_odds_chosen": 14.10142993927002, + "log_odds_ratio": -0.000853837700560689, + "logits/chosen": -1.7658922672271729, + "logits/rejected": -1.6780414581298828, + "logps/chosen": -0.030986156314611435, + "logps/rejected": -10.331323623657227, + "loss": 0.1064, + "nll_loss": 0.12580570578575134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003098615910857916, + "rewards/margins": 1.0300337076187134, + "rewards/rejected": -1.0331323146820068, + "step": 1148 + }, + { + "epoch": 2.2463343108504397, + "grad_norm": 0.3080529272556305, + "learning_rate": 1.268754076973255e-05, + "log_odds_chosen": 16.222248077392578, + "log_odds_ratio": -9.219753701472655e-05, + "logits/chosen": -1.683666467666626, + "logits/rejected": -1.720137357711792, + "logps/chosen": -0.024907205253839493, + "logps/rejected": -12.346099853515625, + "loss": 0.106, + "nll_loss": 0.09358131140470505, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024907204788178205, + "rewards/margins": 1.2321193218231201, + "rewards/rejected": -1.2346099615097046, + "step": 1149 + }, + { + "epoch": 2.2482893450635384, + "grad_norm": 0.30157727003097534, + "learning_rate": 1.2654924983692107e-05, + "log_odds_chosen": 17.574657440185547, + "log_odds_ratio": -0.0002812821476254612, + "logits/chosen": -1.5359711647033691, + "logits/rejected": -1.395485758781433, + "logps/chosen": -0.015458857640624046, + "logps/rejected": -13.234332084655762, + "loss": 0.1044, + "nll_loss": 0.12475326657295227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015458856942132115, + "rewards/margins": 1.3218873739242554, + "rewards/rejected": -1.32343327999115, + "step": 1150 + }, + { + "epoch": 2.250244379276637, + "grad_norm": 0.42335906624794006, + "learning_rate": 1.2622309197651663e-05, + "log_odds_chosen": 22.00009536743164, + "log_odds_ratio": -0.0006051398231647909, + "logits/chosen": -1.7881604433059692, + "logits/rejected": -1.4281229972839355, + "logps/chosen": -0.024927902966737747, + "logps/rejected": -18.175800323486328, + "loss": 0.1046, + "nll_loss": 0.12765765190124512, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024927903432399035, + "rewards/margins": 1.8150873184204102, + "rewards/rejected": -1.817579984664917, + "step": 1151 + }, + { + "epoch": 2.252199413489736, + "grad_norm": 0.3086209297180176, + "learning_rate": 1.258969341161122e-05, + "log_odds_chosen": 13.237990379333496, + "log_odds_ratio": -0.0005581544828601182, + "logits/chosen": -1.7555187940597534, + "logits/rejected": -1.5761466026306152, + "logps/chosen": -0.026154110208153725, + "logps/rejected": -9.472665786743164, + "loss": 0.1061, + "nll_loss": 0.09530985355377197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026154108345508575, + "rewards/margins": 0.9446511268615723, + "rewards/rejected": -0.9472665190696716, + "step": 1152 + }, + { + "epoch": 2.2541544477028346, + "grad_norm": 0.30428245663642883, + "learning_rate": 1.2557077625570777e-05, + "log_odds_chosen": 17.479812622070312, + "log_odds_ratio": -0.0023371526040136814, + "logits/chosen": -1.8658506870269775, + "logits/rejected": -1.52713942527771, + "logps/chosen": -0.02582157403230667, + "logps/rejected": -13.725807189941406, + "loss": 0.105, + "nll_loss": 0.1218845471739769, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025821574963629246, + "rewards/margins": 1.3699984550476074, + "rewards/rejected": -1.372580647468567, + "step": 1153 + }, + { + "epoch": 2.2561094819159333, + "grad_norm": 0.30896759033203125, + "learning_rate": 1.2524461839530333e-05, + "log_odds_chosen": 14.104043960571289, + "log_odds_ratio": -0.000837340543512255, + "logits/chosen": -1.6811842918395996, + "logits/rejected": -1.685966968536377, + "logps/chosen": -0.0316561684012413, + "logps/rejected": -10.503887176513672, + "loss": 0.1068, + "nll_loss": 0.09144346415996552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031656166538596153, + "rewards/margins": 1.0472229719161987, + "rewards/rejected": -1.0503886938095093, + "step": 1154 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.3063449263572693, + "learning_rate": 1.2491846053489889e-05, + "log_odds_chosen": 11.643097877502441, + "log_odds_ratio": -0.0016324296593666077, + "logits/chosen": -1.6160427331924438, + "logits/rejected": -1.7125513553619385, + "logps/chosen": -0.028395015746355057, + "logps/rejected": -8.011367797851562, + "loss": 0.1038, + "nll_loss": 0.08892276883125305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002839501481503248, + "rewards/margins": 0.7982972860336304, + "rewards/rejected": -0.80113685131073, + "step": 1155 + }, + { + "epoch": 2.2600195503421308, + "grad_norm": 0.30222856998443604, + "learning_rate": 1.2459230267449445e-05, + "log_odds_chosen": 20.092832565307617, + "log_odds_ratio": -0.0004874455335084349, + "logits/chosen": -1.850846529006958, + "logits/rejected": -1.497527837753296, + "logps/chosen": -0.022126484662294388, + "logps/rejected": -16.179698944091797, + "loss": 0.1046, + "nll_loss": 0.11257165670394897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002212648279964924, + "rewards/margins": 1.6157574653625488, + "rewards/rejected": -1.6179699897766113, + "step": 1156 + }, + { + "epoch": 2.2619745845552295, + "grad_norm": 0.2965600788593292, + "learning_rate": 1.2426614481409003e-05, + "log_odds_chosen": 15.046356201171875, + "log_odds_ratio": -0.0007164690177887678, + "logits/chosen": -1.49281907081604, + "logits/rejected": -1.2189282178878784, + "logps/chosen": -0.017215218394994736, + "logps/rejected": -10.902861595153809, + "loss": 0.1011, + "nll_loss": 0.13603240251541138, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017215218394994736, + "rewards/margins": 1.0885646343231201, + "rewards/rejected": -1.0902860164642334, + "step": 1157 + }, + { + "epoch": 2.263929618768328, + "grad_norm": 0.30786266922950745, + "learning_rate": 1.2393998695368559e-05, + "log_odds_chosen": 17.237010955810547, + "log_odds_ratio": -0.0010587276192381978, + "logits/chosen": -1.5913095474243164, + "logits/rejected": -1.6413562297821045, + "logps/chosen": -0.032551392912864685, + "logps/rejected": -13.705854415893555, + "loss": 0.1042, + "nll_loss": 0.1166643425822258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003255139570683241, + "rewards/margins": 1.367330551147461, + "rewards/rejected": -1.3705856800079346, + "step": 1158 + }, + { + "epoch": 2.265884652981427, + "grad_norm": 0.30523598194122314, + "learning_rate": 1.2361382909328115e-05, + "log_odds_chosen": 17.116239547729492, + "log_odds_ratio": -0.0007590126479044557, + "logits/chosen": -1.853874921798706, + "logits/rejected": -1.5820810794830322, + "logps/chosen": -0.020632829517126083, + "logps/rejected": -13.075298309326172, + "loss": 0.1053, + "nll_loss": 0.09404029697179794, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020632827654480934, + "rewards/margins": 1.3054664134979248, + "rewards/rejected": -1.3075296878814697, + "step": 1159 + }, + { + "epoch": 2.2678396871945257, + "grad_norm": 0.2880024313926697, + "learning_rate": 1.2328767123287671e-05, + "log_odds_chosen": 20.380836486816406, + "log_odds_ratio": -0.0010839966125786304, + "logits/chosen": -1.5966107845306396, + "logits/rejected": -1.342466950416565, + "logps/chosen": -0.03014402836561203, + "logps/rejected": -16.832918167114258, + "loss": 0.1017, + "nll_loss": 0.11218002438545227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003014402696862817, + "rewards/margins": 1.6802773475646973, + "rewards/rejected": -1.6832919120788574, + "step": 1160 + }, + { + "epoch": 2.2697947214076244, + "grad_norm": 0.2980796694755554, + "learning_rate": 1.2296151337247227e-05, + "log_odds_chosen": 13.520947456359863, + "log_odds_ratio": -0.0009887386113405228, + "logits/chosen": -1.8187681436538696, + "logits/rejected": -1.6340762376785278, + "logps/chosen": -0.025231599807739258, + "logps/rejected": -9.722896575927734, + "loss": 0.104, + "nll_loss": 0.10392262041568756, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0025231598410755396, + "rewards/margins": 0.9697664976119995, + "rewards/rejected": -0.9722896814346313, + "step": 1161 + }, + { + "epoch": 2.271749755620723, + "grad_norm": 0.3081485331058502, + "learning_rate": 1.2263535551206785e-05, + "log_odds_chosen": 13.998859405517578, + "log_odds_ratio": -0.001077833934687078, + "logits/chosen": -1.7066270112991333, + "logits/rejected": -1.4949091672897339, + "logps/chosen": -0.018470704555511475, + "logps/rejected": -9.777146339416504, + "loss": 0.104, + "nll_loss": 0.08818617463111877, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018470704089850187, + "rewards/margins": 0.9758676886558533, + "rewards/rejected": -0.9777146577835083, + "step": 1162 + }, + { + "epoch": 2.273704789833822, + "grad_norm": 0.2925223410129547, + "learning_rate": 1.223091976516634e-05, + "log_odds_chosen": 15.45893669128418, + "log_odds_ratio": -0.0009763048146851361, + "logits/chosen": -1.7892820835113525, + "logits/rejected": -1.6378560066223145, + "logps/chosen": -0.023193493485450745, + "logps/rejected": -11.594722747802734, + "loss": 0.103, + "nll_loss": 0.07983772456645966, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002319349441677332, + "rewards/margins": 1.1571528911590576, + "rewards/rejected": -1.1594722270965576, + "step": 1163 + }, + { + "epoch": 2.2756598240469206, + "grad_norm": 0.30191266536712646, + "learning_rate": 1.2198303979125897e-05, + "log_odds_chosen": 19.464906692504883, + "log_odds_ratio": -0.0008912183693610132, + "logits/chosen": -1.7306439876556396, + "logits/rejected": -1.5481524467468262, + "logps/chosen": -0.04165948927402496, + "logps/rejected": -15.955072402954102, + "loss": 0.1024, + "nll_loss": 0.12529152631759644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004165948834270239, + "rewards/margins": 1.591341495513916, + "rewards/rejected": -1.5955073833465576, + "step": 1164 + }, + { + "epoch": 2.2776148582600193, + "grad_norm": 0.3016494810581207, + "learning_rate": 1.2165688193085453e-05, + "log_odds_chosen": 17.817829132080078, + "log_odds_ratio": -0.0007310791406780481, + "logits/chosen": -1.5609028339385986, + "logits/rejected": -1.5860471725463867, + "logps/chosen": -0.015852540731430054, + "logps/rejected": -13.649505615234375, + "loss": 0.1032, + "nll_loss": 0.08888515830039978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015852542128413916, + "rewards/margins": 1.3633651733398438, + "rewards/rejected": -1.3649505376815796, + "step": 1165 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.29177963733673096, + "learning_rate": 1.2133072407045009e-05, + "log_odds_chosen": 17.70989418029785, + "log_odds_ratio": -0.0003594954323489219, + "logits/chosen": -1.496886968612671, + "logits/rejected": -1.362979769706726, + "logps/chosen": -0.02285999059677124, + "logps/rejected": -13.66968059539795, + "loss": 0.1004, + "nll_loss": 0.1011015921831131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002285999245941639, + "rewards/margins": 1.3646821975708008, + "rewards/rejected": -1.3669681549072266, + "step": 1166 + }, + { + "epoch": 2.281524926686217, + "grad_norm": 0.3027441203594208, + "learning_rate": 1.2100456621004567e-05, + "log_odds_chosen": 16.782508850097656, + "log_odds_ratio": -0.0011853454634547234, + "logits/chosen": -1.5694329738616943, + "logits/rejected": -1.558762550354004, + "logps/chosen": -0.017661092802882195, + "logps/rejected": -12.596482276916504, + "loss": 0.1025, + "nll_loss": 0.08146440982818604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001766109373420477, + "rewards/margins": 1.2578821182250977, + "rewards/rejected": -1.259648323059082, + "step": 1167 + }, + { + "epoch": 2.283479960899316, + "grad_norm": 0.2929542660713196, + "learning_rate": 1.2067840834964123e-05, + "log_odds_chosen": 17.126483917236328, + "log_odds_ratio": -0.0020707505755126476, + "logits/chosen": -1.5442051887512207, + "logits/rejected": -1.5315237045288086, + "logps/chosen": -0.019188735634088516, + "logps/rejected": -13.023796081542969, + "loss": 0.1014, + "nll_loss": 0.09149591624736786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001918873516842723, + "rewards/margins": 1.3004608154296875, + "rewards/rejected": -1.3023796081542969, + "step": 1168 + }, + { + "epoch": 2.2854349951124147, + "grad_norm": 0.2891608476638794, + "learning_rate": 1.2035225048923679e-05, + "log_odds_chosen": 16.977046966552734, + "log_odds_ratio": -0.0001666176103753969, + "logits/chosen": -1.6849807500839233, + "logits/rejected": -1.7080038785934448, + "logps/chosen": -0.019205067306756973, + "logps/rejected": -12.945755004882812, + "loss": 0.1009, + "nll_loss": 0.10568507760763168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019205065909773111, + "rewards/margins": 1.2926548719406128, + "rewards/rejected": -1.2945753335952759, + "step": 1169 + }, + { + "epoch": 2.2873900293255134, + "grad_norm": 0.3013962507247925, + "learning_rate": 1.2002609262883235e-05, + "log_odds_chosen": 15.788629531860352, + "log_odds_ratio": -0.0001705941976979375, + "logits/chosen": -1.8129768371582031, + "logits/rejected": -1.6673917770385742, + "logps/chosen": -0.026476601138710976, + "logps/rejected": -11.987786293029785, + "loss": 0.1025, + "nll_loss": 0.10179558396339417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0026476599741727114, + "rewards/margins": 1.1961309909820557, + "rewards/rejected": -1.198778748512268, + "step": 1170 + }, + { + "epoch": 2.289345063538612, + "grad_norm": 0.296260267496109, + "learning_rate": 1.1969993476842791e-05, + "log_odds_chosen": 17.333858489990234, + "log_odds_ratio": -0.007431511767208576, + "logits/chosen": -1.821242094039917, + "logits/rejected": -1.7491073608398438, + "logps/chosen": -0.031963057816028595, + "logps/rejected": -13.750947952270508, + "loss": 0.099, + "nll_loss": 0.10983152687549591, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0031963055953383446, + "rewards/margins": 1.3718984127044678, + "rewards/rejected": -1.3750946521759033, + "step": 1171 + }, + { + "epoch": 2.291300097751711, + "grad_norm": 0.3000406324863434, + "learning_rate": 1.1937377690802349e-05, + "log_odds_chosen": 19.3176212310791, + "log_odds_ratio": -0.0001763703767210245, + "logits/chosen": -1.668291687965393, + "logits/rejected": -1.4573988914489746, + "logps/chosen": -0.0277764480561018, + "logps/rejected": -15.58517074584961, + "loss": 0.1033, + "nll_loss": 0.10513830184936523, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027776448987424374, + "rewards/margins": 1.555739402770996, + "rewards/rejected": -1.5585170984268188, + "step": 1172 + }, + { + "epoch": 2.2932551319648096, + "grad_norm": 0.2952413558959961, + "learning_rate": 1.1904761904761905e-05, + "log_odds_chosen": 11.165311813354492, + "log_odds_ratio": -0.0015734744956716895, + "logits/chosen": -1.7329604625701904, + "logits/rejected": -1.847654104232788, + "logps/chosen": -0.020903874188661575, + "logps/rejected": -7.097284317016602, + "loss": 0.1019, + "nll_loss": 0.08262249827384949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020903872791677713, + "rewards/margins": 0.7076380252838135, + "rewards/rejected": -0.709728479385376, + "step": 1173 + }, + { + "epoch": 2.2952101661779083, + "grad_norm": 0.3257708251476288, + "learning_rate": 1.1872146118721461e-05, + "log_odds_chosen": 16.233848571777344, + "log_odds_ratio": -0.00034730983315967023, + "logits/chosen": -1.838031530380249, + "logits/rejected": -1.5459189414978027, + "logps/chosen": -0.01431376300752163, + "logps/rejected": -11.992810249328613, + "loss": 0.103, + "nll_loss": 0.08406604081392288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014313764404505491, + "rewards/margins": 1.1978497505187988, + "rewards/rejected": -1.1992809772491455, + "step": 1174 + }, + { + "epoch": 2.297165200391007, + "grad_norm": 0.30238255858421326, + "learning_rate": 1.1839530332681017e-05, + "log_odds_chosen": 15.14914321899414, + "log_odds_ratio": -0.00043100089533254504, + "logits/chosen": -1.5981534719467163, + "logits/rejected": -1.6016826629638672, + "logps/chosen": -0.027446433901786804, + "logps/rejected": -11.518872261047363, + "loss": 0.1056, + "nll_loss": 0.09370972216129303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002744643483310938, + "rewards/margins": 1.1491425037384033, + "rewards/rejected": -1.1518871784210205, + "step": 1175 + }, + { + "epoch": 2.2991202346041058, + "grad_norm": 0.28895506262779236, + "learning_rate": 1.1806914546640573e-05, + "log_odds_chosen": 15.488567352294922, + "log_odds_ratio": -0.0006002493319101632, + "logits/chosen": -1.7425012588500977, + "logits/rejected": -1.4738737344741821, + "logps/chosen": -0.0200343057513237, + "logps/rejected": -11.497786521911621, + "loss": 0.1016, + "nll_loss": 0.08544769883155823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002003430388867855, + "rewards/margins": 1.1477752923965454, + "rewards/rejected": -1.1497788429260254, + "step": 1176 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.3013085722923279, + "learning_rate": 1.1774298760600131e-05, + "log_odds_chosen": 17.835094451904297, + "log_odds_ratio": -0.0004594245401676744, + "logits/chosen": -1.6750438213348389, + "logits/rejected": -1.6024452447891235, + "logps/chosen": -0.020943105220794678, + "logps/rejected": -13.80720329284668, + "loss": 0.1026, + "nll_loss": 0.08666642010211945, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020943107083439827, + "rewards/margins": 1.3786261081695557, + "rewards/rejected": -1.3807203769683838, + "step": 1177 + }, + { + "epoch": 2.303030303030303, + "grad_norm": 0.2895101308822632, + "learning_rate": 1.1741682974559687e-05, + "log_odds_chosen": 19.343952178955078, + "log_odds_ratio": -0.0005372308660298586, + "logits/chosen": -1.615384817123413, + "logits/rejected": -1.333021879196167, + "logps/chosen": -0.02244553342461586, + "logps/rejected": -15.454601287841797, + "loss": 0.102, + "nll_loss": 0.09583017230033875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002244553528726101, + "rewards/margins": 1.5432155132293701, + "rewards/rejected": -1.5454599857330322, + "step": 1178 + }, + { + "epoch": 2.304985337243402, + "grad_norm": 0.29803594946861267, + "learning_rate": 1.1709067188519243e-05, + "log_odds_chosen": 18.634634017944336, + "log_odds_ratio": -0.0017147010657936335, + "logits/chosen": -1.6753652095794678, + "logits/rejected": -1.4890875816345215, + "logps/chosen": -0.0422760471701622, + "logps/rejected": -15.22972297668457, + "loss": 0.1019, + "nll_loss": 0.11634556204080582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00422760471701622, + "rewards/margins": 1.518744707107544, + "rewards/rejected": -1.5229722261428833, + "step": 1179 + }, + { + "epoch": 2.3069403714565007, + "grad_norm": 0.29434439539909363, + "learning_rate": 1.16764514024788e-05, + "log_odds_chosen": 21.818782806396484, + "log_odds_ratio": -0.00022868682572152466, + "logits/chosen": -1.56876540184021, + "logits/rejected": -1.5010402202606201, + "logps/chosen": -0.014569568447768688, + "logps/rejected": -17.51073455810547, + "loss": 0.1013, + "nll_loss": 0.08902469277381897, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014569568447768688, + "rewards/margins": 1.7496166229248047, + "rewards/rejected": -1.7510734796524048, + "step": 1180 + }, + { + "epoch": 2.3088954056695994, + "grad_norm": 0.2815559506416321, + "learning_rate": 1.1643835616438355e-05, + "log_odds_chosen": 13.74603271484375, + "log_odds_ratio": -0.0009025701438076794, + "logits/chosen": -1.6189110279083252, + "logits/rejected": -1.4833259582519531, + "logps/chosen": -0.019311366602778435, + "logps/rejected": -9.680876731872559, + "loss": 0.1006, + "nll_loss": 0.1127452701330185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019311367068439722, + "rewards/margins": 0.9661566019058228, + "rewards/rejected": -0.9680876731872559, + "step": 1181 + }, + { + "epoch": 2.310850439882698, + "grad_norm": 0.3025294840335846, + "learning_rate": 1.1611219830397913e-05, + "log_odds_chosen": 13.120148658752441, + "log_odds_ratio": -0.0005840749363414943, + "logits/chosen": -1.6903607845306396, + "logits/rejected": -1.5051552057266235, + "logps/chosen": -0.02101159282028675, + "logps/rejected": -9.140122413635254, + "loss": 0.1022, + "nll_loss": 0.0844653993844986, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021011591888964176, + "rewards/margins": 0.911911129951477, + "rewards/rejected": -0.9140121936798096, + "step": 1182 + }, + { + "epoch": 2.312805474095797, + "grad_norm": 0.3003866374492645, + "learning_rate": 1.157860404435747e-05, + "log_odds_chosen": 9.891081809997559, + "log_odds_ratio": -0.0005442576366476715, + "logits/chosen": -1.5739489793777466, + "logits/rejected": -1.7516565322875977, + "logps/chosen": -0.04119420796632767, + "logps/rejected": -6.397580146789551, + "loss": 0.1027, + "nll_loss": 0.1654490977525711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0041194213554263115, + "rewards/margins": 0.6356385946273804, + "rewards/rejected": -0.6397579908370972, + "step": 1183 + }, + { + "epoch": 2.3147605083088956, + "grad_norm": 0.2940807640552521, + "learning_rate": 1.1545988258317025e-05, + "log_odds_chosen": 18.182445526123047, + "log_odds_ratio": -0.000773474806919694, + "logits/chosen": -1.8380495309829712, + "logits/rejected": -1.5310512781143188, + "logps/chosen": -0.03287485986948013, + "logps/rejected": -14.412230491638184, + "loss": 0.0999, + "nll_loss": 0.09459316730499268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0032874862663447857, + "rewards/margins": 1.4379355907440186, + "rewards/rejected": -1.44122314453125, + "step": 1184 + }, + { + "epoch": 2.3167155425219943, + "grad_norm": 0.2985338866710663, + "learning_rate": 1.1513372472276582e-05, + "log_odds_chosen": 19.614315032958984, + "log_odds_ratio": -0.0006297653308138251, + "logits/chosen": -1.8920350074768066, + "logits/rejected": -1.4932405948638916, + "logps/chosen": -0.028022348880767822, + "logps/rejected": -15.830184936523438, + "loss": 0.1011, + "nll_loss": 0.10336121916770935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002802235074341297, + "rewards/margins": 1.5802161693572998, + "rewards/rejected": -1.5830185413360596, + "step": 1185 + }, + { + "epoch": 2.318670576735093, + "grad_norm": 0.28982144594192505, + "learning_rate": 1.1480756686236138e-05, + "log_odds_chosen": 15.5001220703125, + "log_odds_ratio": -0.0006055820267647505, + "logits/chosen": -1.7918837070465088, + "logits/rejected": -1.5091700553894043, + "logps/chosen": -0.022311262786388397, + "logps/rejected": -11.594979286193848, + "loss": 0.0983, + "nll_loss": 0.09272584319114685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022311261855065823, + "rewards/margins": 1.1572668552398682, + "rewards/rejected": -1.1594979763031006, + "step": 1186 + }, + { + "epoch": 2.3206256109481918, + "grad_norm": 0.30433061718940735, + "learning_rate": 1.1448140900195695e-05, + "log_odds_chosen": 17.039888381958008, + "log_odds_ratio": -0.0010160573292523623, + "logits/chosen": -1.7425951957702637, + "logits/rejected": -1.534334659576416, + "logps/chosen": -0.026401061564683914, + "logps/rejected": -13.29279899597168, + "loss": 0.1031, + "nll_loss": 0.09985601156949997, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002640106249600649, + "rewards/margins": 1.3266396522521973, + "rewards/rejected": -1.329279899597168, + "step": 1187 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.3091161847114563, + "learning_rate": 1.1415525114155251e-05, + "log_odds_chosen": 12.219566345214844, + "log_odds_ratio": -0.0014310807455331087, + "logits/chosen": -1.665606141090393, + "logits/rejected": -1.6409363746643066, + "logps/chosen": -0.01832357794046402, + "logps/rejected": -8.041732788085938, + "loss": 0.1041, + "nll_loss": 0.10203951597213745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001832357607781887, + "rewards/margins": 0.8023409843444824, + "rewards/rejected": -0.8041733503341675, + "step": 1188 + }, + { + "epoch": 2.324535679374389, + "grad_norm": 0.3190053701400757, + "learning_rate": 1.1382909328114808e-05, + "log_odds_chosen": 19.215858459472656, + "log_odds_ratio": -0.000609605573117733, + "logits/chosen": -1.7766083478927612, + "logits/rejected": -1.6157336235046387, + "logps/chosen": -0.046471007168293, + "logps/rejected": -15.959264755249023, + "loss": 0.1041, + "nll_loss": 0.09988875687122345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004647101275622845, + "rewards/margins": 1.5912795066833496, + "rewards/rejected": -1.5959265232086182, + "step": 1189 + }, + { + "epoch": 2.326490713587488, + "grad_norm": 0.3095645308494568, + "learning_rate": 1.1350293542074364e-05, + "log_odds_chosen": 18.24332046508789, + "log_odds_ratio": -0.0009069102234207094, + "logits/chosen": -1.703839659690857, + "logits/rejected": -1.6522083282470703, + "logps/chosen": -0.021912163123488426, + "logps/rejected": -14.326957702636719, + "loss": 0.1015, + "nll_loss": 0.07786824554204941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002191216219216585, + "rewards/margins": 1.430504560470581, + "rewards/rejected": -1.4326958656311035, + "step": 1190 + }, + { + "epoch": 2.3284457478005867, + "grad_norm": 0.2875043749809265, + "learning_rate": 1.131767775603392e-05, + "log_odds_chosen": 13.318960189819336, + "log_odds_ratio": -0.001863078330643475, + "logits/chosen": -1.7109254598617554, + "logits/rejected": -1.7462092638015747, + "logps/chosen": -0.02169024385511875, + "logps/rejected": -9.272987365722656, + "loss": 0.1001, + "nll_loss": 0.0900212824344635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002169024432078004, + "rewards/margins": 0.925129771232605, + "rewards/rejected": -0.9272987842559814, + "step": 1191 + }, + { + "epoch": 2.3304007820136854, + "grad_norm": 0.28939053416252136, + "learning_rate": 1.1285061969993477e-05, + "log_odds_chosen": 19.72264862060547, + "log_odds_ratio": -0.000960635778028518, + "logits/chosen": -1.7409484386444092, + "logits/rejected": -1.4306697845458984, + "logps/chosen": -0.016122860834002495, + "logps/rejected": -15.402368545532227, + "loss": 0.0983, + "nll_loss": 0.07497265934944153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016122860834002495, + "rewards/margins": 1.5386245250701904, + "rewards/rejected": -1.5402369499206543, + "step": 1192 + }, + { + "epoch": 2.332355816226784, + "grad_norm": 0.27742981910705566, + "learning_rate": 1.1252446183953034e-05, + "log_odds_chosen": 11.232357025146484, + "log_odds_ratio": -0.0005659288144670427, + "logits/chosen": -1.7384974956512451, + "logits/rejected": -1.6587035655975342, + "logps/chosen": -0.022037208080291748, + "logps/rejected": -7.048503875732422, + "loss": 0.0989, + "nll_loss": 0.09684090316295624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00220372062176466, + "rewards/margins": 0.7026467323303223, + "rewards/rejected": -0.7048503756523132, + "step": 1193 + }, + { + "epoch": 2.334310850439883, + "grad_norm": 0.29632261395454407, + "learning_rate": 1.121983039791259e-05, + "log_odds_chosen": 13.097776412963867, + "log_odds_ratio": -0.0014071785844862461, + "logits/chosen": -1.7255514860153198, + "logits/rejected": -1.7792878150939941, + "logps/chosen": -0.024855948984622955, + "logps/rejected": -9.262106895446777, + "loss": 0.1032, + "nll_loss": 0.112839475274086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024855949450284243, + "rewards/margins": 0.9237250089645386, + "rewards/rejected": -0.9262106418609619, + "step": 1194 + }, + { + "epoch": 2.3362658846529816, + "grad_norm": 0.29255250096321106, + "learning_rate": 1.1187214611872146e-05, + "log_odds_chosen": 17.79824447631836, + "log_odds_ratio": -0.000887008965946734, + "logits/chosen": -1.8256102800369263, + "logits/rejected": -1.4623615741729736, + "logps/chosen": -0.021613560616970062, + "logps/rejected": -13.89827823638916, + "loss": 0.1011, + "nll_loss": 0.08420459926128387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021613561548292637, + "rewards/margins": 1.3876664638519287, + "rewards/rejected": -1.389827847480774, + "step": 1195 + }, + { + "epoch": 2.3382209188660803, + "grad_norm": 0.2875407636165619, + "learning_rate": 1.1154598825831702e-05, + "log_odds_chosen": 15.214786529541016, + "log_odds_ratio": -0.0005938067333772779, + "logits/chosen": -1.7281208038330078, + "logits/rejected": -1.6432485580444336, + "logps/chosen": -0.01914653554558754, + "logps/rejected": -11.21931266784668, + "loss": 0.1003, + "nll_loss": 0.09757018089294434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019146535778418183, + "rewards/margins": 1.1200165748596191, + "rewards/rejected": -1.1219311952590942, + "step": 1196 + }, + { + "epoch": 2.340175953079179, + "grad_norm": 0.29823145270347595, + "learning_rate": 1.112198303979126e-05, + "log_odds_chosen": 18.075223922729492, + "log_odds_ratio": -0.0040017180144786835, + "logits/chosen": -1.6465635299682617, + "logits/rejected": -1.2967617511749268, + "logps/chosen": -0.044490884989500046, + "logps/rejected": -14.378511428833008, + "loss": 0.1008, + "nll_loss": 0.08784623444080353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.004449088592082262, + "rewards/margins": 1.4334019422531128, + "rewards/rejected": -1.4378511905670166, + "step": 1197 + }, + { + "epoch": 2.3421309872922778, + "grad_norm": 0.2902863025665283, + "learning_rate": 1.1089367253750816e-05, + "log_odds_chosen": 16.712583541870117, + "log_odds_ratio": -0.0010389173403382301, + "logits/chosen": -1.7564654350280762, + "logits/rejected": -1.6092474460601807, + "logps/chosen": -0.024089543148875237, + "logps/rejected": -12.83614730834961, + "loss": 0.1002, + "nll_loss": 0.11618964374065399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00240895408205688, + "rewards/margins": 1.281205654144287, + "rewards/rejected": -1.2836147546768188, + "step": 1198 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.2874576151371002, + "learning_rate": 1.1056751467710372e-05, + "log_odds_chosen": 15.802379608154297, + "log_odds_ratio": -0.00014688100782223046, + "logits/chosen": -1.64345383644104, + "logits/rejected": -1.5796186923980713, + "logps/chosen": -0.01723266951739788, + "logps/rejected": -11.580061912536621, + "loss": 0.0973, + "nll_loss": 0.08943931758403778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017232670215889812, + "rewards/margins": 1.156282901763916, + "rewards/rejected": -1.158006191253662, + "step": 1199 + }, + { + "epoch": 2.346041055718475, + "grad_norm": 0.2878759205341339, + "learning_rate": 1.1024135681669928e-05, + "log_odds_chosen": 15.355005264282227, + "log_odds_ratio": -0.000694389920681715, + "logits/chosen": -1.7960747480392456, + "logits/rejected": -1.5691046714782715, + "logps/chosen": -0.023648269474506378, + "logps/rejected": -11.412683486938477, + "loss": 0.098, + "nll_loss": 0.08654734492301941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023648268543183804, + "rewards/margins": 1.1389037370681763, + "rewards/rejected": -1.1412684917449951, + "step": 1200 + }, + { + "epoch": 2.347996089931574, + "grad_norm": 0.30056414008140564, + "learning_rate": 1.0991519895629484e-05, + "log_odds_chosen": 17.612918853759766, + "log_odds_ratio": -0.000943038088735193, + "logits/chosen": -1.7782025337219238, + "logits/rejected": -1.696010947227478, + "logps/chosen": -0.02554919570684433, + "logps/rejected": -13.868675231933594, + "loss": 0.1003, + "nll_loss": 0.09276920557022095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002554919570684433, + "rewards/margins": 1.3843128681182861, + "rewards/rejected": -1.3868677616119385, + "step": 1201 + }, + { + "epoch": 2.3499511241446727, + "grad_norm": 0.29431071877479553, + "learning_rate": 1.0958904109589042e-05, + "log_odds_chosen": 13.587051391601562, + "log_odds_ratio": -0.0012145789805799723, + "logits/chosen": -1.552847146987915, + "logits/rejected": -1.2723027467727661, + "logps/chosen": -0.017691439017653465, + "logps/rejected": -9.306119918823242, + "loss": 0.1007, + "nll_loss": 0.15059003233909607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017691439716145396, + "rewards/margins": 0.9288429021835327, + "rewards/rejected": -0.9306120276451111, + "step": 1202 + }, + { + "epoch": 2.3519061583577714, + "grad_norm": 0.29973798990249634, + "learning_rate": 1.0926288323548598e-05, + "log_odds_chosen": 17.542097091674805, + "log_odds_ratio": -0.0004478638875298202, + "logits/chosen": -1.7111179828643799, + "logits/rejected": -1.3868046998977661, + "logps/chosen": -0.018320303410291672, + "logps/rejected": -13.38705825805664, + "loss": 0.1045, + "nll_loss": 0.11894881725311279, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018320302478969097, + "rewards/margins": 1.3368737697601318, + "rewards/rejected": -1.3387058973312378, + "step": 1203 + }, + { + "epoch": 2.35386119257087, + "grad_norm": 0.2718693017959595, + "learning_rate": 1.0893672537508154e-05, + "log_odds_chosen": 10.718575477600098, + "log_odds_ratio": -0.002219983609393239, + "logits/chosen": -1.6893633604049683, + "logits/rejected": -1.6701087951660156, + "logps/chosen": -0.02491292916238308, + "logps/rejected": -6.696093559265137, + "loss": 0.0965, + "nll_loss": 0.09439520537853241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024912930093705654, + "rewards/margins": 0.6671180725097656, + "rewards/rejected": -0.6696093678474426, + "step": 1204 + }, + { + "epoch": 2.355816226783969, + "grad_norm": 0.2819632291793823, + "learning_rate": 1.086105675146771e-05, + "log_odds_chosen": 18.17858123779297, + "log_odds_ratio": -0.002157696755602956, + "logits/chosen": -1.8480546474456787, + "logits/rejected": -1.4867581129074097, + "logps/chosen": -0.027336157858371735, + "logps/rejected": -14.55450439453125, + "loss": 0.0981, + "nll_loss": 0.10160303860902786, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002733615692704916, + "rewards/margins": 1.4527168273925781, + "rewards/rejected": -1.455450415611267, + "step": 1205 + }, + { + "epoch": 2.3577712609970676, + "grad_norm": 0.2913580536842346, + "learning_rate": 1.0828440965427266e-05, + "log_odds_chosen": 14.677496910095215, + "log_odds_ratio": -0.0007688514888286591, + "logits/chosen": -1.648322343826294, + "logits/rejected": -1.6555049419403076, + "logps/chosen": -0.01728292554616928, + "logps/rejected": -10.495370864868164, + "loss": 0.1001, + "nll_loss": 0.07499287277460098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00172829267103225, + "rewards/margins": 1.0478087663650513, + "rewards/rejected": -1.0495370626449585, + "step": 1206 + }, + { + "epoch": 2.3597262952101663, + "grad_norm": 0.28286370635032654, + "learning_rate": 1.0795825179386824e-05, + "log_odds_chosen": 11.440203666687012, + "log_odds_ratio": -0.001824699342250824, + "logits/chosen": -1.644559383392334, + "logits/rejected": -1.6786460876464844, + "logps/chosen": -0.028952687978744507, + "logps/rejected": -7.549469947814941, + "loss": 0.0992, + "nll_loss": 0.11186206340789795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028952686116099358, + "rewards/margins": 0.7520517110824585, + "rewards/rejected": -0.7549470067024231, + "step": 1207 + }, + { + "epoch": 2.361681329423265, + "grad_norm": 0.2848741114139557, + "learning_rate": 1.076320939334638e-05, + "log_odds_chosen": 15.20301342010498, + "log_odds_ratio": -0.0012449836358428001, + "logits/chosen": -1.613490343093872, + "logits/rejected": -1.5964760780334473, + "logps/chosen": -0.016178766265511513, + "logps/rejected": -10.984964370727539, + "loss": 0.0998, + "nll_loss": 0.09612840414047241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016178765799850225, + "rewards/margins": 1.0968786478042603, + "rewards/rejected": -1.0984965562820435, + "step": 1208 + }, + { + "epoch": 2.3636363636363638, + "grad_norm": 0.2869538962841034, + "learning_rate": 1.0730593607305936e-05, + "log_odds_chosen": 13.82695484161377, + "log_odds_ratio": -0.0009157669264823198, + "logits/chosen": -1.612865924835205, + "logits/rejected": -1.6379823684692383, + "logps/chosen": -0.020125171169638634, + "logps/rejected": -9.758041381835938, + "loss": 0.099, + "nll_loss": 0.08650416135787964, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020125170703977346, + "rewards/margins": 0.9737915992736816, + "rewards/rejected": -0.9758040904998779, + "step": 1209 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 0.2904720902442932, + "learning_rate": 1.0697977821265492e-05, + "log_odds_chosen": 15.186901092529297, + "log_odds_ratio": -0.0007410614052787423, + "logits/chosen": -1.6738466024398804, + "logits/rejected": -1.3910810947418213, + "logps/chosen": -0.013683807104825974, + "logps/rejected": -10.827879905700684, + "loss": 0.1009, + "nll_loss": 0.10132420063018799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013683806173503399, + "rewards/margins": 1.0814197063446045, + "rewards/rejected": -1.0827879905700684, + "step": 1210 + }, + { + "epoch": 2.367546432062561, + "grad_norm": 0.2832164764404297, + "learning_rate": 1.0665362035225048e-05, + "log_odds_chosen": 19.179607391357422, + "log_odds_ratio": -0.00021775042114313692, + "logits/chosen": -1.545825719833374, + "logits/rejected": -1.5315971374511719, + "logps/chosen": -0.028954066336154938, + "logps/rejected": -15.384247779846191, + "loss": 0.0963, + "nll_loss": 0.13336580991744995, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0028954066801816225, + "rewards/margins": 1.535529375076294, + "rewards/rejected": -1.5384249687194824, + "step": 1211 + }, + { + "epoch": 2.36950146627566, + "grad_norm": 0.2937111258506775, + "learning_rate": 1.0632746249184606e-05, + "log_odds_chosen": 18.016706466674805, + "log_odds_ratio": -0.0003026944468729198, + "logits/chosen": -1.7709141969680786, + "logits/rejected": -1.558354377746582, + "logps/chosen": -0.018003560602664948, + "logps/rejected": -13.972262382507324, + "loss": 0.1004, + "nll_loss": 0.07848474383354187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018003559671342373, + "rewards/margins": 1.3954259157180786, + "rewards/rejected": -1.397226333618164, + "step": 1212 + }, + { + "epoch": 2.3714565004887587, + "grad_norm": 0.28161385655403137, + "learning_rate": 1.0600130463144162e-05, + "log_odds_chosen": 15.86215591430664, + "log_odds_ratio": -0.0009137580636888742, + "logits/chosen": -1.8324692249298096, + "logits/rejected": -1.4139703512191772, + "logps/chosen": -0.024320511147379875, + "logps/rejected": -11.99058723449707, + "loss": 0.0986, + "nll_loss": 0.10853007435798645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024320511147379875, + "rewards/margins": 1.1966266632080078, + "rewards/rejected": -1.1990587711334229, + "step": 1213 + }, + { + "epoch": 2.3734115347018574, + "grad_norm": 0.2968609035015106, + "learning_rate": 1.0567514677103718e-05, + "log_odds_chosen": 16.006229400634766, + "log_odds_ratio": -0.0005108686164021492, + "logits/chosen": -1.6442174911499023, + "logits/rejected": -1.4693918228149414, + "logps/chosen": -0.020375728607177734, + "logps/rejected": -12.002371788024902, + "loss": 0.1009, + "nll_loss": 0.09080222249031067, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020375726744532585, + "rewards/margins": 1.1981996297836304, + "rewards/rejected": -1.2002371549606323, + "step": 1214 + }, + { + "epoch": 2.375366568914956, + "grad_norm": 0.2915976047515869, + "learning_rate": 1.0534898891063274e-05, + "log_odds_chosen": 16.56793785095215, + "log_odds_ratio": -0.001129755051806569, + "logits/chosen": -1.6784634590148926, + "logits/rejected": -1.6042143106460571, + "logps/chosen": -0.01991603896021843, + "logps/rejected": -12.415844917297363, + "loss": 0.1002, + "nll_loss": 0.12117777019739151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019916039891541004, + "rewards/margins": 1.2395929098129272, + "rewards/rejected": -1.2415845394134521, + "step": 1215 + }, + { + "epoch": 2.377321603128055, + "grad_norm": 0.28369709849357605, + "learning_rate": 1.050228310502283e-05, + "log_odds_chosen": 17.063039779663086, + "log_odds_ratio": -0.0004621987172868103, + "logits/chosen": -1.6111129522323608, + "logits/rejected": -1.6641936302185059, + "logps/chosen": -0.02063385397195816, + "logps/rejected": -13.1271390914917, + "loss": 0.0981, + "nll_loss": 0.09046085178852081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020633856765925884, + "rewards/margins": 1.3106505870819092, + "rewards/rejected": -1.3127139806747437, + "step": 1216 + }, + { + "epoch": 2.3792766373411536, + "grad_norm": 0.27433934807777405, + "learning_rate": 1.0469667318982388e-05, + "log_odds_chosen": 16.789291381835938, + "log_odds_ratio": -0.0005347270634956658, + "logits/chosen": -1.6858878135681152, + "logits/rejected": -1.6001124382019043, + "logps/chosen": -0.013872748240828514, + "logps/rejected": -12.518061637878418, + "loss": 0.096, + "nll_loss": 0.08081893622875214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013872748240828514, + "rewards/margins": 1.2504189014434814, + "rewards/rejected": -1.2518062591552734, + "step": 1217 + }, + { + "epoch": 2.3812316715542523, + "grad_norm": 0.2781893312931061, + "learning_rate": 1.0437051532941944e-05, + "log_odds_chosen": 12.014448165893555, + "log_odds_ratio": -0.0012539734598249197, + "logits/chosen": -1.7401597499847412, + "logits/rejected": -1.4218521118164062, + "logps/chosen": -0.01746673882007599, + "logps/rejected": -7.919351577758789, + "loss": 0.0973, + "nll_loss": 0.08748139441013336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017466738354414701, + "rewards/margins": 0.7901885509490967, + "rewards/rejected": -0.7919352054595947, + "step": 1218 + }, + { + "epoch": 2.383186705767351, + "grad_norm": 0.29919642210006714, + "learning_rate": 1.04044357469015e-05, + "log_odds_chosen": 21.751361846923828, + "log_odds_ratio": -0.00036259787157177925, + "logits/chosen": -1.8047144412994385, + "logits/rejected": -1.4667177200317383, + "logps/chosen": -0.028409570455551147, + "logps/rejected": -18.009841918945312, + "loss": 0.1004, + "nll_loss": 0.09910668432712555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002840957138687372, + "rewards/margins": 1.7981431484222412, + "rewards/rejected": -1.8009843826293945, + "step": 1219 + }, + { + "epoch": 2.3851417399804498, + "grad_norm": 0.28498154878616333, + "learning_rate": 1.0371819960861056e-05, + "log_odds_chosen": 16.319093704223633, + "log_odds_ratio": -0.001194762997329235, + "logits/chosen": -1.76986825466156, + "logits/rejected": -1.7323994636535645, + "logps/chosen": -0.0247814878821373, + "logps/rejected": -12.442541122436523, + "loss": 0.0979, + "nll_loss": 0.09671172499656677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00247814878821373, + "rewards/margins": 1.2417759895324707, + "rewards/rejected": -1.2442541122436523, + "step": 1220 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.2786211371421814, + "learning_rate": 1.0339204174820612e-05, + "log_odds_chosen": 15.66905403137207, + "log_odds_ratio": -0.0009868318447843194, + "logits/chosen": -1.606317162513733, + "logits/rejected": -1.3767199516296387, + "logps/chosen": -0.024720290675759315, + "logps/rejected": -11.775184631347656, + "loss": 0.0971, + "nll_loss": 0.10629577934741974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024720290675759315, + "rewards/margins": 1.1750463247299194, + "rewards/rejected": -1.177518367767334, + "step": 1221 + }, + { + "epoch": 2.389051808406647, + "grad_norm": 0.27797892689704895, + "learning_rate": 1.030658838878017e-05, + "log_odds_chosen": 12.358529090881348, + "log_odds_ratio": -0.0009589589899405837, + "logits/chosen": -1.6777191162109375, + "logits/rejected": -1.5119141340255737, + "logps/chosen": -0.021553145721554756, + "logps/rejected": -8.42068099975586, + "loss": 0.0956, + "nll_loss": 0.0970316082239151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002155314665287733, + "rewards/margins": 0.8399126529693604, + "rewards/rejected": -0.842068076133728, + "step": 1222 + }, + { + "epoch": 2.391006842619746, + "grad_norm": 0.3178955912590027, + "learning_rate": 1.0273972602739726e-05, + "log_odds_chosen": 13.282560348510742, + "log_odds_ratio": -0.000952958595007658, + "logits/chosen": -1.5511280298233032, + "logits/rejected": -1.7054617404937744, + "logps/chosen": -0.029056871309876442, + "logps/rejected": -9.429645538330078, + "loss": 0.1018, + "nll_loss": 0.0933583527803421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002905687317252159, + "rewards/margins": 0.9400588274002075, + "rewards/rejected": -0.9429645538330078, + "step": 1223 + }, + { + "epoch": 2.3929618768328447, + "grad_norm": 0.2737756073474884, + "learning_rate": 1.0241356816699282e-05, + "log_odds_chosen": 15.605592727661133, + "log_odds_ratio": -0.00116172234993428, + "logits/chosen": -1.5730656385421753, + "logits/rejected": -1.6705658435821533, + "logps/chosen": -0.01851889304816723, + "logps/rejected": -11.526750564575195, + "loss": 0.0958, + "nll_loss": 0.08449254184961319, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018518893048167229, + "rewards/margins": 1.1508232355117798, + "rewards/rejected": -1.1526751518249512, + "step": 1224 + }, + { + "epoch": 2.3949169110459434, + "grad_norm": 0.2757607102394104, + "learning_rate": 1.0208741030658838e-05, + "log_odds_chosen": 19.7531795501709, + "log_odds_ratio": -0.0005468657473102212, + "logits/chosen": -1.70089852809906, + "logits/rejected": -1.522226095199585, + "logps/chosen": -0.018297815695405006, + "logps/rejected": -15.653696060180664, + "loss": 0.0959, + "nll_loss": 0.07724522054195404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018297816859558225, + "rewards/margins": 1.5635398626327515, + "rewards/rejected": -1.5653698444366455, + "step": 1225 + }, + { + "epoch": 2.396871945259042, + "grad_norm": 0.29207539558410645, + "learning_rate": 1.0176125244618395e-05, + "log_odds_chosen": 17.5367488861084, + "log_odds_ratio": -0.0012833974324166775, + "logits/chosen": -1.6794614791870117, + "logits/rejected": -1.2743046283721924, + "logps/chosen": -0.02346237376332283, + "logps/rejected": -13.593088150024414, + "loss": 0.0995, + "nll_loss": 0.12384796887636185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0023462374228984118, + "rewards/margins": 1.35696280002594, + "rewards/rejected": -1.3593090772628784, + "step": 1226 + }, + { + "epoch": 2.398826979472141, + "grad_norm": 0.2812841534614563, + "learning_rate": 1.0143509458577952e-05, + "log_odds_chosen": 16.11693572998047, + "log_odds_ratio": -0.0002550777280703187, + "logits/chosen": -1.4138200283050537, + "logits/rejected": -1.380128264427185, + "logps/chosen": -0.015586739405989647, + "logps/rejected": -11.874334335327148, + "loss": 0.0993, + "nll_loss": 0.09454625844955444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015586740337312222, + "rewards/margins": 1.1858748197555542, + "rewards/rejected": -1.1874334812164307, + "step": 1227 + }, + { + "epoch": 2.4007820136852396, + "grad_norm": 0.27531906962394714, + "learning_rate": 1.0110893672537508e-05, + "log_odds_chosen": 14.842418670654297, + "log_odds_ratio": -0.0018105607014149427, + "logits/chosen": -1.7221777439117432, + "logits/rejected": -1.7691240310668945, + "logps/chosen": -0.027332410216331482, + "logps/rejected": -10.87647819519043, + "loss": 0.0969, + "nll_loss": 0.09060025215148926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027332408353686333, + "rewards/margins": 1.0849145650863647, + "rewards/rejected": -1.087647795677185, + "step": 1228 + }, + { + "epoch": 2.4027370478983383, + "grad_norm": 0.2997584939002991, + "learning_rate": 1.0078277886497065e-05, + "log_odds_chosen": 11.309026718139648, + "log_odds_ratio": -0.0011679804883897305, + "logits/chosen": -1.7126739025115967, + "logits/rejected": -1.6962980031967163, + "logps/chosen": -0.018916096538305283, + "logps/rejected": -7.200608253479004, + "loss": 0.0975, + "nll_loss": 0.1053183525800705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018916097469627857, + "rewards/margins": 0.7181692123413086, + "rewards/rejected": -0.7200608253479004, + "step": 1229 + }, + { + "epoch": 2.404692082111437, + "grad_norm": 0.27213141322135925, + "learning_rate": 1.004566210045662e-05, + "log_odds_chosen": 16.71611785888672, + "log_odds_ratio": -0.0007851848495192826, + "logits/chosen": -1.7670707702636719, + "logits/rejected": -1.495254397392273, + "logps/chosen": -0.015570012852549553, + "logps/rejected": -12.399930953979492, + "loss": 0.0959, + "nll_loss": 0.09634080529212952, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015570013783872128, + "rewards/margins": 1.2384361028671265, + "rewards/rejected": -1.2399929761886597, + "step": 1230 + }, + { + "epoch": 2.4066471163245358, + "grad_norm": 0.29838693141937256, + "learning_rate": 1.0013046314416177e-05, + "log_odds_chosen": 16.503414154052734, + "log_odds_ratio": -0.000502765120472759, + "logits/chosen": -1.8295879364013672, + "logits/rejected": -1.7501182556152344, + "logps/chosen": -0.0229178573936224, + "logps/rejected": -12.587675094604492, + "loss": 0.0999, + "nll_loss": 0.11680856347084045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022917857859283686, + "rewards/margins": 1.256475806236267, + "rewards/rejected": -1.2587676048278809, + "step": 1231 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.2974795997142792, + "learning_rate": 9.980430528375734e-06, + "log_odds_chosen": 14.819419860839844, + "log_odds_ratio": -0.0003618305781856179, + "logits/chosen": -1.5685454607009888, + "logits/rejected": -1.423979640007019, + "logps/chosen": -0.011472604237496853, + "logps/rejected": -10.14090347290039, + "loss": 0.0989, + "nll_loss": 0.1908445805311203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011472604237496853, + "rewards/margins": 1.0129430294036865, + "rewards/rejected": -1.0140902996063232, + "step": 1232 + }, + { + "epoch": 2.410557184750733, + "grad_norm": 0.2732795178890228, + "learning_rate": 9.94781474233529e-06, + "log_odds_chosen": 19.92740249633789, + "log_odds_ratio": -3.654409374576062e-05, + "logits/chosen": -1.61177659034729, + "logits/rejected": -1.5494260787963867, + "logps/chosen": -0.017584023997187614, + "logps/rejected": -15.778470993041992, + "loss": 0.0936, + "nll_loss": 0.07112686336040497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017584024462848902, + "rewards/margins": 1.576088786125183, + "rewards/rejected": -1.5778471231460571, + "step": 1233 + }, + { + "epoch": 2.412512218963832, + "grad_norm": 0.2905467450618744, + "learning_rate": 9.915198956294847e-06, + "log_odds_chosen": 20.73139190673828, + "log_odds_ratio": -0.00033456156961619854, + "logits/chosen": -1.6188158988952637, + "logits/rejected": -1.4052584171295166, + "logps/chosen": -0.01337366085499525, + "logps/rejected": -16.351608276367188, + "loss": 0.0982, + "nll_loss": 0.11154802143573761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013373661786317825, + "rewards/margins": 1.6338233947753906, + "rewards/rejected": -1.6351609230041504, + "step": 1234 + }, + { + "epoch": 2.4144672531769307, + "grad_norm": 0.28609412908554077, + "learning_rate": 9.882583170254403e-06, + "log_odds_chosen": 17.053569793701172, + "log_odds_ratio": -0.0008361853542737663, + "logits/chosen": -1.3977982997894287, + "logits/rejected": -1.4292882680892944, + "logps/chosen": -0.01736394129693508, + "logps/rejected": -12.887306213378906, + "loss": 0.0971, + "nll_loss": 0.10121086239814758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017363941296935081, + "rewards/margins": 1.286994218826294, + "rewards/rejected": -1.2887306213378906, + "step": 1235 + }, + { + "epoch": 2.4164222873900294, + "grad_norm": 0.2841896116733551, + "learning_rate": 9.849967384213959e-06, + "log_odds_chosen": 10.93733024597168, + "log_odds_ratio": -0.0014327600365504622, + "logits/chosen": -1.8843519687652588, + "logits/rejected": -1.7000758647918701, + "logps/chosen": -0.014935913495719433, + "logps/rejected": -6.702945709228516, + "loss": 0.0969, + "nll_loss": 0.0840412899851799, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014935913495719433, + "rewards/margins": 0.668800950050354, + "rewards/rejected": -0.6702945232391357, + "step": 1236 + }, + { + "epoch": 2.418377321603128, + "grad_norm": 0.275334894657135, + "learning_rate": 9.817351598173517e-06, + "log_odds_chosen": 15.53630256652832, + "log_odds_ratio": -0.0010492854053154588, + "logits/chosen": -1.6707661151885986, + "logits/rejected": -1.5723297595977783, + "logps/chosen": -0.018999766558408737, + "logps/rejected": -11.45766830444336, + "loss": 0.0987, + "nll_loss": 0.09400048106908798, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018999767489731312, + "rewards/margins": 1.143866777420044, + "rewards/rejected": -1.1457667350769043, + "step": 1237 + }, + { + "epoch": 2.420332355816227, + "grad_norm": 0.2700774371623993, + "learning_rate": 9.784735812133073e-06, + "log_odds_chosen": 16.369638442993164, + "log_odds_ratio": -0.0005226012435741723, + "logits/chosen": -1.5732967853546143, + "logits/rejected": -1.3996493816375732, + "logps/chosen": -0.0180908665060997, + "logps/rejected": -12.112849235534668, + "loss": 0.0954, + "nll_loss": 0.10082529485225677, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00180908665060997, + "rewards/margins": 1.2094758749008179, + "rewards/rejected": -1.211284875869751, + "step": 1238 + }, + { + "epoch": 2.4222873900293256, + "grad_norm": 0.27263253927230835, + "learning_rate": 9.752120026092629e-06, + "log_odds_chosen": 15.773513793945312, + "log_odds_ratio": -0.001021233620122075, + "logits/chosen": -1.6876003742218018, + "logits/rejected": -1.6641387939453125, + "logps/chosen": -0.01903735101222992, + "logps/rejected": -11.738780975341797, + "loss": 0.0952, + "nll_loss": 0.0908300131559372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001903735101222992, + "rewards/margins": 1.1719744205474854, + "rewards/rejected": -1.1738781929016113, + "step": 1239 + }, + { + "epoch": 2.4242424242424243, + "grad_norm": 0.29773402214050293, + "learning_rate": 9.719504240052185e-06, + "log_odds_chosen": 18.188438415527344, + "log_odds_ratio": -0.0002403208491159603, + "logits/chosen": -1.808403730392456, + "logits/rejected": -1.5578361749649048, + "logps/chosen": -0.025040410459041595, + "logps/rejected": -14.018678665161133, + "loss": 0.0979, + "nll_loss": 0.12854459881782532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002504040952771902, + "rewards/margins": 1.3993639945983887, + "rewards/rejected": -1.4018679857254028, + "step": 1240 + }, + { + "epoch": 2.426197458455523, + "grad_norm": 0.2745211720466614, + "learning_rate": 9.686888454011741e-06, + "log_odds_chosen": 18.549224853515625, + "log_odds_ratio": -0.00011833843745989725, + "logits/chosen": -1.5989506244659424, + "logits/rejected": -1.6449137926101685, + "logps/chosen": -0.019455697387456894, + "logps/rejected": -14.537464141845703, + "loss": 0.0952, + "nll_loss": 0.0937592163681984, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001945569645613432, + "rewards/margins": 1.45180082321167, + "rewards/rejected": -1.4537464380264282, + "step": 1241 + }, + { + "epoch": 2.4281524926686218, + "grad_norm": 0.26787787675857544, + "learning_rate": 9.654272667971299e-06, + "log_odds_chosen": 20.514488220214844, + "log_odds_ratio": -0.0004992170725017786, + "logits/chosen": -1.7307837009429932, + "logits/rejected": -1.4072054624557495, + "logps/chosen": -0.024895604699850082, + "logps/rejected": -16.62730598449707, + "loss": 0.0936, + "nll_loss": 0.10160660743713379, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002489560516551137, + "rewards/margins": 1.6602410078048706, + "rewards/rejected": -1.6627305746078491, + "step": 1242 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.27694934606552124, + "learning_rate": 9.621656881930855e-06, + "log_odds_chosen": 19.937623977661133, + "log_odds_ratio": -0.0006322496337816119, + "logits/chosen": -1.7493236064910889, + "logits/rejected": -1.6127536296844482, + "logps/chosen": -0.027632901445031166, + "logps/rejected": -16.103113174438477, + "loss": 0.0953, + "nll_loss": 0.1116277277469635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0027632901910692453, + "rewards/margins": 1.6075479984283447, + "rewards/rejected": -1.6103113889694214, + "step": 1243 + }, + { + "epoch": 2.432062561094819, + "grad_norm": 0.27457863092422485, + "learning_rate": 9.589041095890411e-06, + "log_odds_chosen": 18.17763900756836, + "log_odds_ratio": -0.001377025037072599, + "logits/chosen": -1.7711379528045654, + "logits/rejected": -1.4687120914459229, + "logps/chosen": -0.015651408582925797, + "logps/rejected": -13.911103248596191, + "loss": 0.0944, + "nll_loss": 0.11395489424467087, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015651409048587084, + "rewards/margins": 1.389545202255249, + "rewards/rejected": -1.3911103010177612, + "step": 1244 + }, + { + "epoch": 2.434017595307918, + "grad_norm": 0.2854832410812378, + "learning_rate": 9.556425309849967e-06, + "log_odds_chosen": 14.066255569458008, + "log_odds_ratio": -0.0008927732123993337, + "logits/chosen": -1.6493245363235474, + "logits/rejected": -1.5251697301864624, + "logps/chosen": -0.01666242629289627, + "logps/rejected": -9.874077796936035, + "loss": 0.0963, + "nll_loss": 0.09994536638259888, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016662424895912409, + "rewards/margins": 0.9857416152954102, + "rewards/rejected": -0.9874078631401062, + "step": 1245 + }, + { + "epoch": 2.4359726295210167, + "grad_norm": 0.277835875749588, + "learning_rate": 9.523809523809523e-06, + "log_odds_chosen": 17.69641876220703, + "log_odds_ratio": -0.0006502158939838409, + "logits/chosen": -1.7353942394256592, + "logits/rejected": -1.5269163846969604, + "logps/chosen": -0.024783628061413765, + "logps/rejected": -13.835673332214355, + "loss": 0.0959, + "nll_loss": 0.09445428103208542, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024783629924058914, + "rewards/margins": 1.3810889720916748, + "rewards/rejected": -1.3835673332214355, + "step": 1246 + }, + { + "epoch": 2.4379276637341154, + "grad_norm": 0.2691543698310852, + "learning_rate": 9.49119373776908e-06, + "log_odds_chosen": 12.759645462036133, + "log_odds_ratio": -0.00032998123788274825, + "logits/chosen": -1.7845423221588135, + "logits/rejected": -1.6374855041503906, + "logps/chosen": -0.012318762019276619, + "logps/rejected": -8.27766227722168, + "loss": 0.0938, + "nll_loss": 0.08926571905612946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00123187608551234, + "rewards/margins": 0.8265343904495239, + "rewards/rejected": -0.8277662396430969, + "step": 1247 + }, + { + "epoch": 2.439882697947214, + "grad_norm": 0.27644941210746765, + "learning_rate": 9.458577951728637e-06, + "log_odds_chosen": 14.028772354125977, + "log_odds_ratio": -0.000664411170873791, + "logits/chosen": -1.7376813888549805, + "logits/rejected": -1.6080303192138672, + "logps/chosen": -0.01374067459255457, + "logps/rejected": -9.661614418029785, + "loss": 0.0964, + "nll_loss": 0.07669417560100555, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013740675058215857, + "rewards/margins": 0.964787483215332, + "rewards/rejected": -0.9661614894866943, + "step": 1248 + }, + { + "epoch": 2.441837732160313, + "grad_norm": 0.2695925831794739, + "learning_rate": 9.425962165688193e-06, + "log_odds_chosen": 16.98004722595215, + "log_odds_ratio": -0.0005201207823120058, + "logits/chosen": -1.643376350402832, + "logits/rejected": -1.329243540763855, + "logps/chosen": -0.019546454772353172, + "logps/rejected": -12.9461030960083, + "loss": 0.0941, + "nll_loss": 0.08971033990383148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019546456169337034, + "rewards/margins": 1.2926557064056396, + "rewards/rejected": -1.2946103811264038, + "step": 1249 + }, + { + "epoch": 2.4437927663734116, + "grad_norm": 0.27579236030578613, + "learning_rate": 9.393346379647749e-06, + "log_odds_chosen": 16.758567810058594, + "log_odds_ratio": -0.0023232107050716877, + "logits/chosen": -1.704354166984558, + "logits/rejected": -1.643141508102417, + "logps/chosen": -0.03546595573425293, + "logps/rejected": -13.02908706665039, + "loss": 0.0949, + "nll_loss": 0.13738900423049927, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003546595573425293, + "rewards/margins": 1.2993621826171875, + "rewards/rejected": -1.3029088973999023, + "step": 1250 + }, + { + "epoch": 2.4457478005865103, + "grad_norm": 0.2647280693054199, + "learning_rate": 9.360730593607305e-06, + "log_odds_chosen": 12.5621919631958, + "log_odds_ratio": -0.00040237465873360634, + "logits/chosen": -1.8237224817276, + "logits/rejected": -1.5841515064239502, + "logps/chosen": -0.02059607394039631, + "logps/rejected": -8.478754043579102, + "loss": 0.0919, + "nll_loss": 0.08916322886943817, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020596073009073734, + "rewards/margins": 0.845815896987915, + "rewards/rejected": -0.8478755354881287, + "step": 1251 + }, + { + "epoch": 2.447702834799609, + "grad_norm": 0.2683350741863251, + "learning_rate": 9.328114807566863e-06, + "log_odds_chosen": 16.21337127685547, + "log_odds_ratio": -0.0005744653171859682, + "logits/chosen": -1.7329068183898926, + "logits/rejected": -1.5890699625015259, + "logps/chosen": -0.018677225336432457, + "logps/rejected": -12.193243026733398, + "loss": 0.0944, + "nll_loss": 0.08957495540380478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001867722487077117, + "rewards/margins": 1.217456579208374, + "rewards/rejected": -1.2193243503570557, + "step": 1252 + }, + { + "epoch": 2.4496578690127078, + "grad_norm": 0.26325178146362305, + "learning_rate": 9.295499021526419e-06, + "log_odds_chosen": 17.33675193786621, + "log_odds_ratio": -0.0005265962099656463, + "logits/chosen": -1.6493642330169678, + "logits/rejected": -1.410177230834961, + "logps/chosen": -0.027579402551054955, + "logps/rejected": -13.597867965698242, + "loss": 0.093, + "nll_loss": 0.12231851369142532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002757939975708723, + "rewards/margins": 1.3570287227630615, + "rewards/rejected": -1.3597867488861084, + "step": 1253 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.2770284414291382, + "learning_rate": 9.262883235485975e-06, + "log_odds_chosen": 15.386545181274414, + "log_odds_ratio": -0.0003682424430735409, + "logits/chosen": -1.5961501598358154, + "logits/rejected": -1.5874228477478027, + "logps/chosen": -0.020859217271208763, + "logps/rejected": -11.341324806213379, + "loss": 0.0945, + "nll_loss": 0.07652567327022552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020859218202531338, + "rewards/margins": 1.1320465803146362, + "rewards/rejected": -1.1341325044631958, + "step": 1254 + }, + { + "epoch": 2.4535679374389052, + "grad_norm": 0.29106414318084717, + "learning_rate": 9.230267449445531e-06, + "log_odds_chosen": 18.82553482055664, + "log_odds_ratio": -0.0002073765208479017, + "logits/chosen": -1.7108910083770752, + "logits/rejected": -1.6186524629592896, + "logps/chosen": -0.01836363784968853, + "logps/rejected": -14.694313049316406, + "loss": 0.099, + "nll_loss": 0.09434497356414795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018363639246672392, + "rewards/margins": 1.467595100402832, + "rewards/rejected": -1.4694314002990723, + "step": 1255 + }, + { + "epoch": 2.455522971652004, + "grad_norm": 0.2670532763004303, + "learning_rate": 9.197651663405087e-06, + "log_odds_chosen": 19.154699325561523, + "log_odds_ratio": -7.968130375957116e-05, + "logits/chosen": -1.4444985389709473, + "logits/rejected": -1.4625827074050903, + "logps/chosen": -0.02852361835539341, + "logps/rejected": -15.36965274810791, + "loss": 0.094, + "nll_loss": 0.100089892745018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002852361649274826, + "rewards/margins": 1.5341129302978516, + "rewards/rejected": -1.5369653701782227, + "step": 1256 + }, + { + "epoch": 2.4574780058651027, + "grad_norm": 0.2727811336517334, + "learning_rate": 9.165035877364645e-06, + "log_odds_chosen": 15.620579719543457, + "log_odds_ratio": -0.0007482037181034684, + "logits/chosen": -1.7655843496322632, + "logits/rejected": -1.6200366020202637, + "logps/chosen": -0.016514580696821213, + "logps/rejected": -11.482083320617676, + "loss": 0.0948, + "nll_loss": 0.0779709666967392, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016514579765498638, + "rewards/margins": 1.1465569734573364, + "rewards/rejected": -1.1482083797454834, + "step": 1257 + }, + { + "epoch": 2.4594330400782014, + "grad_norm": 0.2962504029273987, + "learning_rate": 9.132420091324201e-06, + "log_odds_chosen": 13.71031379699707, + "log_odds_ratio": -0.0006260788068175316, + "logits/chosen": -1.7608743906021118, + "logits/rejected": -1.6880930662155151, + "logps/chosen": -0.017510337755084038, + "logps/rejected": -9.614412307739258, + "loss": 0.0974, + "nll_loss": 0.07862059772014618, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017510335892438889, + "rewards/margins": 0.9596902132034302, + "rewards/rejected": -0.9614412784576416, + "step": 1258 + }, + { + "epoch": 2.4613880742913, + "grad_norm": 0.25987160205841064, + "learning_rate": 9.099804305283757e-06, + "log_odds_chosen": 8.914405822753906, + "log_odds_ratio": -0.0013890890404582024, + "logits/chosen": -1.8180320262908936, + "logits/rejected": -1.7258504629135132, + "logps/chosen": -0.019835827872157097, + "logps/rejected": -4.828947067260742, + "loss": 0.091, + "nll_loss": 0.11043907701969147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001983582740649581, + "rewards/margins": 0.4809111952781677, + "rewards/rejected": -0.48289474844932556, + "step": 1259 + }, + { + "epoch": 2.463343108504399, + "grad_norm": 0.2653640806674957, + "learning_rate": 9.067188519243313e-06, + "log_odds_chosen": 13.111917495727539, + "log_odds_ratio": -0.0005348058184608817, + "logits/chosen": -1.8467936515808105, + "logits/rejected": -1.646613597869873, + "logps/chosen": -0.01986905001103878, + "logps/rejected": -9.065193176269531, + "loss": 0.0945, + "nll_loss": 0.08471627533435822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001986905001103878, + "rewards/margins": 0.9045323133468628, + "rewards/rejected": -0.9065192341804504, + "step": 1260 + }, + { + "epoch": 2.4652981427174976, + "grad_norm": 0.2710256278514862, + "learning_rate": 9.03457273320287e-06, + "log_odds_chosen": 16.166059494018555, + "log_odds_ratio": -4.9462414608569816e-05, + "logits/chosen": -1.6709357500076294, + "logits/rejected": -1.5756384134292603, + "logps/chosen": -0.01602938398718834, + "logps/rejected": -11.91128158569336, + "loss": 0.0945, + "nll_loss": 0.0772620439529419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001602938398718834, + "rewards/margins": 1.1895252466201782, + "rewards/rejected": -1.191128134727478, + "step": 1261 + }, + { + "epoch": 2.4672531769305963, + "grad_norm": 0.2779797613620758, + "learning_rate": 9.001956947162427e-06, + "log_odds_chosen": 15.737964630126953, + "log_odds_ratio": -0.0012289367150515318, + "logits/chosen": -1.6137620210647583, + "logits/rejected": -1.6059118509292603, + "logps/chosen": -0.014550279825925827, + "logps/rejected": -11.433399200439453, + "loss": 0.0937, + "nll_loss": 0.08316093683242798, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014550278428941965, + "rewards/margins": 1.1418848037719727, + "rewards/rejected": -1.1433398723602295, + "step": 1262 + }, + { + "epoch": 2.469208211143695, + "grad_norm": 0.26749032735824585, + "learning_rate": 8.969341161121983e-06, + "log_odds_chosen": 15.637502670288086, + "log_odds_ratio": -0.0006388027686625719, + "logits/chosen": -1.529718041419983, + "logits/rejected": -1.509559154510498, + "logps/chosen": -0.01670614443719387, + "logps/rejected": -11.486351013183594, + "loss": 0.0913, + "nll_loss": 0.0796380341053009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016706145834177732, + "rewards/margins": 1.1469643115997314, + "rewards/rejected": -1.1486351490020752, + "step": 1263 + }, + { + "epoch": 2.4711632453567938, + "grad_norm": 0.2711767554283142, + "learning_rate": 8.93672537508154e-06, + "log_odds_chosen": 13.347395896911621, + "log_odds_ratio": -0.0005838965298607945, + "logits/chosen": -1.779496431350708, + "logits/rejected": -1.440359115600586, + "logps/chosen": -0.02195487543940544, + "logps/rejected": -9.467448234558105, + "loss": 0.0947, + "nll_loss": 0.09765554964542389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002195487730205059, + "rewards/margins": 0.9445493817329407, + "rewards/rejected": -0.9467447996139526, + "step": 1264 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.2762361168861389, + "learning_rate": 8.904109589041095e-06, + "log_odds_chosen": 14.777388572692871, + "log_odds_ratio": -0.0004015433369204402, + "logits/chosen": -1.7079516649246216, + "logits/rejected": -1.6369998455047607, + "logps/chosen": -0.01969601958990097, + "logps/rejected": -10.641499519348145, + "loss": 0.0953, + "nll_loss": 0.10128675401210785, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001969601958990097, + "rewards/margins": 1.0621802806854248, + "rewards/rejected": -1.0641499757766724, + "step": 1265 + }, + { + "epoch": 2.4750733137829912, + "grad_norm": 0.27220219373703003, + "learning_rate": 8.871493803000652e-06, + "log_odds_chosen": 15.839530944824219, + "log_odds_ratio": -0.00038906914414837956, + "logits/chosen": -1.6129131317138672, + "logits/rejected": -1.5264010429382324, + "logps/chosen": -0.01361269410699606, + "logps/rejected": -11.440387725830078, + "loss": 0.0955, + "nll_loss": 0.10780368745326996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013612695038318634, + "rewards/margins": 1.1426775455474854, + "rewards/rejected": -1.1440389156341553, + "step": 1266 + }, + { + "epoch": 2.47702834799609, + "grad_norm": 0.26665911078453064, + "learning_rate": 8.83887801696021e-06, + "log_odds_chosen": 19.379959106445312, + "log_odds_ratio": -0.0007127377903088927, + "logits/chosen": -1.785341501235962, + "logits/rejected": -1.4606064558029175, + "logps/chosen": -0.017906272783875465, + "logps/rejected": -15.237776756286621, + "loss": 0.0957, + "nll_loss": 0.09112047404050827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001790627371519804, + "rewards/margins": 1.5219870805740356, + "rewards/rejected": -1.523777723312378, + "step": 1267 + }, + { + "epoch": 2.4789833822091887, + "grad_norm": 0.2596190869808197, + "learning_rate": 8.806262230919765e-06, + "log_odds_chosen": 14.863079071044922, + "log_odds_ratio": -0.000338139507221058, + "logits/chosen": -1.6935179233551025, + "logits/rejected": -1.604269027709961, + "logps/chosen": -0.012356461025774479, + "logps/rejected": -10.36752700805664, + "loss": 0.0902, + "nll_loss": 0.08632341027259827, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012356460792943835, + "rewards/margins": 1.0355172157287598, + "rewards/rejected": -1.036752700805664, + "step": 1268 + }, + { + "epoch": 2.4809384164222874, + "grad_norm": 0.2704226076602936, + "learning_rate": 8.773646444879321e-06, + "log_odds_chosen": 15.504687309265137, + "log_odds_ratio": -0.0010852331761270761, + "logits/chosen": -1.856750726699829, + "logits/rejected": -1.564734935760498, + "logps/chosen": -0.02115883119404316, + "logps/rejected": -11.375167846679688, + "loss": 0.0933, + "nll_loss": 0.09332005679607391, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021158833988010883, + "rewards/margins": 1.1354010105133057, + "rewards/rejected": -1.137516736984253, + "step": 1269 + }, + { + "epoch": 2.482893450635386, + "grad_norm": 0.2997511625289917, + "learning_rate": 8.741030658838878e-06, + "log_odds_chosen": 14.347898483276367, + "log_odds_ratio": -0.000541596906259656, + "logits/chosen": -1.7089461088180542, + "logits/rejected": -1.4297078847885132, + "logps/chosen": -0.022937193512916565, + "logps/rejected": -10.195098876953125, + "loss": 0.0951, + "nll_loss": 0.13137993216514587, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0022937192115932703, + "rewards/margins": 1.0172163248062134, + "rewards/rejected": -1.01951003074646, + "step": 1270 + }, + { + "epoch": 2.484848484848485, + "grad_norm": 0.27311524748802185, + "learning_rate": 8.708414872798434e-06, + "log_odds_chosen": 14.932263374328613, + "log_odds_ratio": -0.00044191302731633186, + "logits/chosen": -1.736433982849121, + "logits/rejected": -1.5571181774139404, + "logps/chosen": -0.016823744401335716, + "logps/rejected": -10.61302375793457, + "loss": 0.0918, + "nll_loss": 0.10167229920625687, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001682374393567443, + "rewards/margins": 1.0596199035644531, + "rewards/rejected": -1.0613023042678833, + "step": 1271 + }, + { + "epoch": 2.4868035190615836, + "grad_norm": 0.2671138048171997, + "learning_rate": 8.675799086757991e-06, + "log_odds_chosen": 20.9695987701416, + "log_odds_ratio": -0.0005938963731750846, + "logits/chosen": -1.6473941802978516, + "logits/rejected": -1.4461230039596558, + "logps/chosen": -0.024617109447717667, + "logps/rejected": -17.048797607421875, + "loss": 0.0923, + "nll_loss": 0.10258974134922028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024617109447717667, + "rewards/margins": 1.7024180889129639, + "rewards/rejected": -1.704879879951477, + "step": 1272 + }, + { + "epoch": 2.4887585532746823, + "grad_norm": 0.258192241191864, + "learning_rate": 8.643183300717548e-06, + "log_odds_chosen": 16.39495849609375, + "log_odds_ratio": -0.00039925158489495516, + "logits/chosen": -1.7153818607330322, + "logits/rejected": -1.5483207702636719, + "logps/chosen": -0.01754547655582428, + "logps/rejected": -12.21939754486084, + "loss": 0.0903, + "nll_loss": 0.06876213848590851, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017545478185638785, + "rewards/margins": 1.2201851606369019, + "rewards/rejected": -1.2219396829605103, + "step": 1273 + }, + { + "epoch": 2.490713587487781, + "grad_norm": 0.27285265922546387, + "learning_rate": 8.610567514677104e-06, + "log_odds_chosen": 14.11989688873291, + "log_odds_ratio": -0.0005518329562619328, + "logits/chosen": -1.7661752700805664, + "logits/rejected": -1.630852222442627, + "logps/chosen": -0.0185469388961792, + "logps/rejected": -10.024789810180664, + "loss": 0.0939, + "nll_loss": 0.06919553875923157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018546937499195337, + "rewards/margins": 1.0006242990493774, + "rewards/rejected": -1.002479076385498, + "step": 1274 + }, + { + "epoch": 2.4926686217008798, + "grad_norm": 0.259773313999176, + "learning_rate": 8.57795172863666e-06, + "log_odds_chosen": 13.91519546508789, + "log_odds_ratio": -0.0007475917809642851, + "logits/chosen": -1.577063798904419, + "logits/rejected": -1.601999282836914, + "logps/chosen": -0.01973426342010498, + "logps/rejected": -9.93185043334961, + "loss": 0.0923, + "nll_loss": 0.08883222937583923, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019734264351427555, + "rewards/margins": 0.9912116527557373, + "rewards/rejected": -0.9931850433349609, + "step": 1275 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.26129987835884094, + "learning_rate": 8.545335942596216e-06, + "log_odds_chosen": 16.029008865356445, + "log_odds_ratio": -0.0004553288163151592, + "logits/chosen": -1.6975226402282715, + "logits/rejected": -1.4322450160980225, + "logps/chosen": -0.01548981387168169, + "logps/rejected": -11.740489959716797, + "loss": 0.0922, + "nll_loss": 0.09368571639060974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015489814104512334, + "rewards/margins": 1.1725001335144043, + "rewards/rejected": -1.1740491390228271, + "step": 1276 + }, + { + "epoch": 2.4965786901270772, + "grad_norm": 0.2649204730987549, + "learning_rate": 8.512720156555774e-06, + "log_odds_chosen": 13.69594669342041, + "log_odds_ratio": -0.000786449876613915, + "logits/chosen": -1.8451242446899414, + "logits/rejected": -1.4703410863876343, + "logps/chosen": -0.02401752769947052, + "logps/rejected": -9.553058624267578, + "loss": 0.0927, + "nll_loss": 0.1327364295721054, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024017528630793095, + "rewards/margins": 0.952904224395752, + "rewards/rejected": -0.9553059339523315, + "step": 1277 + }, + { + "epoch": 2.498533724340176, + "grad_norm": 0.2848568856716156, + "learning_rate": 8.48010437051533e-06, + "log_odds_chosen": 15.277889251708984, + "log_odds_ratio": -0.0008297850727103651, + "logits/chosen": -1.7619354724884033, + "logits/rejected": -1.556557536125183, + "logps/chosen": -0.01732664741575718, + "logps/rejected": -11.04472827911377, + "loss": 0.0956, + "nll_loss": 0.09643883258104324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017326647648587823, + "rewards/margins": 1.1027401685714722, + "rewards/rejected": -1.1044728755950928, + "step": 1278 + }, + { + "epoch": 2.5004887585532747, + "grad_norm": 0.2646327614784241, + "learning_rate": 8.447488584474886e-06, + "log_odds_chosen": 16.645769119262695, + "log_odds_ratio": -0.0008484501158818603, + "logits/chosen": -1.6945202350616455, + "logits/rejected": -1.776552438735962, + "logps/chosen": -0.024365156888961792, + "logps/rejected": -12.62756061553955, + "loss": 0.0931, + "nll_loss": 0.11208081245422363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002436515875160694, + "rewards/margins": 1.2603195905685425, + "rewards/rejected": -1.262756109237671, + "step": 1279 + }, + { + "epoch": 2.5024437927663734, + "grad_norm": 0.2602674961090088, + "learning_rate": 8.414872798434442e-06, + "log_odds_chosen": 16.735414505004883, + "log_odds_ratio": -0.00017297526937909424, + "logits/chosen": -1.545551061630249, + "logits/rejected": -1.4289958477020264, + "logps/chosen": -0.02179580181837082, + "logps/rejected": -12.825668334960938, + "loss": 0.0911, + "nll_loss": 0.08828248083591461, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021795802749693394, + "rewards/margins": 1.2803874015808105, + "rewards/rejected": -1.2825669050216675, + "step": 1280 + }, + { + "epoch": 2.504398826979472, + "grad_norm": 0.2732098400592804, + "learning_rate": 8.382257012393998e-06, + "log_odds_chosen": 10.842994689941406, + "log_odds_ratio": -0.002262754598632455, + "logits/chosen": -1.624801516532898, + "logits/rejected": -1.451723337173462, + "logps/chosen": -0.022339507937431335, + "logps/rejected": -6.890139102935791, + "loss": 0.092, + "nll_loss": 0.1165681928396225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002233950886875391, + "rewards/margins": 0.6867799758911133, + "rewards/rejected": -0.6890139579772949, + "step": 1281 + }, + { + "epoch": 2.506353861192571, + "grad_norm": 0.2626391649246216, + "learning_rate": 8.349641226353556e-06, + "log_odds_chosen": 19.52761459350586, + "log_odds_ratio": -0.00026997801614925265, + "logits/chosen": -1.660808801651001, + "logits/rejected": -1.400144338607788, + "logps/chosen": -0.01811567321419716, + "logps/rejected": -15.274335861206055, + "loss": 0.0911, + "nll_loss": 0.08942729234695435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018115672282874584, + "rewards/margins": 1.5256221294403076, + "rewards/rejected": -1.5274336338043213, + "step": 1282 + }, + { + "epoch": 2.5083088954056696, + "grad_norm": 0.2747136056423187, + "learning_rate": 8.317025440313112e-06, + "log_odds_chosen": 14.902545928955078, + "log_odds_ratio": -0.0019135810434818268, + "logits/chosen": -1.5360785722732544, + "logits/rejected": -1.6055047512054443, + "logps/chosen": -0.014750763773918152, + "logps/rejected": -10.561771392822266, + "loss": 0.0926, + "nll_loss": 0.09577751159667969, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001475076307542622, + "rewards/margins": 1.0547020435333252, + "rewards/rejected": -1.0561771392822266, + "step": 1283 + }, + { + "epoch": 2.5102639296187683, + "grad_norm": 0.25742822885513306, + "learning_rate": 8.284409654272668e-06, + "log_odds_chosen": 13.553119659423828, + "log_odds_ratio": -0.0017674975097179413, + "logits/chosen": -1.6780390739440918, + "logits/rejected": -1.7048472166061401, + "logps/chosen": -0.023420583456754684, + "logps/rejected": -9.588211059570312, + "loss": 0.0901, + "nll_loss": 0.08222377300262451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002342058578506112, + "rewards/margins": 0.9564790725708008, + "rewards/rejected": -0.958821177482605, + "step": 1284 + }, + { + "epoch": 2.512218963831867, + "grad_norm": 0.2808518409729004, + "learning_rate": 8.251793868232224e-06, + "log_odds_chosen": 17.119478225708008, + "log_odds_ratio": -0.0004471879219636321, + "logits/chosen": -1.7839102745056152, + "logits/rejected": -1.5578522682189941, + "logps/chosen": -0.01562604308128357, + "logps/rejected": -12.823095321655273, + "loss": 0.0936, + "nll_loss": 0.09318722784519196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015626042149960995, + "rewards/margins": 1.2807469367980957, + "rewards/rejected": -1.2823095321655273, + "step": 1285 + }, + { + "epoch": 2.5141739980449658, + "grad_norm": 0.2660909593105316, + "learning_rate": 8.21917808219178e-06, + "log_odds_chosen": 15.766510009765625, + "log_odds_ratio": -0.0012276258785277605, + "logits/chosen": -1.5425596237182617, + "logits/rejected": -1.604577660560608, + "logps/chosen": -0.027066480368375778, + "logps/rejected": -11.92447280883789, + "loss": 0.0917, + "nll_loss": 0.11334394663572311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002706647850573063, + "rewards/margins": 1.1897406578063965, + "rewards/rejected": -1.1924471855163574, + "step": 1286 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.27162861824035645, + "learning_rate": 8.186562296151338e-06, + "log_odds_chosen": 14.07529067993164, + "log_odds_ratio": -0.0007024817168712616, + "logits/chosen": -1.6500961780548096, + "logits/rejected": -1.6115750074386597, + "logps/chosen": -0.018732652068138123, + "logps/rejected": -9.980684280395508, + "loss": 0.0928, + "nll_loss": 0.08831369876861572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001873265253379941, + "rewards/margins": 0.9961951375007629, + "rewards/rejected": -0.9980683922767639, + "step": 1287 + }, + { + "epoch": 2.5180840664711632, + "grad_norm": 0.2506725788116455, + "learning_rate": 8.153946510110894e-06, + "log_odds_chosen": 15.449934005737305, + "log_odds_ratio": -0.0003422183508519083, + "logits/chosen": -1.7098281383514404, + "logits/rejected": -1.487168312072754, + "logps/chosen": -0.017731431871652603, + "logps/rejected": -11.209152221679688, + "loss": 0.0904, + "nll_loss": 0.10253704339265823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017731431871652603, + "rewards/margins": 1.1191420555114746, + "rewards/rejected": -1.1209152936935425, + "step": 1288 + }, + { + "epoch": 2.520039100684262, + "grad_norm": 0.2744153141975403, + "learning_rate": 8.12133072407045e-06, + "log_odds_chosen": 19.51742172241211, + "log_odds_ratio": -0.0005539779085665941, + "logits/chosen": -1.5982332229614258, + "logits/rejected": -1.4432196617126465, + "logps/chosen": -0.018760036677122116, + "logps/rejected": -15.393779754638672, + "loss": 0.0917, + "nll_loss": 0.08662646263837814, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018760035745799541, + "rewards/margins": 1.5375021696090698, + "rewards/rejected": -1.5393781661987305, + "step": 1289 + }, + { + "epoch": 2.5219941348973607, + "grad_norm": 0.25405818223953247, + "learning_rate": 8.088714938030006e-06, + "log_odds_chosen": 22.444889068603516, + "log_odds_ratio": -0.00030047082691453397, + "logits/chosen": -1.6340997219085693, + "logits/rejected": -1.4478598833084106, + "logps/chosen": -0.013467526063323021, + "logps/rejected": -17.870410919189453, + "loss": 0.0905, + "nll_loss": 0.08826641738414764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013467525132000446, + "rewards/margins": 1.7856942415237427, + "rewards/rejected": -1.7870409488677979, + "step": 1290 + }, + { + "epoch": 2.5239491691104594, + "grad_norm": 0.25839072465896606, + "learning_rate": 8.056099151989562e-06, + "log_odds_chosen": 15.421941757202148, + "log_odds_ratio": -0.00017601456784177572, + "logits/chosen": -1.662235140800476, + "logits/rejected": -1.4839212894439697, + "logps/chosen": -0.02035794034600258, + "logps/rejected": -11.331133842468262, + "loss": 0.0927, + "nll_loss": 0.0848364531993866, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020357940811663866, + "rewards/margins": 1.1310776472091675, + "rewards/rejected": -1.1331132650375366, + "step": 1291 + }, + { + "epoch": 2.525904203323558, + "grad_norm": 0.24465689063072205, + "learning_rate": 8.02348336594912e-06, + "log_odds_chosen": 16.392019271850586, + "log_odds_ratio": -0.0009510753443464637, + "logits/chosen": -1.6356796026229858, + "logits/rejected": -1.573986291885376, + "logps/chosen": -0.01601598411798477, + "logps/rejected": -12.206789016723633, + "loss": 0.0885, + "nll_loss": 0.08567281067371368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001601598458364606, + "rewards/margins": 1.2190773487091064, + "rewards/rejected": -1.2206788063049316, + "step": 1292 + }, + { + "epoch": 2.527859237536657, + "grad_norm": 0.2569182813167572, + "learning_rate": 7.990867579908676e-06, + "log_odds_chosen": 11.770082473754883, + "log_odds_ratio": -0.0034830814693123102, + "logits/chosen": -1.8162178993225098, + "logits/rejected": -1.6696500778198242, + "logps/chosen": -0.02575436607003212, + "logps/rejected": -7.732806205749512, + "loss": 0.0898, + "nll_loss": 0.11815929412841797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002575436607003212, + "rewards/margins": 0.7707051634788513, + "rewards/rejected": -0.7732806205749512, + "step": 1293 + }, + { + "epoch": 2.5298142717497556, + "grad_norm": 0.272216796875, + "learning_rate": 7.958251793868232e-06, + "log_odds_chosen": 12.683870315551758, + "log_odds_ratio": -0.0017053557094186544, + "logits/chosen": -1.7282660007476807, + "logits/rejected": -1.5683965682983398, + "logps/chosen": -0.021991997957229614, + "logps/rejected": -8.665885925292969, + "loss": 0.0933, + "nll_loss": 0.0880766212940216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021991997491568327, + "rewards/margins": 0.8643893599510193, + "rewards/rejected": -0.8665885925292969, + "step": 1294 + }, + { + "epoch": 2.5317693059628543, + "grad_norm": 0.2683495581150055, + "learning_rate": 7.925636007827788e-06, + "log_odds_chosen": 18.690338134765625, + "log_odds_ratio": -0.0004935867036692798, + "logits/chosen": -1.9365160465240479, + "logits/rejected": -1.4336211681365967, + "logps/chosen": -0.015391905792057514, + "logps/rejected": -14.366825103759766, + "loss": 0.0893, + "nll_loss": 0.10007214546203613, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015391905326396227, + "rewards/margins": 1.435143232345581, + "rewards/rejected": -1.4366824626922607, + "step": 1295 + }, + { + "epoch": 2.533724340175953, + "grad_norm": 0.25479453802108765, + "learning_rate": 7.893020221787344e-06, + "log_odds_chosen": 16.329166412353516, + "log_odds_ratio": -0.0004900491330772638, + "logits/chosen": -2.059445381164551, + "logits/rejected": -1.5260803699493408, + "logps/chosen": -0.017868559807538986, + "logps/rejected": -12.183778762817383, + "loss": 0.0901, + "nll_loss": 0.09066364169120789, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017868562135845423, + "rewards/margins": 1.2165911197662354, + "rewards/rejected": -1.2183780670166016, + "step": 1296 + }, + { + "epoch": 2.5356793743890518, + "grad_norm": 0.2438751459121704, + "learning_rate": 7.860404435746902e-06, + "log_odds_chosen": 18.142425537109375, + "log_odds_ratio": -0.0003563858044799417, + "logits/chosen": -1.5985394716262817, + "logits/rejected": -1.5598673820495605, + "logps/chosen": -0.016641609370708466, + "logps/rejected": -13.848846435546875, + "loss": 0.0875, + "nll_loss": 0.08708883821964264, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016641609836369753, + "rewards/margins": 1.3832204341888428, + "rewards/rejected": -1.3848845958709717, + "step": 1297 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.26766490936279297, + "learning_rate": 7.827788649706458e-06, + "log_odds_chosen": 15.213632583618164, + "log_odds_ratio": -0.00045669899554923177, + "logits/chosen": -1.8961809873580933, + "logits/rejected": -1.5351179838180542, + "logps/chosen": -0.012265181168913841, + "logps/rejected": -10.7838134765625, + "loss": 0.0917, + "nll_loss": 0.08257072418928146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012265180703252554, + "rewards/margins": 1.0771548748016357, + "rewards/rejected": -1.0783814191818237, + "step": 1298 + }, + { + "epoch": 2.5395894428152492, + "grad_norm": 0.2617800831794739, + "learning_rate": 7.795172863666014e-06, + "log_odds_chosen": 16.66992950439453, + "log_odds_ratio": -0.0010898449691012502, + "logits/chosen": -1.6443135738372803, + "logits/rejected": -1.6764745712280273, + "logps/chosen": -0.01910005696117878, + "logps/rejected": -12.58267593383789, + "loss": 0.0913, + "nll_loss": 0.0882246196269989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001910005696117878, + "rewards/margins": 1.2563576698303223, + "rewards/rejected": -1.2582676410675049, + "step": 1299 + }, + { + "epoch": 2.541544477028348, + "grad_norm": 0.247160866856575, + "learning_rate": 7.76255707762557e-06, + "log_odds_chosen": 15.28724479675293, + "log_odds_ratio": -0.0003794168005697429, + "logits/chosen": -1.6162657737731934, + "logits/rejected": -1.480910062789917, + "logps/chosen": -0.014417843893170357, + "logps/rejected": -11.020849227905273, + "loss": 0.0884, + "nll_loss": 0.07976850867271423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014417844358831644, + "rewards/margins": 1.1006431579589844, + "rewards/rejected": -1.1020851135253906, + "step": 1300 + }, + { + "epoch": 2.5434995112414467, + "grad_norm": 0.26031970977783203, + "learning_rate": 7.729941291585126e-06, + "log_odds_chosen": 16.155006408691406, + "log_odds_ratio": -0.0007065690588206053, + "logits/chosen": -1.668281078338623, + "logits/rejected": -1.5959720611572266, + "logps/chosen": -0.017629915848374367, + "logps/rejected": -12.109426498413086, + "loss": 0.0892, + "nll_loss": 0.0656154453754425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001762991538271308, + "rewards/margins": 1.2091796398162842, + "rewards/rejected": -1.2109425067901611, + "step": 1301 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.26799216866493225, + "learning_rate": 7.697325505544684e-06, + "log_odds_chosen": 14.31722640991211, + "log_odds_ratio": -0.0004150634922552854, + "logits/chosen": -1.6861565113067627, + "logits/rejected": -1.6362638473510742, + "logps/chosen": -0.016800668090581894, + "logps/rejected": -10.156327247619629, + "loss": 0.0914, + "nll_loss": 0.08588112890720367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016800669254735112, + "rewards/margins": 1.0139527320861816, + "rewards/rejected": -1.0156327486038208, + "step": 1302 + }, + { + "epoch": 2.547409579667644, + "grad_norm": 0.25394201278686523, + "learning_rate": 7.66470971950424e-06, + "log_odds_chosen": 17.076847076416016, + "log_odds_ratio": -0.001030922750942409, + "logits/chosen": -1.6528048515319824, + "logits/rejected": -1.465402603149414, + "logps/chosen": -0.019451329484581947, + "logps/rejected": -13.004094123840332, + "loss": 0.089, + "nll_loss": 0.09368628263473511, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001945133088156581, + "rewards/margins": 1.298464298248291, + "rewards/rejected": -1.3004094362258911, + "step": 1303 + }, + { + "epoch": 2.549364613880743, + "grad_norm": 0.2580939531326294, + "learning_rate": 7.632093933463796e-06, + "log_odds_chosen": 16.974029541015625, + "log_odds_ratio": -0.0004763302276842296, + "logits/chosen": -1.7626656293869019, + "logits/rejected": -1.5798165798187256, + "logps/chosen": -0.015192966908216476, + "logps/rejected": -12.699943542480469, + "loss": 0.0901, + "nll_loss": 0.08173169195652008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015192965511232615, + "rewards/margins": 1.2684751749038696, + "rewards/rejected": -1.2699944972991943, + "step": 1304 + }, + { + "epoch": 2.5513196480938416, + "grad_norm": 0.2597430944442749, + "learning_rate": 7.5994781474233524e-06, + "log_odds_chosen": 18.580310821533203, + "log_odds_ratio": -0.0003291497705504298, + "logits/chosen": -1.658205509185791, + "logits/rejected": -1.6098041534423828, + "logps/chosen": -0.017958546057343483, + "logps/rejected": -14.434165000915527, + "loss": 0.0905, + "nll_loss": 0.08320106565952301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001795854652300477, + "rewards/margins": 1.4416207075119019, + "rewards/rejected": -1.4434164762496948, + "step": 1305 + }, + { + "epoch": 2.5532746823069403, + "grad_norm": 0.26757094264030457, + "learning_rate": 7.566862361382909e-06, + "log_odds_chosen": 9.539407730102539, + "log_odds_ratio": -0.0015181582421064377, + "logits/chosen": -1.7149958610534668, + "logits/rejected": -1.6163280010223389, + "logps/chosen": -0.020821480080485344, + "logps/rejected": -5.502143859863281, + "loss": 0.0895, + "nll_loss": 0.08990241587162018, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002082147868350148, + "rewards/margins": 0.5481322407722473, + "rewards/rejected": -0.5502143502235413, + "step": 1306 + }, + { + "epoch": 2.555229716520039, + "grad_norm": 0.24828797578811646, + "learning_rate": 7.5342465753424655e-06, + "log_odds_chosen": 15.177103042602539, + "log_odds_ratio": -0.0010695522651076317, + "logits/chosen": -1.8481345176696777, + "logits/rejected": -1.6732354164123535, + "logps/chosen": -0.016110830008983612, + "logps/rejected": -10.964893341064453, + "loss": 0.0881, + "nll_loss": 0.08104624599218369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00161108304746449, + "rewards/margins": 1.0948783159255981, + "rewards/rejected": -1.0964893102645874, + "step": 1307 + }, + { + "epoch": 2.557184750733138, + "grad_norm": 0.2696016728878021, + "learning_rate": 7.501630789302022e-06, + "log_odds_chosen": 19.334636688232422, + "log_odds_ratio": -0.00036741208168677986, + "logits/chosen": -1.7544054985046387, + "logits/rejected": -1.4697946310043335, + "logps/chosen": -0.012969208881258965, + "logps/rejected": -14.952381134033203, + "loss": 0.0919, + "nll_loss": 0.08896414935588837, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012969209346920252, + "rewards/margins": 1.4939411878585815, + "rewards/rejected": -1.495238184928894, + "step": 1308 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.24756789207458496, + "learning_rate": 7.4690150032615785e-06, + "log_odds_chosen": 15.972633361816406, + "log_odds_ratio": -0.0005271253176033497, + "logits/chosen": -1.613710641860962, + "logits/rejected": -1.6014540195465088, + "logps/chosen": -0.015963932499289513, + "logps/rejected": -11.62980842590332, + "loss": 0.0878, + "nll_loss": 0.1278083324432373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015963930636644363, + "rewards/margins": 1.1613845825195312, + "rewards/rejected": -1.1629809141159058, + "step": 1309 + }, + { + "epoch": 2.5610948191593352, + "grad_norm": 0.25830745697021484, + "learning_rate": 7.4363992172211346e-06, + "log_odds_chosen": 20.94131851196289, + "log_odds_ratio": -0.00017385970568284392, + "logits/chosen": -1.648437261581421, + "logits/rejected": -1.4440536499023438, + "logps/chosen": -0.030910655856132507, + "logps/rejected": -17.170711517333984, + "loss": 0.0889, + "nll_loss": 0.09160855412483215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.003091065678745508, + "rewards/margins": 1.7139801979064941, + "rewards/rejected": -1.7170710563659668, + "step": 1310 + }, + { + "epoch": 2.563049853372434, + "grad_norm": 0.2674051523208618, + "learning_rate": 7.4037834311806915e-06, + "log_odds_chosen": 13.582014083862305, + "log_odds_ratio": -0.0014121856074780226, + "logits/chosen": -1.6272794008255005, + "logits/rejected": -1.5564243793487549, + "logps/chosen": -0.01727263256907463, + "logps/rejected": -9.390141487121582, + "loss": 0.0931, + "nll_loss": 0.09305553138256073, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017272632103413343, + "rewards/margins": 0.9372868537902832, + "rewards/rejected": -0.939014196395874, + "step": 1311 + }, + { + "epoch": 2.5650048875855327, + "grad_norm": 0.26351693272590637, + "learning_rate": 7.3711676451402476e-06, + "log_odds_chosen": 18.462207794189453, + "log_odds_ratio": -0.000575121957808733, + "logits/chosen": -1.4229483604431152, + "logits/rejected": -1.6772913932800293, + "logps/chosen": -0.013709656894207, + "logps/rejected": -14.147109985351562, + "loss": 0.0905, + "nll_loss": 0.10072703659534454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013709657359868288, + "rewards/margins": 1.4133400917053223, + "rewards/rejected": -1.4147109985351562, + "step": 1312 + }, + { + "epoch": 2.5669599217986314, + "grad_norm": 0.24591413140296936, + "learning_rate": 7.3385518590998045e-06, + "log_odds_chosen": 17.725170135498047, + "log_odds_ratio": -0.0008400719962082803, + "logits/chosen": -1.7609436511993408, + "logits/rejected": -1.5687241554260254, + "logps/chosen": -0.01945171132683754, + "logps/rejected": -13.636385917663574, + "loss": 0.0884, + "nll_loss": 0.08156269043684006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019451712723821402, + "rewards/margins": 1.3616933822631836, + "rewards/rejected": -1.3636386394500732, + "step": 1313 + }, + { + "epoch": 2.56891495601173, + "grad_norm": 0.2590751647949219, + "learning_rate": 7.305936073059361e-06, + "log_odds_chosen": 19.10104751586914, + "log_odds_ratio": -0.00019557021732907742, + "logits/chosen": -1.8422714471817017, + "logits/rejected": -1.5406944751739502, + "logps/chosen": -0.01728416606783867, + "logps/rejected": -14.764394760131836, + "loss": 0.0909, + "nll_loss": 0.10444855690002441, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017284165369346738, + "rewards/margins": 1.4747109413146973, + "rewards/rejected": -1.476439356803894, + "step": 1314 + }, + { + "epoch": 2.570869990224829, + "grad_norm": 0.2552576959133148, + "learning_rate": 7.273320287018917e-06, + "log_odds_chosen": 19.806827545166016, + "log_odds_ratio": -0.00025104734231717885, + "logits/chosen": -1.7647584676742554, + "logits/rejected": -1.7607823610305786, + "logps/chosen": -0.017587894573807716, + "logps/rejected": -15.557981491088867, + "loss": 0.089, + "nll_loss": 0.07916254550218582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001758789410814643, + "rewards/margins": 1.5540392398834229, + "rewards/rejected": -1.5557981729507446, + "step": 1315 + }, + { + "epoch": 2.5728250244379276, + "grad_norm": 0.24648021161556244, + "learning_rate": 7.240704500978474e-06, + "log_odds_chosen": 12.610635757446289, + "log_odds_ratio": -0.0004517057677730918, + "logits/chosen": -1.7257945537567139, + "logits/rejected": -1.8307931423187256, + "logps/chosen": -0.017031796276569366, + "logps/rejected": -8.37051010131836, + "loss": 0.0878, + "nll_loss": 0.09343992173671722, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017031796742230654, + "rewards/margins": 0.8353477716445923, + "rewards/rejected": -0.8370509147644043, + "step": 1316 + }, + { + "epoch": 2.5747800586510263, + "grad_norm": 0.24374085664749146, + "learning_rate": 7.20808871493803e-06, + "log_odds_chosen": 14.8475341796875, + "log_odds_ratio": -0.0005501352716237307, + "logits/chosen": -1.781693696975708, + "logits/rejected": -1.4125313758850098, + "logps/chosen": -0.017118966206908226, + "logps/rejected": -10.655760765075684, + "loss": 0.0878, + "nll_loss": 0.08956071734428406, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017118966206908226, + "rewards/margins": 1.0638642311096191, + "rewards/rejected": -1.0655760765075684, + "step": 1317 + }, + { + "epoch": 2.576735092864125, + "grad_norm": 0.24713225662708282, + "learning_rate": 7.175472928897587e-06, + "log_odds_chosen": 12.182392120361328, + "log_odds_ratio": -0.0006385013111867011, + "logits/chosen": -1.807685375213623, + "logits/rejected": -1.591869592666626, + "logps/chosen": -0.012736201286315918, + "logps/rejected": -7.742374420166016, + "loss": 0.088, + "nll_loss": 0.09268423914909363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012736201751977205, + "rewards/margins": 0.7729638814926147, + "rewards/rejected": -0.7742375135421753, + "step": 1318 + }, + { + "epoch": 2.578690127077224, + "grad_norm": 0.26376211643218994, + "learning_rate": 7.142857142857143e-06, + "log_odds_chosen": 13.970414161682129, + "log_odds_ratio": -0.00038209400372579694, + "logits/chosen": -1.739818811416626, + "logits/rejected": -1.7613723278045654, + "logps/chosen": -0.014872615225613117, + "logps/rejected": -9.637918472290039, + "loss": 0.0877, + "nll_loss": 0.07057145237922668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014872616156935692, + "rewards/margins": 0.9623045921325684, + "rewards/rejected": -0.9637918472290039, + "step": 1319 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.27145227789878845, + "learning_rate": 7.1102413568167e-06, + "log_odds_chosen": 16.794660568237305, + "log_odds_ratio": -0.0005424021510407329, + "logits/chosen": -1.9679019451141357, + "logits/rejected": -1.6998586654663086, + "logps/chosen": -0.0219590924680233, + "logps/rejected": -12.824663162231445, + "loss": 0.0912, + "nll_loss": 0.07689754664897919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0021959091536700726, + "rewards/margins": 1.2802704572677612, + "rewards/rejected": -1.2824664115905762, + "step": 1320 + }, + { + "epoch": 2.5826001955034212, + "grad_norm": 0.23904284834861755, + "learning_rate": 7.077625570776256e-06, + "log_odds_chosen": 12.15756607055664, + "log_odds_ratio": -0.001188489724881947, + "logits/chosen": -1.7266427278518677, + "logits/rejected": -1.4457892179489136, + "logps/chosen": -0.01360430009663105, + "logps/rejected": -7.890904903411865, + "loss": 0.0867, + "nll_loss": 0.07383792102336884, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013604301493614912, + "rewards/margins": 0.7877300381660461, + "rewards/rejected": -0.7890903949737549, + "step": 1321 + }, + { + "epoch": 2.58455522971652, + "grad_norm": 0.25901684165000916, + "learning_rate": 7.045009784735812e-06, + "log_odds_chosen": 10.913877487182617, + "log_odds_ratio": -0.0016878183232620358, + "logits/chosen": -1.894392728805542, + "logits/rejected": -1.8128666877746582, + "logps/chosen": -0.0186459943652153, + "logps/rejected": -6.853065013885498, + "loss": 0.0902, + "nll_loss": 0.07769537717103958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018645995296537876, + "rewards/margins": 0.6834418773651123, + "rewards/rejected": -0.6853064894676208, + "step": 1322 + }, + { + "epoch": 2.5865102639296187, + "grad_norm": 0.23614895343780518, + "learning_rate": 7.012393998695369e-06, + "log_odds_chosen": 17.280437469482422, + "log_odds_ratio": -0.0006433897651731968, + "logits/chosen": -1.7095146179199219, + "logits/rejected": -1.637399673461914, + "logps/chosen": -0.014427663758397102, + "logps/rejected": -12.792390823364258, + "loss": 0.0861, + "nll_loss": 0.08982429653406143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014427663991227746, + "rewards/margins": 1.2777963876724243, + "rewards/rejected": -1.2792390584945679, + "step": 1323 + }, + { + "epoch": 2.5884652981427174, + "grad_norm": 0.23057809472084045, + "learning_rate": 6.979778212654925e-06, + "log_odds_chosen": 15.214775085449219, + "log_odds_ratio": -0.0005857356009073555, + "logits/chosen": -1.6476051807403564, + "logits/rejected": -1.4447554349899292, + "logps/chosen": -0.015454625710844994, + "logps/rejected": -10.97266960144043, + "loss": 0.0848, + "nll_loss": 0.09640827775001526, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015454626409336925, + "rewards/margins": 1.0957213640213013, + "rewards/rejected": -1.0972669124603271, + "step": 1324 + }, + { + "epoch": 2.590420332355816, + "grad_norm": 0.2677174508571625, + "learning_rate": 6.947162426614482e-06, + "log_odds_chosen": 13.553678512573242, + "log_odds_ratio": -0.000907198409549892, + "logits/chosen": -1.836543321609497, + "logits/rejected": -1.5789799690246582, + "logps/chosen": -0.020604629069566727, + "logps/rejected": -9.684306144714355, + "loss": 0.0896, + "nll_loss": 0.09113696217536926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020604627206921577, + "rewards/margins": 0.9663702249526978, + "rewards/rejected": -0.9684306383132935, + "step": 1325 + }, + { + "epoch": 2.592375366568915, + "grad_norm": 0.2489839494228363, + "learning_rate": 6.914546640574038e-06, + "log_odds_chosen": 21.149160385131836, + "log_odds_ratio": -0.0005904726567678154, + "logits/chosen": -1.697214961051941, + "logits/rejected": -1.6216259002685547, + "logps/chosen": -0.018075037747621536, + "logps/rejected": -16.988008499145508, + "loss": 0.0886, + "nll_loss": 0.08359557390213013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001807503984309733, + "rewards/margins": 1.696993350982666, + "rewards/rejected": -1.698800802230835, + "step": 1326 + }, + { + "epoch": 2.5943304007820136, + "grad_norm": 0.2424367219209671, + "learning_rate": 6.881930854533594e-06, + "log_odds_chosen": 19.068693161010742, + "log_odds_ratio": -0.00025815737899392843, + "logits/chosen": -1.6847342252731323, + "logits/rejected": -1.512401819229126, + "logps/chosen": -0.016137490049004555, + "logps/rejected": -14.816920280456543, + "loss": 0.0865, + "nll_loss": 0.08382626622915268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016137489583343267, + "rewards/margins": 1.4800782203674316, + "rewards/rejected": -1.4816920757293701, + "step": 1327 + }, + { + "epoch": 2.5962854349951123, + "grad_norm": 0.2559719979763031, + "learning_rate": 6.849315068493151e-06, + "log_odds_chosen": 14.56241226196289, + "log_odds_ratio": -0.0010711950017139316, + "logits/chosen": -1.6439073085784912, + "logits/rejected": -1.4196972846984863, + "logps/chosen": -0.014433642849326134, + "logps/rejected": -10.288829803466797, + "loss": 0.0902, + "nll_loss": 0.09691017866134644, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001443364191800356, + "rewards/margins": 1.0274395942687988, + "rewards/rejected": -1.0288829803466797, + "step": 1328 + }, + { + "epoch": 2.598240469208211, + "grad_norm": 0.25303614139556885, + "learning_rate": 6.816699282452707e-06, + "log_odds_chosen": 19.40489387512207, + "log_odds_ratio": -0.0008238012669607997, + "logits/chosen": -1.639998197555542, + "logits/rejected": -1.4342682361602783, + "logps/chosen": -0.013574797660112381, + "logps/rejected": -15.043696403503418, + "loss": 0.0875, + "nll_loss": 0.07036659121513367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013574798358604312, + "rewards/margins": 1.5030121803283691, + "rewards/rejected": -1.5043697357177734, + "step": 1329 + }, + { + "epoch": 2.60019550342131, + "grad_norm": 0.24998827278614044, + "learning_rate": 6.784083496412264e-06, + "log_odds_chosen": 19.358596801757812, + "log_odds_ratio": -0.00031845844932831824, + "logits/chosen": -1.7752940654754639, + "logits/rejected": -1.4613628387451172, + "logps/chosen": -0.01480877585709095, + "logps/rejected": -15.132888793945312, + "loss": 0.0892, + "nll_loss": 0.07601015269756317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014808776322752237, + "rewards/margins": 1.511807918548584, + "rewards/rejected": -1.5132888555526733, + "step": 1330 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.2401730716228485, + "learning_rate": 6.75146771037182e-06, + "log_odds_chosen": 21.97699546813965, + "log_odds_ratio": -0.00011845905100926757, + "logits/chosen": -1.741469144821167, + "logits/rejected": -1.4425603151321411, + "logps/chosen": -0.01936524175107479, + "logps/rejected": -17.95206069946289, + "loss": 0.0859, + "nll_loss": 0.09575821459293365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019365240586921573, + "rewards/margins": 1.793269395828247, + "rewards/rejected": -1.795206069946289, + "step": 1331 + }, + { + "epoch": 2.6041055718475072, + "grad_norm": 0.2600780129432678, + "learning_rate": 6.718851924331376e-06, + "log_odds_chosen": 16.254159927368164, + "log_odds_ratio": -0.0004254962259437889, + "logits/chosen": -1.5901050567626953, + "logits/rejected": -1.6713459491729736, + "logps/chosen": -0.020995428785681725, + "logps/rejected": -12.20155143737793, + "loss": 0.0901, + "nll_loss": 0.09587657451629639, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020995428785681725, + "rewards/margins": 1.2180556058883667, + "rewards/rejected": -1.220155119895935, + "step": 1332 + }, + { + "epoch": 2.606060606060606, + "grad_norm": 0.23994995653629303, + "learning_rate": 6.686236138290933e-06, + "log_odds_chosen": 15.498520851135254, + "log_odds_ratio": -0.00020103671704418957, + "logits/chosen": -1.5241594314575195, + "logits/rejected": -1.232263445854187, + "logps/chosen": -0.013362068682909012, + "logps/rejected": -11.115100860595703, + "loss": 0.0873, + "nll_loss": 0.11826515197753906, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00133620691485703, + "rewards/margins": 1.1101739406585693, + "rewards/rejected": -1.1115100383758545, + "step": 1333 + }, + { + "epoch": 2.6080156402737047, + "grad_norm": 0.23701094090938568, + "learning_rate": 6.653620352250489e-06, + "log_odds_chosen": 10.160993576049805, + "log_odds_ratio": -0.0007732841186225414, + "logits/chosen": -1.9960887432098389, + "logits/rejected": -1.8302693367004395, + "logps/chosen": -0.01443202793598175, + "logps/rejected": -5.902247428894043, + "loss": 0.087, + "nll_loss": 0.08325265347957611, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014432028401643038, + "rewards/margins": 0.5887815356254578, + "rewards/rejected": -0.5902247428894043, + "step": 1334 + }, + { + "epoch": 2.6099706744868034, + "grad_norm": 0.23304416239261627, + "learning_rate": 6.621004566210046e-06, + "log_odds_chosen": 13.99232292175293, + "log_odds_ratio": -0.0005738055915571749, + "logits/chosen": -1.6718707084655762, + "logits/rejected": -1.6465710401535034, + "logps/chosen": -0.014607448130846024, + "logps/rejected": -9.77053451538086, + "loss": 0.0854, + "nll_loss": 0.08190443366765976, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014607447665184736, + "rewards/margins": 0.9755927324295044, + "rewards/rejected": -0.9770535230636597, + "step": 1335 + }, + { + "epoch": 2.611925708699902, + "grad_norm": 0.24231666326522827, + "learning_rate": 6.588388780169602e-06, + "log_odds_chosen": 17.2911434173584, + "log_odds_ratio": -0.00026823923690244555, + "logits/chosen": -1.710827112197876, + "logits/rejected": -1.515312910079956, + "logps/chosen": -0.01393210981041193, + "logps/rejected": -12.809952735900879, + "loss": 0.087, + "nll_loss": 0.10018889605998993, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013932108413428068, + "rewards/margins": 1.27960205078125, + "rewards/rejected": -1.2809953689575195, + "step": 1336 + }, + { + "epoch": 2.613880742913001, + "grad_norm": 0.2534627318382263, + "learning_rate": 6.555772994129158e-06, + "log_odds_chosen": 17.00023651123047, + "log_odds_ratio": -0.00013906984531786293, + "logits/chosen": -1.7243061065673828, + "logits/rejected": -1.5886945724487305, + "logps/chosen": -0.013622887432575226, + "logps/rejected": -12.643990516662598, + "loss": 0.0896, + "nll_loss": 0.07347705960273743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013622887199744582, + "rewards/margins": 1.2630367279052734, + "rewards/rejected": -1.2643990516662598, + "step": 1337 + }, + { + "epoch": 2.6158357771260996, + "grad_norm": 0.2483213245868683, + "learning_rate": 6.523157208088715e-06, + "log_odds_chosen": 20.324172973632812, + "log_odds_ratio": -6.417220720322803e-05, + "logits/chosen": -1.699568748474121, + "logits/rejected": -1.4486973285675049, + "logps/chosen": -0.013148881494998932, + "logps/rejected": -15.908246994018555, + "loss": 0.0865, + "nll_loss": 0.08532916009426117, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013148881262168288, + "rewards/margins": 1.5895097255706787, + "rewards/rejected": -1.5908246040344238, + "step": 1338 + }, + { + "epoch": 2.6177908113391983, + "grad_norm": 0.23430538177490234, + "learning_rate": 6.490541422048271e-06, + "log_odds_chosen": 13.660886764526367, + "log_odds_ratio": -0.000558268919121474, + "logits/chosen": -1.7452555894851685, + "logits/rejected": -1.7881484031677246, + "logps/chosen": -0.02032749354839325, + "logps/rejected": -9.381126403808594, + "loss": 0.0853, + "nll_loss": 0.07862603664398193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002032749354839325, + "rewards/margins": 0.9360799193382263, + "rewards/rejected": -0.938112735748291, + "step": 1339 + }, + { + "epoch": 2.619745845552297, + "grad_norm": 0.24682730436325073, + "learning_rate": 6.457925636007828e-06, + "log_odds_chosen": 13.754528045654297, + "log_odds_ratio": -0.0006615237798541784, + "logits/chosen": -1.8595807552337646, + "logits/rejected": -1.5209081172943115, + "logps/chosen": -0.013469253666698933, + "logps/rejected": -9.389045715332031, + "loss": 0.0866, + "nll_loss": 0.07781411707401276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013469253899529576, + "rewards/margins": 0.937557578086853, + "rewards/rejected": -0.9389045238494873, + "step": 1340 + }, + { + "epoch": 2.621700879765396, + "grad_norm": 0.2400364726781845, + "learning_rate": 6.425309849967384e-06, + "log_odds_chosen": 12.80472183227539, + "log_odds_ratio": -0.0004606802831403911, + "logits/chosen": -1.6204558610916138, + "logits/rejected": -1.5347530841827393, + "logps/chosen": -0.01408481691032648, + "logps/rejected": -8.429159164428711, + "loss": 0.0861, + "nll_loss": 0.13296519219875336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001408481621183455, + "rewards/margins": 0.8415073752403259, + "rewards/rejected": -0.8429158926010132, + "step": 1341 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.2327166348695755, + "learning_rate": 6.39269406392694e-06, + "log_odds_chosen": 17.034038543701172, + "log_odds_ratio": -0.0004244124284014106, + "logits/chosen": -1.6390163898468018, + "logits/rejected": -1.5390393733978271, + "logps/chosen": -0.01656501553952694, + "logps/rejected": -12.772520065307617, + "loss": 0.0827, + "nll_loss": 0.07288827002048492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016565017867833376, + "rewards/margins": 1.2755954265594482, + "rewards/rejected": -1.277251958847046, + "step": 1342 + }, + { + "epoch": 2.6256109481915932, + "grad_norm": 0.2705477178096771, + "learning_rate": 6.360078277886497e-06, + "log_odds_chosen": 17.538471221923828, + "log_odds_ratio": -0.00012071416131220758, + "logits/chosen": -1.640160083770752, + "logits/rejected": -1.7855037450790405, + "logps/chosen": -0.013881677761673927, + "logps/rejected": -13.255147933959961, + "loss": 0.0908, + "nll_loss": 0.0675792545080185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001388167729601264, + "rewards/margins": 1.3241266012191772, + "rewards/rejected": -1.325514793395996, + "step": 1343 + }, + { + "epoch": 2.627565982404692, + "grad_norm": 0.2410832643508911, + "learning_rate": 6.327462491846053e-06, + "log_odds_chosen": 18.097938537597656, + "log_odds_ratio": -0.0003732507466338575, + "logits/chosen": -1.6476906538009644, + "logits/rejected": -1.3248918056488037, + "logps/chosen": -0.014024168252944946, + "logps/rejected": -13.633408546447754, + "loss": 0.0853, + "nll_loss": 0.08003231883049011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014024167321622372, + "rewards/margins": 1.3619384765625, + "rewards/rejected": -1.3633408546447754, + "step": 1344 + }, + { + "epoch": 2.6295210166177907, + "grad_norm": 0.23784184455871582, + "learning_rate": 6.29484670580561e-06, + "log_odds_chosen": 21.21839141845703, + "log_odds_ratio": -8.364830864593387e-05, + "logits/chosen": -1.7110986709594727, + "logits/rejected": -1.4671964645385742, + "logps/chosen": -0.01489553414285183, + "logps/rejected": -16.99435806274414, + "loss": 0.0847, + "nll_loss": 0.08160245418548584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014895536005496979, + "rewards/margins": 1.697946548461914, + "rewards/rejected": -1.6994359493255615, + "step": 1345 + }, + { + "epoch": 2.6314760508308894, + "grad_norm": 0.2445104867219925, + "learning_rate": 6.262230919765166e-06, + "log_odds_chosen": 13.063493728637695, + "log_odds_ratio": -0.0005255556898191571, + "logits/chosen": -1.6997287273406982, + "logits/rejected": -1.803673505783081, + "logps/chosen": -0.013140164315700531, + "logps/rejected": -8.623611450195312, + "loss": 0.0846, + "nll_loss": 0.09555156528949738, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013140165247023106, + "rewards/margins": 0.8610472679138184, + "rewards/rejected": -0.8623612523078918, + "step": 1346 + }, + { + "epoch": 2.633431085043988, + "grad_norm": 0.24205578863620758, + "learning_rate": 6.2296151337247224e-06, + "log_odds_chosen": 20.070083618164062, + "log_odds_ratio": -0.00019749600323848426, + "logits/chosen": -1.744889736175537, + "logits/rejected": -1.3481817245483398, + "logps/chosen": -0.018038075417280197, + "logps/rejected": -15.878373146057129, + "loss": 0.0849, + "nll_loss": 0.0983564704656601, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001803807681426406, + "rewards/margins": 1.586033582687378, + "rewards/rejected": -1.5878372192382812, + "step": 1347 + }, + { + "epoch": 2.635386119257087, + "grad_norm": 0.23238857090473175, + "learning_rate": 6.196999347684279e-06, + "log_odds_chosen": 14.707376480102539, + "log_odds_ratio": -0.0004916730104014277, + "logits/chosen": -1.67202627658844, + "logits/rejected": -1.7609057426452637, + "logps/chosen": -0.012145262211561203, + "logps/rejected": -10.237564086914062, + "loss": 0.0865, + "nll_loss": 0.10524396598339081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012145262444391847, + "rewards/margins": 1.022541880607605, + "rewards/rejected": -1.023756504058838, + "step": 1348 + }, + { + "epoch": 2.6373411534701856, + "grad_norm": 0.23451533913612366, + "learning_rate": 6.1643835616438354e-06, + "log_odds_chosen": 12.80135726928711, + "log_odds_ratio": -0.0026504378765821457, + "logits/chosen": -1.6081392765045166, + "logits/rejected": -1.5417462587356567, + "logps/chosen": -0.02422334812581539, + "logps/rejected": -8.86054801940918, + "loss": 0.0846, + "nll_loss": 0.10687489062547684, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0024223346263170242, + "rewards/margins": 0.8836324214935303, + "rewards/rejected": -0.8860548734664917, + "step": 1349 + }, + { + "epoch": 2.6392961876832843, + "grad_norm": 0.24089816212654114, + "learning_rate": 6.131767775603392e-06, + "log_odds_chosen": 13.759660720825195, + "log_odds_ratio": -0.00015274703036993742, + "logits/chosen": -1.7945022583007812, + "logits/rejected": -1.5482336282730103, + "logps/chosen": -0.016995105892419815, + "logps/rejected": -9.457677841186523, + "loss": 0.087, + "nll_loss": 0.09079216420650482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001699510496109724, + "rewards/margins": 0.944068193435669, + "rewards/rejected": -0.9457676410675049, + "step": 1350 + }, + { + "epoch": 2.641251221896383, + "grad_norm": 0.2402338683605194, + "learning_rate": 6.0991519895629485e-06, + "log_odds_chosen": 14.108261108398438, + "log_odds_ratio": -0.0009484735783189535, + "logits/chosen": -1.6916444301605225, + "logits/rejected": -1.654512882232666, + "logps/chosen": -0.013676717877388, + "logps/rejected": -9.776981353759766, + "loss": 0.0868, + "nll_loss": 0.06829557567834854, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013676716480404139, + "rewards/margins": 0.9763305187225342, + "rewards/rejected": -0.9776982069015503, + "step": 1351 + }, + { + "epoch": 2.643206256109482, + "grad_norm": 0.2354341447353363, + "learning_rate": 6.0665362035225046e-06, + "log_odds_chosen": 15.685755729675293, + "log_odds_ratio": -0.0014591357903555036, + "logits/chosen": -1.5196572542190552, + "logits/rejected": -1.3737561702728271, + "logps/chosen": -0.02083222009241581, + "logps/rejected": -11.405620574951172, + "loss": 0.0854, + "nll_loss": 0.08655448257923126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020832219161093235, + "rewards/margins": 1.1384787559509277, + "rewards/rejected": -1.1405620574951172, + "step": 1352 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.24098795652389526, + "learning_rate": 6.0339204174820615e-06, + "log_odds_chosen": 12.744668960571289, + "log_odds_ratio": -0.0005049330648034811, + "logits/chosen": -1.6441378593444824, + "logits/rejected": -1.818966269493103, + "logps/chosen": -0.012312259525060654, + "logps/rejected": -8.350361824035645, + "loss": 0.0867, + "nll_loss": 0.09554189443588257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012312261387705803, + "rewards/margins": 0.8338050246238708, + "rewards/rejected": -0.8350362181663513, + "step": 1353 + }, + { + "epoch": 2.6471163245356792, + "grad_norm": 0.23993919789791107, + "learning_rate": 6.0013046314416176e-06, + "log_odds_chosen": 13.28754997253418, + "log_odds_ratio": -0.0012474025133997202, + "logits/chosen": -1.475282907485962, + "logits/rejected": -1.2891664505004883, + "logps/chosen": -0.014415562152862549, + "logps/rejected": -9.059569358825684, + "loss": 0.086, + "nll_loss": 0.12120013684034348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014415562618523836, + "rewards/margins": 0.9045153856277466, + "rewards/rejected": -0.9059569835662842, + "step": 1354 + }, + { + "epoch": 2.649071358748778, + "grad_norm": 0.2410915642976761, + "learning_rate": 5.9686888454011745e-06, + "log_odds_chosen": 15.7241849899292, + "log_odds_ratio": -0.0004126928106416017, + "logits/chosen": -1.8729097843170166, + "logits/rejected": -1.7591290473937988, + "logps/chosen": -0.015176977962255478, + "logps/rejected": -11.448503494262695, + "loss": 0.0832, + "nll_loss": 0.07761097699403763, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001517697935923934, + "rewards/margins": 1.1433327198028564, + "rewards/rejected": -1.1448503732681274, + "step": 1355 + }, + { + "epoch": 2.6510263929618767, + "grad_norm": 0.2388414889574051, + "learning_rate": 5.936073059360731e-06, + "log_odds_chosen": 13.448254585266113, + "log_odds_ratio": -0.0009103753254748881, + "logits/chosen": -1.690864086151123, + "logits/rejected": -1.6073575019836426, + "logps/chosen": -0.015028866939246655, + "logps/rejected": -9.144311904907227, + "loss": 0.0853, + "nll_loss": 0.09348151832818985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015028868801891804, + "rewards/margins": 0.9129283428192139, + "rewards/rejected": -0.9144312143325806, + "step": 1356 + }, + { + "epoch": 2.6529814271749754, + "grad_norm": 0.2351667881011963, + "learning_rate": 5.903457273320287e-06, + "log_odds_chosen": 22.9150390625, + "log_odds_ratio": -1.9570097720134072e-05, + "logits/chosen": -1.9205691814422607, + "logits/rejected": -1.476154088973999, + "logps/chosen": -0.012685487046837807, + "logps/rejected": -18.41855239868164, + "loss": 0.0845, + "nll_loss": 0.07350467145442963, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001268548658117652, + "rewards/margins": 1.8405870199203491, + "rewards/rejected": -1.841855525970459, + "step": 1357 + }, + { + "epoch": 2.654936461388074, + "grad_norm": 0.23467324674129486, + "learning_rate": 5.870841487279844e-06, + "log_odds_chosen": 18.979476928710938, + "log_odds_ratio": -0.0005919545656070113, + "logits/chosen": -1.9819082021713257, + "logits/rejected": -1.3798394203186035, + "logps/chosen": -0.01473043579608202, + "logps/rejected": -14.764744758605957, + "loss": 0.0835, + "nll_loss": 0.09351979196071625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014730436960235238, + "rewards/margins": 1.475001573562622, + "rewards/rejected": -1.4764745235443115, + "step": 1358 + }, + { + "epoch": 2.656891495601173, + "grad_norm": 0.23003219068050385, + "learning_rate": 5.8382257012394e-06, + "log_odds_chosen": 18.170862197875977, + "log_odds_ratio": -0.0002654770214576274, + "logits/chosen": -1.7958910465240479, + "logits/rejected": -1.4729522466659546, + "logps/chosen": -0.015741895884275436, + "logps/rejected": -13.829227447509766, + "loss": 0.0818, + "nll_loss": 0.07029322534799576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015741897514089942, + "rewards/margins": 1.3813486099243164, + "rewards/rejected": -1.3829227685928345, + "step": 1359 + }, + { + "epoch": 2.6588465298142716, + "grad_norm": 0.240265354514122, + "learning_rate": 5.805609915198957e-06, + "log_odds_chosen": 18.73770523071289, + "log_odds_ratio": -0.00017368695989716798, + "logits/chosen": -1.9022865295410156, + "logits/rejected": -1.5422135591506958, + "logps/chosen": -0.014644755981862545, + "logps/rejected": -14.443714141845703, + "loss": 0.0861, + "nll_loss": 0.07107584923505783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014644755283370614, + "rewards/margins": 1.4429068565368652, + "rewards/rejected": -1.444371223449707, + "step": 1360 + }, + { + "epoch": 2.6608015640273703, + "grad_norm": 0.23693592846393585, + "learning_rate": 5.772994129158513e-06, + "log_odds_chosen": 14.430854797363281, + "log_odds_ratio": -0.0006007493939250708, + "logits/chosen": -1.6938350200653076, + "logits/rejected": -1.8243266344070435, + "logps/chosen": -0.01782204583287239, + "logps/rejected": -10.350326538085938, + "loss": 0.0851, + "nll_loss": 0.07877586781978607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017822046065703034, + "rewards/margins": 1.033250331878662, + "rewards/rejected": -1.0350326299667358, + "step": 1361 + }, + { + "epoch": 2.662756598240469, + "grad_norm": 0.24746550619602203, + "learning_rate": 5.740378343118069e-06, + "log_odds_chosen": 15.373712539672852, + "log_odds_ratio": -0.0006012669182382524, + "logits/chosen": -1.8313629627227783, + "logits/rejected": -1.6752997636795044, + "logps/chosen": -0.018561899662017822, + "logps/rejected": -11.282917022705078, + "loss": 0.0854, + "nll_loss": 0.08049023151397705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0018561899196356535, + "rewards/margins": 1.1264355182647705, + "rewards/rejected": -1.1282917261123657, + "step": 1362 + }, + { + "epoch": 2.664711632453568, + "grad_norm": 0.226264089345932, + "learning_rate": 5.707762557077626e-06, + "log_odds_chosen": 20.38063621520996, + "log_odds_ratio": -6.084171764086932e-05, + "logits/chosen": -1.6630607843399048, + "logits/rejected": -1.4614766836166382, + "logps/chosen": -0.014852493070065975, + "logps/rejected": -16.100387573242188, + "loss": 0.0838, + "nll_loss": 0.07216036319732666, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014852492604404688, + "rewards/margins": 1.6085535287857056, + "rewards/rejected": -1.6100387573242188, + "step": 1363 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.23460572957992554, + "learning_rate": 5.675146771037182e-06, + "log_odds_chosen": 14.906927108764648, + "log_odds_ratio": -0.00046487763756886125, + "logits/chosen": -1.8731887340545654, + "logits/rejected": -1.6688058376312256, + "logps/chosen": -0.016944868490099907, + "logps/rejected": -10.790531158447266, + "loss": 0.0851, + "nll_loss": 0.07002277672290802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016944869421422482, + "rewards/margins": 1.077358603477478, + "rewards/rejected": -1.0790531635284424, + "step": 1364 + }, + { + "epoch": 2.6686217008797652, + "grad_norm": 0.23652450740337372, + "learning_rate": 5.642530984996739e-06, + "log_odds_chosen": 19.21593475341797, + "log_odds_ratio": -0.00041264190804213285, + "logits/chosen": -1.8361032009124756, + "logits/rejected": -1.6263864040374756, + "logps/chosen": -0.014743195846676826, + "logps/rejected": -14.892086029052734, + "loss": 0.0845, + "nll_loss": 0.08839753270149231, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014743197243660688, + "rewards/margins": 1.487734317779541, + "rewards/rejected": -1.489208698272705, + "step": 1365 + }, + { + "epoch": 2.670576735092864, + "grad_norm": 0.22367554903030396, + "learning_rate": 5.609915198956295e-06, + "log_odds_chosen": 18.581350326538086, + "log_odds_ratio": -0.00035246970946900547, + "logits/chosen": -1.8868123292922974, + "logits/rejected": -1.4640965461730957, + "logps/chosen": -0.01950150914490223, + "logps/rejected": -14.372889518737793, + "loss": 0.0842, + "nll_loss": 0.07851099967956543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019501510541886091, + "rewards/margins": 1.4353389739990234, + "rewards/rejected": -1.4372889995574951, + "step": 1366 + }, + { + "epoch": 2.6725317693059627, + "grad_norm": 0.22806672751903534, + "learning_rate": 5.577299412915851e-06, + "log_odds_chosen": 13.219954490661621, + "log_odds_ratio": -0.000671424379106611, + "logits/chosen": -1.7096527814865112, + "logits/rejected": -1.490918517112732, + "logps/chosen": -0.014840124174952507, + "logps/rejected": -8.912649154663086, + "loss": 0.084, + "nll_loss": 0.07914505898952484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014840123476460576, + "rewards/margins": 0.8897808790206909, + "rewards/rejected": -0.8912649154663086, + "step": 1367 + }, + { + "epoch": 2.6744868035190614, + "grad_norm": 0.2532554566860199, + "learning_rate": 5.544683626875408e-06, + "log_odds_chosen": 11.495140075683594, + "log_odds_ratio": -0.0015552756376564503, + "logits/chosen": -1.6854774951934814, + "logits/rejected": -1.7192832231521606, + "logps/chosen": -0.01476674061268568, + "logps/rejected": -7.133611679077148, + "loss": 0.0871, + "nll_loss": 0.08100435137748718, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001476673991419375, + "rewards/margins": 0.7118844985961914, + "rewards/rejected": -0.7133611440658569, + "step": 1368 + }, + { + "epoch": 2.67644183773216, + "grad_norm": 0.22560283541679382, + "learning_rate": 5.512067840834964e-06, + "log_odds_chosen": 17.532136917114258, + "log_odds_ratio": -0.00024910556385293603, + "logits/chosen": -1.8475593328475952, + "logits/rejected": -1.538378119468689, + "logps/chosen": -0.010560798458755016, + "logps/rejected": -12.940755844116211, + "loss": 0.0828, + "nll_loss": 0.06715281307697296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001056079869158566, + "rewards/margins": 1.2930195331573486, + "rewards/rejected": -1.2940754890441895, + "step": 1369 + }, + { + "epoch": 2.678396871945259, + "grad_norm": 0.2460900992155075, + "learning_rate": 5.479452054794521e-06, + "log_odds_chosen": 17.023550033569336, + "log_odds_ratio": -0.0004560156085062772, + "logits/chosen": -1.7133039236068726, + "logits/rejected": -1.393816590309143, + "logps/chosen": -0.0200082678347826, + "logps/rejected": -12.712419509887695, + "loss": 0.0868, + "nll_loss": 0.10922148823738098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020008268766105175, + "rewards/margins": 1.2692410945892334, + "rewards/rejected": -1.2712419033050537, + "step": 1370 + }, + { + "epoch": 2.6803519061583576, + "grad_norm": 0.22542138397693634, + "learning_rate": 5.446836268754077e-06, + "log_odds_chosen": 11.270610809326172, + "log_odds_ratio": -0.0008602000307291746, + "logits/chosen": -1.6068685054779053, + "logits/rejected": -1.5642797946929932, + "logps/chosen": -0.01335608959197998, + "logps/rejected": -6.89227294921875, + "loss": 0.084, + "nll_loss": 0.07813853025436401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013356090057641268, + "rewards/margins": 0.6878917217254639, + "rewards/rejected": -0.689227283000946, + "step": 1371 + }, + { + "epoch": 2.6823069403714563, + "grad_norm": 0.23530328273773193, + "learning_rate": 5.414220482713633e-06, + "log_odds_chosen": 15.844158172607422, + "log_odds_ratio": -0.00041987854638136923, + "logits/chosen": -1.8388903141021729, + "logits/rejected": -1.5387542247772217, + "logps/chosen": -0.019701343029737473, + "logps/rejected": -11.841022491455078, + "loss": 0.0854, + "nll_loss": 0.07931030541658401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00197013420984149, + "rewards/margins": 1.1821321249008179, + "rewards/rejected": -1.1841022968292236, + "step": 1372 + }, + { + "epoch": 2.684261974584555, + "grad_norm": 0.22017863392829895, + "learning_rate": 5.38160469667319e-06, + "log_odds_chosen": 23.469615936279297, + "log_odds_ratio": -7.22427103028167e-06, + "logits/chosen": -1.6907424926757812, + "logits/rejected": -1.4230047464370728, + "logps/chosen": -0.017254360020160675, + "logps/rejected": -19.327312469482422, + "loss": 0.0837, + "nll_loss": 0.07204738259315491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001725435839034617, + "rewards/margins": 1.9310057163238525, + "rewards/rejected": -1.9327311515808105, + "step": 1373 + }, + { + "epoch": 2.686217008797654, + "grad_norm": 0.22322295606136322, + "learning_rate": 5.348988910632746e-06, + "log_odds_chosen": 15.522857666015625, + "log_odds_ratio": -0.00032473012106493115, + "logits/chosen": -1.765173316001892, + "logits/rejected": -1.545440912246704, + "logps/chosen": -0.014367840252816677, + "logps/rejected": -11.204596519470215, + "loss": 0.0813, + "nll_loss": 0.07623310387134552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001436784164980054, + "rewards/margins": 1.1190228462219238, + "rewards/rejected": -1.1204596757888794, + "step": 1374 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.2239384949207306, + "learning_rate": 5.316373124592303e-06, + "log_odds_chosen": 21.608154296875, + "log_odds_ratio": -0.00018215530144516379, + "logits/chosen": -1.7844113111495972, + "logits/rejected": -1.4685436487197876, + "logps/chosen": -0.011265195906162262, + "logps/rejected": -17.019363403320312, + "loss": 0.0828, + "nll_loss": 0.08479063212871552, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001126519520767033, + "rewards/margins": 1.7008099555969238, + "rewards/rejected": -1.7019362449645996, + "step": 1375 + }, + { + "epoch": 2.6901270772238517, + "grad_norm": 0.24183540046215057, + "learning_rate": 5.283757338551859e-06, + "log_odds_chosen": 17.40275764465332, + "log_odds_ratio": -8.42968511278741e-05, + "logits/chosen": -1.735642910003662, + "logits/rejected": -1.5158191919326782, + "logps/chosen": -0.01976381056010723, + "logps/rejected": -12.950923919677734, + "loss": 0.0849, + "nll_loss": 0.10276223719120026, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019763812888413668, + "rewards/margins": 1.2931162118911743, + "rewards/rejected": -1.2950925827026367, + "step": 1376 + }, + { + "epoch": 2.6920821114369504, + "grad_norm": 0.22180967032909393, + "learning_rate": 5.251141552511415e-06, + "log_odds_chosen": 17.329509735107422, + "log_odds_ratio": -8.301735215354711e-05, + "logits/chosen": -1.717423439025879, + "logits/rejected": -1.674834966659546, + "logps/chosen": -0.016704168170690536, + "logps/rejected": -13.049413681030273, + "loss": 0.0817, + "nll_loss": 0.07942169159650803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001670417026616633, + "rewards/margins": 1.303270936012268, + "rewards/rejected": -1.3049412965774536, + "step": 1377 + }, + { + "epoch": 2.694037145650049, + "grad_norm": 0.22977374494075775, + "learning_rate": 5.218525766470972e-06, + "log_odds_chosen": 14.812623977661133, + "log_odds_ratio": -0.0008930329931899905, + "logits/chosen": -1.6756062507629395, + "logits/rejected": -1.7540287971496582, + "logps/chosen": -0.01933005452156067, + "logps/rejected": -10.772614479064941, + "loss": 0.0822, + "nll_loss": 0.08293505012989044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019330056384205818, + "rewards/margins": 1.0753283500671387, + "rewards/rejected": -1.0772614479064941, + "step": 1378 + }, + { + "epoch": 2.695992179863148, + "grad_norm": 0.22190766036510468, + "learning_rate": 5.185909980430528e-06, + "log_odds_chosen": 17.793537139892578, + "log_odds_ratio": -1.5378072930616327e-06, + "logits/chosen": -1.6444168090820312, + "logits/rejected": -1.639197587966919, + "logps/chosen": -0.01601443812251091, + "logps/rejected": -13.506669998168945, + "loss": 0.0822, + "nll_loss": 0.0865546241402626, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001601443742401898, + "rewards/margins": 1.3490654230117798, + "rewards/rejected": -1.3506669998168945, + "step": 1379 + }, + { + "epoch": 2.6979472140762466, + "grad_norm": 0.2310553640127182, + "learning_rate": 5.153294194390085e-06, + "log_odds_chosen": 16.08123016357422, + "log_odds_ratio": -0.00031957909232005477, + "logits/chosen": -1.6531232595443726, + "logits/rejected": -1.465660572052002, + "logps/chosen": -0.01050543412566185, + "logps/rejected": -11.445451736450195, + "loss": 0.0831, + "nll_loss": 0.08018959313631058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010505433892831206, + "rewards/margins": 1.143494725227356, + "rewards/rejected": -1.144545316696167, + "step": 1380 + }, + { + "epoch": 2.6999022482893453, + "grad_norm": 0.2281239777803421, + "learning_rate": 5.120678408349641e-06, + "log_odds_chosen": 19.85298728942871, + "log_odds_ratio": -0.00030119583243504167, + "logits/chosen": -1.6838781833648682, + "logits/rejected": -1.535907506942749, + "logps/chosen": -0.01762087456882, + "logps/rejected": -15.677340507507324, + "loss": 0.0819, + "nll_loss": 0.08098310977220535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017620873404666781, + "rewards/margins": 1.5659719705581665, + "rewards/rejected": -1.5677341222763062, + "step": 1381 + }, + { + "epoch": 2.701857282502444, + "grad_norm": 0.22207678854465485, + "learning_rate": 5.088062622309197e-06, + "log_odds_chosen": 21.926128387451172, + "log_odds_ratio": -0.00045934069203212857, + "logits/chosen": -1.745086908340454, + "logits/rejected": -1.3466291427612305, + "logps/chosen": -0.015750480815768242, + "logps/rejected": -17.760311126708984, + "loss": 0.0827, + "nll_loss": 0.0814022570848465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015750480815768242, + "rewards/margins": 1.774456262588501, + "rewards/rejected": -1.776031255722046, + "step": 1382 + }, + { + "epoch": 2.703812316715543, + "grad_norm": 0.2081715613603592, + "learning_rate": 5.055446836268754e-06, + "log_odds_chosen": 16.320194244384766, + "log_odds_ratio": -0.0002969941997434944, + "logits/chosen": -1.7491111755371094, + "logits/rejected": -1.5619745254516602, + "logps/chosen": -0.012107864953577518, + "logps/rejected": -11.847797393798828, + "loss": 0.0809, + "nll_loss": 0.07700228691101074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012107864022254944, + "rewards/margins": 1.183569073677063, + "rewards/rejected": -1.1847798824310303, + "step": 1383 + }, + { + "epoch": 2.7057673509286415, + "grad_norm": 0.2198181003332138, + "learning_rate": 5.02283105022831e-06, + "log_odds_chosen": 18.501585006713867, + "log_odds_ratio": -0.00036069334601052105, + "logits/chosen": -1.6897950172424316, + "logits/rejected": -1.383049488067627, + "logps/chosen": -0.012394314631819725, + "logps/rejected": -14.074163436889648, + "loss": 0.0819, + "nll_loss": 0.08540821075439453, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00123943155631423, + "rewards/margins": 1.4061768054962158, + "rewards/rejected": -1.4074163436889648, + "step": 1384 + }, + { + "epoch": 2.7077223851417402, + "grad_norm": 0.23383906483650208, + "learning_rate": 4.990215264187867e-06, + "log_odds_chosen": 17.729589462280273, + "log_odds_ratio": -0.0006591804558411241, + "logits/chosen": -1.8223004341125488, + "logits/rejected": -1.4805347919464111, + "logps/chosen": -0.009518935345113277, + "logps/rejected": -12.95673942565918, + "loss": 0.0828, + "nll_loss": 0.08434778451919556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009518934530206025, + "rewards/margins": 1.294722080230713, + "rewards/rejected": -1.2956739664077759, + "step": 1385 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.22375398874282837, + "learning_rate": 4.957599478147423e-06, + "log_odds_chosen": 17.074357986450195, + "log_odds_ratio": -0.0009208754636347294, + "logits/chosen": -1.6738367080688477, + "logits/rejected": -1.5142278671264648, + "logps/chosen": -0.010684634558856487, + "logps/rejected": -12.401653289794922, + "loss": 0.0815, + "nll_loss": 0.09922201931476593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010684635490179062, + "rewards/margins": 1.2390968799591064, + "rewards/rejected": -1.24016535282135, + "step": 1386 + }, + { + "epoch": 2.7116324535679377, + "grad_norm": 0.21748141944408417, + "learning_rate": 4.924983692106979e-06, + "log_odds_chosen": 14.436260223388672, + "log_odds_ratio": -0.0003858639975078404, + "logits/chosen": -1.7435328960418701, + "logits/rejected": -1.5676829814910889, + "logps/chosen": -0.012636867351830006, + "logps/rejected": -9.99660873413086, + "loss": 0.083, + "nll_loss": 0.07866960763931274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012636866886168718, + "rewards/margins": 0.9983971118927002, + "rewards/rejected": -0.999660849571228, + "step": 1387 + }, + { + "epoch": 2.7135874877810364, + "grad_norm": 0.23223426938056946, + "learning_rate": 4.892367906066536e-06, + "log_odds_chosen": 15.260448455810547, + "log_odds_ratio": -0.0003057975263800472, + "logits/chosen": -1.7058593034744263, + "logits/rejected": -1.5251952409744263, + "logps/chosen": -0.01138223521411419, + "logps/rejected": -10.759020805358887, + "loss": 0.0834, + "nll_loss": 0.09914039075374603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011382234515622258, + "rewards/margins": 1.0747637748718262, + "rewards/rejected": -1.075901985168457, + "step": 1388 + }, + { + "epoch": 2.715542521994135, + "grad_norm": 0.2293192446231842, + "learning_rate": 4.8597521200260924e-06, + "log_odds_chosen": 17.963756561279297, + "log_odds_ratio": -0.0007372437394224107, + "logits/chosen": -1.7489739656448364, + "logits/rejected": -1.5127437114715576, + "logps/chosen": -0.015912175178527832, + "logps/rejected": -13.774677276611328, + "loss": 0.0845, + "nll_loss": 0.07211339473724365, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015912174712866545, + "rewards/margins": 1.375876545906067, + "rewards/rejected": -1.3774677515029907, + "step": 1389 + }, + { + "epoch": 2.717497556207234, + "grad_norm": 0.22039449214935303, + "learning_rate": 4.827136333985649e-06, + "log_odds_chosen": 20.84225845336914, + "log_odds_ratio": -7.578605436719954e-05, + "logits/chosen": -1.7832390069961548, + "logits/rejected": -1.191445231437683, + "logps/chosen": -0.01745019666850567, + "logps/rejected": -16.512407302856445, + "loss": 0.0811, + "nll_loss": 0.09157788753509521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001745019806548953, + "rewards/margins": 1.6494958400726318, + "rewards/rejected": -1.6512408256530762, + "step": 1390 + }, + { + "epoch": 2.7194525904203326, + "grad_norm": 0.22289712727069855, + "learning_rate": 4.7945205479452054e-06, + "log_odds_chosen": 15.013643264770508, + "log_odds_ratio": -0.0009355657966807485, + "logits/chosen": -1.5876383781433105, + "logits/rejected": -1.658778190612793, + "logps/chosen": -0.015999846160411835, + "logps/rejected": -10.880146026611328, + "loss": 0.0824, + "nll_loss": 0.06578506529331207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015999844763427973, + "rewards/margins": 1.0864145755767822, + "rewards/rejected": -1.0880146026611328, + "step": 1391 + }, + { + "epoch": 2.7214076246334313, + "grad_norm": 0.23160883784294128, + "learning_rate": 4.7619047619047615e-06, + "log_odds_chosen": 12.815338134765625, + "log_odds_ratio": -0.0005428754957392812, + "logits/chosen": -1.7721686363220215, + "logits/rejected": -1.7140676975250244, + "logps/chosen": -0.01205015741288662, + "logps/rejected": -8.360174179077148, + "loss": 0.0835, + "nll_loss": 0.07096798717975616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012050156947225332, + "rewards/margins": 0.8348124027252197, + "rewards/rejected": -0.8360174298286438, + "step": 1392 + }, + { + "epoch": 2.72336265884653, + "grad_norm": 0.2194000631570816, + "learning_rate": 4.7292889758643185e-06, + "log_odds_chosen": 15.073657989501953, + "log_odds_ratio": -0.00015646274550817907, + "logits/chosen": -1.631828784942627, + "logits/rejected": -1.565638542175293, + "logps/chosen": -0.016023878008127213, + "logps/rejected": -10.638891220092773, + "loss": 0.084, + "nll_loss": 0.0877763107419014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016023877542465925, + "rewards/margins": 1.0622868537902832, + "rewards/rejected": -1.0638891458511353, + "step": 1393 + }, + { + "epoch": 2.725317693059629, + "grad_norm": 0.21407462656497955, + "learning_rate": 4.6966731898238745e-06, + "log_odds_chosen": 14.887929916381836, + "log_odds_ratio": -0.00025963137159124017, + "logits/chosen": -1.858780026435852, + "logits/rejected": -1.5729093551635742, + "logps/chosen": -0.012127503752708435, + "logps/rejected": -10.373006820678711, + "loss": 0.0809, + "nll_loss": 0.07164174318313599, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012127503287047148, + "rewards/margins": 1.0360878705978394, + "rewards/rejected": -1.0373005867004395, + "step": 1394 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 0.2394556701183319, + "learning_rate": 4.6640574037834315e-06, + "log_odds_chosen": 18.923702239990234, + "log_odds_ratio": -0.00040338310645893216, + "logits/chosen": -1.8224849700927734, + "logits/rejected": -1.504266381263733, + "logps/chosen": -0.012917052954435349, + "logps/rejected": -14.526187896728516, + "loss": 0.0867, + "nll_loss": 0.0783626139163971, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012917054118588567, + "rewards/margins": 1.4513270854949951, + "rewards/rejected": -1.4526188373565674, + "step": 1395 + }, + { + "epoch": 2.7292277614858262, + "grad_norm": 0.21908769011497498, + "learning_rate": 4.6314416177429876e-06, + "log_odds_chosen": 16.682220458984375, + "log_odds_ratio": -0.00029080803506076336, + "logits/chosen": -1.7472410202026367, + "logits/rejected": -1.6808364391326904, + "logps/chosen": -0.011761901900172234, + "logps/rejected": -12.15920352935791, + "loss": 0.0824, + "nll_loss": 0.08079066872596741, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001176190096884966, + "rewards/margins": 1.214744210243225, + "rewards/rejected": -1.215920329093933, + "step": 1396 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.21371623873710632, + "learning_rate": 4.598825831702544e-06, + "log_odds_chosen": 19.67323875427246, + "log_odds_ratio": -0.00020980305271223187, + "logits/chosen": -1.7635841369628906, + "logits/rejected": -1.3755505084991455, + "logps/chosen": -0.011126364581286907, + "logps/rejected": -15.086343765258789, + "loss": 0.0805, + "nll_loss": 0.08067172765731812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001112636411562562, + "rewards/margins": 1.507521629333496, + "rewards/rejected": -1.5086344480514526, + "step": 1397 + }, + { + "epoch": 2.7331378299120237, + "grad_norm": 0.22552798688411713, + "learning_rate": 4.566210045662101e-06, + "log_odds_chosen": 22.413043975830078, + "log_odds_ratio": -0.0001859797048382461, + "logits/chosen": -1.6815659999847412, + "logits/rejected": -1.62296724319458, + "logps/chosen": -0.013388799503445625, + "logps/rejected": -18.018810272216797, + "loss": 0.0818, + "nll_loss": 0.09595977514982224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013388799270614982, + "rewards/margins": 1.8005421161651611, + "rewards/rejected": -1.8018810749053955, + "step": 1398 + }, + { + "epoch": 2.7350928641251224, + "grad_norm": 0.21278339624404907, + "learning_rate": 4.533594259621657e-06, + "log_odds_chosen": 15.669225692749023, + "log_odds_ratio": -0.00036996009293943644, + "logits/chosen": -1.7248315811157227, + "logits/rejected": -1.489015817642212, + "logps/chosen": -0.011503364890813828, + "logps/rejected": -11.101966857910156, + "loss": 0.0808, + "nll_loss": 0.08662126213312149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011503364657983184, + "rewards/margins": 1.109046220779419, + "rewards/rejected": -1.110196590423584, + "step": 1399 + }, + { + "epoch": 2.737047898338221, + "grad_norm": 0.22082675993442535, + "learning_rate": 4.500978473581214e-06, + "log_odds_chosen": 14.10184383392334, + "log_odds_ratio": -0.0006277647917158902, + "logits/chosen": -1.7920900583267212, + "logits/rejected": -1.62939453125, + "logps/chosen": -0.020115256309509277, + "logps/rejected": -10.10966682434082, + "loss": 0.0851, + "nll_loss": 0.08496905863285065, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0020115259103477, + "rewards/margins": 1.0089552402496338, + "rewards/rejected": -1.0109667778015137, + "step": 1400 + }, + { + "epoch": 2.73900293255132, + "grad_norm": 0.22186191380023956, + "learning_rate": 4.46836268754077e-06, + "log_odds_chosen": 14.591611862182617, + "log_odds_ratio": -0.0011415660846978426, + "logits/chosen": -1.781114935874939, + "logits/rejected": -1.5118143558502197, + "logps/chosen": -0.01705213077366352, + "logps/rejected": -10.334091186523438, + "loss": 0.0828, + "nll_loss": 0.07527509331703186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017052129842340946, + "rewards/margins": 1.0317038297653198, + "rewards/rejected": -1.0334091186523438, + "step": 1401 + }, + { + "epoch": 2.7409579667644186, + "grad_norm": 0.21967096626758575, + "learning_rate": 4.435746901500326e-06, + "log_odds_chosen": 17.459388732910156, + "log_odds_ratio": -0.00047861627535894513, + "logits/chosen": -1.7119327783584595, + "logits/rejected": -1.7084481716156006, + "logps/chosen": -0.013440278358757496, + "logps/rejected": -13.069551467895508, + "loss": 0.0805, + "nll_loss": 0.07226152718067169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001344027929008007, + "rewards/margins": 1.305611252784729, + "rewards/rejected": -1.306955337524414, + "step": 1402 + }, + { + "epoch": 2.7429130009775173, + "grad_norm": 0.20690959692001343, + "learning_rate": 4.403131115459883e-06, + "log_odds_chosen": 9.697114944458008, + "log_odds_ratio": -0.0010019519831985235, + "logits/chosen": -1.734316110610962, + "logits/rejected": -1.6081664562225342, + "logps/chosen": -0.016272684559226036, + "logps/rejected": -5.291118144989014, + "loss": 0.081, + "nll_loss": 0.09545547515153885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016272685024887323, + "rewards/margins": 0.5274845361709595, + "rewards/rejected": -0.5291118621826172, + "step": 1403 + }, + { + "epoch": 2.744868035190616, + "grad_norm": 0.2121542990207672, + "learning_rate": 4.370515329419439e-06, + "log_odds_chosen": 10.650406837463379, + "log_odds_ratio": -0.0009865548927336931, + "logits/chosen": -1.6687700748443604, + "logits/rejected": -1.6315406560897827, + "logps/chosen": -0.011939805001020432, + "logps/rejected": -6.215253829956055, + "loss": 0.0802, + "nll_loss": 0.10688444972038269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011939803371205926, + "rewards/margins": 0.6203314661979675, + "rewards/rejected": -0.6215254068374634, + "step": 1404 + }, + { + "epoch": 2.746823069403715, + "grad_norm": 0.250916451215744, + "learning_rate": 4.337899543378996e-06, + "log_odds_chosen": 20.420589447021484, + "log_odds_ratio": -0.0005120772984810174, + "logits/chosen": -1.517376184463501, + "logits/rejected": -1.474517583847046, + "logps/chosen": -0.01327221468091011, + "logps/rejected": -16.07321548461914, + "loss": 0.0847, + "nll_loss": 0.083368681371212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013272215146571398, + "rewards/margins": 1.6059942245483398, + "rewards/rejected": -1.6073216199874878, + "step": 1405 + }, + { + "epoch": 2.7487781036168135, + "grad_norm": 0.22176022827625275, + "learning_rate": 4.305283757338552e-06, + "log_odds_chosen": 18.053112030029297, + "log_odds_ratio": -0.0003342803393024951, + "logits/chosen": -1.6479170322418213, + "logits/rejected": -1.4304425716400146, + "logps/chosen": -0.0104847252368927, + "logps/rejected": -13.497895240783691, + "loss": 0.0812, + "nll_loss": 0.07471203804016113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010484724771231413, + "rewards/margins": 1.348741054534912, + "rewards/rejected": -1.3497896194458008, + "step": 1406 + }, + { + "epoch": 2.7507331378299122, + "grad_norm": 0.21649028360843658, + "learning_rate": 4.272667971298108e-06, + "log_odds_chosen": 13.109201431274414, + "log_odds_ratio": -0.0006343978457152843, + "logits/chosen": -1.6810282468795776, + "logits/rejected": -1.5533556938171387, + "logps/chosen": -0.010841192677617073, + "logps/rejected": -8.552605628967285, + "loss": 0.0818, + "nll_loss": 0.07927624881267548, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001084119314327836, + "rewards/margins": 0.85417640209198, + "rewards/rejected": -0.8552605509757996, + "step": 1407 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.23443396389484406, + "learning_rate": 4.240052185257665e-06, + "log_odds_chosen": 23.12303352355957, + "log_odds_ratio": -2.1721349185099825e-05, + "logits/chosen": -1.7908635139465332, + "logits/rejected": -1.2949105501174927, + "logps/chosen": -0.010792475193738937, + "logps/rejected": -18.52618980407715, + "loss": 0.0855, + "nll_loss": 0.09809169173240662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010792474495247006, + "rewards/margins": 1.8515396118164062, + "rewards/rejected": -1.852618932723999, + "step": 1408 + }, + { + "epoch": 2.7546432062561097, + "grad_norm": 0.21723920106887817, + "learning_rate": 4.207436399217221e-06, + "log_odds_chosen": 19.513914108276367, + "log_odds_ratio": -0.0003233043826185167, + "logits/chosen": -1.5639877319335938, + "logits/rejected": -1.5407098531723022, + "logps/chosen": -0.01347254030406475, + "logps/rejected": -15.140386581420898, + "loss": 0.0807, + "nll_loss": 0.06133116036653519, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001347254146821797, + "rewards/margins": 1.5126914978027344, + "rewards/rejected": -1.5140388011932373, + "step": 1409 + }, + { + "epoch": 2.7565982404692084, + "grad_norm": 0.2151932418346405, + "learning_rate": 4.174820613176778e-06, + "log_odds_chosen": 20.172548294067383, + "log_odds_ratio": -0.0018143854103982449, + "logits/chosen": -1.6746225357055664, + "logits/rejected": -1.451095700263977, + "logps/chosen": -0.019139159470796585, + "logps/rejected": -16.0296688079834, + "loss": 0.0801, + "nll_loss": 0.08870963752269745, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019139160867780447, + "rewards/margins": 1.60105299949646, + "rewards/rejected": -1.6029667854309082, + "step": 1410 + }, + { + "epoch": 2.758553274682307, + "grad_norm": 0.2137944996356964, + "learning_rate": 4.142204827136334e-06, + "log_odds_chosen": 19.72972869873047, + "log_odds_ratio": -0.00023403785598929971, + "logits/chosen": -1.4315783977508545, + "logits/rejected": -1.4641997814178467, + "logps/chosen": -0.01139539759606123, + "logps/rejected": -15.195585250854492, + "loss": 0.0816, + "nll_loss": 0.10154493898153305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001139539759606123, + "rewards/margins": 1.5184192657470703, + "rewards/rejected": -1.5195586681365967, + "step": 1411 + }, + { + "epoch": 2.760508308895406, + "grad_norm": 0.2053534835577011, + "learning_rate": 4.10958904109589e-06, + "log_odds_chosen": 15.910257339477539, + "log_odds_ratio": -0.0004660442937165499, + "logits/chosen": -1.7770519256591797, + "logits/rejected": -1.5140727758407593, + "logps/chosen": -0.019759241491556168, + "logps/rejected": -11.61939525604248, + "loss": 0.0795, + "nll_loss": 0.10752853751182556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0019759242422878742, + "rewards/margins": 1.159963607788086, + "rewards/rejected": -1.1619395017623901, + "step": 1412 + }, + { + "epoch": 2.7624633431085046, + "grad_norm": 0.20367534458637238, + "learning_rate": 4.076973255055447e-06, + "log_odds_chosen": 13.098642349243164, + "log_odds_ratio": -0.0009495607810094953, + "logits/chosen": -1.608619213104248, + "logits/rejected": -1.6680212020874023, + "logps/chosen": -0.014616288244724274, + "logps/rejected": -8.71374225616455, + "loss": 0.0789, + "nll_loss": 0.07208046317100525, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014616288244724274, + "rewards/margins": 0.8699126243591309, + "rewards/rejected": -0.871374249458313, + "step": 1413 + }, + { + "epoch": 2.7644183773216033, + "grad_norm": 0.22360770404338837, + "learning_rate": 4.044357469015003e-06, + "log_odds_chosen": 16.02871322631836, + "log_odds_ratio": -0.0003249670553486794, + "logits/chosen": -1.8458737134933472, + "logits/rejected": -1.4927363395690918, + "logps/chosen": -0.011710352264344692, + "logps/rejected": -11.522972106933594, + "loss": 0.0825, + "nll_loss": 0.07847534120082855, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011710352264344692, + "rewards/margins": 1.1511262655258179, + "rewards/rejected": -1.1522972583770752, + "step": 1414 + }, + { + "epoch": 2.766373411534702, + "grad_norm": 0.22024822235107422, + "learning_rate": 4.01174168297456e-06, + "log_odds_chosen": 12.990108489990234, + "log_odds_ratio": -0.0007975318003445864, + "logits/chosen": -1.656334638595581, + "logits/rejected": -1.6830315589904785, + "logps/chosen": -0.016510970890522003, + "logps/rejected": -8.740978240966797, + "loss": 0.081, + "nll_loss": 0.06800730526447296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001651097321882844, + "rewards/margins": 0.8724467158317566, + "rewards/rejected": -0.8740978240966797, + "step": 1415 + }, + { + "epoch": 2.768328445747801, + "grad_norm": 0.22090774774551392, + "learning_rate": 3.979125896934116e-06, + "log_odds_chosen": 14.33363151550293, + "log_odds_ratio": -0.0006001435103826225, + "logits/chosen": -1.7273964881896973, + "logits/rejected": -1.723812460899353, + "logps/chosen": -0.01357558649033308, + "logps/rejected": -9.9566650390625, + "loss": 0.0807, + "nll_loss": 0.08656138181686401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001357558649033308, + "rewards/margins": 0.9943088293075562, + "rewards/rejected": -0.99566650390625, + "step": 1416 + }, + { + "epoch": 2.7702834799608995, + "grad_norm": 0.21953164041042328, + "learning_rate": 3.946510110893672e-06, + "log_odds_chosen": 15.575373649597168, + "log_odds_ratio": -0.0004517009947448969, + "logits/chosen": -1.795624017715454, + "logits/rejected": -1.6253209114074707, + "logps/chosen": -0.010224909521639347, + "logps/rejected": -10.943206787109375, + "loss": 0.0811, + "nll_loss": 0.06362880021333694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001022490905597806, + "rewards/margins": 1.0932981967926025, + "rewards/rejected": -1.0943207740783691, + "step": 1417 + }, + { + "epoch": 2.7722385141739982, + "grad_norm": 0.21253448724746704, + "learning_rate": 3.913894324853229e-06, + "log_odds_chosen": 19.073692321777344, + "log_odds_ratio": -7.556322088930756e-05, + "logits/chosen": -1.7300567626953125, + "logits/rejected": -1.6011295318603516, + "logps/chosen": -0.010433971881866455, + "logps/rejected": -14.370267868041992, + "loss": 0.0808, + "nll_loss": 0.07635022699832916, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010433972347527742, + "rewards/margins": 1.435983419418335, + "rewards/rejected": -1.4370267391204834, + "step": 1418 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.22634565830230713, + "learning_rate": 3.881278538812785e-06, + "log_odds_chosen": 12.79379653930664, + "log_odds_ratio": -0.000934603507630527, + "logits/chosen": -1.7471911907196045, + "logits/rejected": -1.727224588394165, + "logps/chosen": -0.01840735785663128, + "logps/rejected": -8.527036666870117, + "loss": 0.0819, + "nll_loss": 0.12961219251155853, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001840735785663128, + "rewards/margins": 0.850862979888916, + "rewards/rejected": -0.8527036905288696, + "step": 1419 + }, + { + "epoch": 2.7761485826001957, + "grad_norm": 0.21070584654808044, + "learning_rate": 3.848662752772342e-06, + "log_odds_chosen": 19.1431827545166, + "log_odds_ratio": -0.0001676370739005506, + "logits/chosen": -1.5616148710250854, + "logits/rejected": -1.490607738494873, + "logps/chosen": -0.010426739230751991, + "logps/rejected": -14.570661544799805, + "loss": 0.0814, + "nll_loss": 0.09827926009893417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010426738299429417, + "rewards/margins": 1.4560234546661377, + "rewards/rejected": -1.4570661783218384, + "step": 1420 + }, + { + "epoch": 2.7781036168132944, + "grad_norm": 0.21997205913066864, + "learning_rate": 3.816046966731898e-06, + "log_odds_chosen": 19.0329532623291, + "log_odds_ratio": -3.066218414460309e-05, + "logits/chosen": -1.8920624256134033, + "logits/rejected": -1.4328985214233398, + "logps/chosen": -0.00975363701581955, + "logps/rejected": -14.160276412963867, + "loss": 0.0822, + "nll_loss": 0.07611250877380371, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000975363771431148, + "rewards/margins": 1.4150521755218506, + "rewards/rejected": -1.416027545928955, + "step": 1421 + }, + { + "epoch": 2.780058651026393, + "grad_norm": 0.20549887418746948, + "learning_rate": 3.7834311806914547e-06, + "log_odds_chosen": 15.44743537902832, + "log_odds_ratio": -0.000868486356921494, + "logits/chosen": -1.5946916341781616, + "logits/rejected": -1.6754839420318604, + "logps/chosen": -0.01580328680574894, + "logps/rejected": -10.940855026245117, + "loss": 0.0804, + "nll_loss": 0.08873756229877472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001580328680574894, + "rewards/margins": 1.0925052165985107, + "rewards/rejected": -1.094085454940796, + "step": 1422 + }, + { + "epoch": 2.782013685239492, + "grad_norm": 0.20398256182670593, + "learning_rate": 3.750815394651011e-06, + "log_odds_chosen": 14.897165298461914, + "log_odds_ratio": -0.0003329133032821119, + "logits/chosen": -1.776026964187622, + "logits/rejected": -1.5636169910430908, + "logps/chosen": -0.0108027383685112, + "logps/rejected": -10.358603477478027, + "loss": 0.0817, + "nll_loss": 0.08335089683532715, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001080273767001927, + "rewards/margins": 1.0347801446914673, + "rewards/rejected": -1.0358604192733765, + "step": 1423 + }, + { + "epoch": 2.7839687194525906, + "grad_norm": 0.23326706886291504, + "learning_rate": 3.7181996086105673e-06, + "log_odds_chosen": 17.709779739379883, + "log_odds_ratio": -0.00024230030248872936, + "logits/chosen": -1.5847704410552979, + "logits/rejected": -1.7695772647857666, + "logps/chosen": -0.01736326701939106, + "logps/rejected": -13.450725555419922, + "loss": 0.0828, + "nll_loss": 0.08864600956439972, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017363266088068485, + "rewards/margins": 1.3433361053466797, + "rewards/rejected": -1.3450725078582764, + "step": 1424 + }, + { + "epoch": 2.7859237536656893, + "grad_norm": 0.21826620399951935, + "learning_rate": 3.6855838225701238e-06, + "log_odds_chosen": 14.12002182006836, + "log_odds_ratio": -0.00039236777229234576, + "logits/chosen": -1.8629786968231201, + "logits/rejected": -1.720128059387207, + "logps/chosen": -0.01714688539505005, + "logps/rejected": -9.905383110046387, + "loss": 0.0813, + "nll_loss": 0.071865975856781, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0017146884929388762, + "rewards/margins": 0.9888235926628113, + "rewards/rejected": -0.9905382990837097, + "step": 1425 + }, + { + "epoch": 2.787878787878788, + "grad_norm": 0.2175516039133072, + "learning_rate": 3.6529680365296803e-06, + "log_odds_chosen": 12.5006103515625, + "log_odds_ratio": -0.00032674302929081023, + "logits/chosen": -1.5724233388900757, + "logits/rejected": -1.779284119606018, + "logps/chosen": -0.012470053508877754, + "logps/rejected": -8.077159881591797, + "loss": 0.0815, + "nll_loss": 0.08442910760641098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012470053043216467, + "rewards/margins": 0.8064690828323364, + "rewards/rejected": -0.8077160716056824, + "step": 1426 + }, + { + "epoch": 2.789833822091887, + "grad_norm": 0.21551395952701569, + "learning_rate": 3.620352250489237e-06, + "log_odds_chosen": 19.877239227294922, + "log_odds_ratio": -0.00029253726825118065, + "logits/chosen": -1.7698850631713867, + "logits/rejected": -1.541152000427246, + "logps/chosen": -0.012867584824562073, + "logps/rejected": -15.379823684692383, + "loss": 0.0823, + "nll_loss": 0.07375846803188324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012867585755884647, + "rewards/margins": 1.5366955995559692, + "rewards/rejected": -1.5379823446273804, + "step": 1427 + }, + { + "epoch": 2.7917888563049855, + "grad_norm": 0.21592314541339874, + "learning_rate": 3.5877364644487933e-06, + "log_odds_chosen": 14.679252624511719, + "log_odds_ratio": -0.0016106204129755497, + "logits/chosen": -1.573777675628662, + "logits/rejected": -1.4061260223388672, + "logps/chosen": -0.014339936897158623, + "logps/rejected": -10.291369438171387, + "loss": 0.0835, + "nll_loss": 0.08583664894104004, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014339936897158623, + "rewards/margins": 1.0277029275894165, + "rewards/rejected": -1.0291370153427124, + "step": 1428 + }, + { + "epoch": 2.7937438905180842, + "grad_norm": 0.21084415912628174, + "learning_rate": 3.55512067840835e-06, + "log_odds_chosen": 11.59364128112793, + "log_odds_ratio": -0.0005991404759697616, + "logits/chosen": -2.0013952255249023, + "logits/rejected": -1.77824866771698, + "logps/chosen": -0.009960463270545006, + "logps/rejected": -6.976263046264648, + "loss": 0.0798, + "nll_loss": 0.09603486955165863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000996046350337565, + "rewards/margins": 0.6966302990913391, + "rewards/rejected": -0.6976262927055359, + "step": 1429 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.20100361108779907, + "learning_rate": 3.522504892367906e-06, + "log_odds_chosen": 16.244901657104492, + "log_odds_ratio": -0.0005505355075001717, + "logits/chosen": -1.8106794357299805, + "logits/rejected": -1.6085774898529053, + "logps/chosen": -0.013437996618449688, + "logps/rejected": -11.941839218139648, + "loss": 0.0783, + "nll_loss": 0.0697370171546936, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013437997549772263, + "rewards/margins": 1.1928400993347168, + "rewards/rejected": -1.1941839456558228, + "step": 1430 + }, + { + "epoch": 2.7976539589442817, + "grad_norm": 0.20349393784999847, + "learning_rate": 3.4898891063274624e-06, + "log_odds_chosen": 17.093250274658203, + "log_odds_ratio": -0.000637281103990972, + "logits/chosen": -1.817251443862915, + "logits/rejected": -1.5614787340164185, + "logps/chosen": -0.013161275535821915, + "logps/rejected": -12.599088668823242, + "loss": 0.0778, + "nll_loss": 0.07243766635656357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013161274837329984, + "rewards/margins": 1.2585927248001099, + "rewards/rejected": -1.25990891456604, + "step": 1431 + }, + { + "epoch": 2.7996089931573804, + "grad_norm": 0.2157328575849533, + "learning_rate": 3.457273320287019e-06, + "log_odds_chosen": 15.642946243286133, + "log_odds_ratio": -0.0007096335175447166, + "logits/chosen": -1.6405131816864014, + "logits/rejected": -1.4290716648101807, + "logps/chosen": -0.021885842084884644, + "logps/rejected": -11.628900527954102, + "loss": 0.0815, + "nll_loss": 0.08518733084201813, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002188584301620722, + "rewards/margins": 1.1607015132904053, + "rewards/rejected": -1.162890076637268, + "step": 1432 + }, + { + "epoch": 2.801564027370479, + "grad_norm": 0.21345654129981995, + "learning_rate": 3.4246575342465754e-06, + "log_odds_chosen": 13.560210227966309, + "log_odds_ratio": -0.0005744062946178019, + "logits/chosen": -1.6940008401870728, + "logits/rejected": -1.7729192972183228, + "logps/chosen": -0.009883694350719452, + "logps/rejected": -8.984162330627441, + "loss": 0.08, + "nll_loss": 0.07809774577617645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009883694583550096, + "rewards/margins": 0.8974278569221497, + "rewards/rejected": -0.8984162211418152, + "step": 1433 + }, + { + "epoch": 2.803519061583578, + "grad_norm": 0.2061988115310669, + "learning_rate": 3.392041748206132e-06, + "log_odds_chosen": 22.55651092529297, + "log_odds_ratio": -0.0003933907428290695, + "logits/chosen": -1.6207926273345947, + "logits/rejected": -1.4487590789794922, + "logps/chosen": -0.013094104826450348, + "logps/rejected": -18.13521957397461, + "loss": 0.0804, + "nll_loss": 0.07227349281311035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001309410436078906, + "rewards/margins": 1.8122124671936035, + "rewards/rejected": -1.8135218620300293, + "step": 1434 + }, + { + "epoch": 2.8054740957966766, + "grad_norm": 0.20143333077430725, + "learning_rate": 3.359425962165688e-06, + "log_odds_chosen": 16.64504623413086, + "log_odds_ratio": -0.00035327504156157374, + "logits/chosen": -1.6669495105743408, + "logits/rejected": -1.7052109241485596, + "logps/chosen": -0.012735281139612198, + "logps/rejected": -12.217037200927734, + "loss": 0.079, + "nll_loss": 0.07670622318983078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012735280906781554, + "rewards/margins": 1.2204301357269287, + "rewards/rejected": -1.2217037677764893, + "step": 1435 + }, + { + "epoch": 2.8074291300097753, + "grad_norm": 0.20032286643981934, + "learning_rate": 3.3268101761252445e-06, + "log_odds_chosen": 19.522930145263672, + "log_odds_ratio": -0.0002793906314764172, + "logits/chosen": -1.8474031686782837, + "logits/rejected": -1.7273261547088623, + "logps/chosen": -0.010898962616920471, + "logps/rejected": -14.951000213623047, + "loss": 0.0778, + "nll_loss": 0.06764844059944153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010898963082581758, + "rewards/margins": 1.4940102100372314, + "rewards/rejected": -1.4951000213623047, + "step": 1436 + }, + { + "epoch": 2.809384164222874, + "grad_norm": 0.21461720764636993, + "learning_rate": 3.294194390084801e-06, + "log_odds_chosen": 12.330846786499023, + "log_odds_ratio": -0.0004407696542330086, + "logits/chosen": -1.737532615661621, + "logits/rejected": -1.6869227886199951, + "logps/chosen": -0.010710827074944973, + "logps/rejected": -7.794095993041992, + "loss": 0.0801, + "nll_loss": 0.05688254162669182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010710827773436904, + "rewards/margins": 0.7783385515213013, + "rewards/rejected": -0.779409646987915, + "step": 1437 + }, + { + "epoch": 2.811339198435973, + "grad_norm": 0.21349570155143738, + "learning_rate": 3.2615786040443576e-06, + "log_odds_chosen": 21.559345245361328, + "log_odds_ratio": -3.848791675409302e-05, + "logits/chosen": -1.6855626106262207, + "logits/rejected": -1.3288264274597168, + "logps/chosen": -0.010321589186787605, + "logps/rejected": -16.913818359375, + "loss": 0.0803, + "nll_loss": 0.06894154846668243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010321589652448893, + "rewards/margins": 1.6903496980667114, + "rewards/rejected": -1.6913816928863525, + "step": 1438 + }, + { + "epoch": 2.8132942326490715, + "grad_norm": 0.20425014197826385, + "learning_rate": 3.228962818003914e-06, + "log_odds_chosen": 22.963590621948242, + "log_odds_ratio": -1.37693732540356e-05, + "logits/chosen": -1.471928358078003, + "logits/rejected": -1.4808752536773682, + "logps/chosen": -0.014267636463046074, + "logps/rejected": -18.566057205200195, + "loss": 0.0784, + "nll_loss": 0.07082002609968185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014267637161538005, + "rewards/margins": 1.8551790714263916, + "rewards/rejected": -1.8566057682037354, + "step": 1439 + }, + { + "epoch": 2.8152492668621703, + "grad_norm": 0.2054031938314438, + "learning_rate": 3.19634703196347e-06, + "log_odds_chosen": 15.163761138916016, + "log_odds_ratio": -0.0002906534355133772, + "logits/chosen": -1.7191970348358154, + "logits/rejected": -1.6459484100341797, + "logps/chosen": -0.009707455523312092, + "logps/rejected": -10.45467758178711, + "loss": 0.0787, + "nll_loss": 0.06916622817516327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009707455174066126, + "rewards/margins": 1.044497013092041, + "rewards/rejected": -1.0454678535461426, + "step": 1440 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.20819254219532013, + "learning_rate": 3.1637312459230267e-06, + "log_odds_chosen": 15.227266311645508, + "log_odds_ratio": -0.00023824983509257436, + "logits/chosen": -1.6852941513061523, + "logits/rejected": -1.7233989238739014, + "logps/chosen": -0.014528934843838215, + "logps/rejected": -10.98037338256836, + "loss": 0.0794, + "nll_loss": 0.07284374535083771, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014528934843838215, + "rewards/margins": 1.0965845584869385, + "rewards/rejected": -1.0980374813079834, + "step": 1441 + }, + { + "epoch": 2.8191593352883677, + "grad_norm": 0.2085520625114441, + "learning_rate": 3.131115459882583e-06, + "log_odds_chosen": 17.274864196777344, + "log_odds_ratio": -9.896212577586994e-05, + "logits/chosen": -1.6438992023468018, + "logits/rejected": -1.7470543384552002, + "logps/chosen": -0.011640486307442188, + "logps/rejected": -12.721195220947266, + "loss": 0.08, + "nll_loss": 0.06563065946102142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011640486773103476, + "rewards/margins": 1.2709556818008423, + "rewards/rejected": -1.2721196413040161, + "step": 1442 + }, + { + "epoch": 2.8211143695014664, + "grad_norm": 0.21044373512268066, + "learning_rate": 3.0984996738421397e-06, + "log_odds_chosen": 16.944984436035156, + "log_odds_ratio": -0.00011349123087711632, + "logits/chosen": -1.755531907081604, + "logits/rejected": -1.7220419645309448, + "logps/chosen": -0.011827008798718452, + "logps/rejected": -12.367532730102539, + "loss": 0.0787, + "nll_loss": 0.07431063055992126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011827009730041027, + "rewards/margins": 1.2355706691741943, + "rewards/rejected": -1.236753225326538, + "step": 1443 + }, + { + "epoch": 2.823069403714565, + "grad_norm": 0.21550652384757996, + "learning_rate": 3.065883887801696e-06, + "log_odds_chosen": 19.948387145996094, + "log_odds_ratio": -0.00017012232274282724, + "logits/chosen": -1.7468385696411133, + "logits/rejected": -1.5931971073150635, + "logps/chosen": -0.015198064967989922, + "logps/rejected": -15.632307052612305, + "loss": 0.0807, + "nll_loss": 0.08488671481609344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0015198065666481853, + "rewards/margins": 1.5617111921310425, + "rewards/rejected": -1.5632309913635254, + "step": 1444 + }, + { + "epoch": 2.825024437927664, + "grad_norm": 0.20727437734603882, + "learning_rate": 3.0332681017612523e-06, + "log_odds_chosen": 19.799713134765625, + "log_odds_ratio": -0.0003293619374744594, + "logits/chosen": -1.755293846130371, + "logits/rejected": -1.4547486305236816, + "logps/chosen": -0.011725397780537605, + "logps/rejected": -15.288446426391602, + "loss": 0.079, + "nll_loss": 0.08251908421516418, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011725398944690824, + "rewards/margins": 1.5276720523834229, + "rewards/rejected": -1.5288445949554443, + "step": 1445 + }, + { + "epoch": 2.8269794721407626, + "grad_norm": 0.20422512292861938, + "learning_rate": 3.0006523157208088e-06, + "log_odds_chosen": 13.275364875793457, + "log_odds_ratio": -0.0003739061066880822, + "logits/chosen": -1.6587649583816528, + "logits/rejected": -1.792945146560669, + "logps/chosen": -0.014840802177786827, + "logps/rejected": -9.018012046813965, + "loss": 0.0779, + "nll_loss": 0.06779356300830841, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014840803341940045, + "rewards/margins": 0.9003171920776367, + "rewards/rejected": -0.9018012285232544, + "step": 1446 + }, + { + "epoch": 2.8289345063538613, + "grad_norm": 0.20889635384082794, + "learning_rate": 2.9680365296803653e-06, + "log_odds_chosen": 11.921173095703125, + "log_odds_ratio": -0.0005504163564182818, + "logits/chosen": -1.7999391555786133, + "logits/rejected": -1.7562098503112793, + "logps/chosen": -0.007559135090559721, + "logps/rejected": -7.022247314453125, + "loss": 0.0811, + "nll_loss": 0.10852588713169098, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007559135556221008, + "rewards/margins": 0.701468825340271, + "rewards/rejected": -0.7022247314453125, + "step": 1447 + }, + { + "epoch": 2.83088954056696, + "grad_norm": 0.21123018860816956, + "learning_rate": 2.935420743639922e-06, + "log_odds_chosen": 18.0858211517334, + "log_odds_ratio": -0.0003419049608055502, + "logits/chosen": -1.559652328491211, + "logits/rejected": -1.5198795795440674, + "logps/chosen": -0.009276317432522774, + "logps/rejected": -13.392126083374023, + "loss": 0.0812, + "nll_loss": 0.08168494701385498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009276316268369555, + "rewards/margins": 1.338284969329834, + "rewards/rejected": -1.3392126560211182, + "step": 1448 + }, + { + "epoch": 2.832844574780059, + "grad_norm": 0.2092096209526062, + "learning_rate": 2.9028049575994783e-06, + "log_odds_chosen": 13.698029518127441, + "log_odds_ratio": -0.0005500835250131786, + "logits/chosen": -1.4598925113677979, + "logits/rejected": -1.346684455871582, + "logps/chosen": -0.007340648211538792, + "logps/rejected": -8.808646202087402, + "loss": 0.0809, + "nll_loss": 0.12190280854701996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0007340647862292826, + "rewards/margins": 0.8801306486129761, + "rewards/rejected": -0.8808646202087402, + "step": 1449 + }, + { + "epoch": 2.8347996089931575, + "grad_norm": 0.20978540182113647, + "learning_rate": 2.8701891715590344e-06, + "log_odds_chosen": 15.93485164642334, + "log_odds_ratio": -0.0007936656475067139, + "logits/chosen": -1.7372007369995117, + "logits/rejected": -1.5357680320739746, + "logps/chosen": -0.008182371966540813, + "logps/rejected": -11.039178848266602, + "loss": 0.0797, + "nll_loss": 0.09662888944149017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008182372548617423, + "rewards/margins": 1.1030997037887573, + "rewards/rejected": -1.1039178371429443, + "step": 1450 + }, + { + "epoch": 2.8367546432062563, + "grad_norm": 0.2129531055688858, + "learning_rate": 2.837573385518591e-06, + "log_odds_chosen": 14.292501449584961, + "log_odds_ratio": -0.000741309835575521, + "logits/chosen": -1.7626450061798096, + "logits/rejected": -1.7125171422958374, + "logps/chosen": -0.011831633746623993, + "logps/rejected": -9.756145477294922, + "loss": 0.0795, + "nll_loss": 0.06285040080547333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011831634910777211, + "rewards/margins": 0.9744314551353455, + "rewards/rejected": -0.9756145477294922, + "step": 1451 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.19826431572437286, + "learning_rate": 2.8049575994781474e-06, + "log_odds_chosen": 20.732942581176758, + "log_odds_ratio": -0.00018991615797858685, + "logits/chosen": -1.6228821277618408, + "logits/rejected": -1.4476673603057861, + "logps/chosen": -0.012207070365548134, + "logps/rejected": -16.135231018066406, + "loss": 0.077, + "nll_loss": 0.07096413522958755, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001220707199536264, + "rewards/margins": 1.612302303314209, + "rewards/rejected": -1.613523244857788, + "step": 1452 + }, + { + "epoch": 2.8406647116324537, + "grad_norm": 0.19973202049732208, + "learning_rate": 2.772341813437704e-06, + "log_odds_chosen": 22.22629165649414, + "log_odds_ratio": -9.531041723676026e-05, + "logits/chosen": -1.6806018352508545, + "logits/rejected": -1.562767505645752, + "logps/chosen": -0.012918703258037567, + "logps/rejected": -17.767242431640625, + "loss": 0.0791, + "nll_loss": 0.08121513575315475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012918703723698854, + "rewards/margins": 1.7754321098327637, + "rewards/rejected": -1.776724100112915, + "step": 1453 + }, + { + "epoch": 2.8426197458455524, + "grad_norm": 0.20117171108722687, + "learning_rate": 2.7397260273972604e-06, + "log_odds_chosen": 16.020565032958984, + "log_odds_ratio": -0.00039825018029659986, + "logits/chosen": -1.6767420768737793, + "logits/rejected": -1.5782711505889893, + "logps/chosen": -0.00939343310892582, + "logps/rejected": -11.331616401672363, + "loss": 0.0783, + "nll_loss": 0.07553350925445557, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009393433574587107, + "rewards/margins": 1.132222294807434, + "rewards/rejected": -1.1331616640090942, + "step": 1454 + }, + { + "epoch": 2.844574780058651, + "grad_norm": 0.19781357049942017, + "learning_rate": 2.7071102413568165e-06, + "log_odds_chosen": 14.57145881652832, + "log_odds_ratio": -0.0007613384514115751, + "logits/chosen": -1.8877952098846436, + "logits/rejected": -1.7688992023468018, + "logps/chosen": -0.012062359601259232, + "logps/rejected": -10.126323699951172, + "loss": 0.0773, + "nll_loss": 0.0730029046535492, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012062359601259232, + "rewards/margins": 1.0114262104034424, + "rewards/rejected": -1.0126323699951172, + "step": 1455 + }, + { + "epoch": 2.84652981427175, + "grad_norm": 0.20946891605854034, + "learning_rate": 2.674494455316373e-06, + "log_odds_chosen": 18.75739288330078, + "log_odds_ratio": -7.01729950378649e-05, + "logits/chosen": -1.692115068435669, + "logits/rejected": -1.5634831190109253, + "logps/chosen": -0.012921661138534546, + "logps/rejected": -14.398590087890625, + "loss": 0.0823, + "nll_loss": 0.060306064784526825, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012921660672873259, + "rewards/margins": 1.4385666847229004, + "rewards/rejected": -1.4398590326309204, + "step": 1456 + }, + { + "epoch": 2.8484848484848486, + "grad_norm": 0.2129439264535904, + "learning_rate": 2.6418786692759295e-06, + "log_odds_chosen": 13.671670913696289, + "log_odds_ratio": -0.00044743437319993973, + "logits/chosen": -1.8106963634490967, + "logits/rejected": -1.6515758037567139, + "logps/chosen": -0.013243112713098526, + "logps/rejected": -9.202911376953125, + "loss": 0.0791, + "nll_loss": 0.08351089060306549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013243112480267882, + "rewards/margins": 0.9189668893814087, + "rewards/rejected": -0.9202911853790283, + "step": 1457 + }, + { + "epoch": 2.8504398826979473, + "grad_norm": 0.20060031116008759, + "learning_rate": 2.609262883235486e-06, + "log_odds_chosen": 18.764404296875, + "log_odds_ratio": -0.000340557424351573, + "logits/chosen": -1.6182591915130615, + "logits/rejected": -1.677892804145813, + "logps/chosen": -0.0114726722240448, + "logps/rejected": -14.266817092895508, + "loss": 0.0804, + "nll_loss": 0.07115955650806427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001147267292253673, + "rewards/margins": 1.4255344867706299, + "rewards/rejected": -1.4266817569732666, + "step": 1458 + }, + { + "epoch": 2.852394916911046, + "grad_norm": 0.1895112842321396, + "learning_rate": 2.5766470971950426e-06, + "log_odds_chosen": 19.503032684326172, + "log_odds_ratio": -0.00011033449845854193, + "logits/chosen": -1.5301618576049805, + "logits/rejected": -1.47642183303833, + "logps/chosen": -0.010483385063707829, + "logps/rejected": -14.868196487426758, + "loss": 0.0767, + "nll_loss": 0.07461599260568619, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010483384830877185, + "rewards/margins": 1.4857712984085083, + "rewards/rejected": -1.4868196249008179, + "step": 1459 + }, + { + "epoch": 2.854349951124145, + "grad_norm": 0.196576327085495, + "learning_rate": 2.5440313111545986e-06, + "log_odds_chosen": 17.954547882080078, + "log_odds_ratio": -0.0003154594451189041, + "logits/chosen": -1.495388150215149, + "logits/rejected": -1.4714674949645996, + "logps/chosen": -0.012495644390583038, + "logps/rejected": -13.46505355834961, + "loss": 0.0772, + "nll_loss": 0.08006879687309265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012495644623413682, + "rewards/margins": 1.345255732536316, + "rewards/rejected": -1.3465054035186768, + "step": 1460 + }, + { + "epoch": 2.8563049853372435, + "grad_norm": 0.21279685199260712, + "learning_rate": 2.511415525114155e-06, + "log_odds_chosen": 18.399248123168945, + "log_odds_ratio": -0.0006856415420770645, + "logits/chosen": -1.758833885192871, + "logits/rejected": -1.513486623764038, + "logps/chosen": -0.013361372984945774, + "logps/rejected": -14.000244140625, + "loss": 0.0809, + "nll_loss": 0.08637436479330063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013361372984945774, + "rewards/margins": 1.3986884355545044, + "rewards/rejected": -1.4000245332717896, + "step": 1461 + }, + { + "epoch": 2.8582600195503423, + "grad_norm": 0.19989065825939178, + "learning_rate": 2.4787997390737117e-06, + "log_odds_chosen": 17.27680206298828, + "log_odds_ratio": -0.0003770712937694043, + "logits/chosen": -1.7011613845825195, + "logits/rejected": -1.477466344833374, + "logps/chosen": -0.01485876552760601, + "logps/rejected": -12.965486526489258, + "loss": 0.0777, + "nll_loss": 0.07872633635997772, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014858766226097941, + "rewards/margins": 1.295062780380249, + "rewards/rejected": -1.29654860496521, + "step": 1462 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.20376908779144287, + "learning_rate": 2.446183953033268e-06, + "log_odds_chosen": 10.553802490234375, + "log_odds_ratio": -0.0004403528291732073, + "logits/chosen": -1.7601383924484253, + "logits/rejected": -1.6785389184951782, + "logps/chosen": -0.011569966562092304, + "logps/rejected": -6.057766914367676, + "loss": 0.0802, + "nll_loss": 0.06766286492347717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011569967027753592, + "rewards/margins": 0.6046197414398193, + "rewards/rejected": -0.6057767868041992, + "step": 1463 + }, + { + "epoch": 2.8621700879765397, + "grad_norm": 0.19747941195964813, + "learning_rate": 2.4135681669928247e-06, + "log_odds_chosen": 25.108295440673828, + "log_odds_ratio": -9.536745437799254e-08, + "logits/chosen": -1.6360479593276978, + "logits/rejected": -1.3645198345184326, + "logps/chosen": -0.010831404477357864, + "logps/rejected": -20.4732666015625, + "loss": 0.0772, + "nll_loss": 0.08034773170948029, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010831404943019152, + "rewards/margins": 2.046243667602539, + "rewards/rejected": -2.0473265647888184, + "step": 1464 + }, + { + "epoch": 2.8641251221896384, + "grad_norm": 0.2142031192779541, + "learning_rate": 2.3809523809523808e-06, + "log_odds_chosen": 21.44690704345703, + "log_odds_ratio": -0.00010300617577740923, + "logits/chosen": -1.7103520631790161, + "logits/rejected": -1.5634174346923828, + "logps/chosen": -0.01092301681637764, + "logps/rejected": -16.70315933227539, + "loss": 0.0796, + "nll_loss": 0.08514614403247833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001092301681637764, + "rewards/margins": 1.6692235469818115, + "rewards/rejected": -1.6703159809112549, + "step": 1465 + }, + { + "epoch": 2.866080156402737, + "grad_norm": 0.19190414249897003, + "learning_rate": 2.3483365949119373e-06, + "log_odds_chosen": 14.556534767150879, + "log_odds_ratio": -0.0002324216184206307, + "logits/chosen": -1.4811129570007324, + "logits/rejected": -1.5093162059783936, + "logps/chosen": -0.010659998282790184, + "logps/rejected": -9.972354888916016, + "loss": 0.0775, + "nll_loss": 0.0944826751947403, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010659998515620828, + "rewards/margins": 0.9961694478988647, + "rewards/rejected": -0.9972355365753174, + "step": 1466 + }, + { + "epoch": 2.868035190615836, + "grad_norm": 0.19627873599529266, + "learning_rate": 2.3157208088714938e-06, + "log_odds_chosen": 19.038524627685547, + "log_odds_ratio": -0.000645025516860187, + "logits/chosen": -1.8591448068618774, + "logits/rejected": -1.6291253566741943, + "logps/chosen": -0.014496760442852974, + "logps/rejected": -14.752888679504395, + "loss": 0.079, + "nll_loss": 0.07975881546735764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014496759977191687, + "rewards/margins": 1.4738390445709229, + "rewards/rejected": -1.4752888679504395, + "step": 1467 + }, + { + "epoch": 2.8699902248289346, + "grad_norm": 0.2133602648973465, + "learning_rate": 2.2831050228310503e-06, + "log_odds_chosen": 15.714179992675781, + "log_odds_ratio": -0.0005358572816476226, + "logits/chosen": -1.8080897331237793, + "logits/rejected": -1.7287206649780273, + "logps/chosen": -0.011155230924487114, + "logps/rejected": -11.195362091064453, + "loss": 0.0781, + "nll_loss": 0.05752440169453621, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011155230458825827, + "rewards/margins": 1.1184207201004028, + "rewards/rejected": -1.1195361614227295, + "step": 1468 + }, + { + "epoch": 2.8719452590420333, + "grad_norm": 0.19677306711673737, + "learning_rate": 2.250489236790607e-06, + "log_odds_chosen": 17.674579620361328, + "log_odds_ratio": -0.00029678436112590134, + "logits/chosen": -1.7343833446502686, + "logits/rejected": -1.6803743839263916, + "logps/chosen": -0.02416212484240532, + "logps/rejected": -13.679360389709473, + "loss": 0.0767, + "nll_loss": 0.07603667676448822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.002416212810203433, + "rewards/margins": 1.3655197620391846, + "rewards/rejected": -1.3679358959197998, + "step": 1469 + }, + { + "epoch": 2.873900293255132, + "grad_norm": 0.2123916745185852, + "learning_rate": 2.217873450750163e-06, + "log_odds_chosen": 18.856159210205078, + "log_odds_ratio": -0.001210150308907032, + "logits/chosen": -1.721738338470459, + "logits/rejected": -1.6166062355041504, + "logps/chosen": -0.015524781309068203, + "logps/rejected": -14.66404914855957, + "loss": 0.0791, + "nll_loss": 0.08319263160228729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001552478177472949, + "rewards/margins": 1.4648523330688477, + "rewards/rejected": -1.466404914855957, + "step": 1470 + }, + { + "epoch": 2.875855327468231, + "grad_norm": 0.20428688824176788, + "learning_rate": 2.1852576647097194e-06, + "log_odds_chosen": 15.650864601135254, + "log_odds_ratio": -0.0009228275739587843, + "logits/chosen": -1.6859712600708008, + "logits/rejected": -1.6297874450683594, + "logps/chosen": -0.012775249779224396, + "logps/rejected": -11.276302337646484, + "loss": 0.0811, + "nll_loss": 0.05783946067094803, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012775249779224396, + "rewards/margins": 1.1263525485992432, + "rewards/rejected": -1.1276301145553589, + "step": 1471 + }, + { + "epoch": 2.8778103616813295, + "grad_norm": 0.20103013515472412, + "learning_rate": 2.152641878669276e-06, + "log_odds_chosen": 14.288949012756348, + "log_odds_ratio": -0.0003334594948682934, + "logits/chosen": -1.7078830003738403, + "logits/rejected": -1.4695665836334229, + "logps/chosen": -0.008021773770451546, + "logps/rejected": -9.403225898742676, + "loss": 0.0764, + "nll_loss": 0.10075429081916809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0008021773537620902, + "rewards/margins": 0.9395203590393066, + "rewards/rejected": -0.9403225183486938, + "step": 1472 + }, + { + "epoch": 2.8797653958944283, + "grad_norm": 0.20431239902973175, + "learning_rate": 2.1200260926288324e-06, + "log_odds_chosen": 13.029438018798828, + "log_odds_ratio": -0.0005850634770467877, + "logits/chosen": -1.7571396827697754, + "logits/rejected": -1.606663465499878, + "logps/chosen": -0.009863034822046757, + "logps/rejected": -8.411710739135742, + "loss": 0.0782, + "nll_loss": 0.08211535215377808, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009863035520538688, + "rewards/margins": 0.8401848077774048, + "rewards/rejected": -0.841171145439148, + "step": 1473 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.201075941324234, + "learning_rate": 2.087410306588389e-06, + "log_odds_chosen": 19.739933013916016, + "log_odds_ratio": -0.0003810084890574217, + "logits/chosen": -1.7552638053894043, + "logits/rejected": -1.5606236457824707, + "logps/chosen": -0.010097164660692215, + "logps/rejected": -15.122573852539062, + "loss": 0.0787, + "nll_loss": 0.06292251497507095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010097165359184146, + "rewards/margins": 1.5112476348876953, + "rewards/rejected": -1.5122573375701904, + "step": 1474 + }, + { + "epoch": 2.8836754643206257, + "grad_norm": 0.20981575548648834, + "learning_rate": 2.054794520547945e-06, + "log_odds_chosen": 15.69469165802002, + "log_odds_ratio": -0.00018131177057512105, + "logits/chosen": -1.828817367553711, + "logits/rejected": -1.6827406883239746, + "logps/chosen": -0.00953361950814724, + "logps/rejected": -11.022476196289062, + "loss": 0.08, + "nll_loss": 0.08629879355430603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009533619740977883, + "rewards/margins": 1.1012942790985107, + "rewards/rejected": -1.1022474765777588, + "step": 1475 + }, + { + "epoch": 2.8856304985337244, + "grad_norm": 0.19882982969284058, + "learning_rate": 2.0221787345075015e-06, + "log_odds_chosen": 18.109317779541016, + "log_odds_ratio": -0.00025611568707972765, + "logits/chosen": -1.5455198287963867, + "logits/rejected": -1.592794418334961, + "logps/chosen": -0.016005923971533775, + "logps/rejected": -13.942718505859375, + "loss": 0.0769, + "nll_loss": 0.06991303712129593, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0016005923971533775, + "rewards/margins": 1.3926713466644287, + "rewards/rejected": -1.3942718505859375, + "step": 1476 + }, + { + "epoch": 2.887585532746823, + "grad_norm": 0.19902966916561127, + "learning_rate": 1.989562948467058e-06, + "log_odds_chosen": 19.979801177978516, + "log_odds_ratio": -0.0001291811204282567, + "logits/chosen": -1.5822046995162964, + "logits/rejected": -1.4967069625854492, + "logps/chosen": -0.009720398113131523, + "logps/rejected": -15.330105781555176, + "loss": 0.0801, + "nll_loss": 0.06474808603525162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009720397647470236, + "rewards/margins": 1.5320385694503784, + "rewards/rejected": -1.533010482788086, + "step": 1477 + }, + { + "epoch": 2.889540566959922, + "grad_norm": 0.1953638643026352, + "learning_rate": 1.9569471624266145e-06, + "log_odds_chosen": 15.711566925048828, + "log_odds_ratio": -0.0006316553917713463, + "logits/chosen": -1.6953073740005493, + "logits/rejected": -1.333676815032959, + "logps/chosen": -0.013543512672185898, + "logps/rejected": -11.220989227294922, + "loss": 0.0774, + "nll_loss": 0.09018939733505249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013543511740863323, + "rewards/margins": 1.1207447052001953, + "rewards/rejected": -1.1220989227294922, + "step": 1478 + }, + { + "epoch": 2.8914956011730206, + "grad_norm": 0.20279891788959503, + "learning_rate": 1.924331376386171e-06, + "log_odds_chosen": 22.53974723815918, + "log_odds_ratio": -0.00014170857321005315, + "logits/chosen": -1.6489124298095703, + "logits/rejected": -1.7476140260696411, + "logps/chosen": -0.01326517853885889, + "logps/rejected": -18.088653564453125, + "loss": 0.0795, + "nll_loss": 0.0791163444519043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013265179004520178, + "rewards/margins": 1.8075387477874756, + "rewards/rejected": -1.8088651895523071, + "step": 1479 + }, + { + "epoch": 2.8934506353861194, + "grad_norm": 0.19062131643295288, + "learning_rate": 1.8917155903457273e-06, + "log_odds_chosen": 18.098800659179688, + "log_odds_ratio": -0.0012570557883009315, + "logits/chosen": -1.7767479419708252, + "logits/rejected": -1.6103172302246094, + "logps/chosen": -0.013711370527744293, + "logps/rejected": -13.810415267944336, + "loss": 0.0772, + "nll_loss": 0.08754576742649078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0013711369829252362, + "rewards/margins": 1.37967050075531, + "rewards/rejected": -1.3810416460037231, + "step": 1480 + }, + { + "epoch": 2.895405669599218, + "grad_norm": 0.19328714907169342, + "learning_rate": 1.8590998043052836e-06, + "log_odds_chosen": 13.684877395629883, + "log_odds_ratio": -0.0006710195448249578, + "logits/chosen": -1.727038860321045, + "logits/rejected": -1.6674818992614746, + "logps/chosen": -0.01150820218026638, + "logps/rejected": -9.150880813598633, + "loss": 0.0756, + "nll_loss": 0.09850761294364929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0011508201714605093, + "rewards/margins": 0.9139372110366821, + "rewards/rejected": -0.9150880575180054, + "step": 1481 + }, + { + "epoch": 2.897360703812317, + "grad_norm": 0.20898067951202393, + "learning_rate": 1.8264840182648401e-06, + "log_odds_chosen": 22.099035263061523, + "log_odds_ratio": -5.6490913266316056e-05, + "logits/chosen": -1.718506097793579, + "logits/rejected": -1.3940391540527344, + "logps/chosen": -0.010725274682044983, + "logps/rejected": -17.50336265563965, + "loss": 0.0795, + "nll_loss": 0.05710586905479431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010725274914875627, + "rewards/margins": 1.749263882637024, + "rewards/rejected": -1.7503364086151123, + "step": 1482 + }, + { + "epoch": 2.8993157380254155, + "grad_norm": 0.19680063426494598, + "learning_rate": 1.7938682322243967e-06, + "log_odds_chosen": 12.330469131469727, + "log_odds_ratio": -0.0008986520115286112, + "logits/chosen": -1.6145880222320557, + "logits/rejected": -1.5729186534881592, + "logps/chosen": -0.012523572891950607, + "logps/rejected": -7.853984832763672, + "loss": 0.0773, + "nll_loss": 0.09260198473930359, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012523573823273182, + "rewards/margins": 0.7841460704803467, + "rewards/rejected": -0.7853984832763672, + "step": 1483 + }, + { + "epoch": 2.9012707722385143, + "grad_norm": 0.19589459896087646, + "learning_rate": 1.761252446183953e-06, + "log_odds_chosen": 14.922159194946289, + "log_odds_ratio": -5.6400785979349166e-05, + "logits/chosen": -1.9159443378448486, + "logits/rejected": -1.789707899093628, + "logps/chosen": -0.01071165595203638, + "logps/rejected": -10.325664520263672, + "loss": 0.0774, + "nll_loss": 0.0796593427658081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010711655486375093, + "rewards/margins": 1.031495213508606, + "rewards/rejected": -1.0325664281845093, + "step": 1484 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.19799520075321198, + "learning_rate": 1.7286366601435095e-06, + "log_odds_chosen": 18.543649673461914, + "log_odds_ratio": -0.00029915967024862766, + "logits/chosen": -1.841958999633789, + "logits/rejected": -1.4537465572357178, + "logps/chosen": -0.012471344321966171, + "logps/rejected": -14.126659393310547, + "loss": 0.0782, + "nll_loss": 0.09155097603797913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001247134292498231, + "rewards/margins": 1.4114189147949219, + "rewards/rejected": -1.4126659631729126, + "step": 1485 + }, + { + "epoch": 2.9051808406647117, + "grad_norm": 0.19848677515983582, + "learning_rate": 1.696020874103066e-06, + "log_odds_chosen": 15.792093276977539, + "log_odds_ratio": -0.0004979911027476192, + "logits/chosen": -1.745445966720581, + "logits/rejected": -1.6695605516433716, + "logps/chosen": -0.011151527985930443, + "logps/rejected": -11.334598541259766, + "loss": 0.0784, + "nll_loss": 0.06213226914405823, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001115152845159173, + "rewards/margins": 1.1323447227478027, + "rewards/rejected": -1.1334599256515503, + "step": 1486 + }, + { + "epoch": 2.9071358748778104, + "grad_norm": 0.1904672384262085, + "learning_rate": 1.6634050880626223e-06, + "log_odds_chosen": 16.01700210571289, + "log_odds_ratio": -0.0003042141324840486, + "logits/chosen": -1.5693330764770508, + "logits/rejected": -1.639192819595337, + "logps/chosen": -0.009459175169467926, + "logps/rejected": -11.281381607055664, + "loss": 0.0778, + "nll_loss": 0.08754301071166992, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009459175053052604, + "rewards/margins": 1.1271922588348389, + "rewards/rejected": -1.1281383037567139, + "step": 1487 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.19924743473529816, + "learning_rate": 1.6307893020221788e-06, + "log_odds_chosen": 14.290169715881348, + "log_odds_ratio": -0.0002681137411855161, + "logits/chosen": -1.7437142133712769, + "logits/rejected": -1.3878300189971924, + "logps/chosen": -0.01076921634376049, + "logps/rejected": -9.709854125976562, + "loss": 0.0785, + "nll_loss": 0.110350102186203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010769217042252421, + "rewards/margins": 0.9699084758758545, + "rewards/rejected": -0.9709854125976562, + "step": 1488 + }, + { + "epoch": 2.911045943304008, + "grad_norm": 0.19391462206840515, + "learning_rate": 1.598173515981735e-06, + "log_odds_chosen": 16.176929473876953, + "log_odds_ratio": -0.0002922725398093462, + "logits/chosen": -1.790961503982544, + "logits/rejected": -1.6050810813903809, + "logps/chosen": -0.009729748591780663, + "logps/rejected": -11.545692443847656, + "loss": 0.0775, + "nll_loss": 0.06793016195297241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009729749290272593, + "rewards/margins": 1.15359628200531, + "rewards/rejected": -1.1545692682266235, + "step": 1489 + }, + { + "epoch": 2.9130009775171066, + "grad_norm": 0.1870325207710266, + "learning_rate": 1.5655577299412916e-06, + "log_odds_chosen": 15.364751815795898, + "log_odds_ratio": -0.000525734736584127, + "logits/chosen": -1.7321767807006836, + "logits/rejected": -1.6721115112304688, + "logps/chosen": -0.012388203293085098, + "logps/rejected": -10.95600414276123, + "loss": 0.0758, + "nll_loss": 0.07433757185935974, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0012388202594593167, + "rewards/margins": 1.0943615436553955, + "rewards/rejected": -1.0956002473831177, + "step": 1490 + }, + { + "epoch": 2.9149560117302054, + "grad_norm": 0.1972556859254837, + "learning_rate": 1.532941943900848e-06, + "log_odds_chosen": 19.255107879638672, + "log_odds_ratio": -0.00018592635751701891, + "logits/chosen": -2.0034267902374268, + "logits/rejected": -1.5229068994522095, + "logps/chosen": -0.0096965953707695, + "logps/rejected": -14.575029373168945, + "loss": 0.0782, + "nll_loss": 0.0837118923664093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009696596534922719, + "rewards/margins": 1.456533432006836, + "rewards/rejected": -1.457502841949463, + "step": 1491 + }, + { + "epoch": 2.916911045943304, + "grad_norm": 0.20751720666885376, + "learning_rate": 1.5003261578604044e-06, + "log_odds_chosen": 11.738481521606445, + "log_odds_ratio": -0.0005208428483456373, + "logits/chosen": -1.5957136154174805, + "logits/rejected": -1.7790695428848267, + "logps/chosen": -0.014326272532343864, + "logps/rejected": -7.425950527191162, + "loss": 0.08, + "nll_loss": 0.0718710720539093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014326273230835795, + "rewards/margins": 0.7411624789237976, + "rewards/rejected": -0.7425951361656189, + "step": 1492 + }, + { + "epoch": 2.918866080156403, + "grad_norm": 0.1926564872264862, + "learning_rate": 1.467710371819961e-06, + "log_odds_chosen": 8.054010391235352, + "log_odds_ratio": -0.0017957367235794663, + "logits/chosen": -1.6801862716674805, + "logits/rejected": -1.6927542686462402, + "logps/chosen": -0.014472026377916336, + "logps/rejected": -3.7939300537109375, + "loss": 0.0761, + "nll_loss": 0.07047348469495773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014472028706222773, + "rewards/margins": 0.37794581055641174, + "rewards/rejected": -0.37939298152923584, + "step": 1493 + }, + { + "epoch": 2.9208211143695015, + "grad_norm": 0.20222824811935425, + "learning_rate": 1.4350945857795172e-06, + "log_odds_chosen": 17.130638122558594, + "log_odds_ratio": -0.00011028557491954416, + "logits/chosen": -1.7393877506256104, + "logits/rejected": -1.6817913055419922, + "logps/chosen": -0.013206176459789276, + "logps/rejected": -12.726601600646973, + "loss": 0.0784, + "nll_loss": 0.06973245739936829, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.001320617739111185, + "rewards/margins": 1.2713396549224854, + "rewards/rejected": -1.272660255432129, + "step": 1494 + }, + { + "epoch": 2.9227761485826003, + "grad_norm": 0.2073470950126648, + "learning_rate": 1.4024787997390737e-06, + "log_odds_chosen": 13.752469062805176, + "log_odds_ratio": -0.0004933988675475121, + "logits/chosen": -1.7421140670776367, + "logits/rejected": -1.5024909973144531, + "logps/chosen": -0.010890847072005272, + "logps/rejected": -9.232769012451172, + "loss": 0.078, + "nll_loss": 0.07376818358898163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010890846606343985, + "rewards/margins": 0.9221878051757812, + "rewards/rejected": -0.9232769012451172, + "step": 1495 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.2018287479877472, + "learning_rate": 1.3698630136986302e-06, + "log_odds_chosen": 15.601982116699219, + "log_odds_ratio": -0.0003531479451339692, + "logits/chosen": -1.7029614448547363, + "logits/rejected": -1.6000235080718994, + "logps/chosen": -0.010443879291415215, + "logps/rejected": -11.00921630859375, + "loss": 0.0783, + "nll_loss": 0.07322066277265549, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0010443880455568433, + "rewards/margins": 1.0998773574829102, + "rewards/rejected": -1.1009217500686646, + "step": 1496 + }, + { + "epoch": 2.9266862170087977, + "grad_norm": 0.20101407170295715, + "learning_rate": 1.3372472276581865e-06, + "log_odds_chosen": 22.351964950561523, + "log_odds_ratio": -0.0001279293210245669, + "logits/chosen": -1.699800968170166, + "logits/rejected": -1.2033727169036865, + "logps/chosen": -0.009313588961958885, + "logps/rejected": -17.58734893798828, + "loss": 0.0806, + "nll_loss": 0.09913205355405807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009313588379882276, + "rewards/margins": 1.7578035593032837, + "rewards/rejected": -1.7587348222732544, + "step": 1497 + }, + { + "epoch": 2.9286412512218964, + "grad_norm": 0.2110470086336136, + "learning_rate": 1.304631441617743e-06, + "log_odds_chosen": 22.100086212158203, + "log_odds_ratio": -0.00011470272147562355, + "logits/chosen": -1.643517017364502, + "logits/rejected": -1.5001323223114014, + "logps/chosen": -0.009322058409452438, + "logps/rejected": -17.378070831298828, + "loss": 0.082, + "nll_loss": 0.06731847673654556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0009322059340775013, + "rewards/margins": 1.736875057220459, + "rewards/rejected": -1.737807273864746, + "step": 1498 + }, + { + "epoch": 2.930596285434995, + "grad_norm": 0.20179909467697144, + "learning_rate": 1.2720156555772993e-06, + "log_odds_chosen": 17.702545166015625, + "log_odds_ratio": -0.0002582801098469645, + "logits/chosen": -1.612778663635254, + "logits/rejected": -1.3706592321395874, + "logps/chosen": -0.014767156913876534, + "logps/rejected": -13.389540672302246, + "loss": 0.0776, + "nll_loss": 0.08485251665115356, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0014767157845199108, + "rewards/margins": 1.3374773263931274, + "rewards/rejected": -1.338953971862793, + "step": 1499 + }, + { + "epoch": 2.932551319648094, + "grad_norm": 0.19258083403110504, + "learning_rate": 1.2393998695368558e-06, + "log_odds_chosen": 15.281501770019531, + "log_odds_ratio": -0.00022356452245730907, + "logits/chosen": -1.738450288772583, + "logits/rejected": -1.5850335359573364, + "logps/chosen": -0.009983589872717857, + "logps/rejected": -10.613533020019531, + "loss": 0.0772, + "nll_loss": 0.07746520638465881, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.000998358940705657, + "rewards/margins": 1.0603549480438232, + "rewards/rejected": -1.061353325843811, + "step": 1500 + } + ], + "logging_steps": 1, + "max_steps": 1533, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 5, + "trial_name": null, + "trial_params": null +}