{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 903, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03322259136212625, "grad_norm": 516.0, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.16222620010375977, "log_odds_ratio": -0.6306548118591309, "logits/chosen": -2.278585433959961, "logits/rejected": -2.279832124710083, "logps/chosen": -2.096900463104248, "logps/rejected": -2.239978551864624, "loss": 4.4301, "nll_loss": 4.511023998260498, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10484502464532852, "rewards/margins": 0.007153891958296299, "rewards/rejected": -0.11199891567230225, "step": 10 }, { "epoch": 0.0664451827242525, "grad_norm": 51.25, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.2909570336341858, "log_odds_ratio": -0.5764315724372864, "logits/chosen": -2.9006943702697754, "logits/rejected": -2.899392604827881, "logps/chosen": -1.8797166347503662, "logps/rejected": -2.132899045944214, "loss": 2.0537, "nll_loss": 1.9548499584197998, "rewards/accuracies": 0.8125, "rewards/chosen": -0.09398583322763443, "rewards/margins": 0.012659117579460144, "rewards/rejected": -0.10664495080709457, "step": 20 }, { "epoch": 0.09966777408637874, "grad_norm": 21.5, "learning_rate": 1.5e-06, "log_odds_chosen": 0.4692462384700775, "log_odds_ratio": -0.510283350944519, "logits/chosen": -2.651613235473633, "logits/rejected": -2.651846170425415, "logps/chosen": -1.1686553955078125, "logps/rejected": -1.4823462963104248, "loss": 0.8489, "nll_loss": 0.7852751612663269, "rewards/accuracies": 0.875, "rewards/chosen": -0.058432769030332565, "rewards/margins": 0.015684548765420914, "rewards/rejected": -0.07411732524633408, "step": 30 }, { "epoch": 0.132890365448505, "grad_norm": 5.4375, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 1.7947794198989868, "log_odds_ratio": -0.26858600974082947, "logits/chosen": -2.2309794425964355, "logits/rejected": -2.229917526245117, "logps/chosen": -0.3180321156978607, "logps/rejected": -1.0096584558486938, "loss": 0.682, "nll_loss": 0.6571449041366577, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.015901606529951096, "rewards/margins": 0.03458131104707718, "rewards/rejected": -0.05048292130231857, "step": 40 }, { "epoch": 0.16611295681063123, "grad_norm": 3.921875, "learning_rate": 2.5e-06, "log_odds_chosen": 3.033090114593506, "log_odds_ratio": -0.14658799767494202, "logits/chosen": -2.118901014328003, "logits/rejected": -2.1173033714294434, "logps/chosen": -0.25798267126083374, "logps/rejected": -1.627856969833374, "loss": 0.6248, "nll_loss": 0.600904107093811, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.012899133376777172, "rewards/margins": 0.06849371641874313, "rewards/rejected": -0.08139285445213318, "step": 50 }, { "epoch": 0.19933554817275748, "grad_norm": 6.03125, "learning_rate": 3e-06, "log_odds_chosen": 3.421691417694092, "log_odds_ratio": -0.14014041423797607, "logits/chosen": -2.077253580093384, "logits/rejected": -2.0778613090515137, "logps/chosen": -0.22530755400657654, "logps/rejected": -1.8401321172714233, "loss": 0.6023, "nll_loss": 0.6070769429206848, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.011265376582741737, "rewards/margins": 0.08074121922254562, "rewards/rejected": -0.0920066088438034, "step": 60 }, { "epoch": 0.23255813953488372, "grad_norm": 4.40625, "learning_rate": 3.5e-06, "log_odds_chosen": 4.030662536621094, "log_odds_ratio": -0.07589299231767654, "logits/chosen": -2.0758180618286133, "logits/rejected": -2.0744235515594482, "logps/chosen": -0.2279697209596634, "logps/rejected": -2.004603385925293, "loss": 0.5389, "nll_loss": 0.534622073173523, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.011398485861718655, "rewards/margins": 0.08883167803287506, "rewards/rejected": -0.10023017227649689, "step": 70 }, { "epoch": 0.26578073089701, "grad_norm": 3.609375, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 4.552683353424072, "log_odds_ratio": -0.056131958961486816, "logits/chosen": -1.9744676351547241, "logits/rejected": -1.9743177890777588, "logps/chosen": -0.18155953288078308, "logps/rejected": -2.2489826679229736, "loss": 0.5203, "nll_loss": 0.4936625063419342, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.009077977389097214, "rewards/margins": 0.10337115824222565, "rewards/rejected": -0.11244914680719376, "step": 80 }, { "epoch": 0.29900332225913623, "grad_norm": 4.09375, "learning_rate": 4.5e-06, "log_odds_chosen": 4.036518096923828, "log_odds_ratio": -0.11566118150949478, "logits/chosen": -2.0107204914093018, "logits/rejected": -2.009970188140869, "logps/chosen": -0.2278076857328415, "logps/rejected": -2.3444762229919434, "loss": 0.4353, "nll_loss": 0.42819660902023315, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01139038521796465, "rewards/margins": 0.10583342611789703, "rewards/rejected": -0.11722382158041, "step": 90 }, { "epoch": 0.33222591362126247, "grad_norm": 5.59375, "learning_rate": 5e-06, "log_odds_chosen": 5.044631004333496, "log_odds_ratio": -0.04541964456439018, "logits/chosen": -1.9699828624725342, "logits/rejected": -1.973362922668457, "logps/chosen": -0.17292837798595428, "logps/rejected": -2.476628065109253, "loss": 0.378, "nll_loss": 0.4115411639213562, "rewards/accuracies": 1.0, "rewards/chosen": -0.008646419271826744, "rewards/margins": 0.11518500000238419, "rewards/rejected": -0.12383142858743668, "step": 100 }, { "epoch": 0.3654485049833887, "grad_norm": 5.15625, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 5.11702823638916, "log_odds_ratio": -0.06080981343984604, "logits/chosen": -1.883387565612793, "logits/rejected": -1.8867937326431274, "logps/chosen": -0.2399381399154663, "logps/rejected": -2.785667896270752, "loss": 0.3143, "nll_loss": 0.25671663880348206, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.011996905319392681, "rewards/margins": 0.12728647887706757, "rewards/rejected": -0.13928338885307312, "step": 110 }, { "epoch": 0.39867109634551495, "grad_norm": 5.0625, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 5.193495273590088, "log_odds_ratio": -0.06189825385808945, "logits/chosen": -2.042405605316162, "logits/rejected": -2.0465760231018066, "logps/chosen": -0.1781654804944992, "logps/rejected": -2.7364754676818848, "loss": 0.2883, "nll_loss": 0.31883668899536133, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.008908274583518505, "rewards/margins": 0.12791548669338226, "rewards/rejected": -0.13682377338409424, "step": 120 }, { "epoch": 0.4318936877076412, "grad_norm": 6.40625, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 5.065199375152588, "log_odds_ratio": -0.08915611356496811, "logits/chosen": -1.965550184249878, "logits/rejected": -1.9655630588531494, "logps/chosen": -0.2024417221546173, "logps/rejected": -2.5124077796936035, "loss": 0.2346, "nll_loss": 0.2726798355579376, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01012208592146635, "rewards/margins": 0.11549830436706543, "rewards/rejected": -0.12562039494514465, "step": 130 }, { "epoch": 0.46511627906976744, "grad_norm": 5.65625, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 6.224053859710693, "log_odds_ratio": -0.03768063336610794, "logits/chosen": -1.9541261196136475, "logits/rejected": -1.9546234607696533, "logps/chosen": -0.18753428757190704, "logps/rejected": -3.1887362003326416, "loss": 0.2075, "nll_loss": 0.1721208095550537, "rewards/accuracies": 1.0, "rewards/chosen": -0.009376714006066322, "rewards/margins": 0.15006008744239807, "rewards/rejected": -0.15943679213523865, "step": 140 }, { "epoch": 0.4983388704318937, "grad_norm": 5.53125, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 5.947408199310303, "log_odds_ratio": -0.03485158830881119, "logits/chosen": -1.928086280822754, "logits/rejected": -1.9305970668792725, "logps/chosen": -0.16427160799503326, "logps/rejected": -3.250744581222534, "loss": 0.1935, "nll_loss": 0.1781485378742218, "rewards/accuracies": 1.0, "rewards/chosen": -0.008213580586016178, "rewards/margins": 0.15432362258434296, "rewards/rejected": -0.16253721714019775, "step": 150 }, { "epoch": 0.53156146179402, "grad_norm": 5.0625, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 5.442956924438477, "log_odds_ratio": -0.0749388113617897, "logits/chosen": -2.025440216064453, "logits/rejected": -2.0270285606384277, "logps/chosen": -0.18180342018604279, "logps/rejected": -2.965075731277466, "loss": 0.1607, "nll_loss": 0.18244585394859314, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.00909017026424408, "rewards/margins": 0.13916362822055817, "rewards/rejected": -0.14825379848480225, "step": 160 }, { "epoch": 0.5647840531561462, "grad_norm": 4.8125, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 6.501151084899902, "log_odds_ratio": -0.04770870879292488, "logits/chosen": -1.9193477630615234, "logits/rejected": -1.9223600625991821, "logps/chosen": -0.16726627945899963, "logps/rejected": -3.4573769569396973, "loss": 0.1339, "nll_loss": 0.16945432126522064, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.008363312110304832, "rewards/margins": 0.16450552642345428, "rewards/rejected": -0.17286884784698486, "step": 170 }, { "epoch": 0.5980066445182725, "grad_norm": 4.8125, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 6.667401313781738, "log_odds_ratio": -0.049022819846868515, "logits/chosen": -1.849381685256958, "logits/rejected": -1.8515217304229736, "logps/chosen": -0.135690376162529, "logps/rejected": -3.4136643409729004, "loss": 0.12, "nll_loss": 0.0964335948228836, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.006784519646316767, "rewards/margins": 0.16389869153499603, "rewards/rejected": -0.17068320512771606, "step": 180 }, { "epoch": 0.6312292358803987, "grad_norm": 4.0625, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 6.884723663330078, "log_odds_ratio": -0.028747648000717163, "logits/chosen": -1.9872407913208008, "logits/rejected": -1.9903675317764282, "logps/chosen": -0.1553266942501068, "logps/rejected": -3.7033779621124268, "loss": 0.1064, "nll_loss": 0.1674821376800537, "rewards/accuracies": 1.0, "rewards/chosen": -0.007766333874315023, "rewards/margins": 0.1774025708436966, "rewards/rejected": -0.18516890704631805, "step": 190 }, { "epoch": 0.6644518272425249, "grad_norm": 4.40625, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 6.214459419250488, "log_odds_ratio": -0.04790915921330452, "logits/chosen": -1.8185112476348877, "logits/rejected": -1.820067048072815, "logps/chosen": -0.15120986104011536, "logps/rejected": -3.4781315326690674, "loss": 0.0925, "nll_loss": 0.06244741007685661, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.007560492493212223, "rewards/margins": 0.16634607315063477, "rewards/rejected": -0.17390656471252441, "step": 200 }, { "epoch": 0.6976744186046512, "grad_norm": 5.46875, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 7.0073442459106445, "log_odds_ratio": -0.021652357652783394, "logits/chosen": -1.8007183074951172, "logits/rejected": -1.8030471801757812, "logps/chosen": -0.15487684309482574, "logps/rejected": -3.859619140625, "loss": 0.0897, "nll_loss": 0.07238463312387466, "rewards/accuracies": 1.0, "rewards/chosen": -0.007743841968476772, "rewards/margins": 0.1852371096611023, "rewards/rejected": -0.19298096001148224, "step": 210 }, { "epoch": 0.7308970099667774, "grad_norm": 3.484375, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 6.155528545379639, "log_odds_ratio": -0.03304092958569527, "logits/chosen": -1.827543020248413, "logits/rejected": -1.8281749486923218, "logps/chosen": -0.13896045088768005, "logps/rejected": -3.173088312149048, "loss": 0.0762, "nll_loss": 0.08422436565160751, "rewards/accuracies": 1.0, "rewards/chosen": -0.0069480217061936855, "rewards/margins": 0.15170639753341675, "rewards/rejected": -0.15865442156791687, "step": 220 }, { "epoch": 0.7641196013289037, "grad_norm": 5.03125, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 7.247349739074707, "log_odds_ratio": -0.028882017359137535, "logits/chosen": -1.8549703359603882, "logits/rejected": -1.854103446006775, "logps/chosen": -0.13769736886024475, "logps/rejected": -3.765294313430786, "loss": 0.0634, "nll_loss": 0.058101166039705276, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.006884869188070297, "rewards/margins": 0.18137982487678528, "rewards/rejected": -0.18826469779014587, "step": 230 }, { "epoch": 0.7973421926910299, "grad_norm": 3.875, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 7.331563472747803, "log_odds_ratio": -0.05921437218785286, "logits/chosen": -1.9100215435028076, "logits/rejected": -1.9127085208892822, "logps/chosen": -0.12960004806518555, "logps/rejected": -3.6168124675750732, "loss": 0.0571, "nll_loss": 0.05991581082344055, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.00648000231012702, "rewards/margins": 0.17436063289642334, "rewards/rejected": -0.1808406263589859, "step": 240 }, { "epoch": 0.8305647840531561, "grad_norm": 3.53125, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 7.729872703552246, "log_odds_ratio": -0.01583888754248619, "logits/chosen": -1.9329345226287842, "logits/rejected": -1.9311659336090088, "logps/chosen": -0.16400082409381866, "logps/rejected": -4.614955902099609, "loss": 0.0623, "nll_loss": 0.045454978942871094, "rewards/accuracies": 1.0, "rewards/chosen": -0.008200041949748993, "rewards/margins": 0.2225477695465088, "rewards/rejected": -0.230747789144516, "step": 250 }, { "epoch": 0.8637873754152824, "grad_norm": 3.15625, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 7.562008857727051, "log_odds_ratio": -0.06357506662607193, "logits/chosen": -1.8379156589508057, "logits/rejected": -1.8368641138076782, "logps/chosen": -0.15229454636573792, "logps/rejected": -4.107020854949951, "loss": 0.0485, "nll_loss": 0.033093374222517014, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.0076147266663610935, "rewards/margins": 0.19773633778095245, "rewards/rejected": -0.2053510695695877, "step": 260 }, { "epoch": 0.8970099667774086, "grad_norm": 3.15625, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 7.212728023529053, "log_odds_ratio": -0.07752545177936554, "logits/chosen": -1.7939443588256836, "logits/rejected": -1.7951726913452148, "logps/chosen": -0.16109412908554077, "logps/rejected": -3.8484885692596436, "loss": 0.0557, "nll_loss": 0.05208224803209305, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.008054706268012524, "rewards/margins": 0.18436971306800842, "rewards/rejected": -0.19242441654205322, "step": 270 }, { "epoch": 0.9302325581395349, "grad_norm": 9.25, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 7.317690372467041, "log_odds_ratio": -0.03297095373272896, "logits/chosen": -1.7949488162994385, "logits/rejected": -1.7990652322769165, "logps/chosen": -0.13610824942588806, "logps/rejected": -3.9614219665527344, "loss": 0.0511, "nll_loss": 0.05410151928663254, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0068054115399718285, "rewards/margins": 0.19126567244529724, "rewards/rejected": -0.19807109236717224, "step": 280 }, { "epoch": 0.9634551495016611, "grad_norm": 3.796875, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 7.124808311462402, "log_odds_ratio": -0.059395015239715576, "logits/chosen": -1.7850925922393799, "logits/rejected": -1.7865415811538696, "logps/chosen": -0.12057201564311981, "logps/rejected": -3.576596736907959, "loss": 0.0428, "nll_loss": 0.03806814178824425, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.0060286009684205055, "rewards/margins": 0.17280122637748718, "rewards/rejected": -0.1788298487663269, "step": 290 }, { "epoch": 0.9966777408637874, "grad_norm": 3.65625, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 8.637829780578613, "log_odds_ratio": -0.03425002470612526, "logits/chosen": -1.8364009857177734, "logits/rejected": -1.8421306610107422, "logps/chosen": -0.09547251462936401, "logps/rejected": -4.495790481567383, "loss": 0.0425, "nll_loss": 0.04902663081884384, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.004773625638335943, "rewards/margins": 0.22001588344573975, "rewards/rejected": -0.22478953003883362, "step": 300 }, { "epoch": 1.0299003322259137, "grad_norm": 2.8125, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 7.561132907867432, "log_odds_ratio": -0.03642101213335991, "logits/chosen": -1.7316315174102783, "logits/rejected": -1.7338483333587646, "logps/chosen": -0.0959225445985794, "logps/rejected": -4.059569358825684, "loss": 0.0292, "nll_loss": 0.028634298592805862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0047961268573999405, "rewards/margins": 0.1981823742389679, "rewards/rejected": -0.2029784917831421, "step": 310 }, { "epoch": 1.06312292358804, "grad_norm": 2.53125, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 9.18364429473877, "log_odds_ratio": -0.014110135845839977, "logits/chosen": -1.7149658203125, "logits/rejected": -1.7151539325714111, "logps/chosen": -0.12401266396045685, "logps/rejected": -5.025017738342285, "loss": 0.0279, "nll_loss": 0.019647331908345222, "rewards/accuracies": 1.0, "rewards/chosen": -0.0062006330117583275, "rewards/margins": 0.24505026638507843, "rewards/rejected": -0.25125089287757874, "step": 320 }, { "epoch": 1.0963455149501662, "grad_norm": 3.78125, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 8.793200492858887, "log_odds_ratio": -0.009492707438766956, "logits/chosen": -1.7698380947113037, "logits/rejected": -1.770939588546753, "logps/chosen": -0.11295183002948761, "logps/rejected": -4.82761812210083, "loss": 0.0332, "nll_loss": 0.02587791346013546, "rewards/accuracies": 1.0, "rewards/chosen": -0.005647591315209866, "rewards/margins": 0.23573331534862518, "rewards/rejected": -0.24138090014457703, "step": 330 }, { "epoch": 1.1295681063122924, "grad_norm": 1.78125, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 8.388921737670898, "log_odds_ratio": -0.009843870997428894, "logits/chosen": -1.7839330434799194, "logits/rejected": -1.7847379446029663, "logps/chosen": -0.0786074846982956, "logps/rejected": -4.0679216384887695, "loss": 0.0277, "nll_loss": 0.022286545485258102, "rewards/accuracies": 1.0, "rewards/chosen": -0.003930374514311552, "rewards/margins": 0.19946573674678802, "rewards/rejected": -0.20339611172676086, "step": 340 }, { "epoch": 1.1627906976744187, "grad_norm": 2.28125, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 8.371394157409668, "log_odds_ratio": -0.016807865351438522, "logits/chosen": -1.829673171043396, "logits/rejected": -1.8303911685943604, "logps/chosen": -0.10171355307102203, "logps/rejected": -4.102365016937256, "loss": 0.0254, "nll_loss": 0.019215276464819908, "rewards/accuracies": 1.0, "rewards/chosen": -0.005085677839815617, "rewards/margins": 0.20003259181976318, "rewards/rejected": -0.20511826872825623, "step": 350 }, { "epoch": 1.196013289036545, "grad_norm": 3.484375, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 8.600504875183105, "log_odds_ratio": -0.01216288935393095, "logits/chosen": -1.7985435724258423, "logits/rejected": -1.8070056438446045, "logps/chosen": -0.06962008774280548, "logps/rejected": -4.320959568023682, "loss": 0.0267, "nll_loss": 0.029016951099038124, "rewards/accuracies": 1.0, "rewards/chosen": -0.0034810048528015614, "rewards/margins": 0.21256697177886963, "rewards/rejected": -0.2160479724407196, "step": 360 }, { "epoch": 1.2292358803986712, "grad_norm": 3.1875, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 9.181499481201172, "log_odds_ratio": -0.009369775652885437, "logits/chosen": -1.7924983501434326, "logits/rejected": -1.7953475713729858, "logps/chosen": -0.11782409995794296, "logps/rejected": -4.833601474761963, "loss": 0.0282, "nll_loss": 0.02526969090104103, "rewards/accuracies": 1.0, "rewards/chosen": -0.0058912052772939205, "rewards/margins": 0.23578886687755585, "rewards/rejected": -0.2416801005601883, "step": 370 }, { "epoch": 1.2624584717607974, "grad_norm": 2.796875, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 9.619462966918945, "log_odds_ratio": -0.039833612740039825, "logits/chosen": -1.8319047689437866, "logits/rejected": -1.8300600051879883, "logps/chosen": -0.10069389641284943, "logps/rejected": -5.014215469360352, "loss": 0.0266, "nll_loss": 0.028005924075841904, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.005034694913774729, "rewards/margins": 0.24567607045173645, "rewards/rejected": -0.25071078538894653, "step": 380 }, { "epoch": 1.2956810631229236, "grad_norm": 2.59375, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 8.534521102905273, "log_odds_ratio": -0.01909024640917778, "logits/chosen": -1.8544292449951172, "logits/rejected": -1.8542238473892212, "logps/chosen": -0.08696131408214569, "logps/rejected": -4.430135250091553, "loss": 0.0232, "nll_loss": 0.02147216536104679, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.004348065238445997, "rewards/margins": 0.217158704996109, "rewards/rejected": -0.2215067595243454, "step": 390 }, { "epoch": 1.3289036544850499, "grad_norm": 1.640625, "learning_rate": 2.5e-06, "log_odds_chosen": 8.600648880004883, "log_odds_ratio": -0.012287040241062641, "logits/chosen": -1.8202216625213623, "logits/rejected": -1.8218141794204712, "logps/chosen": -0.09084287285804749, "logps/rejected": -4.572846412658691, "loss": 0.0247, "nll_loss": 0.02059631608426571, "rewards/accuracies": 1.0, "rewards/chosen": -0.004542144015431404, "rewards/margins": 0.22410018742084503, "rewards/rejected": -0.22864234447479248, "step": 400 }, { "epoch": 1.3621262458471761, "grad_norm": 3.640625, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 9.651135444641113, "log_odds_ratio": -0.00363975390791893, "logits/chosen": -1.7791026830673218, "logits/rejected": -1.7837848663330078, "logps/chosen": -0.06081225723028183, "logps/rejected": -4.943568706512451, "loss": 0.024, "nll_loss": 0.021141935139894485, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030406129080802202, "rewards/margins": 0.24413780868053436, "rewards/rejected": -0.24717843532562256, "step": 410 }, { "epoch": 1.3953488372093024, "grad_norm": 1.875, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 8.751879692077637, "log_odds_ratio": -0.010004991665482521, "logits/chosen": -1.8006718158721924, "logits/rejected": -1.804359793663025, "logps/chosen": -0.0964532420039177, "logps/rejected": -4.5417680740356445, "loss": 0.0242, "nll_loss": 0.025043126195669174, "rewards/accuracies": 1.0, "rewards/chosen": -0.004822662565857172, "rewards/margins": 0.22226576507091522, "rewards/rejected": -0.22708842158317566, "step": 420 }, { "epoch": 1.4285714285714286, "grad_norm": 1.984375, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 9.289121627807617, "log_odds_ratio": -0.015525879338383675, "logits/chosen": -1.7658954858779907, "logits/rejected": -1.7667725086212158, "logps/chosen": -0.08038794249296188, "logps/rejected": -4.9636030197143555, "loss": 0.0201, "nll_loss": 0.020869722589850426, "rewards/accuracies": 1.0, "rewards/chosen": -0.004019397310912609, "rewards/margins": 0.24416080117225647, "rewards/rejected": -0.24818019568920135, "step": 430 }, { "epoch": 1.4617940199335548, "grad_norm": 2.09375, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 8.703948974609375, "log_odds_ratio": -0.03381601721048355, "logits/chosen": -1.8417913913726807, "logits/rejected": -1.845391869544983, "logps/chosen": -0.09374421089887619, "logps/rejected": -4.612320899963379, "loss": 0.0229, "nll_loss": 0.021974634379148483, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.004687210079282522, "rewards/margins": 0.22592882812023163, "rewards/rejected": -0.23061604797840118, "step": 440 }, { "epoch": 1.495016611295681, "grad_norm": 1.890625, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 9.317599296569824, "log_odds_ratio": -0.008442175574600697, "logits/chosen": -1.9028446674346924, "logits/rejected": -1.9078031778335571, "logps/chosen": -0.0840989276766777, "logps/rejected": -4.985965251922607, "loss": 0.0277, "nll_loss": 0.019282350316643715, "rewards/accuracies": 1.0, "rewards/chosen": -0.0042049465700984, "rewards/margins": 0.24509334564208984, "rewards/rejected": -0.24929828941822052, "step": 450 }, { "epoch": 1.5282392026578073, "grad_norm": 1.109375, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 8.135089874267578, "log_odds_ratio": -0.014614465646445751, "logits/chosen": -1.80266535282135, "logits/rejected": -1.8127800226211548, "logps/chosen": -0.08102138340473175, "logps/rejected": -4.199796676635742, "loss": 0.0208, "nll_loss": 0.02294105850160122, "rewards/accuracies": 1.0, "rewards/chosen": -0.004051069263368845, "rewards/margins": 0.20593877136707306, "rewards/rejected": -0.20998983085155487, "step": 460 }, { "epoch": 1.5614617940199336, "grad_norm": 2.40625, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 9.386737823486328, "log_odds_ratio": -0.008776369504630566, "logits/chosen": -1.7513538599014282, "logits/rejected": -1.7554657459259033, "logps/chosen": -0.06230410188436508, "logps/rejected": -4.882990837097168, "loss": 0.0227, "nll_loss": 0.01421122532337904, "rewards/accuracies": 1.0, "rewards/chosen": -0.003115205094218254, "rewards/margins": 0.24103431403636932, "rewards/rejected": -0.24414952099323273, "step": 470 }, { "epoch": 1.5946843853820598, "grad_norm": 1.625, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 8.983893394470215, "log_odds_ratio": -0.024142052978277206, "logits/chosen": -1.744749665260315, "logits/rejected": -1.7481634616851807, "logps/chosen": -0.06949851661920547, "logps/rejected": -4.313258171081543, "loss": 0.0222, "nll_loss": 0.014674236066639423, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0034749258775264025, "rewards/margins": 0.2121879756450653, "rewards/rejected": -0.2156629115343094, "step": 480 }, { "epoch": 1.627906976744186, "grad_norm": 2.0, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 9.338783264160156, "log_odds_ratio": -0.003996879793703556, "logits/chosen": -1.791486382484436, "logits/rejected": -1.795069694519043, "logps/chosen": -0.06457408517599106, "logps/rejected": -4.7583208084106445, "loss": 0.0177, "nll_loss": 0.015971561893820763, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032287046778947115, "rewards/margins": 0.23468737304210663, "rewards/rejected": -0.23791606724262238, "step": 490 }, { "epoch": 1.6611295681063123, "grad_norm": 1.6015625, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 9.166845321655273, "log_odds_ratio": -0.019674357026815414, "logits/chosen": -1.7548977136611938, "logits/rejected": -1.7554121017456055, "logps/chosen": -0.10164159536361694, "logps/rejected": -4.969311714172363, "loss": 0.0212, "nll_loss": 0.01509636640548706, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.00508207967504859, "rewards/margins": 0.243383526802063, "rewards/rejected": -0.24846558272838593, "step": 500 }, { "epoch": 1.6943521594684385, "grad_norm": 1.28125, "learning_rate": 2.2140372138502386e-06, "log_odds_chosen": 8.379947662353516, "log_odds_ratio": -0.03198238089680672, "logits/chosen": -1.8583186864852905, "logits/rejected": -1.8610032796859741, "logps/chosen": -0.08590197563171387, "logps/rejected": -4.655932426452637, "loss": 0.0195, "nll_loss": 0.017975686118006706, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0042950985953211784, "rewards/margins": 0.22850151360034943, "rewards/rejected": -0.23279662430286407, "step": 510 }, { "epoch": 1.7275747508305648, "grad_norm": 3.125, "learning_rate": 2.1926450482675734e-06, "log_odds_chosen": 8.935117721557617, "log_odds_ratio": -0.014949078671634197, "logits/chosen": -1.7289230823516846, "logits/rejected": -1.73250412940979, "logps/chosen": -0.07164986431598663, "logps/rejected": -4.507022857666016, "loss": 0.0178, "nll_loss": 0.014084184542298317, "rewards/accuracies": 1.0, "rewards/chosen": -0.003582493169233203, "rewards/margins": 0.22176864743232727, "rewards/rejected": -0.22535113990306854, "step": 520 }, { "epoch": 1.760797342192691, "grad_norm": 1.71875, "learning_rate": 2.1718612138153473e-06, "log_odds_chosen": 10.034872055053711, "log_odds_ratio": -0.006512313149869442, "logits/chosen": -1.7052526473999023, "logits/rejected": -1.7084852457046509, "logps/chosen": -0.07591713964939117, "logps/rejected": -5.4831976890563965, "loss": 0.0223, "nll_loss": 0.020342020317912102, "rewards/accuracies": 1.0, "rewards/chosen": -0.003795857075601816, "rewards/margins": 0.27036404609680176, "rewards/rejected": -0.27415987849235535, "step": 530 }, { "epoch": 1.7940199335548173, "grad_norm": 4.46875, "learning_rate": 2.151657414559676e-06, "log_odds_chosen": 8.771195411682129, "log_odds_ratio": -0.020360399037599564, "logits/chosen": -1.7692371606826782, "logits/rejected": -1.772956132888794, "logps/chosen": -0.07401047646999359, "logps/rejected": -4.339105129241943, "loss": 0.0207, "nll_loss": 0.02204059436917305, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0037005238700658083, "rewards/margins": 0.21325473487377167, "rewards/rejected": -0.2169552594423294, "step": 540 }, { "epoch": 1.8272425249169435, "grad_norm": 2.4375, "learning_rate": 2.132007163556104e-06, "log_odds_chosen": 8.97265625, "log_odds_ratio": -0.010379938408732414, "logits/chosen": -1.761279821395874, "logits/rejected": -1.7625354528427124, "logps/chosen": -0.08797116577625275, "logps/rejected": -4.506954669952393, "loss": 0.0191, "nll_loss": 0.015043037012219429, "rewards/accuracies": 1.0, "rewards/chosen": -0.004398558288812637, "rewards/margins": 0.2209491729736328, "rewards/rejected": -0.22534772753715515, "step": 550 }, { "epoch": 1.8604651162790697, "grad_norm": 1.96875, "learning_rate": 2.1128856368212917e-06, "log_odds_chosen": 9.888033866882324, "log_odds_ratio": -0.00335489958524704, "logits/chosen": -1.7607628107070923, "logits/rejected": -1.7624956369400024, "logps/chosen": -0.06476293504238129, "logps/rejected": -5.111817359924316, "loss": 0.0194, "nll_loss": 0.013030583038926125, "rewards/accuracies": 1.0, "rewards/chosen": -0.003238147124648094, "rewards/margins": 0.2523527443408966, "rewards/rejected": -0.25559088587760925, "step": 560 }, { "epoch": 1.893687707641196, "grad_norm": 1.6328125, "learning_rate": 2.0942695414584777e-06, "log_odds_chosen": 8.265462875366211, "log_odds_ratio": -0.017327692359685898, "logits/chosen": -1.7851202487945557, "logits/rejected": -1.7864612340927124, "logps/chosen": -0.10817401111125946, "logps/rejected": -4.388330459594727, "loss": 0.0191, "nll_loss": 0.01878109760582447, "rewards/accuracies": 1.0, "rewards/chosen": -0.005408700555562973, "rewards/margins": 0.21400780975818634, "rewards/rejected": -0.2194165289402008, "step": 570 }, { "epoch": 1.9269102990033222, "grad_norm": 1.2265625, "learning_rate": 2.0761369963434992e-06, "log_odds_chosen": 8.885993003845215, "log_odds_ratio": -0.027743179351091385, "logits/chosen": -1.7333558797836304, "logits/rejected": -1.7339531183242798, "logps/chosen": -0.1336405724287033, "logps/rejected": -4.482719421386719, "loss": 0.0201, "nll_loss": 0.015746701508760452, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0066820294596254826, "rewards/margins": 0.21745392680168152, "rewards/rejected": -0.22413596510887146, "step": 580 }, { "epoch": 1.9601328903654485, "grad_norm": 2.171875, "learning_rate": 2.058467423981546e-06, "log_odds_chosen": 9.54238224029541, "log_odds_ratio": -0.018449265509843826, "logits/chosen": -1.7866191864013672, "logits/rejected": -1.7889735698699951, "logps/chosen": -0.10259035974740982, "logps/rejected": -4.987481117248535, "loss": 0.0187, "nll_loss": 0.0172494538128376, "rewards/accuracies": 1.0, "rewards/chosen": -0.005129518453031778, "rewards/margins": 0.2442445456981659, "rewards/rejected": -0.24937407672405243, "step": 590 }, { "epoch": 1.9933554817275747, "grad_norm": 0.890625, "learning_rate": 2.0412414523193154e-06, "log_odds_chosen": 9.205097198486328, "log_odds_ratio": -0.005630264058709145, "logits/chosen": -1.7373039722442627, "logits/rejected": -1.7389856576919556, "logps/chosen": -0.09646005928516388, "logps/rejected": -4.7054619789123535, "loss": 0.0182, "nll_loss": 0.02860497497022152, "rewards/accuracies": 1.0, "rewards/chosen": -0.004823002498596907, "rewards/margins": 0.2304501086473465, "rewards/rejected": -0.235273078083992, "step": 600 }, { "epoch": 2.026578073089701, "grad_norm": 1.4921875, "learning_rate": 2.0244408254472904e-06, "log_odds_chosen": 10.055734634399414, "log_odds_ratio": -0.011299138888716698, "logits/chosen": -1.7703691720962524, "logits/rejected": -1.771695852279663, "logps/chosen": -0.06641928851604462, "logps/rejected": -5.32825231552124, "loss": 0.017, "nll_loss": 0.013218941166996956, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033209645189344883, "rewards/margins": 0.26309165358543396, "rewards/rejected": -0.2664126455783844, "step": 610 }, { "epoch": 2.0598006644518274, "grad_norm": 1.3671875, "learning_rate": 2.0080483222562476e-06, "log_odds_chosen": 10.432465553283691, "log_odds_ratio": -0.016602743417024612, "logits/chosen": -1.8167240619659424, "logits/rejected": -1.8178882598876953, "logps/chosen": -0.05541493743658066, "logps/rejected": -5.0302910804748535, "loss": 0.0145, "nll_loss": 0.015641603618860245, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0027707472909241915, "rewards/margins": 0.24874380230903625, "rewards/rejected": -0.25151461362838745, "step": 620 }, { "epoch": 2.0930232558139537, "grad_norm": 0.7421875, "learning_rate": 1.9920476822239895e-06, "log_odds_chosen": 10.32500171661377, "log_odds_ratio": -0.0108437929302454, "logits/chosen": -1.6814196109771729, "logits/rejected": -1.6834462881088257, "logps/chosen": -0.07114370167255402, "logps/rejected": -5.47824764251709, "loss": 0.0146, "nll_loss": 0.012577347457408905, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.003557185409590602, "rewards/margins": 0.270355224609375, "rewards/rejected": -0.27391237020492554, "step": 630 }, { "epoch": 2.12624584717608, "grad_norm": 1.7734375, "learning_rate": 1.976423537605237e-06, "log_odds_chosen": 9.84516429901123, "log_odds_ratio": -0.007125245872884989, "logits/chosen": -1.823743224143982, "logits/rejected": -1.8290736675262451, "logps/chosen": -0.06525563448667526, "logps/rejected": -5.315881729125977, "loss": 0.0157, "nll_loss": 0.014731844887137413, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032627820037305355, "rewards/margins": 0.2625313103199005, "rewards/rejected": -0.2657940983772278, "step": 640 }, { "epoch": 2.159468438538206, "grad_norm": 1.140625, "learning_rate": 1.961161351381841e-06, "log_odds_chosen": 11.63329029083252, "log_odds_ratio": -0.0004948956775479019, "logits/chosen": -1.805872917175293, "logits/rejected": -1.8104311227798462, "logps/chosen": -0.04712063446640968, "logps/rejected": -6.125610828399658, "loss": 0.0142, "nll_loss": 0.013872918672859669, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023560314439237118, "rewards/margins": 0.3039245009422302, "rewards/rejected": -0.30628055334091187, "step": 650 }, { "epoch": 2.1926910299003324, "grad_norm": 1.640625, "learning_rate": 1.9462473604038077e-06, "log_odds_chosen": 10.331804275512695, "log_odds_ratio": -0.015263216570019722, "logits/chosen": -1.8137140274047852, "logits/rejected": -1.8163013458251953, "logps/chosen": -0.05831971764564514, "logps/rejected": -5.453424453735352, "loss": 0.0144, "nll_loss": 0.014606691896915436, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.002915985882282257, "rewards/margins": 0.26975521445274353, "rewards/rejected": -0.2726712226867676, "step": 660 }, { "epoch": 2.2259136212624586, "grad_norm": 2.703125, "learning_rate": 1.9316685232156397e-06, "log_odds_chosen": 10.702049255371094, "log_odds_ratio": -0.018240805715322495, "logits/chosen": -1.913975715637207, "logits/rejected": -1.9187599420547485, "logps/chosen": -0.07275418192148209, "logps/rejected": -5.977799892425537, "loss": 0.0142, "nll_loss": 0.015033453702926636, "rewards/accuracies": 1.0, "rewards/chosen": -0.0036377091892063618, "rewards/margins": 0.2952522933483124, "rewards/rejected": -0.29889002442359924, "step": 670 }, { "epoch": 2.259136212624585, "grad_norm": 1.0546875, "learning_rate": 1.917412472118426e-06, "log_odds_chosen": 11.560079574584961, "log_odds_ratio": -0.0027265329845249653, "logits/chosen": -1.849691390991211, "logits/rejected": -1.8556410074234009, "logps/chosen": -0.05384901165962219, "logps/rejected": -6.353396415710449, "loss": 0.0171, "nll_loss": 0.02180541306734085, "rewards/accuracies": 1.0, "rewards/chosen": -0.0026924503035843372, "rewards/margins": 0.31497737765312195, "rewards/rejected": -0.3176698088645935, "step": 680 }, { "epoch": 2.292358803986711, "grad_norm": 1.1875, "learning_rate": 1.9034674690672024e-06, "log_odds_chosen": 11.148561477661133, "log_odds_ratio": -0.00223861588165164, "logits/chosen": -1.8589222431182861, "logits/rejected": -1.8631727695465088, "logps/chosen": -0.06614092737436295, "logps/rejected": -5.854241371154785, "loss": 0.0151, "nll_loss": 0.01388646848499775, "rewards/accuracies": 1.0, "rewards/chosen": -0.0033070463687181473, "rewards/margins": 0.28940504789352417, "rewards/rejected": -0.2927120625972748, "step": 690 }, { "epoch": 2.3255813953488373, "grad_norm": 0.72265625, "learning_rate": 1.8898223650461362e-06, "log_odds_chosen": 10.924173355102539, "log_odds_ratio": -0.006120534148067236, "logits/chosen": -1.833099365234375, "logits/rejected": -1.841202974319458, "logps/chosen": -0.045662157237529755, "logps/rejected": -5.663559913635254, "loss": 0.0141, "nll_loss": 0.014723509550094604, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022831077221781015, "rewards/margins": 0.2808949053287506, "rewards/rejected": -0.2831780016422272, "step": 700 }, { "epoch": 2.3588039867109636, "grad_norm": 1.546875, "learning_rate": 1.876466562602004e-06, "log_odds_chosen": 11.659400939941406, "log_odds_ratio": -0.007115496788173914, "logits/chosen": -1.8497368097305298, "logits/rejected": -1.8600342273712158, "logps/chosen": -0.043242715299129486, "logps/rejected": -5.790225028991699, "loss": 0.014, "nll_loss": 0.014149373397231102, "rewards/accuracies": 1.0, "rewards/chosen": -0.002162135671824217, "rewards/margins": 0.28734907507896423, "rewards/rejected": -0.28951120376586914, "step": 710 }, { "epoch": 2.39202657807309, "grad_norm": 1.703125, "learning_rate": 1.863389981249825e-06, "log_odds_chosen": 11.449972152709961, "log_odds_ratio": -0.005292683839797974, "logits/chosen": -1.9128930568695068, "logits/rejected": -1.9192262887954712, "logps/chosen": -0.043063901364803314, "logps/rejected": -5.712512016296387, "loss": 0.0141, "nll_loss": 0.014921635389328003, "rewards/accuracies": 1.0, "rewards/chosen": -0.0021531949751079082, "rewards/margins": 0.2834724187850952, "rewards/rejected": -0.2856256365776062, "step": 720 }, { "epoch": 2.425249169435216, "grad_norm": 3.125, "learning_rate": 1.8505830254940132e-06, "log_odds_chosen": 10.498836517333984, "log_odds_ratio": -0.004357654135674238, "logits/chosen": -1.881967306137085, "logits/rejected": -1.8853543996810913, "logps/chosen": -0.03361859172582626, "logps/rejected": -5.354216575622559, "loss": 0.0141, "nll_loss": 0.011369029060006142, "rewards/accuracies": 1.0, "rewards/chosen": -0.0016809297958388925, "rewards/margins": 0.26602986454963684, "rewards/rejected": -0.2677108347415924, "step": 730 }, { "epoch": 2.4584717607973423, "grad_norm": 1.5625, "learning_rate": 1.8380365552345197e-06, "log_odds_chosen": 10.925847053527832, "log_odds_ratio": -0.003954787738621235, "logits/chosen": -1.8275858163833618, "logits/rejected": -1.8305232524871826, "logps/chosen": -0.06278284639120102, "logps/rejected": -5.769686698913574, "loss": 0.0141, "nll_loss": 0.013375637121498585, "rewards/accuracies": 1.0, "rewards/chosen": -0.003139142645522952, "rewards/margins": 0.285345196723938, "rewards/rejected": -0.2884843945503235, "step": 740 }, { "epoch": 2.4916943521594686, "grad_norm": 0.640625, "learning_rate": 1.8257418583505536e-06, "log_odds_chosen": 10.625173568725586, "log_odds_ratio": -0.0035576275549829006, "logits/chosen": -1.7775007486343384, "logits/rejected": -1.7832441329956055, "logps/chosen": -0.04924372583627701, "logps/rejected": -5.4554243087768555, "loss": 0.0151, "nll_loss": 0.015531172044575214, "rewards/accuracies": 1.0, "rewards/chosen": -0.002462186384946108, "rewards/margins": 0.2703090310096741, "rewards/rejected": -0.2727712094783783, "step": 750 }, { "epoch": 2.524916943521595, "grad_norm": 0.72265625, "learning_rate": 1.8136906252750293e-06, "log_odds_chosen": 11.038446426391602, "log_odds_ratio": -0.0013516563922166824, "logits/chosen": -1.81307053565979, "logits/rejected": -1.8172311782836914, "logps/chosen": -0.037691373378038406, "logps/rejected": -5.5027947425842285, "loss": 0.0155, "nll_loss": 0.011486930772662163, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018845684826374054, "rewards/margins": 0.2732551693916321, "rewards/rejected": -0.275139719247818, "step": 760 }, { "epoch": 2.558139534883721, "grad_norm": 1.1015625, "learning_rate": 1.801874925391118e-06, "log_odds_chosen": 11.234697341918945, "log_odds_ratio": -0.005816595163196325, "logits/chosen": -1.8077905178070068, "logits/rejected": -1.8116910457611084, "logps/chosen": -0.06007402017712593, "logps/rejected": -5.977658748626709, "loss": 0.0135, "nll_loss": 0.013816078193485737, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030037013348191977, "rewards/margins": 0.2958792448043823, "rewards/rejected": -0.29888293147087097, "step": 770 }, { "epoch": 2.5913621262458473, "grad_norm": 0.65234375, "learning_rate": 1.7902871850985824e-06, "log_odds_chosen": 11.535958290100098, "log_odds_ratio": -0.009717768058180809, "logits/chosen": -1.8791577816009521, "logits/rejected": -1.883548378944397, "logps/chosen": -0.051692645996809006, "logps/rejected": -5.989034652709961, "loss": 0.014, "nll_loss": 0.014420375227928162, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0025846322532743216, "rewards/margins": 0.29686713218688965, "rewards/rejected": -0.2994517385959625, "step": 780 }, { "epoch": 2.6245847176079735, "grad_norm": 0.76171875, "learning_rate": 1.7789201674120502e-06, "log_odds_chosen": 10.751108169555664, "log_odds_ratio": -0.01122227031737566, "logits/chosen": -1.8293044567108154, "logits/rejected": -1.8323638439178467, "logps/chosen": -0.05979070067405701, "logps/rejected": -5.497213363647461, "loss": 0.0157, "nll_loss": 0.014203609898686409, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0029895349871367216, "rewards/margins": 0.2718711495399475, "rewards/rejected": -0.274860680103302, "step": 790 }, { "epoch": 2.6578073089700998, "grad_norm": 0.5859375, "learning_rate": 1.7677669529663689e-06, "log_odds_chosen": 10.65892219543457, "log_odds_ratio": -0.006628723349422216, "logits/chosen": -1.8738857507705688, "logits/rejected": -1.877375602722168, "logps/chosen": -0.06150083988904953, "logps/rejected": -5.296011924743652, "loss": 0.0149, "nll_loss": 0.013364692218601704, "rewards/accuracies": 1.0, "rewards/chosen": -0.0030750418081879616, "rewards/margins": 0.2617255747318268, "rewards/rejected": -0.2648006081581116, "step": 800 }, { "epoch": 2.691029900332226, "grad_norm": 1.6640625, "learning_rate": 1.7568209223157664e-06, "log_odds_chosen": 11.236889839172363, "log_odds_ratio": -0.004805346950888634, "logits/chosen": -1.9045976400375366, "logits/rejected": -1.9087021350860596, "logps/chosen": -0.047368817031383514, "logps/rejected": -5.490727424621582, "loss": 0.0147, "nll_loss": 0.0136332456022501, "rewards/accuracies": 1.0, "rewards/chosen": -0.0023684408515691757, "rewards/margins": 0.2721679210662842, "rewards/rejected": -0.2745364010334015, "step": 810 }, { "epoch": 2.7242524916943522, "grad_norm": 0.76953125, "learning_rate": 1.7460757394239458e-06, "log_odds_chosen": 11.060879707336426, "log_odds_ratio": -0.0016804604092612863, "logits/chosen": -1.8651930093765259, "logits/rejected": -1.8689038753509521, "logps/chosen": -0.036452341824769974, "logps/rejected": -5.505632400512695, "loss": 0.013, "nll_loss": 0.011724123731255531, "rewards/accuracies": 1.0, "rewards/chosen": -0.0018226171378046274, "rewards/margins": 0.2734590172767639, "rewards/rejected": -0.2752816081047058, "step": 820 }, { "epoch": 2.7574750830564785, "grad_norm": 1.3828125, "learning_rate": 1.7355253362515584e-06, "log_odds_chosen": 12.019643783569336, "log_odds_ratio": -0.0025712151546031237, "logits/chosen": -1.9404821395874023, "logits/rejected": -1.9456230401992798, "logps/chosen": -0.04571037366986275, "logps/rejected": -6.3564581871032715, "loss": 0.0144, "nll_loss": 0.01494914572685957, "rewards/accuracies": 1.0, "rewards/chosen": -0.0022855184506624937, "rewards/margins": 0.31553739309310913, "rewards/rejected": -0.3178229033946991, "step": 830 }, { "epoch": 2.7906976744186047, "grad_norm": 1.1484375, "learning_rate": 1.7251638983558855e-06, "log_odds_chosen": 10.963711738586426, "log_odds_ratio": -0.004456724040210247, "logits/chosen": -1.8980438709259033, "logits/rejected": -1.9045253992080688, "logps/chosen": -0.04593021795153618, "logps/rejected": -5.473552703857422, "loss": 0.0145, "nll_loss": 0.017903735861182213, "rewards/accuracies": 1.0, "rewards/chosen": -0.002296511083841324, "rewards/margins": 0.27138110995292664, "rewards/rejected": -0.27367764711380005, "step": 840 }, { "epoch": 2.823920265780731, "grad_norm": 0.95703125, "learning_rate": 1.7149858514250883e-06, "log_odds_chosen": 10.605644226074219, "log_odds_ratio": -0.004892362747341394, "logits/chosen": -1.8568840026855469, "logits/rejected": -1.867110013961792, "logps/chosen": -0.0643405169248581, "logps/rejected": -5.792882919311523, "loss": 0.0137, "nll_loss": 0.01375966053456068, "rewards/accuracies": 1.0, "rewards/chosen": -0.0032170258928090334, "rewards/margins": 0.2864271104335785, "rewards/rejected": -0.28964415192604065, "step": 850 }, { "epoch": 2.857142857142857, "grad_norm": 0.578125, "learning_rate": 1.704985848676184e-06, "log_odds_chosen": 10.766576766967773, "log_odds_ratio": -0.01473341602832079, "logits/chosen": -1.8542922735214233, "logits/rejected": -1.8589084148406982, "logps/chosen": -0.07984187453985214, "logps/rejected": -5.777710914611816, "loss": 0.0152, "nll_loss": 0.020643722265958786, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.00399209326133132, "rewards/margins": 0.2848934531211853, "rewards/rejected": -0.28888556361198425, "step": 860 }, { "epoch": 2.8903654485049834, "grad_norm": 0.4921875, "learning_rate": 1.6951587590520263e-06, "log_odds_chosen": 11.57789421081543, "log_odds_ratio": -0.0173480324447155, "logits/chosen": -1.7418874502182007, "logits/rejected": -1.7448689937591553, "logps/chosen": -0.054336708039045334, "logps/rejected": -5.907016754150391, "loss": 0.0129, "nll_loss": 0.010191375389695168, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.002716835355386138, "rewards/margins": 0.2926340401172638, "rewards/rejected": -0.2953508496284485, "step": 870 }, { "epoch": 2.9235880398671097, "grad_norm": 0.6875, "learning_rate": 1.6854996561581053e-06, "log_odds_chosen": 11.569136619567871, "log_odds_ratio": -0.011355452239513397, "logits/chosen": -1.959159255027771, "logits/rejected": -1.9617881774902344, "logps/chosen": -0.06317956745624542, "logps/rejected": -6.309741020202637, "loss": 0.0135, "nll_loss": 0.014484817162156105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0031589786522090435, "rewards/margins": 0.3123281002044678, "rewards/rejected": -0.3154870867729187, "step": 880 }, { "epoch": 2.956810631229236, "grad_norm": 0.5625, "learning_rate": 1.6760038078849776e-06, "log_odds_chosen": 11.66430377960205, "log_odds_ratio": -0.001645472482778132, "logits/chosen": -1.8734182119369507, "logits/rejected": -1.8795799016952515, "logps/chosen": -0.04230424761772156, "logps/rejected": -5.725351333618164, "loss": 0.0143, "nll_loss": 0.013855007477104664, "rewards/accuracies": 1.0, "rewards/chosen": -0.002115212380886078, "rewards/margins": 0.28415238857269287, "rewards/rejected": -0.28626757860183716, "step": 890 }, { "epoch": 2.990033222591362, "grad_norm": 0.9140625, "learning_rate": 1.6666666666666667e-06, "log_odds_chosen": 11.357660293579102, "log_odds_ratio": -0.0027510782238096, "logits/chosen": -1.856702446937561, "logits/rejected": -1.8652187585830688, "logps/chosen": -0.05658254772424698, "logps/rejected": -5.779760360717773, "loss": 0.0168, "nll_loss": 0.03194582462310791, "rewards/accuracies": 1.0, "rewards/chosen": -0.0028291274793446064, "rewards/margins": 0.2861589193344116, "rewards/rejected": -0.28898805379867554, "step": 900 }, { "epoch": 3.0, "step": 903, "total_flos": 0.0, "train_loss": 0.16268803322004982, "train_runtime": 7294.4356, "train_samples_per_second": 7.915, "train_steps_per_second": 0.124 } ], "logging_steps": 10, "max_steps": 903, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }