{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.998631074606434, "eval_steps": 500, "global_step": 730, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0027378507871321013, "grad_norm": 5.325876235961914, "learning_rate": 9.986301369863014e-07, "log_odds_chosen": -0.7948390245437622, "log_odds_ratio": -1.3712958097457886, "logits/chosen": 0.004306085407733917, "logits/rejected": -0.02672051638364792, "logps/chosen": -3.6178853511810303, "logps/rejected": -2.826040506362915, "loss": 2.837, "nll_loss": 2.699842691421509, "rewards/accuracies": 0.5, "rewards/chosen": -0.3617885708808899, "rewards/margins": -0.07918448001146317, "rewards/rejected": -0.28260406851768494, "step": 1 }, { "epoch": 0.0054757015742642025, "grad_norm": 5.367353439331055, "learning_rate": 9.972602739726028e-07, "log_odds_chosen": -0.35803794860839844, "log_odds_ratio": -1.0826644897460938, "logits/chosen": -0.05404001474380493, "logits/rejected": -0.08446118980646133, "logps/chosen": -3.4454493522644043, "logps/rejected": -3.095252752304077, "loss": 2.8593, "nll_loss": 2.751082181930542, "rewards/accuracies": 0.5, "rewards/chosen": -0.344544917345047, "rewards/margins": -0.0350196436047554, "rewards/rejected": -0.3095252513885498, "step": 2 }, { "epoch": 0.008213552361396304, "grad_norm": 6.374548435211182, "learning_rate": 9.95890410958904e-07, "log_odds_chosen": -0.8186060190200806, "log_odds_ratio": -1.3569588661193848, "logits/chosen": 0.0552675724029541, "logits/rejected": 0.11841100454330444, "logps/chosen": -4.426992893218994, "logps/rejected": -3.617198944091797, "loss": 3.0276, "nll_loss": 2.8918769359588623, "rewards/accuracies": 0.25, "rewards/chosen": -0.4426993131637573, "rewards/margins": -0.08097940683364868, "rewards/rejected": -0.36171990633010864, "step": 3 }, { "epoch": 0.010951403148528405, "grad_norm": 6.276937961578369, "learning_rate": 9.945205479452054e-07, "log_odds_chosen": -1.4323616027832031, "log_odds_ratio": -1.8015589714050293, "logits/chosen": 0.17375428974628448, "logits/rejected": 0.2747578024864197, "logps/chosen": -4.302623748779297, "logps/rejected": -2.8863744735717773, "loss": 3.0225, "nll_loss": 2.842334032058716, "rewards/accuracies": 0.25, "rewards/chosen": -0.43026232719421387, "rewards/margins": -0.14162486791610718, "rewards/rejected": -0.2886374592781067, "step": 4 }, { "epoch": 0.013689253935660506, "grad_norm": 5.779869079589844, "learning_rate": 9.931506849315068e-07, "log_odds_chosen": -0.638068437576294, "log_odds_ratio": -1.280259132385254, "logits/chosen": -0.013226918876171112, "logits/rejected": -0.03952325880527496, "logps/chosen": -4.334023952484131, "logps/rejected": -3.690998077392578, "loss": 2.9025, "nll_loss": 2.7744553089141846, "rewards/accuracies": 0.5, "rewards/chosen": -0.433402419090271, "rewards/margins": -0.06430260092020035, "rewards/rejected": -0.36909979581832886, "step": 5 }, { "epoch": 0.01642710472279261, "grad_norm": 5.636603355407715, "learning_rate": 9.917808219178082e-07, "log_odds_chosen": -0.2390626072883606, "log_odds_ratio": -0.9677859544754028, "logits/chosen": -0.13452304899692535, "logits/rejected": -0.09584984183311462, "logps/chosen": -3.173994302749634, "logps/rejected": -2.930485248565674, "loss": 2.863, "nll_loss": 2.7661867141723633, "rewards/accuracies": 0.5, "rewards/chosen": -0.31739944219589233, "rewards/margins": -0.024350881576538086, "rewards/rejected": -0.29304856061935425, "step": 6 }, { "epoch": 0.019164955509924708, "grad_norm": 5.4650750160217285, "learning_rate": 9.904109589041094e-07, "log_odds_chosen": -0.6746731996536255, "log_odds_ratio": -1.2391283512115479, "logits/chosen": 0.04482875019311905, "logits/rejected": 0.026651956140995026, "logps/chosen": -3.639770746231079, "logps/rejected": -2.950205087661743, "loss": 2.8389, "nll_loss": 2.7149813175201416, "rewards/accuracies": 0.375, "rewards/chosen": -0.3639770448207855, "rewards/margins": -0.06895654648542404, "rewards/rejected": -0.29502052068710327, "step": 7 }, { "epoch": 0.02190280629705681, "grad_norm": 4.4891581535339355, "learning_rate": 9.89041095890411e-07, "log_odds_chosen": 0.07528090476989746, "log_odds_ratio": -0.7195493578910828, "logits/chosen": 0.07742876559495926, "logits/rejected": 0.013959864154458046, "logps/chosen": -2.377894401550293, "logps/rejected": -2.429080009460449, "loss": 2.711, "nll_loss": 2.639007091522217, "rewards/accuracies": 0.625, "rewards/chosen": -0.23778943717479706, "rewards/margins": 0.00511857308447361, "rewards/rejected": -0.24290801584720612, "step": 8 }, { "epoch": 0.024640657084188913, "grad_norm": 6.0114336013793945, "learning_rate": 9.876712328767123e-07, "log_odds_chosen": -0.702069878578186, "log_odds_ratio": -1.2055552005767822, "logits/chosen": -0.04755246639251709, "logits/rejected": 0.04538070037961006, "logps/chosen": -4.668376445770264, "logps/rejected": -3.9819188117980957, "loss": 2.9717, "nll_loss": 2.85113263130188, "rewards/accuracies": 0.25, "rewards/chosen": -0.466837614774704, "rewards/margins": -0.06864573061466217, "rewards/rejected": -0.3981918692588806, "step": 9 }, { "epoch": 0.02737850787132101, "grad_norm": 5.077043056488037, "learning_rate": 9.863013698630137e-07, "log_odds_chosen": -0.5381810665130615, "log_odds_ratio": -1.0643706321716309, "logits/chosen": 0.06925356388092041, "logits/rejected": 0.030795082449913025, "logps/chosen": -3.136054039001465, "logps/rejected": -2.6327905654907227, "loss": 2.804, "nll_loss": 2.6975343227386475, "rewards/accuracies": 0.25, "rewards/chosen": -0.3136054277420044, "rewards/margins": -0.05032633990049362, "rewards/rejected": -0.2632790803909302, "step": 10 }, { "epoch": 0.030116358658453114, "grad_norm": 5.173473358154297, "learning_rate": 9.84931506849315e-07, "log_odds_chosen": -0.0710364431142807, "log_odds_ratio": -0.8080786466598511, "logits/chosen": 0.06147963926196098, "logits/rejected": 0.0687752217054367, "logps/chosen": -2.7179152965545654, "logps/rejected": -2.585996389389038, "loss": 2.7109, "nll_loss": 2.6300885677337646, "rewards/accuracies": 0.375, "rewards/chosen": -0.2717915177345276, "rewards/margins": -0.013191889971494675, "rewards/rejected": -0.2585996389389038, "step": 11 }, { "epoch": 0.03285420944558522, "grad_norm": 6.352484703063965, "learning_rate": 9.835616438356163e-07, "log_odds_chosen": -1.0600309371948242, "log_odds_ratio": -1.6905040740966797, "logits/chosen": 0.04360724613070488, "logits/rejected": 0.19397321343421936, "logps/chosen": -4.162622928619385, "logps/rejected": -3.1176581382751465, "loss": 3.0347, "nll_loss": 2.865666389465332, "rewards/accuracies": 0.25, "rewards/chosen": -0.41626226902008057, "rewards/margins": -0.10449647903442383, "rewards/rejected": -0.3117658197879791, "step": 12 }, { "epoch": 0.03559206023271732, "grad_norm": 4.913181781768799, "learning_rate": 9.821917808219177e-07, "log_odds_chosen": -0.020423416048288345, "log_odds_ratio": -0.7258975505828857, "logits/chosen": -0.008023146539926529, "logits/rejected": -0.03850743919610977, "logps/chosen": -2.985048294067383, "logps/rejected": -2.965977907180786, "loss": 2.6823, "nll_loss": 2.609661102294922, "rewards/accuracies": 0.375, "rewards/chosen": -0.29850485920906067, "rewards/margins": -0.001907050609588623, "rewards/rejected": -0.29659780859947205, "step": 13 }, { "epoch": 0.038329911019849415, "grad_norm": 5.877553939819336, "learning_rate": 9.808219178082191e-07, "log_odds_chosen": -0.469057559967041, "log_odds_ratio": -1.1004008054733276, "logits/chosen": 0.029794173315167427, "logits/rejected": 0.052903711795806885, "logps/chosen": -3.5851757526397705, "logps/rejected": -3.100710868835449, "loss": 2.8954, "nll_loss": 2.785405158996582, "rewards/accuracies": 0.375, "rewards/chosen": -0.3585175573825836, "rewards/margins": -0.04844648018479347, "rewards/rejected": -0.31007108092308044, "step": 14 }, { "epoch": 0.04106776180698152, "grad_norm": 6.193238735198975, "learning_rate": 9.794520547945205e-07, "log_odds_chosen": -0.6505619287490845, "log_odds_ratio": -1.4907243251800537, "logits/chosen": 0.005648050457239151, "logits/rejected": 0.07195015996694565, "logps/chosen": -3.832523822784424, "logps/rejected": -3.108867883682251, "loss": 2.9504, "nll_loss": 2.801290512084961, "rewards/accuracies": 0.5, "rewards/chosen": -0.38325241208076477, "rewards/margins": -0.07236562669277191, "rewards/rejected": -0.31088680028915405, "step": 15 }, { "epoch": 0.04380561259411362, "grad_norm": 5.038293361663818, "learning_rate": 9.78082191780822e-07, "log_odds_chosen": -0.45255395770072937, "log_odds_ratio": -1.1312205791473389, "logits/chosen": -0.023001179099082947, "logits/rejected": -0.0563918799161911, "logps/chosen": -3.470187187194824, "logps/rejected": -2.984997272491455, "loss": 2.828, "nll_loss": 2.7148518562316895, "rewards/accuracies": 0.5, "rewards/chosen": -0.3470187485218048, "rewards/margins": -0.04851900041103363, "rewards/rejected": -0.2984997034072876, "step": 16 }, { "epoch": 0.04654346338124572, "grad_norm": 6.355246543884277, "learning_rate": 9.767123287671234e-07, "log_odds_chosen": -0.48295435309410095, "log_odds_ratio": -1.158775806427002, "logits/chosen": 0.04277827963232994, "logits/rejected": 0.08843998610973358, "logps/chosen": -3.90274715423584, "logps/rejected": -3.4281516075134277, "loss": 2.9701, "nll_loss": 2.854196548461914, "rewards/accuracies": 0.5, "rewards/chosen": -0.39027467370033264, "rewards/margins": -0.047459542751312256, "rewards/rejected": -0.3428151607513428, "step": 17 }, { "epoch": 0.049281314168377825, "grad_norm": 5.158921718597412, "learning_rate": 9.753424657534246e-07, "log_odds_chosen": -0.18216584622859955, "log_odds_ratio": -0.9116010069847107, "logits/chosen": 0.08967956900596619, "logits/rejected": 0.05615648999810219, "logps/chosen": -3.02425479888916, "logps/rejected": -2.8011155128479004, "loss": 2.8052, "nll_loss": 2.713996410369873, "rewards/accuracies": 0.5, "rewards/chosen": -0.30242547392845154, "rewards/margins": -0.022313890978693962, "rewards/rejected": -0.2801115810871124, "step": 18 }, { "epoch": 0.05201916495550993, "grad_norm": 4.8966779708862305, "learning_rate": 9.73972602739726e-07, "log_odds_chosen": -0.5640780329704285, "log_odds_ratio": -1.1776620149612427, "logits/chosen": -0.14784425497055054, "logits/rejected": -0.2602556347846985, "logps/chosen": -3.8133745193481445, "logps/rejected": -3.2284460067749023, "loss": 2.7761, "nll_loss": 2.6583518981933594, "rewards/accuracies": 0.375, "rewards/chosen": -0.3813374638557434, "rewards/margins": -0.058492835611104965, "rewards/rejected": -0.32284462451934814, "step": 19 }, { "epoch": 0.05475701574264202, "grad_norm": 4.884150981903076, "learning_rate": 9.726027397260274e-07, "log_odds_chosen": -0.053273797035217285, "log_odds_ratio": -0.8726117610931396, "logits/chosen": 0.1603344827890396, "logits/rejected": 0.12810952961444855, "logps/chosen": -2.7467539310455322, "logps/rejected": -2.663269281387329, "loss": 2.6424, "nll_loss": 2.5551464557647705, "rewards/accuracies": 0.625, "rewards/chosen": -0.2746753990650177, "rewards/margins": -0.008348453789949417, "rewards/rejected": -0.2663269340991974, "step": 20 }, { "epoch": 0.057494866529774126, "grad_norm": 5.123602390289307, "learning_rate": 9.712328767123286e-07, "log_odds_chosen": 0.597856879234314, "log_odds_ratio": -0.5302311182022095, "logits/chosen": 0.10386967658996582, "logits/rejected": 0.08535340428352356, "logps/chosen": -2.477381944656372, "logps/rejected": -3.0170912742614746, "loss": 2.7632, "nll_loss": 2.710167646408081, "rewards/accuracies": 0.75, "rewards/chosen": -0.24773819744586945, "rewards/margins": 0.05397092550992966, "rewards/rejected": -0.3017091155052185, "step": 21 }, { "epoch": 0.06023271731690623, "grad_norm": 5.332507610321045, "learning_rate": 9.6986301369863e-07, "log_odds_chosen": 0.4000973105430603, "log_odds_ratio": -0.7710477113723755, "logits/chosen": -0.2360171675682068, "logits/rejected": -0.17117254436016083, "logps/chosen": -3.1005032062530518, "logps/rejected": -3.46038818359375, "loss": 2.8026, "nll_loss": 2.7254815101623535, "rewards/accuracies": 0.625, "rewards/chosen": -0.3100503087043762, "rewards/margins": 0.03598848730325699, "rewards/rejected": -0.346038818359375, "step": 22 }, { "epoch": 0.06297056810403832, "grad_norm": 5.614559173583984, "learning_rate": 9.684931506849314e-07, "log_odds_chosen": -0.10460536926984787, "log_odds_ratio": -0.9382399320602417, "logits/chosen": -0.03645119071006775, "logits/rejected": -0.04790172725915909, "logps/chosen": -3.558135509490967, "logps/rejected": -3.4187960624694824, "loss": 2.784, "nll_loss": 2.690208673477173, "rewards/accuracies": 0.625, "rewards/chosen": -0.35581356287002563, "rewards/margins": -0.01393393985927105, "rewards/rejected": -0.34187963604927063, "step": 23 }, { "epoch": 0.06570841889117043, "grad_norm": 5.184487819671631, "learning_rate": 9.671232876712329e-07, "log_odds_chosen": -0.1957252025604248, "log_odds_ratio": -0.8964800238609314, "logits/chosen": 0.016660287976264954, "logits/rejected": 0.009348414838314056, "logps/chosen": -3.219160556793213, "logps/rejected": -3.0224804878234863, "loss": 2.7843, "nll_loss": 2.694638729095459, "rewards/accuracies": 0.375, "rewards/chosen": -0.3219160735607147, "rewards/margins": -0.019668016582727432, "rewards/rejected": -0.3022480309009552, "step": 24 }, { "epoch": 0.06844626967830253, "grad_norm": 5.400012493133545, "learning_rate": 9.657534246575343e-07, "log_odds_chosen": -0.3845922350883484, "log_odds_ratio": -1.1526037454605103, "logits/chosen": -0.03754670172929764, "logits/rejected": 0.02617911621928215, "logps/chosen": -3.077850580215454, "logps/rejected": -2.647724151611328, "loss": 2.9506, "nll_loss": 2.8353137969970703, "rewards/accuracies": 0.625, "rewards/chosen": -0.3077850341796875, "rewards/margins": -0.043012626469135284, "rewards/rejected": -0.2647724151611328, "step": 25 }, { "epoch": 0.07118412046543464, "grad_norm": 4.83126974105835, "learning_rate": 9.643835616438357e-07, "log_odds_chosen": 0.2840996980667114, "log_odds_ratio": -0.6046136021614075, "logits/chosen": 0.058806292712688446, "logits/rejected": -0.021810440346598625, "logps/chosen": -3.0626206398010254, "logps/rejected": -3.2922115325927734, "loss": 2.7535, "nll_loss": 2.6930365562438965, "rewards/accuracies": 0.875, "rewards/chosen": -0.3062621057033539, "rewards/margins": 0.022959083318710327, "rewards/rejected": -0.3292211592197418, "step": 26 }, { "epoch": 0.07392197125256673, "grad_norm": 4.855805397033691, "learning_rate": 9.630136986301369e-07, "log_odds_chosen": -0.23633955419063568, "log_odds_ratio": -0.9520198106765747, "logits/chosen": -0.02162906900048256, "logits/rejected": -0.06640756875276566, "logps/chosen": -3.4552199840545654, "logps/rejected": -3.1949808597564697, "loss": 2.7233, "nll_loss": 2.6281135082244873, "rewards/accuracies": 0.75, "rewards/chosen": -0.3455219864845276, "rewards/margins": -0.02602391131222248, "rewards/rejected": -0.31949809193611145, "step": 27 }, { "epoch": 0.07665982203969883, "grad_norm": 5.520872116088867, "learning_rate": 9.616438356164383e-07, "log_odds_chosen": -0.09288820624351501, "log_odds_ratio": -0.93186354637146, "logits/chosen": 0.12476132810115814, "logits/rejected": 0.16062240302562714, "logps/chosen": -3.4240076541900635, "logps/rejected": -3.3085784912109375, "loss": 2.8322, "nll_loss": 2.7390356063842773, "rewards/accuracies": 0.5, "rewards/chosen": -0.34240075945854187, "rewards/margins": -0.011542899534106255, "rewards/rejected": -0.3308578431606293, "step": 28 }, { "epoch": 0.07939767282683094, "grad_norm": 5.895773887634277, "learning_rate": 9.602739726027397e-07, "log_odds_chosen": -1.0542347431182861, "log_odds_ratio": -1.4992132186889648, "logits/chosen": 0.20510117709636688, "logits/rejected": 0.2987060546875, "logps/chosen": -4.026015281677246, "logps/rejected": -2.9859957695007324, "loss": 3.0599, "nll_loss": 2.9100136756896973, "rewards/accuracies": 0.375, "rewards/chosen": -0.40260154008865356, "rewards/margins": -0.10400194674730301, "rewards/rejected": -0.29859960079193115, "step": 29 }, { "epoch": 0.08213552361396304, "grad_norm": 5.86776876449585, "learning_rate": 9.58904109589041e-07, "log_odds_chosen": -1.3581258058547974, "log_odds_ratio": -1.905985713005066, "logits/chosen": 0.12062501162290573, "logits/rejected": 0.1980278044939041, "logps/chosen": -4.493518352508545, "logps/rejected": -3.1259045600891113, "loss": 2.9936, "nll_loss": 2.803016185760498, "rewards/accuracies": 0.375, "rewards/chosen": -0.44935184717178345, "rewards/margins": -0.1367613673210144, "rewards/rejected": -0.31259047985076904, "step": 30 }, { "epoch": 0.08487337440109514, "grad_norm": 6.282186031341553, "learning_rate": 9.575342465753423e-07, "log_odds_chosen": -1.2390145063400269, "log_odds_ratio": -1.6391897201538086, "logits/chosen": 0.08628277480602264, "logits/rejected": 0.18436183035373688, "logps/chosen": -4.137444972991943, "logps/rejected": -2.9128832817077637, "loss": 2.9497, "nll_loss": 2.785759210586548, "rewards/accuracies": 0.25, "rewards/chosen": -0.4137445092201233, "rewards/margins": -0.12245617806911469, "rewards/rejected": -0.2912883162498474, "step": 31 }, { "epoch": 0.08761122518822724, "grad_norm": 5.477962493896484, "learning_rate": 9.561643835616437e-07, "log_odds_chosen": -0.2897709608078003, "log_odds_ratio": -1.6469095945358276, "logits/chosen": 0.002665966749191284, "logits/rejected": -0.028004739433526993, "logps/chosen": -3.9941623210906982, "logps/rejected": -3.67787766456604, "loss": 2.91, "nll_loss": 2.745316505432129, "rewards/accuracies": 0.25, "rewards/chosen": -0.3994162380695343, "rewards/margins": -0.03162848949432373, "rewards/rejected": -0.36778777837753296, "step": 32 }, { "epoch": 0.09034907597535935, "grad_norm": 4.968799591064453, "learning_rate": 9.547945205479452e-07, "log_odds_chosen": -0.09502658247947693, "log_odds_ratio": -0.7701165080070496, "logits/chosen": 0.07308821380138397, "logits/rejected": 0.06565812975168228, "logps/chosen": -2.922135353088379, "logps/rejected": -2.8222599029541016, "loss": 2.7277, "nll_loss": 2.6506760120391846, "rewards/accuracies": 0.5, "rewards/chosen": -0.2922135591506958, "rewards/margins": -0.009987544268369675, "rewards/rejected": -0.28222599625587463, "step": 33 }, { "epoch": 0.09308692676249145, "grad_norm": 5.0665998458862305, "learning_rate": 9.534246575342465e-07, "log_odds_chosen": -0.1576860547065735, "log_odds_ratio": -0.8909002542495728, "logits/chosen": 0.19687339663505554, "logits/rejected": 0.11922043561935425, "logps/chosen": -3.4109559059143066, "logps/rejected": -3.2358617782592773, "loss": 2.7657, "nll_loss": 2.6765999794006348, "rewards/accuracies": 0.625, "rewards/chosen": -0.34109559655189514, "rewards/margins": -0.01750941202044487, "rewards/rejected": -0.3235861659049988, "step": 34 }, { "epoch": 0.09582477754962354, "grad_norm": 4.97713565826416, "learning_rate": 9.520547945205479e-07, "log_odds_chosen": -0.4582623243331909, "log_odds_ratio": -1.275357961654663, "logits/chosen": -0.1561899036169052, "logits/rejected": -0.17601940035820007, "logps/chosen": -3.5033769607543945, "logps/rejected": -3.0153675079345703, "loss": 2.6955, "nll_loss": 2.568002223968506, "rewards/accuracies": 0.375, "rewards/chosen": -0.3503377139568329, "rewards/margins": -0.04880094900727272, "rewards/rejected": -0.3015367388725281, "step": 35 }, { "epoch": 0.09856262833675565, "grad_norm": 5.655263423919678, "learning_rate": 9.506849315068493e-07, "log_odds_chosen": -0.5786654949188232, "log_odds_ratio": -1.1101577281951904, "logits/chosen": 0.0034108199179172516, "logits/rejected": 0.09071224182844162, "logps/chosen": -3.2604587078094482, "logps/rejected": -2.6875393390655518, "loss": 2.8995, "nll_loss": 2.788483142852783, "rewards/accuracies": 0.375, "rewards/chosen": -0.3260458707809448, "rewards/margins": -0.057291947305202484, "rewards/rejected": -0.26875394582748413, "step": 36 }, { "epoch": 0.10130047912388775, "grad_norm": 4.954617500305176, "learning_rate": 9.493150684931507e-07, "log_odds_chosen": 0.3866296708583832, "log_odds_ratio": -0.5999068021774292, "logits/chosen": -0.02764936350286007, "logits/rejected": -0.040936123579740524, "logps/chosen": -2.6118736267089844, "logps/rejected": -2.918231964111328, "loss": 2.6294, "nll_loss": 2.5694468021392822, "rewards/accuracies": 0.625, "rewards/chosen": -0.2611873745918274, "rewards/margins": 0.03063584305346012, "rewards/rejected": -0.29182320833206177, "step": 37 }, { "epoch": 0.10403832991101986, "grad_norm": 5.24688720703125, "learning_rate": 9.47945205479452e-07, "log_odds_chosen": -0.2714232802391052, "log_odds_ratio": -1.0010899305343628, "logits/chosen": 0.036227546632289886, "logits/rejected": 0.0893690437078476, "logps/chosen": -3.3653087615966797, "logps/rejected": -3.0536296367645264, "loss": 2.7633, "nll_loss": 2.663212537765503, "rewards/accuracies": 0.625, "rewards/chosen": -0.336530864238739, "rewards/margins": -0.031167881563305855, "rewards/rejected": -0.3053629696369171, "step": 38 }, { "epoch": 0.10677618069815195, "grad_norm": 5.400045394897461, "learning_rate": 9.465753424657534e-07, "log_odds_chosen": -0.035731345415115356, "log_odds_ratio": -0.8203904628753662, "logits/chosen": -0.039293527603149414, "logits/rejected": -0.004623129963874817, "logps/chosen": -3.3981266021728516, "logps/rejected": -3.3737077713012695, "loss": 2.9015, "nll_loss": 2.8195085525512695, "rewards/accuracies": 0.25, "rewards/chosen": -0.33981266617774963, "rewards/margins": -0.0024418868124485016, "rewards/rejected": -0.33737078309059143, "step": 39 }, { "epoch": 0.10951403148528405, "grad_norm": 5.602653980255127, "learning_rate": 9.452054794520548e-07, "log_odds_chosen": 0.05367310717701912, "log_odds_ratio": -0.7894372940063477, "logits/chosen": -0.03544227406382561, "logits/rejected": -0.024303562939167023, "logps/chosen": -3.306229591369629, "logps/rejected": -3.314364194869995, "loss": 2.8346, "nll_loss": 2.7556493282318115, "rewards/accuracies": 0.5, "rewards/chosen": -0.33062297105789185, "rewards/margins": 0.0008134860545396805, "rewards/rejected": -0.331436425447464, "step": 40 }, { "epoch": 0.11225188227241616, "grad_norm": 5.160926342010498, "learning_rate": 9.438356164383561e-07, "log_odds_chosen": -0.8572806119918823, "log_odds_ratio": -1.3036504983901978, "logits/chosen": -0.0058150263503193855, "logits/rejected": -0.0008316696621477604, "logps/chosen": -3.467806339263916, "logps/rejected": -2.633718967437744, "loss": 2.7862, "nll_loss": 2.6558210849761963, "rewards/accuracies": 0.25, "rewards/chosen": -0.3467806577682495, "rewards/margins": -0.08340875804424286, "rewards/rejected": -0.26337188482284546, "step": 41 }, { "epoch": 0.11498973305954825, "grad_norm": 4.942453861236572, "learning_rate": 9.424657534246575e-07, "log_odds_chosen": 0.7019941210746765, "log_odds_ratio": -0.5062806606292725, "logits/chosen": 0.05452834069728851, "logits/rejected": -0.036492928862571716, "logps/chosen": -2.9884114265441895, "logps/rejected": -3.6389312744140625, "loss": 2.7313, "nll_loss": 2.6806559562683105, "rewards/accuracies": 0.75, "rewards/chosen": -0.2988411784172058, "rewards/margins": 0.06505196541547775, "rewards/rejected": -0.3638930916786194, "step": 42 }, { "epoch": 0.11772758384668036, "grad_norm": 5.36843204498291, "learning_rate": 9.410958904109588e-07, "log_odds_chosen": -1.370179533958435, "log_odds_ratio": -1.813017725944519, "logits/chosen": -0.16351664066314697, "logits/rejected": -0.14719775319099426, "logps/chosen": -3.9842123985290527, "logps/rejected": -2.6477725505828857, "loss": 2.8848, "nll_loss": 2.703521728515625, "rewards/accuracies": 0.375, "rewards/chosen": -0.3984212577342987, "rewards/margins": -0.1336439996957779, "rewards/rejected": -0.264777272939682, "step": 43 }, { "epoch": 0.12046543463381246, "grad_norm": 6.346832752227783, "learning_rate": 9.397260273972603e-07, "log_odds_chosen": -0.885704517364502, "log_odds_ratio": -1.7123254537582397, "logits/chosen": 0.09353246539831161, "logits/rejected": 0.16552674770355225, "logps/chosen": -4.375518798828125, "logps/rejected": -3.4759602546691895, "loss": 2.9097, "nll_loss": 2.7384369373321533, "rewards/accuracies": 0.375, "rewards/chosen": -0.43755191564559937, "rewards/margins": -0.0899558886885643, "rewards/rejected": -0.34759604930877686, "step": 44 }, { "epoch": 0.12320328542094455, "grad_norm": 6.2034912109375, "learning_rate": 9.383561643835616e-07, "log_odds_chosen": -0.7216796875, "log_odds_ratio": -1.170172929763794, "logits/chosen": 0.036534667015075684, "logits/rejected": 0.11518233269453049, "logps/chosen": -3.488222599029541, "logps/rejected": -2.8049118518829346, "loss": 2.8588, "nll_loss": 2.741745948791504, "rewards/accuracies": 0.25, "rewards/chosen": -0.34882229566574097, "rewards/margins": -0.06833109259605408, "rewards/rejected": -0.2804911732673645, "step": 45 }, { "epoch": 0.12594113620807665, "grad_norm": 5.265769004821777, "learning_rate": 9.36986301369863e-07, "log_odds_chosen": -0.7860671281814575, "log_odds_ratio": -1.3124102354049683, "logits/chosen": -0.019555842503905296, "logits/rejected": -0.03303737938404083, "logps/chosen": -3.701235771179199, "logps/rejected": -2.933785915374756, "loss": 2.8837, "nll_loss": 2.752495288848877, "rewards/accuracies": 0.125, "rewards/chosen": -0.37012356519699097, "rewards/margins": -0.07674498111009598, "rewards/rejected": -0.2933785915374756, "step": 46 }, { "epoch": 0.12867898699520877, "grad_norm": 5.545430660247803, "learning_rate": 9.356164383561643e-07, "log_odds_chosen": -0.5810374021530151, "log_odds_ratio": -1.1533958911895752, "logits/chosen": -0.0035392548888921738, "logits/rejected": 0.06812423467636108, "logps/chosen": -2.9764397144317627, "logps/rejected": -2.4439263343811035, "loss": 2.8678, "nll_loss": 2.752434730529785, "rewards/accuracies": 0.5, "rewards/chosen": -0.2976439893245697, "rewards/margins": -0.05325133353471756, "rewards/rejected": -0.24439266324043274, "step": 47 }, { "epoch": 0.13141683778234087, "grad_norm": 4.9642462730407715, "learning_rate": 9.342465753424658e-07, "log_odds_chosen": -0.20204751193523407, "log_odds_ratio": -1.0109822750091553, "logits/chosen": -0.05270653963088989, "logits/rejected": 0.009669464081525803, "logps/chosen": -3.012859344482422, "logps/rejected": -2.80397629737854, "loss": 2.7285, "nll_loss": 2.627441883087158, "rewards/accuracies": 0.375, "rewards/chosen": -0.301285982131958, "rewards/margins": -0.020888330414891243, "rewards/rejected": -0.2803976237773895, "step": 48 }, { "epoch": 0.13415468856947296, "grad_norm": 5.447738170623779, "learning_rate": 9.328767123287671e-07, "log_odds_chosen": 0.4857527017593384, "log_odds_ratio": -0.6674563884735107, "logits/chosen": -0.04414322227239609, "logits/rejected": -0.048294391483068466, "logps/chosen": -3.56960129737854, "logps/rejected": -4.022181510925293, "loss": 2.8493, "nll_loss": 2.782576322555542, "rewards/accuracies": 0.625, "rewards/chosen": -0.35696011781692505, "rewards/margins": 0.045258063822984695, "rewards/rejected": -0.40221816301345825, "step": 49 }, { "epoch": 0.13689253935660506, "grad_norm": 5.712128162384033, "learning_rate": 9.315068493150684e-07, "log_odds_chosen": -0.40854331851005554, "log_odds_ratio": -1.1242904663085938, "logits/chosen": 0.014060504734516144, "logits/rejected": 0.06882058829069138, "logps/chosen": -3.2962703704833984, "logps/rejected": -2.8780980110168457, "loss": 2.794, "nll_loss": 2.6816015243530273, "rewards/accuracies": 0.625, "rewards/chosen": -0.32962706685066223, "rewards/margins": -0.04181721433997154, "rewards/rejected": -0.2878097891807556, "step": 50 }, { "epoch": 0.13963039014373715, "grad_norm": 5.1079206466674805, "learning_rate": 9.301369863013698e-07, "log_odds_chosen": -0.3931911587715149, "log_odds_ratio": -1.0097352266311646, "logits/chosen": 0.07219232618808746, "logits/rejected": 0.08471345901489258, "logps/chosen": -3.3886666297912598, "logps/rejected": -2.998568296432495, "loss": 2.8426, "nll_loss": 2.741645574569702, "rewards/accuracies": 0.375, "rewards/chosen": -0.338866651058197, "rewards/margins": -0.03900982812047005, "rewards/rejected": -0.29985684156417847, "step": 51 }, { "epoch": 0.14236824093086928, "grad_norm": 5.925088405609131, "learning_rate": 9.287671232876712e-07, "log_odds_chosen": -1.4712878465652466, "log_odds_ratio": -2.138023614883423, "logits/chosen": 0.10598265379667282, "logits/rejected": 0.07451222091913223, "logps/chosen": -4.513218879699707, "logps/rejected": -3.0692200660705566, "loss": 2.9544, "nll_loss": 2.7405781745910645, "rewards/accuracies": 0.375, "rewards/chosen": -0.45132192969322205, "rewards/margins": -0.14439989626407623, "rewards/rejected": -0.3069220185279846, "step": 52 }, { "epoch": 0.14510609171800137, "grad_norm": 5.254667282104492, "learning_rate": 9.273972602739726e-07, "log_odds_chosen": 0.9314041137695312, "log_odds_ratio": -0.7991589903831482, "logits/chosen": -0.0828799158334732, "logits/rejected": -0.08882374316453934, "logps/chosen": -3.095332622528076, "logps/rejected": -4.019404888153076, "loss": 2.8542, "nll_loss": 2.774242877960205, "rewards/accuracies": 0.375, "rewards/chosen": -0.3095332980155945, "rewards/margins": 0.0924072191119194, "rewards/rejected": -0.4019404947757721, "step": 53 }, { "epoch": 0.14784394250513347, "grad_norm": 6.099798679351807, "learning_rate": 9.260273972602739e-07, "log_odds_chosen": -0.9812139272689819, "log_odds_ratio": -1.5443625450134277, "logits/chosen": 0.0667799562215805, "logits/rejected": 0.11943984776735306, "logps/chosen": -4.63852596282959, "logps/rejected": -3.667084217071533, "loss": 3.0048, "nll_loss": 2.850315570831299, "rewards/accuracies": 0.25, "rewards/chosen": -0.46385258436203003, "rewards/margins": -0.09714416414499283, "rewards/rejected": -0.3667084276676178, "step": 54 }, { "epoch": 0.15058179329226556, "grad_norm": 4.812582015991211, "learning_rate": 9.246575342465753e-07, "log_odds_chosen": 1.208328366279602, "log_odds_ratio": -0.553124189376831, "logits/chosen": -0.14755326509475708, "logits/rejected": -0.291365385055542, "logps/chosen": -2.547384023666382, "logps/rejected": -3.681511878967285, "loss": 2.6549, "nll_loss": 2.5995736122131348, "rewards/accuracies": 0.625, "rewards/chosen": -0.2547384202480316, "rewards/margins": 0.1134127825498581, "rewards/rejected": -0.3681511878967285, "step": 55 }, { "epoch": 0.15331964407939766, "grad_norm": 5.092081546783447, "learning_rate": 9.232876712328766e-07, "log_odds_chosen": -0.6556471586227417, "log_odds_ratio": -1.210165023803711, "logits/chosen": -0.02094440534710884, "logits/rejected": -0.15378020703792572, "logps/chosen": -3.173057794570923, "logps/rejected": -2.5407660007476807, "loss": 2.7309, "nll_loss": 2.609856367111206, "rewards/accuracies": 0.375, "rewards/chosen": -0.3173058032989502, "rewards/margins": -0.06322918087244034, "rewards/rejected": -0.25407660007476807, "step": 56 }, { "epoch": 0.15605749486652978, "grad_norm": 5.540200710296631, "learning_rate": 9.219178082191781e-07, "log_odds_chosen": -0.47873935103416443, "log_odds_ratio": -1.0346264839172363, "logits/chosen": 0.0016667302697896957, "logits/rejected": 0.06582161784172058, "logps/chosen": -3.4776949882507324, "logps/rejected": -3.013324737548828, "loss": 2.8288, "nll_loss": 2.7252964973449707, "rewards/accuracies": 0.25, "rewards/chosen": -0.34776949882507324, "rewards/margins": -0.04643699526786804, "rewards/rejected": -0.3013325035572052, "step": 57 }, { "epoch": 0.15879534565366188, "grad_norm": 4.964200019836426, "learning_rate": 9.205479452054794e-07, "log_odds_chosen": 0.9422581791877747, "log_odds_ratio": -0.6117509007453918, "logits/chosen": 0.04438550025224686, "logits/rejected": -0.017819661647081375, "logps/chosen": -2.550828218460083, "logps/rejected": -3.4157986640930176, "loss": 2.6487, "nll_loss": 2.5874781608581543, "rewards/accuracies": 0.625, "rewards/chosen": -0.2550828456878662, "rewards/margins": 0.08649703115224838, "rewards/rejected": -0.3415798842906952, "step": 58 }, { "epoch": 0.16153319644079397, "grad_norm": 5.2354736328125, "learning_rate": 9.191780821917808e-07, "log_odds_chosen": -1.153167963027954, "log_odds_ratio": -1.6352087259292603, "logits/chosen": -0.060185618698596954, "logits/rejected": 0.004690185189247131, "logps/chosen": -3.849971055984497, "logps/rejected": -2.713663339614868, "loss": 2.9165, "nll_loss": 2.752948045730591, "rewards/accuracies": 0.375, "rewards/chosen": -0.38499709963798523, "rewards/margins": -0.11363077908754349, "rewards/rejected": -0.2713663578033447, "step": 59 }, { "epoch": 0.16427104722792607, "grad_norm": 4.832169055938721, "learning_rate": 9.178082191780822e-07, "log_odds_chosen": 0.7515749335289001, "log_odds_ratio": -0.5497936010360718, "logits/chosen": 0.09419383853673935, "logits/rejected": 0.04687604308128357, "logps/chosen": -2.9503471851348877, "logps/rejected": -3.642731189727783, "loss": 2.7084, "nll_loss": 2.65346622467041, "rewards/accuracies": 0.75, "rewards/chosen": -0.2950347065925598, "rewards/margins": 0.06923837959766388, "rewards/rejected": -0.3642731010913849, "step": 60 }, { "epoch": 0.16700889801505817, "grad_norm": 5.460700988769531, "learning_rate": 9.164383561643835e-07, "log_odds_chosen": -0.5644867420196533, "log_odds_ratio": -1.131011724472046, "logits/chosen": -0.03796534985303879, "logits/rejected": -0.015639949589967728, "logps/chosen": -3.68270206451416, "logps/rejected": -3.144409656524658, "loss": 2.8739, "nll_loss": 2.760838747024536, "rewards/accuracies": 0.375, "rewards/chosen": -0.3682701587677002, "rewards/margins": -0.05382924526929855, "rewards/rejected": -0.3144409656524658, "step": 61 }, { "epoch": 0.1697467488021903, "grad_norm": 4.776995658874512, "learning_rate": 9.150684931506849e-07, "log_odds_chosen": -0.38963037729263306, "log_odds_ratio": -1.0918842554092407, "logits/chosen": -0.15824729204177856, "logits/rejected": -0.15877051651477814, "logps/chosen": -2.8163299560546875, "logps/rejected": -2.4301767349243164, "loss": 2.6805, "nll_loss": 2.5713369846343994, "rewards/accuracies": 0.375, "rewards/chosen": -0.2816329896450043, "rewards/margins": -0.03861531615257263, "rewards/rejected": -0.24301767349243164, "step": 62 }, { "epoch": 0.17248459958932238, "grad_norm": 5.603599548339844, "learning_rate": 9.136986301369862e-07, "log_odds_chosen": -0.53826504945755, "log_odds_ratio": -1.2401034832000732, "logits/chosen": -0.030877070501446724, "logits/rejected": -0.008710749447345734, "logps/chosen": -3.698755979537964, "logps/rejected": -3.1162848472595215, "loss": 2.9096, "nll_loss": 2.785621404647827, "rewards/accuracies": 0.375, "rewards/chosen": -0.36987560987472534, "rewards/margins": -0.05824711173772812, "rewards/rejected": -0.311628520488739, "step": 63 }, { "epoch": 0.17522245037645448, "grad_norm": 5.137451648712158, "learning_rate": 9.123287671232876e-07, "log_odds_chosen": -0.5817815065383911, "log_odds_ratio": -1.2119686603546143, "logits/chosen": 0.008122764527797699, "logits/rejected": 0.025984667241573334, "logps/chosen": -3.435953140258789, "logps/rejected": -2.8259458541870117, "loss": 2.8128, "nll_loss": 2.6915664672851562, "rewards/accuracies": 0.375, "rewards/chosen": -0.3435952663421631, "rewards/margins": -0.06100068241357803, "rewards/rejected": -0.28259462118148804, "step": 64 }, { "epoch": 0.17796030116358658, "grad_norm": 4.918903827667236, "learning_rate": 9.10958904109589e-07, "log_odds_chosen": -0.32956117391586304, "log_odds_ratio": -1.0389676094055176, "logits/chosen": -0.03142030909657478, "logits/rejected": -0.07528287917375565, "logps/chosen": -2.809595823287964, "logps/rejected": -2.465104103088379, "loss": 2.6847, "nll_loss": 2.580833911895752, "rewards/accuracies": 0.5, "rewards/chosen": -0.2809596061706543, "rewards/margins": -0.03444918990135193, "rewards/rejected": -0.24651043117046356, "step": 65 }, { "epoch": 0.1806981519507187, "grad_norm": 5.357472896575928, "learning_rate": 9.095890410958904e-07, "log_odds_chosen": 0.19165408611297607, "log_odds_ratio": -0.8281350135803223, "logits/chosen": 0.09655125439167023, "logits/rejected": 0.10981010645627975, "logps/chosen": -2.760957717895508, "logps/rejected": -2.9357762336730957, "loss": 2.7787, "nll_loss": 2.695857048034668, "rewards/accuracies": 0.375, "rewards/chosen": -0.27609577775001526, "rewards/margins": 0.017481878399848938, "rewards/rejected": -0.293577641248703, "step": 66 }, { "epoch": 0.1834360027378508, "grad_norm": 5.283940315246582, "learning_rate": 9.082191780821917e-07, "log_odds_chosen": -0.5104767084121704, "log_odds_ratio": -1.2048356533050537, "logits/chosen": -0.024251803755760193, "logits/rejected": 0.019075654447078705, "logps/chosen": -3.2958998680114746, "logps/rejected": -2.7570960521698, "loss": 2.9024, "nll_loss": 2.7818756103515625, "rewards/accuracies": 0.5, "rewards/chosen": -0.32958999276161194, "rewards/margins": -0.05388038232922554, "rewards/rejected": -0.2757095992565155, "step": 67 }, { "epoch": 0.1861738535249829, "grad_norm": 4.96014404296875, "learning_rate": 9.068493150684932e-07, "log_odds_chosen": -0.13346531987190247, "log_odds_ratio": -0.8861931562423706, "logits/chosen": 0.012957122176885605, "logits/rejected": 0.0510651096701622, "logps/chosen": -2.8195085525512695, "logps/rejected": -2.643789052963257, "loss": 2.6885, "nll_loss": 2.5998778343200684, "rewards/accuracies": 0.5, "rewards/chosen": -0.28195086121559143, "rewards/margins": -0.017571941018104553, "rewards/rejected": -0.2643789052963257, "step": 68 }, { "epoch": 0.188911704312115, "grad_norm": 5.280044078826904, "learning_rate": 9.054794520547945e-07, "log_odds_chosen": -0.25548240542411804, "log_odds_ratio": -0.8996731042861938, "logits/chosen": 0.012915432453155518, "logits/rejected": -0.04845929890871048, "logps/chosen": -3.163198471069336, "logps/rejected": -2.9207119941711426, "loss": 2.7836, "nll_loss": 2.693610429763794, "rewards/accuracies": 0.375, "rewards/chosen": -0.31631985306739807, "rewards/margins": -0.024248674511909485, "rewards/rejected": -0.2920711934566498, "step": 69 }, { "epoch": 0.19164955509924708, "grad_norm": 4.689385414123535, "learning_rate": 9.041095890410958e-07, "log_odds_chosen": 0.22673647105693817, "log_odds_ratio": -0.9052112698554993, "logits/chosen": 0.14824099838733673, "logits/rejected": -0.020201072096824646, "logps/chosen": -2.5145788192749023, "logps/rejected": -2.6732938289642334, "loss": 2.6335, "nll_loss": 2.5429866313934326, "rewards/accuracies": 0.625, "rewards/chosen": -0.2514578700065613, "rewards/margins": 0.015871508046984673, "rewards/rejected": -0.2673293948173523, "step": 70 }, { "epoch": 0.1943874058863792, "grad_norm": 4.4549336433410645, "learning_rate": 9.027397260273972e-07, "log_odds_chosen": 0.4998761713504791, "log_odds_ratio": -0.6250263452529907, "logits/chosen": 0.08190062642097473, "logits/rejected": -0.009746000170707703, "logps/chosen": -2.206167459487915, "logps/rejected": -2.671086549758911, "loss": 2.6303, "nll_loss": 2.5678391456604004, "rewards/accuracies": 0.75, "rewards/chosen": -0.22061675786972046, "rewards/margins": 0.04649189114570618, "rewards/rejected": -0.26710861921310425, "step": 71 }, { "epoch": 0.1971252566735113, "grad_norm": 5.427595138549805, "learning_rate": 9.013698630136985e-07, "log_odds_chosen": 0.0076923668384552, "log_odds_ratio": -1.0395538806915283, "logits/chosen": -0.10673359036445618, "logits/rejected": -0.02635042741894722, "logps/chosen": -3.163586139678955, "logps/rejected": -3.163938522338867, "loss": 2.7645, "nll_loss": 2.660569906234741, "rewards/accuracies": 0.625, "rewards/chosen": -0.31635862588882446, "rewards/margins": 3.5256147384643555e-05, "rewards/rejected": -0.3163938522338867, "step": 72 }, { "epoch": 0.1998631074606434, "grad_norm": 5.098458766937256, "learning_rate": 9e-07, "log_odds_chosen": -0.3647969365119934, "log_odds_ratio": -1.0555599927902222, "logits/chosen": 0.028712008148431778, "logits/rejected": -0.0035294145345687866, "logps/chosen": -3.473923683166504, "logps/rejected": -3.0717504024505615, "loss": 2.7398, "nll_loss": 2.634279727935791, "rewards/accuracies": 0.5, "rewards/chosen": -0.34739238023757935, "rewards/margins": -0.040217310190200806, "rewards/rejected": -0.30717507004737854, "step": 73 }, { "epoch": 0.2026009582477755, "grad_norm": 4.565901279449463, "learning_rate": 8.986301369863013e-07, "log_odds_chosen": -0.1216525286436081, "log_odds_ratio": -0.8057986497879028, "logits/chosen": -0.10222484171390533, "logits/rejected": -0.13043251633644104, "logps/chosen": -2.686145782470703, "logps/rejected": -2.5653457641601562, "loss": 2.6357, "nll_loss": 2.555159330368042, "rewards/accuracies": 0.625, "rewards/chosen": -0.26861459016799927, "rewards/margins": -0.0120799969881773, "rewards/rejected": -0.2565345764160156, "step": 74 }, { "epoch": 0.2053388090349076, "grad_norm": 5.430127143859863, "learning_rate": 8.972602739726027e-07, "log_odds_chosen": -0.5529830455780029, "log_odds_ratio": -1.1986985206604004, "logits/chosen": 0.11078452318906784, "logits/rejected": 0.16691845655441284, "logps/chosen": -3.687809467315674, "logps/rejected": -3.1404600143432617, "loss": 2.9332, "nll_loss": 2.8132989406585693, "rewards/accuracies": 0.375, "rewards/chosen": -0.3687809705734253, "rewards/margins": -0.05473494529724121, "rewards/rejected": -0.3140460252761841, "step": 75 }, { "epoch": 0.2080766598220397, "grad_norm": 5.057312965393066, "learning_rate": 8.958904109589041e-07, "log_odds_chosen": -0.39810243248939514, "log_odds_ratio": -1.118593454360962, "logits/chosen": 0.04414316266775131, "logits/rejected": 0.02474937029182911, "logps/chosen": -3.535921573638916, "logps/rejected": -3.1252005100250244, "loss": 2.7736, "nll_loss": 2.6617865562438965, "rewards/accuracies": 0.375, "rewards/chosen": -0.3535921573638916, "rewards/margins": -0.04107213392853737, "rewards/rejected": -0.3125200569629669, "step": 76 }, { "epoch": 0.2108145106091718, "grad_norm": 5.508824825286865, "learning_rate": 8.945205479452055e-07, "log_odds_chosen": -0.32327917218208313, "log_odds_ratio": -1.0193800926208496, "logits/chosen": 0.06491488218307495, "logits/rejected": 0.11034958064556122, "logps/chosen": -3.588590621948242, "logps/rejected": -3.2554008960723877, "loss": 2.8478, "nll_loss": 2.745824098587036, "rewards/accuracies": 0.375, "rewards/chosen": -0.3588590919971466, "rewards/margins": -0.033318981528282166, "rewards/rejected": -0.32554009556770325, "step": 77 }, { "epoch": 0.2135523613963039, "grad_norm": 4.958682060241699, "learning_rate": 8.931506849315068e-07, "log_odds_chosen": 0.23038266599178314, "log_odds_ratio": -0.7035965323448181, "logits/chosen": 0.134477898478508, "logits/rejected": 0.08116934448480606, "logps/chosen": -2.5955047607421875, "logps/rejected": -2.818180561065674, "loss": 2.6943, "nll_loss": 2.62394380569458, "rewards/accuracies": 0.625, "rewards/chosen": -0.2595504820346832, "rewards/margins": 0.022267593070864677, "rewards/rejected": -0.28181809186935425, "step": 78 }, { "epoch": 0.216290212183436, "grad_norm": 5.462538242340088, "learning_rate": 8.917808219178081e-07, "log_odds_chosen": -0.3372669816017151, "log_odds_ratio": -0.9966406226158142, "logits/chosen": -0.06603681296110153, "logits/rejected": 0.019096991047263145, "logps/chosen": -3.773010492324829, "logps/rejected": -3.431824207305908, "loss": 2.83, "nll_loss": 2.73036527633667, "rewards/accuracies": 0.375, "rewards/chosen": -0.37730106711387634, "rewards/margins": -0.0341186448931694, "rewards/rejected": -0.34318244457244873, "step": 79 }, { "epoch": 0.2190280629705681, "grad_norm": 5.621030807495117, "learning_rate": 8.904109589041095e-07, "log_odds_chosen": -0.995664119720459, "log_odds_ratio": -1.5889744758605957, "logits/chosen": -0.04355470463633537, "logits/rejected": 0.025290735065937042, "logps/chosen": -3.8529562950134277, "logps/rejected": -2.8639047145843506, "loss": 2.8712, "nll_loss": 2.712272882461548, "rewards/accuracies": 0.25, "rewards/chosen": -0.3852955996990204, "rewards/margins": -0.09890513122081757, "rewards/rejected": -0.286390483379364, "step": 80 }, { "epoch": 0.22176591375770022, "grad_norm": 5.240285873413086, "learning_rate": 8.890410958904109e-07, "log_odds_chosen": 0.160344660282135, "log_odds_ratio": -0.8395337462425232, "logits/chosen": -0.11772207915782928, "logits/rejected": -0.009794194251298904, "logps/chosen": -2.962883472442627, "logps/rejected": -3.127861976623535, "loss": 2.8275, "nll_loss": 2.743520736694336, "rewards/accuracies": 0.375, "rewards/chosen": -0.2962883412837982, "rewards/margins": 0.016497863456606865, "rewards/rejected": -0.3127862215042114, "step": 81 }, { "epoch": 0.2245037645448323, "grad_norm": 5.536981582641602, "learning_rate": 8.876712328767123e-07, "log_odds_chosen": -1.0215141773223877, "log_odds_ratio": -1.9759223461151123, "logits/chosen": 0.039745084941387177, "logits/rejected": -0.011454209685325623, "logps/chosen": -4.487369537353516, "logps/rejected": -3.4529685974121094, "loss": 2.9874, "nll_loss": 2.7897744178771973, "rewards/accuracies": 0.625, "rewards/chosen": -0.4487369656562805, "rewards/margins": -0.10344010591506958, "rewards/rejected": -0.34529685974121094, "step": 82 }, { "epoch": 0.2272416153319644, "grad_norm": 5.34192419052124, "learning_rate": 8.863013698630136e-07, "log_odds_chosen": -0.6272554397583008, "log_odds_ratio": -1.3076993227005005, "logits/chosen": 0.07117203623056412, "logits/rejected": 0.10196933150291443, "logps/chosen": -3.383453369140625, "logps/rejected": -2.7360336780548096, "loss": 2.8182, "nll_loss": 2.6874539852142334, "rewards/accuracies": 0.5, "rewards/chosen": -0.33834534883499146, "rewards/margins": -0.06474198400974274, "rewards/rejected": -0.2736033797264099, "step": 83 }, { "epoch": 0.2299794661190965, "grad_norm": 5.183108806610107, "learning_rate": 8.849315068493151e-07, "log_odds_chosen": -0.03445900231599808, "log_odds_ratio": -0.7749941349029541, "logits/chosen": 0.12075208127498627, "logits/rejected": 0.17413528263568878, "logps/chosen": -2.833005666732788, "logps/rejected": -2.80629301071167, "loss": 2.7424, "nll_loss": 2.6649179458618164, "rewards/accuracies": 0.625, "rewards/chosen": -0.28330057859420776, "rewards/margins": -0.002671279013156891, "rewards/rejected": -0.28062930703163147, "step": 84 }, { "epoch": 0.2327173169062286, "grad_norm": 4.641160011291504, "learning_rate": 8.835616438356164e-07, "log_odds_chosen": 0.005456328392028809, "log_odds_ratio": -0.7938748598098755, "logits/chosen": -0.05543015897274017, "logits/rejected": 0.012453850358724594, "logps/chosen": -2.530592203140259, "logps/rejected": -2.523275136947632, "loss": 2.6769, "nll_loss": 2.5975334644317627, "rewards/accuracies": 0.625, "rewards/chosen": -0.2530592083930969, "rewards/margins": -0.0007316954433917999, "rewards/rejected": -0.2523275315761566, "step": 85 }, { "epoch": 0.23545516769336072, "grad_norm": 5.416834354400635, "learning_rate": 8.821917808219178e-07, "log_odds_chosen": -0.7474377751350403, "log_odds_ratio": -1.5944329500198364, "logits/chosen": 0.08188977837562561, "logits/rejected": 0.11497823148965836, "logps/chosen": -3.5699493885040283, "logps/rejected": -2.772251605987549, "loss": 2.7912, "nll_loss": 2.6317179203033447, "rewards/accuracies": 0.625, "rewards/chosen": -0.35699495673179626, "rewards/margins": -0.0797697901725769, "rewards/rejected": -0.27722519636154175, "step": 86 }, { "epoch": 0.23819301848049282, "grad_norm": 5.383276462554932, "learning_rate": 8.808219178082191e-07, "log_odds_chosen": -0.4177709221839905, "log_odds_ratio": -1.003016710281372, "logits/chosen": 0.11962847411632538, "logits/rejected": 0.15621821582317352, "logps/chosen": -3.0079965591430664, "logps/rejected": -2.601195812225342, "loss": 2.7393, "nll_loss": 2.6389691829681396, "rewards/accuracies": 0.375, "rewards/chosen": -0.3007996678352356, "rewards/margins": -0.040680065751075745, "rewards/rejected": -0.26011958718299866, "step": 87 }, { "epoch": 0.24093086926762491, "grad_norm": 5.805488586425781, "learning_rate": 8.794520547945205e-07, "log_odds_chosen": 0.15172141790390015, "log_odds_ratio": -1.055099606513977, "logits/chosen": 0.010634312406182289, "logits/rejected": 0.09639895707368851, "logps/chosen": -3.380492925643921, "logps/rejected": -3.5242254734039307, "loss": 2.8418, "nll_loss": 2.7363362312316895, "rewards/accuracies": 0.625, "rewards/chosen": -0.3380492925643921, "rewards/margins": 0.01437327265739441, "rewards/rejected": -0.3524225652217865, "step": 88 }, { "epoch": 0.243668720054757, "grad_norm": 5.332287788391113, "learning_rate": 8.780821917808219e-07, "log_odds_chosen": 0.906158983707428, "log_odds_ratio": -0.865318775177002, "logits/chosen": 0.06712628901004791, "logits/rejected": 0.002991609275341034, "logps/chosen": -3.0221452713012695, "logps/rejected": -3.8754043579101562, "loss": 2.6775, "nll_loss": 2.591000556945801, "rewards/accuracies": 0.75, "rewards/chosen": -0.30221453309059143, "rewards/margins": 0.08532591164112091, "rewards/rejected": -0.38754042983055115, "step": 89 }, { "epoch": 0.2464065708418891, "grad_norm": 5.552064418792725, "learning_rate": 8.767123287671232e-07, "log_odds_chosen": -0.48719701170921326, "log_odds_ratio": -1.0688904523849487, "logits/chosen": 0.05244023725390434, "logits/rejected": 0.06628427654504776, "logps/chosen": -3.5055127143859863, "logps/rejected": -3.010310173034668, "loss": 2.7952, "nll_loss": 2.6883535385131836, "rewards/accuracies": 0.25, "rewards/chosen": -0.3505512773990631, "rewards/margins": -0.049520257860422134, "rewards/rejected": -0.3010310232639313, "step": 90 }, { "epoch": 0.24914442162902123, "grad_norm": 5.111067295074463, "learning_rate": 8.753424657534246e-07, "log_odds_chosen": 0.03639820218086243, "log_odds_ratio": -0.7950779795646667, "logits/chosen": -0.015696246176958084, "logits/rejected": -0.04619509354233742, "logps/chosen": -2.6451220512390137, "logps/rejected": -2.6811509132385254, "loss": 2.724, "nll_loss": 2.6444740295410156, "rewards/accuracies": 0.375, "rewards/chosen": -0.26451224088668823, "rewards/margins": 0.0036028623580932617, "rewards/rejected": -0.2681150734424591, "step": 91 }, { "epoch": 0.2518822724161533, "grad_norm": 5.241069793701172, "learning_rate": 8.73972602739726e-07, "log_odds_chosen": 0.06199149042367935, "log_odds_ratio": -0.80806565284729, "logits/chosen": 0.23409900069236755, "logits/rejected": 0.2555784285068512, "logps/chosen": -3.132072687149048, "logps/rejected": -3.204441785812378, "loss": 2.7087, "nll_loss": 2.627885341644287, "rewards/accuracies": 0.625, "rewards/chosen": -0.3132072687149048, "rewards/margins": 0.007236909121274948, "rewards/rejected": -0.32044416666030884, "step": 92 }, { "epoch": 0.2546201232032854, "grad_norm": 5.414149284362793, "learning_rate": 8.726027397260274e-07, "log_odds_chosen": -0.5448219180107117, "log_odds_ratio": -1.0877211093902588, "logits/chosen": -0.10222721099853516, "logits/rejected": 0.09992042183876038, "logps/chosen": -2.68206787109375, "logps/rejected": -2.1586380004882812, "loss": 2.853, "nll_loss": 2.7442119121551514, "rewards/accuracies": 0.375, "rewards/chosen": -0.26820677518844604, "rewards/margins": -0.052342966198921204, "rewards/rejected": -0.21586382389068604, "step": 93 }, { "epoch": 0.25735797399041754, "grad_norm": 4.905978679656982, "learning_rate": 8.712328767123287e-07, "log_odds_chosen": 0.08377820998430252, "log_odds_ratio": -0.7297275066375732, "logits/chosen": 0.031086016446352005, "logits/rejected": 0.036139778792858124, "logps/chosen": -3.015242099761963, "logps/rejected": -3.082963466644287, "loss": 2.7212, "nll_loss": 2.648233652114868, "rewards/accuracies": 0.625, "rewards/chosen": -0.30152422189712524, "rewards/margins": 0.006772145628929138, "rewards/rejected": -0.3082963526248932, "step": 94 }, { "epoch": 0.2600958247775496, "grad_norm": 5.912672519683838, "learning_rate": 8.698630136986301e-07, "log_odds_chosen": -0.8350614309310913, "log_odds_ratio": -1.3994274139404297, "logits/chosen": 0.028327755630016327, "logits/rejected": 0.07602806389331818, "logps/chosen": -4.222169876098633, "logps/rejected": -3.412238121032715, "loss": 2.8948, "nll_loss": 2.7548818588256836, "rewards/accuracies": 0.25, "rewards/chosen": -0.4222170412540436, "rewards/margins": -0.08099324256181717, "rewards/rejected": -0.341223806142807, "step": 95 }, { "epoch": 0.26283367556468173, "grad_norm": 4.6590142250061035, "learning_rate": 8.684931506849314e-07, "log_odds_chosen": 0.17314226925373077, "log_odds_ratio": -0.6444829702377319, "logits/chosen": 0.07070562988519669, "logits/rejected": 0.01486390084028244, "logps/chosen": -2.444746732711792, "logps/rejected": -2.5737931728363037, "loss": 2.6436, "nll_loss": 2.5791168212890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.24447467923164368, "rewards/margins": 0.012904642149806023, "rewards/rejected": -0.25737932324409485, "step": 96 }, { "epoch": 0.2655715263518138, "grad_norm": 4.773935794830322, "learning_rate": 8.671232876712329e-07, "log_odds_chosen": -0.08470071107149124, "log_odds_ratio": -0.7818529605865479, "logits/chosen": -0.16878731548786163, "logits/rejected": -0.15945443511009216, "logps/chosen": -3.5998969078063965, "logps/rejected": -3.5072052478790283, "loss": 2.5804, "nll_loss": 2.5021817684173584, "rewards/accuracies": 0.5, "rewards/chosen": -0.3599897027015686, "rewards/margins": -0.009269164875149727, "rewards/rejected": -0.35072052478790283, "step": 97 }, { "epoch": 0.2683093771389459, "grad_norm": 5.117630481719971, "learning_rate": 8.657534246575342e-07, "log_odds_chosen": -0.7961372137069702, "log_odds_ratio": -1.3029788732528687, "logits/chosen": -0.03958554565906525, "logits/rejected": -0.038987353444099426, "logps/chosen": -3.5037198066711426, "logps/rejected": -2.7201778888702393, "loss": 2.75, "nll_loss": 2.619654655456543, "rewards/accuracies": 0.25, "rewards/chosen": -0.35037195682525635, "rewards/margins": -0.07835415750741959, "rewards/rejected": -0.27201780676841736, "step": 98 }, { "epoch": 0.27104722792607805, "grad_norm": 5.135016918182373, "learning_rate": 8.643835616438355e-07, "log_odds_chosen": 0.10121018439531326, "log_odds_ratio": -0.8994015455245972, "logits/chosen": -0.04489586874842644, "logits/rejected": -0.05412030220031738, "logps/chosen": -3.267733573913574, "logps/rejected": -3.327514171600342, "loss": 2.7088, "nll_loss": 2.6188712120056152, "rewards/accuracies": 0.75, "rewards/chosen": -0.32677334547042847, "rewards/margins": 0.0059780701994895935, "rewards/rejected": -0.33275142312049866, "step": 99 }, { "epoch": 0.2737850787132101, "grad_norm": 5.345401763916016, "learning_rate": 8.63013698630137e-07, "log_odds_chosen": -0.5142669081687927, "log_odds_ratio": -1.1056458950042725, "logits/chosen": -0.05225278437137604, "logits/rejected": -0.06310966610908508, "logps/chosen": -3.2981748580932617, "logps/rejected": -2.7804999351501465, "loss": 2.715, "nll_loss": 2.6044445037841797, "rewards/accuracies": 0.375, "rewards/chosen": -0.329817533493042, "rewards/margins": -0.051767513155937195, "rewards/rejected": -0.2780500054359436, "step": 100 }, { "epoch": 0.27652292950034224, "grad_norm": 4.611669540405273, "learning_rate": 8.616438356164383e-07, "log_odds_chosen": 0.7641834616661072, "log_odds_ratio": -0.7200108170509338, "logits/chosen": 0.052901484072208405, "logits/rejected": -0.0651407465338707, "logps/chosen": -2.4582314491271973, "logps/rejected": -3.1833372116088867, "loss": 2.6819, "nll_loss": 2.609849691390991, "rewards/accuracies": 0.625, "rewards/chosen": -0.24582314491271973, "rewards/margins": 0.07251057773828506, "rewards/rejected": -0.3183337152004242, "step": 101 }, { "epoch": 0.2792607802874743, "grad_norm": 5.063290596008301, "learning_rate": 8.602739726027397e-07, "log_odds_chosen": -0.21819449961185455, "log_odds_ratio": -1.1872097253799438, "logits/chosen": -0.08451250940561295, "logits/rejected": -0.057115621864795685, "logps/chosen": -3.719264507293701, "logps/rejected": -3.505208969116211, "loss": 2.739, "nll_loss": 2.6202876567840576, "rewards/accuracies": 0.5, "rewards/chosen": -0.371926486492157, "rewards/margins": -0.021405545994639397, "rewards/rejected": -0.35052090883255005, "step": 102 }, { "epoch": 0.28199863107460643, "grad_norm": 4.5849528312683105, "learning_rate": 8.58904109589041e-07, "log_odds_chosen": 0.1459561437368393, "log_odds_ratio": -0.7679399251937866, "logits/chosen": -0.08841703087091446, "logits/rejected": -0.15351717174053192, "logps/chosen": -2.5281739234924316, "logps/rejected": -2.671278953552246, "loss": 2.6387, "nll_loss": 2.561890125274658, "rewards/accuracies": 0.5, "rewards/chosen": -0.25281742215156555, "rewards/margins": 0.014310481026768684, "rewards/rejected": -0.2671278715133667, "step": 103 }, { "epoch": 0.28473648186173856, "grad_norm": 5.076570510864258, "learning_rate": 8.575342465753424e-07, "log_odds_chosen": -0.5450716614723206, "log_odds_ratio": -1.2857344150543213, "logits/chosen": 0.08676767349243164, "logits/rejected": 0.04222572594881058, "logps/chosen": -3.7181591987609863, "logps/rejected": -3.136157751083374, "loss": 2.6867, "nll_loss": 2.5581469535827637, "rewards/accuracies": 0.5, "rewards/chosen": -0.371815949678421, "rewards/margins": -0.0582001730799675, "rewards/rejected": -0.3136157989501953, "step": 104 }, { "epoch": 0.2874743326488706, "grad_norm": 4.5968017578125, "learning_rate": 8.561643835616438e-07, "log_odds_chosen": -0.18065893650054932, "log_odds_ratio": -0.9697253704071045, "logits/chosen": 0.1953902244567871, "logits/rejected": 0.07095462828874588, "logps/chosen": -2.766700506210327, "logps/rejected": -2.5636508464813232, "loss": 2.5796, "nll_loss": 2.482670307159424, "rewards/accuracies": 0.5, "rewards/chosen": -0.27667006850242615, "rewards/margins": -0.020304983481764793, "rewards/rejected": -0.2563650906085968, "step": 105 }, { "epoch": 0.29021218343600275, "grad_norm": 5.682470321655273, "learning_rate": 8.547945205479452e-07, "log_odds_chosen": -0.6108859777450562, "log_odds_ratio": -1.1625951528549194, "logits/chosen": 0.09651096165180206, "logits/rejected": 0.13033248484134674, "logps/chosen": -3.8438382148742676, "logps/rejected": -3.233224391937256, "loss": 2.8132, "nll_loss": 2.6969075202941895, "rewards/accuracies": 0.375, "rewards/chosen": -0.38438382744789124, "rewards/margins": -0.061061419546604156, "rewards/rejected": -0.3233224153518677, "step": 106 }, { "epoch": 0.2929500342231348, "grad_norm": 4.4542694091796875, "learning_rate": 8.534246575342465e-07, "log_odds_chosen": -0.46444785594940186, "log_odds_ratio": -1.0643482208251953, "logits/chosen": -0.09616702049970627, "logits/rejected": -0.10365986824035645, "logps/chosen": -2.5969672203063965, "logps/rejected": -2.121328830718994, "loss": 2.529, "nll_loss": 2.4225549697875977, "rewards/accuracies": 0.5, "rewards/chosen": -0.25969672203063965, "rewards/margins": -0.0475638322532177, "rewards/rejected": -0.21213288605213165, "step": 107 }, { "epoch": 0.29568788501026694, "grad_norm": 5.713870525360107, "learning_rate": 8.52054794520548e-07, "log_odds_chosen": -0.44408291578292847, "log_odds_ratio": -1.0953612327575684, "logits/chosen": 0.1173749566078186, "logits/rejected": 0.18929004669189453, "logps/chosen": -3.751938581466675, "logps/rejected": -3.305945873260498, "loss": 2.764, "nll_loss": 2.654416084289551, "rewards/accuracies": 0.5, "rewards/chosen": -0.37519386410713196, "rewards/margins": -0.0445992648601532, "rewards/rejected": -0.33059459924697876, "step": 108 }, { "epoch": 0.29842573579739906, "grad_norm": 4.649056434631348, "learning_rate": 8.506849315068493e-07, "log_odds_chosen": 0.03832213580608368, "log_odds_ratio": -0.8033921122550964, "logits/chosen": -0.03193608671426773, "logits/rejected": -0.016600247472524643, "logps/chosen": -2.5145537853240967, "logps/rejected": -2.5330116748809814, "loss": 2.656, "nll_loss": 2.5756263732910156, "rewards/accuracies": 0.625, "rewards/chosen": -0.2514553666114807, "rewards/margins": 0.00184578076004982, "rewards/rejected": -0.2533011734485626, "step": 109 }, { "epoch": 0.30116358658453113, "grad_norm": 5.52592134475708, "learning_rate": 8.493150684931506e-07, "log_odds_chosen": -0.030976902693510056, "log_odds_ratio": -0.8477308750152588, "logits/chosen": -0.039980657398700714, "logits/rejected": 0.05584407597780228, "logps/chosen": -3.52891206741333, "logps/rejected": -3.498065948486328, "loss": 2.7123, "nll_loss": 2.627558708190918, "rewards/accuracies": 0.5, "rewards/chosen": -0.352891206741333, "rewards/margins": -0.003084583207964897, "rewards/rejected": -0.34980660676956177, "step": 110 }, { "epoch": 0.30390143737166325, "grad_norm": 4.404420852661133, "learning_rate": 8.47945205479452e-07, "log_odds_chosen": 0.13461825251579285, "log_odds_ratio": -0.649572491645813, "logits/chosen": -0.04981840401887894, "logits/rejected": -0.13366931676864624, "logps/chosen": -2.631746292114258, "logps/rejected": -2.7443199157714844, "loss": 2.5668, "nll_loss": 2.501880645751953, "rewards/accuracies": 0.5, "rewards/chosen": -0.2631746530532837, "rewards/margins": 0.011257361620664597, "rewards/rejected": -0.2744320034980774, "step": 111 }, { "epoch": 0.3066392881587953, "grad_norm": 5.516549110412598, "learning_rate": 8.465753424657533e-07, "log_odds_chosen": -1.1331403255462646, "log_odds_ratio": -1.6765213012695312, "logits/chosen": 0.05844881385564804, "logits/rejected": 0.11941379308700562, "logps/chosen": -4.220817565917969, "logps/rejected": -3.0974698066711426, "loss": 2.8575, "nll_loss": 2.6898696422576904, "rewards/accuracies": 0.5, "rewards/chosen": -0.42208170890808105, "rewards/margins": -0.11233475804328918, "rewards/rejected": -0.30974698066711426, "step": 112 }, { "epoch": 0.30937713894592744, "grad_norm": 4.348398685455322, "learning_rate": 8.452054794520548e-07, "log_odds_chosen": 0.8053466081619263, "log_odds_ratio": -0.6897112727165222, "logits/chosen": -0.09470576047897339, "logits/rejected": -0.21735456585884094, "logps/chosen": -2.4283194541931152, "logps/rejected": -3.211439609527588, "loss": 2.5478, "nll_loss": 2.4788074493408203, "rewards/accuracies": 0.625, "rewards/chosen": -0.24283196032047272, "rewards/margins": 0.0783119946718216, "rewards/rejected": -0.3211439847946167, "step": 113 }, { "epoch": 0.31211498973305957, "grad_norm": 5.613483905792236, "learning_rate": 8.438356164383561e-07, "log_odds_chosen": -0.5904162526130676, "log_odds_ratio": -1.1531586647033691, "logits/chosen": 0.055997200310230255, "logits/rejected": 0.06349929422140121, "logps/chosen": -3.7193055152893066, "logps/rejected": -3.122709274291992, "loss": 2.7727, "nll_loss": 2.657397747039795, "rewards/accuracies": 0.375, "rewards/chosen": -0.3719305694103241, "rewards/margins": -0.05965964496135712, "rewards/rejected": -0.3122709095478058, "step": 114 }, { "epoch": 0.31485284052019163, "grad_norm": 5.987019062042236, "learning_rate": 8.424657534246576e-07, "log_odds_chosen": -1.484458565711975, "log_odds_ratio": -1.8952608108520508, "logits/chosen": 0.07551038265228271, "logits/rejected": 0.20748886466026306, "logps/chosen": -4.595029830932617, "logps/rejected": -3.1478004455566406, "loss": 2.947, "nll_loss": 2.7574405670166016, "rewards/accuracies": 0.125, "rewards/chosen": -0.45950302481651306, "rewards/margins": -0.14472293853759766, "rewards/rejected": -0.314780056476593, "step": 115 }, { "epoch": 0.31759069130732376, "grad_norm": 5.192722320556641, "learning_rate": 8.410958904109589e-07, "log_odds_chosen": 0.15878228843212128, "log_odds_ratio": -0.7366304993629456, "logits/chosen": 0.10267484933137894, "logits/rejected": 0.10423342883586884, "logps/chosen": -2.6503379344940186, "logps/rejected": -2.7962679862976074, "loss": 2.7036, "nll_loss": 2.6299173831939697, "rewards/accuracies": 0.5, "rewards/chosen": -0.2650338113307953, "rewards/margins": 0.01459299772977829, "rewards/rejected": -0.2796267867088318, "step": 116 }, { "epoch": 0.3203285420944558, "grad_norm": 4.743743896484375, "learning_rate": 8.397260273972603e-07, "log_odds_chosen": 0.05917172133922577, "log_odds_ratio": -0.7534778118133545, "logits/chosen": 0.07976137101650238, "logits/rejected": 0.01851552724838257, "logps/chosen": -2.9227795600891113, "logps/rejected": -2.970993757247925, "loss": 2.6875, "nll_loss": 2.612126350402832, "rewards/accuracies": 0.5, "rewards/chosen": -0.292277991771698, "rewards/margins": 0.004821397364139557, "rewards/rejected": -0.29709938168525696, "step": 117 }, { "epoch": 0.32306639288158795, "grad_norm": 5.511942386627197, "learning_rate": 8.383561643835616e-07, "log_odds_chosen": -0.4375941753387451, "log_odds_ratio": -1.0551364421844482, "logits/chosen": 0.02794283628463745, "logits/rejected": 0.1068202406167984, "logps/chosen": -3.9804835319519043, "logps/rejected": -3.5415894985198975, "loss": 2.7642, "nll_loss": 2.658712387084961, "rewards/accuracies": 0.5, "rewards/chosen": -0.3980483412742615, "rewards/margins": -0.04388939216732979, "rewards/rejected": -0.3541589677333832, "step": 118 }, { "epoch": 0.3258042436687201, "grad_norm": 5.530320644378662, "learning_rate": 8.369863013698629e-07, "log_odds_chosen": -0.3271387219429016, "log_odds_ratio": -1.1650317907333374, "logits/chosen": 0.09769092500209808, "logits/rejected": 0.08835098147392273, "logps/chosen": -3.6588406562805176, "logps/rejected": -3.3258042335510254, "loss": 2.7744, "nll_loss": 2.657860279083252, "rewards/accuracies": 0.5, "rewards/chosen": -0.36588406562805176, "rewards/margins": -0.033303648233413696, "rewards/rejected": -0.33258044719696045, "step": 119 }, { "epoch": 0.32854209445585214, "grad_norm": 5.221133232116699, "learning_rate": 8.356164383561643e-07, "log_odds_chosen": -1.0695018768310547, "log_odds_ratio": -1.556466817855835, "logits/chosen": -0.12404382973909378, "logits/rejected": -0.10268328338861465, "logps/chosen": -3.9094130992889404, "logps/rejected": -2.851447582244873, "loss": 2.7996, "nll_loss": 2.6439225673675537, "rewards/accuracies": 0.375, "rewards/chosen": -0.390941321849823, "rewards/margins": -0.10579656809568405, "rewards/rejected": -0.28514474630355835, "step": 120 }, { "epoch": 0.33127994524298426, "grad_norm": 4.4994893074035645, "learning_rate": 8.342465753424657e-07, "log_odds_chosen": -0.1419440060853958, "log_odds_ratio": -0.8645087480545044, "logits/chosen": -0.0799705982208252, "logits/rejected": -0.05594753473997116, "logps/chosen": -2.972623348236084, "logps/rejected": -2.8249316215515137, "loss": 2.6229, "nll_loss": 2.536421537399292, "rewards/accuracies": 0.625, "rewards/chosen": -0.2972623109817505, "rewards/margins": -0.014769146218895912, "rewards/rejected": -0.2824931740760803, "step": 121 }, { "epoch": 0.33401779603011633, "grad_norm": 5.7806806564331055, "learning_rate": 8.328767123287671e-07, "log_odds_chosen": -0.2480340600013733, "log_odds_ratio": -1.2135717868804932, "logits/chosen": 0.07092062383890152, "logits/rejected": 0.12589797377586365, "logps/chosen": -3.732267141342163, "logps/rejected": -3.518439531326294, "loss": 2.8206, "nll_loss": 2.6992225646972656, "rewards/accuracies": 0.125, "rewards/chosen": -0.37322673201560974, "rewards/margins": -0.021382786333560944, "rewards/rejected": -0.3518439531326294, "step": 122 }, { "epoch": 0.33675564681724846, "grad_norm": 4.906563758850098, "learning_rate": 8.315068493150684e-07, "log_odds_chosen": -0.4503452479839325, "log_odds_ratio": -1.1866440773010254, "logits/chosen": 0.1408560872077942, "logits/rejected": 0.1792748123407364, "logps/chosen": -3.630963087081909, "logps/rejected": -3.1465396881103516, "loss": 2.7194, "nll_loss": 2.600691795349121, "rewards/accuracies": 0.5, "rewards/chosen": -0.36309629678726196, "rewards/margins": -0.04844232276082039, "rewards/rejected": -0.31465399265289307, "step": 123 }, { "epoch": 0.3394934976043806, "grad_norm": 4.547911643981934, "learning_rate": 8.301369863013699e-07, "log_odds_chosen": -0.16464996337890625, "log_odds_ratio": -0.9852699041366577, "logits/chosen": 0.06894254684448242, "logits/rejected": -0.0006795674562454224, "logps/chosen": -3.026521921157837, "logps/rejected": -2.8518526554107666, "loss": 2.7263, "nll_loss": 2.627774477005005, "rewards/accuracies": 0.375, "rewards/chosen": -0.30265218019485474, "rewards/margins": -0.017466923221945763, "rewards/rejected": -0.2851852774620056, "step": 124 }, { "epoch": 0.34223134839151265, "grad_norm": 6.79627799987793, "learning_rate": 8.287671232876712e-07, "log_odds_chosen": -1.3960654735565186, "log_odds_ratio": -1.8424336910247803, "logits/chosen": 0.12886382639408112, "logits/rejected": 0.2655690908432007, "logps/chosen": -4.9076738357543945, "logps/rejected": -3.53265643119812, "loss": 3.0133, "nll_loss": 2.829030990600586, "rewards/accuracies": 0.25, "rewards/chosen": -0.49076738953590393, "rewards/margins": -0.13750171661376953, "rewards/rejected": -0.3532656729221344, "step": 125 }, { "epoch": 0.34496919917864477, "grad_norm": 5.838741779327393, "learning_rate": 8.273972602739726e-07, "log_odds_chosen": -0.7481288313865662, "log_odds_ratio": -1.5164552927017212, "logits/chosen": 0.17773666977882385, "logits/rejected": 0.19780272245407104, "logps/chosen": -4.60248327255249, "logps/rejected": -3.870394706726074, "loss": 2.8459, "nll_loss": 2.694265365600586, "rewards/accuracies": 0.25, "rewards/chosen": -0.46024832129478455, "rewards/margins": -0.07320884615182877, "rewards/rejected": -0.3870394825935364, "step": 126 }, { "epoch": 0.34770704996577684, "grad_norm": 5.745665550231934, "learning_rate": 8.260273972602739e-07, "log_odds_chosen": -0.7968280911445618, "log_odds_ratio": -1.4300434589385986, "logits/chosen": 0.051290158182382584, "logits/rejected": 0.13242006301879883, "logps/chosen": -4.335784435272217, "logps/rejected": -3.5359253883361816, "loss": 2.8523, "nll_loss": 2.709270477294922, "rewards/accuracies": 0.25, "rewards/chosen": -0.4335784912109375, "rewards/margins": -0.07998590916395187, "rewards/rejected": -0.35359254479408264, "step": 127 }, { "epoch": 0.35044490075290896, "grad_norm": 5.355977535247803, "learning_rate": 8.246575342465753e-07, "log_odds_chosen": -0.892553985118866, "log_odds_ratio": -1.294431447982788, "logits/chosen": -0.07482755184173584, "logits/rejected": 0.027580875903367996, "logps/chosen": -3.456449270248413, "logps/rejected": -2.6238536834716797, "loss": 2.7659, "nll_loss": 2.6364381313323975, "rewards/accuracies": 0.125, "rewards/chosen": -0.3456449508666992, "rewards/margins": -0.08325955271720886, "rewards/rejected": -0.26238536834716797, "step": 128 }, { "epoch": 0.3531827515400411, "grad_norm": 5.089910507202148, "learning_rate": 8.232876712328767e-07, "log_odds_chosen": -0.43336546421051025, "log_odds_ratio": -1.0339585542678833, "logits/chosen": -0.065255306661129, "logits/rejected": -0.06154555082321167, "logps/chosen": -3.040546417236328, "logps/rejected": -2.61319637298584, "loss": 2.7053, "nll_loss": 2.6019351482391357, "rewards/accuracies": 0.5, "rewards/chosen": -0.3040546476840973, "rewards/margins": -0.042734988033771515, "rewards/rejected": -0.261319637298584, "step": 129 }, { "epoch": 0.35592060232717315, "grad_norm": 4.921382427215576, "learning_rate": 8.21917808219178e-07, "log_odds_chosen": -0.1120958924293518, "log_odds_ratio": -1.093326449394226, "logits/chosen": -0.11236831545829773, "logits/rejected": -0.10197293013334274, "logps/chosen": -3.2274575233459473, "logps/rejected": -3.074105739593506, "loss": 2.6461, "nll_loss": 2.5367417335510254, "rewards/accuracies": 0.625, "rewards/chosen": -0.32274574041366577, "rewards/margins": -0.01533520594239235, "rewards/rejected": -0.3074105381965637, "step": 130 }, { "epoch": 0.3586584531143053, "grad_norm": 5.340031623840332, "learning_rate": 8.205479452054795e-07, "log_odds_chosen": -0.7879136800765991, "log_odds_ratio": -1.2712788581848145, "logits/chosen": -0.006152201443910599, "logits/rejected": 0.08079586923122406, "logps/chosen": -3.537541151046753, "logps/rejected": -2.7657127380371094, "loss": 2.8498, "nll_loss": 2.72265625, "rewards/accuracies": 0.25, "rewards/chosen": -0.3537541627883911, "rewards/margins": -0.0771828442811966, "rewards/rejected": -0.2765713036060333, "step": 131 }, { "epoch": 0.3613963039014374, "grad_norm": 4.1563239097595215, "learning_rate": 8.191780821917808e-07, "log_odds_chosen": 0.5028682947158813, "log_odds_ratio": -0.9190099835395813, "logits/chosen": 0.17367449402809143, "logits/rejected": 0.0772772878408432, "logps/chosen": -2.5822296142578125, "logps/rejected": -3.021998643875122, "loss": 2.4946, "nll_loss": 2.402658462524414, "rewards/accuracies": 0.5, "rewards/chosen": -0.2582229673862457, "rewards/margins": 0.04397690296173096, "rewards/rejected": -0.3021998703479767, "step": 132 }, { "epoch": 0.36413415468856947, "grad_norm": 4.912601947784424, "learning_rate": 8.178082191780822e-07, "log_odds_chosen": 0.07808439433574677, "log_odds_ratio": -1.0035173892974854, "logits/chosen": 0.13134883344173431, "logits/rejected": 0.04939732700586319, "logps/chosen": -2.9957351684570312, "logps/rejected": -3.105175256729126, "loss": 2.7592, "nll_loss": 2.658818483352661, "rewards/accuracies": 0.375, "rewards/chosen": -0.29957354068756104, "rewards/margins": 0.010943982750177383, "rewards/rejected": -0.31051748991012573, "step": 133 }, { "epoch": 0.3668720054757016, "grad_norm": 6.35336971282959, "learning_rate": 8.164383561643835e-07, "log_odds_chosen": -1.4732717275619507, "log_odds_ratio": -1.979213833808899, "logits/chosen": 0.18146081268787384, "logits/rejected": 0.23853205144405365, "logps/chosen": -4.583284854888916, "logps/rejected": -3.1476097106933594, "loss": 3.0203, "nll_loss": 2.8223843574523926, "rewards/accuracies": 0.25, "rewards/chosen": -0.458328515291214, "rewards/margins": -0.1435675323009491, "rewards/rejected": -0.3147609829902649, "step": 134 }, { "epoch": 0.36960985626283366, "grad_norm": 4.528878211975098, "learning_rate": 8.150684931506849e-07, "log_odds_chosen": 0.30220621824264526, "log_odds_ratio": -0.5676321387290955, "logits/chosen": 0.1190810278058052, "logits/rejected": 0.027241915464401245, "logps/chosen": -2.4064064025878906, "logps/rejected": -2.6852381229400635, "loss": 2.6143, "nll_loss": 2.5575852394104004, "rewards/accuracies": 0.75, "rewards/chosen": -0.24064064025878906, "rewards/margins": 0.027883166447281837, "rewards/rejected": -0.26852381229400635, "step": 135 }, { "epoch": 0.3723477070499658, "grad_norm": 4.908228874206543, "learning_rate": 8.136986301369862e-07, "log_odds_chosen": 0.30751633644104004, "log_odds_ratio": -0.6178356409072876, "logits/chosen": 0.026802334934473038, "logits/rejected": 0.0029505454003810883, "logps/chosen": -3.1158447265625, "logps/rejected": -3.3844704627990723, "loss": 2.5694, "nll_loss": 2.5076217651367188, "rewards/accuracies": 0.75, "rewards/chosen": -0.31158447265625, "rewards/margins": 0.026862578466534615, "rewards/rejected": -0.33844709396362305, "step": 136 }, { "epoch": 0.3750855578370979, "grad_norm": 5.165262222290039, "learning_rate": 8.123287671232877e-07, "log_odds_chosen": -0.24896365404129028, "log_odds_ratio": -0.940774142742157, "logits/chosen": 0.0694512277841568, "logits/rejected": 0.0033429116010665894, "logps/chosen": -2.5693089962005615, "logps/rejected": -2.3200910091400146, "loss": 2.6237, "nll_loss": 2.5296616554260254, "rewards/accuracies": 0.5, "rewards/chosen": -0.2569308876991272, "rewards/margins": -0.024921799078583717, "rewards/rejected": -0.23200909793376923, "step": 137 }, { "epoch": 0.37782340862423, "grad_norm": 5.296397686004639, "learning_rate": 8.10958904109589e-07, "log_odds_chosen": 0.30035966634750366, "log_odds_ratio": -0.6360397338867188, "logits/chosen": 0.032199591398239136, "logits/rejected": 0.11230064928531647, "logps/chosen": -2.9642462730407715, "logps/rejected": -3.226930856704712, "loss": 2.6857, "nll_loss": 2.6221022605895996, "rewards/accuracies": 0.75, "rewards/chosen": -0.29642462730407715, "rewards/margins": 0.026268452405929565, "rewards/rejected": -0.3226930797100067, "step": 138 }, { "epoch": 0.3805612594113621, "grad_norm": 3.9951326847076416, "learning_rate": 8.095890410958903e-07, "log_odds_chosen": 0.24913182854652405, "log_odds_ratio": -0.6806709170341492, "logits/chosen": 0.1950964331626892, "logits/rejected": 0.09817096590995789, "logps/chosen": -2.0106048583984375, "logps/rejected": -2.176565170288086, "loss": 2.4338, "nll_loss": 2.3657777309417725, "rewards/accuracies": 0.75, "rewards/chosen": -0.201060488820076, "rewards/margins": 0.01659601554274559, "rewards/rejected": -0.21765650808811188, "step": 139 }, { "epoch": 0.38329911019849416, "grad_norm": 4.452392101287842, "learning_rate": 8.082191780821918e-07, "log_odds_chosen": -0.03701530396938324, "log_odds_ratio": -0.8400585651397705, "logits/chosen": 0.04078359156847, "logits/rejected": 0.02274397388100624, "logps/chosen": -2.9355831146240234, "logps/rejected": -2.8481290340423584, "loss": 2.5494, "nll_loss": 2.4653878211975098, "rewards/accuracies": 0.625, "rewards/chosen": -0.2935582995414734, "rewards/margins": -0.008745409548282623, "rewards/rejected": -0.28481289744377136, "step": 140 }, { "epoch": 0.3860369609856263, "grad_norm": 4.92817497253418, "learning_rate": 8.068493150684931e-07, "log_odds_chosen": 0.03609389066696167, "log_odds_ratio": -0.7845745086669922, "logits/chosen": 0.0740036591887474, "logits/rejected": 0.12889054417610168, "logps/chosen": -2.6699769496917725, "logps/rejected": -2.711621046066284, "loss": 2.6664, "nll_loss": 2.5879385471343994, "rewards/accuracies": 0.75, "rewards/chosen": -0.26699769496917725, "rewards/margins": 0.0041644033044576645, "rewards/rejected": -0.27116209268569946, "step": 141 }, { "epoch": 0.3887748117727584, "grad_norm": 5.576766490936279, "learning_rate": 8.054794520547945e-07, "log_odds_chosen": -0.1940675675868988, "log_odds_ratio": -1.079195499420166, "logits/chosen": 0.03625727817416191, "logits/rejected": -0.008380084298551083, "logps/chosen": -4.0837907791137695, "logps/rejected": -3.878000497817993, "loss": 2.7058, "nll_loss": 2.5978312492370605, "rewards/accuracies": 0.25, "rewards/chosen": -0.40837910771369934, "rewards/margins": -0.020579013973474503, "rewards/rejected": -0.38780003786087036, "step": 142 }, { "epoch": 0.3915126625598905, "grad_norm": 4.369960784912109, "learning_rate": 8.041095890410958e-07, "log_odds_chosen": 0.1165953278541565, "log_odds_ratio": -0.697565495967865, "logits/chosen": -0.08080273866653442, "logits/rejected": -0.09490703046321869, "logps/chosen": -2.8017616271972656, "logps/rejected": -2.9027516841888428, "loss": 2.5076, "nll_loss": 2.4378695487976074, "rewards/accuracies": 0.5, "rewards/chosen": -0.28017619252204895, "rewards/margins": 0.010098977945744991, "rewards/rejected": -0.2902751564979553, "step": 143 }, { "epoch": 0.3942505133470226, "grad_norm": 5.2118239402771, "learning_rate": 8.027397260273972e-07, "log_odds_chosen": -0.09128093719482422, "log_odds_ratio": -0.8927233815193176, "logits/chosen": 0.013101601973176003, "logits/rejected": 0.10448876023292542, "logps/chosen": -2.833847999572754, "logps/rejected": -2.7237372398376465, "loss": 2.734, "nll_loss": 2.644758939743042, "rewards/accuracies": 0.375, "rewards/chosen": -0.2833847999572754, "rewards/margins": -0.011011065915226936, "rewards/rejected": -0.272373765707016, "step": 144 }, { "epoch": 0.39698836413415467, "grad_norm": 4.563209533691406, "learning_rate": 8.013698630136985e-07, "log_odds_chosen": 1.1184730529785156, "log_odds_ratio": -0.34519118070602417, "logits/chosen": 0.12265323102474213, "logits/rejected": -0.016204889863729477, "logps/chosen": -2.280416488647461, "logps/rejected": -3.297238349914551, "loss": 2.5555, "nll_loss": 2.5209949016571045, "rewards/accuracies": 0.875, "rewards/chosen": -0.2280416488647461, "rewards/margins": 0.10168221592903137, "rewards/rejected": -0.32972386479377747, "step": 145 }, { "epoch": 0.3997262149212868, "grad_norm": 5.071506977081299, "learning_rate": 8e-07, "log_odds_chosen": -0.5733729004859924, "log_odds_ratio": -1.0904850959777832, "logits/chosen": -0.12711253762245178, "logits/rejected": -0.06703178584575653, "logps/chosen": -3.1350269317626953, "logps/rejected": -2.557878017425537, "loss": 2.677, "nll_loss": 2.5679924488067627, "rewards/accuracies": 0.25, "rewards/chosen": -0.313502699136734, "rewards/margins": -0.05771488696336746, "rewards/rejected": -0.25578781962394714, "step": 146 }, { "epoch": 0.4024640657084189, "grad_norm": 4.981823444366455, "learning_rate": 7.986301369863014e-07, "log_odds_chosen": -0.3255354166030884, "log_odds_ratio": -1.1211822032928467, "logits/chosen": 0.06655294448137283, "logits/rejected": 0.05261637270450592, "logps/chosen": -3.6465749740600586, "logps/rejected": -3.294454574584961, "loss": 2.6733, "nll_loss": 2.561215877532959, "rewards/accuracies": 0.375, "rewards/chosen": -0.3646574914455414, "rewards/margins": -0.035212062299251556, "rewards/rejected": -0.3294454514980316, "step": 147 }, { "epoch": 0.405201916495551, "grad_norm": 5.425663948059082, "learning_rate": 7.972602739726027e-07, "log_odds_chosen": -0.4669288396835327, "log_odds_ratio": -1.2796355485916138, "logits/chosen": 0.23230881989002228, "logits/rejected": 0.26139649748802185, "logps/chosen": -3.863210678100586, "logps/rejected": -3.3846559524536133, "loss": 2.8189, "nll_loss": 2.690903902053833, "rewards/accuracies": 0.375, "rewards/chosen": -0.3863210678100586, "rewards/margins": -0.04785546287894249, "rewards/rejected": -0.3384656310081482, "step": 148 }, { "epoch": 0.4079397672826831, "grad_norm": 4.647740364074707, "learning_rate": 7.958904109589041e-07, "log_odds_chosen": -0.2650125324726105, "log_odds_ratio": -1.0441346168518066, "logits/chosen": 0.08518370240926743, "logits/rejected": 0.09227320551872253, "logps/chosen": -3.3089752197265625, "logps/rejected": -2.9873201847076416, "loss": 2.6744, "nll_loss": 2.569993019104004, "rewards/accuracies": 0.5, "rewards/chosen": -0.3308975100517273, "rewards/margins": -0.03216549754142761, "rewards/rejected": -0.29873204231262207, "step": 149 }, { "epoch": 0.4106776180698152, "grad_norm": 4.632514476776123, "learning_rate": 7.945205479452054e-07, "log_odds_chosen": 0.4719037115573883, "log_odds_ratio": -0.5987849235534668, "logits/chosen": -0.07288885116577148, "logits/rejected": -0.10832367837429047, "logps/chosen": -2.6919078826904297, "logps/rejected": -3.1243882179260254, "loss": 2.5104, "nll_loss": 2.450500965118408, "rewards/accuracies": 0.625, "rewards/chosen": -0.26919078826904297, "rewards/margins": 0.04324803501367569, "rewards/rejected": -0.31243881583213806, "step": 150 }, { "epoch": 0.4134154688569473, "grad_norm": 5.252760887145996, "learning_rate": 7.931506849315068e-07, "log_odds_chosen": 0.17152300477027893, "log_odds_ratio": -0.7139335870742798, "logits/chosen": 0.23747709393501282, "logits/rejected": 0.2663489580154419, "logps/chosen": -3.0192437171936035, "logps/rejected": -3.173619508743286, "loss": 2.6743, "nll_loss": 2.602921962738037, "rewards/accuracies": 0.625, "rewards/chosen": -0.30192437767982483, "rewards/margins": 0.015437567606568336, "rewards/rejected": -0.3173619508743286, "step": 151 }, { "epoch": 0.4161533196440794, "grad_norm": 5.213066101074219, "learning_rate": 7.917808219178081e-07, "log_odds_chosen": -0.20811690390110016, "log_odds_ratio": -0.9999563694000244, "logits/chosen": -0.1033179834485054, "logits/rejected": -0.10814832150936127, "logps/chosen": -3.735199213027954, "logps/rejected": -3.517808437347412, "loss": 2.7108, "nll_loss": 2.610849618911743, "rewards/accuracies": 0.5, "rewards/chosen": -0.3735198974609375, "rewards/margins": -0.021739067509770393, "rewards/rejected": -0.35178083181381226, "step": 152 }, { "epoch": 0.4188911704312115, "grad_norm": 5.2352519035339355, "learning_rate": 7.904109589041096e-07, "log_odds_chosen": -0.9564603567123413, "log_odds_ratio": -1.5220892429351807, "logits/chosen": -0.07183430343866348, "logits/rejected": -0.017278503626585007, "logps/chosen": -3.737497329711914, "logps/rejected": -2.778204917907715, "loss": 2.7042, "nll_loss": 2.5520410537719727, "rewards/accuracies": 0.25, "rewards/chosen": -0.373749703168869, "rewards/margins": -0.09592922031879425, "rewards/rejected": -0.27782049775123596, "step": 153 }, { "epoch": 0.4216290212183436, "grad_norm": 5.2295145988464355, "learning_rate": 7.890410958904109e-07, "log_odds_chosen": -0.10855421423912048, "log_odds_ratio": -1.0579591989517212, "logits/chosen": 0.01892589032649994, "logits/rejected": -0.12148555368185043, "logps/chosen": -3.3193728923797607, "logps/rejected": -3.1652607917785645, "loss": 2.5434, "nll_loss": 2.4376258850097656, "rewards/accuracies": 0.375, "rewards/chosen": -0.331937313079834, "rewards/margins": -0.015411220490932465, "rewards/rejected": -0.3165260851383209, "step": 154 }, { "epoch": 0.4243668720054757, "grad_norm": 4.8524065017700195, "learning_rate": 7.876712328767124e-07, "log_odds_chosen": -0.12624457478523254, "log_odds_ratio": -0.828548014163971, "logits/chosen": 0.034901510924100876, "logits/rejected": 0.08272993564605713, "logps/chosen": -2.7381515502929688, "logps/rejected": -2.58807110786438, "loss": 2.5724, "nll_loss": 2.4895637035369873, "rewards/accuracies": 0.5, "rewards/chosen": -0.27381518483161926, "rewards/margins": -0.015008067712187767, "rewards/rejected": -0.25880712270736694, "step": 155 }, { "epoch": 0.4271047227926078, "grad_norm": 5.246121406555176, "learning_rate": 7.863013698630137e-07, "log_odds_chosen": -0.12372248619794846, "log_odds_ratio": -0.9061101675033569, "logits/chosen": 0.15544115006923676, "logits/rejected": 0.11919774115085602, "logps/chosen": -3.072633981704712, "logps/rejected": -2.9283158779144287, "loss": 2.6136, "nll_loss": 2.5229923725128174, "rewards/accuracies": 0.5, "rewards/chosen": -0.30726340413093567, "rewards/margins": -0.014431800693273544, "rewards/rejected": -0.2928315997123718, "step": 156 }, { "epoch": 0.42984257357973993, "grad_norm": 5.2776031494140625, "learning_rate": 7.849315068493151e-07, "log_odds_chosen": -1.3132951259613037, "log_odds_ratio": -1.799795150756836, "logits/chosen": -0.19920024275779724, "logits/rejected": -0.08912026882171631, "logps/chosen": -4.311314582824707, "logps/rejected": -3.0634191036224365, "loss": 2.8533, "nll_loss": 2.6733627319335938, "rewards/accuracies": 0.25, "rewards/chosen": -0.4311314821243286, "rewards/margins": -0.12478958070278168, "rewards/rejected": -0.30634188652038574, "step": 157 }, { "epoch": 0.432580424366872, "grad_norm": 4.408506393432617, "learning_rate": 7.835616438356164e-07, "log_odds_chosen": -0.19986368715763092, "log_odds_ratio": -1.0154876708984375, "logits/chosen": -0.025157593190670013, "logits/rejected": -0.07617749273777008, "logps/chosen": -2.670163631439209, "logps/rejected": -2.482354164123535, "loss": 2.5778, "nll_loss": 2.476226329803467, "rewards/accuracies": 0.5, "rewards/chosen": -0.26701638102531433, "rewards/margins": -0.018780970945954323, "rewards/rejected": -0.24823541939258575, "step": 158 }, { "epoch": 0.4353182751540041, "grad_norm": 4.734574317932129, "learning_rate": 7.821917808219177e-07, "log_odds_chosen": -0.23829573392868042, "log_odds_ratio": -0.8620508909225464, "logits/chosen": -0.13206389546394348, "logits/rejected": -0.12853720784187317, "logps/chosen": -2.7744786739349365, "logps/rejected": -2.527787208557129, "loss": 2.6279, "nll_loss": 2.5416862964630127, "rewards/accuracies": 0.5, "rewards/chosen": -0.2774478793144226, "rewards/margins": -0.024669161066412926, "rewards/rejected": -0.25277870893478394, "step": 159 }, { "epoch": 0.4380561259411362, "grad_norm": 5.343784809112549, "learning_rate": 7.808219178082191e-07, "log_odds_chosen": -0.026756521314382553, "log_odds_ratio": -1.064041256904602, "logits/chosen": 0.07147421687841415, "logits/rejected": 0.016089774668216705, "logps/chosen": -3.4863076210021973, "logps/rejected": -3.4466772079467773, "loss": 2.7171, "nll_loss": 2.610668897628784, "rewards/accuracies": 0.375, "rewards/chosen": -0.34863075613975525, "rewards/margins": -0.003963042050600052, "rewards/rejected": -0.3446677327156067, "step": 160 }, { "epoch": 0.4407939767282683, "grad_norm": 4.827378273010254, "learning_rate": 7.794520547945204e-07, "log_odds_chosen": 0.10204842686653137, "log_odds_ratio": -0.8528003692626953, "logits/chosen": -0.2045591026544571, "logits/rejected": -0.19694186747074127, "logps/chosen": -2.8782272338867188, "logps/rejected": -2.9752416610717773, "loss": 2.6375, "nll_loss": 2.5522608757019043, "rewards/accuracies": 0.625, "rewards/chosen": -0.2878227233886719, "rewards/margins": 0.009701471775770187, "rewards/rejected": -0.29752418398857117, "step": 161 }, { "epoch": 0.44353182751540043, "grad_norm": 4.854065418243408, "learning_rate": 7.780821917808219e-07, "log_odds_chosen": -0.39335134625434875, "log_odds_ratio": -1.104198932647705, "logits/chosen": 0.05974643677473068, "logits/rejected": 0.08279106020927429, "logps/chosen": -3.1677839756011963, "logps/rejected": -2.751312732696533, "loss": 2.635, "nll_loss": 2.524587631225586, "rewards/accuracies": 0.625, "rewards/chosen": -0.31677842140197754, "rewards/margins": -0.04164711385965347, "rewards/rejected": -0.2751312851905823, "step": 162 }, { "epoch": 0.4462696783025325, "grad_norm": 5.261326313018799, "learning_rate": 7.767123287671233e-07, "log_odds_chosen": -1.1018282175064087, "log_odds_ratio": -1.5159809589385986, "logits/chosen": -0.010554943233728409, "logits/rejected": 0.035331953316926956, "logps/chosen": -3.6116456985473633, "logps/rejected": -2.5398457050323486, "loss": 2.7846, "nll_loss": 2.632953405380249, "rewards/accuracies": 0.25, "rewards/chosen": -0.36116456985473633, "rewards/margins": -0.10717999190092087, "rewards/rejected": -0.25398457050323486, "step": 163 }, { "epoch": 0.4490075290896646, "grad_norm": 5.134049415588379, "learning_rate": 7.753424657534247e-07, "log_odds_chosen": -0.21409326791763306, "log_odds_ratio": -0.8781447410583496, "logits/chosen": -0.048168350011110306, "logits/rejected": -0.03957008942961693, "logps/chosen": -3.458059072494507, "logps/rejected": -3.246427536010742, "loss": 2.6694, "nll_loss": 2.581584930419922, "rewards/accuracies": 0.375, "rewards/chosen": -0.34580594301223755, "rewards/margins": -0.02116316184401512, "rewards/rejected": -0.32464277744293213, "step": 164 }, { "epoch": 0.4517453798767967, "grad_norm": 5.2716803550720215, "learning_rate": 7.73972602739726e-07, "log_odds_chosen": -0.3125859200954437, "log_odds_ratio": -1.0261259078979492, "logits/chosen": -0.04627792537212372, "logits/rejected": 0.022587157785892487, "logps/chosen": -3.692244291305542, "logps/rejected": -3.384213924407959, "loss": 2.6987, "nll_loss": 2.5960702896118164, "rewards/accuracies": 0.375, "rewards/chosen": -0.3692244291305542, "rewards/margins": -0.030803032219409943, "rewards/rejected": -0.33842140436172485, "step": 165 }, { "epoch": 0.4544832306639288, "grad_norm": 4.59981107711792, "learning_rate": 7.726027397260274e-07, "log_odds_chosen": -0.48942890763282776, "log_odds_ratio": -1.1387290954589844, "logits/chosen": 0.12552586197853088, "logits/rejected": 0.10511250793933868, "logps/chosen": -3.2197911739349365, "logps/rejected": -2.7062172889709473, "loss": 2.6164, "nll_loss": 2.5025155544281006, "rewards/accuracies": 0.5, "rewards/chosen": -0.3219791054725647, "rewards/margins": -0.05135737359523773, "rewards/rejected": -0.27062174677848816, "step": 166 }, { "epoch": 0.45722108145106094, "grad_norm": 5.23454475402832, "learning_rate": 7.712328767123287e-07, "log_odds_chosen": -0.46423250436782837, "log_odds_ratio": -1.143385648727417, "logits/chosen": 0.14395672082901, "logits/rejected": 0.20035839080810547, "logps/chosen": -3.4796078205108643, "logps/rejected": -3.0233144760131836, "loss": 2.6387, "nll_loss": 2.5243842601776123, "rewards/accuracies": 0.375, "rewards/chosen": -0.34796077013015747, "rewards/margins": -0.045629337430000305, "rewards/rejected": -0.30233144760131836, "step": 167 }, { "epoch": 0.459958932238193, "grad_norm": 4.7880096435546875, "learning_rate": 7.6986301369863e-07, "log_odds_chosen": 0.09932902455329895, "log_odds_ratio": -0.7072969675064087, "logits/chosen": -0.15282298624515533, "logits/rejected": -0.08775343745946884, "logps/chosen": -2.8566102981567383, "logps/rejected": -2.9638140201568604, "loss": 2.6044, "nll_loss": 2.533665418624878, "rewards/accuracies": 0.75, "rewards/chosen": -0.2856610417366028, "rewards/margins": 0.010720381513237953, "rewards/rejected": -0.296381413936615, "step": 168 }, { "epoch": 0.46269678302532513, "grad_norm": 4.706996440887451, "learning_rate": 7.684931506849314e-07, "log_odds_chosen": 0.1009058803319931, "log_odds_ratio": -0.8264567255973816, "logits/chosen": 0.06813651323318481, "logits/rejected": -0.041794899851083755, "logps/chosen": -2.8084332942962646, "logps/rejected": -2.8795723915100098, "loss": 2.5305, "nll_loss": 2.4478163719177246, "rewards/accuracies": 0.5, "rewards/chosen": -0.2808433473110199, "rewards/margins": 0.0071139149367809296, "rewards/rejected": -0.28795725107192993, "step": 169 }, { "epoch": 0.4654346338124572, "grad_norm": 4.699556827545166, "learning_rate": 7.671232876712328e-07, "log_odds_chosen": 0.20330113172531128, "log_odds_ratio": -0.7406771183013916, "logits/chosen": 0.1270163655281067, "logits/rejected": 0.18574845790863037, "logps/chosen": -2.6692442893981934, "logps/rejected": -2.779956102371216, "loss": 2.5987, "nll_loss": 2.5246689319610596, "rewards/accuracies": 0.625, "rewards/chosen": -0.2669244408607483, "rewards/margins": 0.011071167886257172, "rewards/rejected": -0.27799561619758606, "step": 170 }, { "epoch": 0.4681724845995893, "grad_norm": 4.8885931968688965, "learning_rate": 7.657534246575343e-07, "log_odds_chosen": -1.04922354221344, "log_odds_ratio": -1.5431528091430664, "logits/chosen": -0.07891508936882019, "logits/rejected": -0.12109138071537018, "logps/chosen": -3.8562355041503906, "logps/rejected": -2.8164453506469727, "loss": 2.6535, "nll_loss": 2.4991438388824463, "rewards/accuracies": 0.25, "rewards/chosen": -0.385623574256897, "rewards/margins": -0.10397907346487045, "rewards/rejected": -0.2816445231437683, "step": 171 }, { "epoch": 0.47091033538672145, "grad_norm": 4.749286651611328, "learning_rate": 7.643835616438356e-07, "log_odds_chosen": -0.5765501260757446, "log_odds_ratio": -1.0768626928329468, "logits/chosen": -0.04662257060408592, "logits/rejected": 0.02211187779903412, "logps/chosen": -3.012894630432129, "logps/rejected": -2.4973292350769043, "loss": 2.5463, "nll_loss": 2.4385976791381836, "rewards/accuracies": 0.25, "rewards/chosen": -0.301289439201355, "rewards/margins": -0.0515565499663353, "rewards/rejected": -0.24973291158676147, "step": 172 }, { "epoch": 0.4736481861738535, "grad_norm": 5.128815174102783, "learning_rate": 7.63013698630137e-07, "log_odds_chosen": 0.13594317436218262, "log_odds_ratio": -0.8193863034248352, "logits/chosen": 0.08470793813467026, "logits/rejected": 0.04828367382287979, "logps/chosen": -3.4517416954040527, "logps/rejected": -3.571084499359131, "loss": 2.6304, "nll_loss": 2.5485100746154785, "rewards/accuracies": 0.375, "rewards/chosen": -0.3451741635799408, "rewards/margins": 0.011934267356991768, "rewards/rejected": -0.3571084141731262, "step": 173 }, { "epoch": 0.47638603696098564, "grad_norm": 5.684353351593018, "learning_rate": 7.616438356164383e-07, "log_odds_chosen": -1.298626184463501, "log_odds_ratio": -1.7986021041870117, "logits/chosen": 0.023386184126138687, "logits/rejected": 0.10048684477806091, "logps/chosen": -4.276363849639893, "logps/rejected": -3.0042710304260254, "loss": 2.7588, "nll_loss": 2.5789783000946045, "rewards/accuracies": 0.25, "rewards/chosen": -0.42763641476631165, "rewards/margins": -0.12720927596092224, "rewards/rejected": -0.3004271388053894, "step": 174 }, { "epoch": 0.4791238877481177, "grad_norm": 4.81318473815918, "learning_rate": 7.602739726027397e-07, "log_odds_chosen": -0.4031527042388916, "log_odds_ratio": -1.0160083770751953, "logits/chosen": -0.03177962824702263, "logits/rejected": 0.011517442762851715, "logps/chosen": -3.0223588943481445, "logps/rejected": -2.6158852577209473, "loss": 2.5945, "nll_loss": 2.492891788482666, "rewards/accuracies": 0.375, "rewards/chosen": -0.3022359013557434, "rewards/margins": -0.040647365152835846, "rewards/rejected": -0.26158851385116577, "step": 175 }, { "epoch": 0.48186173853524983, "grad_norm": 4.941562175750732, "learning_rate": 7.58904109589041e-07, "log_odds_chosen": 0.0646696537733078, "log_odds_ratio": -0.7937285900115967, "logits/chosen": -0.09965157508850098, "logits/rejected": 0.009743809700012207, "logps/chosen": -3.095606803894043, "logps/rejected": -3.136169910430908, "loss": 2.6153, "nll_loss": 2.5359244346618652, "rewards/accuracies": 0.375, "rewards/chosen": -0.3095606863498688, "rewards/margins": 0.004056304693222046, "rewards/rejected": -0.3136169910430908, "step": 176 }, { "epoch": 0.48459958932238195, "grad_norm": 5.7309770584106445, "learning_rate": 7.575342465753424e-07, "log_odds_chosen": 0.008794128894805908, "log_odds_ratio": -0.787406861782074, "logits/chosen": 0.12258473038673401, "logits/rejected": 0.1396290361881256, "logps/chosen": -3.508349895477295, "logps/rejected": -3.509385108947754, "loss": 2.7545, "nll_loss": 2.6758017539978027, "rewards/accuracies": 0.25, "rewards/chosen": -0.3508349657058716, "rewards/margins": 0.0001035202294588089, "rewards/rejected": -0.35093846917152405, "step": 177 }, { "epoch": 0.487337440109514, "grad_norm": 5.872817039489746, "learning_rate": 7.561643835616438e-07, "log_odds_chosen": -1.0715616941452026, "log_odds_ratio": -1.5137381553649902, "logits/chosen": 0.004768716171383858, "logits/rejected": 0.041171640157699585, "logps/chosen": -4.776034355163574, "logps/rejected": -3.7373266220092773, "loss": 2.7666, "nll_loss": 2.6152522563934326, "rewards/accuracies": 0.25, "rewards/chosen": -0.47760340571403503, "rewards/margins": -0.10387074947357178, "rewards/rejected": -0.37373268604278564, "step": 178 }, { "epoch": 0.49007529089664614, "grad_norm": 4.383761405944824, "learning_rate": 7.547945205479452e-07, "log_odds_chosen": -0.03350743651390076, "log_odds_ratio": -0.8374693393707275, "logits/chosen": -0.036628514528274536, "logits/rejected": -0.06123049929738045, "logps/chosen": -3.1459012031555176, "logps/rejected": -3.0838186740875244, "loss": 2.4919, "nll_loss": 2.408162832260132, "rewards/accuracies": 0.625, "rewards/chosen": -0.31459009647369385, "rewards/margins": -0.006208213046193123, "rewards/rejected": -0.3083818852901459, "step": 179 }, { "epoch": 0.4928131416837782, "grad_norm": 6.134665489196777, "learning_rate": 7.534246575342466e-07, "log_odds_chosen": -1.1654714345932007, "log_odds_ratio": -1.4946410655975342, "logits/chosen": 0.23109936714172363, "logits/rejected": 0.3909558057785034, "logps/chosen": -4.282272815704346, "logps/rejected": -3.142139434814453, "loss": 2.8542, "nll_loss": 2.7047462463378906, "rewards/accuracies": 0.0, "rewards/chosen": -0.4282273054122925, "rewards/margins": -0.11401335150003433, "rewards/rejected": -0.31421393156051636, "step": 180 }, { "epoch": 0.49555099247091033, "grad_norm": 5.375032901763916, "learning_rate": 7.520547945205479e-07, "log_odds_chosen": -0.7568830251693726, "log_odds_ratio": -1.2390906810760498, "logits/chosen": -0.09193158894777298, "logits/rejected": 0.019036345183849335, "logps/chosen": -3.638357639312744, "logps/rejected": -2.8979485034942627, "loss": 2.7197, "nll_loss": 2.5957722663879395, "rewards/accuracies": 0.25, "rewards/chosen": -0.36383575201034546, "rewards/margins": -0.07404091209173203, "rewards/rejected": -0.2897948622703552, "step": 181 }, { "epoch": 0.49828884325804246, "grad_norm": 4.501798152923584, "learning_rate": 7.506849315068493e-07, "log_odds_chosen": 0.8570611476898193, "log_odds_ratio": -0.6286982297897339, "logits/chosen": -0.07229740917682648, "logits/rejected": -0.16275355219841003, "logps/chosen": -2.790649890899658, "logps/rejected": -3.619676113128662, "loss": 2.5552, "nll_loss": 2.492363929748535, "rewards/accuracies": 0.75, "rewards/chosen": -0.27906501293182373, "rewards/margins": 0.08290261030197144, "rewards/rejected": -0.36196762323379517, "step": 182 }, { "epoch": 0.5010266940451745, "grad_norm": 5.064631462097168, "learning_rate": 7.493150684931506e-07, "log_odds_chosen": -0.0016392432153224945, "log_odds_ratio": -0.7492592334747314, "logits/chosen": 0.05073312669992447, "logits/rejected": 0.05753450095653534, "logps/chosen": -3.286245346069336, "logps/rejected": -3.2455413341522217, "loss": 2.6027, "nll_loss": 2.5277259349823, "rewards/accuracies": 0.5, "rewards/chosen": -0.32862454652786255, "rewards/margins": -0.004070397466421127, "rewards/rejected": -0.3245541453361511, "step": 183 }, { "epoch": 0.5037645448323066, "grad_norm": 4.802375316619873, "learning_rate": 7.47945205479452e-07, "log_odds_chosen": -0.3385929465293884, "log_odds_ratio": -1.0983567237854004, "logits/chosen": 0.19483047723770142, "logits/rejected": 0.19090327620506287, "logps/chosen": -3.0471911430358887, "logps/rejected": -2.694143056869507, "loss": 2.6169, "nll_loss": 2.5071072578430176, "rewards/accuracies": 0.375, "rewards/chosen": -0.30471912026405334, "rewards/margins": -0.03530482202768326, "rewards/rejected": -0.2694143056869507, "step": 184 }, { "epoch": 0.5065023956194388, "grad_norm": 4.705829620361328, "learning_rate": 7.465753424657533e-07, "log_odds_chosen": -0.05856429040431976, "log_odds_ratio": -0.8393728733062744, "logits/chosen": -0.06814052909612656, "logits/rejected": -0.08334772288799286, "logps/chosen": -2.475954294204712, "logps/rejected": -2.3880929946899414, "loss": 2.5185, "nll_loss": 2.434542655944824, "rewards/accuracies": 0.375, "rewards/chosen": -0.2475954294204712, "rewards/margins": -0.008786125108599663, "rewards/rejected": -0.23880933225154877, "step": 185 }, { "epoch": 0.5092402464065708, "grad_norm": 4.929784297943115, "learning_rate": 7.452054794520548e-07, "log_odds_chosen": -0.24358782172203064, "log_odds_ratio": -0.8646731376647949, "logits/chosen": 0.12667307257652283, "logits/rejected": 0.16591303050518036, "logps/chosen": -3.0257983207702637, "logps/rejected": -2.78933048248291, "loss": 2.6056, "nll_loss": 2.519127368927002, "rewards/accuracies": 0.375, "rewards/chosen": -0.3025798797607422, "rewards/margins": -0.023646803572773933, "rewards/rejected": -0.278933048248291, "step": 186 }, { "epoch": 0.5119780971937029, "grad_norm": 4.881514072418213, "learning_rate": 7.438356164383562e-07, "log_odds_chosen": -0.20553477108478546, "log_odds_ratio": -0.9106322526931763, "logits/chosen": -0.07019771635532379, "logits/rejected": -0.04865971952676773, "logps/chosen": -3.0318922996520996, "logps/rejected": -2.8451409339904785, "loss": 2.5708, "nll_loss": 2.47977614402771, "rewards/accuracies": 0.5, "rewards/chosen": -0.303189218044281, "rewards/margins": -0.01867511309683323, "rewards/rejected": -0.28451409935951233, "step": 187 }, { "epoch": 0.5147159479808351, "grad_norm": 4.9186930656433105, "learning_rate": 7.424657534246575e-07, "log_odds_chosen": -0.34743866324424744, "log_odds_ratio": -1.1207821369171143, "logits/chosen": 0.04795347526669502, "logits/rejected": -0.05020894482731819, "logps/chosen": -3.4929580688476562, "logps/rejected": -3.1358895301818848, "loss": 2.6119, "nll_loss": 2.4998176097869873, "rewards/accuracies": 0.25, "rewards/chosen": -0.34929582476615906, "rewards/margins": -0.03570687770843506, "rewards/rejected": -0.313588947057724, "step": 188 }, { "epoch": 0.5174537987679672, "grad_norm": 4.495395183563232, "learning_rate": 7.410958904109589e-07, "log_odds_chosen": 0.5567930340766907, "log_odds_ratio": -0.4822555184364319, "logits/chosen": 0.12219853699207306, "logits/rejected": 0.0822681114077568, "logps/chosen": -2.321150302886963, "logps/rejected": -2.7883448600769043, "loss": 2.4222, "nll_loss": 2.3739614486694336, "rewards/accuracies": 0.875, "rewards/chosen": -0.2321150302886963, "rewards/margins": 0.04671946167945862, "rewards/rejected": -0.2788344919681549, "step": 189 }, { "epoch": 0.5201916495550992, "grad_norm": 5.169776916503906, "learning_rate": 7.397260273972602e-07, "log_odds_chosen": -0.046890512108802795, "log_odds_ratio": -1.2631781101226807, "logits/chosen": -0.019189201295375824, "logits/rejected": -0.003974858671426773, "logps/chosen": -3.312772512435913, "logps/rejected": -3.2966489791870117, "loss": 2.6593, "nll_loss": 2.5329673290252686, "rewards/accuracies": 0.25, "rewards/chosen": -0.3312772214412689, "rewards/margins": -0.0016123447567224503, "rewards/rejected": -0.3296648859977722, "step": 190 }, { "epoch": 0.5229295003422314, "grad_norm": 5.0094146728515625, "learning_rate": 7.383561643835616e-07, "log_odds_chosen": -0.5100201964378357, "log_odds_ratio": -1.1653603315353394, "logits/chosen": 0.0012830719351768494, "logits/rejected": 0.14291541278362274, "logps/chosen": -3.2917118072509766, "logps/rejected": -2.797135829925537, "loss": 2.7317, "nll_loss": 2.6151771545410156, "rewards/accuracies": 0.375, "rewards/chosen": -0.32917118072509766, "rewards/margins": -0.04945757985115051, "rewards/rejected": -0.27971360087394714, "step": 191 }, { "epoch": 0.5256673511293635, "grad_norm": 5.53822135925293, "learning_rate": 7.369863013698629e-07, "log_odds_chosen": -0.6519789695739746, "log_odds_ratio": -1.1085233688354492, "logits/chosen": 0.0846756100654602, "logits/rejected": 0.15941914916038513, "logps/chosen": -3.697871685028076, "logps/rejected": -3.0630130767822266, "loss": 2.7094, "nll_loss": 2.5985751152038574, "rewards/accuracies": 0.125, "rewards/chosen": -0.36978715658187866, "rewards/margins": -0.06348587572574615, "rewards/rejected": -0.3063013255596161, "step": 192 }, { "epoch": 0.5284052019164955, "grad_norm": 5.459115982055664, "learning_rate": 7.356164383561643e-07, "log_odds_chosen": -0.8343923687934875, "log_odds_ratio": -1.4152802228927612, "logits/chosen": -0.06260256469249725, "logits/rejected": 0.06481786072254181, "logps/chosen": -3.7732958793640137, "logps/rejected": -2.948927640914917, "loss": 2.6864, "nll_loss": 2.5449118614196777, "rewards/accuracies": 0.375, "rewards/chosen": -0.37732958793640137, "rewards/margins": -0.08243682980537415, "rewards/rejected": -0.2948927581310272, "step": 193 }, { "epoch": 0.5311430527036276, "grad_norm": 5.646156311035156, "learning_rate": 7.342465753424657e-07, "log_odds_chosen": 0.023254845291376114, "log_odds_ratio": -1.4809001684188843, "logits/chosen": 0.011422708630561829, "logits/rejected": 0.01431339979171753, "logps/chosen": -4.582982063293457, "logps/rejected": -4.555511951446533, "loss": 2.6864, "nll_loss": 2.5383105278015137, "rewards/accuracies": 0.5, "rewards/chosen": -0.4582982063293457, "rewards/margins": -0.0027469955384731293, "rewards/rejected": -0.45555126667022705, "step": 194 }, { "epoch": 0.5338809034907598, "grad_norm": 4.794840335845947, "learning_rate": 7.328767123287672e-07, "log_odds_chosen": -0.033262789249420166, "log_odds_ratio": -0.9741391539573669, "logits/chosen": 0.0707741379737854, "logits/rejected": 0.03290639445185661, "logps/chosen": -3.137920379638672, "logps/rejected": -3.120746612548828, "loss": 2.5389, "nll_loss": 2.4415078163146973, "rewards/accuracies": 0.5, "rewards/chosen": -0.31379204988479614, "rewards/margins": -0.0017174072563648224, "rewards/rejected": -0.3120746612548828, "step": 195 }, { "epoch": 0.5366187542778919, "grad_norm": 5.1391377449035645, "learning_rate": 7.315068493150685e-07, "log_odds_chosen": -0.32853883504867554, "log_odds_ratio": -1.4258835315704346, "logits/chosen": 0.015549033880233765, "logits/rejected": 0.029304251074790955, "logps/chosen": -3.3663620948791504, "logps/rejected": -3.0815649032592773, "loss": 2.6182, "nll_loss": 2.4756481647491455, "rewards/accuracies": 0.375, "rewards/chosen": -0.3366362452507019, "rewards/margins": -0.028479745611548424, "rewards/rejected": -0.30815649032592773, "step": 196 }, { "epoch": 0.5393566050650239, "grad_norm": 4.616846561431885, "learning_rate": 7.301369863013699e-07, "log_odds_chosen": -0.19488653540611267, "log_odds_ratio": -0.90641850233078, "logits/chosen": -0.16477739810943604, "logits/rejected": -0.21325021982192993, "logps/chosen": -3.139003276824951, "logps/rejected": -2.9059321880340576, "loss": 2.5219, "nll_loss": 2.4312520027160645, "rewards/accuracies": 0.5, "rewards/chosen": -0.31390032172203064, "rewards/margins": -0.023307092487812042, "rewards/rejected": -0.2905932068824768, "step": 197 }, { "epoch": 0.5420944558521561, "grad_norm": 5.090418815612793, "learning_rate": 7.287671232876712e-07, "log_odds_chosen": 0.5642824769020081, "log_odds_ratio": -0.6301884651184082, "logits/chosen": 0.05938664823770523, "logits/rejected": 0.053088657557964325, "logps/chosen": -3.1376547813415527, "logps/rejected": -3.6900429725646973, "loss": 2.6026, "nll_loss": 2.5395407676696777, "rewards/accuracies": 0.625, "rewards/chosen": -0.3137654662132263, "rewards/margins": 0.05523882061243057, "rewards/rejected": -0.3690042793750763, "step": 198 }, { "epoch": 0.5448323066392882, "grad_norm": 4.54019021987915, "learning_rate": 7.273972602739725e-07, "log_odds_chosen": 0.3299625813961029, "log_odds_ratio": -0.7810162305831909, "logits/chosen": -0.06545304507017136, "logits/rejected": -0.02625918760895729, "logps/chosen": -2.7569305896759033, "logps/rejected": -3.0667409896850586, "loss": 2.5071, "nll_loss": 2.429004192352295, "rewards/accuracies": 0.375, "rewards/chosen": -0.27569305896759033, "rewards/margins": 0.03098103031516075, "rewards/rejected": -0.30667412281036377, "step": 199 }, { "epoch": 0.5475701574264202, "grad_norm": 4.861699104309082, "learning_rate": 7.260273972602739e-07, "log_odds_chosen": -0.1965613067150116, "log_odds_ratio": -0.8892083764076233, "logits/chosen": 0.08470970392227173, "logits/rejected": 0.09756029397249222, "logps/chosen": -2.967592239379883, "logps/rejected": -2.761378526687622, "loss": 2.5359, "nll_loss": 2.4469470977783203, "rewards/accuracies": 0.5, "rewards/chosen": -0.2967592179775238, "rewards/margins": -0.02062136121094227, "rewards/rejected": -0.2761378288269043, "step": 200 }, { "epoch": 0.5503080082135524, "grad_norm": 5.528141975402832, "learning_rate": 7.246575342465752e-07, "log_odds_chosen": -0.7470605373382568, "log_odds_ratio": -1.2498635053634644, "logits/chosen": 0.0293300598859787, "logits/rejected": 0.08984871208667755, "logps/chosen": -3.705836296081543, "logps/rejected": -2.9849228858947754, "loss": 2.7017, "nll_loss": 2.5767064094543457, "rewards/accuracies": 0.25, "rewards/chosen": -0.3705836236476898, "rewards/margins": -0.07209135591983795, "rewards/rejected": -0.29849231243133545, "step": 201 }, { "epoch": 0.5530458590006845, "grad_norm": 4.736202716827393, "learning_rate": 7.232876712328767e-07, "log_odds_chosen": -0.23113590478897095, "log_odds_ratio": -1.0836189985275269, "logits/chosen": 0.03904544562101364, "logits/rejected": 0.002329513430595398, "logps/chosen": -3.111180067062378, "logps/rejected": -2.8227314949035645, "loss": 2.5184, "nll_loss": 2.4100213050842285, "rewards/accuracies": 0.625, "rewards/chosen": -0.3111180067062378, "rewards/margins": -0.028844842687249184, "rewards/rejected": -0.28227317333221436, "step": 202 }, { "epoch": 0.5557837097878165, "grad_norm": 6.381757736206055, "learning_rate": 7.219178082191781e-07, "log_odds_chosen": -1.0090314149856567, "log_odds_ratio": -1.4150774478912354, "logits/chosen": -0.0679711326956749, "logits/rejected": 0.051225174218416214, "logps/chosen": -4.168906211853027, "logps/rejected": -3.1884422302246094, "loss": 2.7865, "nll_loss": 2.644969940185547, "rewards/accuracies": 0.0, "rewards/chosen": -0.4168906807899475, "rewards/margins": -0.0980464294552803, "rewards/rejected": -0.318844199180603, "step": 203 }, { "epoch": 0.5585215605749486, "grad_norm": 6.186321258544922, "learning_rate": 7.205479452054795e-07, "log_odds_chosen": -1.5501277446746826, "log_odds_ratio": -1.9320886135101318, "logits/chosen": 0.07576481997966766, "logits/rejected": 0.14109908044338226, "logps/chosen": -4.9548540115356445, "logps/rejected": -3.420687675476074, "loss": 2.7645, "nll_loss": 2.5712978839874268, "rewards/accuracies": 0.25, "rewards/chosen": -0.49548542499542236, "rewards/margins": -0.15341664850711823, "rewards/rejected": -0.34206873178482056, "step": 204 }, { "epoch": 0.5612594113620808, "grad_norm": 4.433249473571777, "learning_rate": 7.191780821917808e-07, "log_odds_chosen": -0.03714729845523834, "log_odds_ratio": -0.7702639102935791, "logits/chosen": -0.0651475191116333, "logits/rejected": -0.13326050341129303, "logps/chosen": -2.6124582290649414, "logps/rejected": -2.5511767864227295, "loss": 2.4685, "nll_loss": 2.3914880752563477, "rewards/accuracies": 0.5, "rewards/chosen": -0.26124584674835205, "rewards/margins": -0.006128145381808281, "rewards/rejected": -0.2551176846027374, "step": 205 }, { "epoch": 0.5639972621492129, "grad_norm": 5.5141215324401855, "learning_rate": 7.178082191780822e-07, "log_odds_chosen": -1.801929235458374, "log_odds_ratio": -2.1478018760681152, "logits/chosen": -0.04369679093360901, "logits/rejected": -0.013092336244881153, "logps/chosen": -4.537594795227051, "logps/rejected": -2.8013105392456055, "loss": 2.8312, "nll_loss": 2.6163744926452637, "rewards/accuracies": 0.125, "rewards/chosen": -0.45375949144363403, "rewards/margins": -0.17362843453884125, "rewards/rejected": -0.2801310420036316, "step": 206 }, { "epoch": 0.5667351129363449, "grad_norm": 4.919399738311768, "learning_rate": 7.164383561643835e-07, "log_odds_chosen": 0.3176688849925995, "log_odds_ratio": -0.752197265625, "logits/chosen": 0.26457899808883667, "logits/rejected": 0.18546858429908752, "logps/chosen": -2.7472238540649414, "logps/rejected": -3.053791046142578, "loss": 2.5278, "nll_loss": 2.452547311782837, "rewards/accuracies": 0.5, "rewards/chosen": -0.2747223973274231, "rewards/margins": 0.030656706541776657, "rewards/rejected": -0.30537909269332886, "step": 207 }, { "epoch": 0.5694729637234771, "grad_norm": 5.8410444259643555, "learning_rate": 7.150684931506848e-07, "log_odds_chosen": 0.20858274400234222, "log_odds_ratio": -0.658391535282135, "logits/chosen": 0.1466752290725708, "logits/rejected": 0.24734917283058167, "logps/chosen": -3.1444201469421387, "logps/rejected": -3.3082525730133057, "loss": 2.6402, "nll_loss": 2.5743865966796875, "rewards/accuracies": 0.5, "rewards/chosen": -0.3144420385360718, "rewards/margins": 0.016383256763219833, "rewards/rejected": -0.3308252692222595, "step": 208 }, { "epoch": 0.5722108145106092, "grad_norm": 4.569679260253906, "learning_rate": 7.136986301369862e-07, "log_odds_chosen": 0.4052177369594574, "log_odds_ratio": -0.5784952044487, "logits/chosen": 0.1251700222492218, "logits/rejected": -0.030930712819099426, "logps/chosen": -2.270498275756836, "logps/rejected": -2.600602626800537, "loss": 2.3837, "nll_loss": 2.325883388519287, "rewards/accuracies": 0.75, "rewards/chosen": -0.2270498126745224, "rewards/margins": 0.033010438084602356, "rewards/rejected": -0.26006025075912476, "step": 209 }, { "epoch": 0.5749486652977412, "grad_norm": 5.444876194000244, "learning_rate": 7.123287671232876e-07, "log_odds_chosen": -0.7202804684638977, "log_odds_ratio": -1.3300820589065552, "logits/chosen": -0.022380106151103973, "logits/rejected": 0.02940603718161583, "logps/chosen": -4.069800853729248, "logps/rejected": -3.351992130279541, "loss": 2.6505, "nll_loss": 2.517528772354126, "rewards/accuracies": 0.25, "rewards/chosen": -0.40698012709617615, "rewards/margins": -0.07178092002868652, "rewards/rejected": -0.3351992070674896, "step": 210 }, { "epoch": 0.5776865160848734, "grad_norm": 5.224420070648193, "learning_rate": 7.109589041095891e-07, "log_odds_chosen": -0.9381393194198608, "log_odds_ratio": -1.4949214458465576, "logits/chosen": -0.05032486468553543, "logits/rejected": 0.021673373878002167, "logps/chosen": -4.4706711769104, "logps/rejected": -3.523010730743408, "loss": 2.682, "nll_loss": 2.532548427581787, "rewards/accuracies": 0.375, "rewards/chosen": -0.44706714153289795, "rewards/margins": -0.0947660505771637, "rewards/rejected": -0.35230109095573425, "step": 211 }, { "epoch": 0.5804243668720055, "grad_norm": 5.566216945648193, "learning_rate": 7.095890410958904e-07, "log_odds_chosen": -0.8985710740089417, "log_odds_ratio": -1.4362444877624512, "logits/chosen": 0.05827118456363678, "logits/rejected": 0.1442776620388031, "logps/chosen": -4.507694244384766, "logps/rejected": -3.6176671981811523, "loss": 2.7558, "nll_loss": 2.612217664718628, "rewards/accuracies": 0.375, "rewards/chosen": -0.45076942443847656, "rewards/margins": -0.08900273591279984, "rewards/rejected": -0.3617666959762573, "step": 212 }, { "epoch": 0.5831622176591376, "grad_norm": 4.92706823348999, "learning_rate": 7.082191780821918e-07, "log_odds_chosen": 0.32234102487564087, "log_odds_ratio": -0.6993477940559387, "logits/chosen": 0.004501551389694214, "logits/rejected": 0.025910528376698494, "logps/chosen": -3.236074447631836, "logps/rejected": -3.49320387840271, "loss": 2.5023, "nll_loss": 2.432374954223633, "rewards/accuracies": 0.625, "rewards/chosen": -0.3236074447631836, "rewards/margins": 0.02571295201778412, "rewards/rejected": -0.3493203818798065, "step": 213 }, { "epoch": 0.5859000684462696, "grad_norm": 5.146281719207764, "learning_rate": 7.068493150684931e-07, "log_odds_chosen": -0.5441873073577881, "log_odds_ratio": -1.184274673461914, "logits/chosen": -0.0167497918009758, "logits/rejected": 0.035420458763837814, "logps/chosen": -3.2501537799835205, "logps/rejected": -2.746094226837158, "loss": 2.6065, "nll_loss": 2.4880878925323486, "rewards/accuracies": 0.25, "rewards/chosen": -0.3250153660774231, "rewards/margins": -0.050405967980623245, "rewards/rejected": -0.27460944652557373, "step": 214 }, { "epoch": 0.5886379192334018, "grad_norm": 5.092358589172363, "learning_rate": 7.054794520547945e-07, "log_odds_chosen": 0.35117554664611816, "log_odds_ratio": -0.792178750038147, "logits/chosen": 0.23000556230545044, "logits/rejected": 0.1713697910308838, "logps/chosen": -3.3806722164154053, "logps/rejected": -3.72029447555542, "loss": 2.5007, "nll_loss": 2.421445369720459, "rewards/accuracies": 0.625, "rewards/chosen": -0.3380672037601471, "rewards/margins": 0.03396221995353699, "rewards/rejected": -0.3720294237136841, "step": 215 }, { "epoch": 0.5913757700205339, "grad_norm": 4.896887302398682, "learning_rate": 7.041095890410958e-07, "log_odds_chosen": -0.8846392631530762, "log_odds_ratio": -1.4222475290298462, "logits/chosen": 0.010059930384159088, "logits/rejected": -0.02775367721915245, "logps/chosen": -3.4881515502929688, "logps/rejected": -2.6234514713287354, "loss": 2.6255, "nll_loss": 2.483288288116455, "rewards/accuracies": 0.375, "rewards/chosen": -0.3488151431083679, "rewards/margins": -0.08647001534700394, "rewards/rejected": -0.26234516501426697, "step": 216 }, { "epoch": 0.5941136208076659, "grad_norm": 4.769819736480713, "learning_rate": 7.027397260273972e-07, "log_odds_chosen": -0.09030167758464813, "log_odds_ratio": -0.8799868822097778, "logits/chosen": -0.06859217584133148, "logits/rejected": -0.06643827259540558, "logps/chosen": -2.995481014251709, "logps/rejected": -2.9025025367736816, "loss": 2.5432, "nll_loss": 2.455183982849121, "rewards/accuracies": 0.375, "rewards/chosen": -0.29954811930656433, "rewards/margins": -0.009297840297222137, "rewards/rejected": -0.290250301361084, "step": 217 }, { "epoch": 0.5968514715947981, "grad_norm": 4.495776176452637, "learning_rate": 7.013698630136986e-07, "log_odds_chosen": -0.2489549219608307, "log_odds_ratio": -0.9730269312858582, "logits/chosen": 0.18297651410102844, "logits/rejected": 0.1819668561220169, "logps/chosen": -2.726717472076416, "logps/rejected": -2.451536178588867, "loss": 2.4439, "nll_loss": 2.3465518951416016, "rewards/accuracies": 0.375, "rewards/chosen": -0.27267175912857056, "rewards/margins": -0.02751813642680645, "rewards/rejected": -0.24515360593795776, "step": 218 }, { "epoch": 0.5995893223819302, "grad_norm": 4.795025825500488, "learning_rate": 7e-07, "log_odds_chosen": -0.9815844893455505, "log_odds_ratio": -1.399261474609375, "logits/chosen": 0.0442492850124836, "logits/rejected": 0.10816596448421478, "logps/chosen": -3.4513700008392334, "logps/rejected": -2.496610641479492, "loss": 2.6548, "nll_loss": 2.5148544311523438, "rewards/accuracies": 0.25, "rewards/chosen": -0.3451370298862457, "rewards/margins": -0.0954759418964386, "rewards/rejected": -0.24966108798980713, "step": 219 }, { "epoch": 0.6023271731690623, "grad_norm": 4.278154373168945, "learning_rate": 6.986301369863014e-07, "log_odds_chosen": -0.08185780048370361, "log_odds_ratio": -0.8619571924209595, "logits/chosen": 0.05853396654129028, "logits/rejected": -0.035963281989097595, "logps/chosen": -2.6130199432373047, "logps/rejected": -2.5199055671691895, "loss": 2.5767, "nll_loss": 2.49050235748291, "rewards/accuracies": 0.5, "rewards/chosen": -0.26130199432373047, "rewards/margins": -0.009311452507972717, "rewards/rejected": -0.25199055671691895, "step": 220 }, { "epoch": 0.6050650239561944, "grad_norm": 4.884463310241699, "learning_rate": 6.972602739726027e-07, "log_odds_chosen": -0.8247765302658081, "log_odds_ratio": -1.2995328903198242, "logits/chosen": -0.11857061088085175, "logits/rejected": -0.0898372232913971, "logps/chosen": -2.962923049926758, "logps/rejected": -2.199338436126709, "loss": 2.5081, "nll_loss": 2.3781075477600098, "rewards/accuracies": 0.25, "rewards/chosen": -0.2962923049926758, "rewards/margins": -0.07635847479104996, "rewards/rejected": -0.21993383765220642, "step": 221 }, { "epoch": 0.6078028747433265, "grad_norm": 4.603568077087402, "learning_rate": 6.958904109589041e-07, "log_odds_chosen": -0.49495163559913635, "log_odds_ratio": -1.4135432243347168, "logits/chosen": -0.0071433596312999725, "logits/rejected": -0.11174926161766052, "logps/chosen": -3.40264892578125, "logps/rejected": -2.89626145362854, "loss": 2.6017, "nll_loss": 2.4603304862976074, "rewards/accuracies": 0.5, "rewards/chosen": -0.3402648866176605, "rewards/margins": -0.05063874274492264, "rewards/rejected": -0.2896261513233185, "step": 222 }, { "epoch": 0.6105407255304586, "grad_norm": 5.637377738952637, "learning_rate": 6.945205479452054e-07, "log_odds_chosen": -1.1229054927825928, "log_odds_ratio": -1.6065648794174194, "logits/chosen": 0.035821333527565, "logits/rejected": 0.0966348648071289, "logps/chosen": -3.9978692531585693, "logps/rejected": -2.8861756324768066, "loss": 2.7146, "nll_loss": 2.553983211517334, "rewards/accuracies": 0.375, "rewards/chosen": -0.39978694915771484, "rewards/margins": -0.11116936802864075, "rewards/rejected": -0.2886175513267517, "step": 223 }, { "epoch": 0.6132785763175906, "grad_norm": 5.069491863250732, "learning_rate": 6.931506849315068e-07, "log_odds_chosen": -0.0026367604732513428, "log_odds_ratio": -0.8056624531745911, "logits/chosen": 0.17919668555259705, "logits/rejected": 0.13972757756710052, "logps/chosen": -3.145388126373291, "logps/rejected": -3.0985922813415527, "loss": 2.5973, "nll_loss": 2.516758918762207, "rewards/accuracies": 0.625, "rewards/chosen": -0.3145388066768646, "rewards/margins": -0.004679594188928604, "rewards/rejected": -0.3098592460155487, "step": 224 }, { "epoch": 0.6160164271047228, "grad_norm": 4.207461357116699, "learning_rate": 6.917808219178081e-07, "log_odds_chosen": 0.4255756139755249, "log_odds_ratio": -0.6054771542549133, "logits/chosen": -0.04342208802700043, "logits/rejected": -0.10433880239725113, "logps/chosen": -2.1399898529052734, "logps/rejected": -2.5161292552948, "loss": 2.3608, "nll_loss": 2.3002541065216064, "rewards/accuracies": 0.5, "rewards/chosen": -0.21399900317192078, "rewards/margins": 0.03761393949389458, "rewards/rejected": -0.25161293148994446, "step": 225 }, { "epoch": 0.6187542778918549, "grad_norm": 4.367359638214111, "learning_rate": 6.904109589041097e-07, "log_odds_chosen": -0.39474064111709595, "log_odds_ratio": -1.0286775827407837, "logits/chosen": -0.21970435976982117, "logits/rejected": -0.20612099766731262, "logps/chosen": -3.033529043197632, "logps/rejected": -2.6375036239624023, "loss": 2.4516, "nll_loss": 2.3487133979797363, "rewards/accuracies": 0.25, "rewards/chosen": -0.30335289239883423, "rewards/margins": -0.03960254788398743, "rewards/rejected": -0.2637503445148468, "step": 226 }, { "epoch": 0.621492128678987, "grad_norm": 5.068934440612793, "learning_rate": 6.89041095890411e-07, "log_odds_chosen": -0.0687209963798523, "log_odds_ratio": -0.8692194819450378, "logits/chosen": 0.09497439861297607, "logits/rejected": 0.14432509243488312, "logps/chosen": -3.0124361515045166, "logps/rejected": -2.9635064601898193, "loss": 2.4945, "nll_loss": 2.4075379371643066, "rewards/accuracies": 0.375, "rewards/chosen": -0.3012436330318451, "rewards/margins": -0.0048929620534181595, "rewards/rejected": -0.2963506579399109, "step": 227 }, { "epoch": 0.6242299794661191, "grad_norm": 4.577626705169678, "learning_rate": 6.876712328767123e-07, "log_odds_chosen": -0.02625662088394165, "log_odds_ratio": -0.9411875009536743, "logits/chosen": -0.11364158242940903, "logits/rejected": -0.08194661140441895, "logps/chosen": -2.842125177383423, "logps/rejected": -2.7702462673187256, "loss": 2.5196, "nll_loss": 2.4254794120788574, "rewards/accuracies": 0.5, "rewards/chosen": -0.28421252965927124, "rewards/margins": -0.0071878861635923386, "rewards/rejected": -0.27702462673187256, "step": 228 }, { "epoch": 0.6269678302532512, "grad_norm": 5.261784076690674, "learning_rate": 6.863013698630137e-07, "log_odds_chosen": -0.728386402130127, "log_odds_ratio": -1.2739462852478027, "logits/chosen": 0.17214325070381165, "logits/rejected": 0.21048542857170105, "logps/chosen": -3.882966995239258, "logps/rejected": -3.1877384185791016, "loss": 2.6656, "nll_loss": 2.538234233856201, "rewards/accuracies": 0.375, "rewards/chosen": -0.3882966935634613, "rewards/margins": -0.06952283531427383, "rewards/rejected": -0.31877386569976807, "step": 229 }, { "epoch": 0.6297056810403833, "grad_norm": 5.143325328826904, "learning_rate": 6.84931506849315e-07, "log_odds_chosen": -0.7179534435272217, "log_odds_ratio": -1.39925217628479, "logits/chosen": 0.055905453860759735, "logits/rejected": 0.11166197806596756, "logps/chosen": -3.82053279876709, "logps/rejected": -3.0749967098236084, "loss": 2.5891, "nll_loss": 2.4491732120513916, "rewards/accuracies": 0.5, "rewards/chosen": -0.38205331563949585, "rewards/margins": -0.07455362379550934, "rewards/rejected": -0.3074996769428253, "step": 230 }, { "epoch": 0.6324435318275154, "grad_norm": 4.973780155181885, "learning_rate": 6.835616438356164e-07, "log_odds_chosen": 0.8255993127822876, "log_odds_ratio": -0.8820317983627319, "logits/chosen": 0.16140997409820557, "logits/rejected": 0.09378485381603241, "logps/chosen": -3.0497851371765137, "logps/rejected": -3.857548475265503, "loss": 2.5353, "nll_loss": 2.447101593017578, "rewards/accuracies": 0.625, "rewards/chosen": -0.30497851967811584, "rewards/margins": 0.08077634871006012, "rewards/rejected": -0.38575488328933716, "step": 231 }, { "epoch": 0.6351813826146475, "grad_norm": 5.857908248901367, "learning_rate": 6.821917808219177e-07, "log_odds_chosen": -0.9766697883605957, "log_odds_ratio": -1.523526906967163, "logits/chosen": 0.011811915785074234, "logits/rejected": 0.09320055693387985, "logps/chosen": -4.4324951171875, "logps/rejected": -3.4229187965393066, "loss": 2.6433, "nll_loss": 2.490957260131836, "rewards/accuracies": 0.25, "rewards/chosen": -0.44324952363967896, "rewards/margins": -0.10095761716365814, "rewards/rejected": -0.3422918915748596, "step": 232 }, { "epoch": 0.6379192334017796, "grad_norm": 5.177812099456787, "learning_rate": 6.808219178082191e-07, "log_odds_chosen": -0.6854463815689087, "log_odds_ratio": -1.1385173797607422, "logits/chosen": 0.1209825798869133, "logits/rejected": 0.19607406854629517, "logps/chosen": -3.2679085731506348, "logps/rejected": -2.6177010536193848, "loss": 2.6081, "nll_loss": 2.494257926940918, "rewards/accuracies": 0.125, "rewards/chosen": -0.32679086923599243, "rewards/margins": -0.06502074003219604, "rewards/rejected": -0.2617701292037964, "step": 233 }, { "epoch": 0.6406570841889117, "grad_norm": 4.934125900268555, "learning_rate": 6.794520547945205e-07, "log_odds_chosen": -0.4428246021270752, "log_odds_ratio": -1.02138352394104, "logits/chosen": 0.0465245395898819, "logits/rejected": 0.0695435181260109, "logps/chosen": -2.7717010974884033, "logps/rejected": -2.354743003845215, "loss": 2.5815, "nll_loss": 2.479362964630127, "rewards/accuracies": 0.625, "rewards/chosen": -0.2771700918674469, "rewards/margins": -0.04169580712914467, "rewards/rejected": -0.23547430336475372, "step": 234 }, { "epoch": 0.6433949349760438, "grad_norm": 5.3110671043396, "learning_rate": 6.78082191780822e-07, "log_odds_chosen": -0.9543406963348389, "log_odds_ratio": -1.5009839534759521, "logits/chosen": 0.11150917410850525, "logits/rejected": 0.1842668205499649, "logps/chosen": -3.7570605278015137, "logps/rejected": -2.812906503677368, "loss": 2.7082, "nll_loss": 2.5581445693969727, "rewards/accuracies": 0.25, "rewards/chosen": -0.3757060766220093, "rewards/margins": -0.09441541135311127, "rewards/rejected": -0.2812906503677368, "step": 235 }, { "epoch": 0.6461327857631759, "grad_norm": 5.511343479156494, "learning_rate": 6.767123287671233e-07, "log_odds_chosen": -0.9143854379653931, "log_odds_ratio": -1.564393401145935, "logits/chosen": 0.1147187277674675, "logits/rejected": 0.16841277480125427, "logps/chosen": -3.6126182079315186, "logps/rejected": -2.6662678718566895, "loss": 2.6151, "nll_loss": 2.458667278289795, "rewards/accuracies": 0.375, "rewards/chosen": -0.3612618148326874, "rewards/margins": -0.0946350023150444, "rewards/rejected": -0.2666268050670624, "step": 236 }, { "epoch": 0.648870636550308, "grad_norm": 5.096794605255127, "learning_rate": 6.753424657534246e-07, "log_odds_chosen": -0.02740788459777832, "log_odds_ratio": -0.8997558355331421, "logits/chosen": -0.09336019307374954, "logits/rejected": -0.10445239394903183, "logps/chosen": -3.173326253890991, "logps/rejected": -3.11535906791687, "loss": 2.5527, "nll_loss": 2.462705135345459, "rewards/accuracies": 0.625, "rewards/chosen": -0.3173326253890991, "rewards/margins": -0.005796711891889572, "rewards/rejected": -0.31153589487075806, "step": 237 }, { "epoch": 0.6516084873374401, "grad_norm": 5.272287368774414, "learning_rate": 6.73972602739726e-07, "log_odds_chosen": 0.1397632360458374, "log_odds_ratio": -0.8075830936431885, "logits/chosen": 0.13099578022956848, "logits/rejected": 0.21442186832427979, "logps/chosen": -3.328632116317749, "logps/rejected": -3.459021806716919, "loss": 2.6098, "nll_loss": 2.5290188789367676, "rewards/accuracies": 0.5, "rewards/chosen": -0.3328631818294525, "rewards/margins": 0.01303897425532341, "rewards/rejected": -0.3459021747112274, "step": 238 }, { "epoch": 0.6543463381245722, "grad_norm": 4.95614767074585, "learning_rate": 6.726027397260273e-07, "log_odds_chosen": -0.31494832038879395, "log_odds_ratio": -0.9794392585754395, "logits/chosen": -0.04773079231381416, "logits/rejected": -0.08330851793289185, "logps/chosen": -3.23880672454834, "logps/rejected": -2.9017858505249023, "loss": 2.5375, "nll_loss": 2.4395835399627686, "rewards/accuracies": 0.5, "rewards/chosen": -0.3238806426525116, "rewards/margins": -0.03370208293199539, "rewards/rejected": -0.2901785671710968, "step": 239 }, { "epoch": 0.6570841889117043, "grad_norm": 4.243324279785156, "learning_rate": 6.712328767123287e-07, "log_odds_chosen": -0.08991982042789459, "log_odds_ratio": -0.814184308052063, "logits/chosen": 0.13363957405090332, "logits/rejected": 0.07057931274175644, "logps/chosen": -2.4763545989990234, "logps/rejected": -2.3748960494995117, "loss": 2.4617, "nll_loss": 2.3803279399871826, "rewards/accuracies": 0.625, "rewards/chosen": -0.24763545393943787, "rewards/margins": -0.010145829990506172, "rewards/rejected": -0.23748964071273804, "step": 240 }, { "epoch": 0.6598220396988365, "grad_norm": 4.893343925476074, "learning_rate": 6.6986301369863e-07, "log_odds_chosen": -1.302870512008667, "log_odds_ratio": -1.7597503662109375, "logits/chosen": 0.02562994882464409, "logits/rejected": 0.07116930931806564, "logps/chosen": -3.597940444946289, "logps/rejected": -2.328540563583374, "loss": 2.6488, "nll_loss": 2.472846508026123, "rewards/accuracies": 0.375, "rewards/chosen": -0.35979408025741577, "rewards/margins": -0.12694001197814941, "rewards/rejected": -0.23285405337810516, "step": 241 }, { "epoch": 0.6625598904859685, "grad_norm": 4.973079681396484, "learning_rate": 6.684931506849316e-07, "log_odds_chosen": -0.18834689259529114, "log_odds_ratio": -1.1994693279266357, "logits/chosen": -0.08539362251758575, "logits/rejected": -0.03089449554681778, "logps/chosen": -3.667734146118164, "logps/rejected": -3.4771816730499268, "loss": 2.5835, "nll_loss": 2.463542938232422, "rewards/accuracies": 0.375, "rewards/chosen": -0.36677345633506775, "rewards/margins": -0.01905527338385582, "rewards/rejected": -0.34771817922592163, "step": 242 }, { "epoch": 0.6652977412731006, "grad_norm": 5.498478889465332, "learning_rate": 6.671232876712329e-07, "log_odds_chosen": -0.6216822862625122, "log_odds_ratio": -1.2226052284240723, "logits/chosen": -0.028855837881565094, "logits/rejected": -0.025220729410648346, "logps/chosen": -4.143547058105469, "logps/rejected": -3.5080957412719727, "loss": 2.6342, "nll_loss": 2.511918067932129, "rewards/accuracies": 0.25, "rewards/chosen": -0.41435471177101135, "rewards/margins": -0.06354516744613647, "rewards/rejected": -0.35080957412719727, "step": 243 }, { "epoch": 0.6680355920602327, "grad_norm": 5.180506229400635, "learning_rate": 6.657534246575343e-07, "log_odds_chosen": -0.8445864319801331, "log_odds_ratio": -1.3588154315948486, "logits/chosen": 0.0399240180850029, "logits/rejected": 0.042666904628276825, "logps/chosen": -3.6566519737243652, "logps/rejected": -2.812948226928711, "loss": 2.5631, "nll_loss": 2.427196979522705, "rewards/accuracies": 0.375, "rewards/chosen": -0.3656651973724365, "rewards/margins": -0.08437040448188782, "rewards/rejected": -0.2812948226928711, "step": 244 }, { "epoch": 0.6707734428473648, "grad_norm": 4.4903178215026855, "learning_rate": 6.643835616438356e-07, "log_odds_chosen": -0.44873738288879395, "log_odds_ratio": -1.0548312664031982, "logits/chosen": 0.07587964832782745, "logits/rejected": 0.14429041743278503, "logps/chosen": -2.731480598449707, "logps/rejected": -2.2800512313842773, "loss": 2.5304, "nll_loss": 2.4249215126037598, "rewards/accuracies": 0.5, "rewards/chosen": -0.2731480896472931, "rewards/margins": -0.04514295607805252, "rewards/rejected": -0.22800512611865997, "step": 245 }, { "epoch": 0.6735112936344969, "grad_norm": 4.5725250244140625, "learning_rate": 6.63013698630137e-07, "log_odds_chosen": -0.1818137764930725, "log_odds_ratio": -0.9741560816764832, "logits/chosen": -0.12933403253555298, "logits/rejected": -0.14489741623401642, "logps/chosen": -2.953366756439209, "logps/rejected": -2.7463278770446777, "loss": 2.4546, "nll_loss": 2.3571817874908447, "rewards/accuracies": 0.375, "rewards/chosen": -0.29533666372299194, "rewards/margins": -0.020703861489892006, "rewards/rejected": -0.2746328115463257, "step": 246 }, { "epoch": 0.676249144421629, "grad_norm": 5.198054790496826, "learning_rate": 6.616438356164383e-07, "log_odds_chosen": -1.8086977005004883, "log_odds_ratio": -2.1203644275665283, "logits/chosen": 0.04603598266839981, "logits/rejected": 0.033513493835926056, "logps/chosen": -4.232247829437256, "logps/rejected": -2.478642463684082, "loss": 2.6291, "nll_loss": 2.417079448699951, "rewards/accuracies": 0.25, "rewards/chosen": -0.4232247769832611, "rewards/margins": -0.1753605306148529, "rewards/rejected": -0.2478642463684082, "step": 247 }, { "epoch": 0.6789869952087612, "grad_norm": 4.819722652435303, "learning_rate": 6.602739726027396e-07, "log_odds_chosen": -0.6560128927230835, "log_odds_ratio": -1.2587296962738037, "logits/chosen": 0.006307970732450485, "logits/rejected": 0.0019667409360408783, "logps/chosen": -2.6578540802001953, "logps/rejected": -2.049960136413574, "loss": 2.5723, "nll_loss": 2.446427345275879, "rewards/accuracies": 0.5, "rewards/chosen": -0.26578542590141296, "rewards/margins": -0.06078939884901047, "rewards/rejected": -0.2049960494041443, "step": 248 }, { "epoch": 0.6817248459958932, "grad_norm": 4.689973831176758, "learning_rate": 6.58904109589041e-07, "log_odds_chosen": -0.20994029939174652, "log_odds_ratio": -0.8327036499977112, "logits/chosen": 0.06593801826238632, "logits/rejected": 0.04547547549009323, "logps/chosen": -3.159902572631836, "logps/rejected": -2.9415457248687744, "loss": 2.4638, "nll_loss": 2.380502700805664, "rewards/accuracies": 0.375, "rewards/chosen": -0.31599023938179016, "rewards/margins": -0.021835675463080406, "rewards/rejected": -0.2941545844078064, "step": 249 }, { "epoch": 0.6844626967830253, "grad_norm": 4.076164245605469, "learning_rate": 6.575342465753423e-07, "log_odds_chosen": 0.6720829606056213, "log_odds_ratio": -0.5038990378379822, "logits/chosen": 0.2524539530277252, "logits/rejected": 0.18504521250724792, "logps/chosen": -1.5330722332000732, "logps/rejected": -2.112656354904175, "loss": 2.3467, "nll_loss": 2.296325206756592, "rewards/accuracies": 0.75, "rewards/chosen": -0.1533072292804718, "rewards/margins": 0.057958412915468216, "rewards/rejected": -0.2112656533718109, "step": 250 }, { "epoch": 0.6872005475701575, "grad_norm": 4.784945011138916, "learning_rate": 6.561643835616439e-07, "log_odds_chosen": 0.4486689865589142, "log_odds_ratio": -0.5326199531555176, "logits/chosen": -0.09825973957777023, "logits/rejected": -0.054362647235393524, "logps/chosen": -2.4640793800354004, "logps/rejected": -2.86727237701416, "loss": 2.4198, "nll_loss": 2.366565227508545, "rewards/accuracies": 0.75, "rewards/chosen": -0.24640795588493347, "rewards/margins": 0.040319256484508514, "rewards/rejected": -0.2867271900177002, "step": 251 }, { "epoch": 0.6899383983572895, "grad_norm": 4.1262712478637695, "learning_rate": 6.547945205479452e-07, "log_odds_chosen": -0.12096546590328217, "log_odds_ratio": -0.8541849255561829, "logits/chosen": -0.017267301678657532, "logits/rejected": -0.0658317357301712, "logps/chosen": -2.5908565521240234, "logps/rejected": -2.4546008110046387, "loss": 2.4032, "nll_loss": 2.317767858505249, "rewards/accuracies": 0.375, "rewards/chosen": -0.25908565521240234, "rewards/margins": -0.01362556777894497, "rewards/rejected": -0.24546009302139282, "step": 252 }, { "epoch": 0.6926762491444216, "grad_norm": 4.7536492347717285, "learning_rate": 6.534246575342466e-07, "log_odds_chosen": -0.05135500431060791, "log_odds_ratio": -0.9071228504180908, "logits/chosen": -0.10200881958007812, "logits/rejected": -0.14349958300590515, "logps/chosen": -3.253169536590576, "logps/rejected": -3.146576166152954, "loss": 2.4789, "nll_loss": 2.3882107734680176, "rewards/accuracies": 0.625, "rewards/chosen": -0.3253169655799866, "rewards/margins": -0.0106593556702137, "rewards/rejected": -0.31465762853622437, "step": 253 }, { "epoch": 0.6954140999315537, "grad_norm": 5.859878063201904, "learning_rate": 6.520547945205479e-07, "log_odds_chosen": -0.6564053893089294, "log_odds_ratio": -1.4981660842895508, "logits/chosen": 0.13688607513904572, "logits/rejected": 0.24432089924812317, "logps/chosen": -4.003457546234131, "logps/rejected": -3.3245303630828857, "loss": 2.6808, "nll_loss": 2.5309457778930664, "rewards/accuracies": 0.25, "rewards/chosen": -0.4003457725048065, "rewards/margins": -0.06789270043373108, "rewards/rejected": -0.33245304226875305, "step": 254 }, { "epoch": 0.6981519507186859, "grad_norm": 4.999378204345703, "learning_rate": 6.506849315068493e-07, "log_odds_chosen": -0.7535296082496643, "log_odds_ratio": -1.242681860923767, "logits/chosen": -0.085236094892025, "logits/rejected": -0.10006479173898697, "logps/chosen": -3.5029473304748535, "logps/rejected": -2.75996732711792, "loss": 2.6172, "nll_loss": 2.4929733276367188, "rewards/accuracies": 0.125, "rewards/chosen": -0.3502947688102722, "rewards/margins": -0.07429802417755127, "rewards/rejected": -0.27599674463272095, "step": 255 }, { "epoch": 0.7008898015058179, "grad_norm": 5.243526458740234, "learning_rate": 6.493150684931506e-07, "log_odds_chosen": -0.06602996587753296, "log_odds_ratio": -0.814079761505127, "logits/chosen": 0.011560364626348019, "logits/rejected": 0.060998693108558655, "logps/chosen": -3.41300106048584, "logps/rejected": -3.3090667724609375, "loss": 2.5604, "nll_loss": 2.478989601135254, "rewards/accuracies": 0.375, "rewards/chosen": -0.3413001000881195, "rewards/margins": -0.010393454693257809, "rewards/rejected": -0.3309066593647003, "step": 256 }, { "epoch": 0.70362765229295, "grad_norm": 4.2953338623046875, "learning_rate": 6.479452054794519e-07, "log_odds_chosen": -0.41074007749557495, "log_odds_ratio": -0.9806354641914368, "logits/chosen": -0.03916022926568985, "logits/rejected": -0.0406041257083416, "logps/chosen": -2.6973915100097656, "logps/rejected": -2.3061225414276123, "loss": 2.4114, "nll_loss": 2.3133838176727295, "rewards/accuracies": 0.375, "rewards/chosen": -0.26973915100097656, "rewards/margins": -0.03912688419222832, "rewards/rejected": -0.23061226308345795, "step": 257 }, { "epoch": 0.7063655030800822, "grad_norm": 4.117415904998779, "learning_rate": 6.465753424657535e-07, "log_odds_chosen": 0.13378334045410156, "log_odds_ratio": -0.9081048369407654, "logits/chosen": 0.05803064629435539, "logits/rejected": 0.028830867260694504, "logps/chosen": -2.4578537940979004, "logps/rejected": -2.5433950424194336, "loss": 2.3104, "nll_loss": 2.219574213027954, "rewards/accuracies": 0.625, "rewards/chosen": -0.24578538537025452, "rewards/margins": 0.008554138243198395, "rewards/rejected": -0.2543395161628723, "step": 258 }, { "epoch": 0.7091033538672142, "grad_norm": 5.487018585205078, "learning_rate": 6.452054794520548e-07, "log_odds_chosen": -1.2689862251281738, "log_odds_ratio": -1.6068130731582642, "logits/chosen": 0.03011230006814003, "logits/rejected": 0.09672123193740845, "logps/chosen": -4.393725395202637, "logps/rejected": -3.154423952102661, "loss": 2.7232, "nll_loss": 2.562497138977051, "rewards/accuracies": 0.125, "rewards/chosen": -0.4393725097179413, "rewards/margins": -0.12393013387918472, "rewards/rejected": -0.31544238328933716, "step": 259 }, { "epoch": 0.7118412046543463, "grad_norm": 4.6822919845581055, "learning_rate": 6.438356164383562e-07, "log_odds_chosen": 0.4101831614971161, "log_odds_ratio": -0.599892795085907, "logits/chosen": 0.1118047684431076, "logits/rejected": 0.17961010336875916, "logps/chosen": -2.4548428058624268, "logps/rejected": -2.8050615787506104, "loss": 2.4306, "nll_loss": 2.3705623149871826, "rewards/accuracies": 0.875, "rewards/chosen": -0.24548429250717163, "rewards/margins": 0.035021863877773285, "rewards/rejected": -0.2805061638355255, "step": 260 }, { "epoch": 0.7145790554414785, "grad_norm": 5.490548133850098, "learning_rate": 6.424657534246575e-07, "log_odds_chosen": -0.5271972417831421, "log_odds_ratio": -1.2030560970306396, "logits/chosen": -0.040312111377716064, "logits/rejected": 0.021050887182354927, "logps/chosen": -3.845855474472046, "logps/rejected": -3.31992244720459, "loss": 2.6191, "nll_loss": 2.498746156692505, "rewards/accuracies": 0.5, "rewards/chosen": -0.38458555936813354, "rewards/margins": -0.05259328335523605, "rewards/rejected": -0.3319922685623169, "step": 261 }, { "epoch": 0.7173169062286106, "grad_norm": 4.907824993133545, "learning_rate": 6.410958904109589e-07, "log_odds_chosen": -0.5863639116287231, "log_odds_ratio": -1.1674158573150635, "logits/chosen": 0.018329758197069168, "logits/rejected": 0.025641847401857376, "logps/chosen": -3.2147390842437744, "logps/rejected": -2.6253983974456787, "loss": 2.4419, "nll_loss": 2.3251373767852783, "rewards/accuracies": 0.375, "rewards/chosen": -0.3214738965034485, "rewards/margins": -0.058934055268764496, "rewards/rejected": -0.2625398635864258, "step": 262 }, { "epoch": 0.7200547570157426, "grad_norm": 4.413336277008057, "learning_rate": 6.397260273972602e-07, "log_odds_chosen": -0.4279462397098541, "log_odds_ratio": -1.1752336025238037, "logits/chosen": -0.03971664234995842, "logits/rejected": -0.13199643790721893, "logps/chosen": -2.9677734375, "logps/rejected": -2.545456886291504, "loss": 2.4795, "nll_loss": 2.3619370460510254, "rewards/accuracies": 0.25, "rewards/chosen": -0.2967773377895355, "rewards/margins": -0.042231664061546326, "rewards/rejected": -0.2545456886291504, "step": 263 }, { "epoch": 0.7227926078028748, "grad_norm": 6.008773326873779, "learning_rate": 6.383561643835616e-07, "log_odds_chosen": -0.8124847412109375, "log_odds_ratio": -1.275472640991211, "logits/chosen": 0.13663597404956818, "logits/rejected": 0.21812930703163147, "logps/chosen": -4.041656494140625, "logps/rejected": -3.232344388961792, "loss": 2.6082, "nll_loss": 2.480694055557251, "rewards/accuracies": 0.125, "rewards/chosen": -0.404165655374527, "rewards/margins": -0.08093120157718658, "rewards/rejected": -0.3232344388961792, "step": 264 }, { "epoch": 0.7255304585900069, "grad_norm": 5.957432746887207, "learning_rate": 6.369863013698629e-07, "log_odds_chosen": -1.1945117712020874, "log_odds_ratio": -1.6556203365325928, "logits/chosen": 0.06650920957326889, "logits/rejected": 0.14928501844406128, "logps/chosen": -4.653295040130615, "logps/rejected": -3.463467597961426, "loss": 2.7095, "nll_loss": 2.543966770172119, "rewards/accuracies": 0.375, "rewards/chosen": -0.46532949805259705, "rewards/margins": -0.11898273229598999, "rewards/rejected": -0.34634676575660706, "step": 265 }, { "epoch": 0.7282683093771389, "grad_norm": 5.225288391113281, "learning_rate": 6.356164383561645e-07, "log_odds_chosen": 0.5597747564315796, "log_odds_ratio": -0.6020723581314087, "logits/chosen": 0.2207382470369339, "logits/rejected": 0.21145404875278473, "logps/chosen": -2.9415342807769775, "logps/rejected": -3.4528274536132812, "loss": 2.5063, "nll_loss": 2.4460763931274414, "rewards/accuracies": 0.75, "rewards/chosen": -0.29415345191955566, "rewards/margins": 0.051129300147295, "rewards/rejected": -0.34528273344039917, "step": 266 }, { "epoch": 0.731006160164271, "grad_norm": 4.630634784698486, "learning_rate": 6.342465753424658e-07, "log_odds_chosen": -0.051216915249824524, "log_odds_ratio": -0.8129382729530334, "logits/chosen": -0.08536396920681, "logits/rejected": -0.0812244638800621, "logps/chosen": -2.705653190612793, "logps/rejected": -2.658048391342163, "loss": 2.5442, "nll_loss": 2.4629364013671875, "rewards/accuracies": 0.5, "rewards/chosen": -0.27056533098220825, "rewards/margins": -0.00476047582924366, "rewards/rejected": -0.26580485701560974, "step": 267 }, { "epoch": 0.7337440109514032, "grad_norm": 5.596179485321045, "learning_rate": 6.328767123287671e-07, "log_odds_chosen": -1.0874063968658447, "log_odds_ratio": -1.548377513885498, "logits/chosen": 0.013192698359489441, "logits/rejected": 0.09842094033956528, "logps/chosen": -4.036106586456299, "logps/rejected": -2.9482150077819824, "loss": 2.6639, "nll_loss": 2.5091090202331543, "rewards/accuracies": 0.25, "rewards/chosen": -0.4036106765270233, "rewards/margins": -0.10878914594650269, "rewards/rejected": -0.29482153058052063, "step": 268 }, { "epoch": 0.7364818617385352, "grad_norm": 5.008697032928467, "learning_rate": 6.315068493150685e-07, "log_odds_chosen": 0.6385722756385803, "log_odds_ratio": -0.7551898956298828, "logits/chosen": -0.09328697621822357, "logits/rejected": -0.12504838407039642, "logps/chosen": -3.079742908477783, "logps/rejected": -3.6418967247009277, "loss": 2.4668, "nll_loss": 2.3912720680236816, "rewards/accuracies": 0.625, "rewards/chosen": -0.30797427892684937, "rewards/margins": 0.056215398013591766, "rewards/rejected": -0.3641897141933441, "step": 269 }, { "epoch": 0.7392197125256673, "grad_norm": 4.657661437988281, "learning_rate": 6.301369863013698e-07, "log_odds_chosen": 0.006682500243186951, "log_odds_ratio": -0.7860355377197266, "logits/chosen": 0.17804022133350372, "logits/rejected": 0.18920959532260895, "logps/chosen": -2.6118109226226807, "logps/rejected": -2.5890045166015625, "loss": 2.4109, "nll_loss": 2.332343101501465, "rewards/accuracies": 0.5, "rewards/chosen": -0.2611810863018036, "rewards/margins": -0.0022806432098150253, "rewards/rejected": -0.2589004635810852, "step": 270 }, { "epoch": 0.7419575633127995, "grad_norm": 5.350391387939453, "learning_rate": 6.287671232876712e-07, "log_odds_chosen": -0.350725919008255, "log_odds_ratio": -1.059503197669983, "logits/chosen": 0.004890406504273415, "logits/rejected": 0.017158225178718567, "logps/chosen": -3.246616840362549, "logps/rejected": -2.888737201690674, "loss": 2.5388, "nll_loss": 2.4328439235687256, "rewards/accuracies": 0.25, "rewards/chosen": -0.3246616721153259, "rewards/margins": -0.03578794747591019, "rewards/rejected": -0.28887373208999634, "step": 271 }, { "epoch": 0.7446954140999316, "grad_norm": 4.859589099884033, "learning_rate": 6.273972602739725e-07, "log_odds_chosen": -0.00597420334815979, "log_odds_ratio": -1.0293700695037842, "logits/chosen": -0.01828036829829216, "logits/rejected": -0.0025422777980566025, "logps/chosen": -3.389308214187622, "logps/rejected": -3.3730664253234863, "loss": 2.4539, "nll_loss": 2.3509278297424316, "rewards/accuracies": 0.25, "rewards/chosen": -0.33893081545829773, "rewards/margins": -0.001624174416065216, "rewards/rejected": -0.3373066484928131, "step": 272 }, { "epoch": 0.7474332648870636, "grad_norm": 4.102257251739502, "learning_rate": 6.260273972602739e-07, "log_odds_chosen": 0.2521878480911255, "log_odds_ratio": -0.649282693862915, "logits/chosen": -0.015790380537509918, "logits/rejected": -0.11974681168794632, "logps/chosen": -2.5819313526153564, "logps/rejected": -2.8105430603027344, "loss": 2.3458, "nll_loss": 2.280858039855957, "rewards/accuracies": 0.625, "rewards/chosen": -0.25819313526153564, "rewards/margins": 0.02286117896437645, "rewards/rejected": -0.2810543179512024, "step": 273 }, { "epoch": 0.7501711156741958, "grad_norm": 5.602250099182129, "learning_rate": 6.246575342465754e-07, "log_odds_chosen": -0.8059207201004028, "log_odds_ratio": -1.2356117963790894, "logits/chosen": 0.12328280508518219, "logits/rejected": 0.181259423494339, "logps/chosen": -3.949251174926758, "logps/rejected": -3.1630942821502686, "loss": 2.5927, "nll_loss": 2.4691507816314697, "rewards/accuracies": 0.125, "rewards/chosen": -0.3949251174926758, "rewards/margins": -0.0786156877875328, "rewards/rejected": -0.31630945205688477, "step": 274 }, { "epoch": 0.7529089664613279, "grad_norm": 4.863170146942139, "learning_rate": 6.232876712328768e-07, "log_odds_chosen": 0.8995407223701477, "log_odds_ratio": -0.9453299045562744, "logits/chosen": 0.014537274837493896, "logits/rejected": -0.13086646795272827, "logps/chosen": -3.160731077194214, "logps/rejected": -3.985992431640625, "loss": 2.4332, "nll_loss": 2.3386905193328857, "rewards/accuracies": 0.5, "rewards/chosen": -0.31607311964035034, "rewards/margins": 0.08252615481615067, "rewards/rejected": -0.3985992670059204, "step": 275 }, { "epoch": 0.75564681724846, "grad_norm": 5.301851272583008, "learning_rate": 6.219178082191781e-07, "log_odds_chosen": -0.16485744714736938, "log_odds_ratio": -1.0617129802703857, "logits/chosen": 0.24444977939128876, "logits/rejected": 0.19750769436359406, "logps/chosen": -3.2442352771759033, "logps/rejected": -3.0705385208129883, "loss": 2.4988, "nll_loss": 2.3926405906677246, "rewards/accuracies": 0.5, "rewards/chosen": -0.32442355155944824, "rewards/margins": -0.017369670793414116, "rewards/rejected": -0.3070538640022278, "step": 276 }, { "epoch": 0.758384668035592, "grad_norm": 5.482330322265625, "learning_rate": 6.205479452054794e-07, "log_odds_chosen": -0.35675445199012756, "log_odds_ratio": -1.1690130233764648, "logits/chosen": 0.009800916537642479, "logits/rejected": -0.005710981786251068, "logps/chosen": -3.850849151611328, "logps/rejected": -3.502582550048828, "loss": 2.5793, "nll_loss": 2.4623894691467285, "rewards/accuracies": 0.25, "rewards/chosen": -0.38508492708206177, "rewards/margins": -0.034826673567295074, "rewards/rejected": -0.3502582609653473, "step": 277 }, { "epoch": 0.7611225188227242, "grad_norm": 5.302452564239502, "learning_rate": 6.191780821917808e-07, "log_odds_chosen": 0.25523287057876587, "log_odds_ratio": -0.7524204254150391, "logits/chosen": 0.18045896291732788, "logits/rejected": 0.1075131893157959, "logps/chosen": -2.765738010406494, "logps/rejected": -3.01051664352417, "loss": 2.488, "nll_loss": 2.412797212600708, "rewards/accuracies": 0.5, "rewards/chosen": -0.2765737771987915, "rewards/margins": 0.024477874860167503, "rewards/rejected": -0.30105167627334595, "step": 278 }, { "epoch": 0.7638603696098563, "grad_norm": 5.110561847686768, "learning_rate": 6.178082191780821e-07, "log_odds_chosen": -1.153632640838623, "log_odds_ratio": -1.607186198234558, "logits/chosen": -0.07597219944000244, "logits/rejected": -0.01535467617213726, "logps/chosen": -3.919712781906128, "logps/rejected": -2.786058187484741, "loss": 2.5821, "nll_loss": 2.4214091300964355, "rewards/accuracies": 0.5, "rewards/chosen": -0.39197129011154175, "rewards/margins": -0.11336544156074524, "rewards/rejected": -0.2786058187484741, "step": 279 }, { "epoch": 0.7665982203969883, "grad_norm": 5.882496356964111, "learning_rate": 6.164383561643835e-07, "log_odds_chosen": -0.017168134450912476, "log_odds_ratio": -0.7687655091285706, "logits/chosen": 0.11862994730472565, "logits/rejected": 0.1885986328125, "logps/chosen": -4.53303337097168, "logps/rejected": -4.515741348266602, "loss": 2.6395, "nll_loss": 2.562621593475342, "rewards/accuracies": 0.375, "rewards/chosen": -0.45330333709716797, "rewards/margins": -0.0017292331904172897, "rewards/rejected": -0.4515741169452667, "step": 280 }, { "epoch": 0.7693360711841205, "grad_norm": 4.07002067565918, "learning_rate": 6.150684931506848e-07, "log_odds_chosen": 0.27963802218437195, "log_odds_ratio": -0.6714184880256653, "logits/chosen": 0.013174034655094147, "logits/rejected": 0.02797350287437439, "logps/chosen": -2.054170608520508, "logps/rejected": -2.31955885887146, "loss": 2.308, "nll_loss": 2.2408900260925293, "rewards/accuracies": 0.5, "rewards/chosen": -0.20541706681251526, "rewards/margins": 0.02653881348669529, "rewards/rejected": -0.231955885887146, "step": 281 }, { "epoch": 0.7720739219712526, "grad_norm": 3.9717628955841064, "learning_rate": 6.136986301369864e-07, "log_odds_chosen": 0.48539188504219055, "log_odds_ratio": -0.6426748037338257, "logits/chosen": 0.2482144683599472, "logits/rejected": 0.17099612951278687, "logps/chosen": -2.1998586654663086, "logps/rejected": -2.6249678134918213, "loss": 2.3266, "nll_loss": 2.2623066902160645, "rewards/accuracies": 0.875, "rewards/chosen": -0.21998587250709534, "rewards/margins": 0.042510900646448135, "rewards/rejected": -0.2624967694282532, "step": 282 }, { "epoch": 0.7748117727583846, "grad_norm": 4.329807281494141, "learning_rate": 6.123287671232877e-07, "log_odds_chosen": 0.08154270052909851, "log_odds_ratio": -0.8325246572494507, "logits/chosen": 0.02031555585563183, "logits/rejected": -0.013957394286990166, "logps/chosen": -3.077521800994873, "logps/rejected": -3.127079725265503, "loss": 2.4363, "nll_loss": 2.35302996635437, "rewards/accuracies": 0.5, "rewards/chosen": -0.30775222182273865, "rewards/margins": 0.00495578907430172, "rewards/rejected": -0.31270796060562134, "step": 283 }, { "epoch": 0.7775496235455168, "grad_norm": 5.073023796081543, "learning_rate": 6.109589041095891e-07, "log_odds_chosen": -0.16050808131694794, "log_odds_ratio": -0.8030011653900146, "logits/chosen": -0.08691301941871643, "logits/rejected": -0.0951288715004921, "logps/chosen": -2.778270721435547, "logps/rejected": -2.6156246662139893, "loss": 2.4775, "nll_loss": 2.3971612453460693, "rewards/accuracies": 0.5, "rewards/chosen": -0.27782705426216125, "rewards/margins": -0.016264598816633224, "rewards/rejected": -0.2615624666213989, "step": 284 }, { "epoch": 0.7802874743326489, "grad_norm": 4.366613864898682, "learning_rate": 6.095890410958904e-07, "log_odds_chosen": 0.9148069620132446, "log_odds_ratio": -0.7951111793518066, "logits/chosen": 0.02127496898174286, "logits/rejected": -0.06821198761463165, "logps/chosen": -2.7663679122924805, "logps/rejected": -3.675036668777466, "loss": 2.415, "nll_loss": 2.3354644775390625, "rewards/accuracies": 0.625, "rewards/chosen": -0.2766368091106415, "rewards/margins": 0.09086690843105316, "rewards/rejected": -0.36750370264053345, "step": 285 }, { "epoch": 0.783025325119781, "grad_norm": 5.009302139282227, "learning_rate": 6.082191780821918e-07, "log_odds_chosen": -0.5509586334228516, "log_odds_ratio": -1.1373780965805054, "logits/chosen": -0.028641484677791595, "logits/rejected": 0.016280926764011383, "logps/chosen": -3.1224794387817383, "logps/rejected": -2.570261001586914, "loss": 2.4853, "nll_loss": 2.3715322017669678, "rewards/accuracies": 0.5, "rewards/chosen": -0.3122479319572449, "rewards/margins": -0.05522181838750839, "rewards/rejected": -0.2570261061191559, "step": 286 }, { "epoch": 0.785763175906913, "grad_norm": 5.525331497192383, "learning_rate": 6.068493150684931e-07, "log_odds_chosen": -1.2448863983154297, "log_odds_ratio": -1.745962142944336, "logits/chosen": -0.09726065397262573, "logits/rejected": -0.07490905374288559, "logps/chosen": -3.871507167816162, "logps/rejected": -2.6520156860351562, "loss": 2.5762, "nll_loss": 2.40163516998291, "rewards/accuracies": 0.25, "rewards/chosen": -0.38715070486068726, "rewards/margins": -0.12194909900426865, "rewards/rejected": -0.2652015686035156, "step": 287 }, { "epoch": 0.7885010266940452, "grad_norm": 4.7197465896606445, "learning_rate": 6.054794520547944e-07, "log_odds_chosen": -0.9225754737854004, "log_odds_ratio": -1.4056127071380615, "logits/chosen": 0.015226287767291069, "logits/rejected": 0.06905457377433777, "logps/chosen": -3.3585362434387207, "logps/rejected": -2.4862818717956543, "loss": 2.542, "nll_loss": 2.4014804363250732, "rewards/accuracies": 0.25, "rewards/chosen": -0.335853636264801, "rewards/margins": -0.08722544461488724, "rewards/rejected": -0.2486281841993332, "step": 288 }, { "epoch": 0.7912388774811773, "grad_norm": 4.454334735870361, "learning_rate": 6.041095890410958e-07, "log_odds_chosen": -0.33523404598236084, "log_odds_ratio": -0.9805834293365479, "logits/chosen": 0.18846753239631653, "logits/rejected": 0.13166026771068573, "logps/chosen": -2.632218360900879, "logps/rejected": -2.2942450046539307, "loss": 2.4674, "nll_loss": 2.3693203926086426, "rewards/accuracies": 0.375, "rewards/chosen": -0.2632218599319458, "rewards/margins": -0.03379736840724945, "rewards/rejected": -0.22942449152469635, "step": 289 }, { "epoch": 0.7939767282683093, "grad_norm": 5.964893817901611, "learning_rate": 6.027397260273972e-07, "log_odds_chosen": -0.72393399477005, "log_odds_ratio": -1.200779676437378, "logits/chosen": 0.1295369267463684, "logits/rejected": 0.18835893273353577, "logps/chosen": -3.9324755668640137, "logps/rejected": -3.2315711975097656, "loss": 2.5943, "nll_loss": 2.4742536544799805, "rewards/accuracies": 0.375, "rewards/chosen": -0.3932475745677948, "rewards/margins": -0.0700903981924057, "rewards/rejected": -0.3231571316719055, "step": 290 }, { "epoch": 0.7967145790554415, "grad_norm": 4.771971702575684, "learning_rate": 6.013698630136987e-07, "log_odds_chosen": 0.6690773367881775, "log_odds_ratio": -0.6582130193710327, "logits/chosen": 0.0551299974322319, "logits/rejected": 0.010655827820301056, "logps/chosen": -3.0226926803588867, "logps/rejected": -3.653472423553467, "loss": 2.341, "nll_loss": 2.2752251625061035, "rewards/accuracies": 0.5, "rewards/chosen": -0.3022692799568176, "rewards/margins": 0.06307797133922577, "rewards/rejected": -0.3653472661972046, "step": 291 }, { "epoch": 0.7994524298425736, "grad_norm": 5.763476848602295, "learning_rate": 6e-07, "log_odds_chosen": -0.978889524936676, "log_odds_ratio": -1.412625789642334, "logits/chosen": 0.12539520859718323, "logits/rejected": 0.1630793809890747, "logps/chosen": -3.8070197105407715, "logps/rejected": -2.843020439147949, "loss": 2.5665, "nll_loss": 2.425201177597046, "rewards/accuracies": 0.125, "rewards/chosen": -0.3807019889354706, "rewards/margins": -0.09639997780323029, "rewards/rejected": -0.2843020558357239, "step": 292 }, { "epoch": 0.8021902806297057, "grad_norm": 5.223800182342529, "learning_rate": 5.986301369863014e-07, "log_odds_chosen": -0.46508660912513733, "log_odds_ratio": -1.2016397714614868, "logits/chosen": -0.05964890122413635, "logits/rejected": -0.07051603496074677, "logps/chosen": -3.5616888999938965, "logps/rejected": -3.106105327606201, "loss": 2.5175, "nll_loss": 2.397372245788574, "rewards/accuracies": 0.5, "rewards/chosen": -0.3561689257621765, "rewards/margins": -0.04555835947394371, "rewards/rejected": -0.3106105327606201, "step": 293 }, { "epoch": 0.8049281314168378, "grad_norm": 4.278611183166504, "learning_rate": 5.972602739726027e-07, "log_odds_chosen": 0.5784717202186584, "log_odds_ratio": -0.6599467992782593, "logits/chosen": 0.20962825417518616, "logits/rejected": 0.07676893472671509, "logps/chosen": -2.3236820697784424, "logps/rejected": -2.8496620655059814, "loss": 2.3833, "nll_loss": 2.3173272609710693, "rewards/accuracies": 0.625, "rewards/chosen": -0.23236820101737976, "rewards/margins": 0.05259798839688301, "rewards/rejected": -0.28496620059013367, "step": 294 }, { "epoch": 0.8076659822039699, "grad_norm": 5.107121467590332, "learning_rate": 5.958904109589041e-07, "log_odds_chosen": 0.05096369981765747, "log_odds_ratio": -0.782370924949646, "logits/chosen": 0.026670459657907486, "logits/rejected": -0.063239187002182, "logps/chosen": -3.0275936126708984, "logps/rejected": -3.0612692832946777, "loss": 2.4085, "nll_loss": 2.3302412033081055, "rewards/accuracies": 0.5, "rewards/chosen": -0.3027593493461609, "rewards/margins": 0.0033675823360681534, "rewards/rejected": -0.3061269521713257, "step": 295 }, { "epoch": 0.810403832991102, "grad_norm": 4.952361106872559, "learning_rate": 5.945205479452054e-07, "log_odds_chosen": -0.40552765130996704, "log_odds_ratio": -1.070742130279541, "logits/chosen": 0.14718632400035858, "logits/rejected": 0.12782476842403412, "logps/chosen": -2.9209752082824707, "logps/rejected": -2.5072758197784424, "loss": 2.4732, "nll_loss": 2.366135358810425, "rewards/accuracies": 0.375, "rewards/chosen": -0.2920975089073181, "rewards/margins": -0.04136992245912552, "rewards/rejected": -0.2507275938987732, "step": 296 }, { "epoch": 0.813141683778234, "grad_norm": 6.159732818603516, "learning_rate": 5.931506849315067e-07, "log_odds_chosen": -1.483036994934082, "log_odds_ratio": -1.891395092010498, "logits/chosen": 0.030647829174995422, "logits/rejected": 0.1459646373987198, "logps/chosen": -4.400207042694092, "logps/rejected": -2.9378628730773926, "loss": 2.6551, "nll_loss": 2.4659695625305176, "rewards/accuracies": 0.25, "rewards/chosen": -0.44002074003219604, "rewards/margins": -0.1462344229221344, "rewards/rejected": -0.29378628730773926, "step": 297 }, { "epoch": 0.8158795345653662, "grad_norm": 5.489201545715332, "learning_rate": 5.917808219178083e-07, "log_odds_chosen": -0.5754733085632324, "log_odds_ratio": -1.2508845329284668, "logits/chosen": 0.004121609032154083, "logits/rejected": 0.058685656636953354, "logps/chosen": -3.5667002201080322, "logps/rejected": -2.9979469776153564, "loss": 2.5649, "nll_loss": 2.4397690296173096, "rewards/accuracies": 0.375, "rewards/chosen": -0.3566700220108032, "rewards/margins": -0.05687534436583519, "rewards/rejected": -0.29979467391967773, "step": 298 }, { "epoch": 0.8186173853524983, "grad_norm": 4.889370918273926, "learning_rate": 5.904109589041096e-07, "log_odds_chosen": -0.6514835357666016, "log_odds_ratio": -1.2089248895645142, "logits/chosen": -0.051150090992450714, "logits/rejected": -0.048109009861946106, "logps/chosen": -3.2982163429260254, "logps/rejected": -2.654674768447876, "loss": 2.5022, "nll_loss": 2.3813183307647705, "rewards/accuracies": 0.375, "rewards/chosen": -0.3298216760158539, "rewards/margins": -0.06435418874025345, "rewards/rejected": -0.26546746492385864, "step": 299 }, { "epoch": 0.8213552361396304, "grad_norm": 5.024616241455078, "learning_rate": 5.89041095890411e-07, "log_odds_chosen": 0.5784456133842468, "log_odds_ratio": -0.4950610399246216, "logits/chosen": -0.00798560306429863, "logits/rejected": -0.05155077949166298, "logps/chosen": -2.9308598041534424, "logps/rejected": -3.478214740753174, "loss": 2.4581, "nll_loss": 2.408564805984497, "rewards/accuracies": 0.75, "rewards/chosen": -0.2930859923362732, "rewards/margins": 0.054735515266656876, "rewards/rejected": -0.34782150387763977, "step": 300 }, { "epoch": 0.8240930869267625, "grad_norm": 4.52841329574585, "learning_rate": 5.876712328767123e-07, "log_odds_chosen": -0.41313400864601135, "log_odds_ratio": -1.003822922706604, "logits/chosen": -0.0018108412623405457, "logits/rejected": 0.014571554958820343, "logps/chosen": -2.630751132965088, "logps/rejected": -2.223686695098877, "loss": 2.3979, "nll_loss": 2.297539234161377, "rewards/accuracies": 0.5, "rewards/chosen": -0.2630751132965088, "rewards/margins": -0.040706440806388855, "rewards/rejected": -0.22236867249011993, "step": 301 }, { "epoch": 0.8268309377138946, "grad_norm": 5.427985668182373, "learning_rate": 5.863013698630137e-07, "log_odds_chosen": -0.7024896144866943, "log_odds_ratio": -1.4149155616760254, "logits/chosen": 0.21851757168769836, "logits/rejected": 0.27289825677871704, "logps/chosen": -3.5455574989318848, "logps/rejected": -2.822269916534424, "loss": 2.5307, "nll_loss": 2.3892438411712646, "rewards/accuracies": 0.625, "rewards/chosen": -0.35455578565597534, "rewards/margins": -0.07232876121997833, "rewards/rejected": -0.2822270095348358, "step": 302 }, { "epoch": 0.8295687885010267, "grad_norm": 5.404674053192139, "learning_rate": 5.84931506849315e-07, "log_odds_chosen": -0.32944542169570923, "log_odds_ratio": -1.0246658325195312, "logits/chosen": 0.13472801446914673, "logits/rejected": 0.175514355301857, "logps/chosen": -2.8926033973693848, "logps/rejected": -2.5897483825683594, "loss": 2.56, "nll_loss": 2.457524538040161, "rewards/accuracies": 0.5, "rewards/chosen": -0.2892603278160095, "rewards/margins": -0.030285503715276718, "rewards/rejected": -0.2589748203754425, "step": 303 }, { "epoch": 0.8323066392881588, "grad_norm": 6.196621417999268, "learning_rate": 5.835616438356164e-07, "log_odds_chosen": -0.5237674117088318, "log_odds_ratio": -1.1337907314300537, "logits/chosen": 0.09478698670864105, "logits/rejected": 0.23683449625968933, "logps/chosen": -4.227561950683594, "logps/rejected": -3.6743826866149902, "loss": 2.5581, "nll_loss": 2.444672107696533, "rewards/accuracies": 0.375, "rewards/chosen": -0.4227561354637146, "rewards/margins": -0.05531787872314453, "rewards/rejected": -0.36743828654289246, "step": 304 }, { "epoch": 0.8350444900752909, "grad_norm": 5.091249465942383, "learning_rate": 5.821917808219177e-07, "log_odds_chosen": -0.10108545422554016, "log_odds_ratio": -0.7844785451889038, "logits/chosen": 0.056622643023729324, "logits/rejected": 0.12807747721672058, "logps/chosen": -2.881999969482422, "logps/rejected": -2.777674436569214, "loss": 2.436, "nll_loss": 2.357590675354004, "rewards/accuracies": 0.25, "rewards/chosen": -0.2882000207901001, "rewards/margins": -0.010432569310069084, "rewards/rejected": -0.2777674198150635, "step": 305 }, { "epoch": 0.837782340862423, "grad_norm": 4.163002967834473, "learning_rate": 5.808219178082191e-07, "log_odds_chosen": -0.293673038482666, "log_odds_ratio": -0.8997273445129395, "logits/chosen": 0.11321250349283218, "logits/rejected": -0.021611429750919342, "logps/chosen": -2.7542314529418945, "logps/rejected": -2.474597930908203, "loss": 2.3792, "nll_loss": 2.2891931533813477, "rewards/accuracies": 0.375, "rewards/chosen": -0.27542316913604736, "rewards/margins": -0.027963347733020782, "rewards/rejected": -0.24745981395244598, "step": 306 }, { "epoch": 0.840520191649555, "grad_norm": 4.41457462310791, "learning_rate": 5.794520547945206e-07, "log_odds_chosen": 0.8310244083404541, "log_odds_ratio": -0.4198392331600189, "logits/chosen": 0.06802003085613251, "logits/rejected": -0.008474549278616905, "logps/chosen": -2.0657856464385986, "logps/rejected": -2.8054614067077637, "loss": 2.2725, "nll_loss": 2.2305588722229004, "rewards/accuracies": 0.875, "rewards/chosen": -0.2065785676240921, "rewards/margins": 0.07396756112575531, "rewards/rejected": -0.2805461287498474, "step": 307 }, { "epoch": 0.8432580424366872, "grad_norm": 4.667488098144531, "learning_rate": 5.780821917808219e-07, "log_odds_chosen": -0.4489067792892456, "log_odds_ratio": -1.0721862316131592, "logits/chosen": -0.07646472752094269, "logits/rejected": -0.08765817433595657, "logps/chosen": -2.767123222351074, "logps/rejected": -2.3156888484954834, "loss": 2.4374, "nll_loss": 2.3301897048950195, "rewards/accuracies": 0.5, "rewards/chosen": -0.2767123281955719, "rewards/margins": -0.045143429189920425, "rewards/rejected": -0.23156890273094177, "step": 308 }, { "epoch": 0.8459958932238193, "grad_norm": 4.607532978057861, "learning_rate": 5.767123287671233e-07, "log_odds_chosen": 0.047156840562820435, "log_odds_ratio": -0.7675751447677612, "logits/chosen": 0.03389085829257965, "logits/rejected": 0.01889800652861595, "logps/chosen": -2.7353034019470215, "logps/rejected": -2.7686543464660645, "loss": 2.367, "nll_loss": 2.2902488708496094, "rewards/accuracies": 0.625, "rewards/chosen": -0.27353033423423767, "rewards/margins": 0.003335084766149521, "rewards/rejected": -0.2768654227256775, "step": 309 }, { "epoch": 0.8487337440109514, "grad_norm": 4.569915771484375, "learning_rate": 5.753424657534246e-07, "log_odds_chosen": 0.8717784881591797, "log_odds_ratio": -0.6553237438201904, "logits/chosen": -0.018093910068273544, "logits/rejected": -0.10653818398714066, "logps/chosen": -2.4317338466644287, "logps/rejected": -3.2365469932556152, "loss": 2.3307, "nll_loss": 2.265212059020996, "rewards/accuracies": 0.625, "rewards/chosen": -0.24317336082458496, "rewards/margins": 0.08048132807016373, "rewards/rejected": -0.3236547112464905, "step": 310 }, { "epoch": 0.8514715947980835, "grad_norm": 4.484014987945557, "learning_rate": 5.73972602739726e-07, "log_odds_chosen": 1.253088116645813, "log_odds_ratio": -0.5318684577941895, "logits/chosen": -0.1512272208929062, "logits/rejected": -0.20366331934928894, "logps/chosen": -2.112152099609375, "logps/rejected": -3.283669948577881, "loss": 2.3423, "nll_loss": 2.2891597747802734, "rewards/accuracies": 0.75, "rewards/chosen": -0.21121522784233093, "rewards/margins": 0.11715175956487656, "rewards/rejected": -0.3283669948577881, "step": 311 }, { "epoch": 0.8542094455852156, "grad_norm": 5.111313343048096, "learning_rate": 5.726027397260273e-07, "log_odds_chosen": 0.07448148727416992, "log_odds_ratio": -0.8397217392921448, "logits/chosen": 0.02789650298655033, "logits/rejected": 0.0012134425342082977, "logps/chosen": -3.2838668823242188, "logps/rejected": -3.3463668823242188, "loss": 2.4953, "nll_loss": 2.4112844467163086, "rewards/accuracies": 0.5, "rewards/chosen": -0.32838666439056396, "rewards/margins": 0.006250012665987015, "rewards/rejected": -0.3346366882324219, "step": 312 }, { "epoch": 0.8569472963723477, "grad_norm": 4.8861870765686035, "learning_rate": 5.712328767123287e-07, "log_odds_chosen": -0.4023418724536896, "log_odds_ratio": -1.1957526206970215, "logits/chosen": 0.009147566743195057, "logits/rejected": -0.034676674753427505, "logps/chosen": -3.274811267852783, "logps/rejected": -2.848217487335205, "loss": 2.4703, "nll_loss": 2.3507704734802246, "rewards/accuracies": 0.5, "rewards/chosen": -0.32748112082481384, "rewards/margins": -0.042659372091293335, "rewards/rejected": -0.2848217487335205, "step": 313 }, { "epoch": 0.8596851471594799, "grad_norm": 5.659534454345703, "learning_rate": 5.698630136986301e-07, "log_odds_chosen": -1.246006965637207, "log_odds_ratio": -1.6202926635742188, "logits/chosen": -0.03421664610505104, "logits/rejected": 0.10230289399623871, "logps/chosen": -3.577780246734619, "logps/rejected": -2.3689560890197754, "loss": 2.5217, "nll_loss": 2.3597049713134766, "rewards/accuracies": 0.25, "rewards/chosen": -0.35777804255485535, "rewards/margins": -0.12088243663311005, "rewards/rejected": -0.2368956059217453, "step": 314 }, { "epoch": 0.8624229979466119, "grad_norm": 5.301451683044434, "learning_rate": 5.684931506849316e-07, "log_odds_chosen": -0.37013864517211914, "log_odds_ratio": -0.9746360778808594, "logits/chosen": 0.12029973417520523, "logits/rejected": 0.21213775873184204, "logps/chosen": -3.024554491043091, "logps/rejected": -2.6684179306030273, "loss": 2.4731, "nll_loss": 2.375676155090332, "rewards/accuracies": 0.5, "rewards/chosen": -0.30245548486709595, "rewards/margins": -0.03561365231871605, "rewards/rejected": -0.2668418288230896, "step": 315 }, { "epoch": 0.865160848733744, "grad_norm": 5.388719081878662, "learning_rate": 5.671232876712329e-07, "log_odds_chosen": -0.7324517965316772, "log_odds_ratio": -1.3163135051727295, "logits/chosen": 0.13230307400226593, "logits/rejected": 0.15553590655326843, "logps/chosen": -3.543262004852295, "logps/rejected": -2.8107540607452393, "loss": 2.5108, "nll_loss": 2.3792002201080322, "rewards/accuracies": 0.375, "rewards/chosen": -0.3543262481689453, "rewards/margins": -0.07325082272291183, "rewards/rejected": -0.2810754179954529, "step": 316 }, { "epoch": 0.8678986995208761, "grad_norm": 4.594443321228027, "learning_rate": 5.657534246575342e-07, "log_odds_chosen": -0.07103124260902405, "log_odds_ratio": -0.818784773349762, "logits/chosen": 0.24709539115428925, "logits/rejected": 0.1458885371685028, "logps/chosen": -2.496610164642334, "logps/rejected": -2.3974595069885254, "loss": 2.3601, "nll_loss": 2.2782349586486816, "rewards/accuracies": 0.5, "rewards/chosen": -0.24966099858283997, "rewards/margins": -0.009915085509419441, "rewards/rejected": -0.23974591493606567, "step": 317 }, { "epoch": 0.8706365503080082, "grad_norm": 4.738966941833496, "learning_rate": 5.643835616438356e-07, "log_odds_chosen": 0.020344078540802002, "log_odds_ratio": -0.7756747007369995, "logits/chosen": -0.014380238950252533, "logits/rejected": -0.0011908896267414093, "logps/chosen": -2.700018882751465, "logps/rejected": -2.686809539794922, "loss": 2.3712, "nll_loss": 2.293600082397461, "rewards/accuracies": 0.625, "rewards/chosen": -0.27000191807746887, "rewards/margins": -0.001320958137512207, "rewards/rejected": -0.26868095993995667, "step": 318 }, { "epoch": 0.8733744010951403, "grad_norm": 4.732463836669922, "learning_rate": 5.630136986301369e-07, "log_odds_chosen": -0.9774704575538635, "log_odds_ratio": -1.5626633167266846, "logits/chosen": -0.1066199243068695, "logits/rejected": -0.11105413734912872, "logps/chosen": -3.235360622406006, "logps/rejected": -2.2601823806762695, "loss": 2.4564, "nll_loss": 2.3001821041107178, "rewards/accuracies": 0.375, "rewards/chosen": -0.32353606820106506, "rewards/margins": -0.09751781821250916, "rewards/rejected": -0.22601823508739471, "step": 319 }, { "epoch": 0.8761122518822724, "grad_norm": 4.937757968902588, "learning_rate": 5.616438356164383e-07, "log_odds_chosen": -0.2411559671163559, "log_odds_ratio": -0.9039050936698914, "logits/chosen": -0.08673197776079178, "logits/rejected": -0.039657145738601685, "logps/chosen": -2.926811456680298, "logps/rejected": -2.7193942070007324, "loss": 2.5214, "nll_loss": 2.431051015853882, "rewards/accuracies": 0.25, "rewards/chosen": -0.29268112778663635, "rewards/margins": -0.020741716027259827, "rewards/rejected": -0.2719394564628601, "step": 320 }, { "epoch": 0.8788501026694046, "grad_norm": 6.378179550170898, "learning_rate": 5.602739726027396e-07, "log_odds_chosen": -0.7366122007369995, "log_odds_ratio": -1.4010939598083496, "logits/chosen": 0.14732983708381653, "logits/rejected": 0.27102935314178467, "logps/chosen": -3.796440362930298, "logps/rejected": -3.092010974884033, "loss": 2.6042, "nll_loss": 2.4640824794769287, "rewards/accuracies": 0.25, "rewards/chosen": -0.3796440362930298, "rewards/margins": -0.07044297456741333, "rewards/rejected": -0.30920106172561646, "step": 321 }, { "epoch": 0.8815879534565366, "grad_norm": 5.11533260345459, "learning_rate": 5.589041095890411e-07, "log_odds_chosen": 0.2389271855354309, "log_odds_ratio": -0.7612255811691284, "logits/chosen": 0.12966862320899963, "logits/rejected": 0.025735409930348396, "logps/chosen": -3.2613911628723145, "logps/rejected": -3.487372875213623, "loss": 2.4788, "nll_loss": 2.4027059078216553, "rewards/accuracies": 0.5, "rewards/chosen": -0.3261391222476959, "rewards/margins": 0.02259819209575653, "rewards/rejected": -0.34873729944229126, "step": 322 }, { "epoch": 0.8843258042436687, "grad_norm": 4.654810428619385, "learning_rate": 5.575342465753425e-07, "log_odds_chosen": -0.36224478483200073, "log_odds_ratio": -0.9641405344009399, "logits/chosen": 0.1279234141111374, "logits/rejected": 0.17666441202163696, "logps/chosen": -2.5694046020507812, "logps/rejected": -2.2314231395721436, "loss": 2.4156, "nll_loss": 2.319136619567871, "rewards/accuracies": 0.25, "rewards/chosen": -0.25694048404693604, "rewards/margins": -0.03379816561937332, "rewards/rejected": -0.2231423258781433, "step": 323 }, { "epoch": 0.8870636550308009, "grad_norm": 5.557539939880371, "learning_rate": 5.561643835616439e-07, "log_odds_chosen": -0.3432334065437317, "log_odds_ratio": -1.079829454421997, "logits/chosen": 0.12373773753643036, "logits/rejected": 0.13403405249118805, "logps/chosen": -4.014306545257568, "logps/rejected": -3.6616241931915283, "loss": 2.5079, "nll_loss": 2.3999481201171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.4014306366443634, "rewards/margins": -0.035268232226371765, "rewards/rejected": -0.36616241931915283, "step": 324 }, { "epoch": 0.8898015058179329, "grad_norm": 5.743906497955322, "learning_rate": 5.547945205479452e-07, "log_odds_chosen": -0.8573713302612305, "log_odds_ratio": -1.4247148036956787, "logits/chosen": 0.09932580590248108, "logits/rejected": 0.1869983822107315, "logps/chosen": -4.074947357177734, "logps/rejected": -3.212770462036133, "loss": 2.6251, "nll_loss": 2.4825925827026367, "rewards/accuracies": 0.375, "rewards/chosen": -0.4074947237968445, "rewards/margins": -0.08621768653392792, "rewards/rejected": -0.32127705216407776, "step": 325 }, { "epoch": 0.892539356605065, "grad_norm": 4.71714973449707, "learning_rate": 5.534246575342465e-07, "log_odds_chosen": 0.18622466921806335, "log_odds_ratio": -0.7730242013931274, "logits/chosen": 0.1743350625038147, "logits/rejected": 0.18292617797851562, "logps/chosen": -2.68891978263855, "logps/rejected": -2.839977741241455, "loss": 2.3881, "nll_loss": 2.310781240463257, "rewards/accuracies": 0.5, "rewards/chosen": -0.26889199018478394, "rewards/margins": 0.015105772763490677, "rewards/rejected": -0.2839977741241455, "step": 326 }, { "epoch": 0.8952772073921971, "grad_norm": 4.980324745178223, "learning_rate": 5.520547945205479e-07, "log_odds_chosen": 0.9997991323471069, "log_odds_ratio": -0.6253899335861206, "logits/chosen": -0.10914178192615509, "logits/rejected": -0.09169194102287292, "logps/chosen": -2.910888671875, "logps/rejected": -3.8912510871887207, "loss": 2.3953, "nll_loss": 2.3327152729034424, "rewards/accuracies": 0.625, "rewards/chosen": -0.29108887910842896, "rewards/margins": 0.09803621470928192, "rewards/rejected": -0.3891250491142273, "step": 327 }, { "epoch": 0.8980150581793293, "grad_norm": 4.987423419952393, "learning_rate": 5.506849315068492e-07, "log_odds_chosen": -0.6586142778396606, "log_odds_ratio": -1.2375357151031494, "logits/chosen": -0.13884910941123962, "logits/rejected": -0.1268475204706192, "logps/chosen": -3.3948655128479004, "logps/rejected": -2.7655045986175537, "loss": 2.5375, "nll_loss": 2.4137191772460938, "rewards/accuracies": 0.25, "rewards/chosen": -0.33948656916618347, "rewards/margins": -0.06293608248233795, "rewards/rejected": -0.2765505015850067, "step": 328 }, { "epoch": 0.9007529089664613, "grad_norm": 5.2714667320251465, "learning_rate": 5.493150684931506e-07, "log_odds_chosen": 0.08624114096164703, "log_odds_ratio": -0.8693986535072327, "logits/chosen": 0.19627845287322998, "logits/rejected": 0.20881707966327667, "logps/chosen": -3.363434314727783, "logps/rejected": -3.397749662399292, "loss": 2.3865, "nll_loss": 2.2995615005493164, "rewards/accuracies": 0.75, "rewards/chosen": -0.3363434672355652, "rewards/margins": 0.003431526944041252, "rewards/rejected": -0.3397749662399292, "step": 329 }, { "epoch": 0.9034907597535934, "grad_norm": 4.605297088623047, "learning_rate": 5.47945205479452e-07, "log_odds_chosen": -0.2824283242225647, "log_odds_ratio": -0.9239029884338379, "logits/chosen": 0.07524114847183228, "logits/rejected": 0.016072221100330353, "logps/chosen": -2.578930139541626, "logps/rejected": -2.3077096939086914, "loss": 2.4266, "nll_loss": 2.334177255630493, "rewards/accuracies": 0.5, "rewards/chosen": -0.25789302587509155, "rewards/margins": -0.02712203562259674, "rewards/rejected": -0.2307709902524948, "step": 330 }, { "epoch": 0.9062286105407256, "grad_norm": 4.668397426605225, "learning_rate": 5.465753424657535e-07, "log_odds_chosen": 1.3789019584655762, "log_odds_ratio": -0.5552793145179749, "logits/chosen": -0.11804622411727905, "logits/rejected": -0.17402440309524536, "logps/chosen": -2.4194462299346924, "logps/rejected": -3.7387237548828125, "loss": 2.3573, "nll_loss": 2.3017330169677734, "rewards/accuracies": 0.625, "rewards/chosen": -0.24194464087486267, "rewards/margins": 0.1319277584552765, "rewards/rejected": -0.37387239933013916, "step": 331 }, { "epoch": 0.9089664613278576, "grad_norm": 4.6178741455078125, "learning_rate": 5.452054794520548e-07, "log_odds_chosen": -0.4008067548274994, "log_odds_ratio": -1.192920446395874, "logits/chosen": 0.1002085730433464, "logits/rejected": 0.14770543575286865, "logps/chosen": -3.027672052383423, "logps/rejected": -2.6053497791290283, "loss": 2.4242, "nll_loss": 2.3048605918884277, "rewards/accuracies": 0.75, "rewards/chosen": -0.30276721715927124, "rewards/margins": -0.042232222855091095, "rewards/rejected": -0.26053500175476074, "step": 332 }, { "epoch": 0.9117043121149897, "grad_norm": 5.947726249694824, "learning_rate": 5.438356164383562e-07, "log_odds_chosen": -0.9364914894104004, "log_odds_ratio": -1.361746072769165, "logits/chosen": 0.12176491320133209, "logits/rejected": 0.20827209949493408, "logps/chosen": -3.81888484954834, "logps/rejected": -2.906480073928833, "loss": 2.5346, "nll_loss": 2.3984298706054688, "rewards/accuracies": 0.25, "rewards/chosen": -0.3818885087966919, "rewards/margins": -0.09124049544334412, "rewards/rejected": -0.2906480133533478, "step": 333 }, { "epoch": 0.9144421629021219, "grad_norm": 4.7592620849609375, "learning_rate": 5.424657534246575e-07, "log_odds_chosen": -0.5121784806251526, "log_odds_ratio": -1.148374319076538, "logits/chosen": -0.10523848235607147, "logits/rejected": -0.11441829800605774, "logps/chosen": -2.6340832710266113, "logps/rejected": -2.1467528343200684, "loss": 2.4448, "nll_loss": 2.3299717903137207, "rewards/accuracies": 0.375, "rewards/chosen": -0.2634083330631256, "rewards/margins": -0.04873305559158325, "rewards/rejected": -0.21467527747154236, "step": 334 }, { "epoch": 0.917180013689254, "grad_norm": 4.252990245819092, "learning_rate": 5.410958904109589e-07, "log_odds_chosen": -0.007228732109069824, "log_odds_ratio": -0.9173561334609985, "logits/chosen": -0.07126085460186005, "logits/rejected": -0.10666590183973312, "logps/chosen": -2.49204421043396, "logps/rejected": -2.453251600265503, "loss": 2.3683, "nll_loss": 2.276602268218994, "rewards/accuracies": 0.625, "rewards/chosen": -0.24920444190502167, "rewards/margins": -0.003879273310303688, "rewards/rejected": -0.24532514810562134, "step": 335 }, { "epoch": 0.919917864476386, "grad_norm": 5.450808048248291, "learning_rate": 5.397260273972602e-07, "log_odds_chosen": -1.1336231231689453, "log_odds_ratio": -1.6720139980316162, "logits/chosen": 0.031431570649147034, "logits/rejected": 0.09944222867488861, "logps/chosen": -4.465979099273682, "logps/rejected": -3.3218464851379395, "loss": 2.6137, "nll_loss": 2.446524143218994, "rewards/accuracies": 0.375, "rewards/chosen": -0.44659796357154846, "rewards/margins": -0.11441324651241302, "rewards/rejected": -0.33218467235565186, "step": 336 }, { "epoch": 0.9226557152635181, "grad_norm": 5.502899646759033, "learning_rate": 5.383561643835615e-07, "log_odds_chosen": -0.9491117000579834, "log_odds_ratio": -1.3301019668579102, "logits/chosen": -0.13083219528198242, "logits/rejected": 0.017184648662805557, "logps/chosen": -3.474172830581665, "logps/rejected": -2.5961720943450928, "loss": 2.5063, "nll_loss": 2.3732540607452393, "rewards/accuracies": 0.125, "rewards/chosen": -0.34741729497909546, "rewards/margins": -0.08780006319284439, "rewards/rejected": -0.2596172094345093, "step": 337 }, { "epoch": 0.9253935660506503, "grad_norm": 5.6494317054748535, "learning_rate": 5.36986301369863e-07, "log_odds_chosen": -0.7061262130737305, "log_odds_ratio": -1.2173131704330444, "logits/chosen": 0.006599133834242821, "logits/rejected": 0.0854707881808281, "logps/chosen": -3.559879779815674, "logps/rejected": -2.8734617233276367, "loss": 2.4586, "nll_loss": 2.3368940353393555, "rewards/accuracies": 0.25, "rewards/chosen": -0.3559879958629608, "rewards/margins": -0.068641796708107, "rewards/rejected": -0.2873461842536926, "step": 338 }, { "epoch": 0.9281314168377823, "grad_norm": 5.1742377281188965, "learning_rate": 5.356164383561644e-07, "log_odds_chosen": -0.8410936594009399, "log_odds_ratio": -1.2998244762420654, "logits/chosen": -0.05221130698919296, "logits/rejected": 0.01092202216386795, "logps/chosen": -3.3726961612701416, "logps/rejected": -2.550672769546509, "loss": 2.5388, "nll_loss": 2.4088315963745117, "rewards/accuracies": 0.25, "rewards/chosen": -0.3372696042060852, "rewards/margins": -0.08220233023166656, "rewards/rejected": -0.25506725907325745, "step": 339 }, { "epoch": 0.9308692676249144, "grad_norm": 5.6524882316589355, "learning_rate": 5.342465753424658e-07, "log_odds_chosen": -0.5199385285377502, "log_odds_ratio": -1.1627999544143677, "logits/chosen": 0.04782119765877724, "logits/rejected": 0.13119655847549438, "logps/chosen": -2.812671184539795, "logps/rejected": -2.2880783081054688, "loss": 2.4284, "nll_loss": 2.312103271484375, "rewards/accuracies": 0.5, "rewards/chosen": -0.2812671363353729, "rewards/margins": -0.05245929956436157, "rewards/rejected": -0.22880783677101135, "step": 340 }, { "epoch": 0.9336071184120466, "grad_norm": 4.990926742553711, "learning_rate": 5.328767123287671e-07, "log_odds_chosen": 0.048435937613248825, "log_odds_ratio": -0.723381519317627, "logits/chosen": 0.07001964002847672, "logits/rejected": 0.04892638698220253, "logps/chosen": -3.1176300048828125, "logps/rejected": -3.15669322013855, "loss": 2.4279, "nll_loss": 2.3555943965911865, "rewards/accuracies": 0.625, "rewards/chosen": -0.3117629885673523, "rewards/margins": 0.003906333819031715, "rewards/rejected": -0.31566929817199707, "step": 341 }, { "epoch": 0.9363449691991786, "grad_norm": 6.3131184577941895, "learning_rate": 5.315068493150685e-07, "log_odds_chosen": -0.7838089466094971, "log_odds_ratio": -1.3790791034698486, "logits/chosen": 0.15107548236846924, "logits/rejected": 0.23953473567962646, "logps/chosen": -4.126433849334717, "logps/rejected": -3.3320090770721436, "loss": 2.6146, "nll_loss": 2.47670578956604, "rewards/accuracies": 0.375, "rewards/chosen": -0.4126434326171875, "rewards/margins": -0.07944250851869583, "rewards/rejected": -0.33320093154907227, "step": 342 }, { "epoch": 0.9390828199863107, "grad_norm": 6.066688060760498, "learning_rate": 5.301369863013698e-07, "log_odds_chosen": -0.7313181161880493, "log_odds_ratio": -1.2235113382339478, "logits/chosen": -0.042700089514255524, "logits/rejected": 0.030888572335243225, "logps/chosen": -3.77520751953125, "logps/rejected": -3.050729274749756, "loss": 2.501, "nll_loss": 2.378633499145508, "rewards/accuracies": 0.25, "rewards/chosen": -0.37752074003219604, "rewards/margins": -0.07244780659675598, "rewards/rejected": -0.30507293343544006, "step": 343 }, { "epoch": 0.9418206707734429, "grad_norm": 6.012105941772461, "learning_rate": 5.287671232876712e-07, "log_odds_chosen": 0.007373996078968048, "log_odds_ratio": -0.8422769904136658, "logits/chosen": 0.09739996492862701, "logits/rejected": 0.20005381107330322, "logps/chosen": -3.533095598220825, "logps/rejected": -3.5473427772521973, "loss": 2.4825, "nll_loss": 2.3982393741607666, "rewards/accuracies": 0.375, "rewards/chosen": -0.3533095717430115, "rewards/margins": 0.0014246981590986252, "rewards/rejected": -0.35473430156707764, "step": 344 }, { "epoch": 0.944558521560575, "grad_norm": 4.9929280281066895, "learning_rate": 5.273972602739725e-07, "log_odds_chosen": -0.25531166791915894, "log_odds_ratio": -0.9833787083625793, "logits/chosen": -0.12326525151729584, "logits/rejected": -0.06227739900350571, "logps/chosen": -2.692805767059326, "logps/rejected": -2.4415273666381836, "loss": 2.3893, "nll_loss": 2.2909913063049316, "rewards/accuracies": 0.25, "rewards/chosen": -0.2692805826663971, "rewards/margins": -0.025127843022346497, "rewards/rejected": -0.2441527396440506, "step": 345 }, { "epoch": 0.947296372347707, "grad_norm": 4.774659156799316, "learning_rate": 5.260273972602739e-07, "log_odds_chosen": -0.012323133647441864, "log_odds_ratio": -0.7592999935150146, "logits/chosen": -0.046874627470970154, "logits/rejected": -0.035170383751392365, "logps/chosen": -2.8152854442596436, "logps/rejected": -2.8005220890045166, "loss": 2.4604, "nll_loss": 2.384432792663574, "rewards/accuracies": 0.5, "rewards/chosen": -0.2815285623073578, "rewards/margins": -0.0014763455837965012, "rewards/rejected": -0.28005221486091614, "step": 346 }, { "epoch": 0.9500342231348392, "grad_norm": 5.19882869720459, "learning_rate": 5.246575342465754e-07, "log_odds_chosen": -0.7868533730506897, "log_odds_ratio": -1.2872501611709595, "logits/chosen": -0.12551721930503845, "logits/rejected": -0.0600445419549942, "logps/chosen": -3.8494620323181152, "logps/rejected": -3.062821388244629, "loss": 2.4711, "nll_loss": 2.3423705101013184, "rewards/accuracies": 0.375, "rewards/chosen": -0.38494622707366943, "rewards/margins": -0.07866408675909042, "rewards/rejected": -0.3062821626663208, "step": 347 }, { "epoch": 0.9527720739219713, "grad_norm": 4.655478000640869, "learning_rate": 5.232876712328767e-07, "log_odds_chosen": 0.04947313666343689, "log_odds_ratio": -0.7165818810462952, "logits/chosen": 0.14159749448299408, "logits/rejected": 0.05462660640478134, "logps/chosen": -2.7125799655914307, "logps/rejected": -2.7222909927368164, "loss": 2.4131, "nll_loss": 2.341411828994751, "rewards/accuracies": 0.5, "rewards/chosen": -0.27125799655914307, "rewards/margins": 0.0009710937738418579, "rewards/rejected": -0.27222907543182373, "step": 348 }, { "epoch": 0.9555099247091033, "grad_norm": 5.473310947418213, "learning_rate": 5.219178082191781e-07, "log_odds_chosen": -0.4588828682899475, "log_odds_ratio": -1.0958784818649292, "logits/chosen": -0.03434830158948898, "logits/rejected": 0.01618095487356186, "logps/chosen": -3.5747454166412354, "logps/rejected": -3.1136274337768555, "loss": 2.4507, "nll_loss": 2.3411614894866943, "rewards/accuracies": 0.375, "rewards/chosen": -0.35747456550598145, "rewards/margins": -0.046111810952425, "rewards/rejected": -0.31136274337768555, "step": 349 }, { "epoch": 0.9582477754962354, "grad_norm": 4.988995552062988, "learning_rate": 5.205479452054794e-07, "log_odds_chosen": -0.6172557473182678, "log_odds_ratio": -1.1769685745239258, "logits/chosen": 0.057034753262996674, "logits/rejected": 0.11969223618507385, "logps/chosen": -3.221980571746826, "logps/rejected": -2.6362528800964355, "loss": 2.4152, "nll_loss": 2.297463893890381, "rewards/accuracies": 0.5, "rewards/chosen": -0.3221980631351471, "rewards/margins": -0.05857277661561966, "rewards/rejected": -0.26362529397010803, "step": 350 }, { "epoch": 0.9609856262833676, "grad_norm": 5.9521636962890625, "learning_rate": 5.191780821917808e-07, "log_odds_chosen": -0.972407341003418, "log_odds_ratio": -1.4900689125061035, "logits/chosen": 0.0595562607049942, "logits/rejected": 0.1801726520061493, "logps/chosen": -4.279046535491943, "logps/rejected": -3.3233158588409424, "loss": 2.5334, "nll_loss": 2.384413480758667, "rewards/accuracies": 0.375, "rewards/chosen": -0.4279046654701233, "rewards/margins": -0.0955730751156807, "rewards/rejected": -0.3323315978050232, "step": 351 }, { "epoch": 0.9637234770704997, "grad_norm": 6.119920253753662, "learning_rate": 5.178082191780821e-07, "log_odds_chosen": -1.2332972288131714, "log_odds_ratio": -1.5470566749572754, "logits/chosen": 0.05183951556682587, "logits/rejected": 0.1992112398147583, "logps/chosen": -4.208905220031738, "logps/rejected": -3.00703763961792, "loss": 2.5385, "nll_loss": 2.3837828636169434, "rewards/accuracies": 0.125, "rewards/chosen": -0.4208905100822449, "rewards/margins": -0.12018676847219467, "rewards/rejected": -0.300703763961792, "step": 352 }, { "epoch": 0.9664613278576317, "grad_norm": 4.580576419830322, "learning_rate": 5.164383561643836e-07, "log_odds_chosen": 0.43731966614723206, "log_odds_ratio": -0.5603713989257812, "logits/chosen": 0.12227784842252731, "logits/rejected": 0.06878460943698883, "logps/chosen": -2.425980567932129, "logps/rejected": -2.8084535598754883, "loss": 2.298, "nll_loss": 2.2419931888580322, "rewards/accuracies": 0.75, "rewards/chosen": -0.2425980567932129, "rewards/margins": 0.03824729472398758, "rewards/rejected": -0.2808453440666199, "step": 353 }, { "epoch": 0.9691991786447639, "grad_norm": 4.372311115264893, "learning_rate": 5.150684931506849e-07, "log_odds_chosen": -0.12120915949344635, "log_odds_ratio": -0.7756071090698242, "logits/chosen": -0.02794109284877777, "logits/rejected": -0.04794108867645264, "logps/chosen": -2.2430222034454346, "logps/rejected": -2.1506667137145996, "loss": 2.2366, "nll_loss": 2.1590287685394287, "rewards/accuracies": 0.5, "rewards/chosen": -0.2243022322654724, "rewards/margins": -0.009235532023012638, "rewards/rejected": -0.21506668627262115, "step": 354 }, { "epoch": 0.971937029431896, "grad_norm": 5.292059898376465, "learning_rate": 5.136986301369864e-07, "log_odds_chosen": -0.5302423238754272, "log_odds_ratio": -1.174440622329712, "logits/chosen": -0.02259286865592003, "logits/rejected": 0.02095727249979973, "logps/chosen": -3.5735063552856445, "logps/rejected": -3.034064292907715, "loss": 2.5147, "nll_loss": 2.397282600402832, "rewards/accuracies": 0.625, "rewards/chosen": -0.3573506474494934, "rewards/margins": -0.053944218903779984, "rewards/rejected": -0.30340641736984253, "step": 355 }, { "epoch": 0.974674880219028, "grad_norm": 4.310571193695068, "learning_rate": 5.123287671232877e-07, "log_odds_chosen": 0.5833410620689392, "log_odds_ratio": -0.7093979716300964, "logits/chosen": 0.12236674875020981, "logits/rejected": 0.0010679885745048523, "logps/chosen": -2.509328842163086, "logps/rejected": -3.0367188453674316, "loss": 2.2971, "nll_loss": 2.226206064224243, "rewards/accuracies": 0.625, "rewards/chosen": -0.250932902097702, "rewards/margins": 0.052738964557647705, "rewards/rejected": -0.30367186665534973, "step": 356 }, { "epoch": 0.9774127310061602, "grad_norm": 5.437140941619873, "learning_rate": 5.10958904109589e-07, "log_odds_chosen": -0.1506991982460022, "log_odds_ratio": -0.8678931593894958, "logits/chosen": 0.010896777734160423, "logits/rejected": 0.07645861804485321, "logps/chosen": -3.5863523483276367, "logps/rejected": -3.4308338165283203, "loss": 2.4653, "nll_loss": 2.3785274028778076, "rewards/accuracies": 0.625, "rewards/chosen": -0.358635276556015, "rewards/margins": -0.01555185578763485, "rewards/rejected": -0.34308338165283203, "step": 357 }, { "epoch": 0.9801505817932923, "grad_norm": 5.340737342834473, "learning_rate": 5.095890410958904e-07, "log_odds_chosen": 0.24418146908283234, "log_odds_ratio": -0.7979166507720947, "logits/chosen": 0.01569487154483795, "logits/rejected": 0.037125371396541595, "logps/chosen": -3.0225942134857178, "logps/rejected": -3.2122464179992676, "loss": 2.4178, "nll_loss": 2.337998151779175, "rewards/accuracies": 0.375, "rewards/chosen": -0.3022594153881073, "rewards/margins": 0.018965231254696846, "rewards/rejected": -0.3212246298789978, "step": 358 }, { "epoch": 0.9828884325804244, "grad_norm": 5.355991363525391, "learning_rate": 5.082191780821917e-07, "log_odds_chosen": -0.1315455138683319, "log_odds_ratio": -0.987265944480896, "logits/chosen": 0.028672268614172935, "logits/rejected": 0.06637300550937653, "logps/chosen": -3.389922857284546, "logps/rejected": -3.2596631050109863, "loss": 2.4769, "nll_loss": 2.3781652450561523, "rewards/accuracies": 0.5, "rewards/chosen": -0.33899226784706116, "rewards/margins": -0.013025971129536629, "rewards/rejected": -0.3259662985801697, "step": 359 }, { "epoch": 0.9856262833675564, "grad_norm": 5.639803886413574, "learning_rate": 5.068493150684931e-07, "log_odds_chosen": -0.5896866321563721, "log_odds_ratio": -1.1401245594024658, "logits/chosen": 0.09576465934515, "logits/rejected": 0.11838044971227646, "logps/chosen": -3.610902786254883, "logps/rejected": -3.0310511589050293, "loss": 2.4494, "nll_loss": 2.335397481918335, "rewards/accuracies": 0.25, "rewards/chosen": -0.3610903024673462, "rewards/margins": -0.05798518657684326, "rewards/rejected": -0.30310511589050293, "step": 360 }, { "epoch": 0.9883641341546886, "grad_norm": 4.48254919052124, "learning_rate": 5.054794520547944e-07, "log_odds_chosen": 0.62310791015625, "log_odds_ratio": -0.554405152797699, "logits/chosen": 0.05224139243364334, "logits/rejected": 0.05903426557779312, "logps/chosen": -2.5841119289398193, "logps/rejected": -3.17307710647583, "loss": 2.3914, "nll_loss": 2.3359997272491455, "rewards/accuracies": 0.625, "rewards/chosen": -0.258411169052124, "rewards/margins": 0.058896541595458984, "rewards/rejected": -0.317307710647583, "step": 361 }, { "epoch": 0.9911019849418207, "grad_norm": 4.496145725250244, "learning_rate": 5.041095890410959e-07, "log_odds_chosen": -0.07177823781967163, "log_odds_ratio": -0.8126384019851685, "logits/chosen": -0.2031949758529663, "logits/rejected": -0.2795363962650299, "logps/chosen": -2.375849723815918, "logps/rejected": -2.280764102935791, "loss": 2.3091, "nll_loss": 2.2278623580932617, "rewards/accuracies": 0.5, "rewards/chosen": -0.23758497834205627, "rewards/margins": -0.009508555755019188, "rewards/rejected": -0.22807641327381134, "step": 362 }, { "epoch": 0.9938398357289527, "grad_norm": 5.1862311363220215, "learning_rate": 5.027397260273973e-07, "log_odds_chosen": -0.3638313412666321, "log_odds_ratio": -0.9674633145332336, "logits/chosen": 0.03204905986785889, "logits/rejected": 0.12746036052703857, "logps/chosen": -3.1155829429626465, "logps/rejected": -2.7331364154815674, "loss": 2.411, "nll_loss": 2.314293384552002, "rewards/accuracies": 0.375, "rewards/chosen": -0.3115583062171936, "rewards/margins": -0.03824464976787567, "rewards/rejected": -0.27331364154815674, "step": 363 }, { "epoch": 0.9965776865160849, "grad_norm": 4.381834983825684, "learning_rate": 5.013698630136987e-07, "log_odds_chosen": 0.14931876957416534, "log_odds_ratio": -0.6691440343856812, "logits/chosen": 0.11110764741897583, "logits/rejected": 0.06303411722183228, "logps/chosen": -2.3636856079101562, "logps/rejected": -2.477562427520752, "loss": 2.2487, "nll_loss": 2.1817660331726074, "rewards/accuracies": 0.75, "rewards/chosen": -0.2363685667514801, "rewards/margins": 0.011387661099433899, "rewards/rejected": -0.2477562427520752, "step": 364 }, { "epoch": 0.999315537303217, "grad_norm": 5.214937210083008, "learning_rate": 5e-07, "log_odds_chosen": -0.23712116479873657, "log_odds_ratio": -0.8817236423492432, "logits/chosen": -0.051974162459373474, "logits/rejected": -0.06648384779691696, "logps/chosen": -3.224728584289551, "logps/rejected": -2.9947681427001953, "loss": 2.4533, "nll_loss": 2.36515474319458, "rewards/accuracies": 0.5, "rewards/chosen": -0.3224729001522064, "rewards/margins": -0.02299606055021286, "rewards/rejected": -0.2994768023490906, "step": 365 }, { "epoch": 1.002053388090349, "grad_norm": 5.908603668212891, "learning_rate": 4.986301369863014e-07, "log_odds_chosen": -0.5617843866348267, "log_odds_ratio": -1.4421237707138062, "logits/chosen": 0.024201691150665283, "logits/rejected": -0.029679544270038605, "logps/chosen": -4.33668327331543, "logps/rejected": -3.7533321380615234, "loss": 2.4399, "nll_loss": 2.295675754547119, "rewards/accuracies": 0.375, "rewards/chosen": -0.4336683452129364, "rewards/margins": -0.05833511799573898, "rewards/rejected": -0.3753332197666168, "step": 366 }, { "epoch": 1.0047912388774811, "grad_norm": 5.770951747894287, "learning_rate": 4.972602739726027e-07, "log_odds_chosen": -0.7181705236434937, "log_odds_ratio": -1.4189003705978394, "logits/chosen": -0.012546788901090622, "logits/rejected": 0.06571546941995621, "logps/chosen": -3.956329107284546, "logps/rejected": -3.2094430923461914, "loss": 2.4294, "nll_loss": 2.287473201751709, "rewards/accuracies": 0.375, "rewards/chosen": -0.39563289284706116, "rewards/margins": -0.07468859851360321, "rewards/rejected": -0.32094430923461914, "step": 367 }, { "epoch": 1.0075290896646132, "grad_norm": 5.032556533813477, "learning_rate": 4.958904109589041e-07, "log_odds_chosen": 0.5284329652786255, "log_odds_ratio": -0.8029075860977173, "logits/chosen": -0.021031133830547333, "logits/rejected": -0.08632637560367584, "logps/chosen": -2.896500587463379, "logps/rejected": -3.3671326637268066, "loss": 2.3307, "nll_loss": 2.250399112701416, "rewards/accuracies": 0.75, "rewards/chosen": -0.2896500527858734, "rewards/margins": 0.0470631942152977, "rewards/rejected": -0.3367132544517517, "step": 368 }, { "epoch": 1.0102669404517455, "grad_norm": 6.262066841125488, "learning_rate": 4.945205479452055e-07, "log_odds_chosen": -0.7743909955024719, "log_odds_ratio": -1.238142490386963, "logits/chosen": 0.15788042545318604, "logits/rejected": 0.29433247447013855, "logps/chosen": -4.030814170837402, "logps/rejected": -3.2959084510803223, "loss": 2.611, "nll_loss": 2.4871580600738525, "rewards/accuracies": 0.25, "rewards/chosen": -0.4030814468860626, "rewards/margins": -0.07349057495594025, "rewards/rejected": -0.3295908570289612, "step": 369 }, { "epoch": 1.0130047912388775, "grad_norm": 5.441160202026367, "learning_rate": 4.931506849315068e-07, "log_odds_chosen": -0.7209240198135376, "log_odds_ratio": -1.3290034532546997, "logits/chosen": 0.14688603579998016, "logits/rejected": 0.209958016872406, "logps/chosen": -3.4920501708984375, "logps/rejected": -2.775761842727661, "loss": 2.4221, "nll_loss": 2.2892379760742188, "rewards/accuracies": 0.375, "rewards/chosen": -0.34920504689216614, "rewards/margins": -0.0716288685798645, "rewards/rejected": -0.27757617831230164, "step": 370 }, { "epoch": 1.0157426420260096, "grad_norm": 5.580389976501465, "learning_rate": 4.917808219178081e-07, "log_odds_chosen": -0.31656917929649353, "log_odds_ratio": -1.0771565437316895, "logits/chosen": 0.00922493264079094, "logits/rejected": 0.0830044224858284, "logps/chosen": -3.490795373916626, "logps/rejected": -3.163020610809326, "loss": 2.4036, "nll_loss": 2.295867919921875, "rewards/accuracies": 0.5, "rewards/chosen": -0.34907951951026917, "rewards/margins": -0.03277746960520744, "rewards/rejected": -0.3163020610809326, "step": 371 }, { "epoch": 1.0184804928131417, "grad_norm": 4.599774360656738, "learning_rate": 4.904109589041096e-07, "log_odds_chosen": 0.1163327619433403, "log_odds_ratio": -0.7180848121643066, "logits/chosen": 0.10609789937734604, "logits/rejected": 0.072285495698452, "logps/chosen": -2.3129448890686035, "logps/rejected": -2.375857353210449, "loss": 2.3225, "nll_loss": 2.250708818435669, "rewards/accuracies": 0.5, "rewards/chosen": -0.23129448294639587, "rewards/margins": 0.006291264668107033, "rewards/rejected": -0.23758573830127716, "step": 372 }, { "epoch": 1.0212183436002737, "grad_norm": 6.191583633422852, "learning_rate": 4.89041095890411e-07, "log_odds_chosen": -1.724252462387085, "log_odds_ratio": -2.057070732116699, "logits/chosen": 0.015158601105213165, "logits/rejected": 0.2236141860485077, "logps/chosen": -4.085386753082275, "logps/rejected": -2.4758777618408203, "loss": 2.5748, "nll_loss": 2.369095802307129, "rewards/accuracies": 0.25, "rewards/chosen": -0.4085386395454407, "rewards/margins": -0.16095086932182312, "rewards/rejected": -0.24758780002593994, "step": 373 }, { "epoch": 1.0239561943874058, "grad_norm": 4.722418308258057, "learning_rate": 4.876712328767123e-07, "log_odds_chosen": 0.01080542802810669, "log_odds_ratio": -0.872761607170105, "logits/chosen": 0.04428698495030403, "logits/rejected": 0.02829374372959137, "logps/chosen": -2.709874153137207, "logps/rejected": -2.6718358993530273, "loss": 2.3539, "nll_loss": 2.266601085662842, "rewards/accuracies": 0.75, "rewards/chosen": -0.27098745107650757, "rewards/margins": -0.0038038548082113266, "rewards/rejected": -0.2671835720539093, "step": 374 }, { "epoch": 1.0266940451745379, "grad_norm": 5.431556224822998, "learning_rate": 4.863013698630137e-07, "log_odds_chosen": -0.6912620067596436, "log_odds_ratio": -1.3051767349243164, "logits/chosen": 0.05767488479614258, "logits/rejected": 0.13379725813865662, "logps/chosen": -3.4554476737976074, "logps/rejected": -2.811490297317505, "loss": 2.492, "nll_loss": 2.3615212440490723, "rewards/accuracies": 0.5, "rewards/chosen": -0.3455447554588318, "rewards/margins": -0.0643957257270813, "rewards/rejected": -0.2811490297317505, "step": 375 }, { "epoch": 1.0294318959616702, "grad_norm": 4.6178131103515625, "learning_rate": 4.84931506849315e-07, "log_odds_chosen": 0.0948125422000885, "log_odds_ratio": -0.7743974328041077, "logits/chosen": 0.027112113311886787, "logits/rejected": 0.02411729097366333, "logps/chosen": -3.2451610565185547, "logps/rejected": -3.3106863498687744, "loss": 2.3603, "nll_loss": 2.282850742340088, "rewards/accuracies": 0.625, "rewards/chosen": -0.3245161175727844, "rewards/margins": 0.006552550941705704, "rewards/rejected": -0.33106863498687744, "step": 376 }, { "epoch": 1.0321697467488022, "grad_norm": 5.358955383300781, "learning_rate": 4.835616438356164e-07, "log_odds_chosen": 0.2524774372577667, "log_odds_ratio": -0.8374321460723877, "logits/chosen": -0.007798105478286743, "logits/rejected": -0.015978436917066574, "logps/chosen": -3.054305076599121, "logps/rejected": -3.2659945487976074, "loss": 2.358, "nll_loss": 2.274211883544922, "rewards/accuracies": 0.5, "rewards/chosen": -0.30543050169944763, "rewards/margins": 0.02116895467042923, "rewards/rejected": -0.32659947872161865, "step": 377 }, { "epoch": 1.0349075975359343, "grad_norm": 5.313357830047607, "learning_rate": 4.821917808219178e-07, "log_odds_chosen": -0.7545703649520874, "log_odds_ratio": -1.2157427072525024, "logits/chosen": 0.0695311427116394, "logits/rejected": 0.12221226096153259, "logps/chosen": -3.4862520694732666, "logps/rejected": -2.749798536300659, "loss": 2.4516, "nll_loss": 2.3300318717956543, "rewards/accuracies": 0.375, "rewards/chosen": -0.34862521290779114, "rewards/margins": -0.07364536076784134, "rewards/rejected": -0.274979829788208, "step": 378 }, { "epoch": 1.0376454483230664, "grad_norm": 4.993705749511719, "learning_rate": 4.808219178082192e-07, "log_odds_chosen": -0.04906243830919266, "log_odds_ratio": -0.7537844181060791, "logits/chosen": 0.060577601194381714, "logits/rejected": 0.044161051511764526, "logps/chosen": -2.612955331802368, "logps/rejected": -2.558094024658203, "loss": 2.3193, "nll_loss": 2.2438974380493164, "rewards/accuracies": 0.625, "rewards/chosen": -0.2612955570220947, "rewards/margins": -0.005486123263835907, "rewards/rejected": -0.2558094263076782, "step": 379 }, { "epoch": 1.0403832991101984, "grad_norm": 5.452589988708496, "learning_rate": 4.794520547945205e-07, "log_odds_chosen": -0.021835684776306152, "log_odds_ratio": -1.2789952754974365, "logits/chosen": -0.07295296341180801, "logits/rejected": -0.10991863161325455, "logps/chosen": -3.864856004714966, "logps/rejected": -3.8119513988494873, "loss": 2.4, "nll_loss": 2.2721128463745117, "rewards/accuracies": 0.625, "rewards/chosen": -0.38648560643196106, "rewards/margins": -0.0052904486656188965, "rewards/rejected": -0.3811951279640198, "step": 380 }, { "epoch": 1.0431211498973305, "grad_norm": 5.023849964141846, "learning_rate": 4.780821917808219e-07, "log_odds_chosen": -0.07030405104160309, "log_odds_ratio": -0.9247550368309021, "logits/chosen": 0.005172554403543472, "logits/rejected": -0.0189129039645195, "logps/chosen": -2.900688886642456, "logps/rejected": -2.8118350505828857, "loss": 2.3267, "nll_loss": 2.2341806888580322, "rewards/accuracies": 0.5, "rewards/chosen": -0.29006892442703247, "rewards/margins": -0.008885413408279419, "rewards/rejected": -0.28118351101875305, "step": 381 }, { "epoch": 1.0458590006844628, "grad_norm": 4.756534576416016, "learning_rate": 4.7671232876712324e-07, "log_odds_chosen": -0.11676640808582306, "log_odds_ratio": -0.9643990993499756, "logits/chosen": 0.033027805387973785, "logits/rejected": 0.01537320762872696, "logps/chosen": -2.764939308166504, "logps/rejected": -2.5988471508026123, "loss": 2.3219, "nll_loss": 2.2254576683044434, "rewards/accuracies": 0.625, "rewards/chosen": -0.27649393677711487, "rewards/margins": -0.016609208658337593, "rewards/rejected": -0.25988471508026123, "step": 382 }, { "epoch": 1.0485968514715949, "grad_norm": 4.643230438232422, "learning_rate": 4.7534246575342465e-07, "log_odds_chosen": 0.2590058147907257, "log_odds_ratio": -0.5832602977752686, "logits/chosen": 0.01789967715740204, "logits/rejected": -0.018647748976945877, "logps/chosen": -2.3996071815490723, "logps/rejected": -2.6234679222106934, "loss": 2.2695, "nll_loss": 2.2111752033233643, "rewards/accuracies": 0.75, "rewards/chosen": -0.239960715174675, "rewards/margins": 0.022386079654097557, "rewards/rejected": -0.2623468041419983, "step": 383 }, { "epoch": 1.051334702258727, "grad_norm": 5.349544525146484, "learning_rate": 4.73972602739726e-07, "log_odds_chosen": 0.39352354407310486, "log_odds_ratio": -0.7821465730667114, "logits/chosen": 0.07762517780065536, "logits/rejected": 0.05221031233668327, "logps/chosen": -3.2707650661468506, "logps/rejected": -3.640747547149658, "loss": 2.3648, "nll_loss": 2.2865519523620605, "rewards/accuracies": 0.625, "rewards/chosen": -0.3270764946937561, "rewards/margins": 0.036998260766267776, "rewards/rejected": -0.3640747666358948, "step": 384 }, { "epoch": 1.054072553045859, "grad_norm": 4.809791564941406, "learning_rate": 4.726027397260274e-07, "log_odds_chosen": -0.1907552182674408, "log_odds_ratio": -0.9655675888061523, "logits/chosen": -0.054453060030937195, "logits/rejected": -0.036064498126506805, "logps/chosen": -2.8532779216766357, "logps/rejected": -2.634531259536743, "loss": 2.3692, "nll_loss": 2.2726805210113525, "rewards/accuracies": 0.5, "rewards/chosen": -0.2853277921676636, "rewards/margins": -0.02187465690076351, "rewards/rejected": -0.2634531259536743, "step": 385 }, { "epoch": 1.056810403832991, "grad_norm": 4.528129577636719, "learning_rate": 4.7123287671232874e-07, "log_odds_chosen": 0.9293969869613647, "log_odds_ratio": -0.5286825895309448, "logits/chosen": 0.023556187748908997, "logits/rejected": -0.03155913203954697, "logps/chosen": -2.211449146270752, "logps/rejected": -3.045412540435791, "loss": 2.2713, "nll_loss": 2.2183964252471924, "rewards/accuracies": 0.75, "rewards/chosen": -0.2211449146270752, "rewards/margins": 0.08339633792638779, "rewards/rejected": -0.3045412600040436, "step": 386 }, { "epoch": 1.0595482546201231, "grad_norm": Infinity, "learning_rate": 4.7123287671232874e-07, "log_odds_chosen": -1.224727749824524, "log_odds_ratio": -2.0935583114624023, "logits/chosen": 0.005170188844203949, "logits/rejected": 0.01016082614660263, "logps/chosen": -4.164824962615967, "logps/rejected": -2.9304447174072266, "loss": 2.4088, "nll_loss": 2.199441432952881, "rewards/accuracies": 0.375, "rewards/chosen": -0.41648250818252563, "rewards/margins": -0.12343801558017731, "rewards/rejected": -0.2930445075035095, "step": 387 }, { "epoch": 1.0622861054072552, "grad_norm": 5.042448043823242, "learning_rate": 4.6986301369863015e-07, "log_odds_chosen": -0.07995343953371048, "log_odds_ratio": -0.8668429851531982, "logits/chosen": -0.07994567602872849, "logits/rejected": -0.09483615309000015, "logps/chosen": -3.1129183769226074, "logps/rejected": -3.0152463912963867, "loss": 2.3782, "nll_loss": 2.2914881706237793, "rewards/accuracies": 0.5, "rewards/chosen": -0.3112918436527252, "rewards/margins": -0.009767191484570503, "rewards/rejected": -0.30152463912963867, "step": 388 }, { "epoch": 1.0650239561943875, "grad_norm": 5.417836666107178, "learning_rate": 4.684931506849315e-07, "log_odds_chosen": -0.7087728381156921, "log_odds_ratio": -1.1641706228256226, "logits/chosen": 0.0033634917344897985, "logits/rejected": 0.06731382757425308, "logps/chosen": -3.476639986038208, "logps/rejected": -2.788933515548706, "loss": 2.5112, "nll_loss": 2.394766330718994, "rewards/accuracies": 0.125, "rewards/chosen": -0.3476639986038208, "rewards/margins": -0.0687706395983696, "rewards/rejected": -0.2788933515548706, "step": 389 }, { "epoch": 1.0677618069815196, "grad_norm": 5.263003826141357, "learning_rate": 4.671232876712329e-07, "log_odds_chosen": -0.2923949062824249, "log_odds_ratio": -1.0213072299957275, "logits/chosen": 0.0670640841126442, "logits/rejected": 0.09948022663593292, "logps/chosen": -3.480116367340088, "logps/rejected": -3.2070937156677246, "loss": 2.5306, "nll_loss": 2.4284658432006836, "rewards/accuracies": 0.5, "rewards/chosen": -0.34801164269447327, "rewards/margins": -0.027302253991365433, "rewards/rejected": -0.32070937752723694, "step": 390 }, { "epoch": 1.0704996577686516, "grad_norm": 5.075218200683594, "learning_rate": 4.657534246575342e-07, "log_odds_chosen": -1.5030109882354736, "log_odds_ratio": -2.0591392517089844, "logits/chosen": -0.105323925614357, "logits/rejected": -0.14008685946464539, "logps/chosen": -4.1197052001953125, "logps/rejected": -2.58528470993042, "loss": 2.4634, "nll_loss": 2.2574784755706787, "rewards/accuracies": 0.5, "rewards/chosen": -0.4119705259799957, "rewards/margins": -0.15344206988811493, "rewards/rejected": -0.258528470993042, "step": 391 }, { "epoch": 1.0732375085557837, "grad_norm": 4.501094818115234, "learning_rate": 4.643835616438356e-07, "log_odds_chosen": 0.12421564012765884, "log_odds_ratio": -0.6750105023384094, "logits/chosen": 0.08600734919309616, "logits/rejected": 0.03662235289812088, "logps/chosen": -2.4019057750701904, "logps/rejected": -2.4746077060699463, "loss": 2.2719, "nll_loss": 2.204350233078003, "rewards/accuracies": 0.625, "rewards/chosen": -0.24019058048725128, "rewards/margins": 0.007270200178027153, "rewards/rejected": -0.24746078252792358, "step": 392 }, { "epoch": 1.0759753593429158, "grad_norm": 5.350184917449951, "learning_rate": 4.6301369863013696e-07, "log_odds_chosen": -0.8374404311180115, "log_odds_ratio": -1.417017936706543, "logits/chosen": 0.09659107774496078, "logits/rejected": 0.158115416765213, "logps/chosen": -3.6425700187683105, "logps/rejected": -2.8366482257843018, "loss": 2.4015, "nll_loss": 2.2598209381103516, "rewards/accuracies": 0.25, "rewards/chosen": -0.36425700783729553, "rewards/margins": -0.08059217035770416, "rewards/rejected": -0.2836648225784302, "step": 393 }, { "epoch": 1.0787132101300478, "grad_norm": 5.004094123840332, "learning_rate": 4.616438356164383e-07, "log_odds_chosen": 0.03648257255554199, "log_odds_ratio": -0.7389318346977234, "logits/chosen": -0.03709302097558975, "logits/rejected": 0.014902663417160511, "logps/chosen": -2.943631887435913, "logps/rejected": -2.933053731918335, "loss": 2.2848, "nll_loss": 2.2109036445617676, "rewards/accuracies": 0.5, "rewards/chosen": -0.29436320066452026, "rewards/margins": -0.001057824119925499, "rewards/rejected": -0.2933053970336914, "step": 394 }, { "epoch": 1.0814510609171801, "grad_norm": 4.613653182983398, "learning_rate": 4.602739726027397e-07, "log_odds_chosen": 0.28650742769241333, "log_odds_ratio": -0.7458634376525879, "logits/chosen": 0.09195517748594284, "logits/rejected": 0.018861956894397736, "logps/chosen": -2.5861196517944336, "logps/rejected": -2.876814365386963, "loss": 2.304, "nll_loss": 2.229414701461792, "rewards/accuracies": 0.625, "rewards/chosen": -0.2586119771003723, "rewards/margins": 0.029069455340504646, "rewards/rejected": -0.2876814603805542, "step": 395 }, { "epoch": 1.0841889117043122, "grad_norm": 5.3887224197387695, "learning_rate": 4.589041095890411e-07, "log_odds_chosen": -0.8451989889144897, "log_odds_ratio": -1.3752379417419434, "logits/chosen": -0.08137476444244385, "logits/rejected": -0.09716072678565979, "logps/chosen": -3.6603951454162598, "logps/rejected": -2.832125186920166, "loss": 2.5217, "nll_loss": 2.3842227458953857, "rewards/accuracies": 0.375, "rewards/chosen": -0.366039514541626, "rewards/margins": -0.08282699435949326, "rewards/rejected": -0.2832125425338745, "step": 396 }, { "epoch": 1.0869267624914443, "grad_norm": 4.890497207641602, "learning_rate": 4.5753424657534246e-07, "log_odds_chosen": -0.11476224660873413, "log_odds_ratio": -1.1401735544204712, "logits/chosen": -0.026284202933311462, "logits/rejected": -0.12409505248069763, "logps/chosen": -3.0331435203552246, "logps/rejected": -2.9192492961883545, "loss": 2.3562, "nll_loss": 2.242135763168335, "rewards/accuracies": 0.375, "rewards/chosen": -0.30331435799598694, "rewards/margins": -0.011389443650841713, "rewards/rejected": -0.29192492365837097, "step": 397 }, { "epoch": 1.0896646132785763, "grad_norm": 4.666297435760498, "learning_rate": 4.561643835616438e-07, "log_odds_chosen": 0.10677388310432434, "log_odds_ratio": -0.8687868118286133, "logits/chosen": 0.009997975081205368, "logits/rejected": -0.02988889254629612, "logps/chosen": -2.4022059440612793, "logps/rejected": -2.4889464378356934, "loss": 2.3516, "nll_loss": 2.2647604942321777, "rewards/accuracies": 0.5, "rewards/chosen": -0.24022060632705688, "rewards/margins": 0.008674047887325287, "rewards/rejected": -0.24889466166496277, "step": 398 }, { "epoch": 1.0924024640657084, "grad_norm": 5.756850242614746, "learning_rate": 4.547945205479452e-07, "log_odds_chosen": -0.9015145301818848, "log_odds_ratio": -1.5495609045028687, "logits/chosen": 0.011329762637615204, "logits/rejected": 0.011600010097026825, "logps/chosen": -4.142426490783691, "logps/rejected": -3.2353134155273438, "loss": 2.4538, "nll_loss": 2.298842430114746, "rewards/accuracies": 0.375, "rewards/chosen": -0.4142426550388336, "rewards/margins": -0.09071129560470581, "rewards/rejected": -0.3235313594341278, "step": 399 }, { "epoch": 1.0951403148528405, "grad_norm": 4.26601505279541, "learning_rate": 4.534246575342466e-07, "log_odds_chosen": 0.40464484691619873, "log_odds_ratio": -0.586471676826477, "logits/chosen": 0.09806948900222778, "logits/rejected": -0.010234665125608444, "logps/chosen": -2.138322353363037, "logps/rejected": -2.5236053466796875, "loss": 2.2129, "nll_loss": 2.1542701721191406, "rewards/accuracies": 0.625, "rewards/chosen": -0.21383222937583923, "rewards/margins": 0.038528308272361755, "rewards/rejected": -0.2523605227470398, "step": 400 }, { "epoch": 1.0978781656399725, "grad_norm": 5.892656326293945, "learning_rate": 4.520547945205479e-07, "log_odds_chosen": -0.6440671682357788, "log_odds_ratio": -1.1378921270370483, "logits/chosen": 0.06533412635326385, "logits/rejected": 0.19057035446166992, "logps/chosen": -3.45481014251709, "logps/rejected": -2.8467493057250977, "loss": 2.4545, "nll_loss": 2.3406810760498047, "rewards/accuracies": 0.375, "rewards/chosen": -0.3454810380935669, "rewards/margins": -0.06080609932541847, "rewards/rejected": -0.2846749424934387, "step": 401 }, { "epoch": 1.1006160164271048, "grad_norm": 5.731852054595947, "learning_rate": 4.5068493150684927e-07, "log_odds_chosen": -0.7315272688865662, "log_odds_ratio": -1.405369758605957, "logits/chosen": 0.03438958898186684, "logits/rejected": 0.12068842351436615, "logps/chosen": -3.923351287841797, "logps/rejected": -3.185575246810913, "loss": 2.4778, "nll_loss": 2.3372726440429688, "rewards/accuracies": 0.5, "rewards/chosen": -0.3923351764678955, "rewards/margins": -0.07377763092517853, "rewards/rejected": -0.3185575008392334, "step": 402 }, { "epoch": 1.103353867214237, "grad_norm": 6.089354038238525, "learning_rate": 4.4931506849315063e-07, "log_odds_chosen": -0.8171826601028442, "log_odds_ratio": -1.4427666664123535, "logits/chosen": 0.1567404717206955, "logits/rejected": 0.16172659397125244, "logps/chosen": -4.274406433105469, "logps/rejected": -3.4423859119415283, "loss": 2.3975, "nll_loss": 2.2532317638397217, "rewards/accuracies": 0.375, "rewards/chosen": -0.4274406433105469, "rewards/margins": -0.083202064037323, "rewards/rejected": -0.3442385792732239, "step": 403 }, { "epoch": 1.106091718001369, "grad_norm": 4.875988483428955, "learning_rate": 4.4794520547945205e-07, "log_odds_chosen": 0.14539144933223724, "log_odds_ratio": -0.8401167392730713, "logits/chosen": 0.12276140600442886, "logits/rejected": 0.11103425174951553, "logps/chosen": -2.80849027633667, "logps/rejected": -2.9358856678009033, "loss": 2.3906, "nll_loss": 2.306551456451416, "rewards/accuracies": 0.375, "rewards/chosen": -0.28084900975227356, "rewards/margins": 0.012739565223455429, "rewards/rejected": -0.2935885787010193, "step": 404 }, { "epoch": 1.108829568788501, "grad_norm": 4.733882427215576, "learning_rate": 4.465753424657534e-07, "log_odds_chosen": -0.16852039098739624, "log_odds_ratio": -0.8302285671234131, "logits/chosen": -0.11202071607112885, "logits/rejected": -0.1003677099943161, "logps/chosen": -2.9291553497314453, "logps/rejected": -2.7588181495666504, "loss": 2.3731, "nll_loss": 2.2901058197021484, "rewards/accuracies": 0.5, "rewards/chosen": -0.2929155230522156, "rewards/margins": -0.017033709213137627, "rewards/rejected": -0.275881826877594, "step": 405 }, { "epoch": 1.111567419575633, "grad_norm": 4.839572906494141, "learning_rate": 4.4520547945205477e-07, "log_odds_chosen": 0.8738216161727905, "log_odds_ratio": -0.6447734236717224, "logits/chosen": -0.0035543255507946014, "logits/rejected": -0.03280435502529144, "logps/chosen": -2.576500177383423, "logps/rejected": -3.4099135398864746, "loss": 2.2301, "nll_loss": 2.1656224727630615, "rewards/accuracies": 0.625, "rewards/chosen": -0.2576500177383423, "rewards/margins": 0.08334135264158249, "rewards/rejected": -0.34099137783050537, "step": 406 }, { "epoch": 1.1143052703627652, "grad_norm": 5.893270492553711, "learning_rate": 4.4383561643835613e-07, "log_odds_chosen": -0.6505089998245239, "log_odds_ratio": -1.1640396118164062, "logits/chosen": -0.024860680103302002, "logits/rejected": 0.043922703713178635, "logps/chosen": -3.8622946739196777, "logps/rejected": -3.216883420944214, "loss": 2.4087, "nll_loss": 2.2922487258911133, "rewards/accuracies": 0.25, "rewards/chosen": -0.3862294554710388, "rewards/margins": -0.06454111635684967, "rewards/rejected": -0.32168835401535034, "step": 407 }, { "epoch": 1.1170431211498972, "grad_norm": 5.57807731628418, "learning_rate": 4.4246575342465755e-07, "log_odds_chosen": -1.0690479278564453, "log_odds_ratio": -1.439133882522583, "logits/chosen": -0.052530623972415924, "logits/rejected": -0.030300632119178772, "logps/chosen": -3.8780243396759033, "logps/rejected": -2.8457155227661133, "loss": 2.4636, "nll_loss": 2.3196990489959717, "rewards/accuracies": 0.125, "rewards/chosen": -0.38780245184898376, "rewards/margins": -0.10323087871074677, "rewards/rejected": -0.2845715582370758, "step": 408 }, { "epoch": 1.1197809719370295, "grad_norm": 6.266043663024902, "learning_rate": 4.410958904109589e-07, "log_odds_chosen": -1.3925626277923584, "log_odds_ratio": -1.6826658248901367, "logits/chosen": 0.016271699219942093, "logits/rejected": 0.14834582805633545, "logps/chosen": -4.5913004875183105, "logps/rejected": -3.2267980575561523, "loss": 2.5562, "nll_loss": 2.3879051208496094, "rewards/accuracies": 0.125, "rewards/chosen": -0.45913001894950867, "rewards/margins": -0.13645021617412567, "rewards/rejected": -0.3226798176765442, "step": 409 }, { "epoch": 1.1225188227241616, "grad_norm": 4.678506374359131, "learning_rate": 4.397260273972603e-07, "log_odds_chosen": 0.05179817974567413, "log_odds_ratio": -0.7642968893051147, "logits/chosen": 0.18166837096214294, "logits/rejected": 0.17617671191692352, "logps/chosen": -2.554231643676758, "logps/rejected": -2.599766731262207, "loss": 2.2898, "nll_loss": 2.2133398056030273, "rewards/accuracies": 0.5, "rewards/chosen": -0.2554231882095337, "rewards/margins": 0.004553500562906265, "rewards/rejected": -0.25997668504714966, "step": 410 }, { "epoch": 1.1252566735112937, "grad_norm": 5.079634666442871, "learning_rate": 4.383561643835616e-07, "log_odds_chosen": 0.11370605230331421, "log_odds_ratio": -0.8739943504333496, "logits/chosen": -0.13030792772769928, "logits/rejected": -0.14292779564857483, "logps/chosen": -2.87604022026062, "logps/rejected": -2.9959278106689453, "loss": 2.3452, "nll_loss": 2.2577667236328125, "rewards/accuracies": 0.5, "rewards/chosen": -0.2876040041446686, "rewards/margins": 0.01198878139257431, "rewards/rejected": -0.2995927929878235, "step": 411 }, { "epoch": 1.1279945242984257, "grad_norm": 5.025259494781494, "learning_rate": 4.36986301369863e-07, "log_odds_chosen": -0.4763372540473938, "log_odds_ratio": -1.100449562072754, "logits/chosen": -0.05287010967731476, "logits/rejected": -0.0662466436624527, "logps/chosen": -3.118692398071289, "logps/rejected": -2.648695230484009, "loss": 2.3824, "nll_loss": 2.2723593711853027, "rewards/accuracies": 0.375, "rewards/chosen": -0.3118692636489868, "rewards/margins": -0.0469997301697731, "rewards/rejected": -0.2648695409297943, "step": 412 }, { "epoch": 1.1307323750855578, "grad_norm": 5.244449615478516, "learning_rate": 4.3561643835616436e-07, "log_odds_chosen": 0.09541428089141846, "log_odds_ratio": -0.7020537853240967, "logits/chosen": -0.11573384702205658, "logits/rejected": -0.028682544827461243, "logps/chosen": -2.367044687271118, "logps/rejected": -2.4302332401275635, "loss": 2.2647, "nll_loss": 2.1945056915283203, "rewards/accuracies": 0.625, "rewards/chosen": -0.23670446872711182, "rewards/margins": 0.006318834610283375, "rewards/rejected": -0.24302330613136292, "step": 413 }, { "epoch": 1.1334702258726899, "grad_norm": 5.226861953735352, "learning_rate": 4.342465753424657e-07, "log_odds_chosen": -0.159830242395401, "log_odds_ratio": -0.9033122658729553, "logits/chosen": 0.06566141545772552, "logits/rejected": 0.09473785758018494, "logps/chosen": -3.300760269165039, "logps/rejected": -3.146719455718994, "loss": 2.3246, "nll_loss": 2.234271287918091, "rewards/accuracies": 0.5, "rewards/chosen": -0.33007603883743286, "rewards/margins": -0.015404080972075462, "rewards/rejected": -0.31467193365097046, "step": 414 }, { "epoch": 1.136208076659822, "grad_norm": 5.226866722106934, "learning_rate": 4.328767123287671e-07, "log_odds_chosen": -0.5697858929634094, "log_odds_ratio": -1.1693246364593506, "logits/chosen": 0.03815573453903198, "logits/rejected": 0.08240340650081635, "logps/chosen": -3.3523149490356445, "logps/rejected": -2.807058334350586, "loss": 2.411, "nll_loss": 2.294095039367676, "rewards/accuracies": 0.25, "rewards/chosen": -0.3352314829826355, "rewards/margins": -0.05452564358711243, "rewards/rejected": -0.28070583939552307, "step": 415 }, { "epoch": 1.1389459274469542, "grad_norm": 5.85351037979126, "learning_rate": 4.315068493150685e-07, "log_odds_chosen": -0.5193745493888855, "log_odds_ratio": -1.1182255744934082, "logits/chosen": 0.0042268214747309685, "logits/rejected": 0.07935918867588043, "logps/chosen": -3.297212839126587, "logps/rejected": -2.777817726135254, "loss": 2.387, "nll_loss": 2.2751331329345703, "rewards/accuracies": 0.375, "rewards/chosen": -0.32972127199172974, "rewards/margins": -0.05193948745727539, "rewards/rejected": -0.27778181433677673, "step": 416 }, { "epoch": 1.1416837782340863, "grad_norm": 5.2029619216918945, "learning_rate": 4.3013698630136986e-07, "log_odds_chosen": -0.4460708200931549, "log_odds_ratio": -1.008157730102539, "logits/chosen": 0.118003249168396, "logits/rejected": 0.16368994116783142, "logps/chosen": -3.1738815307617188, "logps/rejected": -2.7652218341827393, "loss": 2.4779, "nll_loss": 2.3770902156829834, "rewards/accuracies": 0.125, "rewards/chosen": -0.3173881769180298, "rewards/margins": -0.040865980088710785, "rewards/rejected": -0.276522159576416, "step": 417 }, { "epoch": 1.1444216290212184, "grad_norm": 5.3958587646484375, "learning_rate": 4.287671232876712e-07, "log_odds_chosen": -0.040702756494283676, "log_odds_ratio": -0.7328950762748718, "logits/chosen": 0.04464839771389961, "logits/rejected": 0.004531826823949814, "logps/chosen": -2.9893221855163574, "logps/rejected": -2.9631810188293457, "loss": 2.3548, "nll_loss": 2.2815096378326416, "rewards/accuracies": 0.5, "rewards/chosen": -0.2989322245121002, "rewards/margins": -0.0026141442358493805, "rewards/rejected": -0.29631805419921875, "step": 418 }, { "epoch": 1.1471594798083504, "grad_norm": 5.62909460067749, "learning_rate": 4.273972602739726e-07, "log_odds_chosen": -0.1402309536933899, "log_odds_ratio": -0.9706823229789734, "logits/chosen": 0.04744041711091995, "logits/rejected": 0.0970272570848465, "logps/chosen": -3.4842610359191895, "logps/rejected": -3.2925310134887695, "loss": 2.3188, "nll_loss": 2.22174072265625, "rewards/accuracies": 0.625, "rewards/chosen": -0.34842610359191895, "rewards/margins": -0.019173022359609604, "rewards/rejected": -0.32925310730934143, "step": 419 }, { "epoch": 1.1498973305954825, "grad_norm": 5.100620269775391, "learning_rate": 4.26027397260274e-07, "log_odds_chosen": 0.42866796255111694, "log_odds_ratio": -0.6619006395339966, "logits/chosen": 0.0329519547522068, "logits/rejected": 0.04991436377167702, "logps/chosen": -2.7379531860351562, "logps/rejected": -3.0916779041290283, "loss": 2.2452, "nll_loss": 2.179022789001465, "rewards/accuracies": 0.625, "rewards/chosen": -0.27379533648490906, "rewards/margins": 0.03537246584892273, "rewards/rejected": -0.3091678023338318, "step": 420 }, { "epoch": 1.1526351813826146, "grad_norm": 5.695131301879883, "learning_rate": 4.246575342465753e-07, "log_odds_chosen": -0.31435760855674744, "log_odds_ratio": -0.9232361912727356, "logits/chosen": -0.06910122185945511, "logits/rejected": 0.015930429100990295, "logps/chosen": -3.9220376014709473, "logps/rejected": -3.6239962577819824, "loss": 2.5083, "nll_loss": 2.415956497192383, "rewards/accuracies": 0.25, "rewards/chosen": -0.39220377802848816, "rewards/margins": -0.029804140329360962, "rewards/rejected": -0.3623996376991272, "step": 421 }, { "epoch": 1.1553730321697468, "grad_norm": 4.937402248382568, "learning_rate": 4.2328767123287667e-07, "log_odds_chosen": 0.008844926953315735, "log_odds_ratio": -0.8334065675735474, "logits/chosen": -0.04878664016723633, "logits/rejected": -0.04740029573440552, "logps/chosen": -3.0775835514068604, "logps/rejected": -3.056251049041748, "loss": 2.3227, "nll_loss": 2.239346981048584, "rewards/accuracies": 0.625, "rewards/chosen": -0.3077583611011505, "rewards/margins": -0.0021332427859306335, "rewards/rejected": -0.3056251108646393, "step": 422 }, { "epoch": 1.158110882956879, "grad_norm": 4.461740493774414, "learning_rate": 4.2191780821917803e-07, "log_odds_chosen": 0.5741345882415771, "log_odds_ratio": -0.8326670527458191, "logits/chosen": 0.08781367540359497, "logits/rejected": 0.02053842321038246, "logps/chosen": -2.5979621410369873, "logps/rejected": -3.105987071990967, "loss": 2.3007, "nll_loss": 2.217406988143921, "rewards/accuracies": 0.875, "rewards/chosen": -0.2597962021827698, "rewards/margins": 0.05080248415470123, "rewards/rejected": -0.3105987012386322, "step": 423 }, { "epoch": 1.160848733744011, "grad_norm": 4.943958282470703, "learning_rate": 4.2054794520547945e-07, "log_odds_chosen": -0.08555913716554642, "log_odds_ratio": -0.8768163919448853, "logits/chosen": 0.17046010494232178, "logits/rejected": 0.12751667201519012, "logps/chosen": -2.685103178024292, "logps/rejected": -2.5579564571380615, "loss": 2.2858, "nll_loss": 2.1980903148651123, "rewards/accuracies": 0.5, "rewards/chosen": -0.2685103416442871, "rewards/margins": -0.012714684009552002, "rewards/rejected": -0.2557956278324127, "step": 424 }, { "epoch": 1.163586584531143, "grad_norm": 5.661214351654053, "learning_rate": 4.191780821917808e-07, "log_odds_chosen": -0.14769473671913147, "log_odds_ratio": -0.8182895183563232, "logits/chosen": -0.01863884925842285, "logits/rejected": 0.04830760881304741, "logps/chosen": -3.2134504318237305, "logps/rejected": -3.0654947757720947, "loss": 2.3931, "nll_loss": 2.311295509338379, "rewards/accuracies": 0.25, "rewards/chosen": -0.3213450610637665, "rewards/margins": -0.014795588329434395, "rewards/rejected": -0.3065494894981384, "step": 425 }, { "epoch": 1.1663244353182751, "grad_norm": 5.998075485229492, "learning_rate": 4.1780821917808217e-07, "log_odds_chosen": 0.15378260612487793, "log_odds_ratio": -1.2818543910980225, "logits/chosen": 0.023111391812562943, "logits/rejected": 0.11409363150596619, "logps/chosen": -4.18448543548584, "logps/rejected": -4.276222229003906, "loss": 2.4533, "nll_loss": 2.3250880241394043, "rewards/accuracies": 0.25, "rewards/chosen": -0.4184485375881195, "rewards/margins": 0.009173724800348282, "rewards/rejected": -0.4276222586631775, "step": 426 }, { "epoch": 1.1690622861054072, "grad_norm": 5.027435779571533, "learning_rate": 4.1643835616438353e-07, "log_odds_chosen": -0.4404062330722809, "log_odds_ratio": -1.2338910102844238, "logits/chosen": 0.11598196625709534, "logits/rejected": 0.1821243166923523, "logps/chosen": -3.2327232360839844, "logps/rejected": -2.8018064498901367, "loss": 2.3492, "nll_loss": 2.2258262634277344, "rewards/accuracies": 0.5, "rewards/chosen": -0.32327234745025635, "rewards/margins": -0.04309168830513954, "rewards/rejected": -0.2801806330680847, "step": 427 }, { "epoch": 1.1718001368925393, "grad_norm": 6.048883438110352, "learning_rate": 4.1506849315068495e-07, "log_odds_chosen": -1.3832123279571533, "log_odds_ratio": -1.8150864839553833, "logits/chosen": 0.1211610808968544, "logits/rejected": 0.12895575165748596, "logps/chosen": -4.158782005310059, "logps/rejected": -2.788921594619751, "loss": 2.4791, "nll_loss": 2.2976253032684326, "rewards/accuracies": 0.375, "rewards/chosen": -0.41587817668914795, "rewards/margins": -0.13698601722717285, "rewards/rejected": -0.2788921594619751, "step": 428 }, { "epoch": 1.1745379876796715, "grad_norm": 5.228074550628662, "learning_rate": 4.136986301369863e-07, "log_odds_chosen": -0.405284583568573, "log_odds_ratio": -1.0469324588775635, "logits/chosen": -0.023255418986082077, "logits/rejected": -0.01744154840707779, "logps/chosen": -2.952859401702881, "logps/rejected": -2.5357747077941895, "loss": 2.3409, "nll_loss": 2.236161470413208, "rewards/accuracies": 0.5, "rewards/chosen": -0.2952859401702881, "rewards/margins": -0.04170847311615944, "rewards/rejected": -0.25357747077941895, "step": 429 }, { "epoch": 1.1772758384668036, "grad_norm": 5.5170369148254395, "learning_rate": 4.1232876712328767e-07, "log_odds_chosen": -0.23314699530601501, "log_odds_ratio": -0.9600057601928711, "logits/chosen": 0.12225230038166046, "logits/rejected": 0.21065682172775269, "logps/chosen": -3.404064655303955, "logps/rejected": -3.1465606689453125, "loss": 2.4368, "nll_loss": 2.340756893157959, "rewards/accuracies": 0.375, "rewards/chosen": -0.34040647745132446, "rewards/margins": -0.025750400498509407, "rewards/rejected": -0.3146560788154602, "step": 430 }, { "epoch": 1.1800136892539357, "grad_norm": 4.5604023933410645, "learning_rate": 4.10958904109589e-07, "log_odds_chosen": 0.47425955533981323, "log_odds_ratio": -0.535810649394989, "logits/chosen": -0.09913605451583862, "logits/rejected": -0.10769333690404892, "logps/chosen": -2.201857089996338, "logps/rejected": -2.5965261459350586, "loss": 2.163, "nll_loss": 2.10943341255188, "rewards/accuracies": 0.75, "rewards/chosen": -0.22018572688102722, "rewards/margins": 0.03946688771247864, "rewards/rejected": -0.25965261459350586, "step": 431 }, { "epoch": 1.1827515400410678, "grad_norm": 4.955456256866455, "learning_rate": 4.095890410958904e-07, "log_odds_chosen": -0.37585902214050293, "log_odds_ratio": -1.1610127687454224, "logits/chosen": -0.021536577492952347, "logits/rejected": -0.031454432755708694, "logps/chosen": -3.345940589904785, "logps/rejected": -2.924708127975464, "loss": 2.3497, "nll_loss": 2.233586311340332, "rewards/accuracies": 0.75, "rewards/chosen": -0.33459407091140747, "rewards/margins": -0.04212326556444168, "rewards/rejected": -0.2924708127975464, "step": 432 }, { "epoch": 1.1854893908281998, "grad_norm": 4.869629859924316, "learning_rate": 4.0821917808219176e-07, "log_odds_chosen": 0.49114349484443665, "log_odds_ratio": -0.8851611614227295, "logits/chosen": 0.0422031432390213, "logits/rejected": -0.07309658825397491, "logps/chosen": -2.5655388832092285, "logps/rejected": -3.000309467315674, "loss": 2.3407, "nll_loss": 2.252178907394409, "rewards/accuracies": 0.625, "rewards/chosen": -0.25655388832092285, "rewards/margins": 0.043477047234773636, "rewards/rejected": -0.3000309467315674, "step": 433 }, { "epoch": 1.1882272416153319, "grad_norm": 4.49625301361084, "learning_rate": 4.068493150684931e-07, "log_odds_chosen": -0.17225173115730286, "log_odds_ratio": -0.8467838764190674, "logits/chosen": -0.10300610214471817, "logits/rejected": -0.16532133519649506, "logps/chosen": -2.521907329559326, "logps/rejected": -2.351069450378418, "loss": 2.3395, "nll_loss": 2.2548108100891113, "rewards/accuracies": 0.625, "rewards/chosen": -0.2521907687187195, "rewards/margins": -0.01708379201591015, "rewards/rejected": -0.2351069450378418, "step": 434 }, { "epoch": 1.1909650924024642, "grad_norm": 6.115157127380371, "learning_rate": 4.054794520547945e-07, "log_odds_chosen": -0.9094139337539673, "log_odds_ratio": -1.3699438571929932, "logits/chosen": 0.006653893738985062, "logits/rejected": 0.1001754179596901, "logps/chosen": -3.714550256729126, "logps/rejected": -2.83139967918396, "loss": 2.3896, "nll_loss": 2.2526450157165527, "rewards/accuracies": 0.25, "rewards/chosen": -0.37145504355430603, "rewards/margins": -0.08831509947776794, "rewards/rejected": -0.2831399440765381, "step": 435 }, { "epoch": 1.1937029431895962, "grad_norm": 4.734996795654297, "learning_rate": 4.041095890410959e-07, "log_odds_chosen": 0.566529393196106, "log_odds_ratio": -0.6164541840553284, "logits/chosen": 0.03299278765916824, "logits/rejected": -0.11178945004940033, "logps/chosen": -3.362462282180786, "logps/rejected": -3.857870101928711, "loss": 2.3046, "nll_loss": 2.24298095703125, "rewards/accuracies": 0.5, "rewards/chosen": -0.3362462520599365, "rewards/margins": 0.049540769308805466, "rewards/rejected": -0.3857870101928711, "step": 436 }, { "epoch": 1.1964407939767283, "grad_norm": 5.089818477630615, "learning_rate": 4.0273972602739726e-07, "log_odds_chosen": -0.08485670387744904, "log_odds_ratio": -0.8969945907592773, "logits/chosen": -0.06501057744026184, "logits/rejected": -0.055723126977682114, "logps/chosen": -2.811077117919922, "logps/rejected": -2.7013399600982666, "loss": 2.3592, "nll_loss": 2.2695491313934326, "rewards/accuracies": 0.5, "rewards/chosen": -0.28110772371292114, "rewards/margins": -0.010973721742630005, "rewards/rejected": -0.27013400197029114, "step": 437 }, { "epoch": 1.1991786447638604, "grad_norm": 5.065425872802734, "learning_rate": 4.013698630136986e-07, "log_odds_chosen": -0.3935747742652893, "log_odds_ratio": -0.9852059483528137, "logits/chosen": -0.08052230626344681, "logits/rejected": -0.0672333613038063, "logps/chosen": -3.3722360134124756, "logps/rejected": -2.976762294769287, "loss": 2.3365, "nll_loss": 2.2380058765411377, "rewards/accuracies": 0.375, "rewards/chosen": -0.337223619222641, "rewards/margins": -0.0395473875105381, "rewards/rejected": -0.2976762354373932, "step": 438 }, { "epoch": 1.2019164955509924, "grad_norm": 4.157912254333496, "learning_rate": 4e-07, "log_odds_chosen": 0.8066654801368713, "log_odds_ratio": -0.7023454308509827, "logits/chosen": 0.19416674971580505, "logits/rejected": 0.07940132170915604, "logps/chosen": -1.999971866607666, "logps/rejected": -2.778855323791504, "loss": 2.171, "nll_loss": 2.1007702350616455, "rewards/accuracies": 0.625, "rewards/chosen": -0.1999972015619278, "rewards/margins": 0.07788833975791931, "rewards/rejected": -0.2778855562210083, "step": 439 }, { "epoch": 1.2046543463381245, "grad_norm": 6.3766889572143555, "learning_rate": 3.9863013698630134e-07, "log_odds_chosen": -0.7610796689987183, "log_odds_ratio": -1.2295639514923096, "logits/chosen": 0.15381640195846558, "logits/rejected": 0.2857794165611267, "logps/chosen": -3.9424076080322266, "logps/rejected": -3.2227959632873535, "loss": 2.4842, "nll_loss": 2.3612470626831055, "rewards/accuracies": 0.25, "rewards/chosen": -0.3942407965660095, "rewards/margins": -0.0719611644744873, "rewards/rejected": -0.3222796320915222, "step": 440 }, { "epoch": 1.2073921971252566, "grad_norm": 6.295462608337402, "learning_rate": 3.972602739726027e-07, "log_odds_chosen": -1.4003329277038574, "log_odds_ratio": -2.0869739055633545, "logits/chosen": 0.06913314759731293, "logits/rejected": 0.11308316886425018, "logps/chosen": -4.832578659057617, "logps/rejected": -3.4559314250946045, "loss": 2.4975, "nll_loss": 2.2887697219848633, "rewards/accuracies": 0.25, "rewards/chosen": -0.48325785994529724, "rewards/margins": -0.13766470551490784, "rewards/rejected": -0.3455931544303894, "step": 441 }, { "epoch": 1.2101300479123887, "grad_norm": 5.594604969024658, "learning_rate": 3.9589041095890407e-07, "log_odds_chosen": -0.05878061428666115, "log_odds_ratio": -0.7625088691711426, "logits/chosen": 0.034367796033620834, "logits/rejected": 0.07053948938846588, "logps/chosen": -3.3144612312316895, "logps/rejected": -3.275209903717041, "loss": 2.3347, "nll_loss": 2.258484363555908, "rewards/accuracies": 0.375, "rewards/chosen": -0.33144611120224, "rewards/margins": -0.0039251502603292465, "rewards/rejected": -0.3275209665298462, "step": 442 }, { "epoch": 1.212867898699521, "grad_norm": 5.129866600036621, "learning_rate": 3.9452054794520543e-07, "log_odds_chosen": -0.2857498526573181, "log_odds_ratio": -0.8955768942832947, "logits/chosen": -0.10461799800395966, "logits/rejected": -0.046510566025972366, "logps/chosen": -2.981543779373169, "logps/rejected": -2.7018487453460693, "loss": 2.3248, "nll_loss": 2.235257387161255, "rewards/accuracies": 0.375, "rewards/chosen": -0.29815438389778137, "rewards/margins": -0.027969518676400185, "rewards/rejected": -0.27018487453460693, "step": 443 }, { "epoch": 1.215605749486653, "grad_norm": 5.2830657958984375, "learning_rate": 3.9315068493150684e-07, "log_odds_chosen": -0.570990800857544, "log_odds_ratio": -1.2896728515625, "logits/chosen": 0.22771388292312622, "logits/rejected": 0.2974521517753601, "logps/chosen": -3.2452237606048584, "logps/rejected": -2.651289463043213, "loss": 2.293, "nll_loss": 2.1640822887420654, "rewards/accuracies": 0.5, "rewards/chosen": -0.32452237606048584, "rewards/margins": -0.05939342454075813, "rewards/rejected": -0.2651289403438568, "step": 444 }, { "epoch": 1.218343600273785, "grad_norm": 5.56787633895874, "learning_rate": 3.917808219178082e-07, "log_odds_chosen": 0.005086496472358704, "log_odds_ratio": -0.7625888586044312, "logits/chosen": 0.23340243101119995, "logits/rejected": 0.3234289586544037, "logps/chosen": -2.837780714035034, "logps/rejected": -2.831012725830078, "loss": 2.3034, "nll_loss": 2.227184534072876, "rewards/accuracies": 0.625, "rewards/chosen": -0.2837781012058258, "rewards/margins": -0.0006768312305212021, "rewards/rejected": -0.28310126066207886, "step": 445 }, { "epoch": 1.2210814510609171, "grad_norm": 5.5315260887146, "learning_rate": 3.9041095890410957e-07, "log_odds_chosen": -0.023572519421577454, "log_odds_ratio": -0.790415346622467, "logits/chosen": 0.10500869154930115, "logits/rejected": 0.0744297131896019, "logps/chosen": -2.8697667121887207, "logps/rejected": -2.804124355316162, "loss": 2.3305, "nll_loss": 2.2514405250549316, "rewards/accuracies": 0.625, "rewards/chosen": -0.28697669506073, "rewards/margins": -0.006564265117049217, "rewards/rejected": -0.2804124355316162, "step": 446 }, { "epoch": 1.2238193018480492, "grad_norm": 4.241142272949219, "learning_rate": 3.8904109589041093e-07, "log_odds_chosen": 0.1832469403743744, "log_odds_ratio": -0.6611942648887634, "logits/chosen": 0.0570850670337677, "logits/rejected": -0.05894461274147034, "logps/chosen": -2.134988307952881, "logps/rejected": -2.293339729309082, "loss": 2.1984, "nll_loss": 2.1322906017303467, "rewards/accuracies": 0.5, "rewards/chosen": -0.21349883079528809, "rewards/margins": 0.015835151076316833, "rewards/rejected": -0.22933396697044373, "step": 447 }, { "epoch": 1.2265571526351815, "grad_norm": 4.579638481140137, "learning_rate": 3.8767123287671235e-07, "log_odds_chosen": 0.11692874133586884, "log_odds_ratio": -0.7094518542289734, "logits/chosen": 0.15690502524375916, "logits/rejected": 0.15148845314979553, "logps/chosen": -2.3017520904541016, "logps/rejected": -2.3987131118774414, "loss": 2.2658, "nll_loss": 2.1948163509368896, "rewards/accuracies": 0.75, "rewards/chosen": -0.2301752269268036, "rewards/margins": 0.009696070104837418, "rewards/rejected": -0.2398712933063507, "step": 448 }, { "epoch": 1.2292950034223136, "grad_norm": 5.538445472717285, "learning_rate": 3.863013698630137e-07, "log_odds_chosen": 0.20809456706047058, "log_odds_ratio": -0.7077125310897827, "logits/chosen": 0.17006219923496246, "logits/rejected": 0.2324070930480957, "logps/chosen": -3.039630651473999, "logps/rejected": -3.1949124336242676, "loss": 2.3378, "nll_loss": 2.266995906829834, "rewards/accuracies": 0.625, "rewards/chosen": -0.3039630651473999, "rewards/margins": 0.015528194606304169, "rewards/rejected": -0.3194912374019623, "step": 449 }, { "epoch": 1.2320328542094456, "grad_norm": 4.894786357879639, "learning_rate": 3.84931506849315e-07, "log_odds_chosen": -0.011183511465787888, "log_odds_ratio": -0.8836530447006226, "logits/chosen": -0.17769910395145416, "logits/rejected": -0.21598482131958008, "logps/chosen": -3.0942201614379883, "logps/rejected": -3.0989980697631836, "loss": 2.294, "nll_loss": 2.2056050300598145, "rewards/accuracies": 0.375, "rewards/chosen": -0.30942201614379883, "rewards/margins": 0.0004777815192937851, "rewards/rejected": -0.30989980697631836, "step": 450 }, { "epoch": 1.2347707049965777, "grad_norm": 6.482978343963623, "learning_rate": 3.835616438356164e-07, "log_odds_chosen": -1.214763879776001, "log_odds_ratio": -1.8017902374267578, "logits/chosen": 0.15441754460334778, "logits/rejected": 0.2062607705593109, "logps/chosen": -4.445630073547363, "logps/rejected": -3.2353053092956543, "loss": 2.4299, "nll_loss": 2.2497055530548096, "rewards/accuracies": 0.25, "rewards/chosen": -0.44456303119659424, "rewards/margins": -0.12103252112865448, "rewards/rejected": -0.32353052496910095, "step": 451 }, { "epoch": 1.2375085557837098, "grad_norm": 4.779342174530029, "learning_rate": 3.821917808219178e-07, "log_odds_chosen": 0.41829296946525574, "log_odds_ratio": -0.5854824781417847, "logits/chosen": 0.14223235845565796, "logits/rejected": 0.019388623535633087, "logps/chosen": -2.5229992866516113, "logps/rejected": -2.8851661682128906, "loss": 2.2626, "nll_loss": 2.2040114402770996, "rewards/accuracies": 0.625, "rewards/chosen": -0.2522999346256256, "rewards/margins": 0.036216676235198975, "rewards/rejected": -0.2885166108608246, "step": 452 }, { "epoch": 1.2402464065708418, "grad_norm": 5.297737121582031, "learning_rate": 3.8082191780821916e-07, "log_odds_chosen": -0.19046646356582642, "log_odds_ratio": -1.0816650390625, "logits/chosen": 0.13894535601139069, "logits/rejected": 0.05238910764455795, "logps/chosen": -3.038539409637451, "logps/rejected": -2.8137638568878174, "loss": 2.3158, "nll_loss": 2.2076566219329834, "rewards/accuracies": 0.625, "rewards/chosen": -0.30385395884513855, "rewards/margins": -0.022477544844150543, "rewards/rejected": -0.2813764214515686, "step": 453 }, { "epoch": 1.242984257357974, "grad_norm": 4.891027927398682, "learning_rate": 3.794520547945205e-07, "log_odds_chosen": -0.4608806371688843, "log_odds_ratio": -0.9887082576751709, "logits/chosen": -0.21973080933094025, "logits/rejected": -0.20780184864997864, "logps/chosen": -2.9825210571289062, "logps/rejected": -2.5504889488220215, "loss": 2.2958, "nll_loss": 2.1969528198242188, "rewards/accuracies": 0.25, "rewards/chosen": -0.2982521057128906, "rewards/margins": -0.043203212320804596, "rewards/rejected": -0.2550489008426666, "step": 454 }, { "epoch": 1.245722108145106, "grad_norm": 5.42530632019043, "learning_rate": 3.780821917808219e-07, "log_odds_chosen": -0.1797340363264084, "log_odds_ratio": -0.9970426559448242, "logits/chosen": 0.05224143713712692, "logits/rejected": 0.0031717494130134583, "logps/chosen": -3.5762226581573486, "logps/rejected": -3.350186824798584, "loss": 2.38, "nll_loss": 2.2802908420562744, "rewards/accuracies": 0.625, "rewards/chosen": -0.35762226581573486, "rewards/margins": -0.02260356955230236, "rewards/rejected": -0.33501869440078735, "step": 455 }, { "epoch": 1.2484599589322383, "grad_norm": 5.364457130432129, "learning_rate": 3.767123287671233e-07, "log_odds_chosen": -0.04329226166009903, "log_odds_ratio": -0.8333800435066223, "logits/chosen": 0.033848777413368225, "logits/rejected": 0.08939385414123535, "logps/chosen": -2.6975908279418945, "logps/rejected": -2.6550509929656982, "loss": 2.3678, "nll_loss": 2.2844414710998535, "rewards/accuracies": 0.375, "rewards/chosen": -0.26975908875465393, "rewards/margins": -0.004253963008522987, "rewards/rejected": -0.2655051052570343, "step": 456 }, { "epoch": 1.2511978097193703, "grad_norm": 5.434315204620361, "learning_rate": 3.7534246575342466e-07, "log_odds_chosen": -0.02842816710472107, "log_odds_ratio": -0.946124792098999, "logits/chosen": 0.0014019666705280542, "logits/rejected": 0.07051176577806473, "logps/chosen": -3.585379123687744, "logps/rejected": -3.530275583267212, "loss": 2.326, "nll_loss": 2.231414556503296, "rewards/accuracies": 0.5, "rewards/chosen": -0.3585379123687744, "rewards/margins": -0.0055103544145822525, "rewards/rejected": -0.3530275821685791, "step": 457 }, { "epoch": 1.2539356605065024, "grad_norm": 5.733176231384277, "learning_rate": 3.73972602739726e-07, "log_odds_chosen": -0.2749987244606018, "log_odds_ratio": -0.8739068508148193, "logits/chosen": -0.040217746049165726, "logits/rejected": 0.013007130473852158, "logps/chosen": -3.2865023612976074, "logps/rejected": -3.019029140472412, "loss": 2.3356, "nll_loss": 2.248253345489502, "rewards/accuracies": 0.375, "rewards/chosen": -0.32865023612976074, "rewards/margins": -0.026747316122055054, "rewards/rejected": -0.3019028902053833, "step": 458 }, { "epoch": 1.2566735112936345, "grad_norm": 5.4259748458862305, "learning_rate": 3.726027397260274e-07, "log_odds_chosen": 0.10445556044578552, "log_odds_ratio": -0.7014907598495483, "logits/chosen": -0.04992939904332161, "logits/rejected": -0.04054194316267967, "logps/chosen": -2.9888110160827637, "logps/rejected": -3.0775341987609863, "loss": 2.3123, "nll_loss": 2.242107391357422, "rewards/accuracies": 0.625, "rewards/chosen": -0.2988811135292053, "rewards/margins": 0.008872314356267452, "rewards/rejected": -0.30775344371795654, "step": 459 }, { "epoch": 1.2594113620807665, "grad_norm": 4.578041076660156, "learning_rate": 3.7123287671232874e-07, "log_odds_chosen": 0.4031992554664612, "log_odds_ratio": -0.645115077495575, "logits/chosen": 0.08439967036247253, "logits/rejected": -0.08593320846557617, "logps/chosen": -2.4816460609436035, "logps/rejected": -2.8504421710968018, "loss": 2.2609, "nll_loss": 2.196371555328369, "rewards/accuracies": 0.625, "rewards/chosen": -0.2481645941734314, "rewards/margins": 0.03687962517142296, "rewards/rejected": -0.28504419326782227, "step": 460 }, { "epoch": 1.2621492128678988, "grad_norm": 4.594241142272949, "learning_rate": 3.698630136986301e-07, "log_odds_chosen": -0.06855431199073792, "log_odds_ratio": -0.7659021615982056, "logits/chosen": 0.08447252959012985, "logits/rejected": 0.05858520790934563, "logps/chosen": -2.3745269775390625, "logps/rejected": -2.3242478370666504, "loss": 2.1946, "nll_loss": 2.1179614067077637, "rewards/accuracies": 0.625, "rewards/chosen": -0.2374526858329773, "rewards/margins": -0.005027917213737965, "rewards/rejected": -0.2324247807264328, "step": 461 }, { "epoch": 1.264887063655031, "grad_norm": 5.030348777770996, "learning_rate": 3.6849315068493147e-07, "log_odds_chosen": 0.3986515998840332, "log_odds_ratio": -0.7215300798416138, "logits/chosen": -0.06113892421126366, "logits/rejected": -0.06887085735797882, "logps/chosen": -3.0069618225097656, "logps/rejected": -3.369171619415283, "loss": 2.2746, "nll_loss": 2.202443838119507, "rewards/accuracies": 0.375, "rewards/chosen": -0.3006962239742279, "rewards/margins": 0.0362209752202034, "rewards/rejected": -0.3369171619415283, "step": 462 }, { "epoch": 1.267624914442163, "grad_norm": 4.454153060913086, "learning_rate": 3.6712328767123283e-07, "log_odds_chosen": 0.922951877117157, "log_odds_ratio": -0.4120529890060425, "logits/chosen": 0.14326262474060059, "logits/rejected": 0.0064850784838199615, "logps/chosen": -1.9967904090881348, "logps/rejected": -2.850372791290283, "loss": 2.1316, "nll_loss": 2.0903899669647217, "rewards/accuracies": 0.875, "rewards/chosen": -0.19967906177043915, "rewards/margins": 0.08535823971033096, "rewards/rejected": -0.2850372791290283, "step": 463 }, { "epoch": 1.270362765229295, "grad_norm": 4.759671688079834, "learning_rate": 3.6575342465753424e-07, "log_odds_chosen": 1.5899970531463623, "log_odds_ratio": -0.5412559509277344, "logits/chosen": 0.0026228725910186768, "logits/rejected": -0.06718827784061432, "logps/chosen": -2.4929537773132324, "logps/rejected": -4.032220363616943, "loss": 2.2422, "nll_loss": 2.188117742538452, "rewards/accuracies": 0.75, "rewards/chosen": -0.24929536879062653, "rewards/margins": 0.15392665565013885, "rewards/rejected": -0.4032220244407654, "step": 464 }, { "epoch": 1.273100616016427, "grad_norm": 7.0283522605896, "learning_rate": 3.643835616438356e-07, "log_odds_chosen": -1.0900795459747314, "log_odds_ratio": -1.4130733013153076, "logits/chosen": 0.14429815113544464, "logits/rejected": 0.28826794028282166, "logps/chosen": -4.523922443389893, "logps/rejected": -3.4579856395721436, "loss": 2.4272, "nll_loss": 2.2858777046203613, "rewards/accuracies": 0.0, "rewards/chosen": -0.4523922801017761, "rewards/margins": -0.10659370571374893, "rewards/rejected": -0.3457985818386078, "step": 465 }, { "epoch": 1.2758384668035592, "grad_norm": 5.343384742736816, "learning_rate": 3.6301369863013697e-07, "log_odds_chosen": -0.4886016845703125, "log_odds_ratio": -1.1182588338851929, "logits/chosen": 0.0006374716758728027, "logits/rejected": -0.027314189821481705, "logps/chosen": -3.491541862487793, "logps/rejected": -2.987880229949951, "loss": 2.2768, "nll_loss": 2.1649763584136963, "rewards/accuracies": 0.375, "rewards/chosen": -0.34915420413017273, "rewards/margins": -0.05036617070436478, "rewards/rejected": -0.29878801107406616, "step": 466 }, { "epoch": 1.2785763175906912, "grad_norm": 5.166013717651367, "learning_rate": 3.6164383561643833e-07, "log_odds_chosen": -0.3198099434375763, "log_odds_ratio": -0.9702780842781067, "logits/chosen": -0.08795923739671707, "logits/rejected": -0.08021971583366394, "logps/chosen": -3.190288543701172, "logps/rejected": -2.8623976707458496, "loss": 2.2535, "nll_loss": 2.1564621925354004, "rewards/accuracies": 0.375, "rewards/chosen": -0.3190288543701172, "rewards/margins": -0.03278910368680954, "rewards/rejected": -0.28623977303504944, "step": 467 }, { "epoch": 1.2813141683778233, "grad_norm": 7.284139156341553, "learning_rate": 3.6027397260273974e-07, "log_odds_chosen": -1.580754280090332, "log_odds_ratio": -2.0216305255889893, "logits/chosen": 0.15826945006847382, "logits/rejected": 0.22554798424243927, "logps/chosen": -4.944746971130371, "logps/rejected": -3.3422627449035645, "loss": 2.4502, "nll_loss": 2.2480287551879883, "rewards/accuracies": 0.25, "rewards/chosen": -0.4944746792316437, "rewards/margins": -0.16024842858314514, "rewards/rejected": -0.33422625064849854, "step": 468 }, { "epoch": 1.2840520191649554, "grad_norm": 5.2165045738220215, "learning_rate": 3.589041095890411e-07, "log_odds_chosen": -0.19529777765274048, "log_odds_ratio": -0.9622232913970947, "logits/chosen": -0.08826644718647003, "logits/rejected": -0.05652342364192009, "logps/chosen": -3.035346031188965, "logps/rejected": -2.8173813819885254, "loss": 2.2838, "nll_loss": 2.187602996826172, "rewards/accuracies": 0.375, "rewards/chosen": -0.303534597158432, "rewards/margins": -0.021796438843011856, "rewards/rejected": -0.28173816204071045, "step": 469 }, { "epoch": 1.2867898699520877, "grad_norm": 4.9239020347595215, "learning_rate": 3.575342465753424e-07, "log_odds_chosen": 0.6400248408317566, "log_odds_ratio": -0.6016005277633667, "logits/chosen": -0.09108103066682816, "logits/rejected": -0.07915110886096954, "logps/chosen": -2.533848524093628, "logps/rejected": -3.0848352909088135, "loss": 2.2646, "nll_loss": 2.2044613361358643, "rewards/accuracies": 0.5, "rewards/chosen": -0.25338485836982727, "rewards/margins": 0.05509868264198303, "rewards/rejected": -0.3084835410118103, "step": 470 }, { "epoch": 1.2895277207392197, "grad_norm": 6.3430047035217285, "learning_rate": 3.561643835616438e-07, "log_odds_chosen": 0.24292409420013428, "log_odds_ratio": -0.8240742683410645, "logits/chosen": 0.10006479918956757, "logits/rejected": 0.0774255245923996, "logps/chosen": -3.3682756423950195, "logps/rejected": -3.582939624786377, "loss": 2.2819, "nll_loss": 2.1994781494140625, "rewards/accuracies": 0.75, "rewards/chosen": -0.3368275761604309, "rewards/margins": 0.021466396749019623, "rewards/rejected": -0.35829395055770874, "step": 471 }, { "epoch": 1.2922655715263518, "grad_norm": 5.448814868927002, "learning_rate": 3.547945205479452e-07, "log_odds_chosen": -0.7297390699386597, "log_odds_ratio": -1.1919960975646973, "logits/chosen": -0.10811695456504822, "logits/rejected": -0.05346079543232918, "logps/chosen": -3.3871541023254395, "logps/rejected": -2.6806554794311523, "loss": 2.3902, "nll_loss": 2.270991325378418, "rewards/accuracies": 0.25, "rewards/chosen": -0.33871543407440186, "rewards/margins": -0.0706498771905899, "rewards/rejected": -0.26806554198265076, "step": 472 }, { "epoch": 1.2950034223134839, "grad_norm": 5.587649822235107, "learning_rate": 3.5342465753424655e-07, "log_odds_chosen": -0.6019833087921143, "log_odds_ratio": -1.0907214879989624, "logits/chosen": -0.04009854793548584, "logits/rejected": 0.006647564470767975, "logps/chosen": -3.2413673400878906, "logps/rejected": -2.6758201122283936, "loss": 2.3829, "nll_loss": 2.273836135864258, "rewards/accuracies": 0.25, "rewards/chosen": -0.32413673400878906, "rewards/margins": -0.056554730981588364, "rewards/rejected": -0.2675819993019104, "step": 473 }, { "epoch": 1.2977412731006162, "grad_norm": 5.749831676483154, "learning_rate": 3.520547945205479e-07, "log_odds_chosen": -0.012916192412376404, "log_odds_ratio": -0.8724889755249023, "logits/chosen": 0.007406923919916153, "logits/rejected": 0.04089938476681709, "logps/chosen": -2.83253812789917, "logps/rejected": -2.8287770748138428, "loss": 2.2998, "nll_loss": 2.212523937225342, "rewards/accuracies": 0.5, "rewards/chosen": -0.28325384855270386, "rewards/margins": -0.00037613697350025177, "rewards/rejected": -0.28287768363952637, "step": 474 }, { "epoch": 1.3004791238877482, "grad_norm": 4.912413120269775, "learning_rate": 3.506849315068493e-07, "log_odds_chosen": -0.6297730803489685, "log_odds_ratio": -1.1220077276229858, "logits/chosen": -0.007578203454613686, "logits/rejected": -0.047176215797662735, "logps/chosen": -2.903296947479248, "logps/rejected": -2.322216510772705, "loss": 2.3162, "nll_loss": 2.204035758972168, "rewards/accuracies": 0.125, "rewards/chosen": -0.2903296947479248, "rewards/margins": -0.05810806155204773, "rewards/rejected": -0.23222164809703827, "step": 475 }, { "epoch": 1.3032169746748803, "grad_norm": 5.274984836578369, "learning_rate": 3.493150684931507e-07, "log_odds_chosen": -0.16193144023418427, "log_odds_ratio": -0.9524926543235779, "logits/chosen": -0.09428137540817261, "logits/rejected": -0.09950833767652512, "logps/chosen": -2.9154279232025146, "logps/rejected": -2.7560887336730957, "loss": 2.2926, "nll_loss": 2.197387218475342, "rewards/accuracies": 0.375, "rewards/chosen": -0.29154279828071594, "rewards/margins": -0.015933915972709656, "rewards/rejected": -0.2756088972091675, "step": 476 }, { "epoch": 1.3059548254620124, "grad_norm": 6.391251087188721, "learning_rate": 3.4794520547945205e-07, "log_odds_chosen": -1.0086678266525269, "log_odds_ratio": -1.6490472555160522, "logits/chosen": -0.021672870963811874, "logits/rejected": 0.03508831560611725, "logps/chosen": -4.014772415161133, "logps/rejected": -3.0110480785369873, "loss": 2.3788, "nll_loss": 2.213893175125122, "rewards/accuracies": 0.375, "rewards/chosen": -0.40147724747657776, "rewards/margins": -0.10037241131067276, "rewards/rejected": -0.3011048138141632, "step": 477 }, { "epoch": 1.3086926762491444, "grad_norm": 6.454684734344482, "learning_rate": 3.465753424657534e-07, "log_odds_chosen": -0.9912631511688232, "log_odds_ratio": -1.3899617195129395, "logits/chosen": 0.05304156616330147, "logits/rejected": 0.14474628865718842, "logps/chosen": -4.375779151916504, "logps/rejected": -3.4018890857696533, "loss": 2.4429, "nll_loss": 2.3038649559020996, "rewards/accuracies": 0.125, "rewards/chosen": -0.4375779330730438, "rewards/margins": -0.09738900512456894, "rewards/rejected": -0.3401889204978943, "step": 478 }, { "epoch": 1.3114305270362765, "grad_norm": 6.337299346923828, "learning_rate": 3.4520547945205483e-07, "log_odds_chosen": -1.0574742555618286, "log_odds_ratio": -1.5193884372711182, "logits/chosen": 0.1611642837524414, "logits/rejected": 0.19359366595745087, "logps/chosen": -3.8438045978546143, "logps/rejected": -2.796142101287842, "loss": 2.4232, "nll_loss": 2.271299362182617, "rewards/accuracies": 0.375, "rewards/chosen": -0.3843804597854614, "rewards/margins": -0.10476621985435486, "rewards/rejected": -0.2796142101287842, "step": 479 }, { "epoch": 1.3141683778234086, "grad_norm": 5.63308572769165, "learning_rate": 3.4383561643835614e-07, "log_odds_chosen": 0.13480527698993683, "log_odds_ratio": -0.8175848126411438, "logits/chosen": -0.04285239428281784, "logits/rejected": 0.010982515290379524, "logps/chosen": -2.926442861557007, "logps/rejected": -2.9757587909698486, "loss": 2.2714, "nll_loss": 2.1896896362304688, "rewards/accuracies": 0.5, "rewards/chosen": -0.29264429211616516, "rewards/margins": 0.004931598901748657, "rewards/rejected": -0.2975758910179138, "step": 480 }, { "epoch": 1.3169062286105406, "grad_norm": 5.585013389587402, "learning_rate": 3.424657534246575e-07, "log_odds_chosen": -0.1636361926794052, "log_odds_ratio": -0.9045370817184448, "logits/chosen": 0.14214769005775452, "logits/rejected": 0.19063599407672882, "logps/chosen": -3.3427577018737793, "logps/rejected": -3.1725850105285645, "loss": 2.261, "nll_loss": 2.1705050468444824, "rewards/accuracies": 0.625, "rewards/chosen": -0.3342757821083069, "rewards/margins": -0.01701727882027626, "rewards/rejected": -0.31725847721099854, "step": 481 }, { "epoch": 1.3196440793976727, "grad_norm": 5.557474136352539, "learning_rate": 3.4109589041095886e-07, "log_odds_chosen": -0.3926643431186676, "log_odds_ratio": -1.1315499544143677, "logits/chosen": 0.006485864520072937, "logits/rejected": 0.03087715059518814, "logps/chosen": -3.3390755653381348, "logps/rejected": -2.9929490089416504, "loss": 2.3491, "nll_loss": 2.235987663269043, "rewards/accuracies": 0.375, "rewards/chosen": -0.3339075446128845, "rewards/margins": -0.034612640738487244, "rewards/rejected": -0.29929491877555847, "step": 482 }, { "epoch": 1.322381930184805, "grad_norm": 5.592511177062988, "learning_rate": 3.397260273972602e-07, "log_odds_chosen": -1.1344586610794067, "log_odds_ratio": -1.623070478439331, "logits/chosen": -0.13453149795532227, "logits/rejected": -0.127396360039711, "logps/chosen": -4.000253200531006, "logps/rejected": -2.8759419918060303, "loss": 2.3945, "nll_loss": 2.232154130935669, "rewards/accuracies": 0.375, "rewards/chosen": -0.40002527832984924, "rewards/margins": -0.11243107914924622, "rewards/rejected": -0.287594199180603, "step": 483 }, { "epoch": 1.325119780971937, "grad_norm": 6.450253009796143, "learning_rate": 3.3835616438356164e-07, "log_odds_chosen": -0.5328342914581299, "log_odds_ratio": -1.3228648900985718, "logits/chosen": 0.041990846395492554, "logits/rejected": 0.12226836383342743, "logps/chosen": -4.075714111328125, "logps/rejected": -3.554366111755371, "loss": 2.4355, "nll_loss": 2.303171157836914, "rewards/accuracies": 0.375, "rewards/chosen": -0.4075714349746704, "rewards/margins": -0.052134811878204346, "rewards/rejected": -0.3554365932941437, "step": 484 }, { "epoch": 1.3278576317590691, "grad_norm": 5.5972418785095215, "learning_rate": 3.36986301369863e-07, "log_odds_chosen": -0.8519666790962219, "log_odds_ratio": -1.3391461372375488, "logits/chosen": -0.06674499064683914, "logits/rejected": -0.02508239448070526, "logps/chosen": -3.7931272983551025, "logps/rejected": -2.965847969055176, "loss": 2.3279, "nll_loss": 2.1940226554870605, "rewards/accuracies": 0.25, "rewards/chosen": -0.37931275367736816, "rewards/margins": -0.08272794634103775, "rewards/rejected": -0.2965847849845886, "step": 485 }, { "epoch": 1.3305954825462012, "grad_norm": 6.934753894805908, "learning_rate": 3.3561643835616436e-07, "log_odds_chosen": -1.1547881364822388, "log_odds_ratio": -1.485748052597046, "logits/chosen": 0.03696586936712265, "logits/rejected": 0.09524349123239517, "logps/chosen": -4.206586837768555, "logps/rejected": -3.099271774291992, "loss": 2.478, "nll_loss": 2.3294436931610107, "rewards/accuracies": 0.0, "rewards/chosen": -0.4206587076187134, "rewards/margins": -0.11073151975870132, "rewards/rejected": -0.30992722511291504, "step": 486 }, { "epoch": 1.3333333333333333, "grad_norm": 4.6998677253723145, "learning_rate": 3.342465753424658e-07, "log_odds_chosen": -0.21258150041103363, "log_odds_ratio": -0.9485479593276978, "logits/chosen": 0.01701650768518448, "logits/rejected": -0.08464266359806061, "logps/chosen": -2.602543830871582, "logps/rejected": -2.387998580932617, "loss": 2.257, "nll_loss": 2.1621904373168945, "rewards/accuracies": 0.625, "rewards/chosen": -0.2602543830871582, "rewards/margins": -0.02145451307296753, "rewards/rejected": -0.2387998402118683, "step": 487 }, { "epoch": 1.3360711841204655, "grad_norm": 5.408716678619385, "learning_rate": 3.3287671232876714e-07, "log_odds_chosen": -0.8966997861862183, "log_odds_ratio": -1.3453482389450073, "logits/chosen": -0.1589011400938034, "logits/rejected": -0.09838604927062988, "logps/chosen": -3.3335704803466797, "logps/rejected": -2.4830548763275146, "loss": 2.3764, "nll_loss": 2.241849422454834, "rewards/accuracies": 0.125, "rewards/chosen": -0.3333570957183838, "rewards/margins": -0.08505159616470337, "rewards/rejected": -0.24830549955368042, "step": 488 }, { "epoch": 1.3388090349075976, "grad_norm": 5.925401210784912, "learning_rate": 3.315068493150685e-07, "log_odds_chosen": -0.5891808867454529, "log_odds_ratio": -1.2120648622512817, "logits/chosen": 0.013024047017097473, "logits/rejected": 0.05488660931587219, "logps/chosen": -3.897545576095581, "logps/rejected": -3.294340133666992, "loss": 2.3229, "nll_loss": 2.201685905456543, "rewards/accuracies": 0.375, "rewards/chosen": -0.3897545337677002, "rewards/margins": -0.060320544987916946, "rewards/rejected": -0.32943400740623474, "step": 489 }, { "epoch": 1.3415468856947297, "grad_norm": 5.796994209289551, "learning_rate": 3.301369863013698e-07, "log_odds_chosen": -0.7781983017921448, "log_odds_ratio": -1.2522270679473877, "logits/chosen": -0.02797550894320011, "logits/rejected": 0.05884423851966858, "logps/chosen": -3.542487144470215, "logps/rejected": -2.7879111766815186, "loss": 2.3154, "nll_loss": 2.190169095993042, "rewards/accuracies": 0.25, "rewards/chosen": -0.3542487323284149, "rewards/margins": -0.0754576027393341, "rewards/rejected": -0.2787911295890808, "step": 490 }, { "epoch": 1.3442847364818618, "grad_norm": 5.0834641456604, "learning_rate": 3.287671232876712e-07, "log_odds_chosen": -0.5489201545715332, "log_odds_ratio": -1.2914053201675415, "logits/chosen": -0.007166869938373566, "logits/rejected": 0.06967471539974213, "logps/chosen": -3.2000811100006104, "logps/rejected": -2.647230625152588, "loss": 2.3452, "nll_loss": 2.216064929962158, "rewards/accuracies": 0.5, "rewards/chosen": -0.32000812888145447, "rewards/margins": -0.05528504401445389, "rewards/rejected": -0.2647230923175812, "step": 491 }, { "epoch": 1.3470225872689938, "grad_norm": 5.5747480392456055, "learning_rate": 3.273972602739726e-07, "log_odds_chosen": -0.19120138883590698, "log_odds_ratio": -1.0595166683197021, "logits/chosen": 0.03803810477256775, "logits/rejected": -0.03616348281502724, "logps/chosen": -3.2633285522460938, "logps/rejected": -3.093308925628662, "loss": 2.3252, "nll_loss": 2.2192234992980957, "rewards/accuracies": 0.25, "rewards/chosen": -0.32633286714553833, "rewards/margins": -0.017002005130052567, "rewards/rejected": -0.30933088064193726, "step": 492 }, { "epoch": 1.3497604380561259, "grad_norm": 6.181543350219727, "learning_rate": 3.2602739726027395e-07, "log_odds_chosen": -0.05553251504898071, "log_odds_ratio": -1.0328266620635986, "logits/chosen": -0.014334917068481445, "logits/rejected": 0.10062199085950851, "logps/chosen": -3.91768479347229, "logps/rejected": -3.8916168212890625, "loss": 2.3604, "nll_loss": 2.2571372985839844, "rewards/accuracies": 0.375, "rewards/chosen": -0.39176851511001587, "rewards/margins": -0.002606825903058052, "rewards/rejected": -0.3891616761684418, "step": 493 }, { "epoch": 1.352498288843258, "grad_norm": 5.772453308105469, "learning_rate": 3.246575342465753e-07, "log_odds_chosen": -0.059465520083904266, "log_odds_ratio": -0.7376603484153748, "logits/chosen": 0.19245952367782593, "logits/rejected": 0.19753485918045044, "logps/chosen": -3.7475991249084473, "logps/rejected": -3.6864655017852783, "loss": 2.3195, "nll_loss": 2.245767116546631, "rewards/accuracies": 0.5, "rewards/chosen": -0.3747599422931671, "rewards/margins": -0.006113380193710327, "rewards/rejected": -0.3686465322971344, "step": 494 }, { "epoch": 1.35523613963039, "grad_norm": 5.909037113189697, "learning_rate": 3.2328767123287673e-07, "log_odds_chosen": -0.2502025365829468, "log_odds_ratio": -0.9144882559776306, "logits/chosen": 0.10171175748109818, "logits/rejected": 0.1464611291885376, "logps/chosen": -3.4546518325805664, "logps/rejected": -3.1751904487609863, "loss": 2.3338, "nll_loss": 2.242335796356201, "rewards/accuracies": 0.375, "rewards/chosen": -0.34546521306037903, "rewards/margins": -0.027946149930357933, "rewards/rejected": -0.31751903891563416, "step": 495 }, { "epoch": 1.3579739904175223, "grad_norm": 5.341694355010986, "learning_rate": 3.219178082191781e-07, "log_odds_chosen": -0.10177134722471237, "log_odds_ratio": -0.8052993416786194, "logits/chosen": 0.06704305857419968, "logits/rejected": 0.08066705614328384, "logps/chosen": -2.9685792922973633, "logps/rejected": -2.8593251705169678, "loss": 2.2883, "nll_loss": 2.2077767848968506, "rewards/accuracies": 0.5, "rewards/chosen": -0.29685789346694946, "rewards/margins": -0.010925395414233208, "rewards/rejected": -0.2859325110912323, "step": 496 }, { "epoch": 1.3607118412046544, "grad_norm": 4.939920902252197, "learning_rate": 3.2054794520547945e-07, "log_odds_chosen": -0.3084990978240967, "log_odds_ratio": -1.0039947032928467, "logits/chosen": -0.03718341141939163, "logits/rejected": -0.10434527695178986, "logps/chosen": -2.8921923637390137, "logps/rejected": -2.5617551803588867, "loss": 2.2016, "nll_loss": 2.101160764694214, "rewards/accuracies": 0.5, "rewards/chosen": -0.2892192006111145, "rewards/margins": -0.03304370492696762, "rewards/rejected": -0.25617551803588867, "step": 497 }, { "epoch": 1.3634496919917864, "grad_norm": 4.6267852783203125, "learning_rate": 3.191780821917808e-07, "log_odds_chosen": 0.38066986203193665, "log_odds_ratio": -0.7018658518791199, "logits/chosen": 0.10582330077886581, "logits/rejected": 0.016034364700317383, "logps/chosen": -2.0792794227600098, "logps/rejected": -2.3877739906311035, "loss": 2.2338, "nll_loss": 2.163630962371826, "rewards/accuracies": 0.625, "rewards/chosen": -0.20792794227600098, "rewards/margins": 0.030849479138851166, "rewards/rejected": -0.23877744376659393, "step": 498 }, { "epoch": 1.3661875427789185, "grad_norm": 6.115396022796631, "learning_rate": 3.1780821917808223e-07, "log_odds_chosen": 0.012408852577209473, "log_odds_ratio": -0.9572428464889526, "logits/chosen": 0.1410335749387741, "logits/rejected": 0.12708210945129395, "logps/chosen": -3.285216808319092, "logps/rejected": -3.274174690246582, "loss": 2.3146, "nll_loss": 2.218827962875366, "rewards/accuracies": 0.5, "rewards/chosen": -0.328521728515625, "rewards/margins": -0.001104237511754036, "rewards/rejected": -0.32741743326187134, "step": 499 }, { "epoch": 1.3689253935660506, "grad_norm": 4.81636905670166, "learning_rate": 3.1643835616438354e-07, "log_odds_chosen": 0.1474734991788864, "log_odds_ratio": -0.6777741312980652, "logits/chosen": -0.016430826857686043, "logits/rejected": -0.2229195237159729, "logps/chosen": -2.9596927165985107, "logps/rejected": -3.0858473777770996, "loss": 2.1986, "nll_loss": 2.1308023929595947, "rewards/accuracies": 0.625, "rewards/chosen": -0.29596927762031555, "rewards/margins": 0.012615473940968513, "rewards/rejected": -0.3085847496986389, "step": 500 }, { "epoch": 1.3716632443531829, "grad_norm": 5.707921981811523, "learning_rate": 3.150684931506849e-07, "log_odds_chosen": -0.2446933388710022, "log_odds_ratio": -0.9602322578430176, "logits/chosen": 0.09039861708879471, "logits/rejected": 0.12752988934516907, "logps/chosen": -3.5473761558532715, "logps/rejected": -3.295029878616333, "loss": 2.4115, "nll_loss": 2.315462112426758, "rewards/accuracies": 0.5, "rewards/chosen": -0.35473763942718506, "rewards/margins": -0.025234637781977654, "rewards/rejected": -0.32950299978256226, "step": 501 }, { "epoch": 1.374401095140315, "grad_norm": 5.7837138175964355, "learning_rate": 3.1369863013698626e-07, "log_odds_chosen": -0.6719075441360474, "log_odds_ratio": -1.2524690628051758, "logits/chosen": -0.029604056850075722, "logits/rejected": 0.03731606900691986, "logps/chosen": -3.8181612491607666, "logps/rejected": -3.1650290489196777, "loss": 2.3556, "nll_loss": 2.230351448059082, "rewards/accuracies": 0.25, "rewards/chosen": -0.38181614875793457, "rewards/margins": -0.06531320512294769, "rewards/rejected": -0.3165029287338257, "step": 502 }, { "epoch": 1.377138945927447, "grad_norm": 4.789351940155029, "learning_rate": 3.123287671232877e-07, "log_odds_chosen": 0.218215212225914, "log_odds_ratio": -0.650187075138092, "logits/chosen": -0.07055118680000305, "logits/rejected": -0.09701339900493622, "logps/chosen": -2.2209396362304688, "logps/rejected": -2.421265125274658, "loss": 2.2051, "nll_loss": 2.1401071548461914, "rewards/accuracies": 0.5, "rewards/chosen": -0.22209396958351135, "rewards/margins": 0.020032566040754318, "rewards/rejected": -0.24212652444839478, "step": 503 }, { "epoch": 1.379876796714579, "grad_norm": 5.414278030395508, "learning_rate": 3.1095890410958904e-07, "log_odds_chosen": -0.14698636531829834, "log_odds_ratio": -0.9254823923110962, "logits/chosen": 0.07469984889030457, "logits/rejected": 0.0711158812046051, "logps/chosen": -3.2979235649108887, "logps/rejected": -3.127633571624756, "loss": 2.2725, "nll_loss": 2.179966449737549, "rewards/accuracies": 0.625, "rewards/chosen": -0.3297923803329468, "rewards/margins": -0.01702900603413582, "rewards/rejected": -0.31276339292526245, "step": 504 }, { "epoch": 1.3826146475017111, "grad_norm": 5.3937506675720215, "learning_rate": 3.095890410958904e-07, "log_odds_chosen": 0.11916675418615341, "log_odds_ratio": -0.7799686789512634, "logits/chosen": 0.14986683428287506, "logits/rejected": 0.20639488101005554, "logps/chosen": -2.8927125930786133, "logps/rejected": -2.986351251602173, "loss": 2.2795, "nll_loss": 2.2014636993408203, "rewards/accuracies": 0.25, "rewards/chosen": -0.2892712950706482, "rewards/margins": 0.009363863617181778, "rewards/rejected": -0.2986351251602173, "step": 505 }, { "epoch": 1.3853524982888432, "grad_norm": 5.792346954345703, "learning_rate": 3.0821917808219176e-07, "log_odds_chosen": -0.18777666985988617, "log_odds_ratio": -0.8986004590988159, "logits/chosen": 0.022499438375234604, "logits/rejected": 0.0795322060585022, "logps/chosen": -3.0100302696228027, "logps/rejected": -2.7993898391723633, "loss": 2.2922, "nll_loss": 2.202380657196045, "rewards/accuracies": 0.625, "rewards/chosen": -0.30100303888320923, "rewards/margins": -0.021064044907689095, "rewards/rejected": -0.2799389958381653, "step": 506 }, { "epoch": 1.3880903490759753, "grad_norm": 4.573023796081543, "learning_rate": 3.068493150684932e-07, "log_odds_chosen": -0.04278240352869034, "log_odds_ratio": -0.7474222183227539, "logits/chosen": -0.08397772163152695, "logits/rejected": -0.12413323670625687, "logps/chosen": -2.4738292694091797, "logps/rejected": -2.4160585403442383, "loss": 2.1553, "nll_loss": 2.080545425415039, "rewards/accuracies": 0.5, "rewards/chosen": -0.24738292396068573, "rewards/margins": -0.005777062848210335, "rewards/rejected": -0.24160586297512054, "step": 507 }, { "epoch": 1.3908281998631074, "grad_norm": 6.102405071258545, "learning_rate": 3.0547945205479454e-07, "log_odds_chosen": 0.12484188377857208, "log_odds_ratio": -0.8106358051300049, "logits/chosen": -0.004715586081147194, "logits/rejected": -0.07894787937402725, "logps/chosen": -3.5265817642211914, "logps/rejected": -3.612518787384033, "loss": 2.2323, "nll_loss": 2.151268482208252, "rewards/accuracies": 0.625, "rewards/chosen": -0.3526581823825836, "rewards/margins": 0.008593734353780746, "rewards/rejected": -0.36125192046165466, "step": 508 }, { "epoch": 1.3935660506502396, "grad_norm": 5.443105697631836, "learning_rate": 3.041095890410959e-07, "log_odds_chosen": -0.6028913259506226, "log_odds_ratio": -1.1053428649902344, "logits/chosen": -0.10412528365850449, "logits/rejected": -0.028124134987592697, "logps/chosen": -3.002903938293457, "logps/rejected": -2.426511287689209, "loss": 2.2835, "nll_loss": 2.17293119430542, "rewards/accuracies": 0.375, "rewards/chosen": -0.30029040575027466, "rewards/margins": -0.057639285922050476, "rewards/rejected": -0.24265113472938538, "step": 509 }, { "epoch": 1.3963039014373717, "grad_norm": 5.101766586303711, "learning_rate": 3.027397260273972e-07, "log_odds_chosen": -0.05048826336860657, "log_odds_ratio": -0.7889392971992493, "logits/chosen": 0.010095905512571335, "logits/rejected": 0.013137686997652054, "logps/chosen": -2.6217594146728516, "logps/rejected": -2.5844340324401855, "loss": 2.2911, "nll_loss": 2.21223783493042, "rewards/accuracies": 0.5, "rewards/chosen": -0.26217594742774963, "rewards/margins": -0.00373254157602787, "rewards/rejected": -0.2584434151649475, "step": 510 }, { "epoch": 1.3990417522245038, "grad_norm": 6.625648498535156, "learning_rate": 3.013698630136986e-07, "log_odds_chosen": -0.9961777925491333, "log_odds_ratio": -1.3565753698349, "logits/chosen": 0.011817499995231628, "logits/rejected": 0.22775843739509583, "logps/chosen": -3.81886625289917, "logps/rejected": -2.8944599628448486, "loss": 2.3946, "nll_loss": 2.258906602859497, "rewards/accuracies": 0.125, "rewards/chosen": -0.3818866014480591, "rewards/margins": -0.09244061261415482, "rewards/rejected": -0.28944599628448486, "step": 511 }, { "epoch": 1.4017796030116358, "grad_norm": 4.930182933807373, "learning_rate": 3e-07, "log_odds_chosen": 0.4737943112850189, "log_odds_ratio": -0.6605276465415955, "logits/chosen": -0.014883797615766525, "logits/rejected": -0.056787729263305664, "logps/chosen": -2.532705545425415, "logps/rejected": -2.9663281440734863, "loss": 2.2388, "nll_loss": 2.172752618789673, "rewards/accuracies": 0.625, "rewards/chosen": -0.25327053666114807, "rewards/margins": 0.04336225613951683, "rewards/rejected": -0.2966327965259552, "step": 512 }, { "epoch": 1.404517453798768, "grad_norm": 5.679470539093018, "learning_rate": 2.9863013698630135e-07, "log_odds_chosen": 0.11839020252227783, "log_odds_ratio": -0.9728469848632812, "logits/chosen": -0.01923982799053192, "logits/rejected": -0.0028579309582710266, "logps/chosen": -3.57297945022583, "logps/rejected": -3.6371450424194336, "loss": 2.306, "nll_loss": 2.208706855773926, "rewards/accuracies": 0.5, "rewards/chosen": -0.3572979271411896, "rewards/margins": 0.006416594609618187, "rewards/rejected": -0.3637145161628723, "step": 513 }, { "epoch": 1.4072553045859002, "grad_norm": 6.656508445739746, "learning_rate": 2.972602739726027e-07, "log_odds_chosen": -0.8157762289047241, "log_odds_ratio": -1.5214951038360596, "logits/chosen": 0.22989320755004883, "logits/rejected": 0.23819515109062195, "logps/chosen": -4.47310733795166, "logps/rejected": -3.648251533508301, "loss": 2.409, "nll_loss": 2.2568554878234863, "rewards/accuracies": 0.25, "rewards/chosen": -0.44731077551841736, "rewards/margins": -0.082485631108284, "rewards/rejected": -0.36482512950897217, "step": 514 }, { "epoch": 1.4099931553730323, "grad_norm": 5.183349609375, "learning_rate": 2.958904109589041e-07, "log_odds_chosen": 0.25687962770462036, "log_odds_ratio": -0.6275082230567932, "logits/chosen": 0.011102486401796341, "logits/rejected": 0.039823178201913834, "logps/chosen": -2.865298271179199, "logps/rejected": -3.065389394760132, "loss": 2.2175, "nll_loss": 2.1547064781188965, "rewards/accuracies": 0.75, "rewards/chosen": -0.2865298390388489, "rewards/margins": 0.02000909298658371, "rewards/rejected": -0.3065389394760132, "step": 515 }, { "epoch": 1.4127310061601643, "grad_norm": 4.752790927886963, "learning_rate": 2.945205479452055e-07, "log_odds_chosen": 0.0588127076625824, "log_odds_ratio": -0.8340564966201782, "logits/chosen": 0.09365172684192657, "logits/rejected": -0.059259288012981415, "logps/chosen": -2.4143285751342773, "logps/rejected": -2.449937343597412, "loss": 2.1178, "nll_loss": 2.034355640411377, "rewards/accuracies": 0.75, "rewards/chosen": -0.24143286049365997, "rewards/margins": 0.003560863435268402, "rewards/rejected": -0.24499374628067017, "step": 516 }, { "epoch": 1.4154688569472964, "grad_norm": 5.278335094451904, "learning_rate": 2.9315068493150685e-07, "log_odds_chosen": -0.8812060356140137, "log_odds_ratio": -1.3138723373413086, "logits/chosen": -0.0560636930167675, "logits/rejected": 0.0032491162419319153, "logps/chosen": -3.6448912620544434, "logps/rejected": -2.790030002593994, "loss": 2.3571, "nll_loss": 2.2256979942321777, "rewards/accuracies": 0.25, "rewards/chosen": -0.3644891083240509, "rewards/margins": -0.08548610657453537, "rewards/rejected": -0.2790030241012573, "step": 517 }, { "epoch": 1.4182067077344285, "grad_norm": 5.048372745513916, "learning_rate": 2.917808219178082e-07, "log_odds_chosen": 0.2055189311504364, "log_odds_ratio": -1.3933991193771362, "logits/chosen": 0.0026966482400894165, "logits/rejected": -0.07698722183704376, "logps/chosen": -3.7170794010162354, "logps/rejected": -3.867635726928711, "loss": 2.2968, "nll_loss": 2.157435417175293, "rewards/accuracies": 0.625, "rewards/chosen": -0.371707946062088, "rewards/margins": 0.015055650845170021, "rewards/rejected": -0.3867636024951935, "step": 518 }, { "epoch": 1.4209445585215605, "grad_norm": 5.649327754974365, "learning_rate": 2.9041095890410957e-07, "log_odds_chosen": 0.08016543090343475, "log_odds_ratio": -0.8377710580825806, "logits/chosen": 0.049582384526729584, "logits/rejected": 0.06936471164226532, "logps/chosen": -3.0108282566070557, "logps/rejected": -3.0444998741149902, "loss": 2.2939, "nll_loss": 2.210101366043091, "rewards/accuracies": 0.375, "rewards/chosen": -0.3010828197002411, "rewards/margins": 0.003367159515619278, "rewards/rejected": -0.30444997549057007, "step": 519 }, { "epoch": 1.4236824093086926, "grad_norm": 5.381533622741699, "learning_rate": 2.8904109589041093e-07, "log_odds_chosen": 0.2028275430202484, "log_odds_ratio": -0.7507288455963135, "logits/chosen": -0.012865766882896423, "logits/rejected": 0.03598605841398239, "logps/chosen": -2.5446763038635254, "logps/rejected": -2.7629916667938232, "loss": 2.3211, "nll_loss": 2.246070384979248, "rewards/accuracies": 0.25, "rewards/chosen": -0.254467636346817, "rewards/margins": 0.02183152176439762, "rewards/rejected": -0.2762991487979889, "step": 520 }, { "epoch": 1.4264202600958247, "grad_norm": 5.87486457824707, "learning_rate": 2.876712328767123e-07, "log_odds_chosen": -0.21903096139431, "log_odds_ratio": -0.9439655542373657, "logits/chosen": 0.08733217418193817, "logits/rejected": 0.07410553842782974, "logps/chosen": -3.8251490592956543, "logps/rejected": -3.5884413719177246, "loss": 2.2575, "nll_loss": 2.16312575340271, "rewards/accuracies": 0.375, "rewards/chosen": -0.3825148940086365, "rewards/margins": -0.02367076836526394, "rewards/rejected": -0.35884416103363037, "step": 521 }, { "epoch": 1.4291581108829567, "grad_norm": 5.0880045890808105, "learning_rate": 2.8630136986301366e-07, "log_odds_chosen": 0.15806369483470917, "log_odds_ratio": -0.7144630551338196, "logits/chosen": -0.08035792410373688, "logits/rejected": -0.028561636805534363, "logps/chosen": -2.8779215812683105, "logps/rejected": -2.9961435794830322, "loss": 2.2351, "nll_loss": 2.163699150085449, "rewards/accuracies": 0.625, "rewards/chosen": -0.2877921760082245, "rewards/margins": 0.01182219572365284, "rewards/rejected": -0.2996143698692322, "step": 522 }, { "epoch": 1.431895961670089, "grad_norm": 6.255398750305176, "learning_rate": 2.849315068493151e-07, "log_odds_chosen": -1.1444085836410522, "log_odds_ratio": -1.6930272579193115, "logits/chosen": 0.05462189018726349, "logits/rejected": 0.15643158555030823, "logps/chosen": -4.678664684295654, "logps/rejected": -3.5528197288513184, "loss": 2.4345, "nll_loss": 2.2651515007019043, "rewards/accuracies": 0.125, "rewards/chosen": -0.4678664803504944, "rewards/margins": -0.11258445680141449, "rewards/rejected": -0.3552820086479187, "step": 523 }, { "epoch": 1.434633812457221, "grad_norm": 6.135236740112305, "learning_rate": 2.8356164383561644e-07, "log_odds_chosen": -0.4048798382282257, "log_odds_ratio": -1.091315746307373, "logits/chosen": -0.01587524451315403, "logits/rejected": 0.05468585342168808, "logps/chosen": -3.3766250610351562, "logps/rejected": -2.962419033050537, "loss": 2.3121, "nll_loss": 2.202920436859131, "rewards/accuracies": 0.375, "rewards/chosen": -0.33766254782676697, "rewards/margins": -0.041420646011829376, "rewards/rejected": -0.2962418794631958, "step": 524 }, { "epoch": 1.4373716632443532, "grad_norm": 5.923580646514893, "learning_rate": 2.821917808219178e-07, "log_odds_chosen": 0.6138994097709656, "log_odds_ratio": -0.9321097135543823, "logits/chosen": -0.018013888970017433, "logits/rejected": -0.011266987770795822, "logps/chosen": -3.6281485557556152, "logps/rejected": -4.203816890716553, "loss": 2.3304, "nll_loss": 2.2371785640716553, "rewards/accuracies": 0.375, "rewards/chosen": -0.36281484365463257, "rewards/margins": 0.057566843926906586, "rewards/rejected": -0.42038169503211975, "step": 525 }, { "epoch": 1.4401095140314852, "grad_norm": 5.27130126953125, "learning_rate": 2.8082191780821916e-07, "log_odds_chosen": -0.7190687656402588, "log_odds_ratio": -1.2913264036178589, "logits/chosen": -0.02617219090461731, "logits/rejected": -0.06289803236722946, "logps/chosen": -3.2000279426574707, "logps/rejected": -2.490751266479492, "loss": 2.3079, "nll_loss": 2.1787281036376953, "rewards/accuracies": 0.5, "rewards/chosen": -0.32000282406806946, "rewards/margins": -0.070927694439888, "rewards/rejected": -0.24907511472702026, "step": 526 }, { "epoch": 1.4428473648186173, "grad_norm": 5.099101543426514, "learning_rate": 2.794520547945206e-07, "log_odds_chosen": -0.1783868372440338, "log_odds_ratio": -1.0022306442260742, "logits/chosen": -0.06852371990680695, "logits/rejected": -0.1995367556810379, "logps/chosen": -3.093550205230713, "logps/rejected": -2.903968572616577, "loss": 2.2311, "nll_loss": 2.1308329105377197, "rewards/accuracies": 0.25, "rewards/chosen": -0.3093550205230713, "rewards/margins": -0.018958158791065216, "rewards/rejected": -0.29039686918258667, "step": 527 }, { "epoch": 1.4455852156057496, "grad_norm": 4.85512638092041, "learning_rate": 2.7808219178082194e-07, "log_odds_chosen": 0.4128085970878601, "log_odds_ratio": -0.5583857893943787, "logits/chosen": 0.08901478350162506, "logits/rejected": 0.0512315034866333, "logps/chosen": -2.52378249168396, "logps/rejected": -2.8886475563049316, "loss": 2.1835, "nll_loss": 2.1276602745056152, "rewards/accuracies": 0.75, "rewards/chosen": -0.2523782551288605, "rewards/margins": 0.036486513912677765, "rewards/rejected": -0.28886476159095764, "step": 528 }, { "epoch": 1.4483230663928817, "grad_norm": 5.171926021575928, "learning_rate": 2.7671232876712325e-07, "log_odds_chosen": 0.11781377345323563, "log_odds_ratio": -0.8870463967323303, "logits/chosen": -0.0034662410616874695, "logits/rejected": -0.006114408373832703, "logps/chosen": -2.8348186016082764, "logps/rejected": -2.9147696495056152, "loss": 2.2254, "nll_loss": 2.1366994380950928, "rewards/accuracies": 0.625, "rewards/chosen": -0.2834818363189697, "rewards/margins": 0.0079951211810112, "rewards/rejected": -0.2914769649505615, "step": 529 }, { "epoch": 1.4510609171800137, "grad_norm": 4.479701519012451, "learning_rate": 2.753424657534246e-07, "log_odds_chosen": 0.2911962866783142, "log_odds_ratio": -0.6817440986633301, "logits/chosen": 0.18595457077026367, "logits/rejected": -0.004911322146654129, "logps/chosen": -2.082458257675171, "logps/rejected": -2.3330347537994385, "loss": 2.172, "nll_loss": 2.1038355827331543, "rewards/accuracies": 0.75, "rewards/chosen": -0.20824584364891052, "rewards/margins": 0.02505764737725258, "rewards/rejected": -0.2333034723997116, "step": 530 }, { "epoch": 1.4537987679671458, "grad_norm": 6.687743663787842, "learning_rate": 2.73972602739726e-07, "log_odds_chosen": -1.0374252796173096, "log_odds_ratio": -1.6135807037353516, "logits/chosen": 0.05407434701919556, "logits/rejected": 0.099021315574646, "logps/chosen": -3.830376148223877, "logps/rejected": -2.8291850090026855, "loss": 2.3927, "nll_loss": 2.2313826084136963, "rewards/accuracies": 0.375, "rewards/chosen": -0.38303762674331665, "rewards/margins": -0.10011914372444153, "rewards/rejected": -0.2829185128211975, "step": 531 }, { "epoch": 1.4565366187542779, "grad_norm": 4.755809783935547, "learning_rate": 2.726027397260274e-07, "log_odds_chosen": 0.05609976127743721, "log_odds_ratio": -0.7203800082206726, "logits/chosen": -0.028567075729370117, "logits/rejected": -0.037616390734910965, "logps/chosen": -2.9309182167053223, "logps/rejected": -2.971466541290283, "loss": 2.2098, "nll_loss": 2.137744665145874, "rewards/accuracies": 0.5, "rewards/chosen": -0.29309186339378357, "rewards/margins": 0.004054799675941467, "rewards/rejected": -0.29714664816856384, "step": 532 }, { "epoch": 1.45927446954141, "grad_norm": 5.1411943435668945, "learning_rate": 2.7123287671232875e-07, "log_odds_chosen": 0.07375755906105042, "log_odds_ratio": -0.7827408909797668, "logits/chosen": 0.012819580733776093, "logits/rejected": 0.009574100375175476, "logps/chosen": -2.4176580905914307, "logps/rejected": -2.4609694480895996, "loss": 2.2366, "nll_loss": 2.1583311557769775, "rewards/accuracies": 0.5, "rewards/chosen": -0.2417658120393753, "rewards/margins": 0.004331137984991074, "rewards/rejected": -0.24609696865081787, "step": 533 }, { "epoch": 1.462012320328542, "grad_norm": 5.4109015464782715, "learning_rate": 2.698630136986301e-07, "log_odds_chosen": -0.8343024849891663, "log_odds_ratio": -1.3394944667816162, "logits/chosen": 0.052829742431640625, "logits/rejected": 0.07096557319164276, "logps/chosen": -3.7396411895751953, "logps/rejected": -2.949357032775879, "loss": 2.3996, "nll_loss": 2.2656455039978027, "rewards/accuracies": 0.25, "rewards/chosen": -0.3739641308784485, "rewards/margins": -0.07902842015028, "rewards/rejected": -0.2949357032775879, "step": 534 }, { "epoch": 1.464750171115674, "grad_norm": 4.8164262771606445, "learning_rate": 2.684931506849315e-07, "log_odds_chosen": -0.6518239974975586, "log_odds_ratio": -1.1906871795654297, "logits/chosen": -0.08358734846115112, "logits/rejected": -0.08426947146654129, "logps/chosen": -2.5405781269073486, "logps/rejected": -1.9376742839813232, "loss": 2.218, "nll_loss": 2.098907709121704, "rewards/accuracies": 0.25, "rewards/chosen": -0.25405779480934143, "rewards/margins": -0.0602903738617897, "rewards/rejected": -0.19376744329929352, "step": 535 }, { "epoch": 1.4674880219028064, "grad_norm": 6.420029640197754, "learning_rate": 2.671232876712329e-07, "log_odds_chosen": -0.49013659358024597, "log_odds_ratio": -1.1313477754592896, "logits/chosen": 0.04360520839691162, "logits/rejected": 0.17583084106445312, "logps/chosen": -3.6895976066589355, "logps/rejected": -3.2317922115325928, "loss": 2.3486, "nll_loss": 2.235480785369873, "rewards/accuracies": 0.375, "rewards/chosen": -0.36895981431007385, "rewards/margins": -0.04578056558966637, "rewards/rejected": -0.3231792151927948, "step": 536 }, { "epoch": 1.4702258726899384, "grad_norm": 4.926302909851074, "learning_rate": 2.6575342465753425e-07, "log_odds_chosen": -0.5413859486579895, "log_odds_ratio": -1.185945987701416, "logits/chosen": -0.18891730904579163, "logits/rejected": -0.19275709986686707, "logps/chosen": -3.0596678256988525, "logps/rejected": -2.527942180633545, "loss": 2.286, "nll_loss": 2.167407512664795, "rewards/accuracies": 0.625, "rewards/chosen": -0.3059667944908142, "rewards/margins": -0.05317256227135658, "rewards/rejected": -0.25279420614242554, "step": 537 }, { "epoch": 1.4729637234770705, "grad_norm": 4.674115180969238, "learning_rate": 2.643835616438356e-07, "log_odds_chosen": 0.6142107844352722, "log_odds_ratio": -0.4894741475582123, "logits/chosen": 0.166742742061615, "logits/rejected": 0.03254932165145874, "logps/chosen": -2.2076103687286377, "logps/rejected": -2.766512870788574, "loss": 2.1275, "nll_loss": 2.0785789489746094, "rewards/accuracies": 0.875, "rewards/chosen": -0.2207610160112381, "rewards/margins": 0.05589026212692261, "rewards/rejected": -0.2766512930393219, "step": 538 }, { "epoch": 1.4757015742642026, "grad_norm": 5.554623126983643, "learning_rate": 2.6301369863013697e-07, "log_odds_chosen": 0.07010182738304138, "log_odds_ratio": -0.8915207982063293, "logits/chosen": -0.1867343783378601, "logits/rejected": -0.2121187150478363, "logps/chosen": -3.670637369155884, "logps/rejected": -3.700511932373047, "loss": 2.3003, "nll_loss": 2.2111940383911133, "rewards/accuracies": 0.625, "rewards/chosen": -0.3670637309551239, "rewards/margins": 0.0029874704778194427, "rewards/rejected": -0.37005123496055603, "step": 539 }, { "epoch": 1.4784394250513346, "grad_norm": 5.278901100158691, "learning_rate": 2.6164383561643833e-07, "log_odds_chosen": -0.03049275279045105, "log_odds_ratio": -0.7553520798683167, "logits/chosen": 0.049033455550670624, "logits/rejected": 0.11532928049564362, "logps/chosen": -2.8429739475250244, "logps/rejected": -2.8003602027893066, "loss": 2.1948, "nll_loss": 2.119304656982422, "rewards/accuracies": 0.375, "rewards/chosen": -0.2842974066734314, "rewards/margins": -0.004261361435055733, "rewards/rejected": -0.2800360321998596, "step": 540 }, { "epoch": 1.481177275838467, "grad_norm": 5.406636714935303, "learning_rate": 2.602739726027397e-07, "log_odds_chosen": 0.19687627255916595, "log_odds_ratio": -0.6803970336914062, "logits/chosen": 0.10068129003047943, "logits/rejected": 0.17478856444358826, "logps/chosen": -3.000424861907959, "logps/rejected": -3.16933012008667, "loss": 2.2939, "nll_loss": 2.225886821746826, "rewards/accuracies": 0.625, "rewards/chosen": -0.3000425100326538, "rewards/margins": 0.016890501603484154, "rewards/rejected": -0.3169330060482025, "step": 541 }, { "epoch": 1.483915126625599, "grad_norm": 5.053083419799805, "learning_rate": 2.5890410958904106e-07, "log_odds_chosen": -0.25765907764434814, "log_odds_ratio": -0.9448106288909912, "logits/chosen": -0.06714671850204468, "logits/rejected": -0.10157474875450134, "logps/chosen": -2.9896039962768555, "logps/rejected": -2.7261314392089844, "loss": 2.2453, "nll_loss": 2.1508500576019287, "rewards/accuracies": 0.375, "rewards/chosen": -0.2989603877067566, "rewards/margins": -0.02634725719690323, "rewards/rejected": -0.27261313796043396, "step": 542 }, { "epoch": 1.486652977412731, "grad_norm": 5.685427188873291, "learning_rate": 2.5753424657534247e-07, "log_odds_chosen": -0.20632846653461456, "log_odds_ratio": -1.0129079818725586, "logits/chosen": 0.06900501251220703, "logits/rejected": 0.11312845349311829, "logps/chosen": -2.9844894409179688, "logps/rejected": -2.7137508392333984, "loss": 2.2102, "nll_loss": 2.1088943481445312, "rewards/accuracies": 0.375, "rewards/chosen": -0.29844895005226135, "rewards/margins": -0.02707386016845703, "rewards/rejected": -0.2713750898838043, "step": 543 }, { "epoch": 1.4893908281998631, "grad_norm": 6.180427074432373, "learning_rate": 2.5616438356164383e-07, "log_odds_chosen": -0.12624932825565338, "log_odds_ratio": -0.9604856967926025, "logits/chosen": 0.06802250444889069, "logits/rejected": 0.10327346622943878, "logps/chosen": -3.5015292167663574, "logps/rejected": -3.3700990676879883, "loss": 2.3131, "nll_loss": 2.2170701026916504, "rewards/accuracies": 0.5, "rewards/chosen": -0.3501529395580292, "rewards/margins": -0.01314302533864975, "rewards/rejected": -0.33700990676879883, "step": 544 }, { "epoch": 1.4921286789869952, "grad_norm": 5.431606292724609, "learning_rate": 2.547945205479452e-07, "log_odds_chosen": -0.005647286772727966, "log_odds_ratio": -0.7341178059577942, "logits/chosen": -0.09891626983880997, "logits/rejected": -0.17287805676460266, "logps/chosen": -3.3563485145568848, "logps/rejected": -3.3306264877319336, "loss": 2.2776, "nll_loss": 2.2041664123535156, "rewards/accuracies": 0.5, "rewards/chosen": -0.33563482761383057, "rewards/margins": -0.0025721993297338486, "rewards/rejected": -0.33306261897087097, "step": 545 }, { "epoch": 1.4948665297741273, "grad_norm": 4.990319728851318, "learning_rate": 2.5342465753424656e-07, "log_odds_chosen": -0.13878941535949707, "log_odds_ratio": -1.0888487100601196, "logits/chosen": 0.1445801556110382, "logits/rejected": 0.05228227376937866, "logps/chosen": -2.9334092140197754, "logps/rejected": -2.776885747909546, "loss": 2.1918, "nll_loss": 2.082962989807129, "rewards/accuracies": 0.5, "rewards/chosen": -0.29334092140197754, "rewards/margins": -0.01565234363079071, "rewards/rejected": -0.27768856287002563, "step": 546 }, { "epoch": 1.4976043805612593, "grad_norm": 4.991815090179443, "learning_rate": 2.5205479452054797e-07, "log_odds_chosen": -0.07949626445770264, "log_odds_ratio": -0.8476681113243103, "logits/chosen": 0.0389617457985878, "logits/rejected": 0.009867965243756771, "logps/chosen": -2.4770801067352295, "logps/rejected": -2.3631927967071533, "loss": 2.189, "nll_loss": 2.1042041778564453, "rewards/accuracies": 0.625, "rewards/chosen": -0.2477080374956131, "rewards/margins": -0.011388754472136497, "rewards/rejected": -0.23631928861141205, "step": 547 }, { "epoch": 1.5003422313483914, "grad_norm": 5.854538440704346, "learning_rate": 2.5068493150684933e-07, "log_odds_chosen": -0.8737121224403381, "log_odds_ratio": -1.3934805393218994, "logits/chosen": -0.03593862056732178, "logits/rejected": 0.038441091775894165, "logps/chosen": -3.568598747253418, "logps/rejected": -2.711717367172241, "loss": 2.3376, "nll_loss": 2.198273181915283, "rewards/accuracies": 0.25, "rewards/chosen": -0.35685986280441284, "rewards/margins": -0.08568812906742096, "rewards/rejected": -0.2711717486381531, "step": 548 }, { "epoch": 1.5030800821355235, "grad_norm": 5.557005882263184, "learning_rate": 2.493150684931507e-07, "log_odds_chosen": -0.44232773780822754, "log_odds_ratio": -1.034480333328247, "logits/chosen": -0.09877490997314453, "logits/rejected": -0.06762783974409103, "logps/chosen": -3.1356709003448486, "logps/rejected": -2.7246932983398438, "loss": 2.3272, "nll_loss": 2.223726272583008, "rewards/accuracies": 0.375, "rewards/chosen": -0.3135671019554138, "rewards/margins": -0.04109775274991989, "rewards/rejected": -0.27246934175491333, "step": 549 }, { "epoch": 1.5058179329226558, "grad_norm": 5.670400142669678, "learning_rate": 2.4794520547945206e-07, "log_odds_chosen": -0.3868519067764282, "log_odds_ratio": -0.9562245607376099, "logits/chosen": -0.0761035680770874, "logits/rejected": 0.023160941898822784, "logps/chosen": -2.950747013092041, "logps/rejected": -2.59641695022583, "loss": 2.3305, "nll_loss": 2.2348828315734863, "rewards/accuracies": 0.25, "rewards/chosen": -0.2950747013092041, "rewards/margins": -0.03543300926685333, "rewards/rejected": -0.25964170694351196, "step": 550 }, { "epoch": 1.5085557837097878, "grad_norm": 5.039727210998535, "learning_rate": 2.465753424657534e-07, "log_odds_chosen": -0.27150991559028625, "log_odds_ratio": -0.9220325946807861, "logits/chosen": 0.11287762969732285, "logits/rejected": 0.06370721757411957, "logps/chosen": -2.8519012928009033, "logps/rejected": -2.575408935546875, "loss": 2.2248, "nll_loss": 2.1326093673706055, "rewards/accuracies": 0.375, "rewards/chosen": -0.2851901352405548, "rewards/margins": -0.027649246156215668, "rewards/rejected": -0.25754088163375854, "step": 551 }, { "epoch": 1.51129363449692, "grad_norm": 6.4048895835876465, "learning_rate": 2.452054794520548e-07, "log_odds_chosen": -1.2539105415344238, "log_odds_ratio": -1.678093671798706, "logits/chosen": -0.020567387342453003, "logits/rejected": 0.020001552999019623, "logps/chosen": -4.311328887939453, "logps/rejected": -3.0767054557800293, "loss": 2.3422, "nll_loss": 2.1744062900543213, "rewards/accuracies": 0.25, "rewards/chosen": -0.4311329126358032, "rewards/margins": -0.12346230447292328, "rewards/rejected": -0.30767059326171875, "step": 552 }, { "epoch": 1.5140314852840522, "grad_norm": 5.821403980255127, "learning_rate": 2.4383561643835614e-07, "log_odds_chosen": -0.12126775830984116, "log_odds_ratio": -0.8244590759277344, "logits/chosen": -0.037693388760089874, "logits/rejected": 0.01353674940764904, "logps/chosen": -3.136507511138916, "logps/rejected": -2.9899284839630127, "loss": 2.2245, "nll_loss": 2.1420202255249023, "rewards/accuracies": 0.625, "rewards/chosen": -0.3136507272720337, "rewards/margins": -0.014657881110906601, "rewards/rejected": -0.2989928722381592, "step": 553 }, { "epoch": 1.5167693360711842, "grad_norm": 5.919713973999023, "learning_rate": 2.424657534246575e-07, "log_odds_chosen": -0.5319406390190125, "log_odds_ratio": -1.0879271030426025, "logits/chosen": -0.04304712265729904, "logits/rejected": 0.028457028791308403, "logps/chosen": -3.194000720977783, "logps/rejected": -2.6742663383483887, "loss": 2.3158, "nll_loss": 2.2070515155792236, "rewards/accuracies": 0.375, "rewards/chosen": -0.3194000720977783, "rewards/margins": -0.05197344720363617, "rewards/rejected": -0.26742663979530334, "step": 554 }, { "epoch": 1.5195071868583163, "grad_norm": 5.694066047668457, "learning_rate": 2.410958904109589e-07, "log_odds_chosen": 0.16525837779045105, "log_odds_ratio": -0.776091456413269, "logits/chosen": -0.06310921162366867, "logits/rejected": -0.14551225304603577, "logps/chosen": -3.8357203006744385, "logps/rejected": -3.970489025115967, "loss": 2.3237, "nll_loss": 2.2461113929748535, "rewards/accuracies": 0.375, "rewards/chosen": -0.3835720419883728, "rewards/margins": 0.013476856052875519, "rewards/rejected": -0.3970488905906677, "step": 555 }, { "epoch": 1.5222450376454484, "grad_norm": 5.482293605804443, "learning_rate": 2.3972602739726023e-07, "log_odds_chosen": -0.28749850392341614, "log_odds_ratio": -0.9448988437652588, "logits/chosen": -0.03511228784918785, "logits/rejected": -0.07359041273593903, "logps/chosen": -2.6896235942840576, "logps/rejected": -2.3910350799560547, "loss": 2.2005, "nll_loss": 2.1060445308685303, "rewards/accuracies": 0.5, "rewards/chosen": -0.26896238327026367, "rewards/margins": -0.029858866706490517, "rewards/rejected": -0.2391035109758377, "step": 556 }, { "epoch": 1.5249828884325805, "grad_norm": 5.1661176681518555, "learning_rate": 2.3835616438356162e-07, "log_odds_chosen": -0.3651108741760254, "log_odds_ratio": -1.1762151718139648, "logits/chosen": -0.023738035932183266, "logits/rejected": -0.07882168889045715, "logps/chosen": -3.3604140281677246, "logps/rejected": -2.979665756225586, "loss": 2.2918, "nll_loss": 2.1741390228271484, "rewards/accuracies": 0.5, "rewards/chosen": -0.3360413908958435, "rewards/margins": -0.03807482123374939, "rewards/rejected": -0.2979665994644165, "step": 557 }, { "epoch": 1.5277207392197125, "grad_norm": 5.658573627471924, "learning_rate": 2.36986301369863e-07, "log_odds_chosen": -0.10685323923826218, "log_odds_ratio": -0.7833995223045349, "logits/chosen": -0.03857327252626419, "logits/rejected": -0.05402214825153351, "logps/chosen": -2.9609127044677734, "logps/rejected": -2.848165273666382, "loss": 2.2375, "nll_loss": 2.1591506004333496, "rewards/accuracies": 0.5, "rewards/chosen": -0.2960912585258484, "rewards/margins": -0.011274725198745728, "rewards/rejected": -0.28481653332710266, "step": 558 }, { "epoch": 1.5304585900068446, "grad_norm": 5.089676380157471, "learning_rate": 2.3561643835616437e-07, "log_odds_chosen": -0.4793161451816559, "log_odds_ratio": -1.087127685546875, "logits/chosen": -0.12383940815925598, "logits/rejected": -0.1696588099002838, "logps/chosen": -2.7777037620544434, "logps/rejected": -2.294367790222168, "loss": 2.2318, "nll_loss": 2.1230435371398926, "rewards/accuracies": 0.375, "rewards/chosen": -0.27777040004730225, "rewards/margins": -0.04833361878991127, "rewards/rejected": -0.22943677008152008, "step": 559 }, { "epoch": 1.5331964407939767, "grad_norm": 6.460014820098877, "learning_rate": 2.3424657534246576e-07, "log_odds_chosen": -0.4386973977088928, "log_odds_ratio": -1.0162138938903809, "logits/chosen": 0.031088724732398987, "logits/rejected": 0.09123626351356506, "logps/chosen": -3.85209321975708, "logps/rejected": -3.416396141052246, "loss": 2.3193, "nll_loss": 2.217728614807129, "rewards/accuracies": 0.25, "rewards/chosen": -0.385209321975708, "rewards/margins": -0.043569713830947876, "rewards/rejected": -0.34163957834243774, "step": 560 }, { "epoch": 1.5359342915811087, "grad_norm": 5.748598575592041, "learning_rate": 2.328767123287671e-07, "log_odds_chosen": -0.008034393191337585, "log_odds_ratio": -0.8269500732421875, "logits/chosen": 0.1410813331604004, "logits/rejected": 0.19633471965789795, "logps/chosen": -2.985964059829712, "logps/rejected": -2.963994264602661, "loss": 2.2449, "nll_loss": 2.1621718406677246, "rewards/accuracies": 0.375, "rewards/chosen": -0.2985963821411133, "rewards/margins": -0.00219696294516325, "rewards/rejected": -0.29639941453933716, "step": 561 }, { "epoch": 1.5386721423682408, "grad_norm": 6.240468978881836, "learning_rate": 2.3150684931506848e-07, "log_odds_chosen": -0.8862042427062988, "log_odds_ratio": -1.5335297584533691, "logits/chosen": 0.007831797003746033, "logits/rejected": 0.05675174295902252, "logps/chosen": -4.4758219718933105, "logps/rejected": -3.598918914794922, "loss": 2.3531, "nll_loss": 2.1996991634368896, "rewards/accuracies": 0.5, "rewards/chosen": -0.4475822150707245, "rewards/margins": -0.0876903384923935, "rewards/rejected": -0.3598918914794922, "step": 562 }, { "epoch": 1.541409993155373, "grad_norm": 5.3637542724609375, "learning_rate": 2.3013698630136984e-07, "log_odds_chosen": -0.5796384811401367, "log_odds_ratio": -1.1875134706497192, "logits/chosen": -0.19675993919372559, "logits/rejected": -0.1654302477836609, "logps/chosen": -3.0511913299560547, "logps/rejected": -2.473684310913086, "loss": 2.1866, "nll_loss": 2.0678226947784424, "rewards/accuracies": 0.375, "rewards/chosen": -0.3051191568374634, "rewards/margins": -0.057750701904296875, "rewards/rejected": -0.2473684400320053, "step": 563 }, { "epoch": 1.5441478439425051, "grad_norm": 5.644280433654785, "learning_rate": 2.2876712328767123e-07, "log_odds_chosen": -0.7876030802726746, "log_odds_ratio": -1.300382137298584, "logits/chosen": -0.08872195333242416, "logits/rejected": -0.03610421344637871, "logps/chosen": -3.52645206451416, "logps/rejected": -2.7632341384887695, "loss": 2.3094, "nll_loss": 2.179349422454834, "rewards/accuracies": 0.125, "rewards/chosen": -0.35264521837234497, "rewards/margins": -0.0763217955827713, "rewards/rejected": -0.27632343769073486, "step": 564 }, { "epoch": 1.5468856947296372, "grad_norm": 5.900241374969482, "learning_rate": 2.273972602739726e-07, "log_odds_chosen": -0.44707953929901123, "log_odds_ratio": -1.2865389585494995, "logits/chosen": -0.04851119965314865, "logits/rejected": -0.11282414197921753, "logps/chosen": -3.692246437072754, "logps/rejected": -3.2365899085998535, "loss": 2.3113, "nll_loss": 2.182637929916382, "rewards/accuracies": 0.5, "rewards/chosen": -0.3692246377468109, "rewards/margins": -0.0455656461417675, "rewards/rejected": -0.3236590027809143, "step": 565 }, { "epoch": 1.5496235455167693, "grad_norm": 5.089560508728027, "learning_rate": 2.2602739726027396e-07, "log_odds_chosen": 0.8582962155342102, "log_odds_ratio": -0.721403956413269, "logits/chosen": 0.09629938006401062, "logits/rejected": 0.10864283889532089, "logps/chosen": -2.572805404663086, "logps/rejected": -3.3152432441711426, "loss": 2.2, "nll_loss": 2.127857208251953, "rewards/accuracies": 0.75, "rewards/chosen": -0.257280558347702, "rewards/margins": 0.07424376904964447, "rewards/rejected": -0.3315243124961853, "step": 566 }, { "epoch": 1.5523613963039016, "grad_norm": 5.053847789764404, "learning_rate": 2.2465753424657532e-07, "log_odds_chosen": -0.03251586854457855, "log_odds_ratio": -0.8611246347427368, "logits/chosen": -0.056228965520858765, "logits/rejected": -0.1135706827044487, "logps/chosen": -2.7730116844177246, "logps/rejected": -2.701169013977051, "loss": 2.2492, "nll_loss": 2.1630892753601074, "rewards/accuracies": 0.625, "rewards/chosen": -0.27730119228363037, "rewards/margins": -0.007184267044067383, "rewards/rejected": -0.2701168954372406, "step": 567 }, { "epoch": 1.5550992470910336, "grad_norm": 4.544806957244873, "learning_rate": 2.232876712328767e-07, "log_odds_chosen": 0.3524159789085388, "log_odds_ratio": -0.6729674935340881, "logits/chosen": -0.12071948498487473, "logits/rejected": -0.22868266701698303, "logps/chosen": -2.2626659870147705, "logps/rejected": -2.511387586593628, "loss": 2.1393, "nll_loss": 2.072007656097412, "rewards/accuracies": 0.625, "rewards/chosen": -0.22626660764217377, "rewards/margins": 0.024872148409485817, "rewards/rejected": -0.25113874673843384, "step": 568 }, { "epoch": 1.5578370978781657, "grad_norm": 5.652469158172607, "learning_rate": 2.2191780821917807e-07, "log_odds_chosen": 0.01816701889038086, "log_odds_ratio": -0.7838731408119202, "logits/chosen": 0.07203053683042526, "logits/rejected": 0.04526427015662193, "logps/chosen": -3.29569149017334, "logps/rejected": -3.2989864349365234, "loss": 2.2906, "nll_loss": 2.212198257446289, "rewards/accuracies": 0.5, "rewards/chosen": -0.3295691907405853, "rewards/margins": 0.0003294851630926132, "rewards/rejected": -0.3298986554145813, "step": 569 }, { "epoch": 1.5605749486652978, "grad_norm": 5.649674415588379, "learning_rate": 2.2054794520547946e-07, "log_odds_chosen": -0.07336264848709106, "log_odds_ratio": -0.9111175537109375, "logits/chosen": 0.05582130327820778, "logits/rejected": 0.055057063698768616, "logps/chosen": -3.472378730773926, "logps/rejected": -3.367121934890747, "loss": 2.1561, "nll_loss": 2.0650219917297363, "rewards/accuracies": 0.5, "rewards/chosen": -0.34723788499832153, "rewards/margins": -0.010525684803724289, "rewards/rejected": -0.33671218156814575, "step": 570 }, { "epoch": 1.5633127994524298, "grad_norm": 6.844268798828125, "learning_rate": 2.191780821917808e-07, "log_odds_chosen": -0.8138778209686279, "log_odds_ratio": -1.3301362991333008, "logits/chosen": 0.02802121639251709, "logits/rejected": 0.1293838769197464, "logps/chosen": -3.921013116836548, "logps/rejected": -3.119124412536621, "loss": 2.3411, "nll_loss": 2.2081010341644287, "rewards/accuracies": 0.25, "rewards/chosen": -0.39210131764411926, "rewards/margins": -0.08018887042999268, "rewards/rejected": -0.3119124472141266, "step": 571 }, { "epoch": 1.566050650239562, "grad_norm": 5.803696632385254, "learning_rate": 2.1780821917808218e-07, "log_odds_chosen": 0.011398404836654663, "log_odds_ratio": -0.7619016170501709, "logits/chosen": -0.10779833793640137, "logits/rejected": 0.03627026826143265, "logps/chosen": -2.838355541229248, "logps/rejected": -2.853437662124634, "loss": 2.2277, "nll_loss": 2.1515002250671387, "rewards/accuracies": 0.75, "rewards/chosen": -0.2838355600833893, "rewards/margins": 0.0015082154422998428, "rewards/rejected": -0.2853437662124634, "step": 572 }, { "epoch": 1.568788501026694, "grad_norm": 6.36287260055542, "learning_rate": 2.1643835616438354e-07, "log_odds_chosen": -0.6578937768936157, "log_odds_ratio": -1.361232042312622, "logits/chosen": 0.05470391735434532, "logits/rejected": -0.009883329272270203, "logps/chosen": -3.4437878131866455, "logps/rejected": -2.7723946571350098, "loss": 2.2573, "nll_loss": 2.1211915016174316, "rewards/accuracies": 0.375, "rewards/chosen": -0.3443787693977356, "rewards/margins": -0.06713930517435074, "rewards/rejected": -0.27723947167396545, "step": 573 }, { "epoch": 1.571526351813826, "grad_norm": 5.361265659332275, "learning_rate": 2.1506849315068493e-07, "log_odds_chosen": 0.15570539236068726, "log_odds_ratio": -0.7123672366142273, "logits/chosen": -0.06779703497886658, "logits/rejected": -0.0768340528011322, "logps/chosen": -2.44968843460083, "logps/rejected": -2.553757429122925, "loss": 2.1794, "nll_loss": 2.1081340312957764, "rewards/accuracies": 0.5, "rewards/chosen": -0.24496883153915405, "rewards/margins": 0.01040690578520298, "rewards/rejected": -0.2553757429122925, "step": 574 }, { "epoch": 1.5742642026009581, "grad_norm": 5.569300174713135, "learning_rate": 2.136986301369863e-07, "log_odds_chosen": 0.41982123255729675, "log_odds_ratio": -0.7556486129760742, "logits/chosen": -0.09448663890361786, "logits/rejected": -0.14651884138584137, "logps/chosen": -3.6606955528259277, "logps/rejected": -4.052150726318359, "loss": 2.2524, "nll_loss": 2.176837921142578, "rewards/accuracies": 0.5, "rewards/chosen": -0.3660695552825928, "rewards/margins": 0.03914551064372063, "rewards/rejected": -0.4052150845527649, "step": 575 }, { "epoch": 1.5770020533880902, "grad_norm": 5.966915607452393, "learning_rate": 2.1232876712328765e-07, "log_odds_chosen": -0.4647347331047058, "log_odds_ratio": -1.0405648946762085, "logits/chosen": -0.016438063234090805, "logits/rejected": 0.07092882692813873, "logps/chosen": -3.1212968826293945, "logps/rejected": -2.6526708602905273, "loss": 2.2955, "nll_loss": 2.191460132598877, "rewards/accuracies": 0.25, "rewards/chosen": -0.3121296763420105, "rewards/margins": -0.04686259478330612, "rewards/rejected": -0.26526710391044617, "step": 576 }, { "epoch": 1.5797399041752225, "grad_norm": 4.850164890289307, "learning_rate": 2.1095890410958902e-07, "log_odds_chosen": 0.24344338476657867, "log_odds_ratio": -0.6307194232940674, "logits/chosen": -0.10474321246147156, "logits/rejected": -0.08706086874008179, "logps/chosen": -2.47794246673584, "logps/rejected": -2.6856203079223633, "loss": 2.2503, "nll_loss": 2.1872189044952393, "rewards/accuracies": 0.625, "rewards/chosen": -0.2477942407131195, "rewards/margins": 0.020767800509929657, "rewards/rejected": -0.26856204867362976, "step": 577 }, { "epoch": 1.5824777549623545, "grad_norm": 5.840340614318848, "learning_rate": 2.095890410958904e-07, "log_odds_chosen": 0.6737513542175293, "log_odds_ratio": -0.44432783126831055, "logits/chosen": 0.01895061321556568, "logits/rejected": 0.08875679969787598, "logps/chosen": -2.165029287338257, "logps/rejected": -2.741405725479126, "loss": 2.1163, "nll_loss": 2.0719056129455566, "rewards/accuracies": 0.75, "rewards/chosen": -0.21650293469429016, "rewards/margins": 0.05763763189315796, "rewards/rejected": -0.2741405665874481, "step": 578 }, { "epoch": 1.5852156057494866, "grad_norm": 4.944622993469238, "learning_rate": 2.0821917808219177e-07, "log_odds_chosen": 0.1604032963514328, "log_odds_ratio": -0.6677991151809692, "logits/chosen": -0.03348403051495552, "logits/rejected": -0.0876348689198494, "logps/chosen": -2.663832902908325, "logps/rejected": -2.789684534072876, "loss": 2.2033, "nll_loss": 2.1365573406219482, "rewards/accuracies": 0.75, "rewards/chosen": -0.2663833200931549, "rewards/margins": 0.012585144490003586, "rewards/rejected": -0.27896848320961, "step": 579 }, { "epoch": 1.587953456536619, "grad_norm": 5.1557135581970215, "learning_rate": 2.0684931506849315e-07, "log_odds_chosen": -0.2214706838130951, "log_odds_ratio": -0.9269648194313049, "logits/chosen": -0.04638083279132843, "logits/rejected": -0.05210705101490021, "logps/chosen": -2.971705436706543, "logps/rejected": -2.7431726455688477, "loss": 2.1665, "nll_loss": 2.0737578868865967, "rewards/accuracies": 0.375, "rewards/chosen": -0.29717057943344116, "rewards/margins": -0.022853288799524307, "rewards/rejected": -0.27431729435920715, "step": 580 }, { "epoch": 1.590691307323751, "grad_norm": 5.564054012298584, "learning_rate": 2.054794520547945e-07, "log_odds_chosen": -0.14796192944049835, "log_odds_ratio": -1.0848042964935303, "logits/chosen": -0.0646420270204544, "logits/rejected": 0.0018488354980945587, "logps/chosen": -3.6014719009399414, "logps/rejected": -3.4542555809020996, "loss": 2.2088, "nll_loss": 2.100339889526367, "rewards/accuracies": 0.625, "rewards/chosen": -0.3601471781730652, "rewards/margins": -0.014721645042300224, "rewards/rejected": -0.345425546169281, "step": 581 }, { "epoch": 1.593429158110883, "grad_norm": 5.418309211730957, "learning_rate": 2.0410958904109588e-07, "log_odds_chosen": 0.3440519869327545, "log_odds_ratio": -0.6563155651092529, "logits/chosen": -0.11226478964090347, "logits/rejected": -0.20942528545856476, "logps/chosen": -2.8105690479278564, "logps/rejected": -3.1278393268585205, "loss": 2.183, "nll_loss": 2.1173255443573, "rewards/accuracies": 0.5, "rewards/chosen": -0.2810569107532501, "rewards/margins": 0.03172703832387924, "rewards/rejected": -0.31278395652770996, "step": 582 }, { "epoch": 1.596167008898015, "grad_norm": 6.244126319885254, "learning_rate": 2.0273972602739724e-07, "log_odds_chosen": -1.3351572751998901, "log_odds_ratio": -1.7510015964508057, "logits/chosen": -0.07525582611560822, "logits/rejected": -0.09010274708271027, "logps/chosen": -3.753444194793701, "logps/rejected": -2.45393443107605, "loss": 2.2694, "nll_loss": 2.094249963760376, "rewards/accuracies": 0.25, "rewards/chosen": -0.375344455242157, "rewards/margins": -0.12995097041130066, "rewards/rejected": -0.24539345502853394, "step": 583 }, { "epoch": 1.5989048596851472, "grad_norm": 6.10922908782959, "learning_rate": 2.0136986301369863e-07, "log_odds_chosen": -0.3520846664905548, "log_odds_ratio": -1.2870498895645142, "logits/chosen": -0.01559438556432724, "logits/rejected": 0.042674195021390915, "logps/chosen": -3.7990188598632812, "logps/rejected": -3.4303359985351562, "loss": 2.3038, "nll_loss": 2.1750454902648926, "rewards/accuracies": 0.25, "rewards/chosen": -0.3799018859863281, "rewards/margins": -0.03686828911304474, "rewards/rejected": -0.3430335819721222, "step": 584 }, { "epoch": 1.6016427104722792, "grad_norm": 5.434193134307861, "learning_rate": 2e-07, "log_odds_chosen": 0.06304112076759338, "log_odds_ratio": -0.8204584121704102, "logits/chosen": 0.13958801329135895, "logits/rejected": 0.1592503786087036, "logps/chosen": -2.618809938430786, "logps/rejected": -2.638763427734375, "loss": 2.1997, "nll_loss": 2.1176533699035645, "rewards/accuracies": 0.625, "rewards/chosen": -0.2618809938430786, "rewards/margins": 0.0019953586161136627, "rewards/rejected": -0.263876348733902, "step": 585 }, { "epoch": 1.6043805612594113, "grad_norm": 7.412431716918945, "learning_rate": 1.9863013698630135e-07, "log_odds_chosen": -1.2147984504699707, "log_odds_ratio": -1.7834229469299316, "logits/chosen": 0.08131338655948639, "logits/rejected": 0.14353159070014954, "logps/chosen": -5.085596084594727, "logps/rejected": -3.8742222785949707, "loss": 2.4281, "nll_loss": 2.249748945236206, "rewards/accuracies": 0.375, "rewards/chosen": -0.5085596442222595, "rewards/margins": -0.12113737314939499, "rewards/rejected": -0.38742226362228394, "step": 586 }, { "epoch": 1.6071184120465434, "grad_norm": 5.912126064300537, "learning_rate": 1.9726027397260271e-07, "log_odds_chosen": -0.6296719312667847, "log_odds_ratio": -1.167496681213379, "logits/chosen": -0.005005082115530968, "logits/rejected": 0.05924293026328087, "logps/chosen": -3.5835700035095215, "logps/rejected": -2.9485461711883545, "loss": 2.2865, "nll_loss": 2.169755458831787, "rewards/accuracies": 0.25, "rewards/chosen": -0.3583570122718811, "rewards/margins": -0.06350235641002655, "rewards/rejected": -0.29485464096069336, "step": 587 }, { "epoch": 1.6098562628336754, "grad_norm": 5.4915032386779785, "learning_rate": 1.958904109589041e-07, "log_odds_chosen": 0.24082162976264954, "log_odds_ratio": -0.7046340107917786, "logits/chosen": 0.08292367309331894, "logits/rejected": 0.13162079453468323, "logps/chosen": -2.8182568550109863, "logps/rejected": -3.0356922149658203, "loss": 2.1924, "nll_loss": 2.1218886375427246, "rewards/accuracies": 0.625, "rewards/chosen": -0.2818256914615631, "rewards/margins": 0.021743519231677055, "rewards/rejected": -0.3035691976547241, "step": 588 }, { "epoch": 1.6125941136208075, "grad_norm": 5.473448276519775, "learning_rate": 1.9452054794520547e-07, "log_odds_chosen": -0.3103231191635132, "log_odds_ratio": -1.0448353290557861, "logits/chosen": 0.1299791783094406, "logits/rejected": 0.08350804448127747, "logps/chosen": -3.084275722503662, "logps/rejected": -2.7360758781433105, "loss": 2.2937, "nll_loss": 2.1892144680023193, "rewards/accuracies": 0.5, "rewards/chosen": -0.3084276020526886, "rewards/margins": -0.03481999784708023, "rewards/rejected": -0.27360761165618896, "step": 589 }, { "epoch": 1.6153319644079398, "grad_norm": 4.871999263763428, "learning_rate": 1.9315068493150685e-07, "log_odds_chosen": -0.18999633193016052, "log_odds_ratio": -0.8266924619674683, "logits/chosen": -0.07786361128091812, "logits/rejected": -0.12627661228179932, "logps/chosen": -3.2404356002807617, "logps/rejected": -3.0559134483337402, "loss": 2.214, "nll_loss": 2.1313586235046387, "rewards/accuracies": 0.125, "rewards/chosen": -0.3240435719490051, "rewards/margins": -0.018452221527695656, "rewards/rejected": -0.305591344833374, "step": 590 }, { "epoch": 1.6180698151950719, "grad_norm": 5.336678981781006, "learning_rate": 1.917808219178082e-07, "log_odds_chosen": -0.10508052259683609, "log_odds_ratio": -0.897978663444519, "logits/chosen": -0.008766859769821167, "logits/rejected": 0.011123545467853546, "logps/chosen": -3.1517601013183594, "logps/rejected": -3.0171029567718506, "loss": 2.2147, "nll_loss": 2.1249451637268066, "rewards/accuracies": 0.5, "rewards/chosen": -0.31517601013183594, "rewards/margins": -0.013465732336044312, "rewards/rejected": -0.301710307598114, "step": 591 }, { "epoch": 1.620807665982204, "grad_norm": 5.862100601196289, "learning_rate": 1.9041095890410958e-07, "log_odds_chosen": 0.23669439554214478, "log_odds_ratio": -0.7921369075775146, "logits/chosen": -0.03343817591667175, "logits/rejected": -0.07690057158470154, "logps/chosen": -3.3720171451568604, "logps/rejected": -3.595071792602539, "loss": 2.2891, "nll_loss": 2.209873676300049, "rewards/accuracies": 0.5, "rewards/chosen": -0.33720171451568604, "rewards/margins": 0.022305479273200035, "rewards/rejected": -0.3595072031021118, "step": 592 }, { "epoch": 1.6235455167693362, "grad_norm": 6.5980706214904785, "learning_rate": 1.8904109589041094e-07, "log_odds_chosen": -0.22329150140285492, "log_odds_ratio": -0.8478051424026489, "logits/chosen": 0.039519790560007095, "logits/rejected": 0.1569204181432724, "logps/chosen": -3.6792540550231934, "logps/rejected": -3.47273588180542, "loss": 2.3005, "nll_loss": 2.2157230377197266, "rewards/accuracies": 0.25, "rewards/chosen": -0.36792540550231934, "rewards/margins": -0.020651815459132195, "rewards/rejected": -0.347273588180542, "step": 593 }, { "epoch": 1.6262833675564683, "grad_norm": 6.6485915184021, "learning_rate": 1.8767123287671233e-07, "log_odds_chosen": -0.9320101141929626, "log_odds_ratio": -1.5208487510681152, "logits/chosen": 0.08468257635831833, "logits/rejected": 0.18579307198524475, "logps/chosen": -3.920391321182251, "logps/rejected": -3.0301122665405273, "loss": 2.284, "nll_loss": 2.1319580078125, "rewards/accuracies": 0.375, "rewards/chosen": -0.39203912019729614, "rewards/margins": -0.08902789652347565, "rewards/rejected": -0.3030112385749817, "step": 594 }, { "epoch": 1.6290212183436004, "grad_norm": 5.371721267700195, "learning_rate": 1.863013698630137e-07, "log_odds_chosen": 0.271679162979126, "log_odds_ratio": -0.5888705253601074, "logits/chosen": 0.04132237285375595, "logits/rejected": 0.08964589238166809, "logps/chosen": -1.8514416217803955, "logps/rejected": -2.0735535621643066, "loss": 2.1455, "nll_loss": 2.0865964889526367, "rewards/accuracies": 0.75, "rewards/chosen": -0.18514417111873627, "rewards/margins": 0.02221120148897171, "rewards/rejected": -0.20735538005828857, "step": 595 }, { "epoch": 1.6317590691307324, "grad_norm": 5.793245315551758, "learning_rate": 1.8493150684931505e-07, "log_odds_chosen": -0.7379769682884216, "log_odds_ratio": -1.2550430297851562, "logits/chosen": -0.08992086350917816, "logits/rejected": -0.022838369011878967, "logps/chosen": -3.2800989151000977, "logps/rejected": -2.5735373497009277, "loss": 2.2468, "nll_loss": 2.121326446533203, "rewards/accuracies": 0.375, "rewards/chosen": -0.3280099034309387, "rewards/margins": -0.07065616548061371, "rewards/rejected": -0.2573537528514862, "step": 596 }, { "epoch": 1.6344969199178645, "grad_norm": 5.176736354827881, "learning_rate": 1.8356164383561641e-07, "log_odds_chosen": -0.6095224618911743, "log_odds_ratio": -1.1393523216247559, "logits/chosen": 0.060675449669361115, "logits/rejected": 0.08242342621088028, "logps/chosen": -2.689720392227173, "logps/rejected": -2.1263175010681152, "loss": 2.2538, "nll_loss": 2.139864206314087, "rewards/accuracies": 0.25, "rewards/chosen": -0.2689720690250397, "rewards/margins": -0.056340303272008896, "rewards/rejected": -0.21263174712657928, "step": 597 }, { "epoch": 1.6372347707049966, "grad_norm": 5.602677822113037, "learning_rate": 1.821917808219178e-07, "log_odds_chosen": 0.27726149559020996, "log_odds_ratio": -0.6138153672218323, "logits/chosen": 0.07059195637702942, "logits/rejected": 0.11643052101135254, "logps/chosen": -2.870638132095337, "logps/rejected": -3.113501787185669, "loss": 2.1843, "nll_loss": 2.1229639053344727, "rewards/accuracies": 0.625, "rewards/chosen": -0.2870638072490692, "rewards/margins": 0.02428637258708477, "rewards/rejected": -0.31135016679763794, "step": 598 }, { "epoch": 1.6399726214921286, "grad_norm": 5.67423152923584, "learning_rate": 1.8082191780821916e-07, "log_odds_chosen": -0.110092394053936, "log_odds_ratio": -1.0345964431762695, "logits/chosen": -0.05426688492298126, "logits/rejected": -0.06051209196448326, "logps/chosen": -3.10129976272583, "logps/rejected": -2.929213047027588, "loss": 2.1752, "nll_loss": 2.071751594543457, "rewards/accuracies": 0.625, "rewards/chosen": -0.31012994050979614, "rewards/margins": -0.017208654433488846, "rewards/rejected": -0.2929213047027588, "step": 599 }, { "epoch": 1.6427104722792607, "grad_norm": 6.045656681060791, "learning_rate": 1.7945205479452055e-07, "log_odds_chosen": 0.3923220634460449, "log_odds_ratio": -0.7237187027931213, "logits/chosen": 0.02380421757698059, "logits/rejected": 0.022931702435016632, "logps/chosen": -3.1060571670532227, "logps/rejected": -3.442901134490967, "loss": 2.1679, "nll_loss": 2.0954864025115967, "rewards/accuracies": 0.625, "rewards/chosen": -0.3106057047843933, "rewards/margins": 0.03368441388010979, "rewards/rejected": -0.3442901372909546, "step": 600 }, { "epoch": 1.6454483230663928, "grad_norm": 6.114492893218994, "learning_rate": 1.780821917808219e-07, "log_odds_chosen": -0.44790223240852356, "log_odds_ratio": -0.9909592866897583, "logits/chosen": -0.046985406428575516, "logits/rejected": 0.049061767756938934, "logps/chosen": -3.0724236965179443, "logps/rejected": -2.6573615074157715, "loss": 2.2236, "nll_loss": 2.1244821548461914, "rewards/accuracies": 0.25, "rewards/chosen": -0.30724239349365234, "rewards/margins": -0.04150623828172684, "rewards/rejected": -0.2657361328601837, "step": 601 }, { "epoch": 1.6481861738535248, "grad_norm": 5.613903522491455, "learning_rate": 1.7671232876712328e-07, "log_odds_chosen": -0.5900091528892517, "log_odds_ratio": -1.1112432479858398, "logits/chosen": -0.10307737439870834, "logits/rejected": -0.06156112253665924, "logps/chosen": -3.6886260509490967, "logps/rejected": -3.1087865829467773, "loss": 2.2985, "nll_loss": 2.187370777130127, "rewards/accuracies": 0.375, "rewards/chosen": -0.3688625991344452, "rewards/margins": -0.057983942329883575, "rewards/rejected": -0.3108786642551422, "step": 602 }, { "epoch": 1.6509240246406571, "grad_norm": 6.42303991317749, "learning_rate": 1.7534246575342464e-07, "log_odds_chosen": -0.29632771015167236, "log_odds_ratio": -1.0942301750183105, "logits/chosen": 0.16803258657455444, "logits/rejected": 0.14230607450008392, "logps/chosen": -3.5926883220672607, "logps/rejected": -3.2439465522766113, "loss": 2.2332, "nll_loss": 2.12382173538208, "rewards/accuracies": 0.375, "rewards/chosen": -0.35926884412765503, "rewards/margins": -0.03487417474389076, "rewards/rejected": -0.3243946433067322, "step": 603 }, { "epoch": 1.6536618754277892, "grad_norm": 5.628364086151123, "learning_rate": 1.7397260273972603e-07, "log_odds_chosen": 0.16478905081748962, "log_odds_ratio": -0.9392279386520386, "logits/chosen": -0.04450583830475807, "logits/rejected": 0.029538122937083244, "logps/chosen": -2.758500576019287, "logps/rejected": -2.8995280265808105, "loss": 2.2009, "nll_loss": 2.1069939136505127, "rewards/accuracies": 0.5, "rewards/chosen": -0.2758500874042511, "rewards/margins": 0.014102723449468613, "rewards/rejected": -0.28995281457901, "step": 604 }, { "epoch": 1.6563997262149213, "grad_norm": 6.3369832038879395, "learning_rate": 1.7260273972602742e-07, "log_odds_chosen": -0.3512117862701416, "log_odds_ratio": -1.0562785863876343, "logits/chosen": 0.05240087956190109, "logits/rejected": 0.1324741393327713, "logps/chosen": -3.1208455562591553, "logps/rejected": -2.8031864166259766, "loss": 2.2217, "nll_loss": 2.1160471439361572, "rewards/accuracies": 0.5, "rewards/chosen": -0.3120845556259155, "rewards/margins": -0.0317658856511116, "rewards/rejected": -0.2803186774253845, "step": 605 }, { "epoch": 1.6591375770020536, "grad_norm": 5.154017925262451, "learning_rate": 1.7123287671232875e-07, "log_odds_chosen": 0.39802199602127075, "log_odds_ratio": -0.6113036870956421, "logits/chosen": -0.05108148232102394, "logits/rejected": -0.05925992131233215, "logps/chosen": -1.9033989906311035, "logps/rejected": -2.2217817306518555, "loss": 2.1294, "nll_loss": 2.068286895751953, "rewards/accuracies": 0.75, "rewards/chosen": -0.19033992290496826, "rewards/margins": 0.031838271766901016, "rewards/rejected": -0.22217819094657898, "step": 606 }, { "epoch": 1.6618754277891856, "grad_norm": 5.151348114013672, "learning_rate": 1.698630136986301e-07, "log_odds_chosen": -0.13655731081962585, "log_odds_ratio": -0.9086580276489258, "logits/chosen": 0.015147991478443146, "logits/rejected": -0.016017291694879532, "logps/chosen": -2.921653985977173, "logps/rejected": -2.767646074295044, "loss": 2.1785, "nll_loss": 2.087609052658081, "rewards/accuracies": 0.625, "rewards/chosen": -0.2921653985977173, "rewards/margins": -0.015400800853967667, "rewards/rejected": -0.2767646014690399, "step": 607 }, { "epoch": 1.6646132785763177, "grad_norm": 5.600038051605225, "learning_rate": 1.684931506849315e-07, "log_odds_chosen": -0.41509318351745605, "log_odds_ratio": -1.0732579231262207, "logits/chosen": 0.09531357139348984, "logits/rejected": 0.1458883434534073, "logps/chosen": -3.3366177082061768, "logps/rejected": -2.9086227416992188, "loss": 2.3107, "nll_loss": 2.203362464904785, "rewards/accuracies": 0.5, "rewards/chosen": -0.3336617946624756, "rewards/margins": -0.0427994579076767, "rewards/rejected": -0.2908623218536377, "step": 608 }, { "epoch": 1.6673511293634498, "grad_norm": 5.383072376251221, "learning_rate": 1.671232876712329e-07, "log_odds_chosen": 0.4746454656124115, "log_odds_ratio": -0.5612986087799072, "logits/chosen": -0.09094057977199554, "logits/rejected": -0.06172440946102142, "logps/chosen": -2.696326494216919, "logps/rejected": -3.132331371307373, "loss": 2.1157, "nll_loss": 2.059566020965576, "rewards/accuracies": 0.75, "rewards/chosen": -0.2696326673030853, "rewards/margins": 0.04360046982765198, "rewards/rejected": -0.3132331371307373, "step": 609 }, { "epoch": 1.6700889801505818, "grad_norm": 6.268403053283691, "learning_rate": 1.6575342465753425e-07, "log_odds_chosen": 0.4217242896556854, "log_odds_ratio": -0.6284540891647339, "logits/chosen": 0.0723961815237999, "logits/rejected": 0.15643417835235596, "logps/chosen": -2.5488011837005615, "logps/rejected": -2.923159599304199, "loss": 2.2288, "nll_loss": 2.1659884452819824, "rewards/accuracies": 0.5, "rewards/chosen": -0.2548801302909851, "rewards/margins": 0.03743583709001541, "rewards/rejected": -0.2923159599304199, "step": 610 }, { "epoch": 1.672826830937714, "grad_norm": 4.792841911315918, "learning_rate": 1.643835616438356e-07, "log_odds_chosen": -0.13477520644664764, "log_odds_ratio": -0.8485946655273438, "logits/chosen": -0.06073469668626785, "logits/rejected": -0.13356457650661469, "logps/chosen": -2.7115767002105713, "logps/rejected": -2.570941925048828, "loss": 2.1889, "nll_loss": 2.103999137878418, "rewards/accuracies": 0.625, "rewards/chosen": -0.2711576521396637, "rewards/margins": -0.014063475653529167, "rewards/rejected": -0.25709420442581177, "step": 611 }, { "epoch": 1.675564681724846, "grad_norm": 5.915051460266113, "learning_rate": 1.6301369863013698e-07, "log_odds_chosen": -0.7506124973297119, "log_odds_ratio": -1.225327968597412, "logits/chosen": 0.10729771852493286, "logits/rejected": 0.17102046310901642, "logps/chosen": -3.134643793106079, "logps/rejected": -2.43296217918396, "loss": 2.2479, "nll_loss": 2.1253275871276855, "rewards/accuracies": 0.25, "rewards/chosen": -0.31346437335014343, "rewards/margins": -0.070168137550354, "rewards/rejected": -0.24329620599746704, "step": 612 }, { "epoch": 1.678302532511978, "grad_norm": 5.757928371429443, "learning_rate": 1.6164383561643836e-07, "log_odds_chosen": -0.4467211663722992, "log_odds_ratio": -1.1818376779556274, "logits/chosen": 0.012995347380638123, "logits/rejected": 0.05067349225282669, "logps/chosen": -3.508018970489502, "logps/rejected": -3.058867931365967, "loss": 2.2662, "nll_loss": 2.148015022277832, "rewards/accuracies": 0.375, "rewards/chosen": -0.350801944732666, "rewards/margins": -0.04491513967514038, "rewards/rejected": -0.30588680505752563, "step": 613 }, { "epoch": 1.68104038329911, "grad_norm": 5.351698398590088, "learning_rate": 1.6027397260273973e-07, "log_odds_chosen": -0.0723968893289566, "log_odds_ratio": -0.8510997295379639, "logits/chosen": -0.09907101094722748, "logits/rejected": -0.1545490324497223, "logps/chosen": -2.6135478019714355, "logps/rejected": -2.5317747592926025, "loss": 2.1834, "nll_loss": 2.0983142852783203, "rewards/accuracies": 0.625, "rewards/chosen": -0.2613547742366791, "rewards/margins": -0.008177284151315689, "rewards/rejected": -0.2531774640083313, "step": 614 }, { "epoch": 1.6837782340862422, "grad_norm": 5.204997539520264, "learning_rate": 1.5890410958904111e-07, "log_odds_chosen": -0.33673661947250366, "log_odds_ratio": -0.9876378774642944, "logits/chosen": -0.0918237715959549, "logits/rejected": -0.07853960245847702, "logps/chosen": -3.0509514808654785, "logps/rejected": -2.724540948867798, "loss": 2.2728, "nll_loss": 2.174031972885132, "rewards/accuracies": 0.375, "rewards/chosen": -0.3050951361656189, "rewards/margins": -0.03264106810092926, "rewards/rejected": -0.27245408296585083, "step": 615 }, { "epoch": 1.6865160848733745, "grad_norm": 4.501231670379639, "learning_rate": 1.5753424657534245e-07, "log_odds_chosen": 0.6061562895774841, "log_odds_ratio": -0.5940386056900024, "logits/chosen": 0.024214353412389755, "logits/rejected": -0.13896258175373077, "logps/chosen": -2.2028281688690186, "logps/rejected": -2.7625179290771484, "loss": 2.112, "nll_loss": 2.052549362182617, "rewards/accuracies": 0.625, "rewards/chosen": -0.22028279304504395, "rewards/margins": 0.05596897751092911, "rewards/rejected": -0.27625179290771484, "step": 616 }, { "epoch": 1.6892539356605065, "grad_norm": 5.820075035095215, "learning_rate": 1.5616438356164384e-07, "log_odds_chosen": -0.10748109221458435, "log_odds_ratio": -0.914962887763977, "logits/chosen": 0.10212825983762741, "logits/rejected": 0.16039690375328064, "logps/chosen": -3.126953125, "logps/rejected": -2.984433174133301, "loss": 2.2436, "nll_loss": 2.152122974395752, "rewards/accuracies": 0.625, "rewards/chosen": -0.31269535422325134, "rewards/margins": -0.014252005144953728, "rewards/rejected": -0.2984433174133301, "step": 617 }, { "epoch": 1.6919917864476386, "grad_norm": 4.911747932434082, "learning_rate": 1.547945205479452e-07, "log_odds_chosen": 0.2767152190208435, "log_odds_ratio": -0.6742963194847107, "logits/chosen": 0.02920890599489212, "logits/rejected": -0.028174564242362976, "logps/chosen": -2.4699854850769043, "logps/rejected": -2.701923370361328, "loss": 2.1665, "nll_loss": 2.099071502685547, "rewards/accuracies": 0.625, "rewards/chosen": -0.24699853360652924, "rewards/margins": 0.023193802684545517, "rewards/rejected": -0.27019232511520386, "step": 618 }, { "epoch": 1.6947296372347707, "grad_norm": 5.62509822845459, "learning_rate": 1.534246575342466e-07, "log_odds_chosen": -0.12502694129943848, "log_odds_ratio": -0.8574931621551514, "logits/chosen": -0.17800289392471313, "logits/rejected": -0.1251220405101776, "logps/chosen": -2.948169469833374, "logps/rejected": -2.8195672035217285, "loss": 2.2012, "nll_loss": 2.115468978881836, "rewards/accuracies": 0.5, "rewards/chosen": -0.2948169708251953, "rewards/margins": -0.012860221788287163, "rewards/rejected": -0.2819567322731018, "step": 619 }, { "epoch": 1.697467488021903, "grad_norm": 5.971076011657715, "learning_rate": 1.5205479452054795e-07, "log_odds_chosen": -0.3415546119213104, "log_odds_ratio": -0.9676564335823059, "logits/chosen": -0.00876913033425808, "logits/rejected": 0.0893903598189354, "logps/chosen": -3.4341540336608887, "logps/rejected": -3.0840821266174316, "loss": 2.3318, "nll_loss": 2.235020399093628, "rewards/accuracies": 0.5, "rewards/chosen": -0.34341540932655334, "rewards/margins": -0.03500721603631973, "rewards/rejected": -0.3084082007408142, "step": 620 }, { "epoch": 1.700205338809035, "grad_norm": 5.499115467071533, "learning_rate": 1.506849315068493e-07, "log_odds_chosen": 0.3314300775527954, "log_odds_ratio": -0.8032938241958618, "logits/chosen": -0.06237182766199112, "logits/rejected": -0.06452415883541107, "logps/chosen": -2.65413761138916, "logps/rejected": -2.8931281566619873, "loss": 2.1857, "nll_loss": 2.105377435684204, "rewards/accuracies": 0.625, "rewards/chosen": -0.265413761138916, "rewards/margins": 0.023899056017398834, "rewards/rejected": -0.28931283950805664, "step": 621 }, { "epoch": 1.702943189596167, "grad_norm": 5.604353904724121, "learning_rate": 1.4931506849315067e-07, "log_odds_chosen": -0.4561244547367096, "log_odds_ratio": -1.2067793607711792, "logits/chosen": -0.09358617663383484, "logits/rejected": -0.06413546204566956, "logps/chosen": -3.5371503829956055, "logps/rejected": -3.089348316192627, "loss": 2.3346, "nll_loss": 2.2138912677764893, "rewards/accuracies": 0.5, "rewards/chosen": -0.35371503233909607, "rewards/margins": -0.044780176132917404, "rewards/rejected": -0.3089348077774048, "step": 622 }, { "epoch": 1.7056810403832992, "grad_norm": 5.124629020690918, "learning_rate": 1.4794520547945206e-07, "log_odds_chosen": 0.04351331293582916, "log_odds_ratio": -0.7257742881774902, "logits/chosen": -0.06798259168863297, "logits/rejected": -0.08415520191192627, "logps/chosen": -3.1207196712493896, "logps/rejected": -3.145127058029175, "loss": 2.1874, "nll_loss": 2.114806652069092, "rewards/accuracies": 0.625, "rewards/chosen": -0.3120719790458679, "rewards/margins": 0.002440711483359337, "rewards/rejected": -0.314512699842453, "step": 623 }, { "epoch": 1.7084188911704312, "grad_norm": 6.694784641265869, "learning_rate": 1.4657534246575342e-07, "log_odds_chosen": -0.4750290513038635, "log_odds_ratio": -1.251633644104004, "logits/chosen": -0.03295647352933884, "logits/rejected": 0.11998960375785828, "logps/chosen": -3.6958260536193848, "logps/rejected": -3.236616611480713, "loss": 2.2468, "nll_loss": 2.1216607093811035, "rewards/accuracies": 0.375, "rewards/chosen": -0.3695825934410095, "rewards/margins": -0.04592094197869301, "rewards/rejected": -0.3236616551876068, "step": 624 }, { "epoch": 1.7111567419575633, "grad_norm": 5.392393112182617, "learning_rate": 1.4520547945205479e-07, "log_odds_chosen": -0.9171841740608215, "log_odds_ratio": -1.4660944938659668, "logits/chosen": -0.15328548848628998, "logits/rejected": -0.04107201471924782, "logps/chosen": -3.2413506507873535, "logps/rejected": -2.364842176437378, "loss": 2.2158, "nll_loss": 2.0691640377044678, "rewards/accuracies": 0.375, "rewards/chosen": -0.32413509488105774, "rewards/margins": -0.0876508578658104, "rewards/rejected": -0.23648422956466675, "step": 625 }, { "epoch": 1.7138945927446954, "grad_norm": 4.397271633148193, "learning_rate": 1.4383561643835615e-07, "log_odds_chosen": 0.4294542074203491, "log_odds_ratio": -0.5430968999862671, "logits/chosen": -0.07785681635141373, "logits/rejected": -0.2633960247039795, "logps/chosen": -1.9658989906311035, "logps/rejected": -2.321516990661621, "loss": 2.0869, "nll_loss": 2.0326321125030518, "rewards/accuracies": 0.75, "rewards/chosen": -0.1965899020433426, "rewards/margins": 0.03556180000305176, "rewards/rejected": -0.23215170204639435, "step": 626 }, { "epoch": 1.7166324435318274, "grad_norm": 5.988038539886475, "learning_rate": 1.4246575342465754e-07, "log_odds_chosen": -1.081045389175415, "log_odds_ratio": -1.4546371698379517, "logits/chosen": -0.1070970892906189, "logits/rejected": -0.10772138833999634, "logps/chosen": -3.2780580520629883, "logps/rejected": -2.280566692352295, "loss": 2.324, "nll_loss": 2.178549289703369, "rewards/accuracies": 0.0, "rewards/chosen": -0.32780584692955017, "rewards/margins": -0.09974914789199829, "rewards/rejected": -0.22805668413639069, "step": 627 }, { "epoch": 1.7193702943189595, "grad_norm": 5.550411224365234, "learning_rate": 1.410958904109589e-07, "log_odds_chosen": -0.7475758194923401, "log_odds_ratio": -1.296542763710022, "logits/chosen": 0.05114394426345825, "logits/rejected": 0.08629314601421356, "logps/chosen": -3.643186092376709, "logps/rejected": -2.897096633911133, "loss": 2.2573, "nll_loss": 2.127690315246582, "rewards/accuracies": 0.25, "rewards/chosen": -0.3643186390399933, "rewards/margins": -0.07460896670818329, "rewards/rejected": -0.2897096574306488, "step": 628 }, { "epoch": 1.7221081451060916, "grad_norm": 5.413856506347656, "learning_rate": 1.397260273972603e-07, "log_odds_chosen": -0.528221607208252, "log_odds_ratio": -1.0379891395568848, "logits/chosen": 0.11009465903043747, "logits/rejected": 0.07196956127882004, "logps/chosen": -3.0928077697753906, "logps/rejected": -2.585535764694214, "loss": 2.2774, "nll_loss": 2.17360258102417, "rewards/accuracies": 0.0, "rewards/chosen": -0.30928075313568115, "rewards/margins": -0.05072721093893051, "rewards/rejected": -0.25855356454849243, "step": 629 }, { "epoch": 1.7248459958932238, "grad_norm": 4.997546195983887, "learning_rate": 1.3835616438356162e-07, "log_odds_chosen": 0.39124879240989685, "log_odds_ratio": -0.6862537860870361, "logits/chosen": 0.08866272866725922, "logits/rejected": 0.005247525870800018, "logps/chosen": -2.868762969970703, "logps/rejected": -3.2461159229278564, "loss": 2.1394, "nll_loss": 2.070775270462036, "rewards/accuracies": 0.5, "rewards/chosen": -0.28687629103660583, "rewards/margins": 0.03773530572652817, "rewards/rejected": -0.3246116042137146, "step": 630 }, { "epoch": 1.727583846680356, "grad_norm": 5.923979759216309, "learning_rate": 1.36986301369863e-07, "log_odds_chosen": -0.15659378468990326, "log_odds_ratio": -1.2341790199279785, "logits/chosen": 0.08129055798053741, "logits/rejected": 0.10457579791545868, "logps/chosen": -3.7742409706115723, "logps/rejected": -3.6067264080047607, "loss": 2.2688, "nll_loss": 2.145345449447632, "rewards/accuracies": 0.375, "rewards/chosen": -0.37742412090301514, "rewards/margins": -0.016751471906900406, "rewards/rejected": -0.36067262291908264, "step": 631 }, { "epoch": 1.730321697467488, "grad_norm": 5.800932884216309, "learning_rate": 1.3561643835616437e-07, "log_odds_chosen": -0.21250340342521667, "log_odds_ratio": -0.884538471698761, "logits/chosen": -0.056454647332429886, "logits/rejected": -0.01021384447813034, "logps/chosen": -2.719832420349121, "logps/rejected": -2.5039749145507812, "loss": 2.16, "nll_loss": 2.0715112686157227, "rewards/accuracies": 0.625, "rewards/chosen": -0.27198323607444763, "rewards/margins": -0.021585745736956596, "rewards/rejected": -0.2503975033760071, "step": 632 }, { "epoch": 1.7330595482546203, "grad_norm": 4.816252708435059, "learning_rate": 1.3424657534246576e-07, "log_odds_chosen": -0.052528828382492065, "log_odds_ratio": -0.8029778003692627, "logits/chosen": -0.02859911322593689, "logits/rejected": 0.0343131497502327, "logps/chosen": -2.3405203819274902, "logps/rejected": -2.2809410095214844, "loss": 2.0794, "nll_loss": 1.999150037765503, "rewards/accuracies": 0.625, "rewards/chosen": -0.23405206203460693, "rewards/margins": -0.0059579480439424515, "rewards/rejected": -0.22809410095214844, "step": 633 }, { "epoch": 1.7357973990417523, "grad_norm": 5.871859550476074, "learning_rate": 1.3287671232876712e-07, "log_odds_chosen": -0.6449095010757446, "log_odds_ratio": -1.1487823724746704, "logits/chosen": -0.03469749167561531, "logits/rejected": 0.03984757512807846, "logps/chosen": -3.2972099781036377, "logps/rejected": -2.65516996383667, "loss": 2.2153, "nll_loss": 2.1004490852355957, "rewards/accuracies": 0.25, "rewards/chosen": -0.32972097396850586, "rewards/margins": -0.06420400738716125, "rewards/rejected": -0.265516996383667, "step": 634 }, { "epoch": 1.7385352498288844, "grad_norm": 6.512299060821533, "learning_rate": 1.3150684931506849e-07, "log_odds_chosen": -0.669561505317688, "log_odds_ratio": -1.3645588159561157, "logits/chosen": 0.0035881521180272102, "logits/rejected": 0.12280955910682678, "logps/chosen": -3.703447103500366, "logps/rejected": -3.053006649017334, "loss": 2.345, "nll_loss": 2.2085132598876953, "rewards/accuracies": 0.375, "rewards/chosen": -0.37034472823143005, "rewards/margins": -0.06504404544830322, "rewards/rejected": -0.30530068278312683, "step": 635 }, { "epoch": 1.7412731006160165, "grad_norm": 5.224961757659912, "learning_rate": 1.3013698630136985e-07, "log_odds_chosen": 0.13113299012184143, "log_odds_ratio": -0.6816041469573975, "logits/chosen": 0.030462322756648064, "logits/rejected": 0.033540837466716766, "logps/chosen": -2.6176652908325195, "logps/rejected": -2.7302427291870117, "loss": 2.15, "nll_loss": 2.081882953643799, "rewards/accuracies": 0.375, "rewards/chosen": -0.26176655292510986, "rewards/margins": 0.01125774160027504, "rewards/rejected": -0.2730242609977722, "step": 636 }, { "epoch": 1.7440109514031485, "grad_norm": 5.946280479431152, "learning_rate": 1.2876712328767124e-07, "log_odds_chosen": 0.9341793060302734, "log_odds_ratio": -0.6393193006515503, "logits/chosen": -0.06850956380367279, "logits/rejected": -0.07563327252864838, "logps/chosen": -2.4146361351013184, "logps/rejected": -3.2831201553344727, "loss": 2.0842, "nll_loss": 2.020259141921997, "rewards/accuracies": 0.75, "rewards/chosen": -0.24146361649036407, "rewards/margins": 0.0868484228849411, "rewards/rejected": -0.3283120393753052, "step": 637 }, { "epoch": 1.7467488021902806, "grad_norm": 5.244234561920166, "learning_rate": 1.273972602739726e-07, "log_odds_chosen": -0.7620536684989929, "log_odds_ratio": -1.2651519775390625, "logits/chosen": -0.17592446506023407, "logits/rejected": -0.10754555463790894, "logps/chosen": -3.2485744953155518, "logps/rejected": -2.50815486907959, "loss": 2.3024, "nll_loss": 2.175874710083008, "rewards/accuracies": 0.375, "rewards/chosen": -0.3248574435710907, "rewards/margins": -0.0740419551730156, "rewards/rejected": -0.2508154809474945, "step": 638 }, { "epoch": 1.7494866529774127, "grad_norm": 6.578925132751465, "learning_rate": 1.2602739726027399e-07, "log_odds_chosen": 0.16814813017845154, "log_odds_ratio": -0.6525462865829468, "logits/chosen": 0.08384346961975098, "logits/rejected": 0.14361146092414856, "logps/chosen": -3.4213905334472656, "logps/rejected": -3.5785350799560547, "loss": 2.256, "nll_loss": 2.1907601356506348, "rewards/accuracies": 0.5, "rewards/chosen": -0.3421390950679779, "rewards/margins": 0.015714455395936966, "rewards/rejected": -0.3578535318374634, "step": 639 }, { "epoch": 1.7522245037645447, "grad_norm": 5.305597305297852, "learning_rate": 1.2465753424657535e-07, "log_odds_chosen": 0.5716466307640076, "log_odds_ratio": -0.9676936864852905, "logits/chosen": -0.11541812866926193, "logits/rejected": -0.10879715532064438, "logps/chosen": -2.650192975997925, "logps/rejected": -3.215665578842163, "loss": 2.2092, "nll_loss": 2.1124744415283203, "rewards/accuracies": 0.5, "rewards/chosen": -0.2650192975997925, "rewards/margins": 0.05654727667570114, "rewards/rejected": -0.3215665817260742, "step": 640 }, { "epoch": 1.7549623545516768, "grad_norm": 6.6611409187316895, "learning_rate": 1.232876712328767e-07, "log_odds_chosen": -1.5165081024169922, "log_odds_ratio": -1.929992437362671, "logits/chosen": -0.11454139649868011, "logits/rejected": -0.09629173576831818, "logps/chosen": -4.408331871032715, "logps/rejected": -2.891672372817993, "loss": 2.304, "nll_loss": 2.1109519004821777, "rewards/accuracies": 0.25, "rewards/chosen": -0.4408332109451294, "rewards/margins": -0.15166598558425903, "rewards/rejected": -0.28916722536087036, "step": 641 }, { "epoch": 1.7577002053388089, "grad_norm": 5.581625461578369, "learning_rate": 1.2191780821917807e-07, "log_odds_chosen": -0.4616727828979492, "log_odds_ratio": -1.3851224184036255, "logits/chosen": 0.09838590025901794, "logits/rejected": 0.033308617770671844, "logps/chosen": -3.5978779792785645, "logps/rejected": -3.142831325531006, "loss": 2.238, "nll_loss": 2.0994722843170166, "rewards/accuracies": 0.75, "rewards/chosen": -0.35978779196739197, "rewards/margins": -0.04550466686487198, "rewards/rejected": -0.3142831325531006, "step": 642 }, { "epoch": 1.7604380561259412, "grad_norm": 5.915510654449463, "learning_rate": 1.2054794520547946e-07, "log_odds_chosen": -0.5852013826370239, "log_odds_ratio": -1.179537296295166, "logits/chosen": 0.06352004408836365, "logits/rejected": 0.0566302128136158, "logps/chosen": -3.4277000427246094, "logps/rejected": -2.8340342044830322, "loss": 2.2105, "nll_loss": 2.0925536155700684, "rewards/accuracies": 0.5, "rewards/chosen": -0.3427700102329254, "rewards/margins": -0.05936659127473831, "rewards/rejected": -0.2834034264087677, "step": 643 }, { "epoch": 1.7631759069130732, "grad_norm": 4.688094615936279, "learning_rate": 1.1917808219178081e-07, "log_odds_chosen": 0.0747484415769577, "log_odds_ratio": -0.7730660438537598, "logits/chosen": -0.012827523052692413, "logits/rejected": -0.10591582953929901, "logps/chosen": -2.4351987838745117, "logps/rejected": -2.4880168437957764, "loss": 2.202, "nll_loss": 2.1247222423553467, "rewards/accuracies": 0.5, "rewards/chosen": -0.2435198724269867, "rewards/margins": 0.005281805992126465, "rewards/rejected": -0.24880167841911316, "step": 644 }, { "epoch": 1.7659137577002053, "grad_norm": 5.029994487762451, "learning_rate": 1.1780821917808218e-07, "log_odds_chosen": -0.22123341262340546, "log_odds_ratio": -0.8760698437690735, "logits/chosen": -0.05173248425126076, "logits/rejected": -0.11591203510761261, "logps/chosen": -2.364246129989624, "logps/rejected": -2.1550416946411133, "loss": 2.2164, "nll_loss": 2.1288411617279053, "rewards/accuracies": 0.375, "rewards/chosen": -0.23642461001873016, "rewards/margins": -0.020920436829328537, "rewards/rejected": -0.21550416946411133, "step": 645 }, { "epoch": 1.7686516084873376, "grad_norm": 5.751580715179443, "learning_rate": 1.1643835616438355e-07, "log_odds_chosen": -0.6892553567886353, "log_odds_ratio": -1.282301902770996, "logits/chosen": -0.10106770694255829, "logits/rejected": -0.07885447889566422, "logps/chosen": -3.393799066543579, "logps/rejected": -2.6975417137145996, "loss": 2.2354, "nll_loss": 2.107128381729126, "rewards/accuracies": 0.5, "rewards/chosen": -0.3393799364566803, "rewards/margins": -0.06962572038173676, "rewards/rejected": -0.26975417137145996, "step": 646 }, { "epoch": 1.7713894592744697, "grad_norm": 5.435629367828369, "learning_rate": 1.1506849315068492e-07, "log_odds_chosen": -0.0725766271352768, "log_odds_ratio": -0.8266408443450928, "logits/chosen": 0.009534243494272232, "logits/rejected": -0.094919353723526, "logps/chosen": -2.957961320877075, "logps/rejected": -2.86552095413208, "loss": 2.1945, "nll_loss": 2.111804723739624, "rewards/accuracies": 0.625, "rewards/chosen": -0.29579612612724304, "rewards/margins": -0.009244050830602646, "rewards/rejected": -0.2865521013736725, "step": 647 }, { "epoch": 1.7741273100616017, "grad_norm": 6.633363246917725, "learning_rate": 1.136986301369863e-07, "log_odds_chosen": -0.08845964819192886, "log_odds_ratio": -0.7805674076080322, "logits/chosen": 0.10378271341323853, "logits/rejected": 0.20694079995155334, "logps/chosen": -3.496090888977051, "logps/rejected": -3.397714614868164, "loss": 2.1916, "nll_loss": 2.113530158996582, "rewards/accuracies": 0.375, "rewards/chosen": -0.3496090769767761, "rewards/margins": -0.009837580844759941, "rewards/rejected": -0.3397715091705322, "step": 648 }, { "epoch": 1.7768651608487338, "grad_norm": 4.949241638183594, "learning_rate": 1.1232876712328766e-07, "log_odds_chosen": 0.14591199159622192, "log_odds_ratio": -0.6533138751983643, "logits/chosen": 0.1009298712015152, "logits/rejected": 0.05394883453845978, "logps/chosen": -2.239899158477783, "logps/rejected": -2.362060785293579, "loss": 2.0477, "nll_loss": 1.982352614402771, "rewards/accuracies": 0.625, "rewards/chosen": -0.22398993372917175, "rewards/margins": 0.012216154485940933, "rewards/rejected": -0.2362060844898224, "step": 649 }, { "epoch": 1.7796030116358659, "grad_norm": 5.547770023345947, "learning_rate": 1.1095890410958903e-07, "log_odds_chosen": 0.2777791917324066, "log_odds_ratio": -0.9585287570953369, "logits/chosen": -0.09119582176208496, "logits/rejected": -0.059897519648075104, "logps/chosen": -3.424020767211914, "logps/rejected": -3.6544876098632812, "loss": 2.2283, "nll_loss": 2.1324596405029297, "rewards/accuracies": 0.625, "rewards/chosen": -0.34240204095840454, "rewards/margins": 0.023046720772981644, "rewards/rejected": -0.36544880270957947, "step": 650 }, { "epoch": 1.782340862422998, "grad_norm": 5.487460136413574, "learning_rate": 1.095890410958904e-07, "log_odds_chosen": -0.9365379810333252, "log_odds_ratio": -1.3402577638626099, "logits/chosen": 0.03235619515180588, "logits/rejected": 0.05245175212621689, "logps/chosen": -3.3886022567749023, "logps/rejected": -2.479586362838745, "loss": 2.2618, "nll_loss": 2.1277432441711426, "rewards/accuracies": 0.125, "rewards/chosen": -0.33886027336120605, "rewards/margins": -0.09090159088373184, "rewards/rejected": -0.24795866012573242, "step": 651 }, { "epoch": 1.78507871321013, "grad_norm": 5.111229419708252, "learning_rate": 1.0821917808219177e-07, "log_odds_chosen": 0.11867234855890274, "log_odds_ratio": -0.7996269464492798, "logits/chosen": -0.05391639843583107, "logits/rejected": -0.11602143943309784, "logps/chosen": -2.7065038681030273, "logps/rejected": -2.758796215057373, "loss": 2.1381, "nll_loss": 2.0581037998199463, "rewards/accuracies": 0.5, "rewards/chosen": -0.27065038681030273, "rewards/margins": 0.0052292002364993095, "rewards/rejected": -0.2758796215057373, "step": 652 }, { "epoch": 1.787816563997262, "grad_norm": 5.172855377197266, "learning_rate": 1.0684931506849315e-07, "log_odds_chosen": 0.22489246726036072, "log_odds_ratio": -0.8316199779510498, "logits/chosen": -0.11939143389463425, "logits/rejected": -0.1228819191455841, "logps/chosen": -2.6410889625549316, "logps/rejected": -2.8687782287597656, "loss": 2.1895, "nll_loss": 2.1063132286071777, "rewards/accuracies": 0.25, "rewards/chosen": -0.26410889625549316, "rewards/margins": 0.022768940776586533, "rewards/rejected": -0.28687784075737, "step": 653 }, { "epoch": 1.7905544147843941, "grad_norm": 5.724658966064453, "learning_rate": 1.0547945205479451e-07, "log_odds_chosen": -0.8502175211906433, "log_odds_ratio": -1.4164347648620605, "logits/chosen": 0.02193262055516243, "logits/rejected": 0.08322881162166595, "logps/chosen": -3.263791084289551, "logps/rejected": -2.442112684249878, "loss": 2.1596, "nll_loss": 2.017925500869751, "rewards/accuracies": 0.25, "rewards/chosen": -0.32637909054756165, "rewards/margins": -0.08216781914234161, "rewards/rejected": -0.24421128630638123, "step": 654 }, { "epoch": 1.7932922655715262, "grad_norm": 5.7315497398376465, "learning_rate": 1.0410958904109588e-07, "log_odds_chosen": -0.34446951746940613, "log_odds_ratio": -0.965832531452179, "logits/chosen": -0.09688141942024231, "logits/rejected": -0.050048019737005234, "logps/chosen": -2.8302111625671387, "logps/rejected": -2.5017244815826416, "loss": 2.2389, "nll_loss": 2.1422677040100098, "rewards/accuracies": 0.25, "rewards/chosen": -0.28302109241485596, "rewards/margins": -0.032848671078681946, "rewards/rejected": -0.2501724660396576, "step": 655 }, { "epoch": 1.7960301163586585, "grad_norm": 5.003365516662598, "learning_rate": 1.0273972602739725e-07, "log_odds_chosen": -0.16757312417030334, "log_odds_ratio": -0.8642076849937439, "logits/chosen": -0.10303628444671631, "logits/rejected": -0.1662980318069458, "logps/chosen": -2.9722230434417725, "logps/rejected": -2.809079647064209, "loss": 2.2561, "nll_loss": 2.169684410095215, "rewards/accuracies": 0.5, "rewards/chosen": -0.2972223162651062, "rewards/margins": -0.016314327716827393, "rewards/rejected": -0.2809079885482788, "step": 656 }, { "epoch": 1.7987679671457906, "grad_norm": 6.047646522521973, "learning_rate": 1.0136986301369862e-07, "log_odds_chosen": -0.04172429442405701, "log_odds_ratio": -0.8227792978286743, "logits/chosen": -0.11214721202850342, "logits/rejected": -0.013525009155273438, "logps/chosen": -3.222264289855957, "logps/rejected": -3.1777660846710205, "loss": 2.2127, "nll_loss": 2.130380630493164, "rewards/accuracies": 0.375, "rewards/chosen": -0.32222646474838257, "rewards/margins": -0.00444982573390007, "rewards/rejected": -0.317776620388031, "step": 657 }, { "epoch": 1.8015058179329226, "grad_norm": 5.694316387176514, "learning_rate": 1e-07, "log_odds_chosen": 0.19831596314907074, "log_odds_ratio": -0.6959033012390137, "logits/chosen": -0.0818832591176033, "logits/rejected": -0.09828928112983704, "logps/chosen": -2.8937995433807373, "logps/rejected": -3.0634007453918457, "loss": 2.1723, "nll_loss": 2.102672815322876, "rewards/accuracies": 0.75, "rewards/chosen": -0.2893799841403961, "rewards/margins": 0.016960127279162407, "rewards/rejected": -0.3063400983810425, "step": 658 }, { "epoch": 1.8042436687200547, "grad_norm": 6.539883136749268, "learning_rate": 9.863013698630136e-08, "log_odds_chosen": 0.37509918212890625, "log_odds_ratio": -0.6228616237640381, "logits/chosen": 0.024791253730654716, "logits/rejected": 0.055308207869529724, "logps/chosen": -4.057027816772461, "logps/rejected": -4.390959739685059, "loss": 2.2188, "nll_loss": 2.1565017700195312, "rewards/accuracies": 0.875, "rewards/chosen": -0.40570276975631714, "rewards/margins": 0.033393196761608124, "rewards/rejected": -0.43909597396850586, "step": 659 }, { "epoch": 1.806981519507187, "grad_norm": 6.531866550445557, "learning_rate": 9.726027397260273e-08, "log_odds_chosen": -0.8705602884292603, "log_odds_ratio": -1.4347202777862549, "logits/chosen": -0.012346839532256126, "logits/rejected": 0.07799779623746872, "logps/chosen": -3.8541226387023926, "logps/rejected": -2.989487409591675, "loss": 2.3706, "nll_loss": 2.227086305618286, "rewards/accuracies": 0.375, "rewards/chosen": -0.3854122757911682, "rewards/margins": -0.08646352589130402, "rewards/rejected": -0.298948734998703, "step": 660 }, { "epoch": 1.809719370294319, "grad_norm": 5.281366348266602, "learning_rate": 9.58904109589041e-08, "log_odds_chosen": -0.09810605645179749, "log_odds_ratio": -0.8916271328926086, "logits/chosen": -0.0052715446799993515, "logits/rejected": -0.02431400865316391, "logps/chosen": -3.092799425125122, "logps/rejected": -2.971160888671875, "loss": 2.2109, "nll_loss": 2.1217496395111084, "rewards/accuracies": 0.5, "rewards/chosen": -0.3092799484729767, "rewards/margins": -0.012163838371634483, "rewards/rejected": -0.29711610078811646, "step": 661 }, { "epoch": 1.8124572210814511, "grad_norm": 5.401898384094238, "learning_rate": 9.452054794520547e-08, "log_odds_chosen": -0.08124879002571106, "log_odds_ratio": -0.8549500703811646, "logits/chosen": -0.0836852639913559, "logits/rejected": -0.0809623971581459, "logps/chosen": -3.478626012802124, "logps/rejected": -3.3421216011047363, "loss": 2.2039, "nll_loss": 2.1184144020080566, "rewards/accuracies": 0.5, "rewards/chosen": -0.3478626012802124, "rewards/margins": -0.01365043967962265, "rewards/rejected": -0.33421218395233154, "step": 662 }, { "epoch": 1.8151950718685832, "grad_norm": 5.140756130218506, "learning_rate": 9.315068493150684e-08, "log_odds_chosen": 0.21360638737678528, "log_odds_ratio": -0.7548438906669617, "logits/chosen": -0.07762667536735535, "logits/rejected": -0.1253785341978073, "logps/chosen": -3.1792428493499756, "logps/rejected": -3.370907783508301, "loss": 2.229, "nll_loss": 2.153550624847412, "rewards/accuracies": 0.625, "rewards/chosen": -0.3179243206977844, "rewards/margins": 0.019166475161910057, "rewards/rejected": -0.33709079027175903, "step": 663 }, { "epoch": 1.8179329226557153, "grad_norm": 5.568292617797852, "learning_rate": 9.178082191780821e-08, "log_odds_chosen": 0.4904816448688507, "log_odds_ratio": -0.7310487031936646, "logits/chosen": 0.005956470966339111, "logits/rejected": -0.04623556509613991, "logps/chosen": -2.886014938354492, "logps/rejected": -3.274839162826538, "loss": 2.1486, "nll_loss": 2.0755200386047363, "rewards/accuracies": 0.625, "rewards/chosen": -0.28860148787498474, "rewards/margins": 0.03888241946697235, "rewards/rejected": -0.3274839222431183, "step": 664 }, { "epoch": 1.8206707734428473, "grad_norm": 5.372821807861328, "learning_rate": 9.041095890410958e-08, "log_odds_chosen": 0.10218146443367004, "log_odds_ratio": -0.7538142800331116, "logits/chosen": 0.10930978506803513, "logits/rejected": 0.11763077974319458, "logps/chosen": -3.08683705329895, "logps/rejected": -3.1473939418792725, "loss": 2.2662, "nll_loss": 2.190838575363159, "rewards/accuracies": 0.5, "rewards/chosen": -0.30868369340896606, "rewards/margins": 0.006055699661374092, "rewards/rejected": -0.3147394061088562, "step": 665 }, { "epoch": 1.8234086242299794, "grad_norm": 4.865793704986572, "learning_rate": 8.904109589041094e-08, "log_odds_chosen": 0.4572182893753052, "log_odds_ratio": -0.5725977420806885, "logits/chosen": 0.014657936990261078, "logits/rejected": -0.014104536734521389, "logps/chosen": -2.228241443634033, "logps/rejected": -2.6059670448303223, "loss": 2.0903, "nll_loss": 2.033020257949829, "rewards/accuracies": 0.625, "rewards/chosen": -0.22282415628433228, "rewards/margins": 0.03777254372835159, "rewards/rejected": -0.26059669256210327, "step": 666 }, { "epoch": 1.8261464750171115, "grad_norm": 6.30460262298584, "learning_rate": 8.767123287671232e-08, "log_odds_chosen": -1.624101161956787, "log_odds_ratio": -1.9444382190704346, "logits/chosen": -0.11151491850614548, "logits/rejected": -0.010358568280935287, "logps/chosen": -4.268815994262695, "logps/rejected": -2.6862785816192627, "loss": 2.3567, "nll_loss": 2.1622519493103027, "rewards/accuracies": 0.25, "rewards/chosen": -0.4268816113471985, "rewards/margins": -0.1582537144422531, "rewards/rejected": -0.2686278820037842, "step": 667 }, { "epoch": 1.8288843258042435, "grad_norm": 5.000298500061035, "learning_rate": 8.630136986301371e-08, "log_odds_chosen": 0.11267107725143433, "log_odds_ratio": -0.7034351229667664, "logits/chosen": -0.09731295704841614, "logits/rejected": -0.08781012892723083, "logps/chosen": -2.1997945308685303, "logps/rejected": -2.292513370513916, "loss": 2.1357, "nll_loss": 2.065335273742676, "rewards/accuracies": 0.625, "rewards/chosen": -0.2199794501066208, "rewards/margins": 0.00927189365029335, "rewards/rejected": -0.22925135493278503, "step": 668 }, { "epoch": 1.8316221765913756, "grad_norm": 5.2182488441467285, "learning_rate": 8.493150684931506e-08, "log_odds_chosen": 1.1547291278839111, "log_odds_ratio": -0.6158677339553833, "logits/chosen": 0.015953799709677696, "logits/rejected": -0.06467582285404205, "logps/chosen": -2.640953302383423, "logps/rejected": -3.735689878463745, "loss": 2.1748, "nll_loss": 2.1132330894470215, "rewards/accuracies": 0.75, "rewards/chosen": -0.2640953063964844, "rewards/margins": 0.10947367548942566, "rewards/rejected": -0.3735690116882324, "step": 669 }, { "epoch": 1.834360027378508, "grad_norm": 5.252058982849121, "learning_rate": 8.356164383561644e-08, "log_odds_chosen": -0.8498099446296692, "log_odds_ratio": -1.4490323066711426, "logits/chosen": -0.21022957563400269, "logits/rejected": -0.25710904598236084, "logps/chosen": -3.3515491485595703, "logps/rejected": -2.4895739555358887, "loss": 2.1893, "nll_loss": 2.044381856918335, "rewards/accuracies": 0.5, "rewards/chosen": -0.3351548910140991, "rewards/margins": -0.08619752526283264, "rewards/rejected": -0.24895739555358887, "step": 670 }, { "epoch": 1.83709787816564, "grad_norm": 6.571476459503174, "learning_rate": 8.21917808219178e-08, "log_odds_chosen": -0.6297712326049805, "log_odds_ratio": -1.3253413438796997, "logits/chosen": 0.11897148191928864, "logits/rejected": 0.15310931205749512, "logps/chosen": -3.9475631713867188, "logps/rejected": -3.3056201934814453, "loss": 2.3149, "nll_loss": 2.182368755340576, "rewards/accuracies": 0.5, "rewards/chosen": -0.3947563171386719, "rewards/margins": -0.06419431418180466, "rewards/rejected": -0.3305619955062866, "step": 671 }, { "epoch": 1.839835728952772, "grad_norm": 5.153213024139404, "learning_rate": 8.082191780821918e-08, "log_odds_chosen": 0.4760780930519104, "log_odds_ratio": -0.716446042060852, "logits/chosen": 0.15398938953876495, "logits/rejected": 0.11219853907823563, "logps/chosen": -2.846386194229126, "logps/rejected": -3.2726194858551025, "loss": 2.1811, "nll_loss": 2.1094417572021484, "rewards/accuracies": 0.625, "rewards/chosen": -0.2846386432647705, "rewards/margins": 0.04262330383062363, "rewards/rejected": -0.32726192474365234, "step": 672 }, { "epoch": 1.8425735797399043, "grad_norm": 5.885811805725098, "learning_rate": 7.945205479452056e-08, "log_odds_chosen": -0.4916536211967468, "log_odds_ratio": -1.0470746755599976, "logits/chosen": 0.07689669728279114, "logits/rejected": 0.12903833389282227, "logps/chosen": -3.414283037185669, "logps/rejected": -2.95687198638916, "loss": 2.2549, "nll_loss": 2.150174379348755, "rewards/accuracies": 0.375, "rewards/chosen": -0.34142833948135376, "rewards/margins": -0.045741111040115356, "rewards/rejected": -0.295687198638916, "step": 673 }, { "epoch": 1.8453114305270364, "grad_norm": 5.607716083526611, "learning_rate": 7.808219178082192e-08, "log_odds_chosen": -0.3319481611251831, "log_odds_ratio": -0.9213985204696655, "logits/chosen": -0.10835382342338562, "logits/rejected": -0.0074523985385894775, "logps/chosen": -2.710165023803711, "logps/rejected": -2.412706136703491, "loss": 2.2183, "nll_loss": 2.1261372566223145, "rewards/accuracies": 0.375, "rewards/chosen": -0.27101650834083557, "rewards/margins": -0.029745882377028465, "rewards/rejected": -0.24127061665058136, "step": 674 }, { "epoch": 1.8480492813141685, "grad_norm": 5.527291297912598, "learning_rate": 7.67123287671233e-08, "log_odds_chosen": -0.25301527976989746, "log_odds_ratio": -0.9323247671127319, "logits/chosen": -0.1524653434753418, "logits/rejected": -0.09773121029138565, "logps/chosen": -3.1585795879364014, "logps/rejected": -2.903204917907715, "loss": 2.1778, "nll_loss": 2.084606647491455, "rewards/accuracies": 0.25, "rewards/chosen": -0.3158579468727112, "rewards/margins": -0.025537455454468727, "rewards/rejected": -0.290320485830307, "step": 675 }, { "epoch": 1.8507871321013005, "grad_norm": 5.801095008850098, "learning_rate": 7.534246575342466e-08, "log_odds_chosen": -0.36658719182014465, "log_odds_ratio": -1.0126066207885742, "logits/chosen": -0.17110300064086914, "logits/rejected": -0.1416359394788742, "logps/chosen": -3.7598037719726562, "logps/rejected": -3.3965585231781006, "loss": 2.2091, "nll_loss": 2.1078028678894043, "rewards/accuracies": 0.375, "rewards/chosen": -0.375980406999588, "rewards/margins": -0.03632452338933945, "rewards/rejected": -0.33965587615966797, "step": 676 }, { "epoch": 1.8535249828884326, "grad_norm": 5.593935489654541, "learning_rate": 7.397260273972603e-08, "log_odds_chosen": -0.16299962997436523, "log_odds_ratio": -0.912259042263031, "logits/chosen": 0.24529102444648743, "logits/rejected": 0.2527753412723541, "logps/chosen": -2.824009895324707, "logps/rejected": -2.6205391883850098, "loss": 2.1664, "nll_loss": 2.075174331665039, "rewards/accuracies": 0.625, "rewards/chosen": -0.28240102529525757, "rewards/margins": -0.02034709043800831, "rewards/rejected": -0.262053906917572, "step": 677 }, { "epoch": 1.8562628336755647, "grad_norm": 5.485555171966553, "learning_rate": 7.260273972602739e-08, "log_odds_chosen": 1.2068254947662354, "log_odds_ratio": -0.5935657620429993, "logits/chosen": 0.0665770173072815, "logits/rejected": -0.015150699764490128, "logps/chosen": -3.036245346069336, "logps/rejected": -4.197596549987793, "loss": 2.1303, "nll_loss": 2.0709118843078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.3036245107650757, "rewards/margins": 0.1161351278424263, "rewards/rejected": -0.4197596311569214, "step": 678 }, { "epoch": 1.8590006844626967, "grad_norm": 6.122744560241699, "learning_rate": 7.123287671232877e-08, "log_odds_chosen": -0.107325479388237, "log_odds_ratio": -0.8766381144523621, "logits/chosen": 0.06558924913406372, "logits/rejected": 0.003744460642337799, "logps/chosen": -3.131735324859619, "logps/rejected": -2.9892754554748535, "loss": 2.1258, "nll_loss": 2.038137912750244, "rewards/accuracies": 0.5, "rewards/chosen": -0.3131735324859619, "rewards/margins": -0.014245979487895966, "rewards/rejected": -0.29892757534980774, "step": 679 }, { "epoch": 1.8617385352498288, "grad_norm": 5.408882141113281, "learning_rate": 6.986301369863014e-08, "log_odds_chosen": 0.6288394927978516, "log_odds_ratio": -0.5990310907363892, "logits/chosen": 0.034090183675289154, "logits/rejected": 0.006796067580580711, "logps/chosen": -2.4221482276916504, "logps/rejected": -3.0140984058380127, "loss": 2.0866, "nll_loss": 2.0267460346221924, "rewards/accuracies": 0.75, "rewards/chosen": -0.24221481382846832, "rewards/margins": 0.05919501557946205, "rewards/rejected": -0.30140984058380127, "step": 680 }, { "epoch": 1.8644763860369609, "grad_norm": 6.740182876586914, "learning_rate": 6.84931506849315e-08, "log_odds_chosen": -0.9721670746803284, "log_odds_ratio": -1.4699199199676514, "logits/chosen": 0.031832266598939896, "logits/rejected": 0.12432973086833954, "logps/chosen": -3.8754680156707764, "logps/rejected": -2.9418816566467285, "loss": 2.2432, "nll_loss": 2.0962347984313965, "rewards/accuracies": 0.125, "rewards/chosen": -0.3875468075275421, "rewards/margins": -0.09335865080356598, "rewards/rejected": -0.29418817162513733, "step": 681 }, { "epoch": 1.867214236824093, "grad_norm": 6.704507827758789, "learning_rate": 6.712328767123288e-08, "log_odds_chosen": -0.9513136148452759, "log_odds_ratio": -1.5241360664367676, "logits/chosen": -0.006621785461902618, "logits/rejected": -0.006725984625518322, "logps/chosen": -3.9324393272399902, "logps/rejected": -2.9659605026245117, "loss": 2.2967, "nll_loss": 2.1442577838897705, "rewards/accuracies": 0.375, "rewards/chosen": -0.3932439088821411, "rewards/margins": -0.09664785861968994, "rewards/rejected": -0.29659605026245117, "step": 682 }, { "epoch": 1.8699520876112252, "grad_norm": 5.406399250030518, "learning_rate": 6.575342465753424e-08, "log_odds_chosen": -0.23352307081222534, "log_odds_ratio": -0.8755190372467041, "logits/chosen": -0.08020807802677155, "logits/rejected": -0.08033449202775955, "logps/chosen": -2.7245681285858154, "logps/rejected": -2.4954094886779785, "loss": 2.2068, "nll_loss": 2.119260787963867, "rewards/accuracies": 0.25, "rewards/chosen": -0.2724568247795105, "rewards/margins": -0.022915856912732124, "rewards/rejected": -0.24954095482826233, "step": 683 }, { "epoch": 1.8726899383983573, "grad_norm": 6.781891822814941, "learning_rate": 6.438356164383562e-08, "log_odds_chosen": -0.7915804386138916, "log_odds_ratio": -1.3910249471664429, "logits/chosen": -0.047136303037405014, "logits/rejected": 0.0987454354763031, "logps/chosen": -3.647418737411499, "logps/rejected": -2.8744993209838867, "loss": 2.3285, "nll_loss": 2.1893961429595947, "rewards/accuracies": 0.25, "rewards/chosen": -0.36474186182022095, "rewards/margins": -0.07729195803403854, "rewards/rejected": -0.2874499261379242, "step": 684 }, { "epoch": 1.8754277891854894, "grad_norm": 6.205138683319092, "learning_rate": 6.301369863013699e-08, "log_odds_chosen": -1.3700191974639893, "log_odds_ratio": -1.8005588054656982, "logits/chosen": 0.18989166617393494, "logits/rejected": 0.27025312185287476, "logps/chosen": -4.432271957397461, "logps/rejected": -3.0744946002960205, "loss": 2.3933, "nll_loss": 2.213197708129883, "rewards/accuracies": 0.25, "rewards/chosen": -0.44322723150253296, "rewards/margins": -0.1357777714729309, "rewards/rejected": -0.30744946002960205, "step": 685 }, { "epoch": 1.8781656399726216, "grad_norm": 5.128014087677002, "learning_rate": 6.164383561643836e-08, "log_odds_chosen": -0.21783313155174255, "log_odds_ratio": -0.8667274713516235, "logits/chosen": -0.1537007987499237, "logits/rejected": -0.17121955752372742, "logps/chosen": -2.667583465576172, "logps/rejected": -2.472649574279785, "loss": 2.1732, "nll_loss": 2.0865416526794434, "rewards/accuracies": 0.375, "rewards/chosen": -0.26675838232040405, "rewards/margins": -0.019493401050567627, "rewards/rejected": -0.24726495146751404, "step": 686 }, { "epoch": 1.8809034907597537, "grad_norm": 5.389984607696533, "learning_rate": 6.027397260273973e-08, "log_odds_chosen": -0.28224945068359375, "log_odds_ratio": -0.9248659610748291, "logits/chosen": -0.055084019899368286, "logits/rejected": -0.015483716502785683, "logps/chosen": -2.727403163909912, "logps/rejected": -2.453848361968994, "loss": 2.175, "nll_loss": 2.0824732780456543, "rewards/accuracies": 0.5, "rewards/chosen": -0.27274030447006226, "rewards/margins": -0.027355510741472244, "rewards/rejected": -0.2453848123550415, "step": 687 }, { "epoch": 1.8836413415468858, "grad_norm": 5.524750709533691, "learning_rate": 5.890410958904109e-08, "log_odds_chosen": 1.1772286891937256, "log_odds_ratio": -0.7827163338661194, "logits/chosen": 0.15407338738441467, "logits/rejected": 0.03385048359632492, "logps/chosen": -2.982699155807495, "logps/rejected": -4.1591620445251465, "loss": 2.1579, "nll_loss": 2.0796163082122803, "rewards/accuracies": 0.375, "rewards/chosen": -0.29826992750167847, "rewards/margins": 0.11764626950025558, "rewards/rejected": -0.41591623425483704, "step": 688 }, { "epoch": 1.8863791923340179, "grad_norm": 5.885275363922119, "learning_rate": 5.753424657534246e-08, "log_odds_chosen": -0.7999730706214905, "log_odds_ratio": -1.3734540939331055, "logits/chosen": -0.12690657377243042, "logits/rejected": -0.10199868679046631, "logps/chosen": -4.273979187011719, "logps/rejected": -3.4856505393981934, "loss": 2.2267, "nll_loss": 2.08937406539917, "rewards/accuracies": 0.25, "rewards/chosen": -0.4273979365825653, "rewards/margins": -0.07883287221193314, "rewards/rejected": -0.3485650420188904, "step": 689 }, { "epoch": 1.88911704312115, "grad_norm": 6.778548717498779, "learning_rate": 5.616438356164383e-08, "log_odds_chosen": -0.28231316804885864, "log_odds_ratio": -1.0810861587524414, "logits/chosen": 0.037826914340257645, "logits/rejected": 0.14222553372383118, "logps/chosen": -3.85330867767334, "logps/rejected": -3.55783748626709, "loss": 2.2536, "nll_loss": 2.1455039978027344, "rewards/accuracies": 0.5, "rewards/chosen": -0.38533085584640503, "rewards/margins": -0.02954714186489582, "rewards/rejected": -0.35578373074531555, "step": 690 }, { "epoch": 1.891854893908282, "grad_norm": 5.0487895011901855, "learning_rate": 5.47945205479452e-08, "log_odds_chosen": 0.33871740102767944, "log_odds_ratio": -0.9886988401412964, "logits/chosen": 0.05549152195453644, "logits/rejected": 0.008181564509868622, "logps/chosen": -2.5570051670074463, "logps/rejected": -2.847452163696289, "loss": 2.1121, "nll_loss": 2.013200044631958, "rewards/accuracies": 0.5, "rewards/chosen": -0.2557004988193512, "rewards/margins": 0.029044702649116516, "rewards/rejected": -0.2847452163696289, "step": 691 }, { "epoch": 1.894592744695414, "grad_norm": 6.2457733154296875, "learning_rate": 5.342465753424657e-08, "log_odds_chosen": -0.8656215667724609, "log_odds_ratio": -1.3388056755065918, "logits/chosen": -0.06229789927601814, "logits/rejected": -0.03802639991044998, "logps/chosen": -3.9222917556762695, "logps/rejected": -3.0693185329437256, "loss": 2.3147, "nll_loss": 2.1808676719665527, "rewards/accuracies": 0.25, "rewards/chosen": -0.3922291696071625, "rewards/margins": -0.08529731631278992, "rewards/rejected": -0.30693182349205017, "step": 692 }, { "epoch": 1.8973305954825461, "grad_norm": 5.798455238342285, "learning_rate": 5.205479452054794e-08, "log_odds_chosen": -0.43497663736343384, "log_odds_ratio": -1.0278010368347168, "logits/chosen": -0.07425659894943237, "logits/rejected": -0.1293366253376007, "logps/chosen": -3.5596752166748047, "logps/rejected": -3.112581729888916, "loss": 2.1875, "nll_loss": 2.08475399017334, "rewards/accuracies": 0.625, "rewards/chosen": -0.35596752166748047, "rewards/margins": -0.04470936954021454, "rewards/rejected": -0.3112581670284271, "step": 693 }, { "epoch": 1.9000684462696782, "grad_norm": 4.571294784545898, "learning_rate": 5.068493150684931e-08, "log_odds_chosen": 0.7204031944274902, "log_odds_ratio": -0.6856116056442261, "logits/chosen": -0.021283403038978577, "logits/rejected": -0.18612465262413025, "logps/chosen": -2.578399896621704, "logps/rejected": -3.2635951042175293, "loss": 2.1106, "nll_loss": 2.0420594215393066, "rewards/accuracies": 0.625, "rewards/chosen": -0.25784000754356384, "rewards/margins": 0.06851951777935028, "rewards/rejected": -0.32635951042175293, "step": 694 }, { "epoch": 1.9028062970568103, "grad_norm": 5.895979404449463, "learning_rate": 4.931506849315068e-08, "log_odds_chosen": -0.956085205078125, "log_odds_ratio": -1.3584669828414917, "logits/chosen": -0.26369887590408325, "logits/rejected": -0.1097855344414711, "logps/chosen": -3.56322979927063, "logps/rejected": -2.653989553451538, "loss": 2.2458, "nll_loss": 2.1099822521209717, "rewards/accuracies": 0.125, "rewards/chosen": -0.3563230037689209, "rewards/margins": -0.09092405438423157, "rewards/rejected": -0.26539894938468933, "step": 695 }, { "epoch": 1.9055441478439425, "grad_norm": 5.575479984283447, "learning_rate": 4.794520547945205e-08, "log_odds_chosen": -0.04294109344482422, "log_odds_ratio": -0.8777132034301758, "logits/chosen": -0.13757014274597168, "logits/rejected": -0.09569491446018219, "logps/chosen": -3.1010360717773438, "logps/rejected": -3.0639591217041016, "loss": 2.2612, "nll_loss": 2.173447847366333, "rewards/accuracies": 0.75, "rewards/chosen": -0.3101036250591278, "rewards/margins": -0.0037077125161886215, "rewards/rejected": -0.30639591813087463, "step": 696 }, { "epoch": 1.9082819986310746, "grad_norm": 5.836228370666504, "learning_rate": 4.657534246575342e-08, "log_odds_chosen": -0.42301568388938904, "log_odds_ratio": -0.975828230381012, "logits/chosen": -0.11947991698980331, "logits/rejected": -0.14131468534469604, "logps/chosen": -3.5089659690856934, "logps/rejected": -3.092228651046753, "loss": 2.1322, "nll_loss": 2.0346550941467285, "rewards/accuracies": 0.375, "rewards/chosen": -0.35089659690856934, "rewards/margins": -0.04167373478412628, "rewards/rejected": -0.30922287702560425, "step": 697 }, { "epoch": 1.9110198494182067, "grad_norm": 5.103291034698486, "learning_rate": 4.520547945205479e-08, "log_odds_chosen": 0.07591260969638824, "log_odds_ratio": -0.7958560585975647, "logits/chosen": 0.021693270653486252, "logits/rejected": -0.045982442796230316, "logps/chosen": -2.857455253601074, "logps/rejected": -2.9096360206604004, "loss": 2.1807, "nll_loss": 2.1010947227478027, "rewards/accuracies": 0.75, "rewards/chosen": -0.2857455313205719, "rewards/margins": 0.005218083038926125, "rewards/rejected": -0.29096361994743347, "step": 698 }, { "epoch": 1.913757700205339, "grad_norm": 5.68637228012085, "learning_rate": 4.383561643835616e-08, "log_odds_chosen": 0.44264841079711914, "log_odds_ratio": -0.8217244744300842, "logits/chosen": -0.07463565468788147, "logits/rejected": -0.10222889482975006, "logps/chosen": -3.2775468826293945, "logps/rejected": -3.7115731239318848, "loss": 2.2707, "nll_loss": 2.1884965896606445, "rewards/accuracies": 0.375, "rewards/chosen": -0.3277546763420105, "rewards/margins": 0.04340265318751335, "rewards/rejected": -0.37115734815597534, "step": 699 }, { "epoch": 1.916495550992471, "grad_norm": 6.230350971221924, "learning_rate": 4.246575342465753e-08, "log_odds_chosen": -0.5752593874931335, "log_odds_ratio": -1.106380581855774, "logits/chosen": -0.07745146751403809, "logits/rejected": 0.10424186289310455, "logps/chosen": -3.3442869186401367, "logps/rejected": -2.815304756164551, "loss": 2.2689, "nll_loss": 2.158270835876465, "rewards/accuracies": 0.375, "rewards/chosen": -0.33442869782447815, "rewards/margins": -0.052898213267326355, "rewards/rejected": -0.281530499458313, "step": 700 }, { "epoch": 1.919233401779603, "grad_norm": 5.264270782470703, "learning_rate": 4.10958904109589e-08, "log_odds_chosen": -0.08587852120399475, "log_odds_ratio": -0.7943084836006165, "logits/chosen": -0.00016872305423021317, "logits/rejected": -0.0308876633644104, "logps/chosen": -3.009120464324951, "logps/rejected": -2.9308133125305176, "loss": 2.2465, "nll_loss": 2.167092800140381, "rewards/accuracies": 0.5, "rewards/chosen": -0.3009120523929596, "rewards/margins": -0.007830701768398285, "rewards/rejected": -0.2930813431739807, "step": 701 }, { "epoch": 1.9219712525667352, "grad_norm": 5.218153476715088, "learning_rate": 3.972602739726028e-08, "log_odds_chosen": -0.6611402034759521, "log_odds_ratio": -1.2760014533996582, "logits/chosen": -0.20867598056793213, "logits/rejected": -0.18617083132266998, "logps/chosen": -2.873685359954834, "logps/rejected": -2.209658145904541, "loss": 2.1783, "nll_loss": 2.050736904144287, "rewards/accuracies": 0.625, "rewards/chosen": -0.2873685359954834, "rewards/margins": -0.06640271842479706, "rewards/rejected": -0.22096580266952515, "step": 702 }, { "epoch": 1.9247091033538672, "grad_norm": 5.437517166137695, "learning_rate": 3.835616438356165e-08, "log_odds_chosen": 0.0737241804599762, "log_odds_ratio": -0.7368035316467285, "logits/chosen": 0.06414929032325745, "logits/rejected": 0.029665473848581314, "logps/chosen": -2.9116568565368652, "logps/rejected": -2.9709036350250244, "loss": 2.1766, "nll_loss": 2.102879524230957, "rewards/accuracies": 0.5, "rewards/chosen": -0.29116567969322205, "rewards/margins": 0.005924679338932037, "rewards/rejected": -0.2970903813838959, "step": 703 }, { "epoch": 1.9274469541409993, "grad_norm": 5.62463903427124, "learning_rate": 3.6986301369863016e-08, "log_odds_chosen": -0.2602573037147522, "log_odds_ratio": -1.1548500061035156, "logits/chosen": -0.11644138395786285, "logits/rejected": -0.1654638946056366, "logps/chosen": -3.9084465503692627, "logps/rejected": -3.616156578063965, "loss": 2.2952, "nll_loss": 2.1797564029693604, "rewards/accuracies": 0.375, "rewards/chosen": -0.3908447027206421, "rewards/margins": -0.02922903001308441, "rewards/rejected": -0.3616156578063965, "step": 704 }, { "epoch": 1.9301848049281314, "grad_norm": 5.7441582679748535, "learning_rate": 3.5616438356164384e-08, "log_odds_chosen": -0.4350554645061493, "log_odds_ratio": -1.051883578300476, "logits/chosen": 0.004580514505505562, "logits/rejected": -0.0020808372646570206, "logps/chosen": -2.850236654281616, "logps/rejected": -2.3972885608673096, "loss": 2.1702, "nll_loss": 2.065051555633545, "rewards/accuracies": 0.375, "rewards/chosen": -0.28502365946769714, "rewards/margins": -0.045294806361198425, "rewards/rejected": -0.2397288680076599, "step": 705 }, { "epoch": 1.9329226557152634, "grad_norm": 4.859947204589844, "learning_rate": 3.424657534246575e-08, "log_odds_chosen": 0.20562514662742615, "log_odds_ratio": -0.6894518136978149, "logits/chosen": 0.0024423152208328247, "logits/rejected": -0.1278529316186905, "logps/chosen": -2.638270616531372, "logps/rejected": -2.801126480102539, "loss": 2.1782, "nll_loss": 2.1092538833618164, "rewards/accuracies": 0.625, "rewards/chosen": -0.26382705569267273, "rewards/margins": 0.016285600140690804, "rewards/rejected": -0.2801126539707184, "step": 706 }, { "epoch": 1.9356605065023955, "grad_norm": 5.3231587409973145, "learning_rate": 3.287671232876712e-08, "log_odds_chosen": 0.1865595430135727, "log_odds_ratio": -0.6831786036491394, "logits/chosen": 0.0854879766702652, "logits/rejected": 0.05306233465671539, "logps/chosen": -2.5390710830688477, "logps/rejected": -2.6886730194091797, "loss": 2.0901, "nll_loss": 2.0218255519866943, "rewards/accuracies": 0.75, "rewards/chosen": -0.25390711426734924, "rewards/margins": 0.014960195869207382, "rewards/rejected": -0.2688673138618469, "step": 707 }, { "epoch": 1.9383983572895276, "grad_norm": 5.6481404304504395, "learning_rate": 3.1506849315068497e-08, "log_odds_chosen": -0.36791127920150757, "log_odds_ratio": -0.993281900882721, "logits/chosen": -0.14833466708660126, "logits/rejected": -0.09935380518436432, "logps/chosen": -2.922384023666382, "logps/rejected": -2.538154125213623, "loss": 2.2002, "nll_loss": 2.1009135246276855, "rewards/accuracies": 0.375, "rewards/chosen": -0.29223841428756714, "rewards/margins": -0.03842298686504364, "rewards/rejected": -0.2538154423236847, "step": 708 }, { "epoch": 1.9411362080766599, "grad_norm": 5.813663482666016, "learning_rate": 3.0136986301369865e-08, "log_odds_chosen": 0.3537801206111908, "log_odds_ratio": -0.7281308174133301, "logits/chosen": 0.07875514030456543, "logits/rejected": 0.12528318166732788, "logps/chosen": -3.143986701965332, "logps/rejected": -3.4922144412994385, "loss": 2.1995, "nll_loss": 2.126659870147705, "rewards/accuracies": 0.375, "rewards/chosen": -0.31439870595932007, "rewards/margins": 0.03482275456190109, "rewards/rejected": -0.34922146797180176, "step": 709 }, { "epoch": 1.943874058863792, "grad_norm": 4.798208236694336, "learning_rate": 2.876712328767123e-08, "log_odds_chosen": -0.009471986442804337, "log_odds_ratio": -0.7867789268493652, "logits/chosen": 0.08604486286640167, "logits/rejected": 0.07339426875114441, "logps/chosen": -2.5844340324401855, "logps/rejected": -2.601017475128174, "loss": 2.1771, "nll_loss": 2.0983800888061523, "rewards/accuracies": 0.625, "rewards/chosen": -0.2584434151649475, "rewards/margins": 0.0016583409160375595, "rewards/rejected": -0.2601017355918884, "step": 710 }, { "epoch": 1.946611909650924, "grad_norm": 4.821718692779541, "learning_rate": 2.73972602739726e-08, "log_odds_chosen": -0.07268283516168594, "log_odds_ratio": -0.7786998152732849, "logits/chosen": 0.21587947010993958, "logits/rejected": 0.19980937242507935, "logps/chosen": -2.3406646251678467, "logps/rejected": -2.2778398990631104, "loss": 2.156, "nll_loss": 2.0781760215759277, "rewards/accuracies": 0.625, "rewards/chosen": -0.2340664565563202, "rewards/margins": -0.0062824636697769165, "rewards/rejected": -0.22778400778770447, "step": 711 }, { "epoch": 1.949349760438056, "grad_norm": 6.107242107391357, "learning_rate": 2.602739726027397e-08, "log_odds_chosen": -0.2893872857093811, "log_odds_ratio": -0.9456300139427185, "logits/chosen": 0.23668831586837769, "logits/rejected": 0.27632343769073486, "logps/chosen": -3.092430591583252, "logps/rejected": -2.8202548027038574, "loss": 2.1563, "nll_loss": 2.0617740154266357, "rewards/accuracies": 0.375, "rewards/chosen": -0.3092430531978607, "rewards/margins": -0.027217533439397812, "rewards/rejected": -0.2820255160331726, "step": 712 }, { "epoch": 1.9520876112251884, "grad_norm": 6.049031734466553, "learning_rate": 2.465753424657534e-08, "log_odds_chosen": -0.2130117118358612, "log_odds_ratio": -1.0140470266342163, "logits/chosen": -0.056858453899621964, "logits/rejected": 0.0344066247344017, "logps/chosen": -3.143157720565796, "logps/rejected": -2.8609702587127686, "loss": 2.1712, "nll_loss": 2.069767475128174, "rewards/accuracies": 0.5, "rewards/chosen": -0.3143157660961151, "rewards/margins": -0.028218712657690048, "rewards/rejected": -0.28609704971313477, "step": 713 }, { "epoch": 1.9548254620123204, "grad_norm": 5.795563220977783, "learning_rate": 2.328767123287671e-08, "log_odds_chosen": -0.3729017674922943, "log_odds_ratio": -0.9530424475669861, "logits/chosen": 0.12024345248937607, "logits/rejected": 0.1551714688539505, "logps/chosen": -3.2526021003723145, "logps/rejected": -2.895942449569702, "loss": 2.2671, "nll_loss": 2.1717543601989746, "rewards/accuracies": 0.375, "rewards/chosen": -0.325260192155838, "rewards/margins": -0.035665955394506454, "rewards/rejected": -0.28959423303604126, "step": 714 }, { "epoch": 1.9575633127994525, "grad_norm": 6.699540615081787, "learning_rate": 2.191780821917808e-08, "log_odds_chosen": -0.25421586632728577, "log_odds_ratio": -1.099988341331482, "logits/chosen": 0.17864617705345154, "logits/rejected": 0.2849491238594055, "logps/chosen": -4.136389255523682, "logps/rejected": -3.8888556957244873, "loss": 2.2785, "nll_loss": 2.1685283184051514, "rewards/accuracies": 0.375, "rewards/chosen": -0.4136389493942261, "rewards/margins": -0.02475338615477085, "rewards/rejected": -0.3888855576515198, "step": 715 }, { "epoch": 1.9603011635865846, "grad_norm": 5.788903713226318, "learning_rate": 2.054794520547945e-08, "log_odds_chosen": 0.5568463802337646, "log_odds_ratio": -0.910220742225647, "logits/chosen": -0.14161145687103271, "logits/rejected": -0.14735209941864014, "logps/chosen": -3.3034071922302246, "logps/rejected": -3.823307514190674, "loss": 2.2572, "nll_loss": 2.166182041168213, "rewards/accuracies": 0.625, "rewards/chosen": -0.33034074306488037, "rewards/margins": 0.05199005454778671, "rewards/rejected": -0.3823307752609253, "step": 716 }, { "epoch": 1.9630390143737166, "grad_norm": 6.221311569213867, "learning_rate": 1.9178082191780824e-08, "log_odds_chosen": 0.07863803952932358, "log_odds_ratio": -0.691870927810669, "logits/chosen": 0.011558213271200657, "logits/rejected": 0.05415649712085724, "logps/chosen": -3.1990418434143066, "logps/rejected": -3.2615957260131836, "loss": 2.1579, "nll_loss": 2.0887227058410645, "rewards/accuracies": 0.75, "rewards/chosen": -0.3199041783809662, "rewards/margins": 0.0062553733587265015, "rewards/rejected": -0.3261595368385315, "step": 717 }, { "epoch": 1.9657768651608487, "grad_norm": 5.979490280151367, "learning_rate": 1.7808219178082192e-08, "log_odds_chosen": -0.441130131483078, "log_odds_ratio": -1.0223751068115234, "logits/chosen": -0.04183216392993927, "logits/rejected": 0.03758581727743149, "logps/chosen": -3.120626449584961, "logps/rejected": -2.682558536529541, "loss": 2.2545, "nll_loss": 2.1522879600524902, "rewards/accuracies": 0.375, "rewards/chosen": -0.31206265091896057, "rewards/margins": -0.04380679130554199, "rewards/rejected": -0.2682558596134186, "step": 718 }, { "epoch": 1.9685147159479808, "grad_norm": 5.685652732849121, "learning_rate": 1.643835616438356e-08, "log_odds_chosen": 0.3293711543083191, "log_odds_ratio": -0.5965292453765869, "logits/chosen": -0.04839682579040527, "logits/rejected": 0.01947958394885063, "logps/chosen": -2.6848816871643066, "logps/rejected": -2.9665212631225586, "loss": 2.1458, "nll_loss": 2.0861384868621826, "rewards/accuracies": 0.625, "rewards/chosen": -0.26848816871643066, "rewards/margins": 0.028163960203528404, "rewards/rejected": -0.2966521382331848, "step": 719 }, { "epoch": 1.9712525667351128, "grad_norm": 5.23280668258667, "learning_rate": 1.5068493150684933e-08, "log_odds_chosen": 0.41289186477661133, "log_odds_ratio": -0.6361802816390991, "logits/chosen": -0.16299504041671753, "logits/rejected": -0.16157224774360657, "logps/chosen": -2.8198604583740234, "logps/rejected": -3.199557304382324, "loss": 2.164, "nll_loss": 2.100358009338379, "rewards/accuracies": 0.75, "rewards/chosen": -0.2819860577583313, "rewards/margins": 0.0379696860909462, "rewards/rejected": -0.3199557363986969, "step": 720 }, { "epoch": 1.973990417522245, "grad_norm": 7.400156021118164, "learning_rate": 1.36986301369863e-08, "log_odds_chosen": -0.9321919083595276, "log_odds_ratio": -1.471608281135559, "logits/chosen": 0.024516604840755463, "logits/rejected": 0.1624772548675537, "logps/chosen": -3.885316848754883, "logps/rejected": -2.9742608070373535, "loss": 2.3469, "nll_loss": 2.1997647285461426, "rewards/accuracies": 0.25, "rewards/chosen": -0.3885316848754883, "rewards/margins": -0.09110558778047562, "rewards/rejected": -0.2974260747432709, "step": 721 }, { "epoch": 1.976728268309377, "grad_norm": 4.89007568359375, "learning_rate": 1.232876712328767e-08, "log_odds_chosen": 0.19119930267333984, "log_odds_ratio": -0.7233423590660095, "logits/chosen": 0.008163519203662872, "logits/rejected": -0.10267373919487, "logps/chosen": -2.1398820877075195, "logps/rejected": -2.322624683380127, "loss": 2.0795, "nll_loss": 2.0071616172790527, "rewards/accuracies": 0.625, "rewards/chosen": -0.21398822963237762, "rewards/margins": 0.01827426068484783, "rewards/rejected": -0.2322624772787094, "step": 722 }, { "epoch": 1.9794661190965093, "grad_norm": 5.919083595275879, "learning_rate": 1.095890410958904e-08, "log_odds_chosen": -0.39727991819381714, "log_odds_ratio": -1.2603986263275146, "logits/chosen": -0.10200873017311096, "logits/rejected": -0.07876458019018173, "logps/chosen": -3.570836067199707, "logps/rejected": -3.158310890197754, "loss": 2.2039, "nll_loss": 2.0778841972351074, "rewards/accuracies": 0.375, "rewards/chosen": -0.35708358883857727, "rewards/margins": -0.0412524975836277, "rewards/rejected": -0.31583109498023987, "step": 723 }, { "epoch": 1.9822039698836413, "grad_norm": 5.877322196960449, "learning_rate": 9.589041095890412e-09, "log_odds_chosen": -0.48881709575653076, "log_odds_ratio": -1.062612533569336, "logits/chosen": -0.16062119603157043, "logits/rejected": -0.09331938624382019, "logps/chosen": -3.3591151237487793, "logps/rejected": -2.890413999557495, "loss": 2.2185, "nll_loss": 2.1121997833251953, "rewards/accuracies": 0.375, "rewards/chosen": -0.33591151237487793, "rewards/margins": -0.04687011241912842, "rewards/rejected": -0.2890413999557495, "step": 724 }, { "epoch": 1.9849418206707734, "grad_norm": 5.477645397186279, "learning_rate": 8.21917808219178e-09, "log_odds_chosen": -0.36292198300361633, "log_odds_ratio": -0.9569050073623657, "logits/chosen": -0.009620681405067444, "logits/rejected": 0.07183997333049774, "logps/chosen": -2.81547474861145, "logps/rejected": -2.459580183029175, "loss": 2.1402, "nll_loss": 2.0445003509521484, "rewards/accuracies": 0.5, "rewards/chosen": -0.281547486782074, "rewards/margins": -0.035589441657066345, "rewards/rejected": -0.24595804512500763, "step": 725 }, { "epoch": 1.9876796714579057, "grad_norm": 5.731087684631348, "learning_rate": 6.84931506849315e-09, "log_odds_chosen": -0.19913747906684875, "log_odds_ratio": -0.9507596492767334, "logits/chosen": -0.05412360653281212, "logits/rejected": -0.11122387647628784, "logps/chosen": -3.0975873470306396, "logps/rejected": -2.9085607528686523, "loss": 2.1747, "nll_loss": 2.079608201980591, "rewards/accuracies": 0.25, "rewards/chosen": -0.3097587525844574, "rewards/margins": -0.018902672454714775, "rewards/rejected": -0.2908560633659363, "step": 726 }, { "epoch": 1.9904175222450378, "grad_norm": 6.665831565856934, "learning_rate": 5.47945205479452e-09, "log_odds_chosen": -0.16848522424697876, "log_odds_ratio": -1.0284419059753418, "logits/chosen": 0.016659555956721306, "logits/rejected": 0.01461922749876976, "logps/chosen": -3.4161641597747803, "logps/rejected": -3.2252700328826904, "loss": 2.2255, "nll_loss": 2.1226110458374023, "rewards/accuracies": 0.5, "rewards/chosen": -0.3416164219379425, "rewards/margins": -0.01908942125737667, "rewards/rejected": -0.3225269913673401, "step": 727 }, { "epoch": 1.9931553730321698, "grad_norm": 5.191585063934326, "learning_rate": 4.10958904109589e-09, "log_odds_chosen": 0.05021640658378601, "log_odds_ratio": -0.6863888502120972, "logits/chosen": -0.09850353747606277, "logits/rejected": -0.11650057882070541, "logps/chosen": -2.6401731967926025, "logps/rejected": -2.684995412826538, "loss": 2.1397, "nll_loss": 2.0710203647613525, "rewards/accuracies": 0.5, "rewards/chosen": -0.26401734352111816, "rewards/margins": 0.004482194781303406, "rewards/rejected": -0.26849955320358276, "step": 728 }, { "epoch": 1.995893223819302, "grad_norm": 5.1518635749816895, "learning_rate": 2.73972602739726e-09, "log_odds_chosen": -0.2960870563983917, "log_odds_ratio": -0.9047174453735352, "logits/chosen": -0.12056488543748856, "logits/rejected": -0.14707717299461365, "logps/chosen": -2.535252094268799, "logps/rejected": -2.259671449661255, "loss": 2.1815, "nll_loss": 2.0910210609436035, "rewards/accuracies": 0.5, "rewards/chosen": -0.2535251975059509, "rewards/margins": -0.027558038011193275, "rewards/rejected": -0.2259671539068222, "step": 729 }, { "epoch": 1.998631074606434, "grad_norm": 6.386074066162109, "learning_rate": 1.36986301369863e-09, "log_odds_chosen": -0.34166598320007324, "log_odds_ratio": -1.5606868267059326, "logits/chosen": -0.08939828723669052, "logits/rejected": -0.06152850016951561, "logps/chosen": -4.007881164550781, "logps/rejected": -3.6390252113342285, "loss": 2.3148, "nll_loss": 2.1587107181549072, "rewards/accuracies": 0.25, "rewards/chosen": -0.4007880985736847, "rewards/margins": -0.036885615438222885, "rewards/rejected": -0.3639025092124939, "step": 730 } ], "logging_steps": 1, "max_steps": 730, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }